@promptbook/website-crawler 0.85.0-11 → 0.85.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -6,7 +6,7 @@ import type { PipelineCollection } from '../PipelineCollection';
|
|
|
6
6
|
/**
|
|
7
7
|
* Options for `createCollectionFromDirectory` function
|
|
8
8
|
*
|
|
9
|
-
* Note: `rootDirname` is not needed because it is the folder in which `.book.md` file is located
|
|
9
|
+
* Note: `rootDirname` is not needed because it is the folder in which `.book` or `.book.md` file is located
|
|
10
10
|
* This is not the same as `path`, which is the first argument of `createCollectionFromDirectory` - it can be a subfolder
|
|
11
11
|
*/
|
|
12
12
|
type CreatePipelineCollectionFromDirectoryOptions = Omit<PrepareAndScrapeOptions, 'rootDirname'> & {
|
|
@@ -51,13 +51,13 @@ type CreatePipelineCollectionFromDirectoryOptions = Omit<PrepareAndScrapeOptions
|
|
|
51
51
|
*
|
|
52
52
|
* Note: Works only in Node.js environment because it reads the file system
|
|
53
53
|
*
|
|
54
|
-
* @param
|
|
54
|
+
* @param rootPath - path to the directory with pipelines
|
|
55
55
|
* @param tools - Execution tools to be used for pipeline preparation if needed - If not provided, `$provideExecutionToolsForNode` will be used
|
|
56
56
|
* @param options - Options for the collection creation
|
|
57
57
|
* @returns PipelineCollection
|
|
58
58
|
* @public exported from `@promptbook/node`
|
|
59
59
|
*/
|
|
60
|
-
export declare function createCollectionFromDirectory(
|
|
60
|
+
export declare function createCollectionFromDirectory(rootPath: string_dirname, tools?: Pick<ExecutionTools, 'llm' | 'fs' | 'scrapers'>, options?: CreatePipelineCollectionFromDirectoryOptions): Promise<PipelineCollection>;
|
|
61
61
|
export {};
|
|
62
62
|
/**
|
|
63
63
|
* TODO: [🖇] What about symlinks? Maybe option isSymlinksFollowed
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/website-crawler",
|
|
3
|
-
"version": "0.85.0-11",
|
|
3
|
+
"version": "0.85.0-13",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"private": false,
|
|
6
6
|
"sideEffects": false,
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"module": "./esm/index.es.js",
|
|
48
48
|
"typings": "./esm/typings/src/_packages/website-crawler.index.d.ts",
|
|
49
49
|
"peerDependencies": {
|
|
50
|
-
"@promptbook/core": "0.85.0-11"
|
|
50
|
+
"@promptbook/core": "0.85.0-13"
|
|
51
51
|
},
|
|
52
52
|
"dependencies": {
|
|
53
53
|
"@mozilla/readability": "0.5.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.85.0-
|
|
28
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.85.0-12';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -1785,57 +1785,6 @@
|
|
|
1785
1785
|
return true;
|
|
1786
1786
|
}
|
|
1787
1787
|
|
|
1788
|
-
/**
|
|
1789
|
-
* Checks if a hostname or IP address is reserved for private networks or localhost.
|
|
1790
|
-
*
|
|
1791
|
-
* Note: There are two similar functions:
|
|
1792
|
-
* - `isUrlOnPrivateNetwork` which tests full URL
|
|
1793
|
-
* - `isHostnameOnPrivateNetwork` *(this one)* which tests just hostname
|
|
1794
|
-
*
|
|
1795
|
-
* @public exported from `@promptbook/utils`
|
|
1796
|
-
*/
|
|
1797
|
-
function isHostnameOnPrivateNetwork(hostname) {
|
|
1798
|
-
if (hostname === 'example.com' ||
|
|
1799
|
-
hostname === 'localhost' ||
|
|
1800
|
-
hostname.endsWith('.localhost') ||
|
|
1801
|
-
hostname.endsWith('.local') ||
|
|
1802
|
-
hostname.endsWith('.test') ||
|
|
1803
|
-
hostname === '127.0.0.1' ||
|
|
1804
|
-
hostname === '::1') {
|
|
1805
|
-
return true;
|
|
1806
|
-
}
|
|
1807
|
-
if (hostname.includes(':')) {
|
|
1808
|
-
// IPv6
|
|
1809
|
-
var ipParts = hostname.split(':');
|
|
1810
|
-
return ipParts[0] === 'fc00' || ipParts[0] === 'fd00' || ipParts[0] === 'fe80';
|
|
1811
|
-
}
|
|
1812
|
-
else {
|
|
1813
|
-
// IPv4
|
|
1814
|
-
var ipParts = hostname.split('.').map(function (part) { return Number.parseInt(part, 10); });
|
|
1815
|
-
return (ipParts[0] === 10 ||
|
|
1816
|
-
(ipParts[0] === 172 && ipParts[1] >= 16 && ipParts[1] <= 31) ||
|
|
1817
|
-
(ipParts[0] === 192 && ipParts[1] === 168));
|
|
1818
|
-
}
|
|
1819
|
-
}
|
|
1820
|
-
|
|
1821
|
-
/**
|
|
1822
|
-
* Checks if a URL points to a host reserved for private networks or localhost.
|
|
1823
|
-
*
|
|
1824
|
-
* Note: There are two similar functions:
|
|
1825
|
-
* - `isUrlOnPrivateNetwork` *(this one)* which tests full URL
|
|
1826
|
-
* - `isHostnameOnPrivateNetwork` which tests just hostname
|
|
1827
|
-
*
|
|
1828
|
-
* @param {string | URL} url - The URL to check (a string is parsed with `new URL`).
|
|
1829
|
-
* @returns {boolean} Returns true if the hostname of the URL is reserved for private networks or localhost, otherwise false.
|
|
1830
|
-
* @public exported from `@promptbook/utils`
|
|
1831
|
-
*/
|
|
1832
|
-
function isUrlOnPrivateNetwork(url) {
|
|
1833
|
-
if (typeof url === 'string') {
|
|
1834
|
-
url = new URL(url);
|
|
1835
|
-
}
|
|
1836
|
-
return isHostnameOnPrivateNetwork(url.hostname);
|
|
1837
|
-
}
|
|
1838
|
-
|
|
1839
1788
|
/**
|
|
1840
1789
|
* Tests if given string is a valid pipeline URL.
|
|
1841
1790
|
*
|
|
@@ -1849,16 +1798,19 @@
|
|
|
1849
1798
|
if (!isValidUrl(url)) {
|
|
1850
1799
|
return false;
|
|
1851
1800
|
}
|
|
1852
|
-
if (!url.startsWith('https://')) {
|
|
1801
|
+
if (!url.startsWith('https://') && !url.startsWith('http://') /* <- Note: [👣] */) {
|
|
1853
1802
|
return false;
|
|
1854
1803
|
}
|
|
1855
1804
|
if (url.includes('#')) {
|
|
1856
1805
|
// TODO: [🐠]
|
|
1857
1806
|
return false;
|
|
1858
1807
|
}
|
|
1808
|
+
/*
|
|
1809
|
+
Note: [👣][🧠] Is it secure to allow pipeline URLs on private and unsecured networks?
|
|
1859
1810
|
if (isUrlOnPrivateNetwork(url)) {
|
|
1860
1811
|
return false;
|
|
1861
1812
|
}
|
|
1813
|
+
*/
|
|
1862
1814
|
return true;
|
|
1863
1815
|
}
|
|
1864
1816
|
/**
|