@promptbook/website-crawler 0.75.3 → 0.75.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +141 -104
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +6 -2
- package/esm/typings/src/config.d.ts +9 -1
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +12 -0
- package/esm/typings/src/errors/{index.d.ts → 0-index.d.ts} +10 -2
- package/esm/typings/src/errors/utils/ErrorJson.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$registeredScrapersMessage.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +141 -104
- package/umd/index.umd.js.map +1 -1
|
@@ -24,6 +24,7 @@ import { DEFAULT_REMOTE_URL } from '../config';
|
|
|
24
24
|
import { DEFAULT_REMOTE_URL_PATH } from '../config';
|
|
25
25
|
import { DEFAULT_CSV_SETTINGS } from '../config';
|
|
26
26
|
import { DEFAULT_IS_VERBOSE } from '../config';
|
|
27
|
+
import { SET_IS_VERBOSE } from '../config';
|
|
27
28
|
import { DEFAULT_IS_AUTO_INSTALLED } from '../config';
|
|
28
29
|
import { pipelineJsonToString } from '../conversion/pipelineJsonToString';
|
|
29
30
|
import { pipelineStringToJson } from '../conversion/pipelineStringToJson';
|
|
@@ -33,11 +34,12 @@ import { stringifyPipelineJson } from '../conversion/utils/stringifyPipelineJson
|
|
|
33
34
|
import { validatePipeline } from '../conversion/validation/validatePipeline';
|
|
34
35
|
import { CallbackInterfaceTools } from '../dialogs/callback/CallbackInterfaceTools';
|
|
35
36
|
import type { CallbackInterfaceToolsOptions } from '../dialogs/callback/CallbackInterfaceToolsOptions';
|
|
37
|
+
import { BoilerplateError } from '../errors/0-BoilerplateError';
|
|
38
|
+
import { ERRORS } from '../errors/0-index';
|
|
36
39
|
import { AbstractFormatError } from '../errors/AbstractFormatError';
|
|
37
40
|
import { CollectionError } from '../errors/CollectionError';
|
|
38
41
|
import { EnvironmentMismatchError } from '../errors/EnvironmentMismatchError';
|
|
39
42
|
import { ExpectError } from '../errors/ExpectError';
|
|
40
|
-
import { ERRORS } from '../errors/index';
|
|
41
43
|
import { KnowledgeScrapeError } from '../errors/KnowledgeScrapeError';
|
|
42
44
|
import { LimitReachedError } from '../errors/LimitReachedError';
|
|
43
45
|
import { MissingToolsError } from '../errors/MissingToolsError';
|
|
@@ -136,6 +138,7 @@ export { DEFAULT_REMOTE_URL };
|
|
|
136
138
|
export { DEFAULT_REMOTE_URL_PATH };
|
|
137
139
|
export { DEFAULT_CSV_SETTINGS };
|
|
138
140
|
export { DEFAULT_IS_VERBOSE };
|
|
141
|
+
export { SET_IS_VERBOSE };
|
|
139
142
|
export { DEFAULT_IS_AUTO_INSTALLED };
|
|
140
143
|
export { pipelineJsonToString };
|
|
141
144
|
export { pipelineStringToJson };
|
|
@@ -145,11 +148,12 @@ export { stringifyPipelineJson };
|
|
|
145
148
|
export { validatePipeline };
|
|
146
149
|
export { CallbackInterfaceTools };
|
|
147
150
|
export type { CallbackInterfaceToolsOptions };
|
|
151
|
+
export { BoilerplateError };
|
|
152
|
+
export { ERRORS };
|
|
148
153
|
export { AbstractFormatError };
|
|
149
154
|
export { CollectionError };
|
|
150
155
|
export { EnvironmentMismatchError };
|
|
151
156
|
export { ExpectError };
|
|
152
|
-
export { ERRORS };
|
|
153
157
|
export { KnowledgeScrapeError };
|
|
154
158
|
export { LimitReachedError };
|
|
155
159
|
export { MissingToolsError };
|
|
@@ -204,7 +204,15 @@ export declare const DEFAULT_CSV_SETTINGS: CsvSettings;
|
|
|
204
204
|
*
|
|
205
205
|
* @public exported from `@promptbook/core`
|
|
206
206
|
*/
|
|
207
|
-
export declare
|
|
207
|
+
export declare let DEFAULT_IS_VERBOSE: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* @@@
|
|
210
|
+
*
|
|
211
|
+
* Note: This is experimental feature
|
|
212
|
+
*
|
|
213
|
+
* @public exported from `@promptbook/core`
|
|
214
|
+
*/
|
|
215
|
+
export declare function SET_IS_VERBOSE(isVerbose: boolean): void;
|
|
208
216
|
/**
|
|
209
217
|
* @@@
|
|
210
218
|
*
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This error indicates @@@
|
|
3
|
+
*
|
|
4
|
+
* @public exported from `@promptbook/core`
|
|
5
|
+
*/
|
|
6
|
+
export declare class BoilerplateError extends Error {
|
|
7
|
+
readonly name = "BoilerplateError";
|
|
8
|
+
constructor(message: string);
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* TODO: @@@ Do not forget to add the error into `0-index.ts` ERRORS
|
|
12
|
+
*/
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import { CsvFormatError } from '../formats/csv/CsvFormatError';
|
|
2
|
+
import { AbstractFormatError } from './AbstractFormatError';
|
|
1
3
|
import { CollectionError } from './CollectionError';
|
|
2
4
|
import { EnvironmentMismatchError } from './EnvironmentMismatchError';
|
|
3
5
|
import { ExpectError } from './ExpectError';
|
|
6
|
+
import { KnowledgeScrapeError } from './KnowledgeScrapeError';
|
|
4
7
|
import { LimitReachedError } from './LimitReachedError';
|
|
8
|
+
import { MissingToolsError } from './MissingToolsError';
|
|
5
9
|
import { NotFoundError } from './NotFoundError';
|
|
6
10
|
import { NotYetImplementedError } from './NotYetImplementedError';
|
|
7
11
|
import { ParseError } from './ParseError';
|
|
@@ -15,10 +19,14 @@ import { UnexpectedError } from './UnexpectedError';
|
|
|
15
19
|
* @public exported from `@promptbook/core`
|
|
16
20
|
*/
|
|
17
21
|
export declare const ERRORS: {
|
|
18
|
-
readonly
|
|
22
|
+
readonly AbstractFormatError: typeof AbstractFormatError;
|
|
23
|
+
readonly CsvFormatError: typeof CsvFormatError;
|
|
19
24
|
readonly CollectionError: typeof CollectionError;
|
|
20
25
|
readonly EnvironmentMismatchError: typeof EnvironmentMismatchError;
|
|
26
|
+
readonly ExpectError: typeof ExpectError;
|
|
27
|
+
readonly KnowledgeScrapeError: typeof KnowledgeScrapeError;
|
|
21
28
|
readonly LimitReachedError: typeof LimitReachedError;
|
|
29
|
+
readonly MissingToolsError: typeof MissingToolsError;
|
|
22
30
|
readonly NotFoundError: typeof NotFoundError;
|
|
23
31
|
readonly NotYetImplementedError: typeof NotYetImplementedError;
|
|
24
32
|
readonly ParseError: typeof ParseError;
|
|
@@ -29,4 +37,4 @@ export declare const ERRORS: {
|
|
|
29
37
|
};
|
|
30
38
|
/**
|
|
31
39
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
32
|
-
*/
|
|
40
|
+
*/
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { string_markdown } from '../../../types/typeAliases';
|
|
2
|
+
import type { Scraper } from '../Scraper';
|
|
2
3
|
/**
|
|
3
4
|
* Creates a message with all registered scrapers
|
|
4
5
|
*
|
|
@@ -6,7 +7,7 @@ import type { string_markdown } from '../../../types/typeAliases';
|
|
|
6
7
|
*
|
|
7
8
|
* @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
|
|
8
9
|
*/
|
|
9
|
-
export declare function $registeredScrapersMessage(): string_markdown;
|
|
10
|
+
export declare function $registeredScrapersMessage(availableScrapers: ReadonlyArray<Scraper>): string_markdown;
|
|
10
11
|
/**
|
|
11
12
|
* TODO: [®] DRY Register logic
|
|
12
13
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/website-crawler",
|
|
3
|
-
"version": "0.75.
|
|
3
|
+
"version": "0.75.5",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"--note-0": " <- [🐊]",
|
|
6
6
|
"private": false,
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"module": "./esm/index.es.js",
|
|
55
55
|
"typings": "./esm/typings/src/_packages/website-crawler.index.d.ts",
|
|
56
56
|
"peerDependencies": {
|
|
57
|
-
"@promptbook/core": "0.75.
|
|
57
|
+
"@promptbook/core": "0.75.5"
|
|
58
58
|
},
|
|
59
59
|
"dependencies": {
|
|
60
60
|
"@mozilla/readability": "0.5.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
*
|
|
23
23
|
* @see https://github.com/webgptorg/promptbook
|
|
24
24
|
*/
|
|
25
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.75.
|
|
25
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.75.4';
|
|
26
26
|
/**
|
|
27
27
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
28
28
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -2090,6 +2090,42 @@
|
|
|
2090
2090
|
return PipelineExecutionError;
|
|
2091
2091
|
}(Error));
|
|
2092
2092
|
|
|
2093
|
+
/**
|
|
2094
|
+
* This error indicates problems parsing the format value
|
|
2095
|
+
*
|
|
2096
|
+
* For example, when the format value is not a valid JSON or CSV
|
|
2097
|
+
* This is not thrown directly but in extended classes
|
|
2098
|
+
*
|
|
2099
|
+
* @public exported from `@promptbook/core`
|
|
2100
|
+
*/
|
|
2101
|
+
var AbstractFormatError = /** @class */ (function (_super) {
|
|
2102
|
+
__extends(AbstractFormatError, _super);
|
|
2103
|
+
// Note: To allow instanceof do not put here error `name`
|
|
2104
|
+
// public readonly name = 'AbstractFormatError';
|
|
2105
|
+
function AbstractFormatError(message) {
|
|
2106
|
+
var _this = _super.call(this, message) || this;
|
|
2107
|
+
Object.setPrototypeOf(_this, AbstractFormatError.prototype);
|
|
2108
|
+
return _this;
|
|
2109
|
+
}
|
|
2110
|
+
return AbstractFormatError;
|
|
2111
|
+
}(Error));
|
|
2112
|
+
|
|
2113
|
+
/**
|
|
2114
|
+
* This error indicates problem with parsing of CSV
|
|
2115
|
+
*
|
|
2116
|
+
* @public exported from `@promptbook/core`
|
|
2117
|
+
*/
|
|
2118
|
+
var CsvFormatError = /** @class */ (function (_super) {
|
|
2119
|
+
__extends(CsvFormatError, _super);
|
|
2120
|
+
function CsvFormatError(message) {
|
|
2121
|
+
var _this = _super.call(this, message) || this;
|
|
2122
|
+
_this.name = 'CsvFormatError';
|
|
2123
|
+
Object.setPrototypeOf(_this, CsvFormatError.prototype);
|
|
2124
|
+
return _this;
|
|
2125
|
+
}
|
|
2126
|
+
return CsvFormatError;
|
|
2127
|
+
}(AbstractFormatError));
|
|
2128
|
+
|
|
2093
2129
|
/**
|
|
2094
2130
|
* This error indicates that the pipeline collection cannot be propperly loaded
|
|
2095
2131
|
*
|
|
@@ -2147,10 +2183,14 @@
|
|
|
2147
2183
|
* @public exported from `@promptbook/core`
|
|
2148
2184
|
*/
|
|
2149
2185
|
var ERRORS = {
|
|
2150
|
-
|
|
2186
|
+
AbstractFormatError: AbstractFormatError,
|
|
2187
|
+
CsvFormatError: CsvFormatError,
|
|
2151
2188
|
CollectionError: CollectionError,
|
|
2152
2189
|
EnvironmentMismatchError: EnvironmentMismatchError,
|
|
2190
|
+
ExpectError: ExpectError,
|
|
2191
|
+
KnowledgeScrapeError: KnowledgeScrapeError,
|
|
2153
2192
|
LimitReachedError: LimitReachedError,
|
|
2193
|
+
MissingToolsError: MissingToolsError,
|
|
2154
2194
|
NotFoundError: NotFoundError,
|
|
2155
2195
|
NotYetImplementedError: NotYetImplementedError,
|
|
2156
2196
|
ParseError: ParseError,
|
|
@@ -2970,8 +3010,8 @@
|
|
|
2970
3010
|
*
|
|
2971
3011
|
* @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
|
|
2972
3012
|
*/
|
|
2973
|
-
function $registeredScrapersMessage() {
|
|
2974
|
-
var e_1, _a, e_2, _b;
|
|
3013
|
+
function $registeredScrapersMessage(availableScrapers) {
|
|
3014
|
+
var e_1, _a, e_2, _b, e_3, _c;
|
|
2975
3015
|
/**
|
|
2976
3016
|
* Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
|
|
2977
3017
|
*/
|
|
@@ -2983,15 +3023,15 @@
|
|
|
2983
3023
|
all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
|
|
2984
3024
|
};
|
|
2985
3025
|
try {
|
|
2986
|
-
for (var
|
|
2987
|
-
var
|
|
3026
|
+
for (var _d = __values($scrapersMetadataRegister.list()), _e = _d.next(); !_e.done; _e = _d.next()) {
|
|
3027
|
+
var _f = _e.value, packageName = _f.packageName, className = _f.className, mimeTypes = _f.mimeTypes, documentationUrl = _f.documentationUrl, isAvilableInBrowser = _f.isAvilableInBrowser;
|
|
2988
3028
|
_loop_1(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
|
|
2989
3029
|
}
|
|
2990
3030
|
}
|
|
2991
3031
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
2992
3032
|
finally {
|
|
2993
3033
|
try {
|
|
2994
|
-
if (
|
|
3034
|
+
if (_e && !_e.done && (_a = _d.return)) _a.call(_d);
|
|
2995
3035
|
}
|
|
2996
3036
|
finally { if (e_1) throw e_1.error; }
|
|
2997
3037
|
}
|
|
@@ -3002,18 +3042,31 @@
|
|
|
3002
3042
|
all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
|
|
3003
3043
|
};
|
|
3004
3044
|
try {
|
|
3005
|
-
for (var
|
|
3006
|
-
var
|
|
3045
|
+
for (var _g = __values($scrapersRegister.list()), _h = _g.next(); !_h.done; _h = _g.next()) {
|
|
3046
|
+
var _j = _h.value, packageName = _j.packageName, className = _j.className, mimeTypes = _j.mimeTypes, documentationUrl = _j.documentationUrl, isAvilableInBrowser = _j.isAvilableInBrowser;
|
|
3007
3047
|
_loop_2(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
|
|
3008
3048
|
}
|
|
3009
3049
|
}
|
|
3010
3050
|
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
3011
3051
|
finally {
|
|
3012
3052
|
try {
|
|
3013
|
-
if (
|
|
3053
|
+
if (_h && !_h.done && (_b = _g.return)) _b.call(_g);
|
|
3014
3054
|
}
|
|
3015
3055
|
finally { if (e_2) throw e_2.error; }
|
|
3016
3056
|
}
|
|
3057
|
+
try {
|
|
3058
|
+
for (var availableScrapers_1 = __values(availableScrapers), availableScrapers_1_1 = availableScrapers_1.next(); !availableScrapers_1_1.done; availableScrapers_1_1 = availableScrapers_1.next()) {
|
|
3059
|
+
var metadata_1 = availableScrapers_1_1.value.metadata;
|
|
3060
|
+
all.push(metadata_1);
|
|
3061
|
+
}
|
|
3062
|
+
}
|
|
3063
|
+
catch (e_3_1) { e_3 = { error: e_3_1 }; }
|
|
3064
|
+
finally {
|
|
3065
|
+
try {
|
|
3066
|
+
if (availableScrapers_1_1 && !availableScrapers_1_1.done && (_c = availableScrapers_1.return)) _c.call(availableScrapers_1);
|
|
3067
|
+
}
|
|
3068
|
+
finally { if (e_3) throw e_3.error; }
|
|
3069
|
+
}
|
|
3017
3070
|
var metadata = all.map(function (metadata) {
|
|
3018
3071
|
var isMetadataAviailable = $scrapersMetadataRegister
|
|
3019
3072
|
.list()
|
|
@@ -3027,42 +3080,44 @@
|
|
|
3027
3080
|
var packageName = _a.packageName, className = _a.className;
|
|
3028
3081
|
return metadata.packageName === packageName && metadata.className === className;
|
|
3029
3082
|
});
|
|
3030
|
-
|
|
3083
|
+
var isAvilableInTools = availableScrapers.some(function (_a) {
|
|
3084
|
+
var _b = _a.metadata, packageName = _b.packageName, className = _b.className;
|
|
3085
|
+
return metadata.packageName === packageName && metadata.className === className;
|
|
3086
|
+
});
|
|
3087
|
+
return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled, isAvilableInTools: isAvilableInTools });
|
|
3031
3088
|
});
|
|
3032
3089
|
if (metadata.length === 0) {
|
|
3033
|
-
return "No scrapers are available";
|
|
3090
|
+
return spaceTrim__default["default"]("\n **No scrapers are available**\n\n This is a unexpected behavior, you are probably using some broken version of Promptbook\n At least there should be available the metadata of the scrapers\n ");
|
|
3034
3091
|
}
|
|
3035
3092
|
return spaceTrim__default["default"](function (block) { return "\n Available scrapers are:\n ".concat(block(metadata
|
|
3036
3093
|
.map(function (_a, i) {
|
|
3037
|
-
var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser;
|
|
3038
|
-
var more;
|
|
3039
|
-
// TODO:
|
|
3040
|
-
if (
|
|
3041
|
-
more
|
|
3042
|
-
}
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
more = "(installed)";
|
|
3056
|
-
}
|
|
3057
|
-
else {
|
|
3058
|
-
more = "*(unknown state, looks like a unexpected behavior)*";
|
|
3059
|
-
}
|
|
3094
|
+
var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser, isAvilableInTools = _a.isAvilableInTools;
|
|
3095
|
+
var more = [];
|
|
3096
|
+
// TODO: [🧠] Maybe use `documentationUrl`
|
|
3097
|
+
if (isMetadataAviailable) {
|
|
3098
|
+
more.push("\u2B1C Metadata registered");
|
|
3099
|
+
} // not else
|
|
3100
|
+
if (isInstalled) {
|
|
3101
|
+
more.push("\uD83D\uDFE9 Installed");
|
|
3102
|
+
} // not else
|
|
3103
|
+
if (isAvilableInTools) {
|
|
3104
|
+
more.push("\uD83D\uDFE6 Available in tools");
|
|
3105
|
+
} // not else
|
|
3106
|
+
if (!isMetadataAviailable && isInstalled) {
|
|
3107
|
+
more.push("When no metadata registered but scraper is installed, it is an unexpected behavior");
|
|
3108
|
+
} // not else
|
|
3109
|
+
if (!isInstalled && isAvilableInTools) {
|
|
3110
|
+
more.push("When the scraper is not installed but available in tools, it is an unexpected compatibility behavior");
|
|
3111
|
+
} // not else
|
|
3060
3112
|
if (!isAvilableInBrowser) {
|
|
3061
|
-
more
|
|
3113
|
+
more.push("Not usable in browser");
|
|
3062
3114
|
}
|
|
3063
|
-
|
|
3115
|
+
var moreText = more.length === 0 ? '' : " *(".concat(more.join('; '), ")*");
|
|
3116
|
+
return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes
|
|
3117
|
+
.map(function (mimeType) { return "\"".concat(mimeType, "\""); })
|
|
3118
|
+
.join(', ')).concat(moreText);
|
|
3064
3119
|
})
|
|
3065
|
-
.join('\n')), "\n "); });
|
|
3120
|
+
.join('\n')), "\n\n Legend:\n - \u2B1C **Metadata registered** means that Promptbook knows about the scraper, it is similar to registration in some registry\n - \uD83D\uDFE9 **Installed** means that you have imported package with particular scraper\n - \uD83D\uDFE6 **Available in tools** means that you have passed scraper as dependency into prepare or execution process\n\n "); });
|
|
3066
3121
|
}
|
|
3067
3122
|
/**
|
|
3068
3123
|
* TODO: [®] DRY Register logic
|
|
@@ -3310,57 +3365,75 @@
|
|
|
3310
3365
|
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3311
3366
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3312
3367
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3313
|
-
var partialPieces, sourceHandler,
|
|
3314
|
-
var e_1,
|
|
3315
|
-
return __generator(this, function (
|
|
3316
|
-
switch (
|
|
3368
|
+
var partialPieces, sourceHandler, scrapers, _loop_1, scrapers_1, scrapers_1_1, scraper, state_1, e_1_1, pieces;
|
|
3369
|
+
var e_1, _a;
|
|
3370
|
+
return __generator(this, function (_b) {
|
|
3371
|
+
switch (_b.label) {
|
|
3317
3372
|
case 0:
|
|
3318
3373
|
partialPieces = null;
|
|
3319
3374
|
return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname: rootDirname, isVerbose: isVerbose })];
|
|
3320
3375
|
case 1:
|
|
3321
|
-
sourceHandler =
|
|
3322
|
-
|
|
3376
|
+
sourceHandler = _b.sent();
|
|
3377
|
+
scrapers = arrayableToArray(tools.scrapers);
|
|
3378
|
+
_loop_1 = function (scraper) {
|
|
3379
|
+
var partialPiecesUnchecked;
|
|
3380
|
+
return __generator(this, function (_c) {
|
|
3381
|
+
switch (_c.label) {
|
|
3382
|
+
case 0:
|
|
3383
|
+
if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
|
|
3384
|
+
// <- TODO: [🦔] Implement mime-type wildcards
|
|
3385
|
+
) {
|
|
3386
|
+
return [2 /*return*/, "continue"];
|
|
3387
|
+
}
|
|
3388
|
+
return [4 /*yield*/, scraper.scrape(sourceHandler)];
|
|
3389
|
+
case 1:
|
|
3390
|
+
partialPiecesUnchecked = _c.sent();
|
|
3391
|
+
if (partialPiecesUnchecked !== null) {
|
|
3392
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3393
|
+
return [2 /*return*/, "break"];
|
|
3394
|
+
}
|
|
3395
|
+
console.warn(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge from source despite the scraper `".concat(scraper.metadata.className, "` supports the mime type \"").concat(sourceHandler.mimeType, "\".\n \n The source:\n > ").concat(block(knowledgeSource.sourceContent
|
|
3396
|
+
.split('\n')
|
|
3397
|
+
.map(function (line) { return "> ".concat(line); })
|
|
3398
|
+
.join('\n')), "\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
|
|
3399
|
+
return [2 /*return*/];
|
|
3400
|
+
}
|
|
3401
|
+
});
|
|
3402
|
+
};
|
|
3403
|
+
_b.label = 2;
|
|
3323
3404
|
case 2:
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3405
|
+
_b.trys.push([2, 7, 8, 9]);
|
|
3406
|
+
scrapers_1 = __values(scrapers), scrapers_1_1 = scrapers_1.next();
|
|
3407
|
+
_b.label = 3;
|
|
3327
3408
|
case 3:
|
|
3328
|
-
if (!!
|
|
3329
|
-
scraper =
|
|
3330
|
-
|
|
3331
|
-
// <- TODO: [🦔] Implement mime-type wildcards
|
|
3332
|
-
) {
|
|
3333
|
-
return [3 /*break*/, 5];
|
|
3334
|
-
}
|
|
3335
|
-
return [4 /*yield*/, scraper.scrape(sourceHandler)];
|
|
3409
|
+
if (!!scrapers_1_1.done) return [3 /*break*/, 6];
|
|
3410
|
+
scraper = scrapers_1_1.value;
|
|
3411
|
+
return [5 /*yield**/, _loop_1(scraper)];
|
|
3336
3412
|
case 4:
|
|
3337
|
-
|
|
3338
|
-
if (
|
|
3339
|
-
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3340
|
-
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3413
|
+
state_1 = _b.sent();
|
|
3414
|
+
if (state_1 === "break")
|
|
3341
3415
|
return [3 /*break*/, 6];
|
|
3342
|
-
|
|
3343
|
-
_d.label = 5;
|
|
3416
|
+
_b.label = 5;
|
|
3344
3417
|
case 5:
|
|
3345
|
-
|
|
3418
|
+
scrapers_1_1 = scrapers_1.next();
|
|
3346
3419
|
return [3 /*break*/, 3];
|
|
3347
3420
|
case 6: return [3 /*break*/, 9];
|
|
3348
3421
|
case 7:
|
|
3349
|
-
e_1_1 =
|
|
3422
|
+
e_1_1 = _b.sent();
|
|
3350
3423
|
e_1 = { error: e_1_1 };
|
|
3351
3424
|
return [3 /*break*/, 9];
|
|
3352
3425
|
case 8:
|
|
3353
3426
|
try {
|
|
3354
|
-
if (
|
|
3427
|
+
if (scrapers_1_1 && !scrapers_1_1.done && (_a = scrapers_1.return)) _a.call(scrapers_1);
|
|
3355
3428
|
}
|
|
3356
3429
|
finally { if (e_1) throw e_1.error; }
|
|
3357
3430
|
return [7 /*endfinally*/];
|
|
3358
3431
|
case 9:
|
|
3359
3432
|
if (partialPieces === null) {
|
|
3360
|
-
throw new KnowledgeScrapeError(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge
|
|
3433
|
+
throw new KnowledgeScrapeError(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge\n \n The source:\n > ".concat(block(knowledgeSource.sourceContent
|
|
3361
3434
|
.split('\n')
|
|
3362
3435
|
.map(function (line) { return "> ".concat(line); })
|
|
3363
|
-
.join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage()), "\n\n\n "); }));
|
|
3436
|
+
.join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
|
|
3364
3437
|
}
|
|
3365
3438
|
pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
|
|
3366
3439
|
{
|
|
@@ -3786,42 +3859,6 @@
|
|
|
3786
3859
|
return union;
|
|
3787
3860
|
}
|
|
3788
3861
|
|
|
3789
|
-
/**
|
|
3790
|
-
* This error indicates problems parsing the format value
|
|
3791
|
-
*
|
|
3792
|
-
* For example, when the format value is not a valid JSON or CSV
|
|
3793
|
-
* This is not thrown directly but in extended classes
|
|
3794
|
-
*
|
|
3795
|
-
* @public exported from `@promptbook/core`
|
|
3796
|
-
*/
|
|
3797
|
-
var AbstractFormatError = /** @class */ (function (_super) {
|
|
3798
|
-
__extends(AbstractFormatError, _super);
|
|
3799
|
-
// Note: To allow instanceof do not put here error `name`
|
|
3800
|
-
// public readonly name = 'AbstractFormatError';
|
|
3801
|
-
function AbstractFormatError(message) {
|
|
3802
|
-
var _this = _super.call(this, message) || this;
|
|
3803
|
-
Object.setPrototypeOf(_this, AbstractFormatError.prototype);
|
|
3804
|
-
return _this;
|
|
3805
|
-
}
|
|
3806
|
-
return AbstractFormatError;
|
|
3807
|
-
}(Error));
|
|
3808
|
-
|
|
3809
|
-
/**
|
|
3810
|
-
* This error indicates problem with parsing of CSV
|
|
3811
|
-
*
|
|
3812
|
-
* @public exported from `@promptbook/core`
|
|
3813
|
-
*/
|
|
3814
|
-
var CsvFormatError = /** @class */ (function (_super) {
|
|
3815
|
-
__extends(CsvFormatError, _super);
|
|
3816
|
-
function CsvFormatError(message) {
|
|
3817
|
-
var _this = _super.call(this, message) || this;
|
|
3818
|
-
_this.name = 'CsvFormatError';
|
|
3819
|
-
Object.setPrototypeOf(_this, CsvFormatError.prototype);
|
|
3820
|
-
return _this;
|
|
3821
|
-
}
|
|
3822
|
-
return CsvFormatError;
|
|
3823
|
-
}(AbstractFormatError));
|
|
3824
|
-
|
|
3825
3862
|
/**
|
|
3826
3863
|
* @@@
|
|
3827
3864
|
*
|
|
@@ -3862,7 +3899,7 @@
|
|
|
3862
3899
|
case 0:
|
|
3863
3900
|
csv = papaparse.parse(value, __assign(__assign({}, settings), MANDATORY_CSV_SETTINGS));
|
|
3864
3901
|
if (csv.errors.length !== 0) {
|
|
3865
|
-
throw new CsvFormatError(spaceTrim__default["default"](function (block) { return "\n CSV parsing error\n\n ".concat(block(csv.errors.map(function (error) { return error.message; }).join('\n\n')), "\n "); }));
|
|
3902
|
+
throw new CsvFormatError(spaceTrim__default["default"](function (block) { return "\n CSV parsing error\n\n Error(s) from CSV parsing:\n ".concat(block(csv.errors.map(function (error) { return error.message; }).join('\n\n')), "\n\n The CSV data:\n ").concat(block(value), "\n "); }));
|
|
3866
3903
|
}
|
|
3867
3904
|
return [4 /*yield*/, Promise.all(csv.data.map(function (row, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3868
3905
|
var _a, _b;
|
|
@@ -3900,7 +3937,7 @@
|
|
|
3900
3937
|
case 0:
|
|
3901
3938
|
csv = papaparse.parse(value, __assign(__assign({}, settings), MANDATORY_CSV_SETTINGS));
|
|
3902
3939
|
if (csv.errors.length !== 0) {
|
|
3903
|
-
throw new CsvFormatError(spaceTrim__default["default"](function (block) { return "\n CSV parsing error\n\n ".concat(block(csv.errors.map(function (error) { return error.message; }).join('\n\n')), "\n "); }));
|
|
3940
|
+
throw new CsvFormatError(spaceTrim__default["default"](function (block) { return "\n CSV parsing error\n\n Error(s) from CSV parsing:\n ".concat(block(csv.errors.map(function (error) { return error.message; }).join('\n\n')), "\n\n The CSV data:\n ").concat(block(value), "\n "); }));
|
|
3904
3941
|
}
|
|
3905
3942
|
return [4 /*yield*/, Promise.all(csv.data.map(function (row, rowIndex) { return __awaiter(_this, void 0, void 0, function () {
|
|
3906
3943
|
var _this = this;
|