@promptbook/pdf 0.75.2 → 0.75.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +107 -67
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +2 -0
- package/esm/typings/src/config.d.ts +9 -1
- package/esm/typings/src/scrapers/_common/register/$registeredScrapersMessage.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +107 -67
- package/umd/index.umd.js.map +1 -1
|
@@ -24,6 +24,7 @@ import { DEFAULT_REMOTE_URL } from '../config';
|
|
|
24
24
|
import { DEFAULT_REMOTE_URL_PATH } from '../config';
|
|
25
25
|
import { DEFAULT_CSV_SETTINGS } from '../config';
|
|
26
26
|
import { DEFAULT_IS_VERBOSE } from '../config';
|
|
27
|
+
import { SET_IS_VERBOSE } from '../config';
|
|
27
28
|
import { DEFAULT_IS_AUTO_INSTALLED } from '../config';
|
|
28
29
|
import { pipelineJsonToString } from '../conversion/pipelineJsonToString';
|
|
29
30
|
import { pipelineStringToJson } from '../conversion/pipelineStringToJson';
|
|
@@ -136,6 +137,7 @@ export { DEFAULT_REMOTE_URL };
|
|
|
136
137
|
export { DEFAULT_REMOTE_URL_PATH };
|
|
137
138
|
export { DEFAULT_CSV_SETTINGS };
|
|
138
139
|
export { DEFAULT_IS_VERBOSE };
|
|
140
|
+
export { SET_IS_VERBOSE };
|
|
139
141
|
export { DEFAULT_IS_AUTO_INSTALLED };
|
|
140
142
|
export { pipelineJsonToString };
|
|
141
143
|
export { pipelineStringToJson };
|
|
@@ -204,7 +204,15 @@ export declare const DEFAULT_CSV_SETTINGS: CsvSettings;
|
|
|
204
204
|
*
|
|
205
205
|
* @public exported from `@promptbook/core`
|
|
206
206
|
*/
|
|
207
|
-
export declare
|
|
207
|
+
export declare let DEFAULT_IS_VERBOSE: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* @@@
|
|
210
|
+
*
|
|
211
|
+
* Note: This is experimental feature
|
|
212
|
+
*
|
|
213
|
+
* @public exported from `@promptbook/core`
|
|
214
|
+
*/
|
|
215
|
+
export declare function SET_IS_VERBOSE(isVerbose: boolean): void;
|
|
208
216
|
/**
|
|
209
217
|
* @@@
|
|
210
218
|
*
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { string_markdown } from '../../../types/typeAliases';
|
|
2
|
+
import type { Scraper } from '../Scraper';
|
|
2
3
|
/**
|
|
3
4
|
* Creates a message with all registered scrapers
|
|
4
5
|
*
|
|
@@ -6,7 +7,7 @@ import type { string_markdown } from '../../../types/typeAliases';
|
|
|
6
7
|
*
|
|
7
8
|
* @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
|
|
8
9
|
*/
|
|
9
|
-
export declare function $registeredScrapersMessage(): string_markdown;
|
|
10
|
+
export declare function $registeredScrapersMessage(availableScrapers: ReadonlyArray<Scraper>): string_markdown;
|
|
10
11
|
/**
|
|
11
12
|
* TODO: [®] DRY Register logic
|
|
12
13
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/pdf",
|
|
3
|
-
"version": "0.75.2",
|
|
3
|
+
"version": "0.75.4",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"--note-0": " <- [🐊]",
|
|
6
6
|
"private": false,
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"module": "./esm/index.es.js",
|
|
55
55
|
"typings": "./esm/typings/src/_packages/pdf.index.d.ts",
|
|
56
56
|
"peerDependencies": {
|
|
57
|
-
"@promptbook/core": "0.75.2"
|
|
57
|
+
"@promptbook/core": "0.75.4"
|
|
58
58
|
},
|
|
59
59
|
"dependencies": {
|
|
60
60
|
"crypto-js": "4.2.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
*
|
|
23
23
|
* @see https://github.com/webgptorg/promptbook
|
|
24
24
|
*/
|
|
25
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.75.
|
|
25
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.75.3';
|
|
26
26
|
/**
|
|
27
27
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
28
28
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -2862,50 +2862,63 @@
|
|
|
2862
2862
|
*
|
|
2863
2863
|
* @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
|
|
2864
2864
|
*/
|
|
2865
|
-
function $registeredScrapersMessage() {
|
|
2866
|
-
var e_1, _a, e_2, _b;
|
|
2865
|
+
function $registeredScrapersMessage(availableScrapers) {
|
|
2866
|
+
var e_1, _a, e_2, _b, e_3, _c;
|
|
2867
2867
|
/**
|
|
2868
2868
|
* Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
|
|
2869
2869
|
*/
|
|
2870
2870
|
var all = [];
|
|
2871
|
-
var _loop_1 = function (packageName, className) {
|
|
2871
|
+
var _loop_1 = function (packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser) {
|
|
2872
2872
|
if (all.some(function (item) { return item.packageName === packageName && item.className === className; })) {
|
|
2873
2873
|
return "continue";
|
|
2874
2874
|
}
|
|
2875
|
-
all.push({ packageName: packageName, className: className });
|
|
2875
|
+
all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
|
|
2876
2876
|
};
|
|
2877
2877
|
try {
|
|
2878
|
-
for (var
|
|
2879
|
-
var
|
|
2880
|
-
_loop_1(packageName, className);
|
|
2878
|
+
for (var _d = __values($scrapersMetadataRegister.list()), _e = _d.next(); !_e.done; _e = _d.next()) {
|
|
2879
|
+
var _f = _e.value, packageName = _f.packageName, className = _f.className, mimeTypes = _f.mimeTypes, documentationUrl = _f.documentationUrl, isAvilableInBrowser = _f.isAvilableInBrowser;
|
|
2880
|
+
_loop_1(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
|
|
2881
2881
|
}
|
|
2882
2882
|
}
|
|
2883
2883
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
2884
2884
|
finally {
|
|
2885
2885
|
try {
|
|
2886
|
-
if (
|
|
2886
|
+
if (_e && !_e.done && (_a = _d.return)) _a.call(_d);
|
|
2887
2887
|
}
|
|
2888
2888
|
finally { if (e_1) throw e_1.error; }
|
|
2889
2889
|
}
|
|
2890
|
-
var _loop_2 = function (packageName, className) {
|
|
2890
|
+
var _loop_2 = function (packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser) {
|
|
2891
2891
|
if (all.some(function (item) { return item.packageName === packageName && item.className === className; })) {
|
|
2892
2892
|
return "continue";
|
|
2893
2893
|
}
|
|
2894
|
-
all.push({ packageName: packageName, className: className });
|
|
2894
|
+
all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
|
|
2895
2895
|
};
|
|
2896
2896
|
try {
|
|
2897
|
-
for (var
|
|
2898
|
-
var
|
|
2899
|
-
_loop_2(packageName, className);
|
|
2897
|
+
for (var _g = __values($scrapersRegister.list()), _h = _g.next(); !_h.done; _h = _g.next()) {
|
|
2898
|
+
var _j = _h.value, packageName = _j.packageName, className = _j.className, mimeTypes = _j.mimeTypes, documentationUrl = _j.documentationUrl, isAvilableInBrowser = _j.isAvilableInBrowser;
|
|
2899
|
+
_loop_2(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
|
|
2900
2900
|
}
|
|
2901
2901
|
}
|
|
2902
2902
|
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
2903
2903
|
finally {
|
|
2904
2904
|
try {
|
|
2905
|
-
if (
|
|
2905
|
+
if (_h && !_h.done && (_b = _g.return)) _b.call(_g);
|
|
2906
2906
|
}
|
|
2907
2907
|
finally { if (e_2) throw e_2.error; }
|
|
2908
2908
|
}
|
|
2909
|
+
try {
|
|
2910
|
+
for (var availableScrapers_1 = __values(availableScrapers), availableScrapers_1_1 = availableScrapers_1.next(); !availableScrapers_1_1.done; availableScrapers_1_1 = availableScrapers_1.next()) {
|
|
2911
|
+
var metadata_1 = availableScrapers_1_1.value.metadata;
|
|
2912
|
+
all.push(metadata_1);
|
|
2913
|
+
}
|
|
2914
|
+
}
|
|
2915
|
+
catch (e_3_1) { e_3 = { error: e_3_1 }; }
|
|
2916
|
+
finally {
|
|
2917
|
+
try {
|
|
2918
|
+
if (availableScrapers_1_1 && !availableScrapers_1_1.done && (_c = availableScrapers_1.return)) _c.call(availableScrapers_1);
|
|
2919
|
+
}
|
|
2920
|
+
finally { if (e_3) throw e_3.error; }
|
|
2921
|
+
}
|
|
2909
2922
|
var metadata = all.map(function (metadata) {
|
|
2910
2923
|
var isMetadataAviailable = $scrapersMetadataRegister
|
|
2911
2924
|
.list()
|
|
@@ -2919,38 +2932,44 @@
|
|
|
2919
2932
|
var packageName = _a.packageName, className = _a.className;
|
|
2920
2933
|
return metadata.packageName === packageName && metadata.className === className;
|
|
2921
2934
|
});
|
|
2922
|
-
|
|
2935
|
+
var isAvilableInTools = availableScrapers.some(function (_a) {
|
|
2936
|
+
var _b = _a.metadata, packageName = _b.packageName, className = _b.className;
|
|
2937
|
+
return metadata.packageName === packageName && metadata.className === className;
|
|
2938
|
+
});
|
|
2939
|
+
return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled, isAvilableInTools: isAvilableInTools });
|
|
2923
2940
|
});
|
|
2924
2941
|
if (metadata.length === 0) {
|
|
2925
|
-
return "No scrapers are available";
|
|
2942
|
+
return spaceTrim__default["default"]("\n **No scrapers are available**\n\n This is a unexpected behavior, you are probably using some broken version of Promptbook\n At least there should be available the metadata of the scrapers\n ");
|
|
2926
2943
|
}
|
|
2927
2944
|
return spaceTrim__default["default"](function (block) { return "\n Available scrapers are:\n ".concat(block(metadata
|
|
2928
2945
|
.map(function (_a, i) {
|
|
2929
|
-
var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled;
|
|
2930
|
-
var more;
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
|
|
2935
|
-
|
|
2936
|
-
more
|
|
2937
|
-
}
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
more = "(unknown state, looks like a unexpected behavior)";
|
|
2946
|
+
var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser, isAvilableInTools = _a.isAvilableInTools;
|
|
2947
|
+
var more = [];
|
|
2948
|
+
// TODO: [🧠] Maybe use `documentationUrl`
|
|
2949
|
+
if (isMetadataAviailable) {
|
|
2950
|
+
more.push("\u2B1C Metadata registered");
|
|
2951
|
+
} // not else
|
|
2952
|
+
if (isInstalled) {
|
|
2953
|
+
more.push("\uD83D\uDFE9 Installed");
|
|
2954
|
+
} // not else
|
|
2955
|
+
if (isAvilableInTools) {
|
|
2956
|
+
more.push("\uD83D\uDFE6 Available in tools");
|
|
2957
|
+
} // not else
|
|
2958
|
+
if (!isMetadataAviailable && isInstalled) {
|
|
2959
|
+
more.push("When no metadata registered but scraper is installed, it is an unexpected behavior");
|
|
2960
|
+
} // not else
|
|
2961
|
+
if (!isInstalled && isAvilableInTools) {
|
|
2962
|
+
more.push("When the scraper is not installed but available in tools, it is an unexpected compatibility behavior");
|
|
2963
|
+
} // not else
|
|
2964
|
+
if (!isAvilableInBrowser) {
|
|
2965
|
+
more.push("Not usable in browser");
|
|
2950
2966
|
}
|
|
2951
|
-
|
|
2967
|
+
var moreText = more.length === 0 ? '' : " *(".concat(more.join('; '), ")*");
|
|
2968
|
+
return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes
|
|
2969
|
+
.map(function (mimeType) { return "\"".concat(mimeType, "\""); })
|
|
2970
|
+
.join(', ')).concat(moreText);
|
|
2952
2971
|
})
|
|
2953
|
-
.join('\n')), "\n "); });
|
|
2972
|
+
.join('\n')), "\n\n Legend:\n - \u2B1C **Metadata registered** means that Promptbook knows about the scraper, it is similar to registration in some registry\n - \uD83D\uDFE9 **Installed** means that you have imported package with particular scraper\n - \uD83D\uDFE6 **Available in tools** means that you have passed scraper as dependency into prepare or execution process\n\n "); });
|
|
2954
2973
|
}
|
|
2955
2974
|
/**
|
|
2956
2975
|
* TODO: [®] DRY Register logic
|
|
@@ -3198,54 +3217,75 @@
|
|
|
3198
3217
|
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3199
3218
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3200
3219
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3201
|
-
var partialPieces, sourceHandler,
|
|
3202
|
-
var e_1,
|
|
3203
|
-
return __generator(this, function (
|
|
3204
|
-
switch (
|
|
3220
|
+
var partialPieces, sourceHandler, scrapers, _loop_1, scrapers_1, scrapers_1_1, scraper, state_1, e_1_1, pieces;
|
|
3221
|
+
var e_1, _a;
|
|
3222
|
+
return __generator(this, function (_b) {
|
|
3223
|
+
switch (_b.label) {
|
|
3205
3224
|
case 0:
|
|
3206
3225
|
partialPieces = null;
|
|
3207
3226
|
return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname: rootDirname, isVerbose: isVerbose })];
|
|
3208
3227
|
case 1:
|
|
3209
|
-
sourceHandler =
|
|
3210
|
-
|
|
3228
|
+
sourceHandler = _b.sent();
|
|
3229
|
+
scrapers = arrayableToArray(tools.scrapers);
|
|
3230
|
+
_loop_1 = function (scraper) {
|
|
3231
|
+
var partialPiecesUnchecked;
|
|
3232
|
+
return __generator(this, function (_c) {
|
|
3233
|
+
switch (_c.label) {
|
|
3234
|
+
case 0:
|
|
3235
|
+
if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
|
|
3236
|
+
// <- TODO: [🦔] Implement mime-type wildcards
|
|
3237
|
+
) {
|
|
3238
|
+
return [2 /*return*/, "continue"];
|
|
3239
|
+
}
|
|
3240
|
+
return [4 /*yield*/, scraper.scrape(sourceHandler)];
|
|
3241
|
+
case 1:
|
|
3242
|
+
partialPiecesUnchecked = _c.sent();
|
|
3243
|
+
if (partialPiecesUnchecked !== null) {
|
|
3244
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3245
|
+
return [2 /*return*/, "break"];
|
|
3246
|
+
}
|
|
3247
|
+
console.warn(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge from source despite the scraper `".concat(scraper.metadata.className, "` supports the mime type \"").concat(sourceHandler.mimeType, "\".\n \n The source:\n > ").concat(block(knowledgeSource.sourceContent
|
|
3248
|
+
.split('\n')
|
|
3249
|
+
.map(function (line) { return "> ".concat(line); })
|
|
3250
|
+
.join('\n')), "\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
|
|
3251
|
+
return [2 /*return*/];
|
|
3252
|
+
}
|
|
3253
|
+
});
|
|
3254
|
+
};
|
|
3255
|
+
_b.label = 2;
|
|
3211
3256
|
case 2:
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3257
|
+
_b.trys.push([2, 7, 8, 9]);
|
|
3258
|
+
scrapers_1 = __values(scrapers), scrapers_1_1 = scrapers_1.next();
|
|
3259
|
+
_b.label = 3;
|
|
3215
3260
|
case 3:
|
|
3216
|
-
if (!!
|
|
3217
|
-
scraper =
|
|
3218
|
-
|
|
3219
|
-
// <- TODO: [🦔] Implement mime-type wildcards
|
|
3220
|
-
) {
|
|
3221
|
-
return [3 /*break*/, 5];
|
|
3222
|
-
}
|
|
3223
|
-
return [4 /*yield*/, scraper.scrape(sourceHandler)];
|
|
3261
|
+
if (!!scrapers_1_1.done) return [3 /*break*/, 6];
|
|
3262
|
+
scraper = scrapers_1_1.value;
|
|
3263
|
+
return [5 /*yield**/, _loop_1(scraper)];
|
|
3224
3264
|
case 4:
|
|
3225
|
-
|
|
3226
|
-
if (
|
|
3227
|
-
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3228
|
-
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3265
|
+
state_1 = _b.sent();
|
|
3266
|
+
if (state_1 === "break")
|
|
3229
3267
|
return [3 /*break*/, 6];
|
|
3230
|
-
|
|
3231
|
-
_d.label = 5;
|
|
3268
|
+
_b.label = 5;
|
|
3232
3269
|
case 5:
|
|
3233
|
-
|
|
3270
|
+
scrapers_1_1 = scrapers_1.next();
|
|
3234
3271
|
return [3 /*break*/, 3];
|
|
3235
3272
|
case 6: return [3 /*break*/, 9];
|
|
3236
3273
|
case 7:
|
|
3237
|
-
e_1_1 =
|
|
3274
|
+
e_1_1 = _b.sent();
|
|
3238
3275
|
e_1 = { error: e_1_1 };
|
|
3239
3276
|
return [3 /*break*/, 9];
|
|
3240
3277
|
case 8:
|
|
3241
3278
|
try {
|
|
3242
|
-
if (
|
|
3279
|
+
if (scrapers_1_1 && !scrapers_1_1.done && (_a = scrapers_1.return)) _a.call(scrapers_1);
|
|
3243
3280
|
}
|
|
3244
3281
|
finally { if (e_1) throw e_1.error; }
|
|
3245
3282
|
return [7 /*endfinally*/];
|
|
3246
3283
|
case 9:
|
|
3247
3284
|
if (partialPieces === null) {
|
|
3248
|
-
throw new KnowledgeScrapeError(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge
|
|
3285
|
+
throw new KnowledgeScrapeError(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge\n \n The source:\n > ".concat(block(knowledgeSource.sourceContent
|
|
3286
|
+
.split('\n')
|
|
3287
|
+
.map(function (line) { return "> ".concat(line); })
|
|
3288
|
+
.join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
|
|
3249
3289
|
}
|
|
3250
3290
|
pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
|
|
3251
3291
|
{
|