@promptbook/pdf 0.75.3 → 0.75.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -20,7 +20,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
20
20
  *
21
21
  * @see https://github.com/webgptorg/promptbook
22
22
  */
23
- var PROMPTBOOK_ENGINE_VERSION = '0.75.2';
23
+ var PROMPTBOOK_ENGINE_VERSION = '0.75.3';
24
24
  /**
25
25
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
26
26
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -2860,8 +2860,8 @@ var $scrapersRegister = new $Register('scraper_constructors');
2860
2860
  *
2861
2861
  * @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
2862
2862
  */
2863
- function $registeredScrapersMessage() {
2864
- var e_1, _a, e_2, _b;
2863
+ function $registeredScrapersMessage(availableScrapers) {
2864
+ var e_1, _a, e_2, _b, e_3, _c;
2865
2865
  /**
2866
2866
  * Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
2867
2867
  */
@@ -2873,15 +2873,15 @@ function $registeredScrapersMessage() {
2873
2873
  all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
2874
2874
  };
2875
2875
  try {
2876
- for (var _c = __values($scrapersMetadataRegister.list()), _d = _c.next(); !_d.done; _d = _c.next()) {
2877
- var _e = _d.value, packageName = _e.packageName, className = _e.className, mimeTypes = _e.mimeTypes, documentationUrl = _e.documentationUrl, isAvilableInBrowser = _e.isAvilableInBrowser;
2876
+ for (var _d = __values($scrapersMetadataRegister.list()), _e = _d.next(); !_e.done; _e = _d.next()) {
2877
+ var _f = _e.value, packageName = _f.packageName, className = _f.className, mimeTypes = _f.mimeTypes, documentationUrl = _f.documentationUrl, isAvilableInBrowser = _f.isAvilableInBrowser;
2878
2878
  _loop_1(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
2879
2879
  }
2880
2880
  }
2881
2881
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
2882
2882
  finally {
2883
2883
  try {
2884
- if (_d && !_d.done && (_a = _c.return)) _a.call(_c);
2884
+ if (_e && !_e.done && (_a = _d.return)) _a.call(_d);
2885
2885
  }
2886
2886
  finally { if (e_1) throw e_1.error; }
2887
2887
  }
@@ -2892,18 +2892,31 @@ function $registeredScrapersMessage() {
2892
2892
  all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
2893
2893
  };
2894
2894
  try {
2895
- for (var _f = __values($scrapersRegister.list()), _g = _f.next(); !_g.done; _g = _f.next()) {
2896
- var _h = _g.value, packageName = _h.packageName, className = _h.className, mimeTypes = _h.mimeTypes, documentationUrl = _h.documentationUrl, isAvilableInBrowser = _h.isAvilableInBrowser;
2895
+ for (var _g = __values($scrapersRegister.list()), _h = _g.next(); !_h.done; _h = _g.next()) {
2896
+ var _j = _h.value, packageName = _j.packageName, className = _j.className, mimeTypes = _j.mimeTypes, documentationUrl = _j.documentationUrl, isAvilableInBrowser = _j.isAvilableInBrowser;
2897
2897
  _loop_2(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
2898
2898
  }
2899
2899
  }
2900
2900
  catch (e_2_1) { e_2 = { error: e_2_1 }; }
2901
2901
  finally {
2902
2902
  try {
2903
- if (_g && !_g.done && (_b = _f.return)) _b.call(_f);
2903
+ if (_h && !_h.done && (_b = _g.return)) _b.call(_g);
2904
2904
  }
2905
2905
  finally { if (e_2) throw e_2.error; }
2906
2906
  }
2907
+ try {
2908
+ for (var availableScrapers_1 = __values(availableScrapers), availableScrapers_1_1 = availableScrapers_1.next(); !availableScrapers_1_1.done; availableScrapers_1_1 = availableScrapers_1.next()) {
2909
+ var metadata_1 = availableScrapers_1_1.value.metadata;
2910
+ all.push(metadata_1);
2911
+ }
2912
+ }
2913
+ catch (e_3_1) { e_3 = { error: e_3_1 }; }
2914
+ finally {
2915
+ try {
2916
+ if (availableScrapers_1_1 && !availableScrapers_1_1.done && (_c = availableScrapers_1.return)) _c.call(availableScrapers_1);
2917
+ }
2918
+ finally { if (e_3) throw e_3.error; }
2919
+ }
2907
2920
  var metadata = all.map(function (metadata) {
2908
2921
  var isMetadataAviailable = $scrapersMetadataRegister
2909
2922
  .list()
@@ -2917,42 +2930,44 @@ function $registeredScrapersMessage() {
2917
2930
  var packageName = _a.packageName, className = _a.className;
2918
2931
  return metadata.packageName === packageName && metadata.className === className;
2919
2932
  });
2920
- return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled });
2933
+ var isAvilableInTools = availableScrapers.some(function (_a) {
2934
+ var _b = _a.metadata, packageName = _b.packageName, className = _b.className;
2935
+ return metadata.packageName === packageName && metadata.className === className;
2936
+ });
2937
+ return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled, isAvilableInTools: isAvilableInTools });
2921
2938
  });
2922
2939
  if (metadata.length === 0) {
2923
- return "No scrapers are available";
2940
+ return spaceTrim$1("\n **No scrapers are available**\n\n This is a unexpected behavior, you are probably using some broken version of Promptbook\n At least there should be available the metadata of the scrapers\n ");
2924
2941
  }
2925
2942
  return spaceTrim$1(function (block) { return "\n Available scrapers are:\n ".concat(block(metadata
2926
2943
  .map(function (_a, i) {
2927
- var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser;
2928
- var more;
2929
- // TODO: Use documentationUrl
2930
- if (just(false)) {
2931
- more = '';
2932
- }
2933
- else if (!isMetadataAviailable && !isInstalled) {
2934
- // TODO: [�][�] Maybe do allow to do auto-install if package not registered and not found
2935
- more = "*(not installed and no metadata, looks like a unexpected behavior)*";
2936
- }
2937
- else if (isMetadataAviailable && !isInstalled) {
2938
- // TODO: [�][�]
2939
- more = "*(not installed)*";
2940
- }
2941
- else if (!isMetadataAviailable && isInstalled) {
2942
- more = "*(no metadata, looks like a unexpected behavior)*";
2943
- }
2944
- else if (isMetadataAviailable && isInstalled) {
2945
- more = "(installed)";
2946
- }
2947
- else {
2948
- more = "*(unknown state, looks like a unexpected behavior)*";
2949
- }
2944
+ var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser, isAvilableInTools = _a.isAvilableInTools;
2945
+ var more = [];
2946
+ // TODO: [🧠] Maybe use `documentationUrl`
2947
+ if (isMetadataAviailable) {
2948
+ more.push("\u2B1C Metadata registered");
2949
+ } // not else
2950
+ if (isInstalled) {
2951
+ more.push("\uD83D\uDFE9 Installed");
2952
+ } // not else
2953
+ if (isAvilableInTools) {
2954
+ more.push("\uD83D\uDFE6 Available in tools");
2955
+ } // not else
2956
+ if (!isMetadataAviailable && isInstalled) {
2957
+ more.push("When no metadata registered but scraper is installed, it is an unexpected behavior");
2958
+ } // not else
2959
+ if (!isInstalled && isAvilableInTools) {
2960
+ more.push("When the scraper is not installed but available in tools, it is an unexpected compatibility behavior");
2961
+ } // not else
2950
2962
  if (!isAvilableInBrowser) {
2951
- more += " *(not available in browser)*";
2963
+ more.push("Not usable in browser");
2952
2964
  }
2953
- return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes.join(', '), " ").concat(more);
2965
+ var moreText = more.length === 0 ? '' : " *(".concat(more.join('; '), ")*");
2966
+ return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes
2967
+ .map(function (mimeType) { return "\"".concat(mimeType, "\""); })
2968
+ .join(', ')).concat(moreText);
2954
2969
  })
2955
- .join('\n')), "\n "); });
2970
+ .join('\n')), "\n\n Legend:\n - \u2B1C **Metadata registered** means that Promptbook knows about the scraper, it is similar to registration in some registry\n - \uD83D\uDFE9 **Installed** means that you have imported package with particular scraper\n - \uD83D\uDFE6 **Available in tools** means that you have passed scraper as dependency into prepare or execution process\n\n "); });
2956
2971
  }
2957
2972
  /**
2958
2973
  * TODO: [®] DRY Register logic
@@ -3200,57 +3215,75 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3200
3215
  _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3201
3216
  knowledgePreparedUnflatten = new Array(knowledgeSources.length);
3202
3217
  return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
3203
- var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
3204
- var e_1, _c;
3205
- return __generator(this, function (_d) {
3206
- switch (_d.label) {
3218
+ var partialPieces, sourceHandler, scrapers, _loop_1, scrapers_1, scrapers_1_1, scraper, state_1, e_1_1, pieces;
3219
+ var e_1, _a;
3220
+ return __generator(this, function (_b) {
3221
+ switch (_b.label) {
3207
3222
  case 0:
3208
3223
  partialPieces = null;
3209
3224
  return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname: rootDirname, isVerbose: isVerbose })];
3210
3225
  case 1:
3211
- sourceHandler = _d.sent();
3212
- _d.label = 2;
3226
+ sourceHandler = _b.sent();
3227
+ scrapers = arrayableToArray(tools.scrapers);
3228
+ _loop_1 = function (scraper) {
3229
+ var partialPiecesUnchecked;
3230
+ return __generator(this, function (_c) {
3231
+ switch (_c.label) {
3232
+ case 0:
3233
+ if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
3234
+ // <- TODO: [🦔] Implement mime-type wildcards
3235
+ ) {
3236
+ return [2 /*return*/, "continue"];
3237
+ }
3238
+ return [4 /*yield*/, scraper.scrape(sourceHandler)];
3239
+ case 1:
3240
+ partialPiecesUnchecked = _c.sent();
3241
+ if (partialPiecesUnchecked !== null) {
3242
+ partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3243
+ return [2 /*return*/, "break"];
3244
+ }
3245
+ console.warn(spaceTrim$1(function (block) { return "\n Cannot scrape knowledge from source despite the scraper `".concat(scraper.metadata.className, "` supports the mime type \"").concat(sourceHandler.mimeType, "\".\n \n The source:\n > ").concat(block(knowledgeSource.sourceContent
3246
+ .split('\n')
3247
+ .map(function (line) { return "> ".concat(line); })
3248
+ .join('\n')), "\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
3249
+ return [2 /*return*/];
3250
+ }
3251
+ });
3252
+ };
3253
+ _b.label = 2;
3213
3254
  case 2:
3214
- _d.trys.push([2, 7, 8, 9]);
3215
- _a = __values(arrayableToArray(tools.scrapers)), _b = _a.next();
3216
- _d.label = 3;
3255
+ _b.trys.push([2, 7, 8, 9]);
3256
+ scrapers_1 = __values(scrapers), scrapers_1_1 = scrapers_1.next();
3257
+ _b.label = 3;
3217
3258
  case 3:
3218
- if (!!_b.done) return [3 /*break*/, 6];
3219
- scraper = _b.value;
3220
- if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
3221
- // <- TODO: [🦔] Implement mime-type wildcards
3222
- ) {
3223
- return [3 /*break*/, 5];
3224
- }
3225
- return [4 /*yield*/, scraper.scrape(sourceHandler)];
3259
+ if (!!scrapers_1_1.done) return [3 /*break*/, 6];
3260
+ scraper = scrapers_1_1.value;
3261
+ return [5 /*yield**/, _loop_1(scraper)];
3226
3262
  case 4:
3227
- partialPiecesUnchecked = _d.sent();
3228
- if (partialPiecesUnchecked !== null) {
3229
- partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3230
- // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
3263
+ state_1 = _b.sent();
3264
+ if (state_1 === "break")
3231
3265
  return [3 /*break*/, 6];
3232
- }
3233
- _d.label = 5;
3266
+ _b.label = 5;
3234
3267
  case 5:
3235
- _b = _a.next();
3268
+ scrapers_1_1 = scrapers_1.next();
3236
3269
  return [3 /*break*/, 3];
3237
3270
  case 6: return [3 /*break*/, 9];
3238
3271
  case 7:
3239
- e_1_1 = _d.sent();
3272
+ e_1_1 = _b.sent();
3240
3273
  e_1 = { error: e_1_1 };
3241
3274
  return [3 /*break*/, 9];
3242
3275
  case 8:
3243
3276
  try {
3244
- if (_b && !_b.done && (_c = _a.return)) _c.call(_a);
3277
+ if (scrapers_1_1 && !scrapers_1_1.done && (_a = scrapers_1.return)) _a.call(scrapers_1);
3245
3278
  }
3246
3279
  finally { if (e_1) throw e_1.error; }
3247
3280
  return [7 /*endfinally*/];
3248
3281
  case 9:
3249
3282
  if (partialPieces === null) {
3250
- throw new KnowledgeScrapeError(spaceTrim$1(function (block) { return "\n Cannot scrape knowledge from source:\n \n > ".concat(block(knowledgeSource.sourceContent
3283
+ throw new KnowledgeScrapeError(spaceTrim$1(function (block) { return "\n Cannot scrape knowledge\n \n The source:\n > ".concat(block(knowledgeSource.sourceContent
3251
3284
  .split('\n')
3252
3285
  .map(function (line) { return "> ".concat(line); })
3253
- .join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage()), "\n\n\n "); }));
3286
+ .join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
3254
3287
  }
3255
3288
  pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
3256
3289
  {