@promptbook/markdown-utils 0.75.3 → 0.75.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -20,7 +20,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
20
20
  *
21
21
  * @see https://github.com/webgptorg/promptbook
22
22
  */
23
- var PROMPTBOOK_ENGINE_VERSION = '0.75.2';
23
+ var PROMPTBOOK_ENGINE_VERSION = '0.75.3';
24
24
  /**
25
25
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
26
26
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3042,8 +3042,8 @@ var $scrapersRegister = new $Register('scraper_constructors');
3042
3042
  *
3043
3043
  * @private internal function of `createScrapersFromConfiguration` and `createScrapersFromEnv`
3044
3044
  */
3045
- function $registeredScrapersMessage() {
3046
- var e_1, _a, e_2, _b;
3045
+ function $registeredScrapersMessage(availableScrapers) {
3046
+ var e_1, _a, e_2, _b, e_3, _c;
3047
3047
  /**
3048
3048
  * Mixes registered scrapers from $scrapersMetadataRegister and $scrapersRegister
3049
3049
  */
@@ -3055,15 +3055,15 @@ function $registeredScrapersMessage() {
3055
3055
  all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
3056
3056
  };
3057
3057
  try {
3058
- for (var _c = __values($scrapersMetadataRegister.list()), _d = _c.next(); !_d.done; _d = _c.next()) {
3059
- var _e = _d.value, packageName = _e.packageName, className = _e.className, mimeTypes = _e.mimeTypes, documentationUrl = _e.documentationUrl, isAvilableInBrowser = _e.isAvilableInBrowser;
3058
+ for (var _d = __values($scrapersMetadataRegister.list()), _e = _d.next(); !_e.done; _e = _d.next()) {
3059
+ var _f = _e.value, packageName = _f.packageName, className = _f.className, mimeTypes = _f.mimeTypes, documentationUrl = _f.documentationUrl, isAvilableInBrowser = _f.isAvilableInBrowser;
3060
3060
  _loop_1(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
3061
3061
  }
3062
3062
  }
3063
3063
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
3064
3064
  finally {
3065
3065
  try {
3066
- if (_d && !_d.done && (_a = _c.return)) _a.call(_c);
3066
+ if (_e && !_e.done && (_a = _d.return)) _a.call(_d);
3067
3067
  }
3068
3068
  finally { if (e_1) throw e_1.error; }
3069
3069
  }
@@ -3074,18 +3074,31 @@ function $registeredScrapersMessage() {
3074
3074
  all.push({ packageName: packageName, className: className, mimeTypes: mimeTypes, documentationUrl: documentationUrl, isAvilableInBrowser: isAvilableInBrowser });
3075
3075
  };
3076
3076
  try {
3077
- for (var _f = __values($scrapersRegister.list()), _g = _f.next(); !_g.done; _g = _f.next()) {
3078
- var _h = _g.value, packageName = _h.packageName, className = _h.className, mimeTypes = _h.mimeTypes, documentationUrl = _h.documentationUrl, isAvilableInBrowser = _h.isAvilableInBrowser;
3077
+ for (var _g = __values($scrapersRegister.list()), _h = _g.next(); !_h.done; _h = _g.next()) {
3078
+ var _j = _h.value, packageName = _j.packageName, className = _j.className, mimeTypes = _j.mimeTypes, documentationUrl = _j.documentationUrl, isAvilableInBrowser = _j.isAvilableInBrowser;
3079
3079
  _loop_2(packageName, className, mimeTypes, documentationUrl, isAvilableInBrowser);
3080
3080
  }
3081
3081
  }
3082
3082
  catch (e_2_1) { e_2 = { error: e_2_1 }; }
3083
3083
  finally {
3084
3084
  try {
3085
- if (_g && !_g.done && (_b = _f.return)) _b.call(_f);
3085
+ if (_h && !_h.done && (_b = _g.return)) _b.call(_g);
3086
3086
  }
3087
3087
  finally { if (e_2) throw e_2.error; }
3088
3088
  }
3089
+ try {
3090
+ for (var availableScrapers_1 = __values(availableScrapers), availableScrapers_1_1 = availableScrapers_1.next(); !availableScrapers_1_1.done; availableScrapers_1_1 = availableScrapers_1.next()) {
3091
+ var metadata_1 = availableScrapers_1_1.value.metadata;
3092
+ all.push(metadata_1);
3093
+ }
3094
+ }
3095
+ catch (e_3_1) { e_3 = { error: e_3_1 }; }
3096
+ finally {
3097
+ try {
3098
+ if (availableScrapers_1_1 && !availableScrapers_1_1.done && (_c = availableScrapers_1.return)) _c.call(availableScrapers_1);
3099
+ }
3100
+ finally { if (e_3) throw e_3.error; }
3101
+ }
3089
3102
  var metadata = all.map(function (metadata) {
3090
3103
  var isMetadataAviailable = $scrapersMetadataRegister
3091
3104
  .list()
@@ -3099,42 +3112,44 @@ function $registeredScrapersMessage() {
3099
3112
  var packageName = _a.packageName, className = _a.className;
3100
3113
  return metadata.packageName === packageName && metadata.className === className;
3101
3114
  });
3102
- return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled });
3115
+ var isAvilableInTools = availableScrapers.some(function (_a) {
3116
+ var _b = _a.metadata, packageName = _b.packageName, className = _b.className;
3117
+ return metadata.packageName === packageName && metadata.className === className;
3118
+ });
3119
+ return __assign(__assign({}, metadata), { isMetadataAviailable: isMetadataAviailable, isInstalled: isInstalled, isAvilableInTools: isAvilableInTools });
3103
3120
  });
3104
3121
  if (metadata.length === 0) {
3105
- return "No scrapers are available";
3122
+ return spaceTrim("\n **No scrapers are available**\n\n This is a unexpected behavior, you are probably using some broken version of Promptbook\n At least there should be available the metadata of the scrapers\n ");
3106
3123
  }
3107
3124
  return spaceTrim(function (block) { return "\n Available scrapers are:\n ".concat(block(metadata
3108
3125
  .map(function (_a, i) {
3109
- var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser;
3110
- var more;
3111
- // TODO: Use documentationUrl
3112
- if (just(false)) {
3113
- more = '';
3114
- }
3115
- else if (!isMetadataAviailable && !isInstalled) {
3116
- // TODO: [�][�] Maybe do allow to do auto-install if package not registered and not found
3117
- more = "*(not installed and no metadata, looks like a unexpected behavior)*";
3118
- }
3119
- else if (isMetadataAviailable && !isInstalled) {
3120
- // TODO: [�][�]
3121
- more = "*(not installed)*";
3122
- }
3123
- else if (!isMetadataAviailable && isInstalled) {
3124
- more = "*(no metadata, looks like a unexpected behavior)*";
3125
- }
3126
- else if (isMetadataAviailable && isInstalled) {
3127
- more = "(installed)";
3128
- }
3129
- else {
3130
- more = "*(unknown state, looks like a unexpected behavior)*";
3131
- }
3126
+ var packageName = _a.packageName, className = _a.className, isMetadataAviailable = _a.isMetadataAviailable, isInstalled = _a.isInstalled, mimeTypes = _a.mimeTypes, isAvilableInBrowser = _a.isAvilableInBrowser, isAvilableInTools = _a.isAvilableInTools;
3127
+ var more = [];
3128
+ // TODO: [🧠] Maybe use `documentationUrl`
3129
+ if (isMetadataAviailable) {
3130
+ more.push("\u2B1C Metadata registered");
3131
+ } // not else
3132
+ if (isInstalled) {
3133
+ more.push("\uD83D\uDFE9 Installed");
3134
+ } // not else
3135
+ if (isAvilableInTools) {
3136
+ more.push("\uD83D\uDFE6 Available in tools");
3137
+ } // not else
3138
+ if (!isMetadataAviailable && isInstalled) {
3139
+ more.push("When no metadata registered but scraper is installed, it is an unexpected behavior");
3140
+ } // not else
3141
+ if (!isInstalled && isAvilableInTools) {
3142
+ more.push("When the scraper is not installed but available in tools, it is an unexpected compatibility behavior");
3143
+ } // not else
3132
3144
  if (!isAvilableInBrowser) {
3133
- more += " *(not available in browser)*";
3145
+ more.push("Not usable in browser");
3134
3146
  }
3135
- return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes.join(', '), " ").concat(more);
3147
+ var moreText = more.length === 0 ? '' : " *(".concat(more.join('; '), ")*");
3148
+ return "".concat(i + 1, ") `").concat(className, "` from `").concat(packageName, "` compatible to scrape ").concat(mimeTypes
3149
+ .map(function (mimeType) { return "\"".concat(mimeType, "\""); })
3150
+ .join(', ')).concat(moreText);
3136
3151
  })
3137
- .join('\n')), "\n "); });
3152
+ .join('\n')), "\n\n Legend:\n - \u2B1C **Metadata registered** means that Promptbook knows about the scraper, it is similar to registration in some registry\n - \uD83D\uDFE9 **Installed** means that you have imported package with particular scraper\n - \uD83D\uDFE6 **Available in tools** means that you have passed scraper as dependency into prepare or execution process\n\n "); });
3138
3153
  }
3139
3154
  /**
3140
3155
  * TODO: [®] DRY Register logic
@@ -3382,57 +3397,75 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3382
3397
  _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3383
3398
  knowledgePreparedUnflatten = new Array(knowledgeSources.length);
3384
3399
  return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
3385
- var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
3386
- var e_1, _c;
3387
- return __generator(this, function (_d) {
3388
- switch (_d.label) {
3400
+ var partialPieces, sourceHandler, scrapers, _loop_1, scrapers_1, scrapers_1_1, scraper, state_1, e_1_1, pieces;
3401
+ var e_1, _a;
3402
+ return __generator(this, function (_b) {
3403
+ switch (_b.label) {
3389
3404
  case 0:
3390
3405
  partialPieces = null;
3391
3406
  return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname: rootDirname, isVerbose: isVerbose })];
3392
3407
  case 1:
3393
- sourceHandler = _d.sent();
3394
- _d.label = 2;
3408
+ sourceHandler = _b.sent();
3409
+ scrapers = arrayableToArray(tools.scrapers);
3410
+ _loop_1 = function (scraper) {
3411
+ var partialPiecesUnchecked;
3412
+ return __generator(this, function (_c) {
3413
+ switch (_c.label) {
3414
+ case 0:
3415
+ if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
3416
+ // <- TODO: [🦔] Implement mime-type wildcards
3417
+ ) {
3418
+ return [2 /*return*/, "continue"];
3419
+ }
3420
+ return [4 /*yield*/, scraper.scrape(sourceHandler)];
3421
+ case 1:
3422
+ partialPiecesUnchecked = _c.sent();
3423
+ if (partialPiecesUnchecked !== null) {
3424
+ partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3425
+ return [2 /*return*/, "break"];
3426
+ }
3427
+ console.warn(spaceTrim(function (block) { return "\n Cannot scrape knowledge from source despite the scraper `".concat(scraper.metadata.className, "` supports the mime type \"").concat(sourceHandler.mimeType, "\".\n \n The source:\n > ").concat(block(knowledgeSource.sourceContent
3428
+ .split('\n')
3429
+ .map(function (line) { return "> ".concat(line); })
3430
+ .join('\n')), "\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
3431
+ return [2 /*return*/];
3432
+ }
3433
+ });
3434
+ };
3435
+ _b.label = 2;
3395
3436
  case 2:
3396
- _d.trys.push([2, 7, 8, 9]);
3397
- _a = __values(arrayableToArray(tools.scrapers)), _b = _a.next();
3398
- _d.label = 3;
3437
+ _b.trys.push([2, 7, 8, 9]);
3438
+ scrapers_1 = __values(scrapers), scrapers_1_1 = scrapers_1.next();
3439
+ _b.label = 3;
3399
3440
  case 3:
3400
- if (!!_b.done) return [3 /*break*/, 6];
3401
- scraper = _b.value;
3402
- if (!scraper.metadata.mimeTypes.includes(sourceHandler.mimeType)
3403
- // <- TODO: [🦔] Implement mime-type wildcards
3404
- ) {
3405
- return [3 /*break*/, 5];
3406
- }
3407
- return [4 /*yield*/, scraper.scrape(sourceHandler)];
3441
+ if (!!scrapers_1_1.done) return [3 /*break*/, 6];
3442
+ scraper = scrapers_1_1.value;
3443
+ return [5 /*yield**/, _loop_1(scraper)];
3408
3444
  case 4:
3409
- partialPiecesUnchecked = _d.sent();
3410
- if (partialPiecesUnchecked !== null) {
3411
- partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3412
- // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
3445
+ state_1 = _b.sent();
3446
+ if (state_1 === "break")
3413
3447
  return [3 /*break*/, 6];
3414
- }
3415
- _d.label = 5;
3448
+ _b.label = 5;
3416
3449
  case 5:
3417
- _b = _a.next();
3450
+ scrapers_1_1 = scrapers_1.next();
3418
3451
  return [3 /*break*/, 3];
3419
3452
  case 6: return [3 /*break*/, 9];
3420
3453
  case 7:
3421
- e_1_1 = _d.sent();
3454
+ e_1_1 = _b.sent();
3422
3455
  e_1 = { error: e_1_1 };
3423
3456
  return [3 /*break*/, 9];
3424
3457
  case 8:
3425
3458
  try {
3426
- if (_b && !_b.done && (_c = _a.return)) _c.call(_a);
3459
+ if (scrapers_1_1 && !scrapers_1_1.done && (_a = scrapers_1.return)) _a.call(scrapers_1);
3427
3460
  }
3428
3461
  finally { if (e_1) throw e_1.error; }
3429
3462
  return [7 /*endfinally*/];
3430
3463
  case 9:
3431
3464
  if (partialPieces === null) {
3432
- throw new KnowledgeScrapeError(spaceTrim(function (block) { return "\n Cannot scrape knowledge from source:\n \n > ".concat(block(knowledgeSource.sourceContent
3465
+ throw new KnowledgeScrapeError(spaceTrim(function (block) { return "\n Cannot scrape knowledge\n \n The source:\n > ".concat(block(knowledgeSource.sourceContent
3433
3466
  .split('\n')
3434
3467
  .map(function (line) { return "> ".concat(line); })
3435
- .join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage()), "\n\n\n "); }));
3468
+ .join('\n')), "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n ").concat(block($registeredScrapersMessage(scrapers)), "\n\n\n "); }));
3436
3469
  }
3437
3470
  pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
3438
3471
  {