@promptbook/node 0.84.0-12 → 0.84.0-14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -9,7 +9,7 @@ import { unparse, parse } from 'papaparse';
9
9
  import hexEncoder from 'crypto-js/enc-hex';
10
10
  import sha256 from 'crypto-js/sha256';
11
11
  import { SHA256 } from 'crypto-js';
12
- import { lookup } from 'mime-types';
12
+ import { lookup, extension } from 'mime-types';
13
13
  import { spawn } from 'child_process';
14
14
  import * as dotenv from 'dotenv';
15
15
 
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
27
  * @generated
28
28
  * @see https://github.com/webgptorg/promptbook
29
29
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
30
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
31
31
  /**
32
32
  * TODO: string_promptbook_version should be constrained to all the versions of the Promptbook engine
33
33
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -204,6 +204,12 @@ var DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
204
204
  * @public exported from `@promptbook/core`
205
205
  */
206
206
  var DEFAULT_TASK_TITLE = "Task";
207
+ /**
208
+ * When the pipeline is flat and no name of return parameter is provided, this name is used
209
+ *
210
+ * @public exported from `@promptbook/core`
211
+ */
212
+ var DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = 'result';
207
213
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
208
214
  /**
209
215
  * The maximum number of iterations for a loop
@@ -5322,22 +5328,6 @@ function $registeredScrapersMessage(availableScrapers) {
5322
5328
  * TODO: [®] DRY Register logic
5323
5329
  */
5324
5330
 
5325
- /**
5326
- * Removes emojis from a string and fix whitespaces
5327
- *
5328
- * @param text with emojis
5329
- * @returns text without emojis
5330
- * @public exported from `@promptbook/utils`
5331
- */
5332
- function removeEmojis(text) {
5333
- // Replace emojis (and also ZWJ sequence) with hyphens
5334
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5335
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5336
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5337
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
5338
- return text;
5339
- }
5340
-
5341
5331
  /**
5342
5332
  * @@@
5343
5333
  *
@@ -5400,30 +5390,6 @@ function normalizeToKebabCase(text) {
5400
5390
  * Note: [💞] Ignore a discrepancy between file name and entity name
5401
5391
  */
5402
5392
 
5403
- /**
5404
- * @@@
5405
- *
5406
- * @param value @@@
5407
- * @returns @@@
5408
- * @example @@@
5409
- * @public exported from `@promptbook/utils`
5410
- */
5411
- function titleToName(value) {
5412
- if (isValidUrl(value)) {
5413
- value = value.replace(/^https?:\/\//, '');
5414
- value = value.replace(/\.html$/, '');
5415
- }
5416
- else if (isValidFilePath(value)) {
5417
- value = basename(value);
5418
- // Note: Keeping extension in the name
5419
- }
5420
- value = value.split('/').join('-');
5421
- value = removeEmojis(value);
5422
- value = normalizeToKebabCase(value);
5423
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5424
- return value;
5425
- }
5426
-
5427
5393
  /**
5428
5394
  * Creates unique name for the source
5429
5395
  *
@@ -5509,6 +5475,57 @@ function isFileExisting(filename, fs) {
5509
5475
  * TODO: [🖇] What about symlinks?
5510
5476
  */
5511
5477
 
5478
+ /**
5479
+ * Convert mime type to file extension
5480
+ *
5481
+ * Note: If the mime type is invalid, `null` is returned
5482
+ *
5483
+ * @private within the repository
5484
+ */
5485
+ function mimeTypeToExtension(value) {
5486
+ return extension(value) || null;
5487
+ }
5488
+
5489
+ /**
5490
+ * Removes emojis from a string (note: whitespace is not modified by this function)
5491
+ *
5492
+ * @param text with emojis
5493
+ * @returns text without emojis
5494
+ * @public exported from `@promptbook/utils`
5495
+ */
5496
+ function removeEmojis(text) {
5497
+ // Strip emoji modifiers, variation selectors and ZWJ sequences, then remove the remaining emojis (nothing is inserted in their place)
5498
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5499
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5500
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5501
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5502
+ return text;
5503
+ }
5504
+
5505
+ /**
5506
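+ * Converts a title, URL or file path into a name: strips the URL protocol and `.html` suffix (or takes the file basename), replaces slashes with hyphens, removes emojis and normalizes via `normalizeToKebabCase`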
5507
+ *
5508
+ * @param value @@@
5509
+ * @returns @@@
5510
+ * @example @@@
5511
+ * @public exported from `@promptbook/utils`
5512
+ */
5513
+ function titleToName(value) {
5514
+ if (isValidUrl(value)) {
5515
+ value = value.replace(/^https?:\/\//, '');
5516
+ value = value.replace(/\.html$/, '');
5517
+ }
5518
+ else if (isValidFilePath(value)) {
5519
+ value = basename(value);
5520
+ // Note: Keeping extension in the name
5521
+ }
5522
+ value = value.split('/').join('-');
5523
+ value = removeEmojis(value);
5524
+ value = normalizeToKebabCase(value);
5525
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5526
+ return value;
5527
+ }
5528
+
5512
5529
  /**
5513
5530
  * The built-in `fetch` function wrapped with lightweight error handling; used as the default fetch function in Promptbook scrapers
5514
5531
  *
@@ -5544,7 +5561,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5544
5561
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5545
5562
  var _a;
5546
5563
  return __awaiter(this, void 0, void 0, function () {
5547
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5564
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5548
5565
  return __generator(this, function (_l) {
5549
5566
  switch (_l.label) {
5550
5567
  case 0:
@@ -5560,25 +5577,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5560
5577
  url = knowledgeSourceContent;
5561
5578
  return [4 /*yield*/, fetch(url)];
5562
5579
  case 1:
5563
- response = _l.sent();
5564
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5565
- filename = url.split('/').pop() || titleToName(url);
5580
+ response_1 = _l.sent();
5581
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5582
+ if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
5583
+ return [2 /*return*/, {
5584
+ source: name,
5585
+ filename: null,
5586
+ url: url,
5587
+ mimeType: mimeType,
5588
+ /*
5589
+ TODO: [🥽]
5590
+ > async asBlob() {
5591
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
5592
+ > const content = await response.blob();
5593
+ > return content;
5594
+ > },
5595
+ */
5596
+ asJson: function () {
5597
+ return __awaiter(this, void 0, void 0, function () {
5598
+ var content;
5599
+ return __generator(this, function (_a) {
5600
+ switch (_a.label) {
5601
+ case 0: return [4 /*yield*/, response_1.json()];
5602
+ case 1:
5603
+ content = _a.sent();
5604
+ return [2 /*return*/, content];
5605
+ }
5606
+ });
5607
+ });
5608
+ },
5609
+ asText: function () {
5610
+ return __awaiter(this, void 0, void 0, function () {
5611
+ var content;
5612
+ return __generator(this, function (_a) {
5613
+ switch (_a.label) {
5614
+ case 0: return [4 /*yield*/, response_1.text()];
5615
+ case 1:
5616
+ content = _a.sent();
5617
+ return [2 /*return*/, content];
5618
+ }
5619
+ });
5620
+ });
5621
+ },
5622
+ }];
5623
+ }
5624
+ basename = url.split('/').pop() || titleToName(url);
5566
5625
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5567
5626
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5568
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5627
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
5569
5628
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5570
5629
  case 2:
5571
5630
  _l.sent();
5572
5631
  _g = (_f = tools.fs).writeFile;
5573
5632
  _h = [join(rootDirname_1, filepath)];
5574
5633
  _k = (_j = Buffer).from;
5575
- return [4 /*yield*/, response.arrayBuffer()];
5634
+ return [4 /*yield*/, response_1.arrayBuffer()];
5576
5635
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5577
5636
  case 4:
5578
5637
  _l.sent();
5579
- // TODO: !!!!!!!! Check the file security
5638
+ // TODO: [💵] Check the file security
5580
5639
  // TODO: !!!!!!!! Check the file size (if it is not too big)
5581
- // TODO: !!!!!!!! Delete the file
5640
+ // TODO: !!!!!!!! Delete the file after the scraping is done
5582
5641
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
5583
5642
  case 5:
5584
5643
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
@@ -8699,12 +8758,15 @@ function isFlatPipeline(pipelineString) {
8699
8758
  pipelineString = removeMarkdownComments(pipelineString);
8700
8759
  pipelineString = spaceTrim(pipelineString);
8701
8760
  var isMarkdownBeginningWithHeadline = pipelineString.startsWith('# ');
8702
- var isLastLineReturnStatement = pipelineString.split('\n').pop().split('`').join('').startsWith('->');
8703
- // TODO: Also (double)check
8761
+ //const isLastLineReturnStatement = pipelineString.split('\n').pop()!.split('`').join('').startsWith('->');
8762
+ var isBacktickBlockUsed = pipelineString.includes('```');
8763
+ var isQuoteBlocksUsed = /^>\s+/m.test(pipelineString);
8764
+ var isBlocksUsed = isBacktickBlockUsed || isQuoteBlocksUsed;
8765
+ // TODO: [🧉] Also (double)check
8704
8766
  // > const usedCommands
8705
8767
  // > const isBlocksUsed
8706
8768
  // > const returnStatementCount
8707
- var isFlat = !isMarkdownBeginningWithHeadline && isLastLineReturnStatement;
8769
+ var isFlat = !isMarkdownBeginningWithHeadline && !isBlocksUsed; /* && isLastLineReturnStatement */
8708
8770
  return isFlat;
8709
8771
  }
8710
8772
 
@@ -8718,9 +8780,26 @@ function deflatePipeline(pipelineString) {
8718
8780
  return pipelineString;
8719
8781
  }
8720
8782
  var pipelineStringLines = pipelineString.split('\n');
8721
- var returnStatement = pipelineStringLines.pop();
8783
+ var potentialReturnStatement = pipelineStringLines.pop();
8784
+ var returnStatement;
8785
+ if (/(-|=)>\s*\{.*\}/.test(potentialReturnStatement)) {
8786
+ // Note: Last line is return statement
8787
+ returnStatement = potentialReturnStatement;
8788
+ }
8789
+ else {
8790
+ // Note: Last line is not a return statement
8791
+ returnStatement = "-> {".concat(DEFAULT_BOOK_OUTPUT_PARAMETER_NAME, "}");
8792
+ pipelineStringLines.push(potentialReturnStatement);
8793
+ }
8722
8794
  var prompt = spaceTrim(pipelineStringLines.join('\n'));
8723
- pipelineString = validatePipelineString(spaceTrim(function (block) { return "\n # ".concat(DEFAULT_BOOK_TITLE, "\n\n ## Prompt\n\n ```\n ").concat(block(prompt), "\n ```\n\n ").concat(returnStatement, "\n "); }));
8795
+ var quotedPrompt;
8796
+ if (prompt.split('\n').length <= 1) {
8797
+ quotedPrompt = "> ".concat(prompt);
8798
+ }
8799
+ else {
8800
+ quotedPrompt = spaceTrim(function (block) { return "\n ```\n ".concat(block(prompt.split('`').join('\\`')), "\n ```\n "); });
8801
+ }
8802
+ pipelineString = validatePipelineString(spaceTrim(function (block) { return "\n # ".concat(DEFAULT_BOOK_TITLE, "\n\n ## Prompt\n\n ").concat(block(quotedPrompt), "\n\n ").concat(returnStatement, "\n "); }));
8724
8803
  // <- TODO: Maybe use book` notation
8725
8804
  return pipelineString;
8726
8805
  }