@promptbook/core 0.72.0-6 → 0.72.0-8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/README.md +6 -0
  2. package/esm/index.es.js +1514 -454
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/browser.index.d.ts +1 -1
  5. package/esm/typings/src/_packages/core.index.d.ts +22 -10
  6. package/esm/typings/src/_packages/node.index.d.ts +6 -2
  7. package/esm/typings/src/_packages/types.index.d.ts +28 -20
  8. package/esm/typings/src/cli/cli-commands/about.d.ts +1 -1
  9. package/esm/typings/src/cli/cli-commands/hello.d.ts +2 -1
  10. package/esm/typings/src/cli/cli-commands/make.d.ts +1 -1
  11. package/esm/typings/src/cli/cli-commands/prettify.d.ts +2 -1
  12. package/esm/typings/src/cli/cli-commands/test-command.d.ts +13 -0
  13. package/esm/typings/src/cli/main.d.ts +1 -1
  14. package/esm/typings/src/cli/promptbookCli.d.ts +1 -1
  15. package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +8 -5
  16. package/esm/typings/src/commands/EXPECT/expectCommandParser.d.ts +1 -1
  17. package/esm/typings/src/commands/FOREACH/foreachCommandParser.d.ts +1 -1
  18. package/esm/typings/src/commands/FORMAT/formatCommandParser.d.ts +1 -1
  19. package/esm/typings/src/commands/JOKER/jokerCommandParser.d.ts +1 -1
  20. package/esm/typings/src/commands/KNOWLEDGE/knowledgeCommandParser.d.ts +1 -1
  21. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.d.ts +11 -0
  22. package/esm/typings/src/commands/KNOWLEDGE/utils/sourceContentToName.test.d.ts +4 -0
  23. package/esm/typings/src/commands/MODEL/modelCommandParser.d.ts +1 -1
  24. package/esm/typings/src/commands/PARAMETER/parameterCommandParser.d.ts +1 -1
  25. package/esm/typings/src/commands/PERSONA/personaCommandParser.d.ts +1 -1
  26. package/esm/typings/src/commands/POSTPROCESS/postprocessCommandParser.d.ts +1 -1
  27. package/esm/typings/src/commands/PROMPTBOOK_VERSION/promptbookVersionCommandParser.d.ts +1 -1
  28. package/esm/typings/src/commands/TEMPLATE/templateCommandParser.d.ts +1 -1
  29. package/esm/typings/src/commands/URL/urlCommandParser.d.ts +1 -1
  30. package/esm/typings/src/commands/X_ACTION/actionCommandParser.d.ts +1 -1
  31. package/esm/typings/src/commands/X_INSTRUMENT/instrumentCommandParser.d.ts +1 -1
  32. package/esm/typings/src/commands/_BOILERPLATE/boilerplateCommandParser.d.ts +1 -1
  33. package/esm/typings/src/config.d.ts +10 -0
  34. package/esm/typings/src/conversion/pipelineStringToJson.d.ts +2 -15
  35. package/esm/typings/src/conversion/validation/_importPipeline.d.ts +1 -1
  36. package/esm/typings/src/conversion/validation/validatePipeline.d.ts +5 -5
  37. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceTools.d.ts +2 -2
  38. package/esm/typings/src/{knowledge/dialogs → dialogs}/callback/CallbackInterfaceToolsOptions.d.ts +2 -2
  39. package/esm/typings/src/{knowledge/dialogs → dialogs}/simple-prompt/SimplePromptInterfaceTools.d.ts +4 -4
  40. package/esm/typings/src/errors/KnowledgeScrapeError.d.ts +9 -0
  41. package/esm/typings/src/errors/MissingToolsError.d.ts +9 -0
  42. package/esm/typings/src/execution/ExecutionTools.d.ts +3 -3
  43. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +5 -2
  44. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +2 -13
  45. package/esm/typings/src/execution/createPipelineExecutor/00-createPipelineExecutor.d.ts +1 -1
  46. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +1 -1
  47. package/esm/typings/src/execution/translation/automatic-translate/translateMessages.d.ts +3 -0
  48. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -0
  49. package/esm/typings/src/llm-providers/_common/createLlmToolsFromConfigurationFromEnv.d.ts +1 -1
  50. package/esm/typings/src/llm-providers/_common/createLlmToolsFromEnv.d.ts +1 -1
  51. package/esm/typings/src/llm-providers/_common/getLlmToolsForCli.d.ts +1 -1
  52. package/esm/typings/src/llm-providers/anthropic-claude/playground/playground.d.ts +1 -0
  53. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +6 -0
  54. package/esm/typings/src/llm-providers/azure-openai/playground/playground.d.ts +1 -0
  55. package/esm/typings/src/llm-providers/langtail/playground/playground.d.ts +3 -0
  56. package/esm/typings/src/llm-providers/multiple/playground/playground.d.ts +3 -0
  57. package/esm/typings/src/llm-providers/openai/playground/playground.d.ts +1 -0
  58. package/esm/typings/src/llm-providers/remote/playground/playground.d.ts +3 -0
  59. package/esm/typings/src/personas/preparePersona.d.ts +2 -2
  60. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +67 -0
  61. package/esm/typings/src/prepare/preparePipeline.d.ts +2 -2
  62. package/esm/typings/src/prepare/prepareTemplates.d.ts +2 -2
  63. package/esm/typings/src/scrapers/_common/Converter.d.ts +28 -0
  64. package/esm/typings/src/scrapers/_common/Scraper.d.ts +71 -0
  65. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +11 -0
  66. package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.d.ts +4 -4
  67. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +33 -0
  68. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.test.d.ts +4 -0
  69. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +10 -0
  70. package/esm/typings/src/scrapers/document/documentScraper.d.ts +37 -0
  71. package/esm/typings/src/scrapers/document/documentScraper.test.d.ts +4 -0
  72. package/esm/typings/src/scrapers/document/playground/document-scraper-playground.d.ts +5 -0
  73. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.d.ts +37 -0
  74. package/esm/typings/src/scrapers/document-legacy/legacyDocumentScraper.test.d.ts +4 -0
  75. package/esm/typings/src/scrapers/document-legacy/playground/legacy-document-scraper-playground.d.ts +5 -0
  76. package/esm/typings/src/scrapers/index.d.ts +8 -0
  77. package/esm/typings/src/scrapers/markdown/markdownScraper.d.ts +29 -0
  78. package/esm/typings/src/scrapers/markdown/playground/markdown-scraper-playground.d.ts +5 -0
  79. package/esm/typings/src/scrapers/pdf/pdfScraper.d.ts +35 -0
  80. package/esm/typings/src/scrapers/pdf/playground/pdf-scraper-playground.d.ts +5 -0
  81. package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +5 -0
  82. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +12 -0
  83. package/esm/typings/src/scrapers/website/websiteScraper.d.ts +43 -0
  84. package/esm/typings/src/storage/{files-storage/FilesStorage.d.ts → file-cache-storage/FileCacheStorage.d.ts} +5 -5
  85. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +10 -0
  86. package/esm/typings/src/storage/{files-storage → file-cache-storage}/utils/nameToSubfolderPath.d.ts +1 -1
  87. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.test.d.ts +1 -0
  88. package/esm/typings/src/storage/local-storage/getLocalStorage.d.ts +1 -1
  89. package/esm/typings/src/storage/local-storage/getSessionStorage.d.ts +1 -1
  90. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +9 -2
  91. package/esm/typings/src/types/PipelineJson/PipelineJson.d.ts +2 -2
  92. package/esm/typings/src/types/typeAliases.d.ts +8 -11
  93. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +14 -0
  94. package/esm/typings/src/utils/execCommand/$execCommands.d.ts +17 -0
  95. package/esm/typings/src/utils/execCommand/IExecCommandOptions.d.ts +23 -0
  96. package/esm/typings/src/utils/execCommand/execCommand.test.d.ts +1 -0
  97. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +10 -0
  98. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.test.d.ts +1 -0
  99. package/esm/typings/src/utils/files/$isDirectoryExisting.d.ts +3 -3
  100. package/esm/typings/src/utils/files/$isFileExisting.d.ts +3 -3
  101. package/esm/typings/src/utils/files/$listAllFiles.d.ts +5 -4
  102. package/esm/typings/src/utils/files/extensionToMimeType.d.ts +8 -0
  103. package/esm/typings/src/utils/files/extensionToMimeType.test.d.ts +1 -0
  104. package/esm/typings/src/utils/files/getFileExtension.d.ts +8 -0
  105. package/esm/typings/src/utils/files/getFileExtension.test.d.ts +1 -0
  106. package/esm/typings/src/utils/validators/filePath/isValidFilePath.d.ts +2 -2
  107. package/package.json +6 -1
  108. package/umd/index.umd.js +1518 -459
  109. package/umd/index.umd.js.map +1 -1
  110. package/esm/typings/src/knowledge/prepare-knowledge/_common/Scraper.d.ts +0 -37
  111. package/esm/typings/src/knowledge/prepare-knowledge/markdown/playground/markdown-knowledge-playground.d.ts +0 -2
  112. package/esm/typings/src/knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.d.ts +0 -14
  113. package/esm/typings/src/knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.d.ts +0 -15
  114. package/esm/typings/src/prepare/PrepareOptions.d.ts +0 -22
  115. package/esm/typings/src/storage/files-storage/FilesStorageOptions.d.ts +0 -10
  116. /package/esm/typings/src/{knowledge/dialogs → dialogs}/user-interface-execution-tools.test.d.ts +0 -0
  117. /package/esm/typings/src/{knowledge/prepare-knowledge → scrapers}/_common/prepareKnowledgePieces.test.d.ts +0 -0
  118. /package/esm/typings/src/{knowledge/prepare-knowledge/markdown/prepareKnowledgeFromMarkdown.test.d.ts → scrapers/markdown/markdownScraper.test.d.ts} +0 -0
  119. /package/esm/typings/src/{knowledge/prepare-knowledge/pdf/prepareKnowledgeFromPdf.test.d.ts → scrapers/website/utils/markdownConverter.test.d.ts} +0 -0
  120. /package/esm/typings/src/{storage/files-storage/utils/nameToSubfolderPath.test.d.ts → scrapers/website/websiteScraper.test.d.ts} +0 -0
package/umd/index.umd.js CHANGED
@@ -1,13 +1,14 @@
1
1
  (function (global, factory) {
2
- typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('waitasecond'), require('papaparse'), require('crypto-js/enc-hex'), require('crypto-js/sha256'), require('moment')) :
3
- typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'waitasecond', 'papaparse', 'crypto-js/enc-hex', 'crypto-js/sha256', 'moment'], factory) :
4
- (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-core"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.waitasecond, global.papaparse, global.hexEncoder, global.sha256, global.moment));
5
- })(this, (function (exports, spaceTrim, prettier, parserHtml, waitasecond, papaparse, hexEncoder, sha256, moment) { 'use strict';
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('fs/promises'), require('path'), require('child_process'), require('colors'), require('waitasecond'), require('crypto-js'), require('crypto-js/enc-hex'), require('papaparse'), require('@mozilla/readability'), require('jsdom'), require('showdown'), require('mime-types'), require('crypto-js/sha256'), require('moment')) :
3
+ typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'fs/promises', 'path', 'child_process', 'colors', 'waitasecond', 'crypto-js', 'crypto-js/enc-hex', 'papaparse', '@mozilla/readability', 'jsdom', 'showdown', 'mime-types', 'crypto-js/sha256', 'moment'], factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-core"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.promises, global.path, global.child_process, global.colors, global.waitasecond, global.cryptoJs, global.hexEncoder, global.papaparse, global.readability, global.jsdom, global.showdown, global.mimeTypes, global.sha256, global.moment));
5
+ })(this, (function (exports, spaceTrim, prettier, parserHtml, promises, path, child_process, colors, waitasecond, cryptoJs, hexEncoder, papaparse, readability, jsdom, showdown, mimeTypes, sha256, moment) { 'use strict';
6
6
 
7
7
  function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
8
8
 
9
9
  var spaceTrim__default = /*#__PURE__*/_interopDefaultLegacy(spaceTrim);
10
10
  var parserHtml__default = /*#__PURE__*/_interopDefaultLegacy(parserHtml);
11
+ var colors__default = /*#__PURE__*/_interopDefaultLegacy(colors);
11
12
  var hexEncoder__default = /*#__PURE__*/_interopDefaultLegacy(hexEncoder);
12
13
  var sha256__default = /*#__PURE__*/_interopDefaultLegacy(sha256);
13
14
  var moment__default = /*#__PURE__*/_interopDefaultLegacy(moment);
@@ -16,7 +17,7 @@
16
17
  /**
17
18
  * The version of the Promptbook library
18
19
  */
19
- var PROMPTBOOK_VERSION = '0.72.0-5';
20
+ var PROMPTBOOK_VERSION = '0.72.0-7';
20
21
  // TODO: [main] !!!! List here all the versions and annotate + put into script
21
22
 
22
23
  /*! *****************************************************************************
@@ -688,9 +689,19 @@
688
689
  /**
689
690
  * Where to store the cache of executions for promptbook CLI
690
691
  *
692
+ * Note: When the folder does not exist, it is created recursively
693
+ *
691
694
  * @public exported from `@promptbook/core`
692
695
  */
693
696
  var EXECUTIONS_CACHE_DIRNAME = '/.promptbook/executions-cache';
697
+ /**
698
+ * Where to store the scrape cache
699
+ *
700
+ * Note: When the folder does not exist, it is created recursively
701
+ *
702
+ * @public exported from `@promptbook/core`
703
+ */
704
+ var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
694
705
  /**
695
706
  * The name of the builded pipeline collection made by CLI `ptbk make` and for lookup in `createCollectionFromDirectory`
696
707
  *
@@ -1220,11 +1231,11 @@
1220
1231
  * > ex port function validatePipeline(promptbook: really_unknown): asserts promptbook is PipelineJson {
1221
1232
  */
1222
1233
  /**
1223
- * TODO: [🐣][main] !!!! Validate that all samples match expectations
1224
- * TODO: [🐣][🐝][main] !!!! Validate that knowledge is valid (non-void)
1225
- * TODO: [🐣][main] !!!! Validate that persona can be used only with CHAT variant
1226
- * TODO: [🐣][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1227
- * TODO: [🐣][main] !!!! Validate that reserved parameter is not used as joker
1234
+ * TODO: [🧳][main] !!!! Validate that all samples match expectations
1235
+ * TODO: [🧳][🐝][main] !!!! Validate that knowledge is valid (non-void)
1236
+ * TODO: [🧳][main] !!!! Validate that persona can be used only with CHAT variant
1237
+ * TODO: [🧳][main] !!!! Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES
1238
+ * TODO: [🧳][main] !!!! Validate that reserved parameter is not used as joker
1228
1239
  * TODO: [🧠] Validation not only logic itself but imports around - files and websites and rerefenced pipelines exists
1229
1240
  * TODO: [🛠] Actions, instruments (and maybe knowledge) => Functions and tools
1230
1241
  */
@@ -1639,6 +1650,22 @@
1639
1650
  // <- [🅱]
1640
1651
  ];
1641
1652
 
1653
+ /**
1654
+ * This error indicates that the promptbook can not retrieve knowledge from external sources
1655
+ *
1656
+ * @public exported from `@promptbook/core`
1657
+ */
1658
+ var KnowledgeScrapeError = /** @class */ (function (_super) {
1659
+ __extends(KnowledgeScrapeError, _super);
1660
+ function KnowledgeScrapeError(message) {
1661
+ var _this = _super.call(this, message) || this;
1662
+ _this.name = 'KnowledgeScrapeError';
1663
+ Object.setPrototypeOf(_this, KnowledgeScrapeError.prototype);
1664
+ return _this;
1665
+ }
1666
+ return KnowledgeScrapeError;
1667
+ }(Error));
1668
+
1642
1669
  /**
1643
1670
  * Async version of Array.forEach
1644
1671
  *
@@ -1646,6 +1673,7 @@
1646
1673
  * @param options - Options for the function
1647
1674
  * @param callbackfunction - Function to call for each item
1648
1675
  * @public exported from `@promptbook/utils`
1676
+ * @deprecated [🪂] Use queues instead
1649
1677
  */
1650
1678
  function forEachAsync(array, options, callbackfunction) {
1651
1679
  return __awaiter(this, void 0, void 0, function () {
@@ -1715,59 +1743,246 @@
1715
1743
  }
1716
1744
 
1717
1745
  /**
1718
- * Represents the usage with no resources consumed
1746
+ * This error type indicates that some tools are missing for pipeline execution or preparation
1719
1747
  *
1720
1748
  * @public exported from `@promptbook/core`
1721
1749
  */
1722
- var ZERO_USAGE = $deepFreeze({
1723
- price: { value: 0 },
1724
- input: {
1725
- tokensCount: { value: 0 },
1726
- charactersCount: { value: 0 },
1727
- wordsCount: { value: 0 },
1728
- sentencesCount: { value: 0 },
1729
- linesCount: { value: 0 },
1730
- paragraphsCount: { value: 0 },
1731
- pagesCount: { value: 0 },
1732
- },
1733
- output: {
1734
- tokensCount: { value: 0 },
1735
- charactersCount: { value: 0 },
1736
- wordsCount: { value: 0 },
1737
- sentencesCount: { value: 0 },
1738
- linesCount: { value: 0 },
1739
- paragraphsCount: { value: 0 },
1740
- pagesCount: { value: 0 },
1741
- },
1742
- });
1750
+ var MissingToolsError = /** @class */ (function (_super) {
1751
+ __extends(MissingToolsError, _super);
1752
+ function MissingToolsError(message) {
1753
+ var _this = _super.call(this, spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(message), "\n\n Note: You have probbably forgot to provide some tools for pipeline execution or preparation\n\n "); })) || this;
1754
+ _this.name = 'MissingToolsError';
1755
+ Object.setPrototypeOf(_this, MissingToolsError.prototype);
1756
+ return _this;
1757
+ }
1758
+ return MissingToolsError;
1759
+ }(Error));
1760
+
1743
1761
  /**
1744
- * Represents the usage with unknown resources consumed
1762
+ * Detects if the code is running in a Node.js environment
1763
+ *
1764
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the global object to determine the environment
1765
+ *
1766
+ * @public exported from `@promptbook/utils`
1767
+ */
1768
+ var $isRunningInNode = new Function("\n try {\n return this === global;\n } catch (e) {\n return false;\n }\n");
1769
+
1770
+ /**
1771
+ * This error type indicates that you try to use a feature that is not available in the current environment
1745
1772
  *
1746
1773
  * @public exported from `@promptbook/core`
1747
1774
  */
1748
- var UNCERTAIN_USAGE = $deepFreeze({
1749
- price: { value: 0, isUncertain: true },
1750
- input: {
1751
- tokensCount: { value: 0, isUncertain: true },
1752
- charactersCount: { value: 0, isUncertain: true },
1753
- wordsCount: { value: 0, isUncertain: true },
1754
- sentencesCount: { value: 0, isUncertain: true },
1755
- linesCount: { value: 0, isUncertain: true },
1756
- paragraphsCount: { value: 0, isUncertain: true },
1757
- pagesCount: { value: 0, isUncertain: true },
1758
- },
1759
- output: {
1760
- tokensCount: { value: 0, isUncertain: true },
1761
- charactersCount: { value: 0, isUncertain: true },
1762
- wordsCount: { value: 0, isUncertain: true },
1763
- sentencesCount: { value: 0, isUncertain: true },
1764
- linesCount: { value: 0, isUncertain: true },
1765
- paragraphsCount: { value: 0, isUncertain: true },
1766
- pagesCount: { value: 0, isUncertain: true },
1767
- },
1768
- });
1775
+ var EnvironmentMismatchError = /** @class */ (function (_super) {
1776
+ __extends(EnvironmentMismatchError, _super);
1777
+ function EnvironmentMismatchError(message) {
1778
+ var _this = _super.call(this, message) || this;
1779
+ _this.name = 'EnvironmentMismatchError';
1780
+ Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
1781
+ return _this;
1782
+ }
1783
+ return EnvironmentMismatchError;
1784
+ }(Error));
1769
1785
 
1770
- var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
1786
+ /**
1787
+ * Normalize options for `execCommand` and `execCommands`
1788
+ *
1789
+ * @private internal utility of `execCommand` and `execCommands`
1790
+ */
1791
+ function execCommandNormalizeOptions(options) {
1792
+ var _a;
1793
+ var _b, _c, _d;
1794
+ var command;
1795
+ var cwd;
1796
+ var crashOnError;
1797
+ var args = [];
1798
+ var timeout;
1799
+ if (typeof options === 'string') {
1800
+ // TODO: [1] DRY default values
1801
+ command = options;
1802
+ cwd = process.cwd();
1803
+ crashOnError = true;
1804
+ timeout = Infinity;
1805
+ }
1806
+ else {
1807
+ /*
1808
+ TODO:
1809
+ if ((options as any).commands !== undefined) {
1810
+ commands = (options as any).commands;
1811
+ } else {
1812
+ commands = [(options as any).command];
1813
+ }
1814
+ */
1815
+ // TODO: [1] DRY default values
1816
+ command = options.command;
1817
+ cwd = (_b = options.cwd) !== null && _b !== void 0 ? _b : process.cwd();
1818
+ crashOnError = (_c = options.crashOnError) !== null && _c !== void 0 ? _c : true;
1819
+ timeout = (_d = options.timeout) !== null && _d !== void 0 ? _d : Infinity;
1820
+ }
1821
+ // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g
1822
+ var _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g))
1823
+ .map(function (_a) {
1824
+ var _b = __read(_a, 1), match = _b[0];
1825
+ return match;
1826
+ })
1827
+ .filter(function (arg) { return arg !== ''; });
1828
+ if (_.length > 1) {
1829
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1830
+ _a = __read(_), command = _a[0], args = _a.slice(1);
1831
+ }
1832
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1833
+ if (options.args) {
1834
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1835
+ args = __spreadArray(__spreadArray([], __read(args), false), __read(options.args), false);
1836
+ }
1837
+ var humanReadableCommand = !['npx', 'npm'].includes(command) ? command : args[0];
1838
+ if (['ts-node'].includes(humanReadableCommand)) {
1839
+ humanReadableCommand += " ".concat(args[1]);
1840
+ }
1841
+ return { command: command, humanReadableCommand: humanReadableCommand, args: args, cwd: cwd, crashOnError: crashOnError, timeout: timeout };
1842
+ }
1843
+ // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] });
1844
+
1845
+ /**
1846
+ * Run one command in a shell
1847
+ *
1848
+ * Note: There are 2 similar functions in the codebase:
1849
+ * - `$execCommand` which runs a single command
1850
+ * - `$execCommands` which runs multiple commands
1851
+ *
1852
+ * @public exported from `@promptbook/node`
1853
+ */
1854
+ function $execCommand(options) {
1855
+ if (!$isRunningInNode()) {
1856
+ throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js');
1857
+ }
1858
+ return new Promise(
1859
+ // <- TODO: [🧱] Implement in a functional (not new Class) way
1860
+ function (resolve, reject) {
1861
+ // eslint-disable-next-line prefer-const
1862
+ var _a = execCommandNormalizeOptions(options), command = _a.command, humanReadableCommand = _a.humanReadableCommand, args = _a.args, cwd = _a.cwd, crashOnError = _a.crashOnError, timeout = _a.timeout;
1863
+ if (timeout !== Infinity) {
1864
+ // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver()
1865
+ waitasecond.forTime(timeout).then(function () {
1866
+ if (crashOnError) {
1867
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms")));
1868
+ }
1869
+ else {
1870
+ console.warn("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms but continues running"));
1871
+ resolve('Command exceeded time limit');
1872
+ }
1873
+ });
1874
+ }
1875
+ if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) {
1876
+ command = "".concat(command, ".cmd");
1877
+ }
1878
+ // !!!!!! Verbose mode - to all consoles
1879
+ console.info(colors__default["default"].yellow(cwd) + ' ' + colors__default["default"].green(command) + ' ' + colors__default["default"].blue(args.join(' ')));
1880
+ try {
1881
+ var commandProcess = child_process.spawn(command, args, { cwd: cwd, shell: true });
1882
+ commandProcess.on('message', function (message) {
1883
+ console.info({ message: message });
1884
+ });
1885
+ var output_1 = [];
1886
+ commandProcess.stdout.on('data', function (stdout) {
1887
+ output_1.push(stdout.toString());
1888
+ console.info(stdout.toString());
1889
+ });
1890
+ commandProcess.stderr.on('data', function (stderr) {
1891
+ output_1.push(stderr.toString());
1892
+ if (stderr.toString().trim()) {
1893
+ console.warn(stderr.toString());
1894
+ }
1895
+ });
1896
+ var finishWithCode = function (code) {
1897
+ if (code !== 0) {
1898
+ if (crashOnError) {
1899
+ reject(new Error(output_1.join('\n').trim() ||
1900
+ "Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code)));
1901
+ }
1902
+ else {
1903
+ console.warn("Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code));
1904
+ resolve(spaceTrim.spaceTrim(output_1.join('\n')));
1905
+ }
1906
+ }
1907
+ else {
1908
+ resolve(spaceTrim.spaceTrim(output_1.join('\n')));
1909
+ }
1910
+ };
1911
+ commandProcess.on('close', finishWithCode);
1912
+ commandProcess.on('exit', finishWithCode);
1913
+ commandProcess.on('disconnect', function () {
1914
+ // Note: Unexpected disconnection should always result in rejection
1915
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" disconnected")));
1916
+ });
1917
+ commandProcess.on('error', function (error) {
1918
+ if (crashOnError) {
1919
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" failed: \n").concat(error.message)));
1920
+ }
1921
+ else {
1922
+ console.warn(error);
1923
+ resolve(spaceTrim.spaceTrim(output_1.join('\n')));
1924
+ }
1925
+ });
1926
+ }
1927
+ catch (error) {
1928
+ // Note: Unexpected error in sync code should always result in rejection
1929
+ reject(error);
1930
+ }
1931
+ });
1932
+ }
1933
+ /**
1934
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1935
+ */
1936
+
1937
+ /**
1938
+ * Checks if the file exists
1939
+ *
1940
+ * Note: `$` is used to indicate that this function is not a pure function - it looks at the filesystem
1941
+ *
1942
+ * @private within the repository
1943
+ */
1944
+ function $isFileExisting(filename) {
1945
+ return __awaiter(this, void 0, void 0, function () {
1946
+ var isReadAccessAllowed, isFile;
1947
+ return __generator(this, function (_a) {
1948
+ switch (_a.label) {
1949
+ case 0:
1950
+ if (!$isRunningInNode()) {
1951
+ throw new EnvironmentMismatchError('Function `$isFileExisting` works only in Node environment.js');
1952
+ }
1953
+ return [4 /*yield*/, promises.access(filename, promises.constants.R_OK)
1954
+ .then(function () { return true; })
1955
+ .catch(function () { return false; })];
1956
+ case 1:
1957
+ isReadAccessAllowed = _a.sent();
1958
+ if (!isReadAccessAllowed) {
1959
+ return [2 /*return*/, false];
1960
+ }
1961
+ return [4 /*yield*/, promises.stat(filename)
1962
+ .then(function (fileStat) { return fileStat.isFile(); })
1963
+ .catch(function () { return false; })];
1964
+ case 2:
1965
+ isFile = _a.sent();
1966
+ return [2 /*return*/, isFile];
1967
+ }
1968
+ });
1969
+ });
1970
+ }
1971
+ /**
1972
+ * Note: [🟢 <- TODO: [🦖] !!!!!! Split scrapers into packages and enable] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
1973
+ * TODO: [🐠] This can be a validator - with variants that return true/false and variants that throw errors with meaningless messages
1974
+ * TODO: [🖇] What about symlinks?
1975
+ */
1976
+
1977
+ /**
1978
+ * Get the file extension from a file name
1979
+ *
1980
+ * @private within the repository
1981
+ */
1982
+ function getFileExtension(value) {
1983
+ var match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i);
1984
+ return match ? match[1].toLowerCase() : null;
1985
+ }
1771
1986
 
1772
1987
  var defaultDiacriticsRemovalMap = [
1773
1988
  {
@@ -2061,10 +2276,6 @@
2061
2276
  charType = 'NUMBER';
2062
2277
  normalizedChar = char;
2063
2278
  }
2064
- else if (/^\/$/.test(char)) {
2065
- charType = 'SLASH';
2066
- normalizedChar = char;
2067
- }
2068
2279
  else {
2069
2280
  charType = 'OTHER';
2070
2281
  normalizedChar = '-';
@@ -2109,6 +2320,32 @@
2109
2320
  return text;
2110
2321
  }
2111
2322
 
2323
/**
 * Tests if given string is a valid file path.
 *
 * Note: This does not check if the file exists, only if the path is syntactically valid
 *
 * @param filename - value to test (non-strings always yield `false`)
 * @returns `true` for absolute Unix paths, absolute Windows drive paths and explicit relative paths
 * @public exported from `@promptbook/utils`
 */
function isValidFilePath(filename) {
    if (typeof filename !== 'string') {
        return false;
    }
    // Normalize Windows backslashes so one set of patterns covers both separators
    var filenameSlashes = filename.split('\\').join('/');
    // Absolute Unix path: /hello.txt
    if (/^\//.test(filenameSlashes)) {
        return true;
    }
    // Absolute Windows path: C:/hello.txt (drive letter(s), colon, slash)
    if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
        return true;
    }
    // Explicit relative path: ./hello.txt or ../hello.txt
    if (/^(\.\.?\/)+/.test(filenameSlashes)) {
        return true;
    }
    // Note: Bare names like `hello.txt` are deliberately NOT considered paths
    return false;
}
2348
+
2112
2349
/**
 * Converts a title (or URL / file path) into a kebab-case name identifier.
 *
 * - URLs are stripped of their `http(s)://` protocol and a trailing `.html`
 * - File paths are reduced to their basename (extension is intentionally kept)
 *
 * @param value - title, URL or file path to convert
 * @returns kebab-case identifier without emojis or slashes
 * @public exported from `@promptbook/utils`
 */
function titleToName(value) {
    var title = value;
    if (isValidUrl(title)) {
        // URLs: drop the protocol and a trailing `.html`
        title = title.replace(/^https?:\/\//, '').replace(/\.html$/, '');
    }
    else if (isValidFilePath(title)) {
        // File paths: keep only the basename
        // Note: Keeping extension in the name
        title = path.basename(title);
    }
    // Remaining slashes become dashes so the result is one flat identifier
    title = title.split('/').join('-');
    title = removeEmojis(title);
    title = normalizeToKebabCase(title);
    // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
    return title;
}
2134
2372
 
2373
/**
 * Derives a two-level subfolder path from the first two characters of a name.
 *
 * @param name - name to shard into subfolders (e.g. a hash)
 * @returns tuple of the lowercased first and second character; short names yield `''` entries
 * @private for `FileCacheStorage`
 */
function nameToSubfolderPath(name) {
    // Note: `slice` replaces the deprecated `substr`; out-of-range slices safely yield ''
    return [name.slice(0, 1).toLowerCase(), name.slice(1, 2).toLowerCase()];
}
2381
+
2382
/**
 * Just marks a place where something should be implemented.
 * No side effects - all arguments are deliberately ignored.
 *
 * Note: It can be useful for suppressing eslint errors about unused variables
 *
 * @param value any values
 * @returns void
 * @private within the repository
 */
function TODO_USE() {
    // Intentionally empty - arguments are accepted only to mark them as "used"
}
2398
+
2399
/**
 * Create a filename for intermediate cache for scrapers
 *
 * Note: It also checks if directory exists and creates it if not
 *
 * @param source - scraped source; `filename` or `url` is used to derive a stable cache name
 * @param options - `rootDirname`, `cacheDirname`, `isCacheCleaned`, `extension`, `isVerbose`
 * @returns a handler with the cache `filename`, an `isDestroyed` flag and an async `destroy()` that optionally removes the cached file
 * @private as internal utility for scrapers
 */
function getScraperIntermediateSource(source, options) {
    return __awaiter(this, void 0, void 0, function () {
        var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    sourceFilename = source.filename, url = source.url;
                    rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
                    hash = cryptoJs.SHA256(
                    // <- TODO: [🥬] Encapsulate sha256 to some private utility function
                    hexEncoder__default["default"].parse(sourceFilename || url || 'untitled'))
                        .toString( /* hex */)
                        .substring(0, 20);
                    // Note: The semantic part makes the cache filename human-readable; the hash part makes it unique
                    semanticName = normalizeToKebabCase(titleToName((sourceFilename || url || '').split('intermediate').join(''))).substring(0, 20);
                    pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
                    name = pieces.join('-').split('--').join('-');
                    // <- TODO: Use MAX_FILENAME_LENGTH
                    TODO_USE(rootDirname); // <- TODO: !!!!!!
                    cacheFilename = path.join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
                        cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
                        .join('/') +
                        '.' +
                        extension;
                    return [4 /*yield*/, promises.mkdir(path.dirname(cacheFilename), { recursive: true })];
                case 1:
                    _a.sent();
                    // Fix: A freshly created handler is NOT destroyed yet (was initialized to `true`,
                    // which made `isDestroyed` report `true` before `destroy()` was ever called)
                    isDestroyed = false;
                    fileHandler = {
                        filename: cacheFilename,
                        get isDestroyed() {
                            return isDestroyed;
                        },
                        destroy: function () {
                            return __awaiter(this, void 0, void 0, function () {
                                return __generator(this, function (_a) {
                                    switch (_a.label) {
                                        case 0:
                                            // Only remove the cached file when cache cleaning was requested
                                            if (!isCacheCleaned) return [3 /*break*/, 2];
                                            if (isVerbose) {
                                                console.info('legacyDocumentScraper: Cleaning cache'); // <- Fix: typo "Clening"
                                            }
                                            return [4 /*yield*/, promises.rm(cacheFilename)];
                                        case 1:
                                            _a.sent();
                                            _a.label = 2;
                                        case 2:
                                            isDestroyed = true;
                                            return [2 /*return*/];
                                    }
                                });
                            });
                        },
                    };
                    return [2 /*return*/, fileHandler];
            }
        });
    });
}
2464
+ /**
2465
+ * Note: Not using `FileCacheStorage` for two reasons:
2466
+ * 1) Need to store more than serialized JSONs
2467
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
2468
+ * TODO: [🐱‍🐉][🧠] Make some smart crop
2469
+ */
2470
+
2471
+ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
2472
+
2135
2473
  /**
2136
2474
  * This error indicates errors during the execution of the pipeline
2137
2475
  *
@@ -2164,22 +2502,6 @@
2164
2502
  return CollectionError;
2165
2503
  }(Error));
2166
2504
 
2167
- /**
2168
- * This error type indicates that you try to use a feature that is not available in the current environment
2169
- *
2170
- * @public exported from `@promptbook/core`
2171
- */
2172
- var EnvironmentMismatchError = /** @class */ (function (_super) {
2173
- __extends(EnvironmentMismatchError, _super);
2174
- function EnvironmentMismatchError(message) {
2175
- var _this = _super.call(this, message) || this;
2176
- _this.name = 'EnvironmentMismatchError';
2177
- Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
2178
- return _this;
2179
- }
2180
- return EnvironmentMismatchError;
2181
- }(Error));
2182
-
2183
2505
  /**
2184
2506
  * This error occurs when some expectation is not met in the execution of the pipeline
2185
2507
  *
@@ -2679,19 +3001,72 @@
2679
3001
  */
2680
3002
 
2681
3003
  /**
2682
- * Function `addUsage` will add multiple usages into one
2683
- *
2684
- * Note: If you provide 0 values, it returns ZERO_USAGE
3004
+ * Represents the usage with no resources consumed
2685
3005
  *
2686
3006
  * @public exported from `@promptbook/core`
2687
3007
  */
2688
- function addUsage() {
2689
- var usageItems = [];
2690
- for (var _i = 0; _i < arguments.length; _i++) {
2691
- usageItems[_i] = arguments[_i];
2692
- }
2693
- return usageItems.reduce(function (acc, item) {
2694
- var e_1, _a, e_2, _b;
3008
// Deeply-frozen usage record with every counter (price, input and output metrics) at zero
var ZERO_USAGE = $deepFreeze((function () {
    // Builds one direction (input or output) of the usage record with all counters zeroed
    var makeZeroCounters = function () {
        return {
            tokensCount: { value: 0 },
            charactersCount: { value: 0 },
            wordsCount: { value: 0 },
            sentencesCount: { value: 0 },
            linesCount: { value: 0 },
            paragraphsCount: { value: 0 },
            pagesCount: { value: 0 },
        };
    };
    return {
        price: { value: 0 },
        input: makeZeroCounters(),
        output: makeZeroCounters(),
    };
})());
3029
/**
 * Represents the usage with unknown resources consumed
 *
 * @public exported from `@promptbook/core`
 */
var UNCERTAIN_USAGE = $deepFreeze((function () {
    // Builds one direction (input or output) of the usage record with all counters
    // zeroed but explicitly flagged as uncertain
    var makeUncertainCounters = function () {
        return {
            tokensCount: { value: 0, isUncertain: true },
            charactersCount: { value: 0, isUncertain: true },
            wordsCount: { value: 0, isUncertain: true },
            sentencesCount: { value: 0, isUncertain: true },
            linesCount: { value: 0, isUncertain: true },
            paragraphsCount: { value: 0, isUncertain: true },
            pagesCount: { value: 0, isUncertain: true },
        };
    };
    return {
        price: { value: 0, isUncertain: true },
        input: makeUncertainCounters(),
        output: makeUncertainCounters(),
    };
})());
3055
+
3056
+ /**
3057
+ * Function `addUsage` will add multiple usages into one
3058
+ *
3059
+ * Note: If you provide 0 values, it returns ZERO_USAGE
3060
+ *
3061
+ * @public exported from `@promptbook/core`
3062
+ */
3063
+ function addUsage() {
3064
+ var usageItems = [];
3065
+ for (var _i = 0; _i < arguments.length; _i++) {
3066
+ usageItems[_i] = arguments[_i];
3067
+ }
3068
+ return usageItems.reduce(function (acc, item) {
3069
+ var e_1, _a, e_2, _b;
2695
3070
  var _c;
2696
3071
  acc.price.value += ((_c = item.price) === null || _c === void 0 ? void 0 : _c.value) || 0;
2697
3072
  try {
@@ -2955,23 +3330,6 @@
2955
3330
  return union;
2956
3331
  }
2957
3332
 
2958
- /**
2959
- * Just marks a place of place where should be something implemented
2960
- * No side effects.
2961
- *
2962
- * Note: It can be usefull suppressing eslint errors of unused variables
2963
- *
2964
- * @param value any values
2965
- * @returns void
2966
- * @private within the repository
2967
- */
2968
- function TODO_USE() {
2969
- var value = [];
2970
- for (var _i = 0; _i < arguments.length; _i++) {
2971
- value[_i] = arguments[_i];
2972
- }
2973
- }
2974
-
2975
3333
  /**
2976
3334
  * This error indicates problems parsing the format value
2977
3335
  *
@@ -3015,7 +3373,7 @@
3015
3373
  */
3016
3374
// CSV settings that are always enforced; frozen so they cannot be mutated.
// NOTE(review): presumably merged over user-provided csvSettings - confirm against caller.
var MANDATORY_CSV_SETTINGS = Object.freeze({
    header: true,
    // encoding: 'utf-8',
});
3020
3378
 
3021
3379
  /**
@@ -4469,25 +4827,26 @@
4469
4827
  */
4470
4828
  function executePipeline(options) {
4471
4829
  return __awaiter(this, void 0, void 0, function () {
4472
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _a, _b, parameter, e_1_1, _loop_1, _c, _d, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4473
- var e_1, _e, e_2, _f;
4474
- return __generator(this, function (_g) {
4475
- switch (_g.label) {
4830
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, llmTools, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
4831
+ var e_1, _f, e_2, _g;
4832
+ return __generator(this, function (_h) {
4833
+ switch (_h.label) {
4476
4834
  case 0:
4477
4835
  inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
4478
- maxParallelCount = settings.maxParallelCount, isVerbose = settings.isVerbose;
4836
+ maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
4479
4837
  preparedPipeline = options.preparedPipeline;
4480
4838
  llmTools = joinLlmExecutionTools.apply(void 0, __spreadArray([], __read(arrayableToArray(tools.llm)), false));
4481
4839
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
4482
4840
  return [4 /*yield*/, preparePipeline(pipeline, {
4483
4841
  llmTools: llmTools,
4842
+ rootDirname: rootDirname,
4484
4843
  isVerbose: isVerbose,
4485
4844
  maxParallelCount: maxParallelCount,
4486
4845
  })];
4487
4846
  case 1:
4488
- preparedPipeline = _g.sent();
4847
+ preparedPipeline = _h.sent();
4489
4848
  setPreparedPipeline(preparedPipeline);
4490
- _g.label = 2;
4849
+ _h.label = 2;
4491
4850
  case 2:
4492
4851
  errors = [];
4493
4852
  warnings = [];
@@ -4500,17 +4859,17 @@
4500
4859
  promptExecutions: [],
4501
4860
  };
4502
4861
  isReturned = false;
4503
- _g.label = 3;
4862
+ _h.label = 3;
4504
4863
  case 3:
4505
- _g.trys.push([3, 9, 10, 11]);
4506
- _a = __values(preparedPipeline.parameters.filter(function (_a) {
4864
+ _h.trys.push([3, 9, 10, 11]);
4865
+ _b = __values(preparedPipeline.parameters.filter(function (_a) {
4507
4866
  var isInput = _a.isInput;
4508
4867
  return isInput;
4509
- })), _b = _a.next();
4510
- _g.label = 4;
4868
+ })), _c = _b.next();
4869
+ _h.label = 4;
4511
4870
  case 4:
4512
- if (!!_b.done) return [3 /*break*/, 8];
4513
- parameter = _b.value;
4871
+ if (!!_c.done) return [3 /*break*/, 8];
4872
+ parameter = _c.value;
4514
4873
  if (!(inputParameters[parameter.name] === undefined)) return [3 /*break*/, 7];
4515
4874
  isReturned = true;
4516
4875
  if (!(onProgress !== undefined)) return [3 /*break*/, 6];
@@ -4518,8 +4877,8 @@
4518
4877
  return [4 /*yield*/, waitasecond.forTime(IMMEDIATE_TIME)];
4519
4878
  case 5:
4520
4879
  // Note: Wait a short time to prevent race conditions
4521
- _g.sent();
4522
- _g.label = 6;
4880
+ _h.sent();
4881
+ _h.label = 6;
4523
4882
  case 6: return [2 /*return*/, $asDeeplyFrozenSerializableJson("Unuccessful PipelineExecutorResult (with missing parameter {".concat(parameter.name, "}) PipelineExecutorResult"), {
4524
4883
  isSuccessful: false,
4525
4884
  errors: __spreadArray([
@@ -4532,24 +4891,24 @@
4532
4891
  preparedPipeline: preparedPipeline,
4533
4892
  })];
4534
4893
  case 7:
4535
- _b = _a.next();
4894
+ _c = _b.next();
4536
4895
  return [3 /*break*/, 4];
4537
4896
  case 8: return [3 /*break*/, 11];
4538
4897
  case 9:
4539
- e_1_1 = _g.sent();
4898
+ e_1_1 = _h.sent();
4540
4899
  e_1 = { error: e_1_1 };
4541
4900
  return [3 /*break*/, 11];
4542
4901
  case 10:
4543
4902
  try {
4544
- if (_b && !_b.done && (_e = _a.return)) _e.call(_a);
4903
+ if (_c && !_c.done && (_f = _b.return)) _f.call(_b);
4545
4904
  }
4546
4905
  finally { if (e_1) throw e_1.error; }
4547
4906
  return [7 /*endfinally*/];
4548
4907
  case 11:
4549
4908
  _loop_1 = function (parameterName) {
4550
4909
  var parameter;
4551
- return __generator(this, function (_h) {
4552
- switch (_h.label) {
4910
+ return __generator(this, function (_j) {
4911
+ switch (_j.label) {
4553
4912
  case 0:
4554
4913
  parameter = preparedPipeline.parameters.find(function (_a) {
4555
4914
  var name = _a.name;
@@ -4566,8 +4925,8 @@
4566
4925
  return [4 /*yield*/, waitasecond.forTime(IMMEDIATE_TIME)];
4567
4926
  case 2:
4568
4927
  // Note: Wait a short time to prevent race conditions
4569
- _h.sent();
4570
- _h.label = 3;
4928
+ _j.sent();
4929
+ _j.label = 3;
4571
4930
  case 3: return [2 /*return*/, { value: $asDeeplyFrozenSerializableJson(spaceTrim.spaceTrim(function (block) { return "\n Unuccessful PipelineExecutorResult (with extra parameter {".concat(parameter.name, "}) PipelineExecutorResult\n\n ").concat(block(pipelineIdentification), "\n "); }), {
4572
4931
  isSuccessful: false,
4573
4932
  errors: __spreadArray([
@@ -4583,39 +4942,39 @@
4583
4942
  }
4584
4943
  });
4585
4944
  };
4586
- _g.label = 12;
4945
+ _h.label = 12;
4587
4946
  case 12:
4588
- _g.trys.push([12, 17, 18, 19]);
4589
- _c = __values(Object.keys(inputParameters)), _d = _c.next();
4590
- _g.label = 13;
4947
+ _h.trys.push([12, 17, 18, 19]);
4948
+ _d = __values(Object.keys(inputParameters)), _e = _d.next();
4949
+ _h.label = 13;
4591
4950
  case 13:
4592
- if (!!_d.done) return [3 /*break*/, 16];
4593
- parameterName = _d.value;
4951
+ if (!!_e.done) return [3 /*break*/, 16];
4952
+ parameterName = _e.value;
4594
4953
  return [5 /*yield**/, _loop_1(parameterName)];
4595
4954
  case 14:
4596
- state_1 = _g.sent();
4955
+ state_1 = _h.sent();
4597
4956
  if (typeof state_1 === "object")
4598
4957
  return [2 /*return*/, state_1.value];
4599
- _g.label = 15;
4958
+ _h.label = 15;
4600
4959
  case 15:
4601
- _d = _c.next();
4960
+ _e = _d.next();
4602
4961
  return [3 /*break*/, 13];
4603
4962
  case 16: return [3 /*break*/, 19];
4604
4963
  case 17:
4605
- e_2_1 = _g.sent();
4964
+ e_2_1 = _h.sent();
4606
4965
  e_2 = { error: e_2_1 };
4607
4966
  return [3 /*break*/, 19];
4608
4967
  case 18:
4609
4968
  try {
4610
- if (_d && !_d.done && (_f = _c.return)) _f.call(_c);
4969
+ if (_e && !_e.done && (_g = _d.return)) _g.call(_d);
4611
4970
  }
4612
4971
  finally { if (e_2) throw e_2.error; }
4613
4972
  return [7 /*endfinally*/];
4614
4973
  case 19:
4615
4974
  parametersToPass = inputParameters;
4616
- _g.label = 20;
4975
+ _h.label = 20;
4617
4976
  case 20:
4618
- _g.trys.push([20, 25, , 28]);
4977
+ _h.trys.push([20, 25, , 28]);
4619
4978
  resovedParameterNames_1 = preparedPipeline.parameters
4620
4979
  .filter(function (_a) {
4621
4980
  var isInput = _a.isInput;
@@ -4630,8 +4989,8 @@
4630
4989
  loopLimit = LOOP_LIMIT;
4631
4990
  _loop_2 = function () {
4632
4991
  var currentTemplate, work_1;
4633
- return __generator(this, function (_j) {
4634
- switch (_j.label) {
4992
+ return __generator(this, function (_k) {
4993
+ switch (_k.label) {
4635
4994
  case 0:
4636
4995
  if (loopLimit-- < 0) {
4637
4996
  // Note: Really UnexpectedError not LimitReachedError - this should be catched during validatePipeline
@@ -4657,7 +5016,7 @@
4657
5016
  if (!!currentTemplate) return [3 /*break*/, 3];
4658
5017
  /* [🤹‍♂️] */ return [4 /*yield*/, Promise.race(resolving_1)];
4659
5018
  case 2:
4660
- /* [🤹‍♂️] */ _j.sent();
5019
+ /* [🤹‍♂️] */ _k.sent();
4661
5020
  return [3 /*break*/, 4];
4662
5021
  case 3:
4663
5022
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
@@ -4692,24 +5051,24 @@
4692
5051
  // <- Note: Errors are catched here [3]
4693
5052
  // TODO: BUT if in multiple templates are errors, only the first one is catched so maybe we should catch errors here and save them to errors array here
4694
5053
  resolving_1.push(work_1);
4695
- _j.label = 4;
5054
+ _k.label = 4;
4696
5055
  case 4: return [2 /*return*/];
4697
5056
  }
4698
5057
  });
4699
5058
  };
4700
- _g.label = 21;
5059
+ _h.label = 21;
4701
5060
  case 21:
4702
5061
  if (!(unresovedTemplates_1.length > 0)) return [3 /*break*/, 23];
4703
5062
  return [5 /*yield**/, _loop_2()];
4704
5063
  case 22:
4705
- _g.sent();
5064
+ _h.sent();
4706
5065
  return [3 /*break*/, 21];
4707
5066
  case 23: return [4 /*yield*/, Promise.all(resolving_1)];
4708
5067
  case 24:
4709
- _g.sent();
5068
+ _h.sent();
4710
5069
  return [3 /*break*/, 28];
4711
5070
  case 25:
4712
- error_1 = _g.sent();
5071
+ error_1 = _h.sent();
4713
5072
  if (!(error_1 instanceof Error)) {
4714
5073
  throw error_1;
4715
5074
  }
@@ -4729,8 +5088,8 @@
4729
5088
  return [4 /*yield*/, waitasecond.forTime(IMMEDIATE_TIME)];
4730
5089
  case 26:
4731
5090
  // Note: Wait a short time to prevent race conditions
4732
- _g.sent();
4733
- _g.label = 27;
5091
+ _h.sent();
5092
+ _h.label = 27;
4734
5093
  case 27: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Unuccessful PipelineExecutorResult (with misc errors) PipelineExecutorResult', {
4735
5094
  isSuccessful: false,
4736
5095
  errors: __spreadArray([error_1], __read(errors), false).map(serializeError),
@@ -4757,8 +5116,8 @@
4757
5116
  return [4 /*yield*/, waitasecond.forTime(IMMEDIATE_TIME)];
4758
5117
  case 29:
4759
5118
  // Note: Wait a short time to prevent race conditions
4760
- _g.sent();
4761
- _g.label = 30;
5119
+ _h.sent();
5120
+ _h.label = 30;
4762
5121
  case 30: return [2 /*return*/, $asDeeplyFrozenSerializableJson('Successful PipelineExecutorResult', {
4763
5122
  isSuccessful: true,
4764
5123
  errors: errors.map(serializeError),
@@ -4770,211 +5129,884 @@
4770
5129
  })];
4771
5130
  }
4772
5131
  });
4773
- });
4774
- }
5132
+ });
5133
+ }
5134
+ /**
5135
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5136
+ */
5137
+
5138
+ /**
5139
+ * Creates executor function from pipeline and execution tools.
5140
+ *
5141
+ * @returns The executor function
5142
+ * @throws {PipelineLogicError} on logical error in the pipeline
5143
+ * @public exported from `@promptbook/core`
5144
+ */
5145
+ function createPipelineExecutor(options) {
5146
+ var _this = this;
5147
+ var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5148
+ var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5149
+ validatePipeline(pipeline);
5150
+ var pipelineIdentification = (function () {
5151
+ // Note: This is a 😐 implementation of [🚞]
5152
+ var _ = [];
5153
+ if (pipeline.sourceFile !== undefined) {
5154
+ _.push("File: ".concat(pipeline.sourceFile));
5155
+ }
5156
+ if (pipeline.pipelineUrl !== undefined) {
5157
+ _.push("Url: ".concat(pipeline.pipelineUrl));
5158
+ }
5159
+ return _.join('\n');
5160
+ })();
5161
+ var preparedPipeline;
5162
+ if (isPipelinePrepared(pipeline)) {
5163
+ preparedPipeline = pipeline;
5164
+ }
5165
+ else if (isNotPreparedWarningSupressed !== true) {
5166
+ console.warn(spaceTrim.spaceTrim(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
5167
+ }
5168
+ var runCount = 0;
5169
+ var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
5170
+ return __generator(this, function (_a) {
5171
+ runCount++;
5172
+ return [2 /*return*/, /* not await */ executePipeline({
5173
+ pipeline: pipeline,
5174
+ preparedPipeline: preparedPipeline,
5175
+ setPreparedPipeline: function (newPreparedPipeline) {
5176
+ preparedPipeline = newPreparedPipeline;
5177
+ },
5178
+ inputParameters: inputParameters,
5179
+ tools: tools,
5180
+ onProgress: onProgress,
5181
+ pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5182
+ settings: {
5183
+ maxExecutionAttempts: maxExecutionAttempts,
5184
+ maxParallelCount: maxParallelCount,
5185
+ csvSettings: csvSettings,
5186
+ isVerbose: isVerbose,
5187
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5188
+ rootDirname: rootDirname,
5189
+ },
5190
+ })];
5191
+ });
5192
+ }); };
5193
+ return pipelineExecutor;
5194
+ }
5195
+ /**
5196
+ * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5197
+ */
5198
+
5199
+ /**
5200
+ * Scraper for markdown files
5201
+ *
5202
+ * @see `documentationUrl` for more details
5203
+ * @public exported from `@promptbook/core`
5204
+ */
5205
+ var markdownScraper = {
5206
+ /**
5207
+ * Mime types that this scraper can handle
5208
+ */
5209
+ mimeTypes: ['text/markdown', 'text/plain'],
5210
+ /**
5211
+ * Link to documentation
5212
+ */
5213
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5214
+ /**
5215
+ * Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
5216
+ */
5217
+ scrape: function (source, options) {
5218
+ return __awaiter(this, void 0, void 0, function () {
5219
+ var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, knowledgeContent, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
5220
+ var _f, _g, _h;
5221
+ var _this = this;
5222
+ return __generator(this, function (_j) {
5223
+ switch (_j.label) {
5224
+ case 0:
5225
+ llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5226
+ if (llmTools === undefined) {
5227
+ throw new MissingToolsError('LLM tools are required for scraping external files');
5228
+ // <- Note: This scraper is used in all other scrapers, so saying "external files" not "markdown files"
5229
+ }
5230
+ TODO_USE(maxParallelCount); // <- [🪂]
5231
+ collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5232
+ _c = createPipelineExecutor;
5233
+ _f = {};
5234
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5235
+ case 1:
5236
+ prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
5237
+ _f.tools = {
5238
+ llm: llmTools,
5239
+ },
5240
+ _f)]);
5241
+ _d = createPipelineExecutor;
5242
+ _g = {};
5243
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
5244
+ case 2:
5245
+ prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
5246
+ _g.tools = {
5247
+ llm: llmTools,
5248
+ },
5249
+ _g)]);
5250
+ _e = createPipelineExecutor;
5251
+ _h = {};
5252
+ return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
5253
+ case 3:
5254
+ prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
5255
+ _h.tools = {
5256
+ llm: llmTools,
5257
+ },
5258
+ _h)]);
5259
+ return [4 /*yield*/, source.asText()];
5260
+ case 4:
5261
+ knowledgeContent = _j.sent();
5262
+ return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
5263
+ case 5:
5264
+ result = _j.sent();
5265
+ assertsExecutionSuccessful(result);
5266
+ outputParameters = result.outputParameters;
5267
+ knowledgePiecesRaw = outputParameters.knowledgePieces;
5268
+ knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
5269
+ // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
5270
+ if (isVerbose) {
5271
+ console.info('knowledgeTextPieces:', knowledgeTextPieces);
5272
+ }
5273
+ return [4 /*yield*/, Promise.all(
5274
+ // TODO: [🪂] !! Do not send all at once but in chunks
5275
+ knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
5276
+ var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
5277
+ return __generator(this, function (_c) {
5278
+ switch (_c.label) {
5279
+ case 0:
5280
+ name = "piece-".concat(i);
5281
+ title = spaceTrim__default["default"](knowledgeTextPiece.substring(0, 100));
5282
+ knowledgePieceContent = spaceTrim__default["default"](knowledgeTextPiece);
5283
+ keywords = [];
5284
+ index = [];
5285
+ _c.label = 1;
5286
+ case 1:
5287
+ _c.trys.push([1, 7, , 8]);
5288
+ return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
5289
+ case 2:
5290
+ titleResult = _c.sent();
5291
+ _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
5292
+ title = spaceTrim__default["default"](titleRaw) /* <- TODO: Maybe do in pipeline */;
5293
+ name = titleToName(title);
5294
+ return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
5295
+ case 3:
5296
+ keywordsResult = _c.sent();
5297
+ _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
5298
+ keywords = (keywordsRaw || '')
5299
+ .split(',')
5300
+ .map(function (keyword) { return keyword.trim(); })
5301
+ .filter(function (keyword) { return keyword !== ''; });
5302
+ if (isVerbose) {
5303
+ console.info("Keywords for \"".concat(title, "\":"), keywords);
5304
+ }
5305
+ if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
5306
+ // TODO: [🟥] Detect browser / node and make it colorfull
5307
+ console.error('No callEmbeddingModel function provided');
5308
+ return [3 /*break*/, 6];
5309
+ case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
5310
+ title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
5311
+ parameters: {},
5312
+ content: knowledgePieceContent,
5313
+ modelRequirements: {
5314
+ modelVariant: 'EMBEDDING',
5315
+ },
5316
+ })];
5317
+ case 5:
5318
+ embeddingResult = _c.sent();
5319
+ index.push({
5320
+ modelName: embeddingResult.modelName,
5321
+ position: embeddingResult.content,
5322
+ });
5323
+ _c.label = 6;
5324
+ case 6: return [3 /*break*/, 8];
5325
+ case 7:
5326
+ error_1 = _c.sent();
5327
+ // Note: Here is expected error:
5328
+ // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING
5329
+ if (!(error_1 instanceof PipelineExecutionError)) {
5330
+ throw error_1;
5331
+ }
5332
+ // TODO: [🟥] Detect browser / node and make it colorfull
5333
+ console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
5334
+ return [3 /*break*/, 8];
5335
+ case 8: return [2 /*return*/, {
5336
+ name: name,
5337
+ title: title,
5338
+ content: knowledgePieceContent,
5339
+ keywords: keywords,
5340
+ index: index,
5341
+ // <- TODO: [☀] sources,
5342
+ }];
5343
+ }
5344
+ });
5345
+ }); }))];
5346
+ case 6:
5347
+ knowledge = _j.sent();
5348
+ return [2 /*return*/, knowledge];
5349
+ }
5350
+ });
5351
+ });
5352
+ },
5353
+ } /* TODO: [🦷] as const */;
5354
+ /**
5355
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5356
+ * TODO: [🪂] Do it in parallel 11:11
5357
+ * TODO: [🦷] Ideally use `as const satisfies Scraper` BUT this combination throws errors
5358
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5359
+ */
5360
+
5361
+ /**
5362
+ * Scraper of .docx and .odt files
5363
+ *
5364
+ * @see `documentationUrl` for more details
5365
+ * @public exported from `@promptbook/core`
5366
+ */
5367
+ var documentScraper = {
5368
+ /**
5369
+ * Mime types that this scraper can handle
5370
+ */
5371
+ mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
5372
+ /**
5373
+ * Link to documentation
5374
+ */
5375
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5376
+ /**
5377
+ * Convert the `.docx` or `.odt` to `.md` file and returns intermediate source
5378
+ *
5379
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5380
+ */
5381
+ $convert: function (source, options) {
5382
+ return __awaiter(this, void 0, void 0, function () {
5383
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, command_1;
5384
+ return __generator(this, function (_e) {
5385
+ switch (_e.label) {
5386
+ case 0:
5387
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5388
+ if (!$isRunningInNode()) {
5389
+ throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
5390
+ }
5391
+ if (externalProgramsPaths.pandocPath === undefined) {
5392
+ throw new MissingToolsError('Pandoc is required for scraping .docx files');
5393
+ }
5394
+ if (source.filename === null) {
5395
+ // TODO: [🧠] Maybe save file as temporary
5396
+ throw new KnowledgeScrapeError('When parsing .docx file, it must be real file in the file system');
5397
+ }
5398
+ extension = getFileExtension(source.filename);
5399
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5400
+ rootDirname: rootDirname,
5401
+ cacheDirname: cacheDirname,
5402
+ isCacheCleaned: isCacheCleaned,
5403
+ extension: 'md',
5404
+ isVerbose: isVerbose,
5405
+ })];
5406
+ case 1:
5407
+ cacheFilehandler = _e.sent();
5408
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5409
+ case 2:
5410
+ if (!!(_e.sent())) return [3 /*break*/, 5];
5411
+ command_1 = "\"".concat(externalProgramsPaths.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
5412
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5413
+ return [4 /*yield*/, $execCommand(command_1)];
5414
+ case 3:
5415
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
5416
+ _e.sent();
5417
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5418
+ case 4:
5419
+ // Note: [0]
5420
+ if (!(_e.sent())) {
5421
+ throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5422
+ }
5423
+ _e.label = 5;
5424
+ case 5: return [2 /*return*/, cacheFilehandler];
5425
+ }
5426
+ });
5427
+ });
5428
+ },
5429
+ /**
5430
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
5431
+ */
5432
+ scrape: function (source, options) {
5433
+ return __awaiter(this, void 0, void 0, function () {
5434
+ var cacheFilehandler, markdownSource, knowledge;
5435
+ return __generator(this, function (_a) {
5436
+ switch (_a.label) {
5437
+ case 0: return [4 /*yield*/, documentScraper.$convert(source, options)];
5438
+ case 1:
5439
+ cacheFilehandler = _a.sent();
5440
+ markdownSource = {
5441
+ source: source.source,
5442
+ filename: cacheFilehandler.filename,
5443
+ url: null,
5444
+ mimeType: 'text/markdown',
5445
+ asText: function () {
5446
+ return __awaiter(this, void 0, void 0, function () {
5447
+ return __generator(this, function (_a) {
5448
+ switch (_a.label) {
5449
+ case 0: return [4 /*yield*/, promises.readFile(cacheFilehandler.filename, 'utf-8')];
5450
+ case 1:
5451
+ // Note: [0] In $convert we check that the file exists
5452
+ return [2 /*return*/, _a.sent()];
5453
+ }
5454
+ });
5455
+ });
5456
+ },
5457
+ asJson: function () {
5458
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5459
+ },
5460
+ asBlob: function () {
5461
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5462
+ },
5463
+ };
5464
+ knowledge = markdownScraper.scrape(markdownSource, options);
5465
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5466
+ case 2:
5467
+ _a.sent();
5468
+ return [2 /*return*/, knowledge];
5469
+ }
5470
+ });
5471
+ });
5472
+ },
5473
+ } /* TODO: [🦷] as const */;
5474
+ /**
5475
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5476
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5477
+ * TODO: [🪂] Do it in parallel 11:11
5478
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5479
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5480
+ */
5481
+
5482
+ /**
5483
+ * Scraper for .docx files
5484
+ *
5485
+ * @see `documentationUrl` for more details
5486
+ * @public exported from `@promptbook/core`
5487
+ */
5488
+ var legacyDocumentScraper = {
5489
+ /**
5490
+ * Mime types that this scraper can handle
5491
+ */
5492
+ mimeTypes: ['application/msword', 'text/rtf'],
5493
+ /**
5494
+ * Link to documentation
5495
+ */
5496
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5497
+ /**
5498
+ * Convert the `.doc` or `.rtf` to `.doc` file and returns intermediate source
5499
+ *
5500
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5501
+ */
5502
+ $convert: function (source, options) {
5503
+ return __awaiter(this, void 0, void 0, function () {
5504
+ var _a, externalProgramsPaths, rootDirname, _b, cacheDirname, _c, isCacheCleaned, _d, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
5505
+ return __generator(this, function (_e) {
5506
+ switch (_e.label) {
5507
+ case 0:
5508
+ _a = options.externalProgramsPaths, externalProgramsPaths = _a === void 0 ? {} : _a, rootDirname = options.rootDirname, _b = options.cacheDirname, cacheDirname = _b === void 0 ? SCRAPE_CACHE_DIRNAME : _b, _c = options.isCacheCleaned, isCacheCleaned = _c === void 0 ? false : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5509
+ if (!$isRunningInNode()) {
5510
+ throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
5511
+ }
5512
+ if (externalProgramsPaths.libreOfficePath === undefined) {
5513
+ throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
5514
+ }
5515
+ if (source.filename === null) {
5516
+ // TODO: [🧠] Maybe save file as temporary
5517
+ throw new KnowledgeScrapeError('When parsing .doc or .rtf file, it must be real file in the file system');
5518
+ }
5519
+ extension = getFileExtension(source.filename);
5520
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5521
+ rootDirname: rootDirname,
5522
+ cacheDirname: cacheDirname,
5523
+ isCacheCleaned: isCacheCleaned,
5524
+ extension: 'docx',
5525
+ isVerbose: isVerbose,
5526
+ })];
5527
+ case 1:
5528
+ cacheFilehandler = _e.sent();
5529
+ if (isVerbose) {
5530
+ console.info("documentScraper: Converting .".concat(extension, " -> .docx"));
5531
+ }
5532
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5533
+ case 2:
5534
+ if (!!(_e.sent())) return [3 /*break*/, 8];
5535
+ documentSourceOutdirPathForLibreOffice_1 = path.join(path.dirname(cacheFilehandler.filename), 'libreoffice')
5536
+ .split('\\')
5537
+ .join('/');
5538
+ command_1 = "\"".concat(externalProgramsPaths.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
5539
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
5540
+ return [4 /*yield*/, $execCommand(command_1)];
5541
+ case 3:
5542
+ // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
5543
+ _e.sent();
5544
+ return [4 /*yield*/, promises.readdir(documentSourceOutdirPathForLibreOffice_1)];
5545
+ case 4:
5546
+ files_1 = _e.sent();
5547
+ if (files_1.length !== 1) {
5548
+ throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
5549
+ }
5550
+ file = files_1[0];
5551
+ return [4 /*yield*/, promises.rename(path.join(documentSourceOutdirPathForLibreOffice_1, file), cacheFilehandler.filename)];
5552
+ case 5:
5553
+ _e.sent();
5554
+ return [4 /*yield*/, promises.rmdir(documentSourceOutdirPathForLibreOffice_1)];
5555
+ case 6:
5556
+ _e.sent();
5557
+ return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5558
+ case 7:
5559
+ if (!(_e.sent())) {
5560
+ throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n File that was supposed to be created by LibreOffice does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
5561
+ }
5562
+ _e.label = 8;
5563
+ case 8: return [2 /*return*/, cacheFilehandler];
5564
+ }
5565
+ });
5566
+ });
5567
+ },
5568
+ /**
5569
+ * Scrapes the `.doc` or `.rtf` file and returns the knowledge pieces or `null` if it can't scrape it
5570
+ */
5571
+ scrape: function (source, options) {
5572
+ return __awaiter(this, void 0, void 0, function () {
5573
+ var cacheFilehandler, markdownSource, knowledge;
5574
+ return __generator(this, function (_a) {
5575
+ switch (_a.label) {
5576
+ case 0: return [4 /*yield*/, legacyDocumentScraper.$convert(source, options)];
5577
+ case 1:
5578
+ cacheFilehandler = _a.sent();
5579
+ markdownSource = {
5580
+ source: source.source,
5581
+ filename: cacheFilehandler.filename,
5582
+ url: null,
5583
+ mimeType: 'text/markdown',
5584
+ asText: function () {
5585
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asText`');
5586
+ },
5587
+ asJson: function () {
5588
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asJson`');
5589
+ },
5590
+ asBlob: function () {
5591
+ throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asBlob`');
5592
+ },
5593
+ };
5594
+ knowledge = documentScraper.scrape(markdownSource, options);
5595
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5596
+ case 2:
5597
+ _a.sent();
5598
+ return [2 /*return*/, knowledge];
5599
+ }
5600
+ });
5601
+ });
5602
+ },
5603
+ } /* TODO: [🦷] as const */;
5604
+ /**
5605
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5606
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5607
+ * TODO: [🪂] Do it in parallel 11:11
5608
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5609
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5610
+ */
5611
+
5612
+ /**
5613
+ * Scraper for .docx files
5614
+ *
5615
+ * @see `documentationUrl` for more details
5616
+ * @public exported from `@promptbook/core`
5617
+ */
5618
+ var pdfScraper = {
5619
+ /**
5620
+ * Mime types that this scraper can handle
5621
+ */
5622
+ mimeTypes: ['application/pdf'],
5623
+ /**
5624
+ * Link to documentation
5625
+ */
5626
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5627
+ /**
5628
+ * Converts the `.pdf` file to `.md` file and returns intermediate source
5629
+ */
5630
+ $convert: function (source, options) {
5631
+ return __awaiter(this, void 0, void 0, function () {
5632
+ return __generator(this, function (_a) {
5633
+ TODO_USE(source);
5634
+ TODO_USE(options);
5635
+ throw new NotYetImplementedError('PDF conversion not yet implemented');
5636
+ });
5637
+ });
5638
+ },
5639
+ /**
5640
+ * Scrapes the `.pdf` file and returns the knowledge pieces or `null` if it can't scrape it
5641
+ */
5642
+ scrape: function (source, options) {
5643
+ return __awaiter(this, void 0, void 0, function () {
5644
+ return __generator(this, function (_a) {
5645
+ TODO_USE(source);
5646
+ TODO_USE(options);
5647
+ /*
5648
+ const {
5649
+ externalProgramsPaths = {},
5650
+ cacheDirname = SCRAPE_CACHE_DIRNAME,
5651
+ isCacheCleaned = false,
5652
+ isVerbose = IS_VERBOSE,
5653
+ } = options;
5654
+ */
5655
+ throw new NotYetImplementedError('PDF scraping not yet implemented');
5656
+ });
5657
+ });
5658
+ },
5659
+ } /* TODO: [🦷] as const */;
5660
+ /**
5661
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
5662
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5663
+ * TODO: [🪂] Do it in parallel 11:11
5664
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5665
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
5666
+ */
5667
+
5668
+ /**
5669
+ * A converter instance that uses showdown and highlight extensions
5670
+ *
5671
+ * @type {Converter}
5672
+ * @private for markdown and html knowledge scrapers
5673
+ */
5674
+ var markdownConverter = new showdown.Converter({
5675
+ flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
5676
+ /*
5677
+ > import showdownHighlight from 'showdown-highlight';
5678
+ > extensions: [
5679
+ > showdownHighlight({
5680
+ > // Whether to add the classes to the <pre> tag, default is false
5681
+ > pre: true,
5682
+ > // Whether to use hljs' auto language detection, default is true
5683
+ > auto_detection: true,
5684
+ > }),
5685
+ > ],
5686
+ */
5687
+ });
5688
+ /**
5689
+ * TODO: !!!!!! Figure out better name not to confuse with `Converter`
5690
+ * TODO: !!!!!! Lazy-make converter
5691
+ */
5692
+
5693
+ /**
5694
+ * Scraper for .docx files
5695
+ *
5696
+ * @see `documentationUrl` for more details
5697
+ * @public exported from `@promptbook/core`
5698
+ */
5699
+ var websiteScraper = {
5700
+ /**
5701
+ * Mime types that this scraper can handle
5702
+ */
5703
+ mimeTypes: ['text/html'],
5704
+ /**
5705
+ * Link to documentation
5706
+ */
5707
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5708
+ /**
5709
+ * Convert the website to `.md` file and returns intermediate source
5710
+ *
5711
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5712
+ */
5713
+ $convert: function (source, options) {
5714
+ return __awaiter(this, void 0, void 0, function () {
5715
+ var
5716
+ // TODO: [🧠] Maybe in node use headless browser not just JSDOM
5717
+ // externalProgramsPaths = {},
5718
+ rootDirname, _a, cacheDirname, _b, isCacheCleaned, _c, isVerbose, jsdom$1, _d, reader, article, html, i, cacheFilehandler, markdown;
5719
+ return __generator(this, function (_e) {
5720
+ switch (_e.label) {
5721
+ case 0:
5722
+ rootDirname = options.rootDirname, _a = options.cacheDirname, cacheDirname = _a === void 0 ? SCRAPE_CACHE_DIRNAME : _a, _b = options.isCacheCleaned, isCacheCleaned = _b === void 0 ? false : _b, _c = options.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5723
+ // TODO: !!!!!! Does this work in browser? Make it work.
5724
+ if (source.url === null) {
5725
+ throw new KnowledgeScrapeError('Website scraper requires URL');
5726
+ }
5727
+ _d = jsdom.JSDOM.bind;
5728
+ return [4 /*yield*/, source.asText()];
5729
+ case 1:
5730
+ jsdom$1 = new (_d.apply(jsdom.JSDOM, [void 0, _e.sent(), {
5731
+ url: source.url,
5732
+ }]))();
5733
+ reader = new readability.Readability(jsdom$1.window.document);
5734
+ article = reader.parse();
5735
+ console.log(article);
5736
+ return [4 /*yield*/, waitasecond.forTime(10000)];
5737
+ case 2:
5738
+ _e.sent();
5739
+ html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom$1.window.document.body.innerHTML;
5740
+ // Note: Unwrap html such as it is convertable by `markdownConverter`
5741
+ for (i = 0; i < 2; i++) {
5742
+ html = html.replace(/<div\s*(?:id="readability-page-\d+"\s+class="page")?>(.*)<\/div>/is, '$1');
5743
+ }
5744
+ if (html.includes('<div')) {
5745
+ html = (article === null || article === void 0 ? void 0 : article.textContent) || '';
5746
+ }
5747
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
5748
+ rootDirname: rootDirname,
5749
+ cacheDirname: cacheDirname,
5750
+ isCacheCleaned: isCacheCleaned,
5751
+ extension: 'html',
5752
+ isVerbose: isVerbose,
5753
+ })];
5754
+ case 3:
5755
+ cacheFilehandler = _e.sent();
5756
+ return [4 /*yield*/, promises.writeFile(cacheFilehandler.filename, html, 'utf-8')];
5757
+ case 4:
5758
+ _e.sent();
5759
+ markdown = markdownConverter.makeMarkdown(html, jsdom$1.window.document);
5760
+ return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
5761
+ }
5762
+ });
5763
+ });
5764
+ },
5765
+ /**
5766
+ * Scrapes the website and returns the knowledge pieces or `null` if it can't scrape it
5767
+ */
5768
+ scrape: function (source, options) {
5769
+ return __awaiter(this, void 0, void 0, function () {
5770
+ var cacheFilehandler, markdownSource, knowledge;
5771
+ return __generator(this, function (_a) {
5772
+ switch (_a.label) {
5773
+ case 0: return [4 /*yield*/, websiteScraper.$convert(source, options)];
5774
+ case 1:
5775
+ cacheFilehandler = _a.sent();
5776
+ markdownSource = {
5777
+ source: source.source,
5778
+ filename: cacheFilehandler.filename,
5779
+ url: null,
5780
+ mimeType: 'text/markdown',
5781
+ asText: function () {
5782
+ return cacheFilehandler.markdown;
5783
+ },
5784
+ asJson: function () {
5785
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
5786
+ },
5787
+ asBlob: function () {
5788
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
5789
+ },
5790
+ };
5791
+ knowledge = markdownScraper.scrape(markdownSource, options);
5792
+ return [4 /*yield*/, cacheFilehandler.destroy()];
5793
+ case 2:
5794
+ _a.sent();
5795
+ return [2 /*return*/, knowledge];
5796
+ }
5797
+ });
5798
+ });
5799
+ },
5800
+ } /* TODO: [🦷] as const */;
4775
5801
  /**
4776
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5802
+ * TODO: !!!!!! Put into separate package
5803
+ * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
5804
+ * TODO: [🦖] Make some system for putting scrapers to separete packages
5805
+ * TODO: [🪂] Do it in parallel 11:11
5806
+ * TODO: [🦷] Ideally use `as const satisfies Converter & Scraper` BUT this combination throws errors
5807
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4777
5808
  */
4778
5809
 
5810
+ // TODO: [🦖] !!!!!! Pass scrapers as dependency,
4779
5811
  /**
4780
- * Creates executor function from pipeline and execution tools.
5812
+ * @@@
4781
5813
  *
4782
- * @returns The executor function
4783
- * @throws {PipelineLogicError} on logical error in the pipeline
4784
- * @public exported from `@promptbook/core`
5814
+ * @private because this will be replaced by a system of one scraper per package [🦖]
5815
+ * TODO: [🦖] System for scrapers NOT public exported from `@promptbook/core`
4785
5816
  */
4786
- function createPipelineExecutor(options) {
4787
- var _this = this;
4788
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
4789
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f;
4790
- validatePipeline(pipeline);
4791
- var pipelineIdentification = (function () {
4792
- // Note: This is a 😐 implementation of [🚞]
4793
- var _ = [];
4794
- if (pipeline.sourceFile !== undefined) {
4795
- _.push("File: ".concat(pipeline.sourceFile));
4796
- }
4797
- if (pipeline.pipelineUrl !== undefined) {
4798
- _.push("Url: ".concat(pipeline.pipelineUrl));
4799
- }
4800
- return _.join('\n');
4801
- })();
4802
- var preparedPipeline;
4803
- if (isPipelinePrepared(pipeline)) {
4804
- preparedPipeline = pipeline;
4805
- }
4806
- else if (isNotPreparedWarningSupressed !== true) {
4807
- console.warn(spaceTrim.spaceTrim(function (block) { return "\n Pipeline is not prepared\n\n ".concat(block(pipelineIdentification), "\n\n It will be prepared ad-hoc before the first execution and **returned as `preparedPipeline` in `PipelineExecutorResult`**\n But it is recommended to prepare the pipeline during collection preparation\n\n @see more at https://ptbk.io/prepare-pipeline\n "); }));
4808
- }
4809
- var runCount = 0;
4810
- var pipelineExecutor = function (inputParameters, onProgress) { return __awaiter(_this, void 0, void 0, function () {
4811
- return __generator(this, function (_a) {
4812
- runCount++;
4813
- return [2 /*return*/, /* not await */ executePipeline({
4814
- pipeline: pipeline,
4815
- preparedPipeline: preparedPipeline,
4816
- setPreparedPipeline: function (newPreparedPipeline) {
4817
- preparedPipeline = newPreparedPipeline;
4818
- },
4819
- inputParameters: inputParameters,
4820
- tools: tools,
4821
- onProgress: onProgress,
4822
- pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
4823
- settings: {
4824
- maxExecutionAttempts: maxExecutionAttempts,
4825
- maxParallelCount: maxParallelCount,
4826
- csvSettings: csvSettings,
4827
- isVerbose: isVerbose,
4828
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
4829
- },
4830
- })];
4831
- });
4832
- }); };
4833
- return pipelineExecutor;
5817
+ var SCRAPERS = [
5818
+ markdownScraper,
5819
+ documentScraper,
5820
+ legacyDocumentScraper,
5821
+ pdfScraper,
5822
+ websiteScraper,
5823
+ // <- Note: [♓️] This is the order of the scrapers for knowledge, BUT consider some better (more explicit) way to do this
5824
+ ];
5825
+
5826
+ /**
5827
+ * Creates unique name for the source
5828
+ *
5829
+ * @private within the repository
5830
+ */
5831
+ function sourceContentToName(sourceContent) {
5832
+ // TODO: !!!!!! Better name for source than gibberish hash
5833
+ var hash = cryptoJs.SHA256(hexEncoder__default["default"].parse(JSON.stringify(sourceContent)))
5834
+ // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5835
+ .toString( /* hex */)
5836
+ .substring(0, 20);
5837
+ // <- TODO: [🥬] Make some system for hashes and ids of promptbook
5838
+ var semanticName = normalizeToKebabCase(sourceContent.substring(0, 20));
5839
+ var pieces = ['source', semanticName, hash].filter(function (piece) { return piece !== ''; });
5840
+ var name = pieces.join('-').split('--').join('-');
5841
+ // <- TODO: Use MAX_FILENAME_LENGTH
5842
+ return name;
4834
5843
  }
4835
5844
  /**
4836
- * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5845
+ * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5846
+ */
5847
+
5848
+ /**
5849
+ * Convert file extension to mime type
5850
+ *
5851
+ * @private within the repository
4837
5852
  */
5853
+ function extensionToMimeType(value) {
5854
+ return mimeTypes.lookup(value) || 'application/octet-stream';
5855
+ }
4838
5856
 
4839
5857
  /**
4840
5858
  * @@@
4841
5859
  *
4842
- * @public exported from `@promptbook/core`
5860
+ * @private for scraper utilities
4843
5861
  */
4844
- function prepareKnowledgeFromMarkdown(knowledgeContent /* <- TODO: [🖖] (?maybe not) Always the file */, options) {
5862
+ function makeKnowledgeSourceHandler(knowledgeSource, options) {
5863
+ var _a;
4845
5864
  return __awaiter(this, void 0, void 0, function () {
4846
- var llmTools, _a, maxParallelCount, _b, isVerbose, collection, prepareKnowledgeFromMarkdownExecutor, _c, prepareTitleExecutor, _d, prepareKeywordsExecutor, _e, result, outputParameters, knowledgePiecesRaw, knowledgeTextPieces, knowledge;
4847
- var _f, _g, _h;
4848
- var _this = this;
4849
- return __generator(this, function (_j) {
4850
- switch (_j.label) {
5865
+ var sourceContent, name, _b, _c, rootDirname, _d, isVerbose, url, response_1, mimeType, filename_1, fileExtension, mimeType_1;
5866
+ return __generator(this, function (_e) {
5867
+ switch (_e.label) {
4851
5868
  case 0:
4852
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
4853
- TODO_USE(maxParallelCount); // <- [🪂]
4854
- collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
4855
- _c = createPipelineExecutor;
4856
- _f = {};
4857
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md')];
5869
+ sourceContent = knowledgeSource.sourceContent;
5870
+ name = knowledgeSource.name;
5871
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
5872
+ TODO_USE(isVerbose);
5873
+ if (!name) {
5874
+ name = sourceContentToName(sourceContent);
5875
+ }
5876
+ if (!isValidUrl(sourceContent)) return [3 /*break*/, 2];
5877
+ url = sourceContent;
5878
+ return [4 /*yield*/, fetch(url)];
4858
5879
  case 1:
4859
- prepareKnowledgeFromMarkdownExecutor = _c.apply(void 0, [(_f.pipeline = _j.sent(),
4860
- _f.tools = {
4861
- llm: llmTools,
5880
+ response_1 = _e.sent();
5881
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5882
+ return [2 /*return*/, {
5883
+ source: name,
5884
+ filename: null,
5885
+ url: url,
5886
+ mimeType: mimeType,
5887
+ asBlob: function () {
5888
+ return __awaiter(this, void 0, void 0, function () {
5889
+ var content;
5890
+ return __generator(this, function (_a) {
5891
+ switch (_a.label) {
5892
+ case 0: return [4 /*yield*/, response_1.blob()];
5893
+ case 1:
5894
+ content = _a.sent();
5895
+ return [2 /*return*/, content];
5896
+ }
5897
+ });
5898
+ });
4862
5899
  },
4863
- _f)]);
4864
- _d = createPipelineExecutor;
4865
- _g = {};
4866
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md')];
4867
- case 2:
4868
- prepareTitleExecutor = _d.apply(void 0, [(_g.pipeline = _j.sent(),
4869
- _g.tools = {
4870
- llm: llmTools,
5900
+ asJson: function () {
5901
+ return __awaiter(this, void 0, void 0, function () {
5902
+ var content;
5903
+ return __generator(this, function (_a) {
5904
+ switch (_a.label) {
5905
+ case 0: return [4 /*yield*/, response_1.json()];
5906
+ case 1:
5907
+ content = _a.sent();
5908
+ return [2 /*return*/, content];
5909
+ }
5910
+ });
5911
+ });
4871
5912
  },
4872
- _g)]);
4873
- _e = createPipelineExecutor;
4874
- _h = {};
4875
- return [4 /*yield*/, collection.getPipelineByUrl('https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md')];
4876
- case 3:
4877
- prepareKeywordsExecutor = _e.apply(void 0, [(_h.pipeline = _j.sent(),
4878
- _h.tools = {
4879
- llm: llmTools,
5913
+ asText: function () {
5914
+ return __awaiter(this, void 0, void 0, function () {
5915
+ var content;
5916
+ return __generator(this, function (_a) {
5917
+ switch (_a.label) {
5918
+ case 0: return [4 /*yield*/, response_1.text()];
5919
+ case 1:
5920
+ content = _a.sent();
5921
+ return [2 /*return*/, content];
5922
+ }
5923
+ });
5924
+ });
4880
5925
  },
4881
- _h)]);
4882
- return [4 /*yield*/, prepareKnowledgeFromMarkdownExecutor({ knowledgeContent: knowledgeContent })];
4883
- case 4:
4884
- result = _j.sent();
4885
- assertsExecutionSuccessful(result);
4886
- outputParameters = result.outputParameters;
4887
- knowledgePiecesRaw = outputParameters.knowledgePieces;
4888
- knowledgeTextPieces = (knowledgePiecesRaw || '').split('\n---\n');
4889
- // <- TODO: [main] !!!!! Smarter split and filter out empty pieces
4890
- if (isVerbose) {
4891
- console.info('knowledgeTextPieces:', knowledgeTextPieces);
5926
+ }];
5927
+ case 2:
5928
+ if (!(isValidFilePath(sourceContent) || /\.[a-z]{1,10}$/i.exec(sourceContent))) return [3 /*break*/, 4];
5929
+ if (!$isRunningInNode()) {
5930
+ throw new EnvironmentMismatchError('Importing knowledge source file works only in Node.js environment');
4892
5931
  }
4893
- return [4 /*yield*/, Promise.all(
4894
- // TODO: [🪂] !! Do not send all at once but in chunks
4895
- knowledgeTextPieces.map(function (knowledgeTextPiece, i) { return __awaiter(_this, void 0, void 0, function () {
4896
- var name, title, knowledgePieceContent, keywords, index, titleResult, _a, titleRaw, keywordsResult, _b, keywordsRaw, embeddingResult, error_1;
4897
- return __generator(this, function (_c) {
4898
- switch (_c.label) {
4899
- case 0:
4900
- name = "piece-".concat(i);
4901
- title = spaceTrim__default["default"](knowledgeTextPiece.substring(0, 100));
4902
- knowledgePieceContent = spaceTrim__default["default"](knowledgeTextPiece);
4903
- keywords = [];
4904
- index = [];
4905
- _c.label = 1;
4906
- case 1:
4907
- _c.trys.push([1, 7, , 8]);
4908
- return [4 /*yield*/, prepareTitleExecutor({ knowledgePieceContent: knowledgePieceContent })];
4909
- case 2:
4910
- titleResult = _c.sent();
4911
- _a = titleResult.outputParameters.title, titleRaw = _a === void 0 ? 'Untitled' : _a;
4912
- title = spaceTrim__default["default"](titleRaw) /* <- TODO: Maybe do in pipeline */;
4913
- name = titleToName(title);
4914
- return [4 /*yield*/, prepareKeywordsExecutor({ knowledgePieceContent: knowledgePieceContent })];
4915
- case 3:
4916
- keywordsResult = _c.sent();
4917
- _b = keywordsResult.outputParameters.keywords, keywordsRaw = _b === void 0 ? '' : _b;
4918
- keywords = (keywordsRaw || '')
4919
- .split(',')
4920
- .map(function (keyword) { return keyword.trim(); })
4921
- .filter(function (keyword) { return keyword !== ''; });
4922
- if (isVerbose) {
4923
- console.info("Keywords for \"".concat(title, "\":"), keywords);
5932
+ if (rootDirname === null) {
5933
+ throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
5934
+ // <- TODO: [🧠] What is the best error type here`
5935
+ }
5936
+ filename_1 = path.join(rootDirname, sourceContent).split('\\').join('/');
5937
+ fileExtension = getFileExtension(filename_1);
5938
+ mimeType_1 = extensionToMimeType(fileExtension || '');
5939
+ return [4 /*yield*/, $isFileExisting(filename_1)];
5940
+ case 3:
5941
+ if (!(_e.sent())) {
5942
+ throw new NotFoundError(spaceTrim__default["default"](function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(filename_1), "\n "); }));
5943
+ }
5944
+ // TODO: !!!!!! Test security file - file is scoped to the project (maybe do this in `filesystemTools`)
5945
+ return [2 /*return*/, {
5946
+ source: name,
5947
+ filename: filename_1,
5948
+ url: null,
5949
+ mimeType: mimeType_1,
5950
+ asBlob: function () {
5951
+ return __awaiter(this, void 0, void 0, function () {
5952
+ var content;
5953
+ return __generator(this, function (_a) {
5954
+ switch (_a.label) {
5955
+ case 0: return [4 /*yield*/, promises.readFile(filename_1)];
5956
+ case 1:
5957
+ content = _a.sent();
5958
+ // <- Note: Its OK to use sync in tooling for tests
5959
+ return [2 /*return*/, new Blob([
5960
+ content,
5961
+ // <- TODO: !!!!!! Maybe not working
5962
+ ], { type: mimeType_1 })];
4924
5963
  }
4925
- if (!!llmTools.callEmbeddingModel) return [3 /*break*/, 4];
4926
- // TODO: [🟥] Detect browser / node and make it colorfull
4927
- console.error('No callEmbeddingModel function provided');
4928
- return [3 /*break*/, 6];
4929
- case 4: return [4 /*yield*/, llmTools.callEmbeddingModel({
4930
- title: "Embedding for ".concat(title) /* <- Note: No impact on embedding result itself, just for logging */,
4931
- parameters: {},
4932
- content: knowledgePieceContent,
4933
- modelRequirements: {
4934
- modelVariant: 'EMBEDDING',
4935
- },
4936
- })];
4937
- case 5:
4938
- embeddingResult = _c.sent();
4939
- index.push({
4940
- modelName: embeddingResult.modelName,
4941
- position: embeddingResult.content,
4942
- });
4943
- _c.label = 6;
4944
- case 6: return [3 /*break*/, 8];
4945
- case 7:
4946
- error_1 = _c.sent();
4947
- // Note: Here is expected error:
4948
- // > PipelineExecutionError: You have not provided any `LlmExecutionTools` that support model variant "EMBEDDING
4949
- if (!(error_1 instanceof PipelineExecutionError)) {
4950
- throw error_1;
5964
+ });
5965
+ });
5966
+ },
5967
+ asJson: function () {
5968
+ return __awaiter(this, void 0, void 0, function () {
5969
+ var _a, _b;
5970
+ return __generator(this, function (_c) {
5971
+ switch (_c.label) {
5972
+ case 0:
5973
+ _b = (_a = JSON).parse;
5974
+ return [4 /*yield*/, promises.readFile(filename_1, 'utf-8')];
5975
+ case 1: return [2 /*return*/, _b.apply(_a, [_c.sent()])];
4951
5976
  }
4952
- // TODO: [🟥] Detect browser / node and make it colorfull
4953
- console.error(error_1, "<- Note: This error is not critical to prepare the pipeline, just knowledge pieces won't have embeddings");
4954
- return [3 /*break*/, 8];
4955
- case 8: return [2 /*return*/, {
4956
- name: name,
4957
- title: title,
4958
- content: knowledgePieceContent,
4959
- keywords: keywords,
4960
- index: index,
4961
- // <- TODO: [☀] sources,
4962
- }];
4963
- }
4964
- });
4965
- }); }))];
4966
- case 5:
4967
- knowledge = _j.sent();
4968
- return [2 /*return*/, knowledge];
5977
+ });
5978
+ });
5979
+ },
5980
+ asText: function () {
5981
+ return __awaiter(this, void 0, void 0, function () {
5982
+ return __generator(this, function (_a) {
5983
+ switch (_a.label) {
5984
+ case 0: return [4 /*yield*/, promises.readFile(filename_1, 'utf-8')];
5985
+ case 1: return [2 /*return*/, _a.sent()];
5986
+ }
5987
+ });
5988
+ });
5989
+ },
5990
+ }];
5991
+ case 4: return [2 /*return*/, {
5992
+ source: name,
5993
+ filename: null,
5994
+ url: null,
5995
+ mimeType: 'text/markdown',
5996
+ asText: function () {
5997
+ return knowledgeSource.sourceContent;
5998
+ },
5999
+ asJson: function () {
6000
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
6001
+ },
6002
+ asBlob: function () {
6003
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asBlob`');
6004
+ },
6005
+ }];
4969
6006
  }
4970
6007
  });
4971
6008
  });
4972
6009
  }
4973
- /**
4974
- * TODO: [🐝][🔼][main] !!! Export via `@promptbook/markdown`
4975
- * TODO: [🪂] Do it in parallel 11:11
4976
- * Note: No need to aggregate usage here, it is done by intercepting the llmTools
4977
- */
4978
6010
 
4979
6011
  /**
4980
6012
  * Prepares the knowle
@@ -4984,21 +6016,64 @@
4984
6016
  */
4985
6017
  function prepareKnowledgePieces(knowledgeSources, options) {
4986
6018
  return __awaiter(this, void 0, void 0, function () {
4987
- var _a, maxParallelCount, knowledgePrepared;
6019
+ var _a, maxParallelCount, rootDirname, _b, isVerbose, knowledgePreparedUnflatten, knowledgePrepared;
4988
6020
  var _this = this;
4989
- return __generator(this, function (_b) {
4990
- switch (_b.label) {
6021
+ return __generator(this, function (_c) {
6022
+ switch (_c.label) {
4991
6023
  case 0:
4992
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
4993
- knowledgePrepared = [];
4994
- return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource) { return __awaiter(_this, void 0, void 0, function () {
4995
- var partialPieces, pieces;
4996
- return __generator(this, function (_a) {
4997
- switch (_a.label) {
4998
- case 0: return [4 /*yield*/, prepareKnowledgeFromMarkdown(knowledgeSource.sourceContent, // <- TODO: [🐝][main] !!! Unhardcode markdown, detect which type it is - BE AWARE of big package size
4999
- options)];
6024
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6025
+ knowledgePreparedUnflatten = new Array(knowledgeSources.length);
6026
+ return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
6027
+ var partialPieces, sourceHandler, SCRAPERS_1, SCRAPERS_1_1, scraper, partialPiecesUnchecked, e_1_1, pieces;
6028
+ var e_1, _a;
6029
+ return __generator(this, function (_b) {
6030
+ switch (_b.label) {
6031
+ case 0:
6032
+ partialPieces = null;
6033
+ return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, { rootDirname: rootDirname, isVerbose: isVerbose })];
5000
6034
  case 1:
5001
- partialPieces = _a.sent();
6035
+ sourceHandler = _b.sent();
6036
+ _b.label = 2;
6037
+ case 2:
6038
+ _b.trys.push([2, 7, 8, 9]);
6039
+ SCRAPERS_1 = __values(SCRAPERS), SCRAPERS_1_1 = SCRAPERS_1.next();
6040
+ _b.label = 3;
6041
+ case 3:
6042
+ if (!!SCRAPERS_1_1.done) return [3 /*break*/, 6];
6043
+ scraper = SCRAPERS_1_1.value;
6044
+ if (!scraper.mimeTypes.includes(sourceHandler.mimeType)
6045
+ // <- TODO: [🦔] Implement mime-type wildcards
6046
+ ) {
6047
+ return [3 /*break*/, 5];
6048
+ }
6049
+ return [4 /*yield*/, scraper.scrape(sourceHandler, options)];
6050
+ case 4:
6051
+ partialPiecesUnchecked = _b.sent();
6052
+ if (partialPiecesUnchecked !== null) {
6053
+ partialPieces = partialPiecesUnchecked;
6054
+ return [3 /*break*/, 6];
6055
+ }
6056
+ _b.label = 5;
6057
+ case 5:
6058
+ SCRAPERS_1_1 = SCRAPERS_1.next();
6059
+ return [3 /*break*/, 3];
6060
+ case 6: return [3 /*break*/, 9];
6061
+ case 7:
6062
+ e_1_1 = _b.sent();
6063
+ e_1 = { error: e_1_1 };
6064
+ return [3 /*break*/, 9];
6065
+ case 8:
6066
+ try {
6067
+ if (SCRAPERS_1_1 && !SCRAPERS_1_1.done && (_a = SCRAPERS_1.return)) _a.call(SCRAPERS_1);
6068
+ }
6069
+ finally { if (e_1) throw e_1.error; }
6070
+ return [7 /*endfinally*/];
6071
+ case 9:
6072
+ if (partialPieces === null) {
6073
+ throw new KnowledgeScrapeError(spaceTrim__default["default"](function (block) { return "\n Cannot scrape knowledge from source: ".concat(knowledgeSource.sourceContent, "\n\n No scraper found for the mime type \"").concat(sourceHandler.mimeType, "\"\n\n Available scrapers:\n ").concat(block(SCRAPERS.flatMap(function (scraper) { return scraper.mimeTypes; })
6074
+ .map(function (mimeType) { return "- ".concat(mimeType); })
6075
+ .join('\n')), "\n\n\n "); }));
6076
+ }
5002
6077
  pieces = partialPieces.map(function (partialPiece) { return (__assign(__assign({}, partialPiece), { sources: [
5003
6078
  {
5004
6079
  name: knowledgeSource.name,
@@ -5006,13 +6081,14 @@
5006
6081
  // <- TODO: [❎]
5007
6082
  },
5008
6083
  ] })); });
5009
- knowledgePrepared.push.apply(knowledgePrepared, __spreadArray([], __read(pieces), false));
6084
+ knowledgePreparedUnflatten[index] = pieces;
5010
6085
  return [2 /*return*/];
5011
6086
  }
5012
6087
  });
5013
6088
  }); })];
5014
6089
  case 1:
5015
- _b.sent();
6090
+ _c.sent();
6091
+ knowledgePrepared = knowledgePreparedUnflatten.flat();
5016
6092
  return [2 /*return*/, knowledgePrepared];
5017
6093
  }
5018
6094
  });
@@ -5029,7 +6105,7 @@
5029
6105
  >
5030
6106
  > export async function prepareKnowledgePieces(
5031
6107
  > knowledge: PrepareKnowledgeKnowledge,
5032
- > options: PrepareOptions,
6108
+ > options: PrepareAndScrapeOptions,
5033
6109
  > ):
5034
6110
  */
5035
6111
  /**
@@ -5142,6 +6218,9 @@
5142
6218
  switch (_d.label) {
5143
6219
  case 0:
5144
6220
  llmTools = options.llmTools, _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
6221
+ if (llmTools === undefined) {
6222
+ throw new MissingToolsError('LLM tools are required for preparing persona');
6223
+ }
5145
6224
  collection = createCollectionFromJson.apply(void 0, __spreadArray([], __read(PipelineCollection), false));
5146
6225
  _b = createPipelineExecutor;
5147
6226
  _c = {};
@@ -5285,7 +6364,7 @@
5285
6364
  */
5286
6365
  function preparePipeline(pipeline, options) {
5287
6366
  return __awaiter(this, void 0, void 0, function () {
5288
- var llmTools, _a, maxParallelCount, _b, isVerbose, parameters, templates,
6367
+ var llmTools, rootDirname, _a, maxParallelCount, _b, isVerbose, parameters, templates,
5289
6368
  /*
5290
6369
  <- TODO: [🧠][🪑] `promptbookVersion` */
5291
6370
  knowledgeSources /*
@@ -5298,8 +6377,11 @@
5298
6377
  if (isPipelinePrepared(pipeline)) {
5299
6378
  return [2 /*return*/, pipeline];
5300
6379
  }
5301
- llmTools = options.llmTools, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
6380
+ llmTools = options.llmTools, rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
5302
6381
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
6382
+ if (llmTools === undefined) {
6383
+ throw new MissingToolsError('LLM tools are required for preparing the pipeline');
6384
+ }
5303
6385
  llmToolsWithUsage = countTotalUsage(llmTools);
5304
6386
  currentPreparation = {
5305
6387
  id: 1,
@@ -5321,6 +6403,7 @@
5321
6403
  switch (_a.label) {
5322
6404
  case 0: return [4 /*yield*/, preparePersona(persona.description, {
5323
6405
  llmTools: llmToolsWithUsage,
6406
+ rootDirname: rootDirname,
5324
6407
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5325
6408
  isVerbose: isVerbose,
5326
6409
  })];
@@ -5335,11 +6418,7 @@
5335
6418
  case 1:
5336
6419
  _c.sent();
5337
6420
  knowledgeSourcesPrepared = knowledgeSources.map(function (source) { return (__assign(__assign({}, source), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
5338
- return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, {
5339
- llmTools: llmToolsWithUsage,
5340
- maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5341
- isVerbose: isVerbose,
5342
- })];
6421
+ return [4 /*yield*/, prepareKnowledgePieces(knowledgeSources /* <- TODO: [🧊] {knowledgeSources, knowledgePieces} */, __assign(__assign({}, options), { llmTools: llmToolsWithUsage, rootDirname: rootDirname, maxParallelCount: maxParallelCount /* <- TODO: [🪂] */, isVerbose: isVerbose }))];
5343
6422
  case 2:
5344
6423
  partialknowledgePiecesPrepared = _c.sent();
5345
6424
  knowledgePiecesPrepared = partialknowledgePiecesPrepared.map(function (piece) { return (__assign(__assign({}, piece), { preparationIds: [/* TODO: [🧊] -> */ currentPreparation.id] })); });
@@ -5349,6 +6428,7 @@
5349
6428
  knowledgePiecesCount: knowledgePiecesPrepared.length,
5350
6429
  }, {
5351
6430
  llmTools: llmToolsWithUsage,
6431
+ rootDirname: rootDirname,
5352
6432
  maxParallelCount: maxParallelCount /* <- TODO: [🪂] */,
5353
6433
  isVerbose: isVerbose,
5354
6434
  })];
@@ -5371,36 +6451,10 @@
5371
6451
  * @see https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#specify-the-desired-output-format
5372
6452
  */
5373
6453
 
5374
- /**
5375
- * Tests if given string is valid URL.
5376
- *
5377
- * Note: This does not check if the file exists only if the path is valid
5378
- * @public exported from `@promptbook/utils`
5379
- */
5380
- function isValidFilePath(filePath) {
5381
- if (typeof filePath !== 'string') {
5382
- return false;
5383
- }
5384
- var filePathSlashes = filePath.split('\\').join('/');
5385
- // Absolute Unix path: /hello.txt
5386
- if (/^(\/)/i.test(filePathSlashes)) {
5387
- return true;
5388
- }
5389
- // Absolute Windows path: /hello.txt
5390
- if (/^([A-Z]{1,2}:\/?)\//i.test(filePathSlashes)) {
5391
- return true;
5392
- }
5393
- // Relative path: ./hello.txt
5394
- if (/^(\.\.?\/)+/i.test(filePathSlashes)) {
5395
- return true;
5396
- }
5397
- return false;
5398
- }
5399
-
5400
6454
  /**
5401
6455
  * Parses the knowledge command
5402
6456
  *
5403
- * @see ./KNOWLEDGE-README.md for more details
6457
+ * @see `documentationUrl` for more details
5404
6458
  * @private within the commands folder
5405
6459
  */
5406
6460
  var knowledgeCommandParser = {
@@ -5418,7 +6472,7 @@
5418
6472
  */
5419
6473
  description: "Tells promptbook which external knowledge to use",
5420
6474
  /**
5421
- * Link to discussion
6475
+ * Link to documentation
5422
6476
  */
5423
6477
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/41',
5424
6478
  /**
@@ -5462,11 +6516,8 @@
5462
6516
  */
5463
6517
  $applyToPipelineJson: function (command, $pipelineJson) {
5464
6518
  var sourceContent = command.sourceContent;
5465
- var name = 'source-' + sha256__default["default"](hexEncoder__default["default"].parse(JSON.stringify(sourceContent))).toString( /* hex */);
5466
- // <- TODO: [🥬] Encapsulate sha256 to some private utility function
5467
- // <- TODO: This should be replaced with a better name later in preparation (done with some propper LLM summarization)
5468
6519
  $pipelineJson.knowledgeSources.push({
5469
- name: name,
6520
+ name: sourceContentToName(sourceContent),
5470
6521
  sourceContent: sourceContent,
5471
6522
  });
5472
6523
  },
@@ -5496,7 +6547,7 @@
5496
6547
  /**
5497
6548
  * Parses the template command
5498
6549
  *
5499
- * @see ./TEMPLATE-README.md for more details
6550
+ * @see `documentationUrl` for more details
5500
6551
  * @private within the commands folder
5501
6552
  */
5502
6553
  var templateCommandParser = {
@@ -5532,7 +6583,7 @@
5532
6583
  */
5533
6584
  description: "What should the code template template do",
5534
6585
  /**
5535
- * Link to discussion
6586
+ * Link to documentation
5536
6587
  */
5537
6588
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/64',
5538
6589
  /**
@@ -5757,7 +6808,7 @@
5757
6808
  /**
5758
6809
  * Parses the expect command
5759
6810
  *
5760
- * @see ./EXPECT-README.md for more details
6811
+ * @see `documentationUrl` for more details
5761
6812
  * @private within the commands folder
5762
6813
  */
5763
6814
  var expectCommandParser = {
@@ -5775,7 +6826,7 @@
5775
6826
  */
5776
6827
  description: spaceTrim__default["default"]("\n Expect command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
5777
6828
  /**
5778
- * Link to discussion
6829
+ * Link to documentation
5779
6830
  */
5780
6831
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
5781
6832
  /**
@@ -5935,10 +6986,6 @@
5935
6986
  charType = 'NUMBER';
5936
6987
  normalizedChar = char;
5937
6988
  }
5938
- else if (/^\/$/.test(char)) {
5939
- charType = 'SLASH';
5940
- normalizedChar = char;
5941
- }
5942
6989
  else {
5943
6990
  charType = 'OTHER';
5944
6991
  normalizedChar = '_';
@@ -6145,7 +7192,7 @@
6145
7192
  *
6146
7193
  * Note: @@@ This command is used as foreach for new commands - it should NOT be used in any `.ptbk.md` file
6147
7194
  *
6148
- * @see ./FOREACH-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
7195
+ * @see `documentationUrl` for more details
6149
7196
  * @private within the commands folder
6150
7197
  */
6151
7198
  var foreachCommandParser = {
@@ -6167,7 +7214,7 @@
6167
7214
  */
6168
7215
  description: "@@",
6169
7216
  /**
6170
- * Link to discussion
7217
+ * Link to documentation
6171
7218
  */
6172
7219
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/148',
6173
7220
  /**
@@ -6302,7 +7349,7 @@
6302
7349
  /**
6303
7350
  * Parses the format command
6304
7351
  *
6305
- * @see ./FORMAT-README.md for more details
7352
+ * @see `documentationUrl` for more details
6306
7353
  * @private within the commands folder
6307
7354
  */
6308
7355
  var formatCommandParser = {
@@ -6320,7 +7367,7 @@
6320
7367
  */
6321
7368
  description: spaceTrim__default["default"]("\n Format command describes the desired output of the template (after post-processing)\n It can set limits for the maximum/minimum length of the output, measured in characters, words, sentences, paragraphs or some other shape of the output.\n "),
6322
7369
  /**
6323
- * Link to discussion
7370
+ * Link to documentation
6324
7371
  */
6325
7372
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/30',
6326
7373
  /**
@@ -6376,7 +7423,7 @@
6376
7423
  /**
6377
7424
  * Parses the joker command
6378
7425
  *
6379
- * @see ./JOKER-README.md for more details
7426
+ * @see `documentationUrl` for more details
6380
7427
  * @private within the commands folder
6381
7428
  */
6382
7429
  var jokerCommandParser = {
@@ -6394,7 +7441,7 @@
6394
7441
  */
6395
7442
  description: "Joker parameter is used instead of executing the template result if jokers value meets the expectations requirements",
6396
7443
  /**
6397
- * Link to discussion
7444
+ * Link to documentation
6398
7445
  */
6399
7446
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/66',
6400
7447
  /**
@@ -6455,7 +7502,7 @@
6455
7502
  /**
6456
7503
  * Parses the model command
6457
7504
  *
6458
- * @see ./MODEL-README.md for more details
7505
+ * @see `documentationUrl` for more details
6459
7506
  * @private within the commands folder
6460
7507
  */
6461
7508
  var modelCommandParser = {
@@ -6473,7 +7520,7 @@
6473
7520
  */
6474
7521
  description: "Tells which `modelRequirements` (for example which model) to use for the prompt template execution",
6475
7522
  /**
6476
- * Link to discussion
7523
+ * Link to documentation
6477
7524
  */
6478
7525
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/67',
6479
7526
  /**
@@ -6555,7 +7602,11 @@
6555
7602
  // TODO: [🚜] DRY
6556
7603
  if ($templateJson.modelRequirements[command.key] !== undefined) {
6557
7604
  if ($templateJson.modelRequirements[command.key] === command.value) {
6558
- console.warn("Multiple commands `MODEL ".concat(command.key, " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
7605
+ console.warn("Multiple commands `MODEL ".concat({
7606
+ modelName: 'NAME',
7607
+ modelVariant: 'VARIANT',
7608
+ maxTokens: '???',
7609
+ }[command.key], " ").concat(command.value, "` in the template \"").concat($templateJson.title || $templateJson.name, "\""));
6559
7610
  }
6560
7611
  else {
6561
7612
  throw new ParseError(spaceTrim__default["default"]("\n Redefinition of MODEL `".concat(command.key, "` in the template \"").concat($templateJson.title || $templateJson.name, "\"\n\n You have used:\n - MODEL ").concat(command.key, " ").concat($templateJson.modelRequirements[command.key], "\n - MODEL ").concat(command.key, " ").concat(command.value, "\n ")));
@@ -6598,7 +7649,7 @@
6598
7649
  /**
6599
7650
  * Parses the parameter command
6600
7651
  *
6601
- * @see ./PARAMETER-README.md for more details
7652
+ * @see `documentationUrl` for more details
6602
7653
  * @private within the commands folder
6603
7654
  */
6604
7655
  var parameterCommandParser = {
@@ -6623,7 +7674,7 @@
6623
7674
  */
6624
7675
  description: "Describes one parameter of the template",
6625
7676
  /**
6626
- * Link to discussion
7677
+ * Link to documentation
6627
7678
  */
6628
7679
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/68',
6629
7680
  /**
@@ -6708,7 +7759,7 @@
6708
7759
  /**
6709
7760
  * Parses the persona command
6710
7761
  *
6711
- * @see ./PERSONA-README.md for more details
7762
+ * @see `documentationUrl` for more details
6712
7763
  * @private within the commands folder
6713
7764
  */
6714
7765
  var personaCommandParser = {
@@ -6730,7 +7781,7 @@
6730
7781
  */
6731
7782
  description: "Persona command is used to specify who the system is, it will be transformed into system message, top_t,...",
6732
7783
  /**
6733
- * Link to discussion
7784
+ * Link to documentation
6734
7785
  */
6735
7786
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/22',
6736
7787
  /**
@@ -6846,7 +7897,7 @@
6846
7897
  /**
6847
7898
  * Parses the postprocess command
6848
7899
  *
6849
- * @see ./POSTPROCESS-README.md for more details
7900
+ * @see `documentationUrl` for more details
6850
7901
  * @private within the commands folder
6851
7902
  */
6852
7903
  var postprocessCommandParser = {
@@ -6865,7 +7916,7 @@
6865
7916
  */
6866
7917
  description: "Defines the postprocess function to be used on the result from LLM and before the result is validated",
6867
7918
  /**
6868
- * Link to discussion
7919
+ * Link to documentation
6869
7920
  */
6870
7921
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/31',
6871
7922
  /**
@@ -6926,7 +7977,7 @@
6926
7977
  /**
6927
7978
  * Parses the PROMPTBOOK_VERSION command
6928
7979
  *
6929
- * @see ./PROMPTBOOK_VERSION-README.md for more details
7980
+ * @see `documentationUrl` for more details
6930
7981
  * @private within the commands folder
6931
7982
  */
6932
7983
  var promptbookVersionCommandParser = {
@@ -6945,7 +7996,7 @@
6945
7996
  */
6946
7997
  description: "Which version of the promptbook is the .ptbk.md using",
6947
7998
  /**
6948
- * Link to discussion
7999
+ * Link to documentation
6949
8000
  */
6950
8001
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/69',
6951
8002
  /**
@@ -7004,7 +8055,7 @@
7004
8055
  /**
7005
8056
  * Parses the url command
7006
8057
  *
7007
- * @see ./URL-README.md for more details
8058
+ * @see `documentationUrl` for more details
7008
8059
  * @private within the commands folder
7009
8060
  */
7010
8061
  var urlCommandParser = {
@@ -7027,7 +8078,7 @@
7027
8078
  */
7028
8079
  description: "Declares unique URL for the pipeline",
7029
8080
  /**
7030
- * Link to discussion
8081
+ * Link to documentation
7031
8082
  */
7032
8083
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/70',
7033
8084
  /**
@@ -7107,7 +8158,7 @@
7107
8158
  /**
7108
8159
  * Parses the action command
7109
8160
  *
7110
- * @see ./ACTION-README.md for more details
8161
+ * @see `documentationUrl` for more details
7111
8162
  * @private within the commands folder
7112
8163
  */
7113
8164
  var actionCommandParser = {
@@ -7125,7 +8176,7 @@
7125
8176
  */
7126
8177
  description: "Actions influences from the pipeline or template into external world. Like turning on a light, sending an email, etc.",
7127
8178
  /**
7128
- * Link to discussion
8179
+ * Link to documentation
7129
8180
  */
7130
8181
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/72',
7131
8182
  /**
@@ -7177,7 +8228,7 @@
7177
8228
  /**
7178
8229
  * Parses the instrument command
7179
8230
  *
7180
- * @see ./INSTRUMENT-README.md for more details
8231
+ * @see `documentationUrl` for more details
7181
8232
  * @private within the commands folder
7182
8233
  */
7183
8234
  var instrumentCommandParser = {
@@ -7195,7 +8246,7 @@
7195
8246
  */
7196
8247
  description: "Instrument command is used to specify the instrument to be used in the pipeline or template like search, calculate, etc.",
7197
8248
  /**
7198
- * Link to discussion
8249
+ * Link to documentation
7199
8250
  */
7200
8251
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/71',
7201
8252
  /**
@@ -7250,7 +8301,7 @@
7250
8301
  *
7251
8302
  * Note: @@@ This command is used as boilerplate for new commands - it should NOT be used in any `.ptbk.md` file
7252
8303
  *
7253
- * @see ./BOILERPLATE-README.md for more details <- TODO: @@@ Write theese README files OR remove this link + add annotation here (to all commands)
8304
+ * @see `documentationUrl` for more details
7254
8305
  * @private within the commands folder
7255
8306
  */
7256
8307
  var boilerplateCommandParser = {
@@ -7272,7 +8323,7 @@
7272
8323
  */
7273
8324
  description: "@@",
7274
8325
  /**
7275
- * Link to discussion
8326
+ * Link to documentation
7276
8327
  */
7277
8328
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
7278
8329
  /**
@@ -8122,16 +9173,17 @@
8122
9173
  * @public exported from `@promptbook/core`
8123
9174
  */
8124
9175
  function pipelineStringToJson(pipelineString, options) {
8125
- if (options === void 0) { options = { llmTools: null }; }
8126
9176
  return __awaiter(this, void 0, void 0, function () {
8127
9177
  var llmTools, pipelineJson;
8128
9178
  return __generator(this, function (_a) {
8129
9179
  switch (_a.label) {
8130
9180
  case 0:
8131
- llmTools = options.llmTools;
9181
+ llmTools = (options || {}).llmTools;
8132
9182
  pipelineJson = pipelineStringToJsonSync(pipelineString);
8133
- if (!(llmTools !== null)) return [3 /*break*/, 2];
8134
- return [4 /*yield*/, preparePipeline(pipelineJson, { llmTools: llmTools })];
9183
+ if (!(llmTools !== undefined)) return [3 /*break*/, 2];
9184
+ return [4 /*yield*/, preparePipeline(pipelineJson, options || {
9185
+ rootDirname: null,
9186
+ })];
8135
9187
  case 1:
8136
9188
  pipelineJson = _a.sent();
8137
9189
  _a.label = 2;
@@ -8339,6 +9391,38 @@
8339
9391
  * TODO: [🍙] Make some standard order of json properties
8340
9392
  */
8341
9393
 
9394
+ /**
9395
+ * Delegates the user interaction to an async callback function
9396
+ * You need to provide your own implementation of this callback function and bind it to the UI.
9397
+ *
9398
+ * @public exported from `@promptbook/core`
9399
+ */
9400
+ var CallbackInterfaceTools = /** @class */ (function () {
9401
+ function CallbackInterfaceTools(options) {
9402
+ this.options = options;
9403
+ }
9404
+ /**
9405
+ * Trigger the custom callback function
9406
+ */
9407
+ CallbackInterfaceTools.prototype.promptDialog = function (options) {
9408
+ return __awaiter(this, void 0, void 0, function () {
9409
+ var answer;
9410
+ return __generator(this, function (_a) {
9411
+ switch (_a.label) {
9412
+ case 0: return [4 /*yield*/, this.options.callback(options)];
9413
+ case 1:
9414
+ answer = _a.sent();
9415
+ if (this.options.isVerbose) {
9416
+ console.info(spaceTrim.spaceTrim(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
9417
+ }
9418
+ return [2 /*return*/, answer];
9419
+ }
9420
+ });
9421
+ });
9422
+ };
9423
+ return CallbackInterfaceTools;
9424
+ }());
9425
+
8342
9426
  /**
8343
9427
  * Pretty print an embedding vector for logging
8344
9428
  *
@@ -8414,38 +9498,6 @@
8414
9498
  * TODO: [🏛] Maybe make some markdown builder
8415
9499
  */
8416
9500
 
8417
- /**
8418
- * Delagates the user interaction to a async callback function
8419
- * You need to provide your own implementation of this callback function and its bind to UI.
8420
- *
8421
- * @public exported from `@promptbook/core`
8422
- */
8423
- var CallbackInterfaceTools = /** @class */ (function () {
8424
- function CallbackInterfaceTools(options) {
8425
- this.options = options;
8426
- }
8427
- /**
8428
- * Trigger the custom callback function
8429
- */
8430
- CallbackInterfaceTools.prototype.promptDialog = function (options) {
8431
- return __awaiter(this, void 0, void 0, function () {
8432
- var answer;
8433
- return __generator(this, function (_a) {
8434
- switch (_a.label) {
8435
- case 0: return [4 /*yield*/, this.options.callback(options)];
8436
- case 1:
8437
- answer = _a.sent();
8438
- if (this.options.isVerbose) {
8439
- console.info(spaceTrim.spaceTrim(function (block) { return "\n \uD83D\uDCD6 ".concat(block(options.promptTitle), "\n \uD83D\uDC64 ").concat(block(answer), "\n "); }));
8440
- }
8441
- return [2 /*return*/, answer];
8442
- }
8443
- });
8444
- });
8445
- };
8446
- return CallbackInterfaceTools;
8447
- }());
8448
-
8449
9501
  /**
8450
9502
  * @@@
8451
9503
  *
@@ -9413,6 +10465,7 @@
9413
10465
  exports.ExecutionReportStringOptionsDefaults = ExecutionReportStringOptionsDefaults;
9414
10466
  exports.ExpectError = ExpectError;
9415
10467
  exports.IS_VERBOSE = IS_VERBOSE;
10468
+ exports.KnowledgeScrapeError = KnowledgeScrapeError;
9416
10469
  exports.LimitReachedError = LimitReachedError;
9417
10470
  exports.MANDATORY_CSV_SETTINGS = MANDATORY_CSV_SETTINGS;
9418
10471
  exports.MAX_EXECUTION_ATTEMPTS = MAX_EXECUTION_ATTEMPTS;
@@ -9422,6 +10475,7 @@
9422
10475
  exports.MAX_PARALLEL_COUNT = MAX_PARALLEL_COUNT;
9423
10476
  exports.MODEL_VARIANTS = MODEL_VARIANTS;
9424
10477
  exports.MemoryStorage = MemoryStorage;
10478
+ exports.MissingToolsError = MissingToolsError;
9425
10479
  exports.NotFoundError = NotFoundError;
9426
10480
  exports.NotYetImplementedError = NotYetImplementedError;
9427
10481
  exports.PIPELINE_COLLECTION_BASE_FILENAME = PIPELINE_COLLECTION_BASE_FILENAME;
@@ -9432,6 +10486,7 @@
9432
10486
  exports.PipelineUrlError = PipelineUrlError;
9433
10487
  exports.PrefixStorage = PrefixStorage;
9434
10488
  exports.RESERVED_PARAMETER_NAMES = RESERVED_PARAMETER_NAMES;
10489
+ exports.SCRAPE_CACHE_DIRNAME = SCRAPE_CACHE_DIRNAME;
9435
10490
  exports.TemplateTypes = TemplateTypes;
9436
10491
  exports.TextFormatDefinition = TextFormatDefinition;
9437
10492
  exports.UNCERTAIN_USAGE = UNCERTAIN_USAGE;
@@ -9452,16 +10507,19 @@
9452
10507
  exports.createLlmToolsFromConfiguration = createLlmToolsFromConfiguration;
9453
10508
  exports.createPipelineExecutor = createPipelineExecutor;
9454
10509
  exports.createSubcollection = createSubcollection;
10510
+ exports.documentScraper = documentScraper;
9455
10511
  exports.embeddingVectorToString = embeddingVectorToString;
9456
10512
  exports.executionReportJsonToString = executionReportJsonToString;
9457
10513
  exports.isPassingExpectations = isPassingExpectations;
9458
10514
  exports.isPipelinePrepared = isPipelinePrepared;
9459
10515
  exports.joinLlmExecutionTools = joinLlmExecutionTools;
10516
+ exports.legacyDocumentScraper = legacyDocumentScraper;
9460
10517
  exports.limitTotalUsage = limitTotalUsage;
10518
+ exports.markdownScraper = markdownScraper;
10519
+ exports.pdfScraper = pdfScraper;
9461
10520
  exports.pipelineJsonToString = pipelineJsonToString;
9462
10521
  exports.pipelineStringToJson = pipelineStringToJson;
9463
10522
  exports.pipelineStringToJsonSync = pipelineStringToJsonSync;
9464
- exports.prepareKnowledgeFromMarkdown = prepareKnowledgeFromMarkdown;
9465
10523
  exports.prepareKnowledgePieces = prepareKnowledgePieces;
9466
10524
  exports.preparePersona = preparePersona;
9467
10525
  exports.preparePipeline = preparePipeline;
@@ -9472,6 +10530,7 @@
9472
10530
  exports.usageToHuman = usageToHuman;
9473
10531
  exports.usageToWorktime = usageToWorktime;
9474
10532
  exports.validatePipeline = validatePipeline;
10533
+ exports.websiteScraper = websiteScraper;
9475
10534
 
9476
10535
  Object.defineProperty(exports, '__esModule', { value: true });
9477
10536