@promptbook/cli 0.84.0-0 → 0.84.0-11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +1 -0
  2. package/esm/index.es.js +664 -221
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/cli.index.d.ts +8 -0
  5. package/esm/typings/src/_packages/core.index.d.ts +4 -0
  6. package/esm/typings/src/_packages/markitdown.index.d.ts +8 -0
  7. package/esm/typings/src/_packages/pdf.index.d.ts +6 -0
  8. package/esm/typings/src/_packages/utils.index.d.ts +4 -0
  9. package/esm/typings/src/_packages/wizzard.index.d.ts +8 -0
  10. package/esm/typings/src/constants.d.ts +1 -1
  11. package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +1 -1
  12. package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +1 -1
  13. package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +3 -1
  14. package/esm/typings/src/pipeline/book-notation.d.ts +5 -0
  15. package/esm/typings/src/pipeline/prompt-notation.d.ts +31 -0
  16. package/esm/typings/src/pipeline/prompt-notation.test.d.ts +4 -0
  17. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +43 -0
  18. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +20 -0
  19. package/esm/typings/src/scrapers/_boilerplate/playground/boilerplate-scraper-playground.d.ts +5 -0
  20. package/esm/typings/src/scrapers/_boilerplate/register-constructor.d.ts +15 -0
  21. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +28 -0
  22. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +50 -0
  23. package/esm/typings/src/scrapers/markitdown/createMarkitdownScraper.d.ts +22 -0
  24. package/esm/typings/src/scrapers/markitdown/playground/markitdown-scraper-playground.d.ts +5 -0
  25. package/esm/typings/src/scrapers/markitdown/register-constructor.d.ts +17 -0
  26. package/esm/typings/src/scrapers/markitdown/register-metadata.d.ts +28 -0
  27. package/esm/typings/src/types/typeAliases.d.ts +1 -1
  28. package/package.json +3 -2
  29. package/umd/index.umd.js +670 -222
  30. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -12,8 +12,7 @@ import parserHtml from 'prettier/parser-html';
12
12
  import { unparse, parse } from 'papaparse';
13
13
  import { SHA256 } from 'crypto-js';
14
14
  import { lookup } from 'mime-types';
15
- import { exec as exec$2, spawn } from 'child_process';
16
- import { promisify } from 'util';
15
+ import { spawn } from 'child_process';
17
16
  import glob from 'glob-promise';
18
17
  import prompts from 'prompts';
19
18
  import moment from 'moment';
@@ -39,7 +38,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
39
38
  * @generated
40
39
  * @see https://github.com/webgptorg/promptbook
41
40
  */
42
- var PROMPTBOOK_ENGINE_VERSION = '0.83.0';
41
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
43
42
  /**
44
43
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
45
44
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -203,7 +202,7 @@ var NAME = "Promptbook";
203
202
  *
204
203
  * @public exported from `@promptbook/core`
205
204
  */
206
- var ADMIN_EMAIL = 'me@pavolhejny.com';
205
+ var ADMIN_EMAIL = 'pavol@ptbk.io';
207
206
  /**
208
207
  * Name of the responsible person for the Promptbook on GitHub
209
208
  *
@@ -813,7 +812,7 @@ var ORDER_OF_PIPELINE_JSON = [
813
812
  *
814
813
  * @private within the repository
815
814
  */
816
- var REPLACING_NONCE = 'u$k42k%!V2zo34w7Fu#@QUHYPW';
815
+ var REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
817
816
  /**
818
817
  * @@@
819
818
  *
@@ -1916,7 +1915,7 @@ function countTotalUsage(llmTools) {
1916
1915
  var NotYetImplementedError = /** @class */ (function (_super) {
1917
1916
  __extends(NotYetImplementedError, _super);
1918
1917
  function NotYetImplementedError(message) {
1919
- var _this = _super.call(this, spaceTrim$1(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on me@pavolhejny.com\n\n "); })) || this;
1918
+ var _this = _super.call(this, spaceTrim$1(function (block) { return "\n ".concat(block(message), "\n\n Note: This feature is not implemented yet but it will be soon.\n\n If you want speed up the implementation or just read more, look here:\n https://github.com/webgptorg/promptbook\n\n Or contact us on pavol@ptbk.io\n\n "); })) || this;
1920
1919
  _this.name = 'NotYetImplementedError';
1921
1920
  Object.setPrototypeOf(_this, NotYetImplementedError.prototype);
1922
1921
  return _this;
@@ -2791,24 +2790,18 @@ function collectionToJson(collection) {
2791
2790
  var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book.md",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book.md`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> {knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book.md",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book.md`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [🍆] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book.md"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book.md",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book.md`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book.md"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book.md",formfactorName:"GENERIC",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book.md`\n- INPUT PARAMETER `{availableModelNames}` List of available model names separated by comma (,)\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Example\n\n\\`\\`\\`json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelRequirements}`\n"}],sourceFile:"./books/prepare-persona.book.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book.md",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book.md`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"✍ Convert Knowledge-piece to title\" but \"✍ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book.md"}];
2792
2791
 
2793
2792
  /**
2794
- * Function isValidJsonString will tell you if the string is valid JSON or not
2793
+ * Checks if value is valid email
2795
2794
  *
2796
2795
  * @public exported from `@promptbook/utils`
2797
2796
  */
2798
- function isValidJsonString(value /* <- [👨‍⚖️] */) {
2799
- try {
2800
- JSON.parse(value);
2801
- return true;
2797
+ function isValidEmail(email) {
2798
+ if (typeof email !== 'string') {
2799
+ return false;
2802
2800
  }
2803
- catch (error) {
2804
- if (!(error instanceof Error)) {
2805
- throw error;
2806
- }
2807
- if (error.message.includes('Unexpected token')) {
2808
- return false;
2809
- }
2801
+ if (email.split('\n').length > 1) {
2810
2802
  return false;
2811
2803
  }
2804
+ return /^.+@.+\..+$/.test(email);
2812
2805
  }
2813
2806
 
2814
2807
  /**
@@ -2830,6 +2823,27 @@ var ParseError = /** @class */ (function (_super) {
2830
2823
  * TODO: Maybe split `ParseError` and `ApplyError`
2831
2824
  */
2832
2825
 
2826
+ /**
2827
+ * Function isValidJsonString will tell you if the string is valid JSON or not
2828
+ *
2829
+ * @public exported from `@promptbook/utils`
2830
+ */
2831
+ function isValidJsonString(value /* <- [👨‍⚖️] */) {
2832
+ try {
2833
+ JSON.parse(value);
2834
+ return true;
2835
+ }
2836
+ catch (error) {
2837
+ if (!(error instanceof Error)) {
2838
+ throw error;
2839
+ }
2840
+ if (error.message.includes('Unexpected token')) {
2841
+ return false;
2842
+ }
2843
+ return false;
2844
+ }
2845
+ }
2846
+
2833
2847
  /**
2834
2848
  * Function `validatePipelineString` will validate the if the string is a valid pipeline string
2835
2849
  * It does not check if the string is fully logically correct, but if it is a string that can be a pipeline string or the string looks completely different.
@@ -2843,6 +2857,15 @@ function validatePipelineString(pipelineString) {
2843
2857
  if (isValidJsonString(pipelineString)) {
2844
2858
  throw new ParseError('Expected a book, but got a JSON string');
2845
2859
  }
2860
+ else if (isValidUrl(pipelineString)) {
2861
+ throw new ParseError("Expected a book, but got just the URL \"".concat(pipelineString, "\""));
2862
+ }
2863
+ else if (isValidFilePath(pipelineString)) {
2864
+ throw new ParseError("Expected a book, but got just the file path \"".concat(pipelineString, "\""));
2865
+ }
2866
+ else if (isValidEmail(pipelineString)) {
2867
+ throw new ParseError("Expected a book, but got just the email \"".concat(pipelineString, "\""));
2868
+ }
2846
2869
  // <- TODO: Implement the validation + add tests when the pipeline logic considered as invalid
2847
2870
  return pipelineString;
2848
2871
  }
@@ -3915,12 +3938,28 @@ function deserializeError(error) {
3915
3938
  /**
3916
3939
  * Asserts that the execution of a Promptbook is successful
3917
3940
  *
3941
+ * Note: If there are only warnings, the execution is still successful but the warnings are logged in the console
3942
+ *
3918
3943
  * @param executionResult - The partial result of the Promptbook execution
3919
3944
  * @throws {PipelineExecutionError} If the execution is not successful or if multiple errors occurred
3920
3945
  * @public exported from `@promptbook/core`
3921
3946
  */
3922
3947
  function assertsExecutionSuccessful(executionResult) {
3923
- var isSuccessful = executionResult.isSuccessful, errors = executionResult.errors;
3948
+ var e_1, _a;
3949
+ var isSuccessful = executionResult.isSuccessful, errors = executionResult.errors, warnings = executionResult.warnings;
3950
+ try {
3951
+ for (var warnings_1 = __values(warnings), warnings_1_1 = warnings_1.next(); !warnings_1_1.done; warnings_1_1 = warnings_1.next()) {
3952
+ var warning = warnings_1_1.value;
3953
+ console.warn(warning.message);
3954
+ }
3955
+ }
3956
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
3957
+ finally {
3958
+ try {
3959
+ if (warnings_1_1 && !warnings_1_1.done && (_a = warnings_1.return)) _a.call(warnings_1);
3960
+ }
3961
+ finally { if (e_1) throw e_1.error; }
3962
+ }
3924
3963
  if (isSuccessful === true) {
3925
3964
  return;
3926
3965
  }
@@ -10393,30 +10432,187 @@ function compilePipeline(pipelineString, tools, options) {
10393
10432
  * TODO: [🧠] Should be in generated JSON file GENERATOR_WARNING
10394
10433
  */
10395
10434
 
10396
- // Note: We want to use the `exec` as async function
10397
- var exec$1 = promisify(exec$2);
10435
+ /**
10436
+ * Normalize options for `execCommand` and `execCommands`
10437
+ *
10438
+ * Note: `$` is used to indicate that this function behaves differently according to `process.platform`
10439
+ *
10440
+ * @private internal utility of `execCommand` and `execCommands`
10441
+ */
10442
+ function $execCommandNormalizeOptions(options) {
10443
+ var _a;
10444
+ var _b, _c, _d, _e;
10445
+ var command;
10446
+ var cwd;
10447
+ var crashOnError;
10448
+ var args = [];
10449
+ var timeout;
10450
+ var isVerbose;
10451
+ if (typeof options === 'string') {
10452
+ // TODO: [1] DRY default values
10453
+ command = options;
10454
+ cwd = process.cwd();
10455
+ crashOnError = true;
10456
+ timeout = Infinity; // <- TODO: [⏳]
10457
+ isVerbose = DEFAULT_IS_VERBOSE;
10458
+ }
10459
+ else {
10460
+ /*
10461
+ TODO:
10462
+ if ((options as any).commands !== undefined) {
10463
+ commands = (options as any).commands;
10464
+ } else {
10465
+ commands = [(options as any).command];
10466
+ }
10467
+ */
10468
+ // TODO: [1] DRY default values
10469
+ command = options.command;
10470
+ cwd = (_b = options.cwd) !== null && _b !== void 0 ? _b : process.cwd();
10471
+ crashOnError = (_c = options.crashOnError) !== null && _c !== void 0 ? _c : true;
10472
+ timeout = (_d = options.timeout) !== null && _d !== void 0 ? _d : Infinity;
10473
+ isVerbose = (_e = options.isVerbose) !== null && _e !== void 0 ? _e : DEFAULT_IS_VERBOSE;
10474
+ }
10475
+ // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g
10476
+ var _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g))
10477
+ .map(function (_a) {
10478
+ var _b = __read(_a, 1), match = _b[0];
10479
+ return match;
10480
+ })
10481
+ .filter(function (arg) { return arg !== ''; });
10482
+ if (_.length > 1) {
10483
+ _a = __read(_), command = _a[0], args = _a.slice(1);
10484
+ }
10485
+ if (options.args) {
10486
+ args = __spreadArray(__spreadArray([], __read(args), false), __read(options.args), false);
10487
+ }
10488
+ var humanReadableCommand = !['npx', 'npm'].includes(command) ? command : args[0];
10489
+ if (['ts-node'].includes(humanReadableCommand)) {
10490
+ humanReadableCommand += " ".concat(args[1]);
10491
+ }
10492
+ if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) {
10493
+ command = "".concat(command, ".cmd");
10494
+ }
10495
+ return { command: command, humanReadableCommand: humanReadableCommand, args: args, cwd: cwd, crashOnError: crashOnError, timeout: timeout, isVerbose: isVerbose };
10496
+ }
10497
+ // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] });
10498
+
10499
+ /**
10500
+ * Run one command in a shell
10501
+ *
10502
+ *
10503
+ * Note: There are 2 similar functions in the codebase:
10504
+ * - `$execCommand` which runs a single command
10505
+ * - `$execCommands` which runs multiple commands
10506
+ * Note: `$` is used to indicate that this function is not a pure function - it runs a command in a shell
10507
+ *
10508
+ * @public exported from `@promptbook/node`
10509
+ */
10510
+ function $execCommand(options) {
10511
+ if (!$isRunningInNode()) {
10512
+ throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js');
10513
+ }
10514
+ return new Promise(function (resolve, reject) {
10515
+ // eslint-disable-next-line prefer-const
10516
+ var _a = $execCommandNormalizeOptions(options), command = _a.command, humanReadableCommand = _a.humanReadableCommand, args = _a.args, cwd = _a.cwd, crashOnError = _a.crashOnError, timeout = _a.timeout, _b = _a.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
10517
+ if (timeout !== Infinity) {
10518
+ // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver()
10519
+ forTime(timeout).then(function () {
10520
+ if (crashOnError) {
10521
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms")));
10522
+ }
10523
+ else {
10524
+ console.warn("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms but continues running"));
10525
+ resolve('Command exceeded time limit');
10526
+ }
10527
+ });
10528
+ }
10529
+ if (isVerbose) {
10530
+ console.info(colors.yellow(cwd) + ' ' + colors.green(command) + ' ' + colors.blue(args.join(' ')));
10531
+ }
10532
+ try {
10533
+ var commandProcess = spawn(command, args, { cwd: cwd, shell: true });
10534
+ if (isVerbose) {
10535
+ commandProcess.on('message', function (message) {
10536
+ console.info({ message: message });
10537
+ });
10538
+ }
10539
+ var output_1 = [];
10540
+ commandProcess.stdout.on('data', function (stdout) {
10541
+ output_1.push(stdout.toString());
10542
+ if (isVerbose) {
10543
+ console.info(stdout.toString());
10544
+ }
10545
+ });
10546
+ commandProcess.stderr.on('data', function (stderr) {
10547
+ output_1.push(stderr.toString());
10548
+ if (isVerbose && stderr.toString().trim()) {
10549
+ console.warn(stderr.toString());
10550
+ }
10551
+ });
10552
+ var finishWithCode = function (code) {
10553
+ if (code !== 0) {
10554
+ if (crashOnError) {
10555
+ reject(new Error(output_1.join('\n').trim() ||
10556
+ "Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code)));
10557
+ }
10558
+ else {
10559
+ if (isVerbose) {
10560
+ console.warn("Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code));
10561
+ }
10562
+ resolve(spaceTrim$1(output_1.join('\n')));
10563
+ }
10564
+ }
10565
+ else {
10566
+ resolve(spaceTrim$1(output_1.join('\n')));
10567
+ }
10568
+ };
10569
+ commandProcess.on('close', finishWithCode);
10570
+ commandProcess.on('exit', finishWithCode);
10571
+ commandProcess.on('disconnect', function () {
10572
+ // Note: Unexpected disconnection should always result in rejection
10573
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" disconnected")));
10574
+ });
10575
+ commandProcess.on('error', function (error) {
10576
+ if (crashOnError) {
10577
+ reject(new Error("Command \"".concat(humanReadableCommand, "\" failed: \n").concat(error.message)));
10578
+ }
10579
+ else {
10580
+ if (isVerbose) {
10581
+ console.warn(error);
10582
+ }
10583
+ resolve(spaceTrim$1(output_1.join('\n')));
10584
+ }
10585
+ });
10586
+ }
10587
+ catch (error) {
10588
+ // Note: Unexpected error in sync code should always result in rejection
10589
+ reject(error);
10590
+ }
10591
+ });
10592
+ }
10593
+ /**
10594
+ * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
10595
+ */
10596
+
10398
10597
  /**
10399
10598
  * @@@
10400
10599
  *
10401
10600
  * @private within the repository
10402
10601
  */
10403
10602
  function locateAppOnLinux(_a) {
10404
- var appName = _a.appName, linuxWhich = _a.linuxWhich;
10603
+ var linuxWhich = _a.linuxWhich;
10405
10604
  return __awaiter(this, void 0, void 0, function () {
10406
- var _b, stderr, stdout, error_1;
10407
- return __generator(this, function (_c) {
10408
- switch (_c.label) {
10605
+ var result, error_1;
10606
+ return __generator(this, function (_b) {
10607
+ switch (_b.label) {
10409
10608
  case 0:
10410
- _c.trys.push([0, 2, , 3]);
10411
- return [4 /*yield*/, exec$1("which ".concat(linuxWhich))];
10609
+ _b.trys.push([0, 2, , 3]);
10610
+ return [4 /*yield*/, $execCommand({ crashOnError: true, command: "which ".concat(linuxWhich) })];
10412
10611
  case 1:
10413
- _b = _c.sent(), stderr = _b.stderr, stdout = _b.stdout;
10414
- if (!stderr && stdout) {
10415
- return [2 /*return*/, stdout.trim()];
10416
- }
10417
- throw new Error("Can not locate app ".concat(appName, " on Linux.\n ").concat(stderr));
10612
+ result = _b.sent();
10613
+ return [2 /*return*/, result.trim()];
10418
10614
  case 2:
10419
- error_1 = _c.sent();
10615
+ error_1 = _b.sent();
10420
10616
  if (!(error_1 instanceof Error)) {
10421
10617
  throw error_1;
10422
10618
  }
@@ -10463,43 +10659,41 @@ function isExecutable(path, fs) {
10463
10659
  // @see https://stackoverflow.com/questions/37000981/how-to-import-node-module-in-typescript-without-type-definitions
10464
10660
  // eslint-disable-next-line @typescript-eslint/no-var-requires
10465
10661
  var userhome = require('userhome');
10466
- // Note: We want to use the `exec` as async function
10467
- var exec = promisify(exec$2);
10468
10662
  /**
10469
10663
  * @@@
10470
10664
  *
10471
10665
  * @private within the repository
10472
10666
  */
10473
10667
  function locateAppOnMacOs(_a) {
10474
- var appName = _a.appName, macOsName = _a.macOsName;
10668
+ var macOsName = _a.macOsName;
10475
10669
  return __awaiter(this, void 0, void 0, function () {
10476
- var toExec, regPath, altPath, _b, stderr, stdout, error_1;
10477
- return __generator(this, function (_c) {
10478
- switch (_c.label) {
10670
+ var toExec, regPath, altPath, result, error_1;
10671
+ return __generator(this, function (_b) {
10672
+ switch (_b.label) {
10479
10673
  case 0:
10480
- _c.trys.push([0, 6, , 7]);
10674
+ _b.trys.push([0, 6, , 7]);
10481
10675
  toExec = "/Contents/MacOS/".concat(macOsName);
10482
10676
  regPath = "/Applications/".concat(macOsName, ".app") + toExec;
10483
10677
  altPath = userhome(regPath.slice(1));
10484
10678
  return [4 /*yield*/, isExecutable(regPath, $provideFilesystemForNode())];
10485
10679
  case 1:
10486
- if (!_c.sent()) return [3 /*break*/, 2];
10680
+ if (!_b.sent()) return [3 /*break*/, 2];
10487
10681
  return [2 /*return*/, regPath];
10488
10682
  case 2: return [4 /*yield*/, isExecutable(altPath, $provideFilesystemForNode())];
10489
10683
  case 3:
10490
- if (_c.sent()) {
10684
+ if (_b.sent()) {
10491
10685
  return [2 /*return*/, altPath];
10492
10686
  }
10493
- _c.label = 4;
10494
- case 4: return [4 /*yield*/, exec("mdfind 'kMDItemDisplayName == \"".concat(macOsName, "\" && kMDItemKind == Application'"))];
10687
+ _b.label = 4;
10688
+ case 4: return [4 /*yield*/, $execCommand({
10689
+ crashOnError: true,
10690
+ command: "mdfind 'kMDItemDisplayName == \"".concat(macOsName, "\" && kMDItemKind == Application'"),
10691
+ })];
10495
10692
  case 5:
10496
- _b = _c.sent(), stderr = _b.stderr, stdout = _b.stdout;
10497
- if (!stderr && stdout) {
10498
- return [2 /*return*/, stdout.trim() + toExec];
10499
- }
10500
- throw new Error("Can not locate app ".concat(appName, " on macOS.\n ").concat(stderr));
10693
+ result = _b.sent();
10694
+ return [2 /*return*/, result.trim() + toExec];
10501
10695
  case 6:
10502
- error_1 = _c.sent();
10696
+ error_1 = _b.sent();
10503
10697
  if (!(error_1 instanceof Error)) {
10504
10698
  throw error_1;
10505
10699
  }
@@ -10600,7 +10794,7 @@ function locateApp(options) {
10600
10794
  }
10601
10795
  else if (process.platform === 'darwin') {
10602
10796
  if (macOsName) {
10603
- return locateAppOnMacOs({ appName: appName, macOsName: macOsName });
10797
+ return locateAppOnMacOs({ macOsName: macOsName });
10604
10798
  }
10605
10799
  else {
10606
10800
  throw new Error("".concat(appName, " is not available on macOS."));
@@ -10608,7 +10802,7 @@ function locateApp(options) {
10608
10802
  }
10609
10803
  else {
10610
10804
  if (linuxWhich) {
10611
- return locateAppOnLinux({ appName: appName, linuxWhich: linuxWhich });
10805
+ return locateAppOnLinux({ linuxWhich: linuxWhich });
10612
10806
  }
10613
10807
  else {
10614
10808
  throw new Error("".concat(appName, " is not available on Linux."));
@@ -15815,173 +16009,11 @@ var _OpenAiAssistantRegistration = $llmToolsRegister.register(createOpenAiAssist
15815
16009
  */
15816
16010
 
15817
16011
  /**
15818
- * Normalize options for `execCommand` and `execCommands`
16012
+ * Create a filename for intermediate cache for scrapers
15819
16013
  *
15820
- * Note: `$` is used to indicate that this function behaves differently according to `process.platform`
16014
+ * Note: It also checks if directory exists and creates it if not
15821
16015
  *
15822
- * @private internal utility of `execCommand` and `execCommands`
15823
- */
15824
- function $execCommandNormalizeOptions(options) {
15825
- var _a;
15826
- var _b, _c, _d, _e;
15827
- var command;
15828
- var cwd;
15829
- var crashOnError;
15830
- var args = [];
15831
- var timeout;
15832
- var isVerbose;
15833
- if (typeof options === 'string') {
15834
- // TODO: [1] DRY default values
15835
- command = options;
15836
- cwd = process.cwd();
15837
- crashOnError = true;
15838
- timeout = Infinity; // <- TODO: [⏳]
15839
- isVerbose = DEFAULT_IS_VERBOSE;
15840
- }
15841
- else {
15842
- /*
15843
- TODO:
15844
- if ((options as any).commands !== undefined) {
15845
- commands = (options as any).commands;
15846
- } else {
15847
- commands = [(options as any).command];
15848
- }
15849
- */
15850
- // TODO: [1] DRY default values
15851
- command = options.command;
15852
- cwd = (_b = options.cwd) !== null && _b !== void 0 ? _b : process.cwd();
15853
- crashOnError = (_c = options.crashOnError) !== null && _c !== void 0 ? _c : true;
15854
- timeout = (_d = options.timeout) !== null && _d !== void 0 ? _d : Infinity;
15855
- isVerbose = (_e = options.isVerbose) !== null && _e !== void 0 ? _e : DEFAULT_IS_VERBOSE;
15856
- }
15857
- // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g
15858
- var _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g))
15859
- .map(function (_a) {
15860
- var _b = __read(_a, 1), match = _b[0];
15861
- return match;
15862
- })
15863
- .filter(function (arg) { return arg !== ''; });
15864
- if (_.length > 1) {
15865
- _a = __read(_), command = _a[0], args = _a.slice(1);
15866
- }
15867
- if (options.args) {
15868
- args = __spreadArray(__spreadArray([], __read(args), false), __read(options.args), false);
15869
- }
15870
- var humanReadableCommand = !['npx', 'npm'].includes(command) ? command : args[0];
15871
- if (['ts-node'].includes(humanReadableCommand)) {
15872
- humanReadableCommand += " ".concat(args[1]);
15873
- }
15874
- if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) {
15875
- command = "".concat(command, ".cmd");
15876
- }
15877
- return { command: command, humanReadableCommand: humanReadableCommand, args: args, cwd: cwd, crashOnError: crashOnError, timeout: timeout, isVerbose: isVerbose };
15878
- }
15879
- // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] });
15880
-
15881
- /**
15882
- * Run one command in a shell
15883
- *
15884
- *
15885
- * Note: There are 2 similar functions in the codebase:
15886
- * - `$execCommand` which runs a single command
15887
- * - `$execCommands` which runs multiple commands
15888
- * Note: `$` is used to indicate that this function is not a pure function - it runs a command in a shell
15889
- *
15890
- * @public exported from `@promptbook/node`
15891
- */
15892
- function $execCommand(options) {
15893
- if (!$isRunningInNode()) {
15894
- throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js');
15895
- }
15896
- return new Promise(function (resolve, reject) {
15897
- // eslint-disable-next-line prefer-const
15898
- var _a = $execCommandNormalizeOptions(options), command = _a.command, humanReadableCommand = _a.humanReadableCommand, args = _a.args, cwd = _a.cwd, crashOnError = _a.crashOnError, timeout = _a.timeout, _b = _a.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
15899
- if (timeout !== Infinity) {
15900
- // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver()
15901
- forTime(timeout).then(function () {
15902
- if (crashOnError) {
15903
- reject(new Error("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms")));
15904
- }
15905
- else {
15906
- console.warn("Command \"".concat(humanReadableCommand, "\" exceeded time limit of ").concat(timeout, "ms but continues running"));
15907
- resolve('Command exceeded time limit');
15908
- }
15909
- });
15910
- }
15911
- if (isVerbose) {
15912
- console.info(colors.yellow(cwd) + ' ' + colors.green(command) + ' ' + colors.blue(args.join(' ')));
15913
- }
15914
- try {
15915
- var commandProcess = spawn(command, args, { cwd: cwd, shell: true });
15916
- if (isVerbose) {
15917
- commandProcess.on('message', function (message) {
15918
- console.info({ message: message });
15919
- });
15920
- }
15921
- var output_1 = [];
15922
- commandProcess.stdout.on('data', function (stdout) {
15923
- output_1.push(stdout.toString());
15924
- if (isVerbose) {
15925
- console.info(stdout.toString());
15926
- }
15927
- });
15928
- commandProcess.stderr.on('data', function (stderr) {
15929
- output_1.push(stderr.toString());
15930
- if (isVerbose && stderr.toString().trim()) {
15931
- console.warn(stderr.toString());
15932
- }
15933
- });
15934
- var finishWithCode = function (code) {
15935
- if (code !== 0) {
15936
- if (crashOnError) {
15937
- reject(new Error(output_1.join('\n').trim() ||
15938
- "Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code)));
15939
- }
15940
- else {
15941
- if (isVerbose) {
15942
- console.warn("Command \"".concat(humanReadableCommand, "\" exited with code ").concat(code));
15943
- }
15944
- resolve(spaceTrim$1(output_1.join('\n')));
15945
- }
15946
- }
15947
- else {
15948
- resolve(spaceTrim$1(output_1.join('\n')));
15949
- }
15950
- };
15951
- commandProcess.on('close', finishWithCode);
15952
- commandProcess.on('exit', finishWithCode);
15953
- commandProcess.on('disconnect', function () {
15954
- // Note: Unexpected disconnection should always result in rejection
15955
- reject(new Error("Command \"".concat(humanReadableCommand, "\" disconnected")));
15956
- });
15957
- commandProcess.on('error', function (error) {
15958
- if (crashOnError) {
15959
- reject(new Error("Command \"".concat(humanReadableCommand, "\" failed: \n").concat(error.message)));
15960
- }
15961
- else {
15962
- if (isVerbose) {
15963
- console.warn(error);
15964
- }
15965
- resolve(spaceTrim$1(output_1.join('\n')));
15966
- }
15967
- });
15968
- }
15969
- catch (error) {
15970
- // Note: Unexpected error in sync code should always result in rejection
15971
- reject(error);
15972
- }
15973
- });
15974
- }
15975
- /**
15976
- * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
15977
- */
15978
-
15979
- /**
15980
- * Create a filename for intermediate cache for scrapers
15981
- *
15982
- * Note: It also checks if directory exists and creates it if not
15983
- *
15984
- * @private as internal utility for scrapers
16016
+ * @private as internal utility for scrapers
15985
16017
  */
15986
16018
  function getScraperIntermediateSource(source, options) {
15987
16019
  return __awaiter(this, void 0, void 0, function () {
@@ -16246,6 +16278,207 @@ var MarkdownScraper = /** @class */ (function () {
16246
16278
  * Note: No need to aggregate usage here, it is done by intercepting the llmTools
16247
16279
  */
16248
16280
 
16281
+ /**
16282
+ * Metadata of the scraper
16283
+ *
16284
+ * @private within the scraper directory
16285
+ */
16286
+ var boilerplateScraperMetadata = $deepFreeze({
16287
+ title: 'Boilerplate scraper',
16288
+ packageName: '@promptbook/boilerplate',
16289
+ className: 'BoilerplateScraper',
16290
+ mimeTypes: [
16291
+ '@@@/@@@',
16292
+ // <- TODO: @@@ Add compatible mime types with Boilerplate scraper
16293
+ ],
16294
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@@',
16295
+ isAvilableInBrowser: false,
16296
+ // <- Note: [🌏] Only `MarkdownScraper` makes sense to be available in the browser, for scraping non-markdown sources in the browser use a remote server
16297
+ requiredExecutables: [
16298
+ /* @@@ 'Pandoc' */
16299
+ ],
16300
+ }); /* <- Note: [🤛] */
16301
+ /**
16302
+ * Registration of known scraper metadata
16303
+ *
16304
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
16305
+ *
16306
+ * @public exported from `@promptbook/core`
16307
+ * @public exported from `@promptbook/wizzard`
16308
+ * @public exported from `@promptbook/cli`
16309
+ */
16310
+ var _BoilerplateScraperMetadataRegistration = $scrapersMetadataRegister.register(boilerplateScraperMetadata);
16311
+ /**
16312
+ * Note: [💞] Ignore a discrepancy between file name and entity name
16313
+ */
16314
+
16315
+ /**
16316
+ * Scraper of @@@ files
16317
+ *
16318
+ * @see `documentationUrl` for more details
16319
+ * @public exported from `@promptbook/boilerplate`
16320
+ */
16321
+ var BoilerplateScraper = /** @class */ (function () {
16322
+ function BoilerplateScraper(tools, options) {
16323
+ this.tools = tools;
16324
+ this.options = options;
16325
+ this.markdownScraper = new MarkdownScraper(tools, options);
16326
+ }
16327
+ Object.defineProperty(BoilerplateScraper.prototype, "metadata", {
16328
+ /**
16329
+ * Metadata of the scraper which includes title, mime types, etc.
16330
+ */
16331
+ get: function () {
16332
+ return boilerplateScraperMetadata;
16333
+ },
16334
+ enumerable: false,
16335
+ configurable: true
16336
+ });
16337
+ /**
16338
+ * Convert the `.@@@` to `.md` file and returns intermediate source
16339
+ *
16340
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
16341
+ */
16342
+ BoilerplateScraper.prototype.$convert = function (source) {
16343
+ var _a;
16344
+ return __awaiter(this, void 0, void 0, function () {
16345
+ var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, command_1;
16346
+ return __generator(this, function (_g) {
16347
+ switch (_g.label) {
16348
+ case 0:
16349
+ _b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
16350
+ // TODO: @@@ Preserve or delete
16351
+ if (!$isRunningInNode()) {
16352
+ throw new KnowledgeScrapeError('BoilerplateScraper is only supported in Node environment');
16353
+ }
16354
+ // TODO: @@@ Preserve or delete
16355
+ if (this.tools.fs === undefined) {
16356
+ throw new EnvironmentMismatchError('Can not scrape boilerplates without filesystem tools');
16357
+ // <- TODO: [🧠] What is the best error type here`
16358
+ }
16359
+ // TODO: @@@ Preserve, delete or modify
16360
+ if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
16361
+ throw new MissingToolsError('Pandoc is required for scraping .docx files');
16362
+ }
16363
+ // TODO: @@@ Preserve, delete or modify
16364
+ if (source.filename === null) {
16365
+ // TODO: [🧠] Maybe save file as temporary
16366
+ throw new KnowledgeScrapeError('When parsing .@@@ file, it must be real file in the file system');
16367
+ }
16368
+ extension = getFileExtension(source.filename);
16369
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
16370
+ rootDirname: rootDirname,
16371
+ cacheDirname: cacheDirname,
16372
+ intermediateFilesStrategy: intermediateFilesStrategy,
16373
+ extension: 'md',
16374
+ isVerbose: isVerbose,
16375
+ })];
16376
+ case 1:
16377
+ cacheFilehandler = _g.sent();
16378
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
16379
+ case 2:
16380
+ if (!!(_g.sent())) return [3 /*break*/, 5];
16381
+ command_1 = "\"".concat(this.tools.executables.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
16382
+ return [4 /*yield*/, $execCommand(command_1)];
16383
+ case 3:
16384
+ _g.sent();
16385
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
16386
+ case 4:
16387
+ // Note: [0]
16388
+ if (!(_g.sent())) {
16389
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
16390
+ }
16391
+ _g.label = 5;
16392
+ case 5: return [2 /*return*/, cacheFilehandler];
16393
+ }
16394
+ });
16395
+ });
16396
+ };
16397
+ /**
16398
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
16399
+ */
16400
+ BoilerplateScraper.prototype.scrape = function (source) {
16401
+ return __awaiter(this, void 0, void 0, function () {
16402
+ var cacheFilehandler, markdownSource, knowledge;
16403
+ return __generator(this, function (_a) {
16404
+ switch (_a.label) {
16405
+ case 0: return [4 /*yield*/, this.$convert(source)];
16406
+ case 1:
16407
+ cacheFilehandler = _a.sent();
16408
+ markdownSource = {
16409
+ source: source.source,
16410
+ filename: cacheFilehandler.filename,
16411
+ url: null,
16412
+ mimeType: 'text/markdown',
16413
+ asText: function () {
16414
+ return __awaiter(this, void 0, void 0, function () {
16415
+ return __generator(this, function (_a) {
16416
+ switch (_a.label) {
16417
+ case 0: return [4 /*yield*/, readFile(cacheFilehandler.filename, 'utf-8')];
16418
+ case 1:
16419
+ // Note: [0] In $convert we check that the file exists
16420
+ return [2 /*return*/, _a.sent()];
16421
+ }
16422
+ });
16423
+ });
16424
+ },
16425
+ asJson: function () {
16426
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
16427
+ },
16428
+ /*
16429
+ TODO: [🥽]
16430
+ > asBlob() {
16431
+ > throw new UnexpectedError(
16432
+ > 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
16433
+ > );
16434
+ > },
16435
+ */
16436
+ };
16437
+ knowledge = this.markdownScraper.scrape(markdownSource);
16438
+ return [4 /*yield*/, cacheFilehandler.destroy()];
16439
+ case 2:
16440
+ _a.sent();
16441
+ return [2 /*return*/, knowledge];
16442
+ }
16443
+ });
16444
+ });
16445
+ };
16446
+ return BoilerplateScraper;
16447
+ }());
16448
+ /**
16449
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
16450
+ * TODO: [🪂] Do it in parallel
16451
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
16452
+ * @@@ Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
16453
+ */
16454
+
16455
+ /**
16456
+ * Constructor of `BoilerplateScraper`
16457
+ *
16458
+ * @public exported from `@promptbook/boilerplate`
16459
+ */
16460
+ var createBoilerplateScraper = Object.assign(function (tools, options) {
16461
+ return new BoilerplateScraper(tools, options);
16462
+ }, boilerplateScraperMetadata); /* <- Note: [🤛] */
16463
+ /**
16464
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
16465
+ */
16466
+
16467
+ /**
16468
+ * Registration of known scraper
16469
+ *
16470
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
16471
+ *
16472
+ * @public exported from `@promptbook/boilerplate`
16473
+ * @public exported from `@promptbook/wizzard`
16474
+ * @public exported from `@promptbook/cli`
16475
+ */
16476
+ var _BoilerplateScraperRegistration = $scrapersRegister.register(createBoilerplateScraper);
16477
+ /**
16478
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
16479
+ * Note: [💞] Ignore a discrepancy between file name and entity name
16480
+ */
16481
+
16249
16482
  /**
16250
16483
  * Metadata of the scraper
16251
16484
  *
@@ -16345,7 +16578,7 @@ var DocumentScraper = /** @class */ (function () {
16345
16578
  case 4:
16346
16579
  // Note: [0]
16347
16580
  if (!(_g.sent())) {
16348
- throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
16581
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n File that was supposed to be created by Pandoc does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));
16349
16582
  }
16350
16583
  _g.label = 5;
16351
16584
  case 5: return [2 /*return*/, cacheFilehandler];
@@ -16520,7 +16753,7 @@ var LegacyDocumentScraper = /** @class */ (function () {
16520
16753
  case 4:
16521
16754
  files_1 = _g.sent();
16522
16755
  if (files_1.length !== 1) {
16523
- throw new UnexpectedError(spaceTrim(function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
16756
+ throw new UnexpectedError(spaceTrim(function (block) { return "\n Expected exactly 1 file in the LibreOffice output directory, got ".concat(files_1.length, "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n "); }));
16524
16757
  }
16525
16758
  file = files_1[0];
16526
16759
  return [4 /*yield*/, rename(join(documentSourceOutdirPathForLibreOffice_1, file), cacheFilehandler.filename)];
@@ -16670,6 +16903,216 @@ var _MarkdownScraperRegistration = $scrapersRegister.register(createMarkdownScra
16670
16903
  * Note: [💞] Ignore a discrepancy between file name and entity name
16671
16904
  */
16672
16905
 
16906
+ /**
16907
+ * Metadata of the scraper
16908
+ *
16909
+ * @private within the scraper directory
16910
+ */
16911
+ var markitdownScraperMetadata = $deepFreeze({
16912
+ title: 'Markitdown scraper',
16913
+ packageName: '@promptbook/markitdown',
16914
+ className: 'MarkitdownScraper',
16915
+ mimeTypes: [
16916
+ 'application/pdf',
16917
+ // TODO: Make priority for scrapers and than allow all mime types here:
16918
+ // 'text/html',
16919
+ // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
16920
+ ],
16921
+ documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
16922
+ isAvilableInBrowser: false,
16923
+ // <- Note: [🌏] Only `MarkdownScraper` makes sense to be available in the browser, for scraping non-markdown sources in the browser use a remote server
16924
+ requiredExecutables: [],
16925
+ }); /* <- Note: [🤛] */
16926
+ /**
16927
+ * Registration of known scraper metadata
16928
+ *
16929
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
16930
+ *
16931
+ * @public exported from `@promptbook/core`
16932
+ * @public exported from `@promptbook/wizzard`
16933
+ * @public exported from `@promptbook/cli`
16934
+ */
16935
+ var _MarkitdownScraperMetadataRegistration = $scrapersMetadataRegister.register(markitdownScraperMetadata);
16936
+ /**
16937
+ * Note: [💞] Ignore a discrepancy between file name and entity name
16938
+ */
16939
+
16940
+ /**
16941
+ * Integration of Markitdown by Microsoft into Promptbook
16942
+ *
16943
+ * @see https://github.com/microsoft/markitdown
16944
+ * @see `documentationUrl` for more details
16945
+ * @public exported from `@promptbook/markitdown`
16946
+ * @public exported from `@promptbook/pdf`
16947
+ */
16948
+ var MarkitdownScraper = /** @class */ (function () {
16949
+ function MarkitdownScraper(tools, options) {
16950
+ this.tools = tools;
16951
+ this.options = options;
16952
+ this.markdownScraper = new MarkdownScraper(tools, options);
16953
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
16954
+ var MarkItDown = require('markitdown-ts').MarkItDown;
16955
+ // <- TODO: !!! Use Markitdown directly not through this package
16956
+ // <- Note: !!!!!!!
16957
+ this.markitdown = new MarkItDown();
16958
+ }
16959
+ Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
16960
+ /**
16961
+ * Metadata of the scraper which includes title, mime types, etc.
16962
+ */
16963
+ get: function () {
16964
+ return markitdownScraperMetadata;
16965
+ },
16966
+ enumerable: false,
16967
+ configurable: true
16968
+ });
16969
+ /**
16970
+ * Convert the documents to `.md` file and returns intermediate source
16971
+ *
16972
+ * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
16973
+ */
16974
+ MarkitdownScraper.prototype.$convert = function (source) {
16975
+ return __awaiter(this, void 0, void 0, function () {
16976
+ var _a, _b, rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, cacheFilehandler, src, result;
16977
+ return __generator(this, function (_f) {
16978
+ switch (_f.label) {
16979
+ case 0:
16980
+ _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
16981
+ if (!$isRunningInNode()) {
16982
+ throw new KnowledgeScrapeError('MarkitdownScraper is only supported in Node environment');
16983
+ }
16984
+ if (this.tools.fs === undefined) {
16985
+ throw new EnvironmentMismatchError('Can not scrape boilerplates without filesystem tools');
16986
+ // <- TODO: [🧠] What is the best error type here`
16987
+ }
16988
+ return [4 /*yield*/, getScraperIntermediateSource(source, {
16989
+ rootDirname: rootDirname,
16990
+ cacheDirname: cacheDirname,
16991
+ intermediateFilesStrategy: intermediateFilesStrategy,
16992
+ extension: 'md',
16993
+ isVerbose: isVerbose,
16994
+ })];
16995
+ case 1:
16996
+ cacheFilehandler = _f.sent();
16997
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
16998
+ case 2:
16999
+ if (!!(_f.sent())) return [3 /*break*/, 5];
17000
+ src = source.filename || source.url || null;
17001
+ console.log('!!!', { src: src, source: source, cacheFilehandler: cacheFilehandler });
17002
+ if (src === null) {
17003
+ throw new UnexpectedError('Source has no filename or url');
17004
+ }
17005
+ return [4 /*yield*/, this.markitdown.convert(src, {
17006
+ // TODO: !!!!!! Pass when sacraping Youtube
17007
+ // enableYoutubeTranscript: true,
17008
+ // youtubeTranscriptLanguage: 'en',
17009
+ })];
17010
+ case 3:
17011
+ result = _f.sent();
17012
+ if (result === null || result === undefined) {
17013
+ throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
17014
+ // <- TODO: !!! Make MarkitdownError
17015
+ }
17016
+ console.log('!!!', { result: result, cacheFilehandler: cacheFilehandler });
17017
+ return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
17018
+ case 4:
17019
+ _f.sent();
17020
+ _f.label = 5;
17021
+ case 5: return [2 /*return*/, cacheFilehandler];
17022
+ }
17023
+ });
17024
+ });
17025
+ };
17026
+ /**
17027
+ * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
17028
+ */
17029
+ MarkitdownScraper.prototype.scrape = function (source) {
17030
+ return __awaiter(this, void 0, void 0, function () {
17031
+ var cacheFilehandler, markdownSource, knowledge;
17032
+ return __generator(this, function (_a) {
17033
+ switch (_a.label) {
17034
+ case 0: return [4 /*yield*/, this.$convert(source)];
17035
+ case 1:
17036
+ cacheFilehandler = _a.sent();
17037
+ markdownSource = {
17038
+ source: source.source,
17039
+ filename: cacheFilehandler.filename,
17040
+ url: null,
17041
+ mimeType: 'text/markdown',
17042
+ asText: function () {
17043
+ return __awaiter(this, void 0, void 0, function () {
17044
+ return __generator(this, function (_a) {
17045
+ switch (_a.label) {
17046
+ case 0: return [4 /*yield*/, readFile(cacheFilehandler.filename, 'utf-8')];
17047
+ case 1:
17048
+ // Note: [0] In $convert we check that the file exists
17049
+ return [2 /*return*/, _a.sent()];
17050
+ }
17051
+ });
17052
+ });
17053
+ },
17054
+ asJson: function () {
17055
+ throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
17056
+ },
17057
+ /*
17058
+ TODO: [🥽]
17059
+ > asBlob() {
17060
+ > throw new UnexpectedError(
17061
+ > 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
17062
+ > );
17063
+ > },
17064
+ */
17065
+ };
17066
+ knowledge = this.markdownScraper.scrape(markdownSource);
17067
+ return [4 /*yield*/, cacheFilehandler.destroy()];
17068
+ case 2:
17069
+ _a.sent();
17070
+ return [2 /*return*/, knowledge];
17071
+ }
17072
+ });
17073
+ });
17074
+ };
17075
+ return MarkitdownScraper;
17076
+ }());
17077
+ /**
17078
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
17079
+ * TODO: [👣] Converted documents can act as cached items - there is no need to run conversion each time
17080
+ * TODO: [🪂] Do it in parallel
17081
+ * Note: No need to aggregate usage here, it is done by intercepting the llmTools
17082
+ * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
17083
+ */
17084
+
17085
+ /**
17086
+ * Constructor of `MarkitdownScraper`
17087
+ *
17088
+ * @public exported from `@promptbook/markitdown`
17089
+ * @public exported from `@promptbook/pdf`
17090
+ */
17091
+ var createMarkitdownScraper = Object.assign(function (tools, options) {
17092
+ return new MarkitdownScraper(tools, options);
17093
+ }, markitdownScraperMetadata); /* <- Note: [🤛] */
17094
+ /**
17095
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
17096
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
17097
+ */
17098
+
17099
+ /**
17100
+ * Registration of known scraper
17101
+ *
17102
+ * Warning: This is not useful for the end user, it is just a side effect of the mechanism that handles all available known scrapers
17103
+ *
17104
+ * @public exported from `@promptbook/markitdown`
17105
+ * @public exported from `@promptbook/pdf`
17106
+ * @public exported from `@promptbook/wizzard`
17107
+ * @public exported from `@promptbook/cli`
17108
+ */
17109
+ var _MarkitdownScraperRegistration = $scrapersRegister.register(createMarkitdownScraper);
17110
+ /**
17111
+ * TODO: [🧠][🌜] Export only from `@promptbook/markitdown` or `@promptbook/pdf` NOT both
17112
+ * TODO: [🎶] Naming "constructor" vs "creator" vs "factory"
17113
+ * Note: [💞] Ignore a discrepancy between file name and entity name
17114
+ */
17115
+
16673
17116
  /**
16674
17117
  * Metadata of the scraper
16675
17118
  *
@@ -16679,7 +17122,7 @@ var pdfScraperMetadata = $deepFreeze({
16679
17122
  title: 'Pdf scraper',
16680
17123
  packageName: '@promptbook/pdf',
16681
17124
  className: 'PdfScraper',
16682
- mimeTypes: ['application/pdf'],
17125
+ mimeTypes: ['application/pdf-DISABLED'],
16683
17126
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
16684
17127
  isAvilableInBrowser: false,
16685
17128
  // <- Note: [🌏] Only `MarkdownScraper` makes sense to be available in the browser, for scraping non-markdown sources in the browser use a remote server
@@ -16990,5 +17433,5 @@ var _WebsiteScraperRegistration = $scrapersRegister.register(createWebsiteScrape
16990
17433
  * Note: [💞] Ignore a discrepancy between file name and entity name
16991
17434
  */
16992
17435
 
16993
- export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION, _AnthropicClaudeMetadataRegistration, _AnthropicClaudeRegistration, _AzureOpenAiMetadataRegistration, _AzureOpenAiRegistration, _CLI, _DocumentScraperMetadataRegistration, _DocumentScraperRegistration, _GoogleMetadataRegistration, _GoogleRegistration, _LegacyDocumentScraperMetadataRegistration, _LegacyDocumentScraperRegistration, _MarkdownScraperMetadataRegistration, _MarkdownScraperRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiAssistantRegistration, _OpenAiMetadataRegistration, _OpenAiRegistration, _PdfScraperMetadataRegistration, _PdfScraperRegistration, _WebsiteScraperMetadataRegistration, _WebsiteScraperRegistration };
17436
+ export { BOOK_LANGUAGE_VERSION, PROMPTBOOK_ENGINE_VERSION, _AnthropicClaudeMetadataRegistration, _AnthropicClaudeRegistration, _AzureOpenAiMetadataRegistration, _AzureOpenAiRegistration, _BoilerplateScraperMetadataRegistration, _BoilerplateScraperRegistration, _CLI, _DocumentScraperMetadataRegistration, _DocumentScraperRegistration, _GoogleMetadataRegistration, _GoogleRegistration, _LegacyDocumentScraperMetadataRegistration, _LegacyDocumentScraperRegistration, _MarkdownScraperMetadataRegistration, _MarkdownScraperRegistration, _MarkitdownScraperMetadataRegistration, _MarkitdownScraperRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiAssistantRegistration, _OpenAiMetadataRegistration, _OpenAiRegistration, _PdfScraperMetadataRegistration, _PdfScraperRegistration, _WebsiteScraperMetadataRegistration, _WebsiteScraperRegistration };
16994
17437
  //# sourceMappingURL=index.es.js.map