@promptbook/legacy-documents 0.71.0-13 → 0.71.0-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/esm/index.es.js +80 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/node.index.d.ts +2 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/collection/constructors/createCollectionFromDirectory.d.ts +2 -2
  6. package/esm/typings/src/conversion/pipelineStringToJson.d.ts +1 -1
  7. package/esm/typings/src/execution/ExecutionTools.d.ts +12 -4
  8. package/esm/typings/src/execution/FilesystemTools.d.ts +9 -0
  9. package/esm/typings/src/execution/translation/automatic-translate/translateMessages.d.ts +1 -0
  10. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +0 -1
  11. package/esm/typings/src/prepare/preparePipeline.d.ts +1 -1
  12. package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
  13. package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
  14. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +11 -0
  15. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +1 -1
  16. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +1 -0
  17. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +2 -4
  18. package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +1 -1
  19. package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +1 -1
  20. package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +1 -1
  21. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +3 -1
  22. package/esm/typings/src/utils/files/{$isDirectoryExisting.d.ts → isDirectoryExisting.d.ts} +3 -4
  23. package/esm/typings/src/utils/files/isFileExisting.d.ts +13 -0
  24. package/esm/typings/src/utils/files/{$listAllFiles.d.ts → listAllFiles.d.ts} +3 -4
  25. package/package.json +2 -2
  26. package/umd/index.umd.js +79 -76
  27. package/umd/index.umd.js.map +1 -1
  28. package/esm/typings/src/utils/files/$isFileExisting.d.ts +0 -14
  29. /package/esm/typings/src/utils/files/{$isDirectoryExisting.test.d.ts → isDirectoryExisting.test.d.ts} +0 -0
  30. /package/esm/typings/src/utils/files/{$isFileExisting.test.d.ts → isFileExisting.test.d.ts} +0 -0
  31. /package/esm/typings/src/utils/files/{$listAllFiles.test.d.ts → listAllFiles.test.d.ts} +0 -0
package/umd/index.umd.js CHANGED
@@ -15,7 +15,7 @@
15
15
  /**
16
16
  * The version of the Promptbook library
17
17
  */
18
- var PROMPTBOOK_VERSION = '0.71.0-12';
18
+ var PROMPTBOOK_VERSION = '0.71.0-14';
19
19
  // TODO: [main] !!!! List here all the versions and annotate + put into script
20
20
 
21
21
  /*! *****************************************************************************
@@ -448,6 +448,22 @@
448
448
  * TODO: [🧠][🧜‍♂️] Maybe join remoteUrl and path into single value
449
449
  */
450
450
 
451
+ /**
452
+ * This error type indicates that you try to use a feature that is not available in the current environment
453
+ *
454
+ * @public exported from `@promptbook/core`
455
+ */
456
+ var EnvironmentMismatchError = /** @class */ (function (_super) {
457
+ __extends(EnvironmentMismatchError, _super);
458
+ function EnvironmentMismatchError(message) {
459
+ var _this = _super.call(this, message) || this;
460
+ _this.name = 'EnvironmentMismatchError';
461
+ Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
462
+ return _this;
463
+ }
464
+ return EnvironmentMismatchError;
465
+ }(Error));
466
+
451
467
  /**
452
468
  * This error indicates that the promptbook can not retrieve knowledge from external sources
453
469
  *
@@ -489,22 +505,6 @@
489
505
  */
490
506
  var $isRunningInNode = new Function("\n try {\n return this === global;\n } catch (e) {\n return false;\n }\n");
491
507
 
492
- /**
493
- * This error type indicates that you try to use a feature that is not available in the current environment
494
- *
495
- * @public exported from `@promptbook/core`
496
- */
497
- var EnvironmentMismatchError = /** @class */ (function (_super) {
498
- __extends(EnvironmentMismatchError, _super);
499
- function EnvironmentMismatchError(message) {
500
- var _this = _super.call(this, message) || this;
501
- _this.name = 'EnvironmentMismatchError';
502
- Object.setPrototypeOf(_this, EnvironmentMismatchError.prototype);
503
- return _this;
504
- }
505
- return EnvironmentMismatchError;
506
- }(Error));
507
-
508
508
  /**
509
509
  * Normalize options for `execCommand` and `execCommands`
510
510
  *
@@ -655,30 +655,36 @@
655
655
  */
656
656
 
657
657
  /**
658
- * Checks if the file exists
658
+ * Get the file extension from a file name
659
659
  *
660
- * Note: `$` is used to indicate that this function is not a pure function - it looks at the filesystem
660
+ * @private within the repository
661
+ */
662
+ function getFileExtension(value) {
663
+ var match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i);
664
+ return match ? match[1].toLowerCase() : null;
665
+ }
666
+
667
+ /**
668
+ * Checks if the file exists
661
669
  *
662
670
  * @private within the repository
663
671
  */
664
- function $isFileExisting(filename) {
672
+ function isFileExisting(filename, fs) {
665
673
  return __awaiter(this, void 0, void 0, function () {
666
674
  var isReadAccessAllowed, isFile;
667
675
  return __generator(this, function (_a) {
668
676
  switch (_a.label) {
669
- case 0:
670
- if (!$isRunningInNode()) {
671
- throw new EnvironmentMismatchError('Function `$isFileExisting` works only in Node environment.js');
672
- }
673
- return [4 /*yield*/, promises.access(filename, promises.constants.R_OK)
674
- .then(function () { return true; })
675
- .catch(function () { return false; })];
677
+ case 0: return [4 /*yield*/, fs
678
+ .access(filename, fs.constants.R_OK)
679
+ .then(function () { return true; })
680
+ .catch(function () { return false; })];
676
681
  case 1:
677
682
  isReadAccessAllowed = _a.sent();
678
683
  if (!isReadAccessAllowed) {
679
684
  return [2 /*return*/, false];
680
685
  }
681
- return [4 /*yield*/, promises.stat(filename)
686
+ return [4 /*yield*/, fs
687
+ .stat(filename)
682
688
  .then(function (fileStat) { return fileStat.isFile(); })
683
689
  .catch(function () { return false; })];
684
690
  case 2:
@@ -689,19 +695,38 @@
689
695
  });
690
696
  }
691
697
  /**
692
- * Note: [🟢 !!!!!! After fix makeKnowledgeSourceHandler] Code in this file should never be published outside of `@promptbook/node` and `@promptbook/cli`
698
+ * Note: Not [~🟢~] because it is not directly dependent on `fs`
693
699
  * TODO: [🐠] This can be a validator - with variants that return true/false and variants that throw errors with meaningless messages
694
700
  * TODO: [🖇] What about symlinks?
695
701
  */
696
702
 
697
703
  /**
698
- * Get the file extension from a file name
704
+ * Tests if given string is valid URL.
699
705
  *
700
- * @private within the repository
706
+ * Note: Dataurl are considered perfectly valid.
707
+ * Note: There are two similar functions:
708
+ * - `isValidUrl` *(this one)* which tests any URL
709
+ * - `isValidPipelineUrl` which tests just promptbook URL
710
+ *
711
+ * @public exported from `@promptbook/utils`
701
712
  */
702
- function getFileExtension(value) {
703
- var match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i);
704
- return match ? match[1].toLowerCase() : null;
713
+ function isValidUrl(url) {
714
+ if (typeof url !== 'string') {
715
+ return false;
716
+ }
717
+ try {
718
+ if (url.startsWith('blob:')) {
719
+ url = url.replace(/^blob:/, '');
720
+ }
721
+ var urlObject = new URL(url /* because fail is handled */);
722
+ if (!['http:', 'https:', 'data:'].includes(urlObject.protocol)) {
723
+ return false;
724
+ }
725
+ return true;
726
+ }
727
+ catch (error) {
728
+ return false;
729
+ }
705
730
  }
706
731
 
707
732
  var defaultDiacriticsRemovalMap = [
@@ -1024,35 +1049,6 @@
1024
1049
  return normalizedName;
1025
1050
  }
1026
1051
 
1027
- /**
1028
- * Tests if given string is valid URL.
1029
- *
1030
- * Note: Dataurl are considered perfectly valid.
1031
- * Note: There are two similar functions:
1032
- * - `isValidUrl` *(this one)* which tests any URL
1033
- * - `isValidPipelineUrl` which tests just promptbook URL
1034
- *
1035
- * @public exported from `@promptbook/utils`
1036
- */
1037
- function isValidUrl(url) {
1038
- if (typeof url !== 'string') {
1039
- return false;
1040
- }
1041
- try {
1042
- if (url.startsWith('blob:')) {
1043
- url = url.replace(/^blob:/, '');
1044
- }
1045
- var urlObject = new URL(url /* because fail is handled */);
1046
- if (!['http:', 'https:', 'data:'].includes(urlObject.protocol)) {
1047
- return false;
1048
- }
1049
- return true;
1050
- }
1051
- catch (error) {
1052
- return false;
1053
- }
1054
- }
1055
-
1056
1052
  /**
1057
1053
  * Removes emojis from a string and fix whitespaces
1058
1054
  *
@@ -1215,6 +1211,7 @@
1215
1211
  * 1) Need to store more than serialized JSONs
1216
1212
  * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
1217
1213
  * TODO: [🐱‍🐉][🧠] Make some smart crop
1214
+ * Note: [🟢] Code in this file should never be released in packages that could be imported into a browser environment
1218
1215
  */
1219
1216
 
1220
1217
  var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.ptbk.md",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-from-markdown.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> 
{knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-keywords.ptbk.md"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.ptbk.md",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Title should be concise and clear\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-knowledge-title.ptbk.md"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.ptbk.md",parameters:[{name:"availableModelNames",description:"List of available model names separated by comma (,)",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],templates:[{templateType:"PROMPT_TEMPLATE",name:"make-model-requirements",title:"Make modelRequirements",content:"You are experienced AI engineer, you need to create virtual assistant.\nWrite\n\n## Sample\n\n```json\n{\n\"modelName\": \"gpt-4o\",\n\"systemMessage\": \"You are experienced AI engineer and helpfull assistant.\",\n\"temperature\": 0.7\n}\n```\n\n## Instructions\n\n- Your output format is JSON object\n- Write just the 
JSON object, no other text should be present\n- It contains the following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nPick from the following models:\n\n- {availableModelNames}\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelRequirements",format:"JSON",dependentParameterNames:["availableModelNames","personaDescription"]}],knowledgeSources:[],knowledgePieces:[],personas:[],preparations:[],sourceFile:"./promptbook-collection/prepare-persona.ptbk.md"}];
@@ -3235,7 +3232,7 @@
3235
3232
  *
3236
3233
  * @private for scraper utilities
3237
3234
  */
3238
- function makeKnowledgeSourceHandler(knowledgeSource, options) {
3235
+ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3239
3236
  var _a;
3240
3237
  return __awaiter(this, void 0, void 0, function () {
3241
3238
  var sourceContent, name, _b, _c, rootDirname, _d,
@@ -3304,8 +3301,9 @@
3304
3301
  }];
3305
3302
  case 2:
3306
3303
  if (!(isValidFilePath(sourceContent) || /\.[a-z]{1,10}$/i.exec(sourceContent))) return [3 /*break*/, 4];
3307
- if (!$isRunningInNode()) {
3308
- throw new EnvironmentMismatchError('Importing knowledge source file works only in Node.js environment');
3304
+ if (tools.fs === undefined) {
3305
+ throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
3306
+ // <- TODO: [🧠] What is the best error type here?
3309
3307
  }
3310
3308
  if (rootDirname === null) {
3311
3309
  throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
@@ -3314,7 +3312,7 @@
3314
3312
  filename_1 = path.join(rootDirname, sourceContent).split('\\').join('/');
3315
3313
  fileExtension = getFileExtension(filename_1);
3316
3314
  mimeType_1 = extensionToMimeType(fileExtension || '');
3317
- return [4 /*yield*/, $isFileExisting(filename_1)];
3315
+ return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
3318
3316
  case 3:
3319
3317
  if (!(_e.sent())) {
3320
3318
  throw new NotFoundError(spaceTrim__default["default"](function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(filename_1), "\n "); }));
@@ -3330,7 +3328,7 @@
3330
3328
  var content;
3331
3329
  return __generator(this, function (_a) {
3332
3330
  switch (_a.label) {
3333
- case 0: return [4 /*yield*/, promises.readFile(filename_1)];
3331
+ case 0: return [4 /*yield*/, tools.fs.readFile(filename_1)];
3334
3332
  case 1:
3335
3333
  content = _a.sent();
3336
3334
  return [2 /*return*/, new Blob([
@@ -3384,9 +3382,6 @@
3384
3382
  });
3385
3383
  });
3386
3384
  }
3387
- /**
3388
- * TODO: !!!!!!! Maybe constrain to @promptbook/node bundle
3389
- */
3390
3385
 
3391
3386
  /**
3392
3387
  * Prepares the knowle
@@ -3410,7 +3405,7 @@
3410
3405
  switch (_d.label) {
3411
3406
  case 0:
3412
3407
  partialPieces = null;
3413
- return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, { rootDirname: rootDirname, isVerbose: isVerbose })];
3408
+ return [4 /*yield*/, makeKnowledgeSourceHandler(knowledgeSource, tools, { rootDirname: rootDirname, isVerbose: isVerbose })];
3414
3409
  case 1:
3415
3410
  sourceHandler = _d.sent();
3416
3411
  _d.label = 2;
@@ -5976,6 +5971,10 @@
5976
5971
  if (!$isRunningInNode()) {
5977
5972
  throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
5978
5973
  }
5974
+ if (this.tools.fs === undefined) {
5975
+ throw new EnvironmentMismatchError('Can not scrape documents without filesystem tools');
5976
+ // <- TODO: [🧠] What is the best error type here?
5977
+ }
5979
5978
  if (externalProgramsPaths.pandocPath === undefined) {
5980
5979
  throw new MissingToolsError('Pandoc is required for scraping .docx files');
5981
5980
  }
@@ -5993,7 +5992,7 @@
5993
5992
  })];
5994
5993
  case 1:
5995
5994
  cacheFilehandler = _g.sent();
5996
- return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
5995
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
5997
5996
  case 2:
5998
5997
  if (!!(_g.sent())) return [3 /*break*/, 5];
5999
5998
  command_1 = "\"".concat(externalProgramsPaths.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
@@ -6002,7 +6001,7 @@
6002
6001
  case 3:
6003
6002
  // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
6004
6003
  _g.sent();
6005
- return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
6004
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
6006
6005
  case 4:
6007
6006
  // Note: [0]
6008
6007
  if (!(_g.sent())) {
@@ -6128,6 +6127,10 @@
6128
6127
  if (!$isRunningInNode()) {
6129
6128
  throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
6130
6129
  }
6130
+ if (this.tools.fs === undefined) {
6131
+ throw new EnvironmentMismatchError('Can not scrape (legacy) documents without filesystem tools');
6132
+ // <- TODO: [🧠] What is the best error type here?
6133
+ }
6131
6134
  if (externalProgramsPaths.libreOfficePath === undefined) {
6132
6135
  throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
6133
6136
  }
@@ -6148,7 +6151,7 @@
6148
6151
  if (isVerbose) {
6149
6152
  console.info("documentScraper: Converting .".concat(extension, " -> .docx"));
6150
6153
  }
6151
- return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
6154
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
6152
6155
  case 2:
6153
6156
  if (!!(_g.sent())) return [3 /*break*/, 8];
6154
6157
  documentSourceOutdirPathForLibreOffice_1 = path.join(path.dirname(cacheFilehandler.filename), 'libreoffice')
@@ -6173,7 +6176,7 @@
6173
6176
  return [4 /*yield*/, promises.rmdir(documentSourceOutdirPathForLibreOffice_1)];
6174
6177
  case 6:
6175
6178
  _g.sent();
6176
- return [4 /*yield*/, $isFileExisting(cacheFilehandler.filename)];
6179
+ return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
6177
6180
  case 7:
6178
6181
  if (!(_g.sent())) {
6179
6182
  throw new UnexpectedError(spaceTrim__default["default"](function (block) { return "\n File that was supposed to be created by LibreOffice does not exist for unknown reason\n\n Expected file:\n ".concat(block(cacheFilehandler.filename), "\n\n The temporary folder:\n ").concat(block(documentSourceOutdirPathForLibreOffice_1), "\n\n Command:\n > ").concat(block(command_1), "\n\n "); }));