@promptbook/core 0.84.0-11 → 0.84.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -3,13 +3,13 @@ import { format } from 'prettier';
3
3
  import parserHtml from 'prettier/parser-html';
4
4
  import { forTime } from 'waitasecond';
5
5
  import { unparse, parse } from 'papaparse';
6
- import { join, basename } from 'path';
7
- import { SHA256 } from 'crypto-js';
8
6
  import hexEncoder from 'crypto-js/enc-hex';
7
+ import sha256 from 'crypto-js/sha256';
8
+ import { basename, join, dirname } from 'path';
9
+ import { SHA256 } from 'crypto-js';
9
10
  import { lookup } from 'mime-types';
10
11
  import moment from 'moment';
11
12
  import colors from 'colors';
12
- import sha256 from 'crypto-js/sha256';
13
13
 
14
14
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
15
15
  /**
@@ -25,7 +25,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -752,6 +752,14 @@ var DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL = 200;
752
752
  */
753
753
  var DEFAULT_BOOKS_DIRNAME = './books';
754
754
  // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
755
+ /**
756
+ * Where to store the temporary downloads
757
+ *
758
+ * Note: When the folder does not exist, it is created recursively
759
+ *
760
+ * @public exported from `@promptbook/core`
761
+ */
762
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
755
763
  /**
756
764
  * Where to store the cache of executions for promptbook CLI
757
765
  *
@@ -759,7 +767,7 @@ var DEFAULT_BOOKS_DIRNAME = './books';
759
767
  *
760
768
  * @public exported from `@promptbook/core`
761
769
  */
762
- var DEFAULT_EXECUTIONS_CACHE_DIRNAME = './.promptbook/executions-cache';
770
+ var DEFAULT_EXECUTION_CACHE_DIRNAME = './.promptbook/execution-cache';
763
771
  /**
764
772
  * Where to store the scrape cache
765
773
  *
@@ -5666,6 +5674,22 @@ function $registeredScrapersMessage(availableScrapers) {
5666
5674
  * TODO: [®] DRY Register logic
5667
5675
  */
5668
5676
 
5677
+ /**
5678
+ * Removes emojis from a string and fixes whitespaces
5679
+ *
5680
+ * @param text with emojis
5681
+ * @returns text without emojis
5682
+ * @public exported from `@promptbook/utils`
5683
+ */
5684
+ function removeEmojis(text) {
5685
+ // Strip emojis: first drop modifiers, variation selectors and ZWJ-joined parts, then remove the remaining pictographs
5686
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5687
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5688
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5689
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5690
+ return text;
5691
+ }
5692
+
5669
5693
  /**
5670
5694
  * @@@
5671
5695
  *
@@ -5728,6 +5752,30 @@ function normalizeToKebabCase(text) {
5728
5752
  * Note: [💞] Ignore a discrepancy between file name and entity name
5729
5753
  */
5730
5754
 
5755
+ /**
5756
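+ * Converts a title, URL or file path to a name: strips the URL protocol and trailing `.html` (or takes the file basename), replaces slashes with hyphens, removes emojis and normalizes to kebab-case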
5757
+ *
5758
+ * @param value title, URL or file path to convert
5759
+ * @returns name normalized by `normalizeToKebabCase`
5760
+ * @example @@@
5761
+ * @public exported from `@promptbook/utils`
5762
+ */
5763
+ function titleToName(value) {
5764
+ if (isValidUrl(value)) {
5765
+ value = value.replace(/^https?:\/\//, '');
5766
+ value = value.replace(/\.html$/, '');
5767
+ }
5768
+ else if (isValidFilePath(value)) {
5769
+ value = basename(value);
5770
+ // Note: Keeping extension in the name
5771
+ }
5772
+ value = value.split('/').join('-');
5773
+ value = removeEmojis(value);
5774
+ value = normalizeToKebabCase(value);
5775
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5776
+ return value;
5777
+ }
5778
+
5731
5779
  /**
5732
5780
  * Creates unique name for the source
5733
5781
  *
@@ -5749,6 +5797,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
5749
5797
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5750
5798
  */
5751
5799
 
5800
+ /**
5801
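+ * Returns a two-element array with the lower-cased first and second characters of the name, used as nested subfolder names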
5802
+ *
5803
+ * @private for `FileCacheStorage`
5804
+ */
5805
+ function nameToSubfolderPath(name) {
5806
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
5807
+ }
5808
+
5752
5809
  /**
5753
5810
  * Convert file extension to mime type
5754
5811
  *
@@ -5839,10 +5896,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5839
5896
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5840
5897
  var _a;
5841
5898
  return __awaiter(this, void 0, void 0, function () {
5842
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
5843
- return __generator(this, function (_f) {
5844
- switch (_f.label) {
5899
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5900
+ return __generator(this, function (_l) {
5901
+ switch (_l.label) {
5845
5902
  case 0:
5903
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
5846
5904
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
5847
5905
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
5848
5906
  name = knowledgeSource.name;
@@ -5850,54 +5908,32 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5850
5908
  if (!name) {
5851
5909
  name = knowledgeSourceContentToName(knowledgeSourceContent);
5852
5910
  }
5853
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
5911
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
5854
5912
  url = knowledgeSourceContent;
5855
5913
  return [4 /*yield*/, fetch(url)];
5856
5914
  case 1:
5857
- response_1 = _f.sent();
5858
- mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5859
- return [2 /*return*/, {
5860
- source: name,
5861
- filename: null,
5862
- url: url,
5863
- mimeType: mimeType,
5864
- /*
5865
- TODO: [🥽]
5866
- > async asBlob() {
5867
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
5868
- > const content = await response.blob();
5869
- > return content;
5870
- > },
5871
- */
5872
- asJson: function () {
5873
- return __awaiter(this, void 0, void 0, function () {
5874
- var content;
5875
- return __generator(this, function (_a) {
5876
- switch (_a.label) {
5877
- case 0: return [4 /*yield*/, response_1.json()];
5878
- case 1:
5879
- content = _a.sent();
5880
- return [2 /*return*/, content];
5881
- }
5882
- });
5883
- });
5884
- },
5885
- asText: function () {
5886
- return __awaiter(this, void 0, void 0, function () {
5887
- var content;
5888
- return __generator(this, function (_a) {
5889
- switch (_a.label) {
5890
- case 0: return [4 /*yield*/, response_1.text()];
5891
- case 1:
5892
- content = _a.sent();
5893
- return [2 /*return*/, content];
5894
- }
5895
- });
5896
- });
5897
- },
5898
- }];
5915
+ response = _l.sent();
5916
+ mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5917
+ filename = url.split('/').pop() || titleToName(url);
5918
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5919
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5920
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5921
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5899
5922
  case 2:
5900
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
5923
+ _l.sent();
5924
+ _g = (_f = tools.fs).writeFile;
5925
+ _h = [join(rootDirname_1, filepath)];
5926
+ _k = (_j = Buffer).from;
5927
+ return [4 /*yield*/, response.arrayBuffer()];
5928
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5929
+ case 4:
5930
+ _l.sent();
5931
+ // TODO: !!!!!!!! Check the file security
5932
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
5933
+ // TODO: !!!!!!!! Delete the file
5934
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
5935
+ case 5:
5936
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
5901
5937
  if (tools.fs === undefined) {
5902
5938
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
5903
5939
  // <- TODO: [🧠] What is the best error type here?
@@ -5910,8 +5946,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5910
5946
  fileExtension = getFileExtension(filename_1);
5911
5947
  mimeType = extensionToMimeType(fileExtension || '');
5912
5948
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
5913
- case 3:
5914
- if (!(_f.sent())) {
5949
+ case 6:
5950
+ if (!(_l.sent())) {
5915
5951
  throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
5916
5952
  }
5917
5953
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -5957,7 +5993,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5957
5993
  });
5958
5994
  },
5959
5995
  }];
5960
- case 4: return [2 /*return*/, {
5996
+ case 7: return [2 /*return*/, {
5961
5997
  source: name,
5962
5998
  filename: null,
5963
5999
  url: null,
@@ -7068,22 +7104,6 @@ function normalizeTo_camelCase(text, _isFirstLetterCapital) {
7068
7104
  * TODO: [🌺] Use some intermediate util splitWords
7069
7105
  */
7070
7106
 
7071
- /**
7072
- * Removes emojis from a string and fix whitespaces
7073
- *
7074
- * @param text with emojis
7075
- * @returns text without emojis
7076
- * @public exported from `@promptbook/utils`
7077
- */
7078
- function removeEmojis(text) {
7079
- // Replace emojis (and also ZWJ sequence) with hyphens
7080
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
7081
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
7082
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
7083
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
7084
- return text;
7085
- }
7086
-
7087
7107
  /**
7088
7108
  * Removes quotes from a string
7089
7109
  *
@@ -9275,30 +9295,6 @@ function flattenMarkdown(markdown) {
9275
9295
  * NOW we are working just with markdown string and its good enough
9276
9296
  */
9277
9297
 
9278
- /**
9279
- * @@@
9280
- *
9281
- * @param value @@@
9282
- * @returns @@@
9283
- * @example @@@
9284
- * @public exported from `@promptbook/utils`
9285
- */
9286
- function titleToName(value) {
9287
- if (isValidUrl(value)) {
9288
- value = value.replace(/^https?:\/\//, '');
9289
- value = value.replace(/\.html$/, '');
9290
- }
9291
- else if (isValidFilePath(value)) {
9292
- value = basename(value);
9293
- // Note: Keeping extension in the name
9294
- }
9295
- value = value.split('/').join('-');
9296
- value = removeEmojis(value);
9297
- value = normalizeToKebabCase(value);
9298
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
9299
- return value;
9300
- }
9301
-
9302
9298
  /**
9303
9299
  * Compile pipeline from string (markdown) format to JSON format synchronously
9304
9300
  *
@@ -11497,5 +11493,5 @@ var PrefixStorage = /** @class */ (function () {
11497
11493
  return PrefixStorage;
11498
11494
  }());
11499
11495
 
11500
- export { $llmToolsMetadataRegister, $llmToolsRegister, $scrapersMetadataRegister, $scrapersRegister, ADMIN_EMAIL, ADMIN_GITHUB_NAME, AbstractFormatError, BOOK_LANGUAGE_VERSION, BlackholeStorage, BoilerplateError, BoilerplateFormfactorDefinition, CLAIM, CallbackInterfaceTools, ChatbotFormfactorDefinition, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_BOOKS_DIRNAME, DEFAULT_BOOK_TITLE, DEFAULT_CSV_SETTINGS, DEFAULT_EXECUTIONS_CACHE_DIRNAME, DEFAULT_GET_PIPELINE_COLLECTION_FUNCTION_NAME, DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_AUTO_INSTALLED, DEFAULT_IS_VERBOSE, DEFAULT_MAX_EXECUTION_ATTEMPTS, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, DEFAULT_MAX_PARALLEL_COUNT, DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, DEFAULT_SCRAPE_CACHE_DIRNAME, DEFAULT_TASK_TITLE, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, FORMFACTOR_DEFINITIONS, GENERIC_PIPELINE_INTERFACE, GeneratorFormfactorDefinition, GenericFormfactorDefinition, ImageGeneratorFormfactorDefinition, KnowledgeScrapeError, LOGO_DARK_SRC, LOGO_LIGHT_SRC, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_FILENAME_LENGTH, MODEL_VARIANTS, MatcherFormfactorDefinition, MemoryStorage, MissingToolsError, MultipleLlmExecutionTools, NAME, NonTaskSectionTypes, NotFoundError, NotYetImplementedError, ORDER_OF_PIPELINE_JSON, PROMPTBOOK_ENGINE_VERSION, PROMPTBOOK_ERRORS, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SET_IS_VERBOSE, SectionTypes, SheetsFormfactorDefinition, TaskTypes, TextFormatDefinition, TranslatorFormfactorDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _BoilerplateScraperMetadataRegistration, _DocumentScraperMetadataRegistration, _GoogleMetadataRegistration, _LegacyDocumentScraperMetadataRegistration, 
_MarkdownScraperMetadataRegistration, _MarkitdownScraperMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, _PdfScraperMetadataRegistration, _WebsiteScraperMetadataRegistration, addUsage, assertsExecutionSuccessful, book, cacheLlmTools, collectionToJson, compilePipeline, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, extractParameterNamesFromTask, getPipelineInterface, isPassingExpectations, isPipelineImplementingInterface, isPipelineInterfacesEqual, isPipelinePrepared, isValidPipelineString, joinLlmExecutionTools, limitTotalUsage, makeKnowledgeSourceHandler, parsePipeline, pipelineJsonToString, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTasks, prettifyPipelineString, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, validatePipelineString };
11496
+ export { $llmToolsMetadataRegister, $llmToolsRegister, $scrapersMetadataRegister, $scrapersRegister, ADMIN_EMAIL, ADMIN_GITHUB_NAME, AbstractFormatError, BOOK_LANGUAGE_VERSION, BlackholeStorage, BoilerplateError, BoilerplateFormfactorDefinition, CLAIM, CallbackInterfaceTools, ChatbotFormfactorDefinition, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_BOOKS_DIRNAME, DEFAULT_BOOK_TITLE, DEFAULT_CSV_SETTINGS, DEFAULT_DOWNLOAD_CACHE_DIRNAME, DEFAULT_EXECUTION_CACHE_DIRNAME, DEFAULT_GET_PIPELINE_COLLECTION_FUNCTION_NAME, DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_AUTO_INSTALLED, DEFAULT_IS_VERBOSE, DEFAULT_MAX_EXECUTION_ATTEMPTS, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, DEFAULT_MAX_PARALLEL_COUNT, DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, DEFAULT_SCRAPE_CACHE_DIRNAME, DEFAULT_TASK_TITLE, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, FORMFACTOR_DEFINITIONS, GENERIC_PIPELINE_INTERFACE, GeneratorFormfactorDefinition, GenericFormfactorDefinition, ImageGeneratorFormfactorDefinition, KnowledgeScrapeError, LOGO_DARK_SRC, LOGO_LIGHT_SRC, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_FILENAME_LENGTH, MODEL_VARIANTS, MatcherFormfactorDefinition, MemoryStorage, MissingToolsError, MultipleLlmExecutionTools, NAME, NonTaskSectionTypes, NotFoundError, NotYetImplementedError, ORDER_OF_PIPELINE_JSON, PROMPTBOOK_ENGINE_VERSION, PROMPTBOOK_ERRORS, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SET_IS_VERBOSE, SectionTypes, SheetsFormfactorDefinition, TaskTypes, TextFormatDefinition, TranslatorFormfactorDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _BoilerplateScraperMetadataRegistration, _DocumentScraperMetadataRegistration, _GoogleMetadataRegistration, 
_LegacyDocumentScraperMetadataRegistration, _MarkdownScraperMetadataRegistration, _MarkitdownScraperMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, _PdfScraperMetadataRegistration, _WebsiteScraperMetadataRegistration, addUsage, assertsExecutionSuccessful, book, cacheLlmTools, collectionToJson, compilePipeline, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, extractParameterNamesFromTask, getPipelineInterface, isPassingExpectations, isPipelineImplementingInterface, isPipelineInterfacesEqual, isPipelinePrepared, isValidPipelineString, joinLlmExecutionTools, limitTotalUsage, makeKnowledgeSourceHandler, parsePipeline, pipelineJsonToString, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTasks, prettifyPipelineString, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, validatePipelineString };
11501
11497
  //# sourceMappingURL=index.es.js.map