@promptbook/core 0.84.0-11 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -3,13 +3,13 @@ import { format } from 'prettier';
3
3
  import parserHtml from 'prettier/parser-html';
4
4
  import { forTime } from 'waitasecond';
5
5
  import { unparse, parse } from 'papaparse';
6
- import { join, basename } from 'path';
7
- import { SHA256 } from 'crypto-js';
8
6
  import hexEncoder from 'crypto-js/enc-hex';
7
+ import sha256 from 'crypto-js/sha256';
8
+ import { basename, join, dirname } from 'path';
9
+ import { SHA256 } from 'crypto-js';
9
10
  import { lookup } from 'mime-types';
10
11
  import moment from 'moment';
11
12
  import colors from 'colors';
12
- import sha256 from 'crypto-js/sha256';
13
13
 
14
14
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
15
15
  /**
@@ -25,7 +25,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -752,6 +752,14 @@ var DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL = 200;
752
752
  */
753
753
  var DEFAULT_BOOKS_DIRNAME = './books';
754
754
  // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
755
+ /**
756
+ * Where to store the temporary downloads
757
+ *
758
+ * Note: When the folder does not exist, it is created recursively
759
+ *
760
+ * @public exported from `@promptbook/core`
761
+ */
762
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
755
763
  /**
756
764
  * Where to store the cache of executions for promptbook CLI
757
765
  *
@@ -759,7 +767,7 @@ var DEFAULT_BOOKS_DIRNAME = './books';
759
767
  *
760
768
  * @public exported from `@promptbook/core`
761
769
  */
762
- var DEFAULT_EXECUTIONS_CACHE_DIRNAME = './.promptbook/executions-cache';
770
+ var DEFAULT_EXECUTION_CACHE_DIRNAME = './.promptbook/execution-cache';
763
771
  /**
764
772
  * Where to store the scrape cache
765
773
  *
@@ -5749,6 +5757,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
5749
5757
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5750
5758
  */
5751
5759
 
5760
+ /**
5761
+ * @@@
5762
+ *
5763
+ * @private for `FileCacheStorage`
5764
+ */
5765
+ function nameToSubfolderPath(name) {
5766
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
5767
+ }
5768
+
5752
5769
  /**
5753
5770
  * Convert file extension to mime type
5754
5771
  *
@@ -5804,6 +5821,46 @@ function isFileExisting(filename, fs) {
5804
5821
  * TODO: [🖇] What about symlinks?
5805
5822
  */
5806
5823
 
5824
+ /**
5825
+ * Removes emojis from a string and fix whitespaces
5826
+ *
5827
+ * @param text with emojis
5828
+ * @returns text without emojis
5829
+ * @public exported from `@promptbook/utils`
5830
+ */
5831
+ function removeEmojis(text) {
5832
+ // Remove emojis (including modifier symbols, variation selectors and ZWJ sequences)
5833
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5834
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5835
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5836
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5837
+ return text;
5838
+ }
5839
+
5840
+ /**
5841
+ * @@@
5842
+ *
5843
+ * @param value @@@
5844
+ * @returns @@@
5845
+ * @example @@@
5846
+ * @public exported from `@promptbook/utils`
5847
+ */
5848
+ function titleToName(value) {
5849
+ if (isValidUrl(value)) {
5850
+ value = value.replace(/^https?:\/\//, '');
5851
+ value = value.replace(/\.html$/, '');
5852
+ }
5853
+ else if (isValidFilePath(value)) {
5854
+ value = basename(value);
5855
+ // Note: Keeping extension in the name
5856
+ }
5857
+ value = value.split('/').join('-');
5858
+ value = removeEmojis(value);
5859
+ value = normalizeToKebabCase(value);
5860
+ // TODO: [🧠] Maybe warn about or add some padding to short names, which are not good identifiers
5861
+ return value;
5862
+ }
5863
+
5807
5864
  /**
5808
5865
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
5809
5866
  *
@@ -5839,10 +5896,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5839
5896
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5840
5897
  var _a;
5841
5898
  return __awaiter(this, void 0, void 0, function () {
5842
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
5843
- return __generator(this, function (_f) {
5844
- switch (_f.label) {
5899
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5900
+ return __generator(this, function (_l) {
5901
+ switch (_l.label) {
5845
5902
  case 0:
5903
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
5846
5904
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
5847
5905
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
5848
5906
  name = knowledgeSource.name;
@@ -5850,54 +5908,74 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5850
5908
  if (!name) {
5851
5909
  name = knowledgeSourceContentToName(knowledgeSourceContent);
5852
5910
  }
5853
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
5911
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
5854
5912
  url = knowledgeSourceContent;
5855
5913
  return [4 /*yield*/, fetch(url)];
5856
5914
  case 1:
5857
- response_1 = _f.sent();
5915
+ response_1 = _l.sent();
5858
5916
  mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5859
- return [2 /*return*/, {
5860
- source: name,
5861
- filename: null,
5862
- url: url,
5863
- mimeType: mimeType,
5864
- /*
5865
- TODO: [🥽]
5866
- > async asBlob() {
5867
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
5868
- > const content = await response.blob();
5869
- > return content;
5870
- > },
5871
- */
5872
- asJson: function () {
5873
- return __awaiter(this, void 0, void 0, function () {
5874
- var content;
5875
- return __generator(this, function (_a) {
5876
- switch (_a.label) {
5877
- case 0: return [4 /*yield*/, response_1.json()];
5878
- case 1:
5879
- content = _a.sent();
5880
- return [2 /*return*/, content];
5881
- }
5917
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
5918
+ return [2 /*return*/, {
5919
+ source: name,
5920
+ filename: null,
5921
+ url: url,
5922
+ mimeType: mimeType,
5923
+ /*
5924
+ TODO: [🥽]
5925
+ > async asBlob() {
5926
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
5927
+ > const content = await response.blob();
5928
+ > return content;
5929
+ > },
5930
+ */
5931
+ asJson: function () {
5932
+ return __awaiter(this, void 0, void 0, function () {
5933
+ var content;
5934
+ return __generator(this, function (_a) {
5935
+ switch (_a.label) {
5936
+ case 0: return [4 /*yield*/, response_1.json()];
5937
+ case 1:
5938
+ content = _a.sent();
5939
+ return [2 /*return*/, content];
5940
+ }
5941
+ });
5882
5942
  });
5883
- });
5884
- },
5885
- asText: function () {
5886
- return __awaiter(this, void 0, void 0, function () {
5887
- var content;
5888
- return __generator(this, function (_a) {
5889
- switch (_a.label) {
5890
- case 0: return [4 /*yield*/, response_1.text()];
5891
- case 1:
5892
- content = _a.sent();
5893
- return [2 /*return*/, content];
5894
- }
5943
+ },
5944
+ asText: function () {
5945
+ return __awaiter(this, void 0, void 0, function () {
5946
+ var content;
5947
+ return __generator(this, function (_a) {
5948
+ switch (_a.label) {
5949
+ case 0: return [4 /*yield*/, response_1.text()];
5950
+ case 1:
5951
+ content = _a.sent();
5952
+ return [2 /*return*/, content];
5953
+ }
5954
+ });
5895
5955
  });
5896
- });
5897
- },
5898
- }];
5956
+ },
5957
+ }];
5958
+ }
5959
+ basename = url.split('/').pop() || titleToName(url);
5960
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5961
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5962
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5963
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5899
5964
  case 2:
5900
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
5965
+ _l.sent();
5966
+ _g = (_f = tools.fs).writeFile;
5967
+ _h = [join(rootDirname_1, filepath)];
5968
+ _k = (_j = Buffer).from;
5969
+ return [4 /*yield*/, response_1.arrayBuffer()];
5970
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5971
+ case 4:
5972
+ _l.sent();
5973
+ // TODO: !!!!!!!! Check the file security
5974
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
5975
+ // TODO: !!!!!!!! Delete the file
5976
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
5977
+ case 5:
5978
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
5901
5979
  if (tools.fs === undefined) {
5902
5980
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
5903
5981
  // <- TODO: [🧠] What is the best error type here?
@@ -5910,8 +5988,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5910
5988
  fileExtension = getFileExtension(filename_1);
5911
5989
  mimeType = extensionToMimeType(fileExtension || '');
5912
5990
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
5913
- case 3:
5914
- if (!(_f.sent())) {
5991
+ case 6:
5992
+ if (!(_l.sent())) {
5915
5993
  throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
5916
5994
  }
5917
5995
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -5957,7 +6035,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5957
6035
  });
5958
6036
  },
5959
6037
  }];
5960
- case 4: return [2 /*return*/, {
6038
+ case 7: return [2 /*return*/, {
5961
6039
  source: name,
5962
6040
  filename: null,
5963
6041
  url: null,
@@ -7068,22 +7146,6 @@ function normalizeTo_camelCase(text, _isFirstLetterCapital) {
7068
7146
  * TODO: [🌺] Use some intermediate util splitWords
7069
7147
  */
7070
7148
 
7071
- /**
7072
- * Removes emojis from a string and fix whitespaces
7073
- *
7074
- * @param text with emojis
7075
- * @returns text without emojis
7076
- * @public exported from `@promptbook/utils`
7077
- */
7078
- function removeEmojis(text) {
7079
- // Remove emojis (including modifier symbols, variation selectors and ZWJ sequences)
7080
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
7081
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
7082
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
7083
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
7084
- return text;
7085
- }
7086
-
7087
7149
  /**
7088
7150
  * Removes quotes from a string
7089
7151
  *
@@ -9275,30 +9337,6 @@ function flattenMarkdown(markdown) {
9275
9337
  * NOW we are working just with markdown string and its good enough
9276
9338
  */
9277
9339
 
9278
- /**
9279
- * @@@
9280
- *
9281
- * @param value @@@
9282
- * @returns @@@
9283
- * @example @@@
9284
- * @public exported from `@promptbook/utils`
9285
- */
9286
- function titleToName(value) {
9287
- if (isValidUrl(value)) {
9288
- value = value.replace(/^https?:\/\//, '');
9289
- value = value.replace(/\.html$/, '');
9290
- }
9291
- else if (isValidFilePath(value)) {
9292
- value = basename(value);
9293
- // Note: Keeping extension in the name
9294
- }
9295
- value = value.split('/').join('-');
9296
- value = removeEmojis(value);
9297
- value = normalizeToKebabCase(value);
9298
- // TODO: [🧠] Maybe warn about or add some padding to short names, which are not good identifiers
9299
- return value;
9300
- }
9301
-
9302
9340
  /**
9303
9341
  * Compile pipeline from string (markdown) format to JSON format synchronously
9304
9342
  *
@@ -11497,5 +11535,5 @@ var PrefixStorage = /** @class */ (function () {
11497
11535
  return PrefixStorage;
11498
11536
  }());
11499
11537
 
11500
- export { $llmToolsMetadataRegister, $llmToolsRegister, $scrapersMetadataRegister, $scrapersRegister, ADMIN_EMAIL, ADMIN_GITHUB_NAME, AbstractFormatError, BOOK_LANGUAGE_VERSION, BlackholeStorage, BoilerplateError, BoilerplateFormfactorDefinition, CLAIM, CallbackInterfaceTools, ChatbotFormfactorDefinition, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_BOOKS_DIRNAME, DEFAULT_BOOK_TITLE, DEFAULT_CSV_SETTINGS, DEFAULT_EXECUTIONS_CACHE_DIRNAME, DEFAULT_GET_PIPELINE_COLLECTION_FUNCTION_NAME, DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_AUTO_INSTALLED, DEFAULT_IS_VERBOSE, DEFAULT_MAX_EXECUTION_ATTEMPTS, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, DEFAULT_MAX_PARALLEL_COUNT, DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, DEFAULT_SCRAPE_CACHE_DIRNAME, DEFAULT_TASK_TITLE, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, FORMFACTOR_DEFINITIONS, GENERIC_PIPELINE_INTERFACE, GeneratorFormfactorDefinition, GenericFormfactorDefinition, ImageGeneratorFormfactorDefinition, KnowledgeScrapeError, LOGO_DARK_SRC, LOGO_LIGHT_SRC, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_FILENAME_LENGTH, MODEL_VARIANTS, MatcherFormfactorDefinition, MemoryStorage, MissingToolsError, MultipleLlmExecutionTools, NAME, NonTaskSectionTypes, NotFoundError, NotYetImplementedError, ORDER_OF_PIPELINE_JSON, PROMPTBOOK_ENGINE_VERSION, PROMPTBOOK_ERRORS, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SET_IS_VERBOSE, SectionTypes, SheetsFormfactorDefinition, TaskTypes, TextFormatDefinition, TranslatorFormfactorDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _BoilerplateScraperMetadataRegistration, _DocumentScraperMetadataRegistration, _GoogleMetadataRegistration, _LegacyDocumentScraperMetadataRegistration, 
_MarkdownScraperMetadataRegistration, _MarkitdownScraperMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, _PdfScraperMetadataRegistration, _WebsiteScraperMetadataRegistration, addUsage, assertsExecutionSuccessful, book, cacheLlmTools, collectionToJson, compilePipeline, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, extractParameterNamesFromTask, getPipelineInterface, isPassingExpectations, isPipelineImplementingInterface, isPipelineInterfacesEqual, isPipelinePrepared, isValidPipelineString, joinLlmExecutionTools, limitTotalUsage, makeKnowledgeSourceHandler, parsePipeline, pipelineJsonToString, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTasks, prettifyPipelineString, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, validatePipelineString };
11538
+ export { $llmToolsMetadataRegister, $llmToolsRegister, $scrapersMetadataRegister, $scrapersRegister, ADMIN_EMAIL, ADMIN_GITHUB_NAME, AbstractFormatError, BOOK_LANGUAGE_VERSION, BlackholeStorage, BoilerplateError, BoilerplateFormfactorDefinition, CLAIM, CallbackInterfaceTools, ChatbotFormfactorDefinition, CollectionError, CsvFormatDefinition, CsvFormatError, DEFAULT_BOOKS_DIRNAME, DEFAULT_BOOK_TITLE, DEFAULT_CSV_SETTINGS, DEFAULT_DOWNLOAD_CACHE_DIRNAME, DEFAULT_EXECUTION_CACHE_DIRNAME, DEFAULT_GET_PIPELINE_COLLECTION_FUNCTION_NAME, DEFAULT_INTERMEDIATE_FILES_STRATEGY, DEFAULT_IS_AUTO_INSTALLED, DEFAULT_IS_VERBOSE, DEFAULT_MAX_EXECUTION_ATTEMPTS, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH, DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL, DEFAULT_MAX_PARALLEL_COUNT, DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME, DEFAULT_REMOTE_URL, DEFAULT_REMOTE_URL_PATH, DEFAULT_SCRAPE_CACHE_DIRNAME, DEFAULT_TASK_TITLE, EXPECTATION_UNITS, EnvironmentMismatchError, ExecutionReportStringOptionsDefaults, ExpectError, FORMFACTOR_DEFINITIONS, GENERIC_PIPELINE_INTERFACE, GeneratorFormfactorDefinition, GenericFormfactorDefinition, ImageGeneratorFormfactorDefinition, KnowledgeScrapeError, LOGO_DARK_SRC, LOGO_LIGHT_SRC, LimitReachedError, MANDATORY_CSV_SETTINGS, MAX_FILENAME_LENGTH, MODEL_VARIANTS, MatcherFormfactorDefinition, MemoryStorage, MissingToolsError, MultipleLlmExecutionTools, NAME, NonTaskSectionTypes, NotFoundError, NotYetImplementedError, ORDER_OF_PIPELINE_JSON, PROMPTBOOK_ENGINE_VERSION, PROMPTBOOK_ERRORS, ParseError, PipelineExecutionError, PipelineLogicError, PipelineUrlError, PrefixStorage, RESERVED_PARAMETER_NAMES, SET_IS_VERBOSE, SectionTypes, SheetsFormfactorDefinition, TaskTypes, TextFormatDefinition, TranslatorFormfactorDefinition, UNCERTAIN_USAGE, UnexpectedError, ZERO_USAGE, _AnthropicClaudeMetadataRegistration, _AzureOpenAiMetadataRegistration, _BoilerplateScraperMetadataRegistration, _DocumentScraperMetadataRegistration, _GoogleMetadataRegistration, 
_LegacyDocumentScraperMetadataRegistration, _MarkdownScraperMetadataRegistration, _MarkitdownScraperMetadataRegistration, _OpenAiAssistantMetadataRegistration, _OpenAiMetadataRegistration, _PdfScraperMetadataRegistration, _WebsiteScraperMetadataRegistration, addUsage, assertsExecutionSuccessful, book, cacheLlmTools, collectionToJson, compilePipeline, countTotalUsage, createCollectionFromJson, createCollectionFromPromise, createCollectionFromUrl, createLlmToolsFromConfiguration, createPipelineExecutor, createSubcollection, embeddingVectorToString, executionReportJsonToString, extractParameterNamesFromTask, getPipelineInterface, isPassingExpectations, isPipelineImplementingInterface, isPipelineInterfacesEqual, isPipelinePrepared, isValidPipelineString, joinLlmExecutionTools, limitTotalUsage, makeKnowledgeSourceHandler, parsePipeline, pipelineJsonToString, prepareKnowledgePieces, preparePersona, preparePipeline, prepareTasks, prettifyPipelineString, unpreparePipeline, usageToHuman, usageToWorktime, validatePipeline, validatePipelineString };
11501
11539
  //# sourceMappingURL=index.es.js.map