@promptbook/markdown-utils 0.84.0-11 → 0.84.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,8 @@ import { DEFAULT_MAX_EXECUTION_ATTEMPTS } from '../config';
19
19
  import { DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH } from '../config';
20
20
  import { DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL } from '../config';
21
21
  import { DEFAULT_BOOKS_DIRNAME } from '../config';
22
- import { DEFAULT_EXECUTIONS_CACHE_DIRNAME } from '../config';
22
+ import { DEFAULT_DOWNLOAD_CACHE_DIRNAME } from '../config';
23
+ import { DEFAULT_EXECUTION_CACHE_DIRNAME } from '../config';
23
24
  import { DEFAULT_SCRAPE_CACHE_DIRNAME } from '../config';
24
25
  import { DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME } from '../config';
25
26
  import { DEFAULT_REMOTE_URL } from '../config';
@@ -145,7 +146,8 @@ export { DEFAULT_MAX_EXECUTION_ATTEMPTS };
145
146
  export { DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_DEPTH };
146
147
  export { DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL };
147
148
  export { DEFAULT_BOOKS_DIRNAME };
148
- export { DEFAULT_EXECUTIONS_CACHE_DIRNAME };
149
+ export { DEFAULT_DOWNLOAD_CACHE_DIRNAME };
150
+ export { DEFAULT_EXECUTION_CACHE_DIRNAME };
149
151
  export { DEFAULT_SCRAPE_CACHE_DIRNAME };
150
152
  export { DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME };
151
153
  export { DEFAULT_REMOTE_URL };
@@ -166,6 +166,14 @@ export declare const DEFAULT_MAX_KNOWLEDGE_SOURCES_SCRAPING_TOTAL = 200;
166
166
  * @public exported from `@promptbook/core`
167
167
  */
168
168
  export declare const DEFAULT_BOOKS_DIRNAME = "./books";
169
+ /**
170
+ * Where to store the temporary downloads
171
+ *
172
+ * Note: When the folder does not exist, it is created recursively
173
+ *
174
+ * @public exported from `@promptbook/core`
175
+ */
176
+ export declare const DEFAULT_DOWNLOAD_CACHE_DIRNAME = "./.promptbook/download-cache";
169
177
  /**
170
178
  * Where to store the cache of executions for promptbook CLI
171
179
  *
@@ -173,7 +181,7 @@ export declare const DEFAULT_BOOKS_DIRNAME = "./books";
173
181
  *
174
182
  * @public exported from `@promptbook/core`
175
183
  */
176
- export declare const DEFAULT_EXECUTIONS_CACHE_DIRNAME = "./.promptbook/executions-cache";
184
+ export declare const DEFAULT_EXECUTION_CACHE_DIRNAME = "./.promptbook/execution-cache";
177
185
  /**
178
186
  * Where to store the scrape cache
179
187
  *
@@ -3,7 +3,7 @@ import type fs from 'fs/promises';
3
3
  /**
4
4
  * Container for all the tools needed to manipulate the filesystem
5
5
  */
6
- export type FilesystemTools = Pick<typeof fs, 'access' | 'constants' | 'readFile' | 'writeFile' | 'stat' | 'readdir'>;
6
+ export type FilesystemTools = Pick<typeof fs, 'access' | 'constants' | 'readFile' | 'writeFile' | 'stat' | 'readdir' | 'mkdir'>;
7
7
  /**
8
8
  * TODO: Implement destroyable pattern to free resources
9
9
  */
@@ -6,6 +6,7 @@ import type { PipelineString } from '../pipeline/PipelineString';
6
6
  import type { TaskProgress } from '../types/TaskProgress';
7
7
  import type { InputParameters } from '../types/typeAliases';
8
8
  import type { string_filename } from '../types/typeAliases';
9
+ import type { string_parameter_value } from '../types/typeAliases';
9
10
  import type { string_pipeline_url } from '../types/typeAliases';
10
11
  /**
11
12
  * Wizzard for simple usage of the Promptbook
@@ -27,7 +28,12 @@ declare class Wizzard {
27
28
  *
28
29
  * Note: This works similarly to the `ptbk run` command
29
30
  */
30
- execute(book: string_pipeline_url | string_filename | PipelineString, inputParameters: InputParameters, onProgress?: (taskProgress: TaskProgress) => Promisable<void>): Promise<PipelineExecutorResult>;
31
+ execute(book: string_pipeline_url | string_filename | PipelineString, inputParameters: InputParameters, onProgress?: (taskProgress: TaskProgress) => Promisable<void>): Promise<{
32
+ /**
33
+ * Simple result of the execution
34
+ */
35
+ result: string_parameter_value;
36
+ } & PipelineExecutorResult>;
31
37
  private executionTools;
32
38
  /**
33
39
  * Provides the tools automatically for the Node.js environment
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/markdown-utils",
3
- "version": "0.84.0-11",
3
+ "version": "0.84.0-12",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
package/umd/index.umd.js CHANGED
@@ -1,14 +1,15 @@
1
1
  (function (global, factory) {
2
- typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('waitasecond'), require('path'), require('crypto-js'), require('crypto-js/enc-hex'), require('mime-types'), require('papaparse')) :
3
- typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'waitasecond', 'path', 'crypto-js', 'crypto-js/enc-hex', 'mime-types', 'papaparse'], factory) :
4
- (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-markdown-utils"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.waitasecond, global.path, global.cryptoJs, global.hexEncoder, global.mimeTypes, global.papaparse));
5
- })(this, (function (exports, spaceTrim, prettier, parserHtml, waitasecond, path, cryptoJs, hexEncoder, mimeTypes, papaparse) { 'use strict';
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports, require('spacetrim'), require('prettier'), require('prettier/parser-html'), require('waitasecond'), require('crypto-js/enc-hex'), require('crypto-js/sha256'), require('path'), require('crypto-js'), require('mime-types'), require('papaparse')) :
3
+ typeof define === 'function' && define.amd ? define(['exports', 'spacetrim', 'prettier', 'prettier/parser-html', 'waitasecond', 'crypto-js/enc-hex', 'crypto-js/sha256', 'path', 'crypto-js', 'mime-types', 'papaparse'], factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global["promptbook-markdown-utils"] = {}, global.spaceTrim, global.prettier, global.parserHtml, global.waitasecond, global.hexEncoder, global.sha256, global.path, global.cryptoJs, global.mimeTypes, global.papaparse));
5
+ })(this, (function (exports, spaceTrim, prettier, parserHtml, waitasecond, hexEncoder, sha256, path, cryptoJs, mimeTypes, papaparse) { 'use strict';
6
6
 
7
7
  function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
8
8
 
9
9
  var spaceTrim__default = /*#__PURE__*/_interopDefaultLegacy(spaceTrim);
10
10
  var parserHtml__default = /*#__PURE__*/_interopDefaultLegacy(parserHtml);
11
11
  var hexEncoder__default = /*#__PURE__*/_interopDefaultLegacy(hexEncoder);
12
+ var sha256__default = /*#__PURE__*/_interopDefaultLegacy(sha256);
12
13
 
13
14
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
14
15
  /**
@@ -24,7 +25,7 @@
24
25
  * @generated
25
26
  * @see https://github.com/webgptorg/promptbook
26
27
  */
27
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
28
29
  /**
29
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
30
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -810,6 +811,12 @@
810
811
  * @private within the repository - too low-level in comparison with other `MAX_...`
811
812
  */
812
813
  var IMMEDIATE_TIME = 10;
814
+ /**
815
+ * The maximum length of the (generated) filename
816
+ *
817
+ * @public exported from `@promptbook/core`
818
+ */
819
+ var MAX_FILENAME_LENGTH = 30;
813
820
  /**
814
821
  * Strategy for caching the intermediate results for knowledge sources
815
822
  *
@@ -829,6 +836,15 @@
829
836
  * @public exported from `@promptbook/core`
830
837
  */
831
838
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
839
+ // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
840
+ /**
841
+ * Where to store the temporary downloads
842
+ *
843
+ * Note: When the folder does not exist, it is created recursively
844
+ *
845
+ * @public exported from `@promptbook/core`
846
+ */
847
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
832
848
  /**
833
849
  * Where to store the scrape cache
834
850
  *
@@ -3192,6 +3208,22 @@
3192
3208
  * TODO: [®] DRY Register logic
3193
3209
  */
3194
3210
 
3211
+ /**
3212
+ * Removes emojis from a string and fix whitespaces
3213
+ *
3214
+ * @param text with emojis
3215
+ * @returns text without emojis
3216
+ * @public exported from `@promptbook/utils`
3217
+ */
3218
+ function removeEmojis(text) {
3219
+ // Strip emojis (including modifier, variation-selector and ZWJ sequences); nothing is inserted in their place
3220
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3221
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3222
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3223
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
3224
+ return text;
3225
+ }
3226
+
3195
3227
  var defaultDiacriticsRemovalMap = [
3196
3228
  {
3197
3229
  base: 'A',
@@ -3515,6 +3547,30 @@
3515
3547
  * Note: [💞] Ignore a discrepancy between file name and entity name
3516
3548
  */
3517
3549
 
3550
+ /**
3551
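+ * Converts a title, URL or file path into a kebab-case name
3552
+ *
3553
+ * @param value title, URL or file path to convert
3554
+ * @returns the value with the `http(s)://` prefix and `.html` suffix (for URLs) or the directory part (for file paths) stripped, `/` replaced by `-`, emojis removed, and then passed through `normalizeToKebabCase`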
3555
+ * @example @@@
3556
+ * @public exported from `@promptbook/utils`
3557
+ */
3558
+ function titleToName(value) {
3559
+ if (isValidUrl(value)) {
3560
+ value = value.replace(/^https?:\/\//, '');
3561
+ value = value.replace(/\.html$/, '');
3562
+ }
3563
+ else if (isValidFilePath(value)) {
3564
+ value = path.basename(value);
3565
+ // Note: Keeping extension in the name
3566
+ }
3567
+ value = value.split('/').join('-');
3568
+ value = removeEmojis(value);
3569
+ value = normalizeToKebabCase(value);
3570
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3571
+ return value;
3572
+ }
3573
+
3518
3574
  /**
3519
3575
  * Creates unique name for the source
3520
3576
  *
@@ -3536,6 +3592,15 @@
3536
3592
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
3537
3593
  */
3538
3594
 
3595
+ /**
3596
+ * Turns a name into two nested subfolder segments: its first and its second character, both lowercased
3597
+ *
3598
+ * @private for `FileCacheStorage`
3599
+ */
3600
+ function nameToSubfolderPath(name) {
3601
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
3602
+ }
3603
+
3539
3604
  /**
3540
3605
  * Convert file extension to mime type
3541
3606
  *
@@ -3626,10 +3691,11 @@
3626
3691
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3627
3692
  var _a;
3628
3693
  return __awaiter(this, void 0, void 0, function () {
3629
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
3630
- return __generator(this, function (_f) {
3631
- switch (_f.label) {
3694
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3695
+ return __generator(this, function (_l) {
3696
+ switch (_l.label) {
3632
3697
  case 0:
3698
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
3633
3699
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
3634
3700
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
3635
3701
  name = knowledgeSource.name;
@@ -3637,54 +3703,32 @@
3637
3703
  if (!name) {
3638
3704
  name = knowledgeSourceContentToName(knowledgeSourceContent);
3639
3705
  }
3640
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
3706
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
3641
3707
  url = knowledgeSourceContent;
3642
3708
  return [4 /*yield*/, fetch(url)];
3643
3709
  case 1:
3644
- response_1 = _f.sent();
3645
- mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3646
- return [2 /*return*/, {
3647
- source: name,
3648
- filename: null,
3649
- url: url,
3650
- mimeType: mimeType,
3651
- /*
3652
- TODO: [🥽]
3653
- > async asBlob() {
3654
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
3655
- > const content = await response.blob();
3656
- > return content;
3657
- > },
3658
- */
3659
- asJson: function () {
3660
- return __awaiter(this, void 0, void 0, function () {
3661
- var content;
3662
- return __generator(this, function (_a) {
3663
- switch (_a.label) {
3664
- case 0: return [4 /*yield*/, response_1.json()];
3665
- case 1:
3666
- content = _a.sent();
3667
- return [2 /*return*/, content];
3668
- }
3669
- });
3670
- });
3671
- },
3672
- asText: function () {
3673
- return __awaiter(this, void 0, void 0, function () {
3674
- var content;
3675
- return __generator(this, function (_a) {
3676
- switch (_a.label) {
3677
- case 0: return [4 /*yield*/, response_1.text()];
3678
- case 1:
3679
- content = _a.sent();
3680
- return [2 /*return*/, content];
3681
- }
3682
- });
3683
- });
3684
- },
3685
- }];
3710
+ response = _l.sent();
3711
+ mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3712
+ filename = url.split('/').pop() || titleToName(url);
3713
+ hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
3714
+ rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3715
+ filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3716
+ return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
3686
3717
  case 2:
3687
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
3718
+ _l.sent();
3719
+ _g = (_f = tools.fs).writeFile;
3720
+ _h = [path.join(rootDirname_1, filepath)];
3721
+ _k = (_j = Buffer).from;
3722
+ return [4 /*yield*/, response.arrayBuffer()];
3723
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3724
+ case 4:
3725
+ _l.sent();
3726
+ // TODO: !!!!!!!! Check the file security
3727
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
3728
+ // TODO: !!!!!!!! Delete the file
3729
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3730
+ case 5:
3731
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
3688
3732
  if (tools.fs === undefined) {
3689
3733
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
3690
3734
  // <- TODO: [🧠] What is the best error type here?
@@ -3697,8 +3741,8 @@
3697
3741
  fileExtension = getFileExtension(filename_1);
3698
3742
  mimeType = extensionToMimeType(fileExtension || '');
3699
3743
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
3700
- case 3:
3701
- if (!(_f.sent())) {
3744
+ case 6:
3745
+ if (!(_l.sent())) {
3702
3746
  throw new NotFoundError(spaceTrim__default["default"](function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
3703
3747
  }
3704
3748
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -3744,7 +3788,7 @@
3744
3788
  });
3745
3789
  },
3746
3790
  }];
3747
- case 4: return [2 /*return*/, {
3791
+ case 7: return [2 /*return*/, {
3748
3792
  source: name,
3749
3793
  filename: null,
3750
3794
  url: null,
@@ -5974,46 +6018,6 @@
5974
6018
  * TODO: [🐚] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
5975
6019
  */
5976
6020
 
5977
- /**
5978
- * Removes emojis from a string and fix whitespaces
5979
- *
5980
- * @param text with emojis
5981
- * @returns text without emojis
5982
- * @public exported from `@promptbook/utils`
5983
- */
5984
- function removeEmojis(text) {
5985
- // Strip emojis (including modifier, variation-selector and ZWJ sequences); nothing is inserted in their place
5986
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5987
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5988
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5989
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
5990
- return text;
5991
- }
5992
-
5993
- /**
5994
- * @@@
5995
- *
5996
- * @param value @@@
5997
- * @returns @@@
5998
- * @example @@@
5999
- * @public exported from `@promptbook/utils`
6000
- */
6001
- function titleToName(value) {
6002
- if (isValidUrl(value)) {
6003
- value = value.replace(/^https?:\/\//, '');
6004
- value = value.replace(/\.html$/, '');
6005
- }
6006
- else if (isValidFilePath(value)) {
6007
- value = path.basename(value);
6008
- // Note: Keeping extension in the name
6009
- }
6010
- value = value.split('/').join('-');
6011
- value = removeEmojis(value);
6012
- value = normalizeToKebabCase(value);
6013
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
6014
- return value;
6015
- }
6016
-
6017
6021
  /**
6018
6022
  * Metadata of the scraper
6019
6023
  *