@promptbook/pdf 0.84.0-16 → 0.84.0-18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ import { DEFAULT_BOOK_TITLE } from '../config';
14
14
  import { DEFAULT_TASK_TITLE } from '../config';
15
15
  import { DEFAULT_PROMPT_TASK_TITLE } from '../config';
16
16
  import { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME } from '../config';
17
+ import { DEFAULT_MAX_FILE_SIZE } from '../config';
17
18
  import { MAX_FILENAME_LENGTH } from '../config';
18
19
  import { DEFAULT_INTERMEDIATE_FILES_STRATEGY } from '../config';
19
20
  import { DEFAULT_MAX_PARALLEL_COUNT } from '../config';
@@ -144,6 +145,7 @@ export { DEFAULT_BOOK_TITLE };
144
145
  export { DEFAULT_TASK_TITLE };
145
146
  export { DEFAULT_PROMPT_TASK_TITLE };
146
147
  export { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME };
148
+ export { DEFAULT_MAX_FILE_SIZE };
147
149
  export { MAX_FILENAME_LENGTH };
148
150
  export { DEFAULT_INTERMEDIATE_FILES_STRATEGY };
149
151
  export { DEFAULT_MAX_PARALLEL_COUNT };
@@ -77,6 +77,12 @@ export declare const DEFAULT_PROMPT_TASK_TITLE = "Prompt";
77
77
  * @public exported from `@promptbook/core`
78
78
  */
79
79
  export declare const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = "result";
80
+ /**
81
+ * Maximum file size limit
82
+ *
83
+ * @public exported from `@promptbook/core`
84
+ */
85
+ export declare const DEFAULT_MAX_FILE_SIZE: number;
80
86
  /**
81
87
  * Warning message for the generated sections and files
82
88
  *
@@ -17,5 +17,5 @@ import type { RemoteClientOptions } from '../remote-server/types/RemoteClientOpt
17
17
  */
18
18
  export declare function compilePipelineOnRemoteServer<TCustomOptions = undefined>(pipelineString: PipelineString, options: RemoteClientOptions<TCustomOptions>): Promise<PipelineJson>;
19
19
  /**
20
- * TODO: !!!! Do not return Promise<PipelineJson> But PreparePipelineTask
20
+ * TODO: [🐚] Do not return Promise<PipelineJson> But PreparePipelineTask
21
21
  */
@@ -13,5 +13,5 @@ import type { RemoteClientOptions } from '../remote-server/types/RemoteClientOpt
13
13
  */
14
14
  export declare function preparePipelineOnRemoteServer<TCustomOptions = undefined>(pipeline: PipelineJson, options: RemoteClientOptions<TCustomOptions>): Promise<PipelineJson>;
15
15
  /**
16
- * TODO: !!!! Do not return Promise<PipelineJson> But PreparePipelineTask
16
+ * TODO: [🐚] Do not return Promise<PipelineJson> But PreparePipelineTask
17
17
  */
@@ -9,7 +9,7 @@ import type { ApplicationRemoteServerClientOptions } from '../../types/RemoteSer
9
9
  */
10
10
  export type PromptbookServer_Identification<TCustomOptions> = PromptbookServer_ApplicationIdentification<TCustomOptions> | PromptbookServer_AnonymousIdentification;
11
11
  /**
12
- * Application mode is @@@!!!
12
+ * Application mode is the situation when you run known and well-defined books with your own API keys
13
13
  *
14
14
  * @public exported from `@promptbook/remote-server`
15
15
  * @public exported from `@promptbook/remote-client`
@@ -21,7 +21,10 @@ export type PromptbookServer_ApplicationIdentification<TCustomOptions> = Applica
21
21
  readonly isAnonymous: false;
22
22
  };
23
23
  /**
24
- * Anonymous mode is @@@!!!
24
+ * Anonymous mode is when you run arbitrary user books without API keys from the user
25
+ *
26
+ * Note: This is useful in situations when the LLM provider does not allow API requests to be made from the client side
27
+ * It is a kind of proxy mode
25
28
  *
26
29
  * @public exported from `@promptbook/remote-server`
27
30
  * @public exported from `@promptbook/remote-client`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/pdf",
3
- "version": "0.84.0-16",
3
+ "version": "0.84.0-18",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "private": false,
6
6
  "sideEffects": false,
@@ -8,8 +8,10 @@
8
8
  "type": "git",
9
9
  "url": "https://github.com/webgptorg/promptbook"
10
10
  },
11
+ "author": "Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)",
11
12
  "contributors": [
12
- "Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)"
13
+ "Pavol Hejný <pavol@ptbk.io> (https://www.pavolhejny.com/)",
14
+ "Jiří Jahn <jiri@ptbk.io> (https://www.ptbk.io/)"
13
15
  ],
14
16
  "keywords": [
15
17
  "ai",
@@ -23,6 +25,9 @@
23
25
  "natural-language-processing",
24
26
  "nlp",
25
27
  "openai",
28
+ "o3",
29
+ "o3-mini",
30
+ "deepseek",
26
31
  "gpt-3",
27
32
  "gpt-4",
28
33
  "gpt-4o",
@@ -33,7 +38,7 @@
33
38
  "anthropic",
34
39
  "LLMOps"
35
40
  ],
36
- "license": "FSL-1.1-Apache-2.0",
41
+ "license": "UNLICENSED",
37
42
  "bugs": {
38
43
  "url": "https://github.com/webgptorg/promptbook/issues"
39
44
  },
@@ -42,7 +47,7 @@
42
47
  "module": "./esm/index.es.js",
43
48
  "typings": "./esm/typings/src/_packages/pdf.index.d.ts",
44
49
  "peerDependencies": {
45
- "@promptbook/core": "0.84.0-16"
50
+ "@promptbook/core": "0.84.0-18"
46
51
  },
47
52
  "dependencies": {
48
53
  "crypto-js": "4.2.0",
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-15';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-17';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -196,6 +196,12 @@
196
196
  * @public exported from `@promptbook/core`
197
197
  */
198
198
  var DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
199
+ /**
200
+ * Maximum file size limit
201
+ *
202
+ * @public exported from `@promptbook/core`
203
+ */
204
+ var DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
199
205
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
200
206
  /**
201
207
  * The maximum number of iterations for loops
@@ -3627,11 +3633,10 @@
3627
3633
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3628
3634
  var _a;
3629
3635
  return __awaiter(this, void 0, void 0, function () {
3630
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3631
- return __generator(this, function (_l) {
3632
- switch (_l.label) {
3636
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, fileContent, _f, _g, filename_1, fileExtension, mimeType;
3637
+ return __generator(this, function (_h) {
3638
+ switch (_h.label) {
3633
3639
  case 0:
3634
- console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
3635
3640
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
3636
3641
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
3637
3642
  name = knowledgeSource.name;
@@ -3643,7 +3648,7 @@
3643
3648
  url = knowledgeSourceContent;
3644
3649
  return [4 /*yield*/, fetch(url)];
3645
3650
  case 1:
3646
- response_1 = _l.sent();
3651
+ response_1 = _h.sent();
3647
3652
  mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3648
3653
  if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
3649
3654
  return [2 /*return*/, {
@@ -3693,17 +3698,19 @@
3693
3698
  filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
3694
3699
  return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
3695
3700
  case 2:
3696
- _l.sent();
3697
- _g = (_f = tools.fs).writeFile;
3698
- _h = [path.join(rootDirname_1, filepath)];
3699
- _k = (_j = Buffer).from;
3701
+ _h.sent();
3702
+ _g = (_f = Buffer).from;
3700
3703
  return [4 /*yield*/, response_1.arrayBuffer()];
3701
- case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3704
+ case 3:
3705
+ fileContent = _g.apply(_f, [_h.sent()]);
3706
+ if (fileContent.length > DEFAULT_MAX_FILE_SIZE /* <- TODO: Allow to pass different value to remote server */) {
3707
+ throw new LimitReachedError("File is too large (".concat(Math.round(fileContent.length / 1024 / 1024), "MB). Maximum allowed size is ").concat(Math.round(DEFAULT_MAX_FILE_SIZE / 1024 / 1024), "MB."));
3708
+ }
3709
+ return [4 /*yield*/, tools.fs.writeFile(path.join(rootDirname_1, filepath), fileContent)];
3702
3710
  case 4:
3703
- _l.sent();
3711
+ _h.sent();
3704
3712
  // TODO: [💵] Check the file security
3705
- // TODO: !!!!!!!! Check the file size (if it is not too big)
3706
- // TODO: !!!!!!!! Delete the file after the scraping is done
3713
+ // TODO: [🧹][🧠] Delete the file after the scraping is done
3707
3714
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3708
3715
  case 5:
3709
3716
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
@@ -3720,7 +3727,7 @@
3720
3727
  mimeType = extensionToMimeType(fileExtension || '');
3721
3728
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
3722
3729
  case 6:
3723
- if (!(_l.sent())) {
3730
+ if (!(_h.sent())) {
3724
3731
  throw new NotFoundError(spaceTrim__default["default"](function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
3725
3732
  }
3726
3733
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -6361,7 +6368,7 @@
6361
6368
  // Note: Module `markitdown-ts` has no types available, so it is imported using `require`
6362
6369
  // eslint-disable-next-line @typescript-eslint/no-var-requires
6363
6370
  var MarkItDown = require('markitdown-ts').MarkItDown;
6364
- // <- TODO: 'Use Markitdown directly not through this package
6371
+ // <- TODO: [🍀] Use Markitdown directly not through this package
6365
6372
  this.markitdown = new MarkItDown();
6366
6373
  }
6367
6374
  Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
@@ -6406,7 +6413,7 @@
6406
6413
  case 2:
6407
6414
  if (!!(_f.sent())) return [3 /*break*/, 5];
6408
6415
  src = source.filename || source.url || null;
6409
- // console.log('!!!', { src, source, cacheFilehandler });
6416
+ // console.log('!!', { src, source, cacheFilehandler });
6410
6417
  if (src === null) {
6411
6418
  throw new UnexpectedError('Source has no filename or url');
6412
6419
  }
@@ -6419,12 +6426,12 @@
6419
6426
  result = _f.sent();
6420
6427
  if (result === null || result === undefined) {
6421
6428
  throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
6422
- // <- TODO: !!! Make MarkitdownError
6429
+ // <- TODO: [🍀] Make MarkitdownError
6423
6430
  }
6424
- // console.log('!!!', { result, cacheFilehandler });
6431
+ // console.log('!!', { result, cacheFilehandler });
6425
6432
  return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
6426
6433
  case 4:
6427
- // console.log('!!!', { result, cacheFilehandler });
6434
+ // console.log('!!', { result, cacheFilehandler });
6428
6435
  _f.sent();
6429
6436
  _f.label = 5;
6430
6437
  case 5: return [2 /*return*/, cacheFilehandler];