@promptbook/pdf 0.84.0-13 → 0.84.0-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md ADDED
@@ -0,0 +1 @@
1
+ [Functional Source License, Version 1.1, ALv2 Future License](https://github.com/getsentry/fsl.software/blob/main/FSL-1.1-ALv2.template.md)
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  <!-- ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten -->
2
2
 
3
- # Promptbook
3
+ # Promptbook
4
4
 
5
5
 
6
6
 
@@ -16,10 +16,10 @@
16
16
 
17
17
  ## ❄ New Features
18
18
 
19
+ - 🐋 **Support of [DeepSeek models](https://www.deepseek.com/)**
19
20
  - 💙 Working [the **Book** language v1.0.0](https://github.com/webgptorg/book)
20
21
  - 🖤 Run books from CLI - `npx ptbk run path/to/your/book`
21
- - 📚 Support of `.docx`, `.doc` and `.pdf` documents
22
- - ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
22
+ - 📚 Support of `.docx`, `.doc` and `.pdf` documents as knowledge
23
23
 
24
24
 
25
25
 
package/esm/index.es.js CHANGED
@@ -7,7 +7,7 @@ import { format } from 'prettier';
7
7
  import parserHtml from 'prettier/parser-html';
8
8
  import { forTime } from 'waitasecond';
9
9
  import sha256 from 'crypto-js/sha256';
10
- import { lookup } from 'mime-types';
10
+ import { lookup, extension } from 'mime-types';
11
11
  import { unparse, parse } from 'papaparse';
12
12
 
13
13
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
@@ -24,7 +24,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
24
24
  * @generated
25
25
  * @see https://github.com/webgptorg/promptbook
26
26
  */
27
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
27
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-14';
28
28
  /**
29
29
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
30
30
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3580,6 +3580,17 @@ function getFileExtension(value) {
3580
3580
  return match ? match[1].toLowerCase() : null;
3581
3581
  }
3582
3582
 
3583
+ /**
3584
+ * Convert mime type to file extension
3585
+ *
3586
+ * Note: If the mime type is invalid, `null` is returned
3587
+ *
3588
+ * @private within the repository
3589
+ */
3590
+ function mimeTypeToExtension(value) {
3591
+ return extension(value) || null;
3592
+ }
3593
+
3583
3594
  /**
3584
3595
  * The built-in `fetch` function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
3585
3596
  *
@@ -3633,7 +3644,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3633
3644
  case 1:
3634
3645
  response_1 = _l.sent();
3635
3646
  mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3636
- if (tools.fs === undefined || !url.endsWith('.pdf')) {
3647
+ if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
3637
3648
  return [2 /*return*/, {
3638
3649
  source: name,
3639
3650
  filename: null,
@@ -3678,7 +3689,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3678
3689
  basename = url.split('/').pop() || titleToName(url);
3679
3690
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3680
3691
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3681
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3692
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
3682
3693
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3683
3694
  case 2:
3684
3695
  _l.sent();
@@ -3689,9 +3700,9 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3689
3700
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3690
3701
  case 4:
3691
3702
  _l.sent();
3692
- // TODO: !!!!!!!! Check the file security
3703
+ // TODO: [💵] Check the file security
3693
3704
  // TODO: !!!!!!!! Check the file size (if it is not too big)
3694
- // TODO: !!!!!!!! Delete the file
3705
+ // TODO: !!!!!!!! Delete the file after the scraping is done
3695
3706
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3696
3707
  case 5:
3697
3708
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
@@ -6310,7 +6321,7 @@ var markitdownScraperMetadata = $deepFreeze({
6310
6321
  className: 'MarkitdownScraper',
6311
6322
  mimeTypes: [
6312
6323
  'application/pdf',
6313
- // TODO: Make priority for scrapers and than allow all mime types here:
6324
+ // TODO: [💵] Make priority for scrapers and then analyze which mime-types Markitdown can scrape and allow all mime types here:
6314
6325
  // 'text/html',
6315
6326
  // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
6316
6327
  ],
@@ -6346,10 +6357,10 @@ var MarkitdownScraper = /** @class */ (function () {
6346
6357
  this.tools = tools;
6347
6358
  this.options = options;
6348
6359
  this.markdownScraper = new MarkdownScraper(tools, options);
6360
+ // Note: Module `markitdown-ts` has no types available, so it is imported using `require`
6349
6361
  // eslint-disable-next-line @typescript-eslint/no-var-requires
6350
6362
  var MarkItDown = require('markitdown-ts').MarkItDown;
6351
- // <- TODO: !!! Use Markitdown directly not through this package
6352
- // <- Note: !!!!!!!
6363
+ // <- TODO: Use Markitdown directly, not through this package
6353
6364
  this.markitdown = new MarkItDown();
6354
6365
  }
6355
6366
  Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
@@ -6394,12 +6405,12 @@ var MarkitdownScraper = /** @class */ (function () {
6394
6405
  case 2:
6395
6406
  if (!!(_f.sent())) return [3 /*break*/, 5];
6396
6407
  src = source.filename || source.url || null;
6397
- console.log('!!!', { src: src, source: source, cacheFilehandler: cacheFilehandler });
6408
+ // console.log('!!!', { src, source, cacheFilehandler });
6398
6409
  if (src === null) {
6399
6410
  throw new UnexpectedError('Source has no filename or url');
6400
6411
  }
6401
6412
  return [4 /*yield*/, this.markitdown.convert(src, {
6402
- // TODO: !!!!!! Pass when sacraping Youtube
6413
+ // TODO: Pass when scraping YouTube
6403
6414
  // enableYoutubeTranscript: true,
6404
6415
  // youtubeTranscriptLanguage: 'en',
6405
6416
  })];
@@ -6409,9 +6420,10 @@ var MarkitdownScraper = /** @class */ (function () {
6409
6420
  throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
6410
6421
  // <- TODO: !!! Make MarkitdownError
6411
6422
  }
6412
- console.log('!!!', { result: result, cacheFilehandler: cacheFilehandler });
6423
+ // console.log('!!!', { result, cacheFilehandler });
6413
6424
  return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
6414
6425
  case 4:
6426
+ // console.log('!!!', { result, cacheFilehandler });
6415
6427
  _f.sent();
6416
6428
  _f.label = 5;
6417
6429
  case 5: return [2 /*return*/, cacheFilehandler];