@promptbook/markdown-utils 0.100.1 → 0.100.3-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,10 @@ Write AI applications using plain human language across multiple models and plat
29
29
 
30
30
 
31
31
 
32
+ <blockquote style="color: #ff8811">
33
+ <b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
34
+ </blockquote>
35
+
32
36
  ## 📦 Package `@promptbook/markdown-utils`
33
37
 
34
38
  - Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
package/esm/index.es.js CHANGED
@@ -7,7 +7,7 @@ import { Subject } from 'rxjs';
7
7
  import { forTime } from 'waitasecond';
8
8
  import hexEncoder from 'crypto-js/enc-hex';
9
9
  import sha256 from 'crypto-js/sha256';
10
- import { basename, join, dirname } from 'path';
10
+ import { basename, join, dirname, isAbsolute } from 'path';
11
11
  import { SHA256 } from 'crypto-js';
12
12
  import { lookup, extension } from 'mime-types';
13
13
  import { parse, unparse } from 'papaparse';
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.100.1';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.3-0';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -584,7 +584,7 @@ function isValidEmail(email) {
584
584
  }
585
585
 
586
586
  /**
587
- * Tests if given string is valid URL.
587
+ * Tests if given string is valid file path.
588
588
  *
589
589
  * Note: This does not check if the file exists only if the path is valid
590
590
  * @public exported from `@promptbook/utils`
@@ -596,18 +596,25 @@ function isValidFilePath(filename) {
596
596
  if (filename.split('\n').length > 1) {
597
597
  return false;
598
598
  }
599
- if (filename.split(' ').length >
600
- 5 /* <- TODO: [🧠][🈷] Make some better non-arbitrary way how to distinct filenames from informational texts */) {
599
+ // Normalize slashes early so heuristics can detect path-like inputs
600
+ const filenameSlashes = filename.replace(/\\/g, '/');
601
+ // Reject strings that look like sentences (informational text)
602
+ // Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation
603
+ // But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter)
604
+ if (filename.trim().length > 60 && // long enough to be a sentence
605
+ /[.!?]/.test(filename) && // contains sentence punctuation
606
+ filename.split(' ').length > 8 && // has many words
607
+ !/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path
608
+ ) {
601
609
  return false;
602
610
  }
603
- const filenameSlashes = filename.split('\\').join('/');
604
611
  // Absolute Unix path: /hello.txt
605
612
  if (/^(\/)/i.test(filenameSlashes)) {
606
613
  // console.log(filename, 'Absolute Unix path: /hello.txt');
607
614
  return true;
608
615
  }
609
- // Absolute Windows path: /hello.txt
610
- if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
616
+ // Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename)
617
+ if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) {
611
618
  // console.log(filename, 'Absolute Windows path: /hello.txt');
612
619
  return true;
613
620
  }
@@ -3697,9 +3704,15 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3697
3704
  }
3698
3705
  if (isValidUrl(knowledgeSourceContent)) {
3699
3706
  const url = knowledgeSourceContent;
3707
+ if (isVerbose) {
3708
+ console.info(`📄 [1] "${name}" is available at "${url}"`);
3709
+ }
3700
3710
  const response = await fetch(url); // <- TODO: [🧠] Scraping and fetch proxy
3701
3711
  const mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3702
3712
  if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
3713
+ if (isVerbose) {
3714
+ console.info(`📄 [2] "${name}" tools.fs is not available or URL is not a PDF.`);
3715
+ }
3703
3716
  return {
3704
3717
  source: name,
3705
3718
  filename: null,
@@ -3735,13 +3748,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3735
3748
  await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3736
3749
  }
3737
3750
  catch (error) {
3751
+ if (isVerbose) {
3752
+ console.info(`📄 [3] "${name}" error creating cache directory`);
3753
+ }
3738
3754
  // Note: If we can't create cache directory, we'll handle it when trying to write the file
3739
3755
  // This handles read-only filesystems, permission issues, and missing parent directories
3740
- if (error instanceof Error && (error.message.includes('EROFS') ||
3741
- error.message.includes('read-only') ||
3742
- error.message.includes('EACCES') ||
3743
- error.message.includes('EPERM') ||
3744
- error.message.includes('ENOENT'))) ;
3756
+ if (error instanceof Error &&
3757
+ (error.message.includes('EROFS') ||
3758
+ error.message.includes('read-only') ||
3759
+ error.message.includes('EACCES') ||
3760
+ error.message.includes('EPERM') ||
3761
+ error.message.includes('ENOENT'))) ;
3745
3762
  else {
3746
3763
  // Re-throw other unexpected errors
3747
3764
  throw error;
@@ -3756,13 +3773,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3756
3773
  await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3757
3774
  }
3758
3775
  catch (error) {
3776
+ if (isVerbose) {
3777
+ console.info(`📄 [4] "${name}" error writing cache file`);
3778
+ }
3759
3779
  // Note: If we can't write to cache, we'll process the file directly from memory
3760
3780
  // This handles read-only filesystems like Vercel
3761
- if (error instanceof Error && (error.message.includes('EROFS') ||
3762
- error.message.includes('read-only') ||
3763
- error.message.includes('EACCES') ||
3764
- error.message.includes('EPERM') ||
3765
- error.message.includes('ENOENT'))) {
3781
+ if (error instanceof Error &&
3782
+ (error.message.includes('EROFS') ||
3783
+ error.message.includes('read-only') ||
3784
+ error.message.includes('EACCES') ||
3785
+ error.message.includes('EPERM') ||
3786
+ error.message.includes('ENOENT'))) {
3766
3787
  // Return a handler that works directly with the downloaded content
3767
3788
  return {
3768
3789
  source: name,
@@ -3784,6 +3805,9 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3784
3805
  }
3785
3806
  // TODO: [💵] Check the file security
3786
3807
  // TODO: [🧹][🧠] Delete the file after the scraping is done
3808
+ if (isVerbose) {
3809
+ console.info(`📄 [5] "${name}" cached at "${join(rootDirname, filepath)}"`);
3810
+ }
3787
3811
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
3788
3812
  ...options,
3789
3813
  rootDirname,
@@ -3798,7 +3822,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3798
3822
  throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
3799
3823
  // <- TODO: [🧠] What is the best error type here`
3800
3824
  }
3801
- const filename = join(rootDirname, knowledgeSourceContent).split('\\').join('/');
3825
+ const filename = isAbsolute(knowledgeSourceContent)
3826
+ ? knowledgeSourceContent
3827
+ : join(rootDirname, knowledgeSourceContent).split('\\').join('/');
3828
+ if (isVerbose) {
3829
+ console.info(`📄 [6] "${name}" is a valid file "${filename}"`);
3830
+ }
3802
3831
  const fileExtension = getFileExtension(filename);
3803
3832
  const mimeType = extensionToMimeType(fileExtension || '');
3804
3833
  if (!(await isFileExisting(filename, tools.fs))) {
@@ -3840,6 +3869,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3840
3869
  };
3841
3870
  }
3842
3871
  else {
3872
+ if (isVerbose) {
3873
+ console.info(`📄 [7] "${name}" is just a explicit string text with a knowledge source`);
3874
+ console.info('---');
3875
+ console.info(knowledgeSourceContent);
3876
+ console.info('---');
3877
+ }
3843
3878
  return {
3844
3879
  source: name,
3845
3880
  filename: null,