@promptbook/legacy-documents 0.100.1 → 0.100.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { mkdir, rm, readFile, readdir, rename, rmdir } from 'fs/promises';
2
- import { basename, join, dirname } from 'path';
2
+ import { basename, join, dirname, isAbsolute } from 'path';
3
3
  import spaceTrim$1, { spaceTrim } from 'spacetrim';
4
4
  import { spawn } from 'child_process';
5
5
  import colors from 'colors';
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.100.1';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.2';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -914,7 +914,7 @@ function removeEmojis(text) {
914
914
  }
915
915
 
916
916
  /**
917
- * Tests if given string is valid URL.
917
+ * Tests if given string is valid file path.
918
918
  *
919
919
  * Note: This does not check if the file exists only if the path is valid
920
920
  * @public exported from `@promptbook/utils`
@@ -926,18 +926,25 @@ function isValidFilePath(filename) {
926
926
  if (filename.split('\n').length > 1) {
927
927
  return false;
928
928
  }
929
- if (filename.split(' ').length >
930
- 5 /* <- TODO: [๐Ÿง ][๐Ÿˆท] Make some better non-arbitrary way how to distinct filenames from informational texts */) {
929
+ // Normalize slashes early so heuristics can detect path-like inputs
930
+ const filenameSlashes = filename.replace(/\\/g, '/');
931
+ // Reject strings that look like sentences (informational text)
932
+ // Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation
933
+ // But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter)
934
+ if (filename.trim().length > 60 && // long enough to be a sentence
935
+ /[.!?]/.test(filename) && // contains sentence punctuation
936
+ filename.split(' ').length > 8 && // has many words
937
+ !/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path
938
+ ) {
931
939
  return false;
932
940
  }
933
- const filenameSlashes = filename.split('\\').join('/');
934
941
  // Absolute Unix path: /hello.txt
935
942
  if (/^(\/)/i.test(filenameSlashes)) {
936
943
  // console.log(filename, 'Absolute Unix path: /hello.txt');
937
944
  return true;
938
945
  }
939
- // Absolute Windows path: /hello.txt
940
- if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
946
+ // Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename)
947
+ if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) {
941
948
  // console.log(filename, 'Absolute Windows path: /hello.txt');
942
949
  return true;
943
950
  }
@@ -3791,9 +3798,15 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3791
3798
  }
3792
3799
  if (isValidUrl(knowledgeSourceContent)) {
3793
3800
  const url = knowledgeSourceContent;
3801
+ if (isVerbose) {
3802
+ console.info(`๐Ÿ“„ [1] "${name}" is available at "${url}"`);
3803
+ }
3794
3804
  const response = await fetch(url); // <- TODO: [๐Ÿง ] Scraping and fetch proxy
3795
3805
  const mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3796
3806
  if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [๐Ÿ’ต] */)) {
3807
+ if (isVerbose) {
3808
+ console.info(`๐Ÿ“„ [2] "${name}" tools.fs is not available or URL is not a PDF.`);
3809
+ }
3797
3810
  return {
3798
3811
  source: name,
3799
3812
  filename: null,
@@ -3829,13 +3842,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3829
3842
  await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
3830
3843
  }
3831
3844
  catch (error) {
3845
+ if (isVerbose) {
3846
+ console.info(`๐Ÿ“„ [3] "${name}" error creating cache directory`);
3847
+ }
3832
3848
  // Note: If we can't create cache directory, we'll handle it when trying to write the file
3833
3849
  // This handles read-only filesystems, permission issues, and missing parent directories
3834
- if (error instanceof Error && (error.message.includes('EROFS') ||
3835
- error.message.includes('read-only') ||
3836
- error.message.includes('EACCES') ||
3837
- error.message.includes('EPERM') ||
3838
- error.message.includes('ENOENT'))) ;
3850
+ if (error instanceof Error &&
3851
+ (error.message.includes('EROFS') ||
3852
+ error.message.includes('read-only') ||
3853
+ error.message.includes('EACCES') ||
3854
+ error.message.includes('EPERM') ||
3855
+ error.message.includes('ENOENT'))) ;
3839
3856
  else {
3840
3857
  // Re-throw other unexpected errors
3841
3858
  throw error;
@@ -3850,13 +3867,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3850
3867
  await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
3851
3868
  }
3852
3869
  catch (error) {
3870
+ if (isVerbose) {
3871
+ console.info(`๐Ÿ“„ [4] "${name}" error writing cache file`);
3872
+ }
3853
3873
  // Note: If we can't write to cache, we'll process the file directly from memory
3854
3874
  // This handles read-only filesystems like Vercel
3855
- if (error instanceof Error && (error.message.includes('EROFS') ||
3856
- error.message.includes('read-only') ||
3857
- error.message.includes('EACCES') ||
3858
- error.message.includes('EPERM') ||
3859
- error.message.includes('ENOENT'))) {
3875
+ if (error instanceof Error &&
3876
+ (error.message.includes('EROFS') ||
3877
+ error.message.includes('read-only') ||
3878
+ error.message.includes('EACCES') ||
3879
+ error.message.includes('EPERM') ||
3880
+ error.message.includes('ENOENT'))) {
3860
3881
  // Return a handler that works directly with the downloaded content
3861
3882
  return {
3862
3883
  source: name,
@@ -3878,6 +3899,9 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3878
3899
  }
3879
3900
  // TODO: [💵] Check the file security
3880
3901
  // TODO: [🧹][🧠] Delete the file after the scraping is done
3902
+ if (isVerbose) {
3903
+ console.info(`๐Ÿ“„ [5] "${name}" cached at "${join(rootDirname, filepath)}"`);
3904
+ }
3881
3905
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
3882
3906
  ...options,
3883
3907
  rootDirname,
@@ -3892,7 +3916,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3892
3916
  throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
3893
3917
  // <- TODO: [🧠] What is the best error type here`
3894
3918
  }
3895
- const filename = join(rootDirname, knowledgeSourceContent).split('\\').join('/');
3919
+ const filename = isAbsolute(knowledgeSourceContent)
3920
+ ? knowledgeSourceContent
3921
+ : join(rootDirname, knowledgeSourceContent).split('\\').join('/');
3922
+ if (isVerbose) {
3923
+ console.info(`๐Ÿ“„ [6] "${name}" is a valid file "${filename}"`);
3924
+ }
3896
3925
  const fileExtension = getFileExtension(filename);
3897
3926
  const mimeType = extensionToMimeType(fileExtension || '');
3898
3927
  if (!(await isFileExisting(filename, tools.fs))) {
@@ -3934,6 +3963,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3934
3963
  };
3935
3964
  }
3936
3965
  else {
3966
+ if (isVerbose) {
3967
+ console.info(`๐Ÿ“„ [7] "${name}" is just a explicit string text with a knowledge source`);
3968
+ console.info('---');
3969
+ console.info(knowledgeSourceContent);
3970
+ console.info('---');
3971
+ }
3937
3972
  return {
3938
3973
  source: name,
3939
3974
  filename: null,