@promptbook/core 0.100.1 → 0.100.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -8,7 +8,7 @@ import { forTime } from 'waitasecond';
8
8
  import { parse, unparse } from 'papaparse';
9
9
  import hexEncoder from 'crypto-js/enc-hex';
10
10
  import sha256 from 'crypto-js/sha256';
11
- import { basename, join, dirname } from 'path';
11
+ import { basename, join, dirname, isAbsolute } from 'path';
12
12
  import { SHA256 } from 'crypto-js';
13
13
  import { lookup, extension } from 'mime-types';
14
14
  import moment from 'moment';
@@ -28,7 +28,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
28
28
  * @generated
29
29
  * @see https://github.com/webgptorg/promptbook
30
30
  */
31
- const PROMPTBOOK_ENGINE_VERSION = '0.100.1';
31
+ const PROMPTBOOK_ENGINE_VERSION = '0.100.2';
32
32
  /**
33
33
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
34
34
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -1852,7 +1852,7 @@ function isValidEmail(email) {
1852
1852
  }
1853
1853
 
1854
1854
  /**
1855
- * Tests if given string is valid URL.
1855
+ * Tests if given string is valid file path.
1856
1856
  *
1857
1857
  * Note: This does not check whether the file exists, only whether the path is valid
1858
1858
  * @public exported from `@promptbook/utils`
@@ -1864,18 +1864,25 @@ function isValidFilePath(filename) {
1864
1864
  if (filename.split('\n').length > 1) {
1865
1865
  return false;
1866
1866
  }
1867
- if (filename.split(' ').length >
1868
- 5 /* <- TODO: [๐Ÿง ][๐Ÿˆท] Make some better non-arbitrary way how to distinct filenames from informational texts */) {
1867
+ // Normalize slashes early so heuristics can detect path-like inputs
1868
+ const filenameSlashes = filename.replace(/\\/g, '/');
1869
+ // Reject strings that look like sentences (informational text)
1870
+ // Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation
1871
+ // But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter)
1872
+ if (filename.trim().length > 60 && // long enough to be a sentence
1873
+ /[.!?]/.test(filename) && // contains sentence punctuation
1874
+ filename.split(' ').length > 8 && // has many words
1875
+ !/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path
1876
+ ) {
1869
1877
  return false;
1870
1878
  }
1871
- const filenameSlashes = filename.split('\\').join('/');
1872
1879
  // Absolute Unix path: /hello.txt
1873
1880
  if (/^(\/)/i.test(filenameSlashes)) {
1874
1881
  // console.log(filename, 'Absolute Unix path: /hello.txt');
1875
1882
  return true;
1876
1883
  }
1877
- // Absolute Windows path: /hello.txt
1878
- if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) {
1884
+ // Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename)
1885
+ if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) {
1879
1886
  // console.log(filename, 'Absolute Windows path: /hello.txt');
1880
1887
  return true;
1881
1888
  }
@@ -7875,9 +7882,15 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
7875
7882
  }
7876
7883
  if (isValidUrl(knowledgeSourceContent)) {
7877
7884
  const url = knowledgeSourceContent;
7885
+ if (isVerbose) {
7886
+ console.info(`๐Ÿ“„ [1] "${name}" is available at "${url}"`);
7887
+ }
7878
7888
  const response = await fetch(url); // <- TODO: [๐Ÿง ] Scraping and fetch proxy
7879
7889
  const mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
7880
7890
  if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [๐Ÿ’ต] */)) {
7891
+ if (isVerbose) {
7892
+ console.info(`๐Ÿ“„ [2] "${name}" tools.fs is not available or URL is not a PDF.`);
7893
+ }
7881
7894
  return {
7882
7895
  source: name,
7883
7896
  filename: null,
@@ -7913,13 +7926,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
7913
7926
  await tools.fs.mkdir(dirname(join(rootDirname, filepath)), { recursive: true });
7914
7927
  }
7915
7928
  catch (error) {
7929
+ if (isVerbose) {
7930
+ console.info(`๐Ÿ“„ [3] "${name}" error creating cache directory`);
7931
+ }
7916
7932
  // Note: If we can't create cache directory, we'll handle it when trying to write the file
7917
7933
  // This handles read-only filesystems, permission issues, and missing parent directories
7918
- if (error instanceof Error && (error.message.includes('EROFS') ||
7919
- error.message.includes('read-only') ||
7920
- error.message.includes('EACCES') ||
7921
- error.message.includes('EPERM') ||
7922
- error.message.includes('ENOENT'))) ;
7934
+ if (error instanceof Error &&
7935
+ (error.message.includes('EROFS') ||
7936
+ error.message.includes('read-only') ||
7937
+ error.message.includes('EACCES') ||
7938
+ error.message.includes('EPERM') ||
7939
+ error.message.includes('ENOENT'))) ;
7923
7940
  else {
7924
7941
  // Re-throw other unexpected errors
7925
7942
  throw error;
@@ -7934,13 +7951,17 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
7934
7951
  await tools.fs.writeFile(join(rootDirname, filepath), fileContent);
7935
7952
  }
7936
7953
  catch (error) {
7954
+ if (isVerbose) {
7955
+ console.info(`๐Ÿ“„ [4] "${name}" error writing cache file`);
7956
+ }
7937
7957
  // Note: If we can't write to cache, we'll process the file directly from memory
7938
7958
  // This handles read-only filesystems like Vercel
7939
- if (error instanceof Error && (error.message.includes('EROFS') ||
7940
- error.message.includes('read-only') ||
7941
- error.message.includes('EACCES') ||
7942
- error.message.includes('EPERM') ||
7943
- error.message.includes('ENOENT'))) {
7959
+ if (error instanceof Error &&
7960
+ (error.message.includes('EROFS') ||
7961
+ error.message.includes('read-only') ||
7962
+ error.message.includes('EACCES') ||
7963
+ error.message.includes('EPERM') ||
7964
+ error.message.includes('ENOENT'))) {
7944
7965
  // Return a handler that works directly with the downloaded content
7945
7966
  return {
7946
7967
  source: name,
@@ -7962,6 +7983,9 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
7962
7983
  }
7963
7984
  // TODO: [💵] Check the file security
7964
7985
  // TODO: [🧹][🧠] Delete the file after the scraping is done
7986
+ if (isVerbose) {
7987
+ console.info(`๐Ÿ“„ [5] "${name}" cached at "${join(rootDirname, filepath)}"`);
7988
+ }
7965
7989
  return makeKnowledgeSourceHandler({ name, knowledgeSourceContent: filepath }, tools, {
7966
7990
  ...options,
7967
7991
  rootDirname,
@@ -7976,7 +8000,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
7976
8000
  throw new EnvironmentMismatchError('Can not import file knowledge in non-file pipeline');
7977
8001
  // <- TODO: [🧠] What is the best error type here?
7978
8002
  }
7979
- const filename = join(rootDirname, knowledgeSourceContent).split('\\').join('/');
8003
+ const filename = isAbsolute(knowledgeSourceContent)
8004
+ ? knowledgeSourceContent
8005
+ : join(rootDirname, knowledgeSourceContent).split('\\').join('/');
8006
+ if (isVerbose) {
8007
+ console.info(`๐Ÿ“„ [6] "${name}" is a valid file "${filename}"`);
8008
+ }
7980
8009
  const fileExtension = getFileExtension(filename);
7981
8010
  const mimeType = extensionToMimeType(fileExtension || '');
7982
8011
  if (!(await isFileExisting(filename, tools.fs))) {
@@ -8018,6 +8047,12 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
8018
8047
  };
8019
8048
  }
8020
8049
  else {
8050
+ if (isVerbose) {
8051
+ console.info(`๐Ÿ“„ [7] "${name}" is just a explicit string text with a knowledge source`);
8052
+ console.info('---');
8053
+ console.info(knowledgeSourceContent);
8054
+ console.info('---');
8055
+ }
8021
8056
  return {
8022
8057
  source: name,
8023
8058
  filename: null,