@promptbook/markitdown 0.86.22 → 0.86.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.86.22';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.86.31';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3636,119 +3636,60 @@ function valueToString(value) {
3636
3636
  }
3637
3637
 
3638
3638
  /**
3639
- * Extract all used variable names from given JavaScript/TypeScript script
3639
+ * Parses the given script and returns the list of all used variables that are not defined in the script
3640
3640
  *
3641
- * @param script JavaScript/TypeScript script
3642
- * @returns Set of variable names
3641
+ * @param script from which to extract the variables
3642
+ * @returns the list of variable names
3643
3643
  * @throws {ParseError} if the script is invalid
3644
- * @public exported from `@promptbook/utils` <- Note: [👖] This is usable elsewhere than in Promptbook, so keeping in utils
3644
+ * @public exported from `@promptbook/execute-javascript`
3645
3645
  */
3646
- function extractVariablesFromScript(script) {
3647
- if (script.trim() === '') {
3648
- return new Set();
3649
- }
3646
+ function extractVariablesFromJavascript(script) {
3650
3647
  const variables = new Set();
3651
- // JS keywords and builtins to exclude
3652
- const exclude = new Set([
3653
- // Keywords
3654
- 'break',
3655
- 'case',
3656
- 'catch',
3657
- 'class',
3658
- 'const',
3659
- 'continue',
3660
- 'debugger',
3661
- 'default',
3662
- 'delete',
3663
- 'do',
3664
- 'else',
3665
- 'export',
3666
- 'extends',
3667
- 'false',
3668
- 'finally',
3669
- 'for',
3670
- 'function',
3671
- 'if',
3672
- 'import',
3673
- 'in',
3674
- 'instanceof',
3675
- 'let',
3676
- 'new',
3677
- 'null',
3678
- 'return',
3679
- 'super',
3680
- 'switch',
3681
- 'this',
3682
- 'throw',
3683
- 'true',
3684
- 'try',
3685
- 'typeof',
3686
- 'var',
3687
- 'void',
3688
- 'while',
3689
- 'with',
3690
- 'yield',
3691
- // Common globals
3692
- 'console',
3693
- 'JSON',
3694
- 'Error',
3695
- // Typescript types
3696
- 'string',
3697
- 'number',
3698
- 'boolean',
3699
- 'object',
3700
- 'symbol',
3701
- // Common methods on built-in objects
3702
- 'test',
3703
- 'match',
3704
- 'exec',
3705
- 'replace',
3706
- 'search',
3707
- 'split',
3708
- ]);
3648
+ const originalScript = script;
3649
+ script = `(()=>{${script}})()`;
3709
3650
  try {
3710
- // Note: Extract variables from template literals like ${variable}
3711
- const templateRegex = /\$\{([a-zA-Z_$][a-zA-Z0-9_$]*)\}/g;
3712
- let match;
3713
- while ((match = templateRegex.exec(script)) !== null) {
3714
- const varName = match[1];
3715
- if (!exclude.has(varName)) {
3716
- variables.add(varName);
3651
+ for (let i = 0; i < LOOP_LIMIT; i++)
3652
+ try {
3653
+ eval(script); // <- TODO: Use `JavascriptExecutionTools.execute` here
3717
3654
  }
3718
- }
3719
- // Note: Process the script to handle normal variable usage
3720
- const processedScript = script
3721
- .replace(/'(?:\\.|[^'\\])*'/g, "''") // <- Note: Remove string literals
3722
- .replace(/"(?:\\.|[^"\\])*"/g, '""')
3723
- .replace(/`(?:\\.|[^`\\])*`/g, '``')
3724
- .replace(/\/(?:\\.|[^/\\])*\/[gimsuy]*/g, '{}'); // <- Note: Remove regex literals
3725
- // Note: Find identifiers in function arguments
3726
- const funcArgRegex = /\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/g;
3727
- const funcNames = new Set();
3728
- while ((match = funcArgRegex.exec(processedScript)) !== null) {
3729
- funcNames.add(match[1]);
3730
- }
3731
- // Find variable declarations to exclude them
3732
- const declaredVars = new Set();
3733
- const declRegex = /\b(const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\b/g;
3734
- while ((match = declRegex.exec(processedScript)) !== null) {
3735
- declaredVars.add(match[2]);
3736
- }
3737
- // Note: Find identifiers in the script
3738
- const identifierRegex = /\b([a-zA-Z_$][a-zA-Z0-9_$]*)\b/g;
3739
- while ((match = identifierRegex.exec(processedScript)) !== null) {
3740
- const name = match[1];
3741
- // Add if not excluded, not a function name, and not a declared variable
3742
- if (!exclude.has(name) && !funcNames.has(name) && !declaredVars.has(name)) {
3743
- variables.add(name);
3655
+ catch (error) {
3656
+ if (!(error instanceof ReferenceError)) {
3657
+ throw error;
3658
+ }
3659
+ /*
3660
+ Note: Parsing the error
3661
+ 🌟 Most devices:
3662
+ [ReferenceError: thing is not defined]
3663
+
3664
+ 🍏 iPhone's Safari:
3665
+ [ReferenceError: Can't find variable: thing]
3666
+ */
3667
+ let variableName = undefined;
3668
+ if (error.message.startsWith(`Can't`)) {
3669
+ // 🍏 Case
3670
+ variableName = error.message.split(' ').pop();
3671
+ }
3672
+ else {
3673
+ // 🌟 Case
3674
+ variableName = error.message.split(' ').shift();
3675
+ }
3676
+ if (variableName === undefined) {
3677
+ throw error;
3678
+ }
3679
+ if (script.includes(variableName + '(')) {
3680
+ script = `const ${variableName} = ()=>'';` + script;
3681
+ }
3682
+ else {
3683
+ variables.add(variableName);
3684
+ script = `const ${variableName} = '';` + script;
3685
+ }
3744
3686
  }
3745
- }
3746
3687
  }
3747
3688
  catch (error) {
3748
3689
  if (!(error instanceof Error)) {
3749
3690
  throw error;
3750
3691
  }
3751
- throw new ParseError(spaceTrim((block) => `
3692
+ throw new ParseError(spaceTrim$1((block) => `
3752
3693
  Can not extract variables from the script
3753
3694
  ${block(error.stack || error.message)}
3754
3695
 
@@ -3761,7 +3702,7 @@ function extractVariablesFromScript(script) {
3761
3702
  The script:
3762
3703
 
3763
3704
  \`\`\`javascript
3764
- ${block(script)}
3705
+ ${block(originalScript)}
3765
3706
  \`\`\`
3766
3707
  `));
3767
3708
  }
@@ -3782,19 +3723,24 @@ function extractVariablesFromScript(script) {
3782
3723
  function extractParameterNamesFromTask(task) {
3783
3724
  const { title, description, taskType, content, preparedContent, jokerParameterNames, foreach } = task;
3784
3725
  const parameterNames = new Set();
3726
+ let contentParameters;
3727
+ if (taskType !== 'SCRIPT_TASK') {
3728
+ contentParameters = extractParameterNames(content);
3729
+ }
3730
+ else {
3731
+ // TODO: What if script is not javascript?
3732
+ // const { contentLanguage } = task;
3733
+ // if (contentLanguage !== 'javascript') {
3734
+ contentParameters = extractVariablesFromJavascript(content);
3735
+ }
3785
3736
  for (const parameterName of [
3786
3737
  ...extractParameterNames(title),
3787
3738
  ...extractParameterNames(description || ''),
3788
- ...extractParameterNames(content),
3739
+ ...contentParameters,
3789
3740
  ...extractParameterNames(preparedContent || ''),
3790
3741
  ]) {
3791
3742
  parameterNames.add(parameterName);
3792
3743
  }
3793
- if (taskType === 'SCRIPT_TASK') {
3794
- for (const parameterName of extractVariablesFromScript(content)) {
3795
- parameterNames.add(parameterName);
3796
- }
3797
- }
3798
3744
  for (const jokerName of jokerParameterNames || []) {
3799
3745
  parameterNames.add(jokerName);
3800
3746
  }