@promptbook/markitdown 0.89.0-9 → 0.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +9 -11
  2. package/esm/index.es.js +131 -39
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/servers.d.ts +40 -0
  5. package/esm/typings/src/_packages/core.index.d.ts +8 -4
  6. package/esm/typings/src/_packages/types.index.d.ts +18 -0
  7. package/esm/typings/src/_packages/utils.index.d.ts +4 -0
  8. package/esm/typings/src/cli/cli-commands/login.d.ts +0 -1
  9. package/esm/typings/src/cli/common/$provideLlmToolsForCli.d.ts +16 -3
  10. package/esm/typings/src/cli/test/ptbk.d.ts +1 -1
  11. package/esm/typings/src/commands/EXPECT/expectCommandParser.d.ts +2 -0
  12. package/esm/typings/src/config.d.ts +10 -19
  13. package/esm/typings/src/errors/0-index.d.ts +7 -4
  14. package/esm/typings/src/errors/PipelineExecutionError.d.ts +1 -1
  15. package/esm/typings/src/errors/WrappedError.d.ts +10 -0
  16. package/esm/typings/src/errors/assertsError.d.ts +11 -0
  17. package/esm/typings/src/execution/PromptbookFetch.d.ts +1 -1
  18. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +9 -0
  19. package/esm/typings/src/formats/csv/utils/isValidCsvString.test.d.ts +1 -0
  20. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +3 -0
  21. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +9 -0
  22. package/esm/typings/src/formats/xml/utils/isValidXmlString.test.d.ts +1 -0
  23. package/esm/typings/src/llm-providers/_common/register/{$provideEnvFilepath.d.ts → $provideEnvFilename.d.ts} +2 -2
  24. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +1 -1
  25. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForTestingAndScriptsAndPlayground.d.ts +1 -1
  26. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForWizzardOrCli.d.ts +11 -2
  27. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsFromEnv.d.ts +1 -1
  28. package/esm/typings/src/remote-server/openapi-types.d.ts +284 -0
  29. package/esm/typings/src/remote-server/openapi.d.ts +187 -0
  30. package/esm/typings/src/remote-server/socket-types/_subtypes/Identification.d.ts +7 -1
  31. package/esm/typings/src/remote-server/socket-types/_subtypes/identificationToPromptbookToken.d.ts +11 -0
  32. package/esm/typings/src/remote-server/socket-types/_subtypes/promptbookTokenToIdentification.d.ts +10 -0
  33. package/esm/typings/src/remote-server/startRemoteServer.d.ts +1 -2
  34. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +15 -9
  35. package/esm/typings/src/storage/env-storage/$EnvStorage.d.ts +40 -0
  36. package/esm/typings/src/types/typeAliases.d.ts +26 -0
  37. package/package.json +9 -5
  38. package/umd/index.umd.js +131 -39
  39. package/umd/index.umd.js.map +1 -1
  40. package/esm/typings/src/cli/test/ptbk2.d.ts +0 -5
package/README.md CHANGED
@@ -23,10 +23,6 @@
23
23
 
24
24
 
25
25
 
26
- <blockquote style="color: #ff8811">
27
- <b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
28
- </blockquote>
29
-
30
26
  ## 📦 Package `@promptbook/markitdown`
31
27
 
32
28
  - Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
@@ -244,6 +240,10 @@ But unlike programming languages, it is designed to be understandable by non-pro
244
240
 
245
241
 
246
242
 
243
+ ## 🔒 Security
244
+
245
+ For information on reporting security vulnerabilities, see our [Security Policy](./SECURITY.md).
246
+
247
247
  ## 📦 Packages _(for developers)_
248
248
 
249
249
  This library is divided into several packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
@@ -300,7 +300,7 @@ The following glossary is used to clarify certain concepts:
300
300
  ### General LLM / AI terms
301
301
 
302
302
  - **Prompt drift** is a phenomenon where the AI model starts to generate outputs that are not aligned with the original prompt. This can happen due to the model's training data, the prompt's wording, or the model's architecture.
303
- - **Pipeline, workflow or chain** is a sequence of tasks that are executed in a specific order. In the context of AI, a pipeline can refer to a sequence of AI models that are used to process data.
303
+ - [**Pipeline, workflow scenario or chain** is a sequence of tasks that are executed in a specific order. In the context of AI, a pipeline can refer to a sequence of AI models that are used to process data.](https://github.com/webgptorg/promptbook/discussions/88)
304
304
  - **Fine-tuning** is a process where a pre-trained AI model is further trained on a specific dataset to improve its performance on a specific task.
305
305
  - **Zero-shot learning** is a machine learning paradigm where a model is trained to perform a task without any labeled examples. Instead, the model is provided with a description of the task and is expected to generate the correct output.
306
306
  - **Few-shot learning** is a machine learning paradigm where a model is trained to perform a task with only a few labeled examples. This is in contrast to traditional machine learning, where models are trained on large datasets.
@@ -308,10 +308,6 @@ The following glossary is used to clarify certain concepts:
308
308
  - **Retrieval-augmented generation** is a machine learning paradigm where a model generates text by retrieving relevant information from a large database of text. This approach combines the benefits of generative models and retrieval models.
309
309
  - **Longtail** refers to non-common or rare events, items, or entities that are not well-represented in the training data of machine learning models. Longtail items are often challenging for models to predict accurately.
310
310
 
311
-
312
-
313
-
314
-
315
311
  _Note: This section is not a complete dictionary, but rather a list of general AI / LLM terms that are connected with Promptbook_
316
312
 
317
313
 
@@ -425,6 +421,8 @@ See [TODO.md](./TODO.md)
425
421
 
426
422
  ## 🖋️ Contributing
427
423
 
428
- We are open to pull requests, feedback, and suggestions.
424
+ You can also ⭐ star the project, [follow us on GitHub](https://github.com/hejny) or [various other social networks](https://www.pavolhejny.com/contact/). We are open to [pull requests, feedback, and suggestions](./CONTRIBUTING.md).
425
+
426
+ ## 📞 Support
429
427
 
430
- You can also star the project, [follow us on GitHub](https://github.com/hejny) or [various other social networks](https://www.pavolhejny.com/contact/).
428
+ If you need help or have questions, please check our [Support Resources](./SUPPORT.md).
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.89.0-9';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.89.0';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -89,6 +89,7 @@ const ADMIN_EMAIL = 'pavol@ptbk.io';
89
89
  * @public exported from `@promptbook/core`
90
90
  */
91
91
  const ADMIN_GITHUB_NAME = 'hejny';
92
+ // <- TODO: [🐊] Pick the best claim
92
93
  /**
93
94
  * When the title is not provided, the default title is used
94
95
  *
@@ -121,6 +122,7 @@ const VALUE_STRINGS = {
121
122
  infinity: '(infinity; ∞)',
122
123
  negativeInfinity: '(negative infinity; -∞)',
123
124
  unserializable: '(unserializable value)',
125
+ circular: '(circular JSON)',
124
126
  };
125
127
  /**
126
128
  * Small number limit
@@ -160,7 +162,7 @@ const DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
160
162
  */
161
163
  const DEFAULT_MAX_EXECUTION_ATTEMPTS = 10; // <- TODO: [🤹‍♂️]
162
164
  // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
163
- // TODO: !!!!!! Just .promptbook dir, hardocode others
165
+ // TODO: Just `.promptbook` in config, hardcode subfolders like `download-cache` or `execution-cache`
164
166
  /**
165
167
  * Where to store the temporary downloads
166
168
  *
@@ -878,9 +880,60 @@ class ParseError extends Error {
878
880
  * TODO: Maybe split `ParseError` and `ApplyError`
879
881
  */
880
882
 
883
+ /**
884
+ * This error type indicates that a non-Error object was thrown somewhere in the code and was wrapped into a `WrappedError`
885
+ *
886
+ * @public exported from `@promptbook/core`
887
+ */
888
+ class WrappedError extends Error {
889
+ constructor(whatWasThrown) {
890
+ const tag = `[🤮]`;
891
+ console.error(tag, whatWasThrown);
892
+ super(spaceTrim$1(`
893
+ Non-Error object was thrown
894
+
895
+ Note: Look for ${tag} in the console for more details
896
+ Please report issue on ${ADMIN_EMAIL}
897
+ `));
898
+ this.name = 'WrappedError';
899
+ Object.setPrototypeOf(this, WrappedError.prototype);
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Helper used in catch blocks to assert that the error is an instance of `Error`
905
+ *
906
+ * @param whatWasThrown Any object that was thrown
907
+ * @returns Nothing if the error is an instance of `Error`
908
+ * @throws `WrappedError` or `UnexpectedError` if the error is not standard
909
+ *
910
+ * @private within the repository
911
+ */
912
+ function assertsError(whatWasThrown) {
913
+ // Case 1: Handle error which was rethrown as `WrappedError`
914
+ if (whatWasThrown instanceof WrappedError) {
915
+ const wrappedError = whatWasThrown;
916
+ throw wrappedError;
917
+ }
918
+ // Case 2: Handle unexpected errors
919
+ if (whatWasThrown instanceof UnexpectedError) {
920
+ const unexpectedError = whatWasThrown;
921
+ throw unexpectedError;
922
+ }
923
+ // Case 3: Handle standard errors - keep them up to consumer
924
+ if (whatWasThrown instanceof Error) {
925
+ return;
926
+ }
927
+ // Case 4: Handle non-standard errors - wrap them into `WrappedError` and throw
928
+ throw new WrappedError(whatWasThrown);
929
+ }
930
+
881
931
  /**
882
932
  * Function isValidJsonString will tell you if the string is valid JSON or not
883
933
  *
934
+ * @param value The string to check
935
+ * @returns True if the string is a valid JSON string, false otherwise
936
+ *
884
937
  * @public exported from `@promptbook/utils`
885
938
  */
886
939
  function isValidJsonString(value /* <- [👨‍⚖️] */) {
@@ -889,9 +942,7 @@ function isValidJsonString(value /* <- [👨‍⚖️] */) {
889
942
  return true;
890
943
  }
891
944
  catch (error) {
892
- if (!(error instanceof Error)) {
893
- throw error;
894
- }
945
+ assertsError(error);
895
946
  if (error.message.includes('Unexpected token')) {
896
947
  return false;
897
948
  }
@@ -1244,9 +1295,7 @@ function checkSerializableAsJson(options) {
1244
1295
  JSON.stringify(value); // <- TODO: [0]
1245
1296
  }
1246
1297
  catch (error) {
1247
- if (!(error instanceof Error)) {
1248
- throw error;
1249
- }
1298
+ assertsError(error);
1250
1299
  throw new UnexpectedError(spaceTrim((block) => `
1251
1300
  \`${name}\` is not serializable
1252
1301
 
@@ -2035,7 +2084,7 @@ class PipelineExecutionError extends Error {
2035
2084
  }
2036
2085
  }
2037
2086
  /**
2038
- * TODO: !!!!!! Add id to all errors
2087
+ * TODO: [🧠][🌂] Add id to all errors
2039
2088
  */
2040
2089
 
2041
2090
  /**
@@ -2246,7 +2295,10 @@ const PROMPTBOOK_ERRORS = {
2246
2295
  PipelineExecutionError,
2247
2296
  PipelineLogicError,
2248
2297
  PipelineUrlError,
2298
+ AuthenticationError,
2299
+ PromptbookFetchError,
2249
2300
  UnexpectedError,
2301
+ WrappedError,
2250
2302
  // TODO: [🪑]> VersionMismatchError,
2251
2303
  };
2252
2304
  /**
@@ -2263,8 +2315,6 @@ const COMMON_JAVASCRIPT_ERRORS = {
2263
2315
  TypeError,
2264
2316
  URIError,
2265
2317
  AggregateError,
2266
- AuthenticationError,
2267
- PromptbookFetchError,
2268
2318
  /*
2269
2319
  Note: Not widely supported
2270
2320
  > InternalError,
@@ -2387,8 +2437,8 @@ function createTask(options) {
2387
2437
  updatedAt = new Date();
2388
2438
  errors.push(...executionResult.errors);
2389
2439
  warnings.push(...executionResult.warnings);
2390
- // <- TODO: !!! Only unique errors and warnings should be added (or filtered)
2391
- // TODO: [🧠] !!! errors, warning, isSuccessful are redundant both in `ExecutionTask` and `ExecutionTask.currentValue`
2440
+ // <- TODO: [🌂] Only unique errors and warnings should be added (or filtered)
2441
+ // TODO: [🧠] !! errors, warning, isSuccessful are redundant both in `ExecutionTask` and `ExecutionTask.currentValue`
2392
2442
  // Also maybe move `ExecutionTask.currentValue.usage` -> `ExecutionTask.usage`
2393
2443
  // And delete `ExecutionTask.currentValue.preparedPipeline`
2394
2444
  assertsTaskSuccessful(executionResult);
@@ -2398,6 +2448,7 @@ function createTask(options) {
2398
2448
  partialResultSubject.next(executionResult);
2399
2449
  }
2400
2450
  catch (error) {
2451
+ assertsError(error);
2401
2452
  status = 'ERROR';
2402
2453
  errors.push(error);
2403
2454
  partialResultSubject.error(error);
@@ -2789,14 +2840,15 @@ class MultipleLlmExecutionTools {
2789
2840
  }
2790
2841
  }
2791
2842
  catch (error) {
2792
- if (!(error instanceof Error) || error instanceof UnexpectedError) {
2843
+ assertsError(error);
2844
+ if (error instanceof UnexpectedError) {
2793
2845
  throw error;
2794
2846
  }
2795
2847
  errors.push({ llmExecutionTools, error });
2796
2848
  }
2797
2849
  }
2798
2850
  if (errors.length === 1) {
2799
- throw errors[0];
2851
+ throw errors[0].error;
2800
2852
  }
2801
2853
  else if (errors.length > 1) {
2802
2854
  throw new PipelineExecutionError(
@@ -3251,9 +3303,7 @@ const promptbookFetch = async (urlOrRequest, init) => {
3251
3303
  return await fetch(urlOrRequest, init);
3252
3304
  }
3253
3305
  catch (error) {
3254
- if (!(error instanceof Error)) {
3255
- throw error;
3256
- }
3306
+ assertsError(error);
3257
3307
  let url;
3258
3308
  if (typeof urlOrRequest === 'string') {
3259
3309
  url = urlOrRequest;
@@ -3484,9 +3534,7 @@ async function prepareKnowledgePieces(knowledgeSources, tools, options) {
3484
3534
  knowledgePreparedUnflatten[index] = pieces;
3485
3535
  }
3486
3536
  catch (error) {
3487
- if (!(error instanceof Error)) {
3488
- throw error;
3489
- }
3537
+ assertsError(error);
3490
3538
  console.warn(error);
3491
3539
  // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
3492
3540
  }
@@ -3778,13 +3826,19 @@ function valueToString(value) {
3778
3826
  return value.toISOString();
3779
3827
  }
3780
3828
  else {
3781
- return JSON.stringify(value);
3829
+ try {
3830
+ return JSON.stringify(value);
3831
+ }
3832
+ catch (error) {
3833
+ if (error instanceof TypeError && error.message.includes('circular structure')) {
3834
+ return VALUE_STRINGS.circular;
3835
+ }
3836
+ throw error;
3837
+ }
3782
3838
  }
3783
3839
  }
3784
3840
  catch (error) {
3785
- if (!(error instanceof Error)) {
3786
- throw error;
3787
- }
3841
+ assertsError(error);
3788
3842
  console.error(error);
3789
3843
  return VALUE_STRINGS.unserializable;
3790
3844
  }
@@ -3841,9 +3895,7 @@ function extractVariablesFromJavascript(script) {
3841
3895
  }
3842
3896
  }
3843
3897
  catch (error) {
3844
- if (!(error instanceof Error)) {
3845
- throw error;
3846
- }
3898
+ assertsError(error);
3847
3899
  throw new ParseError(spaceTrim$1((block) => `
3848
3900
  Can not extract variables from the script
3849
3901
  ${block(error.stack || error.message)}
@@ -3962,6 +4014,28 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
3962
4014
  // encoding: 'utf-8',
3963
4015
  });
3964
4016
 
4017
+ /**
4018
+ * Function to check if a string is valid CSV
4019
+ *
4020
+ * @param value The string to check
4021
+ * @returns True if the string is a valid CSV string, false otherwise
4022
+ *
4023
+ * @public exported from `@promptbook/utils`
4024
+ */
4025
+ function isValidCsvString(value) {
4026
+ try {
4027
+ // A simple check for CSV format: at least one comma and no invalid characters
4028
+ if (value.includes(',') && /^[\w\s,"']+$/.test(value)) {
4029
+ return true;
4030
+ }
4031
+ return false;
4032
+ }
4033
+ catch (error) {
4034
+ assertsError(error);
4035
+ return false;
4036
+ }
4037
+ }
4038
+
3965
4039
  /**
3966
4040
  * Definition for CSV spreadsheet
3967
4041
  *
@@ -3972,7 +4046,7 @@ const CsvFormatDefinition = {
3972
4046
  formatName: 'CSV',
3973
4047
  aliases: ['SPREADSHEET', 'TABLE'],
3974
4048
  isValid(value, settings, schema) {
3975
- return true;
4049
+ return isValidCsvString(value);
3976
4050
  },
3977
4051
  canBeValid(partialValue, settings, schema) {
3978
4052
  return true;
@@ -4126,6 +4200,30 @@ const TextFormatDefinition = {
4126
4200
  * TODO: [🏢] Allow to expect something inside each item of list and other formats
4127
4201
  */
4128
4202
 
4203
+ /**
4204
+ * Function to check if a string is valid XML
4205
+ *
4206
+ * @param value
4207
+ * @returns True if the string is a valid XML string, false otherwise
4208
+ *
4209
+ * @public exported from `@promptbook/utils`
4210
+ */
4211
+ function isValidXmlString(value) {
4212
+ try {
4213
+ const parser = new DOMParser();
4214
+ const parsedDocument = parser.parseFromString(value, 'application/xml');
4215
+ const parserError = parsedDocument.getElementsByTagName('parsererror');
4216
+ if (parserError.length > 0) {
4217
+ return false;
4218
+ }
4219
+ return true;
4220
+ }
4221
+ catch (error) {
4222
+ assertsError(error);
4223
+ return false;
4224
+ }
4225
+ }
4226
+
4129
4227
  /**
4130
4228
  * Definition for XML format
4131
4229
  *
@@ -4135,7 +4233,7 @@ const XmlFormatDefinition = {
4135
4233
  formatName: 'XML',
4136
4234
  mimeType: 'application/xml',
4137
4235
  isValid(value, settings, schema) {
4138
- return true;
4236
+ return isValidXmlString(value);
4139
4237
  },
4140
4238
  canBeValid(partialValue, settings, schema) {
4141
4239
  return true;
@@ -4708,9 +4806,7 @@ async function executeAttempts(options) {
4708
4806
  break scripts;
4709
4807
  }
4710
4808
  catch (error) {
4711
- if (!(error instanceof Error)) {
4712
- throw error;
4713
- }
4809
+ assertsError(error);
4714
4810
  if (error instanceof UnexpectedError) {
4715
4811
  throw error;
4716
4812
  }
@@ -4780,9 +4876,7 @@ async function executeAttempts(options) {
4780
4876
  break scripts;
4781
4877
  }
4782
4878
  catch (error) {
4783
- if (!(error instanceof Error)) {
4784
- throw error;
4785
- }
4879
+ assertsError(error);
4786
4880
  if (error instanceof UnexpectedError) {
4787
4881
  throw error;
4788
4882
  }
@@ -5403,9 +5497,7 @@ async function executePipeline(options) {
5403
5497
  await Promise.all(resolving);
5404
5498
  }
5405
5499
  catch (error /* <- Note: [3] */) {
5406
- if (!(error instanceof Error)) {
5407
- throw error;
5408
- }
5500
+ assertsError(error);
5409
5501
  // Note: No need to rethrow UnexpectedError
5410
5502
  // if (error instanceof UnexpectedError) {
5411
5503
  // Note: Count usage, [🧠] Maybe put to separate function executionReportJsonToUsage + DRY [🤹‍♂️]