@promptbook/markitdown 0.103.0-55 → 0.103.0-56

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { mkdir, rm, readFile } from 'fs/promises';
2
- import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
2
+ import spaceTrim$2, { spaceTrim as spaceTrim$1 } from 'spacetrim';
3
3
  import { SHA256 } from 'crypto-js';
4
4
  import hexEncoder from 'crypto-js/enc-hex';
5
5
  import { basename, join, dirname, isAbsolute } from 'path';
@@ -24,7 +24,7 @@ const BOOK_LANGUAGE_VERSION = '2.0.0';
24
24
  * @generated
25
25
  * @see https://github.com/webgptorg/promptbook
26
26
  */
27
- const PROMPTBOOK_ENGINE_VERSION = '0.103.0-55';
27
+ const PROMPTBOOK_ENGINE_VERSION = '0.103.0-56';
28
28
  /**
29
29
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
30
30
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -47,6 +47,17 @@ const PROMPTBOOK_ENGINE_VERSION = '0.103.0-55';
47
47
  function keepUnused(...valuesToKeep) {
48
48
  }
49
49
 
50
+ /**
51
+ * Trims string from all 4 sides
52
+ *
53
+ * Note: This is a re-exported function from the `spacetrim` package which is
54
+ * Developed by same author @hejny as this package
55
+ *
56
+ * @public exported from `@promptbook/utils`
57
+ * @see https://github.com/hejny/spacetrim#usage
58
+ */
59
+ const spaceTrim = spaceTrim$1;
60
+
50
61
  /**
51
62
  * @private util of `@promptbook/color`
52
63
  * @de
@@ -95,6 +106,7 @@ function take(initialValue) {
95
106
  * @public exported from `@promptbook/color`
96
107
  */
97
108
  const CSS_COLORS = {
109
+ promptbook: '#79EAFD',
98
110
  transparent: 'rgba(0,0,0,0)',
99
111
  aliceblue: '#f0f8ff',
100
112
  antiquewhite: '#faebd7',
@@ -310,6 +322,28 @@ class Color {
310
322
  throw new Error(`Can not create color from given object`);
311
323
  }
312
324
  }
325
+ /**
326
+ * Creates a new Color instance from miscellaneous formats
327
+ * It just does not throw error when it fails, it returns PROMPTBOOK_COLOR instead
328
+ *
329
+ * @param color
330
+ * @returns Color object
331
+ */
332
+ static fromSafe(color) {
333
+ try {
334
+ return Color.from(color);
335
+ }
336
+ catch (error) {
337
+ // <- Note: Can not use `assertsError(error)` here because it causes circular dependency
338
+ console.warn(spaceTrim((block) => `
339
+ Color.fromSafe error:
340
+ ${block(error.message)}
341
+
342
+ Returning default PROMPTBOOK_COLOR.
343
+ `));
344
+ return Color.fromString('promptbook');
345
+ }
346
+ }
313
347
  /**
314
348
  * Creates a new Color instance from miscellaneous string formats
315
349
  *
@@ -919,7 +953,7 @@ const ADMIN_GITHUB_NAME = 'hejny';
919
953
  *
920
954
  * @public exported from `@promptbook/core`
921
955
  */
922
- const PROMPTBOOK_COLOR = Color.fromHex('#79EAFD');
956
+ const PROMPTBOOK_COLOR = Color.fromString('promptbook');
923
957
  // <- TODO: [🧠][🈵] Using `Color` here increases the package size approx 3kb, maybe remove it
924
958
  /**
925
959
  * Colors for syntax highlighting in the `<BookEditor/>`
@@ -1143,7 +1177,7 @@ class KnowledgeScrapeError extends Error {
1143
1177
  function getErrorReportUrl(error) {
1144
1178
  const report = {
1145
1179
  title: `🐜 Error report from ${NAME}`,
1146
- body: spaceTrim((block) => `
1180
+ body: spaceTrim$2((block) => `
1147
1181
 
1148
1182
 
1149
1183
  \`${error.name || 'Error'}\` has occurred in the [${NAME}], please look into it @${ADMIN_GITHUB_NAME}.
@@ -2040,7 +2074,7 @@ function pipelineJsonToString(pipelineJson) {
2040
2074
  pipelineString += '\n\n';
2041
2075
  pipelineString += '```' + contentLanguage;
2042
2076
  pipelineString += '\n';
2043
- pipelineString += spaceTrim(content);
2077
+ pipelineString += spaceTrim$2(content);
2044
2078
  // <- TODO: [main] !!3 Escape
2045
2079
  // <- TODO: [🧠] Some clear strategy how to spaceTrim the blocks
2046
2080
  pipelineString += '\n';
@@ -2161,7 +2195,7 @@ function checkSerializableAsJson(options) {
2161
2195
  }
2162
2196
  else if (typeof value === 'object') {
2163
2197
  if (value instanceof Date) {
2164
- throw new UnexpectedError(spaceTrim((block) => `
2198
+ throw new UnexpectedError(spaceTrim$2((block) => `
2165
2199
  \`${name}\` is Date
2166
2200
 
2167
2201
  Use \`string_date_iso8601\` instead
@@ -2180,7 +2214,7 @@ function checkSerializableAsJson(options) {
2180
2214
  throw new UnexpectedError(`${name} is RegExp`);
2181
2215
  }
2182
2216
  else if (value instanceof Error) {
2183
- throw new UnexpectedError(spaceTrim((block) => `
2217
+ throw new UnexpectedError(spaceTrim$2((block) => `
2184
2218
  \`${name}\` is unserialized Error
2185
2219
 
2186
2220
  Use function \`serializeError\`
@@ -2203,7 +2237,7 @@ function checkSerializableAsJson(options) {
2203
2237
  }
2204
2238
  catch (error) {
2205
2239
  assertsError(error);
2206
- throw new UnexpectedError(spaceTrim((block) => `
2240
+ throw new UnexpectedError(spaceTrim$2((block) => `
2207
2241
  \`${name}\` is not serializable
2208
2242
 
2209
2243
  ${block(error.stack || error.message)}
@@ -2235,7 +2269,7 @@ function checkSerializableAsJson(options) {
2235
2269
  }
2236
2270
  }
2237
2271
  else {
2238
- throw new UnexpectedError(spaceTrim((block) => `
2272
+ throw new UnexpectedError(spaceTrim$2((block) => `
2239
2273
  \`${name}\` is unknown type
2240
2274
 
2241
2275
  Additional message for \`${name}\`:
@@ -3266,7 +3300,7 @@ function serializeError(error) {
3266
3300
  const { name, message, stack } = error;
3267
3301
  const { id } = error;
3268
3302
  if (!Object.keys(ALL_ERRORS).includes(name)) {
3269
- console.error(spaceTrim((block) => `
3303
+ console.error(spaceTrim$2((block) => `
3270
3304
 
3271
3305
  Cannot serialize error with name "${name}"
3272
3306
 
@@ -3299,7 +3333,7 @@ function jsonParse(value) {
3299
3333
  }
3300
3334
  else if (typeof value !== 'string') {
3301
3335
  console.error('Can not parse JSON from non-string value.', { text: value });
3302
- throw new Error(spaceTrim(`
3336
+ throw new Error(spaceTrim$2(`
3303
3337
  Can not parse JSON from non-string value.
3304
3338
 
3305
3339
  The value type: ${typeof value}
@@ -3313,7 +3347,7 @@ function jsonParse(value) {
3313
3347
  if (!(error instanceof Error)) {
3314
3348
  throw error;
3315
3349
  }
3316
- throw new Error(spaceTrim((block) => `
3350
+ throw new Error(spaceTrim$2((block) => `
3317
3351
  ${block(error.message)}
3318
3352
 
3319
3353
  The expected JSON text:
@@ -3366,7 +3400,7 @@ function deserializeError(error) {
3366
3400
  message = `${name}: ${message}`;
3367
3401
  }
3368
3402
  if (stack !== undefined && stack !== '') {
3369
- message = spaceTrim((block) => `
3403
+ message = spaceTrim$2((block) => `
3370
3404
  ${block(message)}
3371
3405
 
3372
3406
  Original stack trace:
@@ -3878,14 +3912,14 @@ class MultipleLlmExecutionTools {
3878
3912
  if (description === undefined) {
3879
3913
  return headLine;
3880
3914
  }
3881
- return spaceTrim((block) => `
3915
+ return spaceTrim$2((block) => `
3882
3916
  ${headLine}
3883
3917
 
3884
3918
  ${ /* <- Note: Indenting the description: */block(description)}
3885
3919
  `);
3886
3920
  })
3887
3921
  .join('\n\n');
3888
- return spaceTrim((block) => `
3922
+ return spaceTrim$2((block) => `
3889
3923
  Multiple LLM Providers:
3890
3924
 
3891
3925
  ${block(innerModelsTitlesAndDescriptions)}
@@ -3976,7 +4010,7 @@ class MultipleLlmExecutionTools {
3976
4010
  // 1) OpenAI throw PipelineExecutionError: Parameter `{knowledge}` is not defined
3977
4011
  // 2) AnthropicClaude throw PipelineExecutionError: Parameter `{knowledge}` is not defined
3978
4012
  // 3) ...
3979
- spaceTrim((block) => `
4013
+ spaceTrim$2((block) => `
3980
4014
  All execution tools of ${this.title} failed:
3981
4015
 
3982
4016
  ${block(errors
@@ -3989,7 +4023,7 @@ class MultipleLlmExecutionTools {
3989
4023
  throw new PipelineExecutionError(`You have not provided any \`LlmExecutionTools\` into ${this.title}`);
3990
4024
  }
3991
4025
  else {
3992
- throw new PipelineExecutionError(spaceTrim((block) => `
4026
+ throw new PipelineExecutionError(spaceTrim$2((block) => `
3993
4027
  You have not provided any \`LlmExecutionTools\` that support model variant "${prompt.modelRequirements.modelVariant}" into ${this.title}
3994
4028
 
3995
4029
  Available \`LlmExecutionTools\`:
@@ -4022,7 +4056,7 @@ class MultipleLlmExecutionTools {
4022
4056
  */
4023
4057
  function joinLlmExecutionTools(title, ...llmExecutionTools) {
4024
4058
  if (llmExecutionTools.length === 0) {
4025
- const warningMessage = spaceTrim(`
4059
+ const warningMessage = spaceTrim$2(`
4026
4060
  You have not provided any \`LlmExecutionTools\`
4027
4061
  This means that you won't be able to execute any prompts that require large language models like GPT-4 or Anthropic's Claude.
4028
4062
 
@@ -4339,14 +4373,14 @@ function $registeredScrapersMessage(availableScrapers) {
4339
4373
  return { ...metadata, isMetadataAviailable, isInstalled, isAvailableInTools };
4340
4374
  });
4341
4375
  if (metadata.length === 0) {
4342
- return spaceTrim(`
4376
+ return spaceTrim$2(`
4343
4377
  **No scrapers are available**
4344
4378
 
4345
4379
  This is a unexpected behavior, you are probably using some broken version of Promptbook
4346
4380
  At least there should be available the metadata of the scrapers
4347
4381
  `);
4348
4382
  }
4349
- return spaceTrim((block) => `
4383
+ return spaceTrim$2((block) => `
4350
4384
  Available scrapers are:
4351
4385
  ${block(metadata
4352
4386
  .map(({ packageName, className, isMetadataAviailable, isInstalled, mimeTypes, isAvailableInBrowser, isAvailableInTools, }, i) => {
@@ -4457,7 +4491,7 @@ const promptbookFetch = async (urlOrRequest, init) => {
4457
4491
  else if (urlOrRequest instanceof Request) {
4458
4492
  url = urlOrRequest.url;
4459
4493
  }
4460
- throw new PromptbookFetchError(spaceTrim((block) => `
4494
+ throw new PromptbookFetchError(spaceTrim$2((block) => `
4461
4495
  Can not fetch "${url}"
4462
4496
 
4463
4497
  Fetch error:
@@ -4618,7 +4652,7 @@ async function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
4618
4652
  const fileExtension = getFileExtension(filename);
4619
4653
  const mimeType = extensionToMimeType(fileExtension || '');
4620
4654
  if (!(await isFileExisting(filename, tools.fs))) {
4621
- throw new NotFoundError(spaceTrim((block) => `
4655
+ throw new NotFoundError(spaceTrim$2((block) => `
4622
4656
  Can not make source handler for file which does not exist:
4623
4657
 
4624
4658
  File:
@@ -4711,7 +4745,7 @@ async function prepareKnowledgePieces(knowledgeSources, tools, options) {
4711
4745
  // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
4712
4746
  break;
4713
4747
  }
4714
- console.warn(spaceTrim((block) => `
4748
+ console.warn(spaceTrim$2((block) => `
4715
4749
  Cannot scrape knowledge from source despite the scraper \`${scraper.metadata.className}\` supports the mime type "${sourceHandler.mimeType}".
4716
4750
 
4717
4751
  The source:
@@ -4727,7 +4761,7 @@ async function prepareKnowledgePieces(knowledgeSources, tools, options) {
4727
4761
  // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks
4728
4762
  }
4729
4763
  if (partialPieces === null) {
4730
- throw new KnowledgeScrapeError(spaceTrim((block) => `
4764
+ throw new KnowledgeScrapeError(spaceTrim$2((block) => `
4731
4765
  Cannot scrape knowledge
4732
4766
 
4733
4767
  The source:
@@ -5302,7 +5336,7 @@ const CsvFormatParser = {
5302
5336
  const { value, outputParameterName, settings, mapCallback, onProgress } = options;
5303
5337
  const csv = csvParse(value, settings);
5304
5338
  if (csv.errors.length !== 0) {
5305
- throw new CsvFormatError(spaceTrim((block) => `
5339
+ throw new CsvFormatError(spaceTrim$2((block) => `
5306
5340
  CSV parsing error
5307
5341
 
5308
5342
  Error(s) from CSV parsing:
@@ -5347,7 +5381,7 @@ const CsvFormatParser = {
5347
5381
  const { value, settings, mapCallback, onProgress } = options;
5348
5382
  const csv = csvParse(value, settings);
5349
5383
  if (csv.errors.length !== 0) {
5350
- throw new CsvFormatError(spaceTrim((block) => `
5384
+ throw new CsvFormatError(spaceTrim$2((block) => `
5351
5385
  CSV parsing error
5352
5386
 
5353
5387
  Error(s) from CSV parsing:
@@ -5557,7 +5591,7 @@ function mapAvailableToExpectedParameters(options) {
5557
5591
  }
5558
5592
  // Phase 2️⃣: Non-matching mapping
5559
5593
  if (expectedParameterNames.size !== availableParametersNames.size) {
5560
- throw new PipelineExecutionError(spaceTrim((block) => `
5594
+ throw new PipelineExecutionError(spaceTrim$2((block) => `
5561
5595
  Can not map available parameters to expected parameters
5562
5596
 
5563
5597
  Mapped parameters:
@@ -6342,7 +6376,7 @@ async function executeFormatSubvalues(options) {
6342
6376
  return /* not await */ executeAttempts({ ...options, logLlmCall });
6343
6377
  }
6344
6378
  if (jokerParameterNames.length !== 0) {
6345
- throw new UnexpectedError(spaceTrim((block) => `
6379
+ throw new UnexpectedError(spaceTrim$2((block) => `
6346
6380
  JOKER parameters are not supported together with FOREACH command
6347
6381
 
6348
6382
  [🧞‍♀️] This should be prevented in \`validatePipeline\`
@@ -6355,7 +6389,7 @@ async function executeFormatSubvalues(options) {
6355
6389
  if (formatDefinition === undefined) {
6356
6390
  throw new UnexpectedError(
6357
6391
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
6358
- spaceTrim((block) => `
6392
+ spaceTrim$2((block) => `
6359
6393
  Unsupported format "${task.foreach.formatName}"
6360
6394
 
6361
6395
  Available formats:
@@ -6372,7 +6406,7 @@ async function executeFormatSubvalues(options) {
6372
6406
  if (subvalueParser === undefined) {
6373
6407
  throw new UnexpectedError(
6374
6408
  // <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
6375
- spaceTrim((block) => `
6409
+ spaceTrim$2((block) => `
6376
6410
  Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
6377
6411
 
6378
6412
  Available subformat names for format "${formatDefinition.formatName}":
@@ -6412,7 +6446,7 @@ async function executeFormatSubvalues(options) {
6412
6446
  if (!(error instanceof PipelineExecutionError)) {
6413
6447
  throw error;
6414
6448
  }
6415
- const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
6449
+ const highLevelError = new PipelineExecutionError(spaceTrim$2((block) => `
6416
6450
  ${error.message}
6417
6451
 
6418
6452
  This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
@@ -6436,7 +6470,7 @@ async function executeFormatSubvalues(options) {
6436
6470
  ...options,
6437
6471
  priority: priority + index,
6438
6472
  parameters: allSubparameters,
6439
- pipelineIdentification: spaceTrim((block) => `
6473
+ pipelineIdentification: spaceTrim$2((block) => `
6440
6474
  ${block(pipelineIdentification)}
6441
6475
  Subparameter index: ${index}
6442
6476
  `),
@@ -6445,7 +6479,7 @@ async function executeFormatSubvalues(options) {
6445
6479
  }
6446
6480
  catch (error) {
6447
6481
  if (length > BIG_DATASET_TRESHOLD) {
6448
- console.error(spaceTrim((block) => `
6482
+ console.error(spaceTrim$2((block) => `
6449
6483
  ${error.message}
6450
6484
 
6451
6485
  This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
@@ -7317,8 +7351,8 @@ class MarkdownScraper {
7317
7351
  knowledgeTextPieces.map(async (knowledgeTextPiece, i) => {
7318
7352
  // Note: These are just default values, they will be overwritten by the actual values:
7319
7353
  let name = `piece-${i}`;
7320
- let title = spaceTrim(knowledgeTextPiece.substring(0, 100));
7321
- const knowledgePieceContent = spaceTrim(knowledgeTextPiece);
7354
+ let title = spaceTrim$2(knowledgeTextPiece.substring(0, 100));
7355
+ const knowledgePieceContent = spaceTrim$2(knowledgeTextPiece);
7322
7356
  let keywords = [];
7323
7357
  const index = [];
7324
7358
  /*
@@ -7331,7 +7365,7 @@ class MarkdownScraper {
7331
7365
  isCrashedOnError: true,
7332
7366
  });
7333
7367
  const { title: titleRaw = 'Untitled' } = titleResult.outputParameters;
7334
- title = spaceTrim(titleRaw) /* <- TODO: Maybe do in pipeline */;
7368
+ title = spaceTrim$2(titleRaw) /* <- TODO: Maybe do in pipeline */;
7335
7369
  name = titleToName(title);
7336
7370
  // --- Keywords
7337
7371
  const keywordsResult = await prepareKeywordsExecutor({ knowledgePieceContent }).asPromise({