@promptbook/markdown-utils 0.92.0-21 → 0.92.0-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/esm/index.es.js +136 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  6. package/esm/typings/src/config.d.ts +29 -11
  7. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  8. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  9. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  10. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  11. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +40 -5
  13. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  14. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  15. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  16. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  17. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  19. package/esm/typings/src/utils/$Register.d.ts +8 -7
  20. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  21. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  22. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  23. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  24. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  25. package/package.json +1 -1
  26. package/umd/index.umd.js +136 -77
  27. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -25,7 +25,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-21';
28
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -231,6 +231,12 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
231
231
  * @public exported from `@promptbook/core`
232
232
  */
233
233
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
234
+ /**
235
+ * @@@
236
+ *
237
+ * @public exported from `@promptbook/core`
238
+ */
239
+ const BIG_DATASET_TRESHOLD = 50;
234
240
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
235
241
  /**
236
242
  * The maximum number of iterations for a loops
@@ -310,7 +316,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
310
316
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
311
317
  // <- TODO: [🧜‍♂️]
312
318
  /**
313
- * @@@
319
+ * Default settings for parsing and generating CSV files in Promptbook.
314
320
  *
315
321
  * @public exported from `@promptbook/core`
316
322
  */
@@ -321,19 +327,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
321
327
  skipEmptyLines: true,
322
328
  });
323
329
  /**
324
- * @@@
330
+ * Controls whether verbose logging is enabled by default throughout the application.
325
331
  *
326
332
  * @public exported from `@promptbook/core`
327
333
  */
328
334
  let DEFAULT_IS_VERBOSE = false;
329
335
  /**
330
- * @@@
336
+ * Controls whether auto-installation of dependencies is enabled by default.
331
337
  *
332
338
  * @public exported from `@promptbook/core`
333
339
  */
334
340
  const DEFAULT_IS_AUTO_INSTALLED = false;
335
341
  /**
336
- * @@@
342
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
337
343
  *
338
344
  * @private within the repository
339
345
  */
@@ -466,7 +472,7 @@ function assertsError(whatWasThrown) {
466
472
  * Function isValidJsonString will tell you if the string is valid JSON or not
467
473
  *
468
474
  * @param value The string to check
469
- * @returns True if the string is a valid JSON string, false otherwise
475
+ * @returns `true` if the string is a valid JSON string, false otherwise
470
476
  *
471
477
  * @public exported from `@promptbook/utils`
472
478
  */
@@ -1016,8 +1022,12 @@ function checkSerializableAsJson(options) {
1016
1022
  */
1017
1023
 
1018
1024
  /**
1019
- * @@@
1025
+ * Creates a deep clone of the given object
1026
+ *
1027
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1020
1028
  *
1029
+ * @param objectValue The object to clone.
1030
+ * @returns A deep, writable clone of the input object.
1021
1031
  * @public exported from `@promptbook/utils`
1022
1032
  */
1023
1033
  function deepClone(objectValue) {
@@ -2846,11 +2856,11 @@ function normalizeTo_snake_case(text) {
2846
2856
  }
2847
2857
 
2848
2858
  /**
2849
- * Register is @@@
2859
+ * Global registry for storing and managing registered entities of a given type.
2850
2860
  *
2851
2861
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
2852
2862
  *
2853
- * @private internal utility, exported are only signleton instances of this class
2863
+ * @private internal utility, exported are only singleton instances of this class
2854
2864
  */
2855
2865
  class $Register {
2856
2866
  constructor(registerName) {
@@ -2894,10 +2904,10 @@ class $Register {
2894
2904
  }
2895
2905
 
2896
2906
  /**
2897
- * @@@
2907
+ * Global registry for storing metadata about all available scrapers and converters.
2898
2908
  *
2899
- * Note: `$` is used to indicate that this interacts with the global scope
2900
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
2909
+ * Note: `$` is used to indicate that this interacts with the global scope.
2910
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
2901
2911
  * @public exported from `@promptbook/core`
2902
2912
  */
2903
2913
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -4184,7 +4194,7 @@ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO:
4184
4194
  * Function to check if a string is valid CSV
4185
4195
  *
4186
4196
  * @param value The string to check
4187
- * @returns True if the string is a valid CSV string, false otherwise
4197
+ * @returns `true` if the string is a valid CSV string, false otherwise
4188
4198
  *
4189
4199
  * @public exported from `@promptbook/utils`
4190
4200
  */
@@ -4223,7 +4233,8 @@ const CsvFormatParser = {
4223
4233
  subvalueParsers: [
4224
4234
  {
4225
4235
  subvalueName: 'ROW',
4226
- async mapValues(value, outputParameterName, settings, mapCallback) {
4236
+ async mapValues(options) {
4237
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4227
4238
  const csv = csvParse(value, settings);
4228
4239
  if (csv.errors.length !== 0) {
4229
4240
  throw new CsvFormatError(spaceTrim((block) => `
@@ -4239,21 +4250,30 @@ const CsvFormatParser = {
4239
4250
  ${block(value)}
4240
4251
  `));
4241
4252
  }
4242
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4253
+ const mappedData = [];
4254
+ const length = csv.data.length;
4255
+ for (let index = 0; index < length; index++) {
4256
+ const row = csv.data[index];
4243
4257
  if (row[outputParameterName]) {
4244
4258
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4245
4259
  }
4246
- return {
4260
+ const mappedRow = {
4247
4261
  ...row,
4248
- [outputParameterName]: await mapCallback(row, index),
4262
+ [outputParameterName]: await mapCallback(row, index, length),
4249
4263
  };
4250
- }));
4264
+ mappedData.push(mappedRow);
4265
+ if (onProgress) {
4266
+ // Note: Report the CSV with all rows mapped so far
4267
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4268
+ }
4269
+ }
4251
4270
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4252
4271
  },
4253
4272
  },
4254
4273
  {
4255
4274
  subvalueName: 'CELL',
4256
- async mapValues(value, outputParameterName, settings, mapCallback) {
4275
+ async mapValues(options) {
4276
+ const { value, settings, mapCallback, onProgress } = options;
4257
4277
  const csv = csvParse(value, settings);
4258
4278
  if (csv.errors.length !== 0) {
4259
4279
  throw new CsvFormatError(spaceTrim((block) => `
@@ -4270,9 +4290,9 @@ const CsvFormatParser = {
4270
4290
  `));
4271
4291
  }
4272
4292
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4273
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4293
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4274
4294
  const index = rowIndex * Object.keys(row).length + columnIndex;
4275
- return /* not await */ mapCallback({ [key]: value }, index);
4295
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4276
4296
  }));
4277
4297
  }));
4278
4298
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4340,14 +4360,15 @@ const TextFormatParser = {
4340
4360
  subvalueParsers: [
4341
4361
  {
4342
4362
  subvalueName: 'LINE',
4343
- async mapValues(value, outputParameterName, settings, mapCallback) {
4363
+ async mapValues(options) {
4364
+ const { value, mapCallback, onProgress } = options;
4344
4365
  const lines = value.split('\n');
4345
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4366
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4346
4367
  // TODO: [🧠] Maybe option to skip empty line
4347
4368
  /* not await */ mapCallback({
4348
4369
  lineContent,
4349
4370
  // TODO: [🧠] Maybe also put here `lineNumber`
4350
- }, lineNumber)));
4371
+ }, lineNumber, array.length)));
4351
4372
  return mappedLines.join('\n');
4352
4373
  },
4353
4374
  },
@@ -4368,7 +4389,7 @@ const TextFormatParser = {
4368
4389
  * Function to check if a string is valid XML
4369
4390
  *
4370
4391
  * @param value
4371
- * @returns True if the string is a valid XML string, false otherwise
4392
+ * @returns `true` if the string is a valid XML string, false otherwise
4372
4393
  *
4373
4394
  * @public exported from `@promptbook/utils`
4374
4395
  */
@@ -4430,13 +4451,13 @@ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser,
4430
4451
  */
4431
4452
 
4432
4453
  /**
4433
- * Maps available parameters to expected parameters
4454
+ * Maps available parameters to expected parameters for a pipeline task.
4434
4455
  *
4435
4456
  * The strategy is:
4436
- * 1) @@@
4437
- * 2) @@@
4457
+ * 1) First, match parameters by name where both available and expected.
4458
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4438
4459
  *
4439
- * @throws {PipelineExecutionError} @@@
4460
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4440
4461
  * @private within the repository used in `createPipelineExecutor`
4441
4462
  */
4442
4463
  function mapAvailableToExpectedParameters(options) {
@@ -5054,12 +5075,16 @@ async function executeAttempts(options) {
5054
5075
  */
5055
5076
 
5056
5077
  /**
5057
- * @@@
5078
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5079
+ * Handles format and subformat resolution, error handling, and progress reporting.
5080
+ *
5081
+ * @param options - Options for execution, including task details and progress callback.
5082
+ * @returns The result of the subvalue mapping or execution attempts.
5058
5083
  *
5059
5084
  * @private internal utility of `createPipelineExecutor`
5060
5085
  */
5061
5086
  async function executeFormatSubvalues(options) {
5062
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5087
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5063
5088
  if (task.foreach === undefined) {
5064
5089
  return /* not await */ executeAttempts(options);
5065
5090
  }
@@ -5113,46 +5138,74 @@ async function executeFormatSubvalues(options) {
5113
5138
  formatSettings = csvSettings;
5114
5139
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5115
5140
  }
5116
- const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5117
- let mappedParameters;
5118
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5119
- // TODO: When done [🐚] Report progress also for each subvalue here
5120
- try {
5121
- mappedParameters = mapAvailableToExpectedParameters({
5122
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5123
- availableParameters: subparameters,
5124
- });
5125
- }
5126
- catch (error) {
5127
- if (!(error instanceof PipelineExecutionError)) {
5128
- throw error;
5141
+ const resultString = await subvalueParser.mapValues({
5142
+ value: parameterValue,
5143
+ outputParameterName: task.foreach.outputSubparameterName,
5144
+ settings: formatSettings,
5145
+ onProgress(partialResultString) {
5146
+ return onProgress(Object.freeze({
5147
+ [task.resultingParameterName]: partialResultString,
5148
+ }));
5149
+ },
5150
+ async mapCallback(subparameters, index, length) {
5151
+ let mappedParameters;
5152
+ try {
5153
+ mappedParameters = mapAvailableToExpectedParameters({
5154
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5155
+ availableParameters: subparameters,
5156
+ });
5129
5157
  }
5130
- throw new PipelineExecutionError(spaceTrim((block) => `
5131
- ${error.message}
5158
+ catch (error) {
5159
+ if (!(error instanceof PipelineExecutionError)) {
5160
+ throw error;
5161
+ }
5162
+ const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5163
+ ${error.message}
5132
5164
 
5133
- This is error in FOREACH command
5134
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5165
+ This is error in FOREACH command when mapping data
5166
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5135
5167
 
5136
- ${block(pipelineIdentification)}
5137
- Subparameter index: ${index}
5138
- `));
5139
- }
5140
- const allSubparameters = {
5141
- ...parameters,
5142
- ...mappedParameters,
5143
- };
5144
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5145
- Object.freeze(allSubparameters);
5146
- const subresultString = await executeAttempts({
5147
- ...options,
5148
- priority: priority + index,
5149
- parameters: allSubparameters,
5150
- pipelineIdentification: spaceTrim((block) => `
5151
- ${block(pipelineIdentification)}
5152
- Subparameter index: ${index}
5153
- `),
5154
- });
5155
- return subresultString;
5168
+ ${block(pipelineIdentification)}
5169
+ Subparameter index: ${index}
5170
+ `));
5171
+ if (length > BIG_DATASET_TRESHOLD) {
5172
+ console.error(highLevelError);
5173
+ return '~';
5174
+ }
5175
+ throw highLevelError;
5176
+ }
5177
+ const allSubparameters = {
5178
+ ...parameters,
5179
+ ...mappedParameters,
5180
+ };
5181
+ Object.freeze(allSubparameters);
5182
+ try {
5183
+ const subresultString = await executeAttempts({
5184
+ ...options,
5185
+ priority: priority + index,
5186
+ parameters: allSubparameters,
5187
+ pipelineIdentification: spaceTrim((block) => `
5188
+ ${block(pipelineIdentification)}
5189
+ Subparameter index: ${index}
5190
+ `),
5191
+ });
5192
+ return subresultString;
5193
+ }
5194
+ catch (error) {
5195
+ if (length > BIG_DATASET_TRESHOLD) {
5196
+ console.error(spaceTrim((block) => `
5197
+ Error in FOREACH command:
5198
+
5199
+ ${block(pipelineIdentification)}
5200
+
5201
+ ${block(pipelineIdentification)}
5202
+ Subparameter index: ${index}
5203
+ `));
5204
+ return '~';
5205
+ }
5206
+ throw error;
5207
+ }
5208
+ },
5156
5209
  });
5157
5210
  return resultString;
5158
5211
  }
@@ -5286,7 +5339,11 @@ async function getKnowledgeForTask(options) {
5286
5339
  */
5287
5340
 
5288
5341
  /**
5289
- * @@@
5342
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5343
+ * Ensures all reserved parameters are defined and throws if any are missing.
5344
+ *
5345
+ * @param options - Options including tools, pipeline, task, and context.
5346
+ * @returns An object containing all reserved parameters for the task.
5290
5347
  *
5291
5348
  * @private internal utility of `createPipelineExecutor`
5292
5349
  */
@@ -5319,18 +5376,16 @@ async function getReservedParametersForTask(options) {
5319
5376
  }
5320
5377
 
5321
5378
  /**
5322
- * @@@
5379
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5380
+ *
5381
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5382
+ * @returns The output parameters produced by the task.
5323
5383
  *
5324
5384
  * @private internal utility of `createPipelineExecutor`
5325
5385
  */
5326
5386
  async function executeTask(options) {
5327
5387
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5328
5388
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5329
- await onProgress({
5330
- outputParameters: {
5331
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5332
- },
5333
- });
5334
5389
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5335
5390
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5336
5391
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
@@ -5405,6 +5460,7 @@ async function executeTask(options) {
5405
5460
  preparedPipeline,
5406
5461
  tools,
5407
5462
  $executionReport,
5463
+ onProgress,
5408
5464
  pipelineIdentification,
5409
5465
  maxExecutionAttempts,
5410
5466
  maxParallelCount,
@@ -5457,9 +5513,12 @@ function filterJustOutputParameters(options) {
5457
5513
  }
5458
5514
 
5459
5515
  /**
5460
- * @@@
5516
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5517
+ *
5518
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5461
5519
  *
5462
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5520
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5521
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5463
5522
  *
5464
5523
  * @private internal utility of `createPipelineExecutor`
5465
5524
  */