@promptbook/documents 0.92.0-21 → 0.92.0-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/esm/index.es.js +136 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  6. package/esm/typings/src/config.d.ts +29 -11
  7. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  8. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  9. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  10. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  11. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +40 -5
  13. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  14. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  15. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  16. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  17. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  19. package/esm/typings/src/utils/$Register.d.ts +8 -7
  20. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  21. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  22. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  23. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  24. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  25. package/package.json +2 -2
  26. package/umd/index.umd.js +136 -77
  27. package/umd/index.umd.js.map +1 -1
@@ -1,10 +1,11 @@
1
1
  import type { string_parameter_name } from '../../../types/typeAliases';
2
2
  /**
3
- * Function `validateParameterName` will @@@
3
+ * Function `validateParameterName` will normalize and validate a parameter name for use in pipelines.
4
+ * It removes diacritics, emojis, and quotes, normalizes to camelCase, and checks for reserved names and invalid characters.
4
5
  *
5
- * @param parameterName @@@
6
- * @returns @@@
7
- * @throws {ParseError} @@@
6
+ * @param parameterName The parameter name to validate and normalize.
7
+ * @returns The validated and normalized parameter name.
8
+ * @throws {ParseError} If the parameter name is empty, reserved, or contains invalid characters.
8
9
  * @private within the repository
9
10
  */
10
11
  export declare function validateParameterName(parameterName: string): string_parameter_name;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/documents",
3
- "version": "0.92.0-21",
3
+ "version": "0.92.0-23",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "private": false,
6
6
  "sideEffects": false,
@@ -51,7 +51,7 @@
51
51
  "module": "./esm/index.es.js",
52
52
  "typings": "./esm/typings/src/_packages/documents.index.d.ts",
53
53
  "peerDependencies": {
54
- "@promptbook/core": "0.92.0-21"
54
+ "@promptbook/core": "0.92.0-23"
55
55
  },
56
56
  "dependencies": {
57
57
  "colors": "1.4.0",
package/umd/index.umd.js CHANGED
@@ -26,7 +26,7 @@
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-21';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -102,6 +102,12 @@
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * @@@
107
+ *
108
+ * @public exported from `@promptbook/core`
109
+ */
110
+ const BIG_DATASET_TRESHOLD = 50;
105
111
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
112
  /**
107
113
  * The maximum number of iterations for a loops
@@ -181,7 +187,7 @@
181
187
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
188
  // <- TODO: [🧜‍♂️]
183
189
  /**
184
- * @@@
190
+ * Default settings for parsing and generating CSV files in Promptbook.
185
191
  *
186
192
  * @public exported from `@promptbook/core`
187
193
  */
@@ -192,19 +198,19 @@
192
198
  skipEmptyLines: true,
193
199
  });
194
200
  /**
195
- * @@@
201
+ * Controls whether verbose logging is enabled by default throughout the application.
196
202
  *
197
203
  * @public exported from `@promptbook/core`
198
204
  */
199
205
  let DEFAULT_IS_VERBOSE = false;
200
206
  /**
201
- * @@@
207
+ * Controls whether auto-installation of dependencies is enabled by default.
202
208
  *
203
209
  * @public exported from `@promptbook/core`
204
210
  */
205
211
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
212
  /**
207
- * @@@
213
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
214
  *
209
215
  * @private within the repository
210
216
  */
@@ -1122,7 +1128,7 @@
1122
1128
  * Function isValidJsonString will tell you if the string is valid JSON or not
1123
1129
  *
1124
1130
  * @param value The string to check
1125
- * @returns True if the string is a valid JSON string, false otherwise
1131
+ * @returns `true` if the string is a valid JSON string, false otherwise
1126
1132
  *
1127
1133
  * @public exported from `@promptbook/utils`
1128
1134
  */
@@ -1533,8 +1539,12 @@
1533
1539
  */
1534
1540
 
1535
1541
  /**
1536
- * @@@
1542
+ * Creates a deep clone of the given object
1543
+ *
1544
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1537
1545
  *
1546
+ * @param objectValue The object to clone.
1547
+ * @returns A deep, writable clone of the input object.
1538
1548
  * @public exported from `@promptbook/utils`
1539
1549
  */
1540
1550
  function deepClone(objectValue) {
@@ -3319,11 +3329,11 @@
3319
3329
  }
3320
3330
 
3321
3331
  /**
3322
- * Register is @@@
3332
+ * Global registry for storing and managing registered entities of a given type.
3323
3333
  *
3324
3334
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3325
3335
  *
3326
- * @private internal utility, exported are only signleton instances of this class
3336
+ * @private internal utility, exported are only singleton instances of this class
3327
3337
  */
3328
3338
  class $Register {
3329
3339
  constructor(registerName) {
@@ -3367,10 +3377,10 @@
3367
3377
  }
3368
3378
 
3369
3379
  /**
3370
- * @@@
3380
+ * Global registry for storing metadata about all available scrapers and converters.
3371
3381
  *
3372
- * Note: `$` is used to indicate that this interacts with the global scope
3373
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3382
+ * Note: `$` is used to indicate that this interacts with the global scope.
3383
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3374
3384
  * @public exported from `@promptbook/core`
3375
3385
  */
3376
3386
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -4261,7 +4271,7 @@
4261
4271
  * Function to check if a string is valid CSV
4262
4272
  *
4263
4273
  * @param value The string to check
4264
- * @returns True if the string is a valid CSV string, false otherwise
4274
+ * @returns `true` if the string is a valid CSV string, false otherwise
4265
4275
  *
4266
4276
  * @public exported from `@promptbook/utils`
4267
4277
  */
@@ -4300,7 +4310,8 @@
4300
4310
  subvalueParsers: [
4301
4311
  {
4302
4312
  subvalueName: 'ROW',
4303
- async mapValues(value, outputParameterName, settings, mapCallback) {
4313
+ async mapValues(options) {
4314
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4304
4315
  const csv = csvParse(value, settings);
4305
4316
  if (csv.errors.length !== 0) {
4306
4317
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
@@ -4316,21 +4327,30 @@
4316
4327
  ${block(value)}
4317
4328
  `));
4318
4329
  }
4319
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4330
+ const mappedData = [];
4331
+ const length = csv.data.length;
4332
+ for (let index = 0; index < length; index++) {
4333
+ const row = csv.data[index];
4320
4334
  if (row[outputParameterName]) {
4321
4335
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4322
4336
  }
4323
- return {
4337
+ const mappedRow = {
4324
4338
  ...row,
4325
- [outputParameterName]: await mapCallback(row, index),
4339
+ [outputParameterName]: await mapCallback(row, index, length),
4326
4340
  };
4327
- }));
4341
+ mappedData.push(mappedRow);
4342
+ if (onProgress) {
4343
+ // Note: Report the CSV with all rows mapped so far
4344
+ await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4345
+ }
4346
+ }
4328
4347
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4329
4348
  },
4330
4349
  },
4331
4350
  {
4332
4351
  subvalueName: 'CELL',
4333
- async mapValues(value, outputParameterName, settings, mapCallback) {
4352
+ async mapValues(options) {
4353
+ const { value, settings, mapCallback, onProgress } = options;
4334
4354
  const csv = csvParse(value, settings);
4335
4355
  if (csv.errors.length !== 0) {
4336
4356
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
@@ -4347,9 +4367,9 @@
4347
4367
  `));
4348
4368
  }
4349
4369
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4350
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4370
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4351
4371
  const index = rowIndex * Object.keys(row).length + columnIndex;
4352
- return /* not await */ mapCallback({ [key]: value }, index);
4372
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4353
4373
  }));
4354
4374
  }));
4355
4375
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4417,14 +4437,15 @@
4417
4437
  subvalueParsers: [
4418
4438
  {
4419
4439
  subvalueName: 'LINE',
4420
- async mapValues(value, outputParameterName, settings, mapCallback) {
4440
+ async mapValues(options) {
4441
+ const { value, mapCallback, onProgress } = options;
4421
4442
  const lines = value.split('\n');
4422
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4443
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4423
4444
  // TODO: [🧠] Maybe option to skip empty line
4424
4445
  /* not await */ mapCallback({
4425
4446
  lineContent,
4426
4447
  // TODO: [🧠] Maybe also put here `lineNumber`
4427
- }, lineNumber)));
4448
+ }, lineNumber, array.length)));
4428
4449
  return mappedLines.join('\n');
4429
4450
  },
4430
4451
  },
@@ -4445,7 +4466,7 @@
4445
4466
  * Function to check if a string is valid XML
4446
4467
  *
4447
4468
  * @param value
4448
- * @returns True if the string is a valid XML string, false otherwise
4469
+ * @returns `true` if the string is a valid XML string, false otherwise
4449
4470
  *
4450
4471
  * @public exported from `@promptbook/utils`
4451
4472
  */
@@ -4507,13 +4528,13 @@
4507
4528
  */
4508
4529
 
4509
4530
  /**
4510
- * Maps available parameters to expected parameters
4531
+ * Maps available parameters to expected parameters for a pipeline task.
4511
4532
  *
4512
4533
  * The strategy is:
4513
- * 1) @@@
4514
- * 2) @@@
4534
+ * 1) First, match parameters by name where both available and expected.
4535
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4515
4536
  *
4516
- * @throws {PipelineExecutionError} @@@
4537
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4517
4538
  * @private within the repository used in `createPipelineExecutor`
4518
4539
  */
4519
4540
  function mapAvailableToExpectedParameters(options) {
@@ -5233,12 +5254,16 @@
5233
5254
  */
5234
5255
 
5235
5256
  /**
5236
- * @@@
5257
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5258
+ * Handles format and subformat resolution, error handling, and progress reporting.
5259
+ *
5260
+ * @param options - Options for execution, including task details and progress callback.
5261
+ * @returns The result of the subvalue mapping or execution attempts.
5237
5262
  *
5238
5263
  * @private internal utility of `createPipelineExecutor`
5239
5264
  */
5240
5265
  async function executeFormatSubvalues(options) {
5241
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5266
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5242
5267
  if (task.foreach === undefined) {
5243
5268
  return /* not await */ executeAttempts(options);
5244
5269
  }
@@ -5292,46 +5317,74 @@
5292
5317
  formatSettings = csvSettings;
5293
5318
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5294
5319
  }
5295
- const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5296
- let mappedParameters;
5297
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5298
- // TODO: When done [🐚] Report progress also for each subvalue here
5299
- try {
5300
- mappedParameters = mapAvailableToExpectedParameters({
5301
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5302
- availableParameters: subparameters,
5303
- });
5304
- }
5305
- catch (error) {
5306
- if (!(error instanceof PipelineExecutionError)) {
5307
- throw error;
5320
+ const resultString = await subvalueParser.mapValues({
5321
+ value: parameterValue,
5322
+ outputParameterName: task.foreach.outputSubparameterName,
5323
+ settings: formatSettings,
5324
+ onProgress(partialResultString) {
5325
+ return onProgress(Object.freeze({
5326
+ [task.resultingParameterName]: partialResultString,
5327
+ }));
5328
+ },
5329
+ async mapCallback(subparameters, index, length) {
5330
+ let mappedParameters;
5331
+ try {
5332
+ mappedParameters = mapAvailableToExpectedParameters({
5333
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5334
+ availableParameters: subparameters,
5335
+ });
5308
5336
  }
5309
- throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5310
- ${error.message}
5337
+ catch (error) {
5338
+ if (!(error instanceof PipelineExecutionError)) {
5339
+ throw error;
5340
+ }
5341
+ const highLevelError = new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5342
+ ${error.message}
5311
5343
 
5312
- This is error in FOREACH command
5313
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5344
+ This is error in FOREACH command when mapping data
5345
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5314
5346
 
5315
- ${block(pipelineIdentification)}
5316
- Subparameter index: ${index}
5317
- `));
5318
- }
5319
- const allSubparameters = {
5320
- ...parameters,
5321
- ...mappedParameters,
5322
- };
5323
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5324
- Object.freeze(allSubparameters);
5325
- const subresultString = await executeAttempts({
5326
- ...options,
5327
- priority: priority + index,
5328
- parameters: allSubparameters,
5329
- pipelineIdentification: spaceTrim__default["default"]((block) => `
5330
- ${block(pipelineIdentification)}
5331
- Subparameter index: ${index}
5332
- `),
5333
- });
5334
- return subresultString;
5347
+ ${block(pipelineIdentification)}
5348
+ Subparameter index: ${index}
5349
+ `));
5350
+ if (length > BIG_DATASET_TRESHOLD) {
5351
+ console.error(highLevelError);
5352
+ return '~';
5353
+ }
5354
+ throw highLevelError;
5355
+ }
5356
+ const allSubparameters = {
5357
+ ...parameters,
5358
+ ...mappedParameters,
5359
+ };
5360
+ Object.freeze(allSubparameters);
5361
+ try {
5362
+ const subresultString = await executeAttempts({
5363
+ ...options,
5364
+ priority: priority + index,
5365
+ parameters: allSubparameters,
5366
+ pipelineIdentification: spaceTrim__default["default"]((block) => `
5367
+ ${block(pipelineIdentification)}
5368
+ Subparameter index: ${index}
5369
+ `),
5370
+ });
5371
+ return subresultString;
5372
+ }
5373
+ catch (error) {
5374
+ if (length > BIG_DATASET_TRESHOLD) {
5375
+ console.error(spaceTrim__default["default"]((block) => `
5376
+ Error in FOREACH command:
5377
+
5378
+ ${block(pipelineIdentification)}
5379
+
5380
+ ${block(pipelineIdentification)}
5381
+ Subparameter index: ${index}
5382
+ `));
5383
+ return '~';
5384
+ }
5385
+ throw error;
5386
+ }
5387
+ },
5335
5388
  });
5336
5389
  return resultString;
5337
5390
  }
@@ -5465,7 +5518,11 @@
5465
5518
  */
5466
5519
 
5467
5520
  /**
5468
- * @@@
5521
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5522
+ * Ensures all reserved parameters are defined and throws if any are missing.
5523
+ *
5524
+ * @param options - Options including tools, pipeline, task, and context.
5525
+ * @returns An object containing all reserved parameters for the task.
5469
5526
  *
5470
5527
  * @private internal utility of `createPipelineExecutor`
5471
5528
  */
@@ -5498,18 +5555,16 @@
5498
5555
  }
5499
5556
 
5500
5557
  /**
5501
- * @@@
5558
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5559
+ *
5560
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5561
+ * @returns The output parameters produced by the task.
5502
5562
  *
5503
5563
  * @private internal utility of `createPipelineExecutor`
5504
5564
  */
5505
5565
  async function executeTask(options) {
5506
5566
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5507
5567
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5508
- await onProgress({
5509
- outputParameters: {
5510
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5511
- },
5512
- });
5513
5568
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5514
5569
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5515
5570
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
@@ -5584,6 +5639,7 @@
5584
5639
  preparedPipeline,
5585
5640
  tools,
5586
5641
  $executionReport,
5642
+ onProgress,
5587
5643
  pipelineIdentification,
5588
5644
  maxExecutionAttempts,
5589
5645
  maxParallelCount,
@@ -5636,9 +5692,12 @@
5636
5692
  }
5637
5693
 
5638
5694
  /**
5639
- * @@@
5695
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5696
+ *
5697
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5640
5698
  *
5641
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5699
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5700
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5642
5701
  *
5643
5702
  * @private internal utility of `createPipelineExecutor`
5644
5703
  */