@promptbook/legacy-documents 0.92.0-21 → 0.92.0-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/esm/index.es.js +136 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  6. package/esm/typings/src/config.d.ts +29 -11
  7. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  8. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  9. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  10. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  11. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +40 -5
  13. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  14. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  15. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  16. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  17. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  19. package/esm/typings/src/utils/$Register.d.ts +8 -7
  20. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  21. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  22. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  23. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  24. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  25. package/package.json +2 -2
  26. package/umd/index.umd.js +136 -77
  27. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -28,7 +28,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
28
28
  * @generated
29
29
  * @see https://github.com/webgptorg/promptbook
30
30
  */
31
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-21';
31
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
32
32
  /**
33
33
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
34
34
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -104,6 +104,12 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
104
104
  * @public exported from `@promptbook/core`
105
105
  */
106
106
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
107
+ /**
108
+ * @@@
109
+ *
110
+ * @public exported from `@promptbook/core`
111
+ */
112
+ const BIG_DATASET_TRESHOLD = 50;
107
113
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
108
114
  /**
109
115
  * The maximum number of iterations for a loops
@@ -183,7 +189,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
183
189
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
184
190
  // <- TODO: [🧜‍♂️]
185
191
  /**
186
- * @@@
192
+ * Default settings for parsing and generating CSV files in Promptbook.
187
193
  *
188
194
  * @public exported from `@promptbook/core`
189
195
  */
@@ -194,19 +200,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
194
200
  skipEmptyLines: true,
195
201
  });
196
202
  /**
197
- * @@@
203
+ * Controls whether verbose logging is enabled by default throughout the application.
198
204
  *
199
205
  * @public exported from `@promptbook/core`
200
206
  */
201
207
  let DEFAULT_IS_VERBOSE = false;
202
208
  /**
203
- * @@@
209
+ * Controls whether auto-installation of dependencies is enabled by default.
204
210
  *
205
211
  * @public exported from `@promptbook/core`
206
212
  */
207
213
  const DEFAULT_IS_AUTO_INSTALLED = false;
208
214
  /**
209
- * @@@
215
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
210
216
  *
211
217
  * @private within the repository
212
218
  */
@@ -1124,7 +1130,7 @@ function assertsError(whatWasThrown) {
1124
1130
  * Function isValidJsonString will tell you if the string is valid JSON or not
1125
1131
  *
1126
1132
  * @param value The string to check
1127
- * @returns True if the string is a valid JSON string, false otherwise
1133
+ * @returns `true` if the string is a valid JSON string, false otherwise
1128
1134
  *
1129
1135
  * @public exported from `@promptbook/utils`
1130
1136
  */
@@ -1535,8 +1541,12 @@ function checkSerializableAsJson(options) {
1535
1541
  */
1536
1542
 
1537
1543
  /**
1538
- * @@@
1544
+ * Creates a deep clone of the given object
1545
+ *
1546
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1539
1547
  *
1548
+ * @param objectValue The object to clone.
1549
+ * @returns A deep, writable clone of the input object.
1540
1550
  * @public exported from `@promptbook/utils`
1541
1551
  */
1542
1552
  function deepClone(objectValue) {
@@ -3321,11 +3331,11 @@ function normalizeTo_snake_case(text) {
3321
3331
  }
3322
3332
 
3323
3333
  /**
3324
- * Register is @@@
3334
+ * Global registry for storing and managing registered entities of a given type.
3325
3335
  *
3326
3336
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3327
3337
  *
3328
- * @private internal utility, exported are only signleton instances of this class
3338
+ * @private internal utility, exported are only singleton instances of this class
3329
3339
  */
3330
3340
  class $Register {
3331
3341
  constructor(registerName) {
@@ -3369,10 +3379,10 @@ class $Register {
3369
3379
  }
3370
3380
 
3371
3381
  /**
3372
- * @@@
3382
+ * Global registry for storing metadata about all available scrapers and converters.
3373
3383
  *
3374
- * Note: `$` is used to indicate that this interacts with the global scope
3375
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3384
+ * Note: `$` is used to indicate that this interacts with the global scope.
3385
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3376
3386
  * @public exported from `@promptbook/core`
3377
3387
  */
3378
3388
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -4263,7 +4273,7 @@ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO:
4263
4273
  * Function to check if a string is valid CSV
4264
4274
  *
4265
4275
  * @param value The string to check
4266
- * @returns True if the string is a valid CSV string, false otherwise
4276
+ * @returns `true` if the string is a valid CSV string, false otherwise
4267
4277
  *
4268
4278
  * @public exported from `@promptbook/utils`
4269
4279
  */
@@ -4302,7 +4312,8 @@ const CsvFormatParser = {
4302
4312
  subvalueParsers: [
4303
4313
  {
4304
4314
  subvalueName: 'ROW',
4305
- async mapValues(value, outputParameterName, settings, mapCallback) {
4315
+ async mapValues(options) {
4316
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4306
4317
  const csv = csvParse(value, settings);
4307
4318
  if (csv.errors.length !== 0) {
4308
4319
  throw new CsvFormatError(spaceTrim$1((block) => `
@@ -4318,21 +4329,30 @@ const CsvFormatParser = {
4318
4329
  ${block(value)}
4319
4330
  `));
4320
4331
  }
4321
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4332
+ const mappedData = [];
4333
+ const length = csv.data.length;
4334
+ for (let index = 0; index < length; index++) {
4335
+ const row = csv.data[index];
4322
4336
  if (row[outputParameterName]) {
4323
4337
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4324
4338
  }
4325
- return {
4339
+ const mappedRow = {
4326
4340
  ...row,
4327
- [outputParameterName]: await mapCallback(row, index),
4341
+ [outputParameterName]: await mapCallback(row, index, length),
4328
4342
  };
4329
- }));
4343
+ mappedData.push(mappedRow);
4344
+ if (onProgress) {
4345
+ // Note: Report the CSV with all rows mapped so far
4346
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4347
+ }
4348
+ }
4330
4349
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4331
4350
  },
4332
4351
  },
4333
4352
  {
4334
4353
  subvalueName: 'CELL',
4335
- async mapValues(value, outputParameterName, settings, mapCallback) {
4354
+ async mapValues(options) {
4355
+ const { value, settings, mapCallback, onProgress } = options;
4336
4356
  const csv = csvParse(value, settings);
4337
4357
  if (csv.errors.length !== 0) {
4338
4358
  throw new CsvFormatError(spaceTrim$1((block) => `
@@ -4349,9 +4369,9 @@ const CsvFormatParser = {
4349
4369
  `));
4350
4370
  }
4351
4371
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4352
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4372
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4353
4373
  const index = rowIndex * Object.keys(row).length + columnIndex;
4354
- return /* not await */ mapCallback({ [key]: value }, index);
4374
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4355
4375
  }));
4356
4376
  }));
4357
4377
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4419,14 +4439,15 @@ const TextFormatParser = {
4419
4439
  subvalueParsers: [
4420
4440
  {
4421
4441
  subvalueName: 'LINE',
4422
- async mapValues(value, outputParameterName, settings, mapCallback) {
4442
+ async mapValues(options) {
4443
+ const { value, mapCallback, onProgress } = options;
4423
4444
  const lines = value.split('\n');
4424
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4445
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4425
4446
  // TODO: [🧠] Maybe option to skip empty line
4426
4447
  /* not await */ mapCallback({
4427
4448
  lineContent,
4428
4449
  // TODO: [🧠] Maybe also put here `lineNumber`
4429
- }, lineNumber)));
4450
+ }, lineNumber, array.length)));
4430
4451
  return mappedLines.join('\n');
4431
4452
  },
4432
4453
  },
@@ -4447,7 +4468,7 @@ const TextFormatParser = {
4447
4468
  * Function to check if a string is valid XML
4448
4469
  *
4449
4470
  * @param value
4450
- * @returns True if the string is a valid XML string, false otherwise
4471
+ * @returns `true` if the string is a valid XML string, false otherwise
4451
4472
  *
4452
4473
  * @public exported from `@promptbook/utils`
4453
4474
  */
@@ -4509,13 +4530,13 @@ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser,
4509
4530
  */
4510
4531
 
4511
4532
  /**
4512
- * Maps available parameters to expected parameters
4533
+ * Maps available parameters to expected parameters for a pipeline task.
4513
4534
  *
4514
4535
  * The strategy is:
4515
- * 1) @@@
4516
- * 2) @@@
4536
+ * 1) First, match parameters by name where both available and expected.
4537
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4517
4538
  *
4518
- * @throws {PipelineExecutionError} @@@
4539
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4519
4540
  * @private within the repository used in `createPipelineExecutor`
4520
4541
  */
4521
4542
  function mapAvailableToExpectedParameters(options) {
@@ -5235,12 +5256,16 @@ async function executeAttempts(options) {
5235
5256
  */
5236
5257
 
5237
5258
  /**
5238
- * @@@
5259
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5260
+ * Handles format and subformat resolution, error handling, and progress reporting.
5261
+ *
5262
+ * @param options - Options for execution, including task details and progress callback.
5263
+ * @returns The result of the subvalue mapping or execution attempts.
5239
5264
  *
5240
5265
  * @private internal utility of `createPipelineExecutor`
5241
5266
  */
5242
5267
  async function executeFormatSubvalues(options) {
5243
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5268
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5244
5269
  if (task.foreach === undefined) {
5245
5270
  return /* not await */ executeAttempts(options);
5246
5271
  }
@@ -5294,46 +5319,74 @@ async function executeFormatSubvalues(options) {
5294
5319
  formatSettings = csvSettings;
5295
5320
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5296
5321
  }
5297
- const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5298
- let mappedParameters;
5299
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5300
- // TODO: When done [🐚] Report progress also for each subvalue here
5301
- try {
5302
- mappedParameters = mapAvailableToExpectedParameters({
5303
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5304
- availableParameters: subparameters,
5305
- });
5306
- }
5307
- catch (error) {
5308
- if (!(error instanceof PipelineExecutionError)) {
5309
- throw error;
5322
+ const resultString = await subvalueParser.mapValues({
5323
+ value: parameterValue,
5324
+ outputParameterName: task.foreach.outputSubparameterName,
5325
+ settings: formatSettings,
5326
+ onProgress(partialResultString) {
5327
+ return onProgress(Object.freeze({
5328
+ [task.resultingParameterName]: partialResultString,
5329
+ }));
5330
+ },
5331
+ async mapCallback(subparameters, index, length) {
5332
+ let mappedParameters;
5333
+ try {
5334
+ mappedParameters = mapAvailableToExpectedParameters({
5335
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5336
+ availableParameters: subparameters,
5337
+ });
5310
5338
  }
5311
- throw new PipelineExecutionError(spaceTrim$1((block) => `
5312
- ${error.message}
5339
+ catch (error) {
5340
+ if (!(error instanceof PipelineExecutionError)) {
5341
+ throw error;
5342
+ }
5343
+ const highLevelError = new PipelineExecutionError(spaceTrim$1((block) => `
5344
+ ${error.message}
5313
5345
 
5314
- This is error in FOREACH command
5315
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5346
+ This is error in FOREACH command when mapping data
5347
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5316
5348
 
5317
- ${block(pipelineIdentification)}
5318
- Subparameter index: ${index}
5319
- `));
5320
- }
5321
- const allSubparameters = {
5322
- ...parameters,
5323
- ...mappedParameters,
5324
- };
5325
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5326
- Object.freeze(allSubparameters);
5327
- const subresultString = await executeAttempts({
5328
- ...options,
5329
- priority: priority + index,
5330
- parameters: allSubparameters,
5331
- pipelineIdentification: spaceTrim$1((block) => `
5332
- ${block(pipelineIdentification)}
5333
- Subparameter index: ${index}
5334
- `),
5335
- });
5336
- return subresultString;
5349
+ ${block(pipelineIdentification)}
5350
+ Subparameter index: ${index}
5351
+ `));
5352
+ if (length > BIG_DATASET_TRESHOLD) {
5353
+ console.error(highLevelError);
5354
+ return '~';
5355
+ }
5356
+ throw highLevelError;
5357
+ }
5358
+ const allSubparameters = {
5359
+ ...parameters,
5360
+ ...mappedParameters,
5361
+ };
5362
+ Object.freeze(allSubparameters);
5363
+ try {
5364
+ const subresultString = await executeAttempts({
5365
+ ...options,
5366
+ priority: priority + index,
5367
+ parameters: allSubparameters,
5368
+ pipelineIdentification: spaceTrim$1((block) => `
5369
+ ${block(pipelineIdentification)}
5370
+ Subparameter index: ${index}
5371
+ `),
5372
+ });
5373
+ return subresultString;
5374
+ }
5375
+ catch (error) {
5376
+ if (length > BIG_DATASET_TRESHOLD) {
5377
+ console.error(spaceTrim$1((block) => `
5378
+ Error in FOREACH command:
5379
+
5380
+ ${block(pipelineIdentification)}
5381
+
5382
+ ${block(pipelineIdentification)}
5383
+ Subparameter index: ${index}
5384
+ `));
5385
+ return '~';
5386
+ }
5387
+ throw error;
5388
+ }
5389
+ },
5337
5390
  });
5338
5391
  return resultString;
5339
5392
  }
@@ -5467,7 +5520,11 @@ async function getKnowledgeForTask(options) {
5467
5520
  */
5468
5521
 
5469
5522
  /**
5470
- * @@@
5523
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5524
+ * Ensures all reserved parameters are defined and throws if any are missing.
5525
+ *
5526
+ * @param options - Options including tools, pipeline, task, and context.
5527
+ * @returns An object containing all reserved parameters for the task.
5471
5528
  *
5472
5529
  * @private internal utility of `createPipelineExecutor`
5473
5530
  */
@@ -5500,18 +5557,16 @@ async function getReservedParametersForTask(options) {
5500
5557
  }
5501
5558
 
5502
5559
  /**
5503
- * @@@
5560
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5561
+ *
5562
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5563
+ * @returns The output parameters produced by the task.
5504
5564
  *
5505
5565
  * @private internal utility of `createPipelineExecutor`
5506
5566
  */
5507
5567
  async function executeTask(options) {
5508
5568
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5509
5569
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5510
- await onProgress({
5511
- outputParameters: {
5512
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5513
- },
5514
- });
5515
5570
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5516
5571
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5517
5572
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
@@ -5586,6 +5641,7 @@ async function executeTask(options) {
5586
5641
  preparedPipeline,
5587
5642
  tools,
5588
5643
  $executionReport,
5644
+ onProgress,
5589
5645
  pipelineIdentification,
5590
5646
  maxExecutionAttempts,
5591
5647
  maxParallelCount,
@@ -5638,9 +5694,12 @@ function filterJustOutputParameters(options) {
5638
5694
  }
5639
5695
 
5640
5696
  /**
5641
- * @@@
5697
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5698
+ *
5699
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5642
5700
  *
5643
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5701
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5702
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5644
5703
  *
5645
5704
  * @private internal utility of `createPipelineExecutor`
5646
5705
  */