@promptbook/pdf 0.92.0-21 → 0.92.0-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/esm/index.es.js +136 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  6. package/esm/typings/src/config.d.ts +29 -11
  7. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  8. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  9. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  10. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  11. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +40 -5
  13. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  14. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  15. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  16. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  17. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  19. package/esm/typings/src/utils/$Register.d.ts +8 -7
  20. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  21. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  22. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  23. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  24. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  25. package/package.json +2 -2
  26. package/umd/index.umd.js +136 -77
  27. package/umd/index.umd.js.map +1 -1
@@ -1,10 +1,11 @@
1
1
  import type { string_parameter_name } from '../../../types/typeAliases';
2
2
  /**
3
- * Function `validateParameterName` will @@@
3
+ * Function `validateParameterName` will normalize and validate a parameter name for use in pipelines.
4
+ * It removes diacritics, emojis, and quotes, normalizes to camelCase, and checks for reserved names and invalid characters.
4
5
  *
5
- * @param parameterName @@@
6
- * @returns @@@
7
- * @throws {ParseError} @@@
6
+ * @param parameterName The parameter name to validate and normalize.
7
+ * @returns The validated and normalized parameter name.
8
+ * @throws {ParseError} If the parameter name is empty, reserved, or contains invalid characters.
8
9
  * @private within the repository
9
10
  */
10
11
  export declare function validateParameterName(parameterName: string): string_parameter_name;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/pdf",
3
- "version": "0.92.0-21",
3
+ "version": "0.92.0-23",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "private": false,
6
6
  "sideEffects": false,
@@ -51,7 +51,7 @@
51
51
  "module": "./esm/index.es.js",
52
52
  "typings": "./esm/typings/src/_packages/pdf.index.d.ts",
53
53
  "peerDependencies": {
54
- "@promptbook/core": "0.92.0-21"
54
+ "@promptbook/core": "0.92.0-23"
55
55
  },
56
56
  "dependencies": {
57
57
  "crypto": "1.0.1",
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-21';
28
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -101,6 +101,12 @@
101
101
  * @public exported from `@promptbook/core`
102
102
  */
103
103
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
104
+ /**
105
+ * @@@
106
+ *
107
+ * @public exported from `@promptbook/core`
108
+ */
109
+ const BIG_DATASET_TRESHOLD = 50;
104
110
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
105
111
  /**
106
112
  * The maximum number of iterations for a loops
@@ -180,7 +186,7 @@
180
186
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
181
187
  // <- TODO: [🧜‍♂️]
182
188
  /**
183
- * @@@
189
+ * Default settings for parsing and generating CSV files in Promptbook.
184
190
  *
185
191
  * @public exported from `@promptbook/core`
186
192
  */
@@ -191,19 +197,19 @@
191
197
  skipEmptyLines: true,
192
198
  });
193
199
  /**
194
- * @@@
200
+ * Controls whether verbose logging is enabled by default throughout the application.
195
201
  *
196
202
  * @public exported from `@promptbook/core`
197
203
  */
198
204
  let DEFAULT_IS_VERBOSE = false;
199
205
  /**
200
- * @@@
206
+ * Controls whether auto-installation of dependencies is enabled by default.
201
207
  *
202
208
  * @public exported from `@promptbook/core`
203
209
  */
204
210
  const DEFAULT_IS_AUTO_INSTALLED = false;
205
211
  /**
206
- * @@@
212
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
207
213
  *
208
214
  * @private within the repository
209
215
  */
@@ -944,7 +950,7 @@
944
950
  * Function isValidJsonString will tell you if the string is valid JSON or not
945
951
  *
946
952
  * @param value The string to check
947
- * @returns True if the string is a valid JSON string, false otherwise
953
+ * @returns `true` if the string is a valid JSON string, false otherwise
948
954
  *
949
955
  * @public exported from `@promptbook/utils`
950
956
  */
@@ -1355,8 +1361,12 @@
1355
1361
  */
1356
1362
 
1357
1363
  /**
1358
- * @@@
1364
+ * Creates a deep clone of the given object
1365
+ *
1366
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1359
1367
  *
1368
+ * @param objectValue The object to clone.
1369
+ * @returns A deep, writable clone of the input object.
1360
1370
  * @public exported from `@promptbook/utils`
1361
1371
  */
1362
1372
  function deepClone(objectValue) {
@@ -3159,11 +3169,11 @@
3159
3169
  }
3160
3170
 
3161
3171
  /**
3162
- * Register is @@@
3172
+ * Global registry for storing and managing registered entities of a given type.
3163
3173
  *
3164
3174
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3165
3175
  *
3166
- * @private internal utility, exported are only signleton instances of this class
3176
+ * @private internal utility, exported are only singleton instances of this class
3167
3177
  */
3168
3178
  class $Register {
3169
3179
  constructor(registerName) {
@@ -3207,10 +3217,10 @@
3207
3217
  }
3208
3218
 
3209
3219
  /**
3210
- * @@@
3220
+ * Global registry for storing metadata about all available scrapers and converters.
3211
3221
  *
3212
- * Note: `$` is used to indicate that this interacts with the global scope
3213
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3222
+ * Note: `$` is used to indicate that this interacts with the global scope.
3223
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3214
3224
  * @public exported from `@promptbook/core`
3215
3225
  */
3216
3226
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -4111,7 +4121,7 @@
4111
4121
  * Function to check if a string is valid CSV
4112
4122
  *
4113
4123
  * @param value The string to check
4114
- * @returns True if the string is a valid CSV string, false otherwise
4124
+ * @returns `true` if the string is a valid CSV string, false otherwise
4115
4125
  *
4116
4126
  * @public exported from `@promptbook/utils`
4117
4127
  */
@@ -4150,7 +4160,8 @@
4150
4160
  subvalueParsers: [
4151
4161
  {
4152
4162
  subvalueName: 'ROW',
4153
- async mapValues(value, outputParameterName, settings, mapCallback) {
4163
+ async mapValues(options) {
4164
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4154
4165
  const csv = csvParse(value, settings);
4155
4166
  if (csv.errors.length !== 0) {
4156
4167
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
@@ -4166,21 +4177,30 @@
4166
4177
  ${block(value)}
4167
4178
  `));
4168
4179
  }
4169
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4180
+ const mappedData = [];
4181
+ const length = csv.data.length;
4182
+ for (let index = 0; index < length; index++) {
4183
+ const row = csv.data[index];
4170
4184
  if (row[outputParameterName]) {
4171
4185
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4172
4186
  }
4173
- return {
4187
+ const mappedRow = {
4174
4188
  ...row,
4175
- [outputParameterName]: await mapCallback(row, index),
4189
+ [outputParameterName]: await mapCallback(row, index, length),
4176
4190
  };
4177
- }));
4191
+ mappedData.push(mappedRow);
4192
+ if (onProgress) {
4193
+ // Note: Report the CSV with all rows mapped so far
4194
+ await onProgress(papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4195
+ }
4196
+ }
4178
4197
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4179
4198
  },
4180
4199
  },
4181
4200
  {
4182
4201
  subvalueName: 'CELL',
4183
- async mapValues(value, outputParameterName, settings, mapCallback) {
4202
+ async mapValues(options) {
4203
+ const { value, settings, mapCallback, onProgress } = options;
4184
4204
  const csv = csvParse(value, settings);
4185
4205
  if (csv.errors.length !== 0) {
4186
4206
  throw new CsvFormatError(spaceTrim__default["default"]((block) => `
@@ -4197,9 +4217,9 @@
4197
4217
  `));
4198
4218
  }
4199
4219
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4200
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4220
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4201
4221
  const index = rowIndex * Object.keys(row).length + columnIndex;
4202
- return /* not await */ mapCallback({ [key]: value }, index);
4222
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4203
4223
  }));
4204
4224
  }));
4205
4225
  return papaparse.unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4267,14 +4287,15 @@
4267
4287
  subvalueParsers: [
4268
4288
  {
4269
4289
  subvalueName: 'LINE',
4270
- async mapValues(value, outputParameterName, settings, mapCallback) {
4290
+ async mapValues(options) {
4291
+ const { value, mapCallback, onProgress } = options;
4271
4292
  const lines = value.split('\n');
4272
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4293
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4273
4294
  // TODO: [🧠] Maybe option to skip empty line
4274
4295
  /* not await */ mapCallback({
4275
4296
  lineContent,
4276
4297
  // TODO: [🧠] Maybe also put here `lineNumber`
4277
- }, lineNumber)));
4298
+ }, lineNumber, array.length)));
4278
4299
  return mappedLines.join('\n');
4279
4300
  },
4280
4301
  },
@@ -4295,7 +4316,7 @@
4295
4316
  * Function to check if a string is valid XML
4296
4317
  *
4297
4318
  * @param value
4298
- * @returns True if the string is a valid XML string, false otherwise
4319
+ * @returns `true` if the string is a valid XML string, false otherwise
4299
4320
  *
4300
4321
  * @public exported from `@promptbook/utils`
4301
4322
  */
@@ -4357,13 +4378,13 @@
4357
4378
  */
4358
4379
 
4359
4380
  /**
4360
- * Maps available parameters to expected parameters
4381
+ * Maps available parameters to expected parameters for a pipeline task.
4361
4382
  *
4362
4383
  * The strategy is:
4363
- * 1) @@@
4364
- * 2) @@@
4384
+ * 1) First, match parameters by name where both available and expected.
4385
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4365
4386
  *
4366
- * @throws {PipelineExecutionError} @@@
4387
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4367
4388
  * @private within the repository used in `createPipelineExecutor`
4368
4389
  */
4369
4390
  function mapAvailableToExpectedParameters(options) {
@@ -5083,12 +5104,16 @@
5083
5104
  */
5084
5105
 
5085
5106
  /**
5086
- * @@@
5107
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5108
+ * Handles format and subformat resolution, error handling, and progress reporting.
5109
+ *
5110
+ * @param options - Options for execution, including task details and progress callback.
5111
+ * @returns The result of the subvalue mapping or execution attempts.
5087
5112
  *
5088
5113
  * @private internal utility of `createPipelineExecutor`
5089
5114
  */
5090
5115
  async function executeFormatSubvalues(options) {
5091
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5116
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5092
5117
  if (task.foreach === undefined) {
5093
5118
  return /* not await */ executeAttempts(options);
5094
5119
  }
@@ -5142,46 +5167,74 @@
5142
5167
  formatSettings = csvSettings;
5143
5168
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5144
5169
  }
5145
- const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5146
- let mappedParameters;
5147
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5148
- // TODO: When done [🐚] Report progress also for each subvalue here
5149
- try {
5150
- mappedParameters = mapAvailableToExpectedParameters({
5151
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5152
- availableParameters: subparameters,
5153
- });
5154
- }
5155
- catch (error) {
5156
- if (!(error instanceof PipelineExecutionError)) {
5157
- throw error;
5170
+ const resultString = await subvalueParser.mapValues({
5171
+ value: parameterValue,
5172
+ outputParameterName: task.foreach.outputSubparameterName,
5173
+ settings: formatSettings,
5174
+ onProgress(partialResultString) {
5175
+ return onProgress(Object.freeze({
5176
+ [task.resultingParameterName]: partialResultString,
5177
+ }));
5178
+ },
5179
+ async mapCallback(subparameters, index, length) {
5180
+ let mappedParameters;
5181
+ try {
5182
+ mappedParameters = mapAvailableToExpectedParameters({
5183
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5184
+ availableParameters: subparameters,
5185
+ });
5158
5186
  }
5159
- throw new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5160
- ${error.message}
5187
+ catch (error) {
5188
+ if (!(error instanceof PipelineExecutionError)) {
5189
+ throw error;
5190
+ }
5191
+ const highLevelError = new PipelineExecutionError(spaceTrim__default["default"]((block) => `
5192
+ ${error.message}
5161
5193
 
5162
- This is error in FOREACH command
5163
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5194
+ This is error in FOREACH command when mapping data
5195
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5164
5196
 
5165
- ${block(pipelineIdentification)}
5166
- Subparameter index: ${index}
5167
- `));
5168
- }
5169
- const allSubparameters = {
5170
- ...parameters,
5171
- ...mappedParameters,
5172
- };
5173
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5174
- Object.freeze(allSubparameters);
5175
- const subresultString = await executeAttempts({
5176
- ...options,
5177
- priority: priority + index,
5178
- parameters: allSubparameters,
5179
- pipelineIdentification: spaceTrim__default["default"]((block) => `
5180
- ${block(pipelineIdentification)}
5181
- Subparameter index: ${index}
5182
- `),
5183
- });
5184
- return subresultString;
5197
+ ${block(pipelineIdentification)}
5198
+ Subparameter index: ${index}
5199
+ `));
5200
+ if (length > BIG_DATASET_TRESHOLD) {
5201
+ console.error(highLevelError);
5202
+ return '~';
5203
+ }
5204
+ throw highLevelError;
5205
+ }
5206
+ const allSubparameters = {
5207
+ ...parameters,
5208
+ ...mappedParameters,
5209
+ };
5210
+ Object.freeze(allSubparameters);
5211
+ try {
5212
+ const subresultString = await executeAttempts({
5213
+ ...options,
5214
+ priority: priority + index,
5215
+ parameters: allSubparameters,
5216
+ pipelineIdentification: spaceTrim__default["default"]((block) => `
5217
+ ${block(pipelineIdentification)}
5218
+ Subparameter index: ${index}
5219
+ `),
5220
+ });
5221
+ return subresultString;
5222
+ }
5223
+ catch (error) {
5224
+ if (length > BIG_DATASET_TRESHOLD) {
5225
+ console.error(spaceTrim__default["default"]((block) => `
5226
+ Error in FOREACH command:
5227
+
5228
+ ${block(pipelineIdentification)}
5229
+
5230
+ ${block(pipelineIdentification)}
5231
+ Subparameter index: ${index}
5232
+ `));
5233
+ return '~';
5234
+ }
5235
+ throw error;
5236
+ }
5237
+ },
5185
5238
  });
5186
5239
  return resultString;
5187
5240
  }
@@ -5315,7 +5368,11 @@
5315
5368
  */
5316
5369
 
5317
5370
  /**
5318
- * @@@
5371
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5372
+ * Ensures all reserved parameters are defined and throws if any are missing.
5373
+ *
5374
+ * @param options - Options including tools, pipeline, task, and context.
5375
+ * @returns An object containing all reserved parameters for the task.
5319
5376
  *
5320
5377
  * @private internal utility of `createPipelineExecutor`
5321
5378
  */
@@ -5348,18 +5405,16 @@
5348
5405
  }
5349
5406
 
5350
5407
  /**
5351
- * @@@
5408
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5409
+ *
5410
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5411
+ * @returns The output parameters produced by the task.
5352
5412
  *
5353
5413
  * @private internal utility of `createPipelineExecutor`
5354
5414
  */
5355
5415
  async function executeTask(options) {
5356
5416
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5357
5417
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5358
- await onProgress({
5359
- outputParameters: {
5360
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5361
- },
5362
- });
5363
5418
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5364
5419
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5365
5420
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
@@ -5434,6 +5489,7 @@
5434
5489
  preparedPipeline,
5435
5490
  tools,
5436
5491
  $executionReport,
5492
+ onProgress,
5437
5493
  pipelineIdentification,
5438
5494
  maxExecutionAttempts,
5439
5495
  maxParallelCount,
@@ -5486,9 +5542,12 @@
5486
5542
  }
5487
5543
 
5488
5544
  /**
5489
- * @@@
5545
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5546
+ *
5547
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5490
5548
  *
5491
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5549
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5550
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5492
5551
  *
5493
5552
  * @private internal utility of `createPipelineExecutor`
5494
5553
  */