@promptbook/pdf 0.92.0-21 → 0.92.0-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/esm/index.es.js +136 -77
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/_packages/types.index.d.ts +2 -0
  5. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  6. package/esm/typings/src/config.d.ts +29 -11
  7. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  8. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  9. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +15 -3
  10. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  11. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +40 -5
  13. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  14. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  15. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  16. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  17. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  19. package/esm/typings/src/utils/$Register.d.ts +8 -7
  20. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  21. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  22. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  23. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  24. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  25. package/package.json +2 -2
  26. package/umd/index.umd.js +136 -77
  27. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-21';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -102,6 +102,12 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
102
102
  * @public exported from `@promptbook/core`
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
+ /**
106
+ * @@@
107
+ *
108
+ * @public exported from `@promptbook/core`
109
+ */
110
+ const BIG_DATASET_TRESHOLD = 50;
105
111
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
106
112
  /**
107
113
  * The maximum number of iterations for a loops
@@ -181,7 +187,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
181
187
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
182
188
  // <- TODO: [🧜‍♂️]
183
189
  /**
184
- * @@@
190
+ * Default settings for parsing and generating CSV files in Promptbook.
185
191
  *
186
192
  * @public exported from `@promptbook/core`
187
193
  */
@@ -192,19 +198,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
192
198
  skipEmptyLines: true,
193
199
  });
194
200
  /**
195
- * @@@
201
+ * Controls whether verbose logging is enabled by default throughout the application.
196
202
  *
197
203
  * @public exported from `@promptbook/core`
198
204
  */
199
205
  let DEFAULT_IS_VERBOSE = false;
200
206
  /**
201
- * @@@
207
+ * Controls whether auto-installation of dependencies is enabled by default.
202
208
  *
203
209
  * @public exported from `@promptbook/core`
204
210
  */
205
211
  const DEFAULT_IS_AUTO_INSTALLED = false;
206
212
  /**
207
- * @@@
213
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
208
214
  *
209
215
  * @private within the repository
210
216
  */
@@ -945,7 +951,7 @@ function assertsError(whatWasThrown) {
945
951
  * Function isValidJsonString will tell you if the string is valid JSON or not
946
952
  *
947
953
  * @param value The string to check
948
- * @returns True if the string is a valid JSON string, false otherwise
954
+ * @returns `true` if the string is a valid JSON string, false otherwise
949
955
  *
950
956
  * @public exported from `@promptbook/utils`
951
957
  */
@@ -1356,8 +1362,12 @@ function checkSerializableAsJson(options) {
1356
1362
  */
1357
1363
 
1358
1364
  /**
1359
- * @@@
1365
+ * Creates a deep clone of the given object
1366
+ *
1367
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1360
1368
  *
1369
+ * @param objectValue The object to clone.
1370
+ * @returns A deep, writable clone of the input object.
1361
1371
  * @public exported from `@promptbook/utils`
1362
1372
  */
1363
1373
  function deepClone(objectValue) {
@@ -3160,11 +3170,11 @@ function normalizeTo_snake_case(text) {
3160
3170
  }
3161
3171
 
3162
3172
  /**
3163
- * Register is @@@
3173
+ * Global registry for storing and managing registered entities of a given type.
3164
3174
  *
3165
3175
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
3166
3176
  *
3167
- * @private internal utility, exported are only signleton instances of this class
3177
+ * @private internal utility, exported are only singleton instances of this class
3168
3178
  */
3169
3179
  class $Register {
3170
3180
  constructor(registerName) {
@@ -3208,10 +3218,10 @@ class $Register {
3208
3218
  }
3209
3219
 
3210
3220
  /**
3211
- * @@@
3221
+ * Global registry for storing metadata about all available scrapers and converters.
3212
3222
  *
3213
- * Note: `$` is used to indicate that this interacts with the global scope
3214
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3223
+ * Note: `$` is used to indicate that this interacts with the global scope.
3224
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
3215
3225
  * @public exported from `@promptbook/core`
3216
3226
  */
3217
3227
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -4112,7 +4122,7 @@ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO:
4112
4122
  * Function to check if a string is valid CSV
4113
4123
  *
4114
4124
  * @param value The string to check
4115
- * @returns True if the string is a valid CSV string, false otherwise
4125
+ * @returns `true` if the string is a valid CSV string, false otherwise
4116
4126
  *
4117
4127
  * @public exported from `@promptbook/utils`
4118
4128
  */
@@ -4151,7 +4161,8 @@ const CsvFormatParser = {
4151
4161
  subvalueParsers: [
4152
4162
  {
4153
4163
  subvalueName: 'ROW',
4154
- async mapValues(value, outputParameterName, settings, mapCallback) {
4164
+ async mapValues(options) {
4165
+ const { value, outputParameterName, settings, mapCallback, onProgress } = options;
4155
4166
  const csv = csvParse(value, settings);
4156
4167
  if (csv.errors.length !== 0) {
4157
4168
  throw new CsvFormatError(spaceTrim((block) => `
@@ -4167,21 +4178,30 @@ const CsvFormatParser = {
4167
4178
  ${block(value)}
4168
4179
  `));
4169
4180
  }
4170
- const mappedData = await Promise.all(csv.data.map(async (row, index) => {
4181
+ const mappedData = [];
4182
+ const length = csv.data.length;
4183
+ for (let index = 0; index < length; index++) {
4184
+ const row = csv.data[index];
4171
4185
  if (row[outputParameterName]) {
4172
4186
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4173
4187
  }
4174
- return {
4188
+ const mappedRow = {
4175
4189
  ...row,
4176
- [outputParameterName]: await mapCallback(row, index),
4190
+ [outputParameterName]: await mapCallback(row, index, length),
4177
4191
  };
4178
- }));
4192
+ mappedData.push(mappedRow);
4193
+ if (onProgress) {
4194
+ // Note: Report the CSV with all rows mapped so far
4195
+ await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4196
+ }
4197
+ }
4179
4198
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
4180
4199
  },
4181
4200
  },
4182
4201
  {
4183
4202
  subvalueName: 'CELL',
4184
- async mapValues(value, outputParameterName, settings, mapCallback) {
4203
+ async mapValues(options) {
4204
+ const { value, settings, mapCallback, onProgress } = options;
4185
4205
  const csv = csvParse(value, settings);
4186
4206
  if (csv.errors.length !== 0) {
4187
4207
  throw new CsvFormatError(spaceTrim((block) => `
@@ -4198,9 +4218,9 @@ const CsvFormatParser = {
4198
4218
  `));
4199
4219
  }
4200
4220
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4201
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4221
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4202
4222
  const index = rowIndex * Object.keys(row).length + columnIndex;
4203
- return /* not await */ mapCallback({ [key]: value }, index);
4223
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4204
4224
  }));
4205
4225
  }));
4206
4226
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4268,14 +4288,15 @@ const TextFormatParser = {
4268
4288
  subvalueParsers: [
4269
4289
  {
4270
4290
  subvalueName: 'LINE',
4271
- async mapValues(value, outputParameterName, settings, mapCallback) {
4291
+ async mapValues(options) {
4292
+ const { value, mapCallback, onProgress } = options;
4272
4293
  const lines = value.split('\n');
4273
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4294
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4274
4295
  // TODO: [🧠] Maybe option to skip empty line
4275
4296
  /* not await */ mapCallback({
4276
4297
  lineContent,
4277
4298
  // TODO: [🧠] Maybe also put here `lineNumber`
4278
- }, lineNumber)));
4299
+ }, lineNumber, array.length)));
4279
4300
  return mappedLines.join('\n');
4280
4301
  },
4281
4302
  },
@@ -4296,7 +4317,7 @@ const TextFormatParser = {
4296
4317
  * Function to check if a string is valid XML
4297
4318
  *
4298
4319
  * @param value
4299
- * @returns True if the string is a valid XML string, false otherwise
4320
+ * @returns `true` if the string is a valid XML string, false otherwise
4300
4321
  *
4301
4322
  * @public exported from `@promptbook/utils`
4302
4323
  */
@@ -4358,13 +4379,13 @@ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser,
4358
4379
  */
4359
4380
 
4360
4381
  /**
4361
- * Maps available parameters to expected parameters
4382
+ * Maps available parameters to expected parameters for a pipeline task.
4362
4383
  *
4363
4384
  * The strategy is:
4364
- * 1) @@@
4365
- * 2) @@@
4385
+ * 1) First, match parameters by name where both available and expected.
4386
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4366
4387
  *
4367
- * @throws {PipelineExecutionError} @@@
4388
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4368
4389
  * @private within the repository used in `createPipelineExecutor`
4369
4390
  */
4370
4391
  function mapAvailableToExpectedParameters(options) {
@@ -5084,12 +5105,16 @@ async function executeAttempts(options) {
5084
5105
  */
5085
5106
 
5086
5107
  /**
5087
- * @@@
5108
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5109
+ * Handles format and subformat resolution, error handling, and progress reporting.
5110
+ *
5111
+ * @param options - Options for execution, including task details and progress callback.
5112
+ * @returns The result of the subvalue mapping or execution attempts.
5088
5113
  *
5089
5114
  * @private internal utility of `createPipelineExecutor`
5090
5115
  */
5091
5116
  async function executeFormatSubvalues(options) {
5092
- const { task, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification } = options;
5117
+ const { task, jokerParameterNames, parameters, priority, csvSettings, onProgress, pipelineIdentification } = options;
5093
5118
  if (task.foreach === undefined) {
5094
5119
  return /* not await */ executeAttempts(options);
5095
5120
  }
@@ -5143,46 +5168,74 @@ async function executeFormatSubvalues(options) {
5143
5168
  formatSettings = csvSettings;
5144
5169
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5145
5170
  }
5146
- const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
5147
- let mappedParameters;
5148
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5149
- // TODO: When done [🐚] Report progress also for each subvalue here
5150
- try {
5151
- mappedParameters = mapAvailableToExpectedParameters({
5152
- expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5153
- availableParameters: subparameters,
5154
- });
5155
- }
5156
- catch (error) {
5157
- if (!(error instanceof PipelineExecutionError)) {
5158
- throw error;
5171
+ const resultString = await subvalueParser.mapValues({
5172
+ value: parameterValue,
5173
+ outputParameterName: task.foreach.outputSubparameterName,
5174
+ settings: formatSettings,
5175
+ onProgress(partialResultString) {
5176
+ return onProgress(Object.freeze({
5177
+ [task.resultingParameterName]: partialResultString,
5178
+ }));
5179
+ },
5180
+ async mapCallback(subparameters, index, length) {
5181
+ let mappedParameters;
5182
+ try {
5183
+ mappedParameters = mapAvailableToExpectedParameters({
5184
+ expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
5185
+ availableParameters: subparameters,
5186
+ });
5159
5187
  }
5160
- throw new PipelineExecutionError(spaceTrim((block) => `
5161
- ${error.message}
5188
+ catch (error) {
5189
+ if (!(error instanceof PipelineExecutionError)) {
5190
+ throw error;
5191
+ }
5192
+ const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5193
+ ${error.message}
5162
5194
 
5163
- This is error in FOREACH command
5164
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5195
+ This is error in FOREACH command when mapping data
5196
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5165
5197
 
5166
- ${block(pipelineIdentification)}
5167
- Subparameter index: ${index}
5168
- `));
5169
- }
5170
- const allSubparameters = {
5171
- ...parameters,
5172
- ...mappedParameters,
5173
- };
5174
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5175
- Object.freeze(allSubparameters);
5176
- const subresultString = await executeAttempts({
5177
- ...options,
5178
- priority: priority + index,
5179
- parameters: allSubparameters,
5180
- pipelineIdentification: spaceTrim((block) => `
5181
- ${block(pipelineIdentification)}
5182
- Subparameter index: ${index}
5183
- `),
5184
- });
5185
- return subresultString;
5198
+ ${block(pipelineIdentification)}
5199
+ Subparameter index: ${index}
5200
+ `));
5201
+ if (length > BIG_DATASET_TRESHOLD) {
5202
+ console.error(highLevelError);
5203
+ return '~';
5204
+ }
5205
+ throw highLevelError;
5206
+ }
5207
+ const allSubparameters = {
5208
+ ...parameters,
5209
+ ...mappedParameters,
5210
+ };
5211
+ Object.freeze(allSubparameters);
5212
+ try {
5213
+ const subresultString = await executeAttempts({
5214
+ ...options,
5215
+ priority: priority + index,
5216
+ parameters: allSubparameters,
5217
+ pipelineIdentification: spaceTrim((block) => `
5218
+ ${block(pipelineIdentification)}
5219
+ Subparameter index: ${index}
5220
+ `),
5221
+ });
5222
+ return subresultString;
5223
+ }
5224
+ catch (error) {
5225
+ if (length > BIG_DATASET_TRESHOLD) {
5226
+ console.error(spaceTrim((block) => `
5227
+ Error in FOREACH command:
5228
+
5229
+ ${block(pipelineIdentification)}
5230
+
5231
+ ${block(pipelineIdentification)}
5232
+ Subparameter index: ${index}
5233
+ `));
5234
+ return '~';
5235
+ }
5236
+ throw error;
5237
+ }
5238
+ },
5186
5239
  });
5187
5240
  return resultString;
5188
5241
  }
@@ -5316,7 +5369,11 @@ async function getKnowledgeForTask(options) {
5316
5369
  */
5317
5370
 
5318
5371
  /**
5319
- * @@@
5372
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5373
+ * Ensures all reserved parameters are defined and throws if any are missing.
5374
+ *
5375
+ * @param options - Options including tools, pipeline, task, and context.
5376
+ * @returns An object containing all reserved parameters for the task.
5320
5377
  *
5321
5378
  * @private internal utility of `createPipelineExecutor`
5322
5379
  */
@@ -5349,18 +5406,16 @@ async function getReservedParametersForTask(options) {
5349
5406
  }
5350
5407
 
5351
5408
  /**
5352
- * @@@
5409
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5410
+ *
5411
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5412
+ * @returns The output parameters produced by the task.
5353
5413
  *
5354
5414
  * @private internal utility of `createPipelineExecutor`
5355
5415
  */
5356
5416
  async function executeTask(options) {
5357
5417
  const { currentTask, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, maxExecutionAttempts, maxParallelCount, csvSettings, isVerbose, rootDirname, cacheDirname, intermediateFilesStrategy, isAutoInstalled, isNotPreparedWarningSupressed, } = options;
5358
5418
  const priority = preparedPipeline.tasks.length - preparedPipeline.tasks.indexOf(currentTask);
5359
- await onProgress({
5360
- outputParameters: {
5361
- [currentTask.resultingParameterName]: '', // <- TODO: [🧠] What is the best value here?
5362
- },
5363
- });
5364
5419
  // Note: Check consistency of used and dependent parameters which was also done in `validatePipeline`, but it’s good to doublecheck
5365
5420
  const usedParameterNames = extractParameterNamesFromTask(currentTask);
5366
5421
  const dependentParameterNames = new Set(currentTask.dependentParameterNames);
@@ -5435,6 +5490,7 @@ async function executeTask(options) {
5435
5490
  preparedPipeline,
5436
5491
  tools,
5437
5492
  $executionReport,
5493
+ onProgress,
5438
5494
  pipelineIdentification,
5439
5495
  maxExecutionAttempts,
5440
5496
  maxParallelCount,
@@ -5487,9 +5543,12 @@ function filterJustOutputParameters(options) {
5487
5543
  }
5488
5544
 
5489
5545
  /**
5490
- * @@@
5546
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5547
+ *
5548
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5491
5549
  *
5492
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5550
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5551
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5493
5552
  *
5494
5553
  * @private internal utility of `createPipelineExecutor`
5495
5554
  */