@promptbook/website-crawler 0.92.0-22 → 0.92.0-24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/esm/index.es.js +115 -55
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +6 -0
  4. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  5. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  6. package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
  7. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  8. package/esm/typings/src/config.d.ts +33 -11
  9. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
  10. package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
  11. package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +8 -3
  12. package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
  13. package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
  14. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +31 -6
  15. package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
  16. package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
  17. package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
  18. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  19. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  20. package/esm/typings/src/formfactors/index.d.ts +1 -1
  21. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  22. package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
  23. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  24. package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
  25. package/esm/typings/src/types/typeAliases.d.ts +9 -7
  26. package/esm/typings/src/utils/$Register.d.ts +8 -7
  27. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  28. package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
  29. package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
  30. package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
  31. package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
  32. package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
  33. package/package.json +2 -2
  34. package/umd/index.umd.js +115 -55
  35. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-22';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-24';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -132,6 +132,21 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
132
132
  * @public exported from `@promptbook/core`
133
133
  */
134
134
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
135
+ /**
136
+ * Threshold value that determines when a dataset is considered "big"
137
+ * and may require special handling or optimizations
138
+ *
139
+ * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
140
+ *
141
+ * @public exported from `@promptbook/core`
142
+ */
143
+ const BIG_DATASET_TRESHOLD = 50;
144
+ /**
145
+ * Placeholder text used to represent a placeholder value of failed operation
146
+ *
147
+ * @public exported from `@promptbook/core`
148
+ */
149
+ const FAILED_VALUE_PLACEHOLDER = '!?';
135
150
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
136
151
  /**
137
152
  * The maximum number of iterations for a loops
@@ -211,7 +226,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
211
226
  const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
212
227
  // <- TODO: [🧜‍♂️]
213
228
  /**
214
- * @@@
229
+ * Default settings for parsing and generating CSV files in Promptbook.
215
230
  *
216
231
  * @public exported from `@promptbook/core`
217
232
  */
@@ -222,19 +237,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
222
237
  skipEmptyLines: true,
223
238
  });
224
239
  /**
225
- * @@@
240
+ * Controls whether verbose logging is enabled by default throughout the application.
226
241
  *
227
242
  * @public exported from `@promptbook/core`
228
243
  */
229
244
  let DEFAULT_IS_VERBOSE = false;
230
245
  /**
231
- * @@@
246
+ * Controls whether auto-installation of dependencies is enabled by default.
232
247
  *
233
248
  * @public exported from `@promptbook/core`
234
249
  */
235
250
  const DEFAULT_IS_AUTO_INSTALLED = false;
236
251
  /**
237
- * @@@
252
+ * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
238
253
  *
239
254
  * @private within the repository
240
255
  */
@@ -339,7 +354,8 @@ class UnexpectedError extends Error {
339
354
  }
340
355
 
341
356
  /**
342
- * @@@
357
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
358
+ * regardless of the JavaScript environment in which the code is running
343
359
  *
344
360
  * Note: `$` is used to indicate that this function is not a pure function - it access global scope
345
361
  *
@@ -418,11 +434,11 @@ function normalizeTo_snake_case(text) {
418
434
  }
419
435
 
420
436
  /**
421
- * Register is @@@
437
+ * Global registry for storing and managing registered entities of a given type.
422
438
  *
423
439
  * Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
424
440
  *
425
- * @private internal utility, exported are only signleton instances of this class
441
+ * @private internal utility, exported are only singleton instances of this class
426
442
  */
427
443
  class $Register {
428
444
  constructor(registerName) {
@@ -466,10 +482,10 @@ class $Register {
466
482
  }
467
483
 
468
484
  /**
469
- * @@@
485
+ * Global registry for storing metadata about all available scrapers and converters.
470
486
  *
471
- * Note: `$` is used to indicate that this interacts with the global scope
472
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
487
+ * Note: `$` is used to indicate that this interacts with the global scope.
488
+ * @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
473
489
  * @public exported from `@promptbook/core`
474
490
  */
475
491
  const $scrapersMetadataRegister = new $Register('scrapers_metadata');
@@ -1110,7 +1126,7 @@ function assertsError(whatWasThrown) {
1110
1126
  * Function isValidJsonString will tell you if the string is valid JSON or not
1111
1127
  *
1112
1128
  * @param value The string to check
1113
- * @returns True if the string is a valid JSON string, false otherwise
1129
+ * @returns `true` if the string is a valid JSON string, false otherwise
1114
1130
  *
1115
1131
  * @public exported from `@promptbook/utils`
1116
1132
  */
@@ -1494,8 +1510,12 @@ function checkSerializableAsJson(options) {
1494
1510
  */
1495
1511
 
1496
1512
  /**
1497
- * @@@
1513
+ * Creates a deep clone of the given object
1514
+ *
1515
+ * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
1498
1516
  *
1517
+ * @param objectValue The object to clone.
1518
+ * @returns A deep, writable clone of the input object.
1499
1519
  * @public exported from `@promptbook/utils`
1500
1520
  */
1501
1521
  function deepClone(objectValue) {
@@ -2082,7 +2102,7 @@ class SimplePipelineCollection {
2082
2102
  /**
2083
2103
  * Constructs a pipeline collection from pipelines
2084
2104
  *
2085
- * @param pipelines @@@
2105
+ * @param pipelines Array of pipeline JSON objects to include in the collection
2086
2106
  *
2087
2107
  * Note: During the construction logic of all pipelines are validated
2088
2108
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -4113,7 +4133,7 @@ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO:
4113
4133
  * Function to check if a string is valid CSV
4114
4134
  *
4115
4135
  * @param value The string to check
4116
- * @returns True if the string is a valid CSV string, false otherwise
4136
+ * @returns `true` if the string is a valid CSV string, false otherwise
4117
4137
  *
4118
4138
  * @public exported from `@promptbook/utils`
4119
4139
  */
@@ -4170,18 +4190,28 @@ const CsvFormatParser = {
4170
4190
  `));
4171
4191
  }
4172
4192
  const mappedData = [];
4173
- for (let index = 0; index < csv.data.length; index++) {
4193
+ const length = csv.data.length;
4194
+ for (let index = 0; index < length; index++) {
4174
4195
  const row = csv.data[index];
4175
4196
  if (row[outputParameterName]) {
4176
4197
  throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
4177
4198
  }
4178
4199
  const mappedRow = {
4179
4200
  ...row,
4180
- [outputParameterName]: await mapCallback(row, index),
4201
+ [outputParameterName]: await mapCallback(row, index, length),
4181
4202
  };
4182
4203
  mappedData.push(mappedRow);
4183
4204
  if (onProgress) {
4184
4205
  // Note: Report the CSV with all rows mapped so far
4206
+ /*
4207
+ !!!!
4208
+ // Report progress with updated value
4209
+ const progressData = mappedData.map((row, i) =>
4210
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4211
+ );
4212
+
4213
+
4214
+ */
4185
4215
  await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4186
4216
  }
4187
4217
  }
@@ -4208,9 +4238,9 @@ const CsvFormatParser = {
4208
4238
  `));
4209
4239
  }
4210
4240
  const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
4211
- return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
4241
+ return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
4212
4242
  const index = rowIndex * Object.keys(row).length + columnIndex;
4213
- return /* not await */ mapCallback({ [key]: value }, index);
4243
+ return /* not await */ mapCallback({ [key]: value }, index, array.length);
4214
4244
  }));
4215
4245
  }));
4216
4246
  return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
@@ -4281,12 +4311,12 @@ const TextFormatParser = {
4281
4311
  async mapValues(options) {
4282
4312
  const { value, mapCallback, onProgress } = options;
4283
4313
  const lines = value.split('\n');
4284
- const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
4314
+ const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
4285
4315
  // TODO: [🧠] Maybe option to skip empty line
4286
4316
  /* not await */ mapCallback({
4287
4317
  lineContent,
4288
4318
  // TODO: [🧠] Maybe also put here `lineNumber`
4289
- }, lineNumber)));
4319
+ }, lineNumber, array.length)));
4290
4320
  return mappedLines.join('\n');
4291
4321
  },
4292
4322
  },
@@ -4307,7 +4337,7 @@ const TextFormatParser = {
4307
4337
  * Function to check if a string is valid XML
4308
4338
  *
4309
4339
  * @param value
4310
- * @returns True if the string is a valid XML string, false otherwise
4340
+ * @returns `true` if the string is a valid XML string, false otherwise
4311
4341
  *
4312
4342
  * @public exported from `@promptbook/utils`
4313
4343
  */
@@ -4369,13 +4399,13 @@ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser,
4369
4399
  */
4370
4400
 
4371
4401
  /**
4372
- * Maps available parameters to expected parameters
4402
+ * Maps available parameters to expected parameters for a pipeline task.
4373
4403
  *
4374
4404
  * The strategy is:
4375
- * 1) @@@
4376
- * 2) @@@
4405
+ * 1) First, match parameters by name where both available and expected.
4406
+ * 2) Then, if there are unmatched expected and available parameters, map them by order.
4377
4407
  *
4378
- * @throws {PipelineExecutionError} @@@
4408
+ * @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
4379
4409
  * @private within the repository used in `createPipelineExecutor`
4380
4410
  */
4381
4411
  function mapAvailableToExpectedParameters(options) {
@@ -5095,7 +5125,11 @@ async function executeAttempts(options) {
5095
5125
  */
5096
5126
 
5097
5127
  /**
5098
- * @@@
5128
+ * Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
5129
+ * Handles format and subformat resolution, error handling, and progress reporting.
5130
+ *
5131
+ * @param options - Options for execution, including task details and progress callback.
5132
+ * @returns The result of the subvalue mapping or execution attempts.
5099
5133
  *
5100
5134
  * @private internal utility of `createPipelineExecutor`
5101
5135
  */
@@ -5160,15 +5194,11 @@ async function executeFormatSubvalues(options) {
5160
5194
  settings: formatSettings,
5161
5195
  onProgress(partialResultString) {
5162
5196
  return onProgress(Object.freeze({
5163
- [task.resultingParameterName]:
5164
- // <- Note: [👩‍👩‍👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
5165
- partialResultString,
5197
+ [task.resultingParameterName]: partialResultString,
5166
5198
  }));
5167
5199
  },
5168
- async mapCallback(subparameters, index) {
5200
+ async mapCallback(subparameters, index, length) {
5169
5201
  let mappedParameters;
5170
- // TODO: [🤹‍♂️][🪂] Limit to N concurrent executions
5171
- // TODO: When done [๐Ÿš] Report progress also for each subvalue here
5172
5202
  try {
5173
5203
  mappedParameters = mapAvailableToExpectedParameters({
5174
5204
  expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
@@ -5179,32 +5209,52 @@ async function executeFormatSubvalues(options) {
5179
5209
  if (!(error instanceof PipelineExecutionError)) {
5180
5210
  throw error;
5181
5211
  }
5182
- throw new PipelineExecutionError(spaceTrim$1((block) => `
5183
- ${error.message}
5212
+ const highLevelError = new PipelineExecutionError(spaceTrim$1((block) => `
5213
+ ${error.message}
5184
5214
 
5185
- This is error in FOREACH command
5186
- You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5215
+ This is error in FOREACH command when mapping data
5216
+ You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5187
5217
 
5188
- ${block(pipelineIdentification)}
5189
- Subparameter index: ${index}
5190
- `));
5218
+ ${block(pipelineIdentification)}
5219
+ Subparameter index: ${index}
5220
+ `));
5221
+ if (length > BIG_DATASET_TRESHOLD) {
5222
+ console.error(highLevelError);
5223
+ return FAILED_VALUE_PLACEHOLDER;
5224
+ }
5225
+ throw highLevelError;
5191
5226
  }
5192
5227
  const allSubparameters = {
5193
5228
  ...parameters,
5194
5229
  ...mappedParameters,
5195
5230
  };
5196
- // Note: [👨‍👨‍👧] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
5197
5231
  Object.freeze(allSubparameters);
5198
- const subresultString = await executeAttempts({
5199
- ...options,
5200
- priority: priority + index,
5201
- parameters: allSubparameters,
5202
- pipelineIdentification: spaceTrim$1((block) => `
5203
- ${block(pipelineIdentification)}
5204
- Subparameter index: ${index}
5205
- `),
5206
- });
5207
- return subresultString;
5232
+ try {
5233
+ const subresultString = await executeAttempts({
5234
+ ...options,
5235
+ priority: priority + index,
5236
+ parameters: allSubparameters,
5237
+ pipelineIdentification: spaceTrim$1((block) => `
5238
+ ${block(pipelineIdentification)}
5239
+ Subparameter index: ${index}
5240
+ `),
5241
+ });
5242
+ return subresultString;
5243
+ }
5244
+ catch (error) {
5245
+ if (length > BIG_DATASET_TRESHOLD) {
5246
+ console.error(spaceTrim$1((block) => `
5247
+ Error in FOREACH command:
5248
+
5249
+ ${block(pipelineIdentification)}
5250
+
5251
+ ${block(pipelineIdentification)}
5252
+ Subparameter index: ${index}
5253
+ `));
5254
+ return FAILED_VALUE_PLACEHOLDER;
5255
+ }
5256
+ throw error;
5257
+ }
5208
5258
  },
5209
5259
  });
5210
5260
  return resultString;
@@ -5339,7 +5389,11 @@ async function getKnowledgeForTask(options) {
5339
5389
  */
5340
5390
 
5341
5391
  /**
5342
- * @@@
5392
+ * Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
5393
+ * Ensures all reserved parameters are defined and throws if any are missing.
5394
+ *
5395
+ * @param options - Options including tools, pipeline, task, and context.
5396
+ * @returns An object containing all reserved parameters for the task.
5343
5397
  *
5344
5398
  * @private internal utility of `createPipelineExecutor`
5345
5399
  */
@@ -5372,7 +5426,10 @@ async function getReservedParametersForTask(options) {
5372
5426
  }
5373
5427
 
5374
5428
  /**
5375
- * @@@
5429
+ * Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
5430
+ *
5431
+ * @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
5432
+ * @returns The output parameters produced by the task.
5376
5433
  *
5377
5434
  * @private internal utility of `createPipelineExecutor`
5378
5435
  */
@@ -5506,9 +5563,12 @@ function filterJustOutputParameters(options) {
5506
5563
  }
5507
5564
 
5508
5565
  /**
5509
- * @@@
5566
+ * Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
5567
+ *
5568
+ * Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
5510
5569
  *
5511
- * Note: This is not a `PipelineExecutor` (which is binded with one exact pipeline), but a utility function of `createPipelineExecutor` which creates `PipelineExecutor`
5570
+ * @param options - Options for execution, including input parameters, pipeline, and callbacks.
5571
+ * @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
5512
5572
  *
5513
5573
  * @private internal utility of `createPipelineExecutor`
5514
5574
  */