@promptbook/website-crawler 0.92.0-22 → 0.92.0-23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +94 -53
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +6 -0
- package/esm/typings/src/commands/FOREACH/ForeachJson.d.ts +6 -6
- package/esm/typings/src/config.d.ts +29 -11
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +12 -9
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTask.d.ts +11 -8
- package/esm/typings/src/execution/createPipelineExecutor/30-executeFormatSubvalues.d.ts +8 -3
- package/esm/typings/src/execution/createPipelineExecutor/getReservedParametersForTask.d.ts +10 -8
- package/esm/typings/src/formats/_common/FormatParser.d.ts +5 -3
- package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +31 -6
- package/esm/typings/src/formats/csv/utils/isValidCsvString.d.ts +1 -1
- package/esm/typings/src/formats/json/utils/isValidJsonString.d.ts +1 -1
- package/esm/typings/src/formats/xml/utils/isValidXmlString.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/register/LlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/scrapers/_common/register/$scrapersMetadataRegister.d.ts +3 -3
- package/esm/typings/src/types/typeAliases.d.ts +9 -7
- package/esm/typings/src/utils/$Register.d.ts +8 -7
- package/esm/typings/src/utils/parameters/mapAvailableToExpectedParameters.d.ts +7 -7
- package/esm/typings/src/utils/serialization/clonePipeline.d.ts +4 -3
- package/esm/typings/src/utils/serialization/deepClone.d.ts +5 -1
- package/esm/typings/src/utils/validators/javascriptName/isValidJavascriptName.d.ts +3 -3
- package/esm/typings/src/utils/validators/parameterName/validateParameterName.d.ts +5 -4
- package/package.json +2 -2
- package/umd/index.umd.js +94 -53
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
29
29
|
* @generated
|
|
30
30
|
* @see https://github.com/webgptorg/promptbook
|
|
31
31
|
*/
|
|
32
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-22';
|
|
32
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
|
|
33
33
|
/**
|
|
34
34
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
35
35
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
@@ -132,6 +132,12 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
|
|
|
132
132
|
* @public exported from `@promptbook/core`
|
|
133
133
|
*/
|
|
134
134
|
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
|
|
135
|
+
/**
|
|
136
|
+
* @@@
|
|
137
|
+
*
|
|
138
|
+
* @public exported from `@promptbook/core`
|
|
139
|
+
*/
|
|
140
|
+
const BIG_DATASET_TRESHOLD = 50;
|
|
135
141
|
// <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
|
|
136
142
|
/**
|
|
137
143
|
* The maximum number of iterations for a loops
|
|
@@ -211,7 +217,7 @@ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
|
|
|
211
217
|
const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
|
|
212
218
|
// <- TODO: [๐งโโ๏ธ]
|
|
213
219
|
/**
|
|
214
|
-
*
|
|
220
|
+
* Default settings for parsing and generating CSV files in Promptbook.
|
|
215
221
|
*
|
|
216
222
|
* @public exported from `@promptbook/core`
|
|
217
223
|
*/
|
|
@@ -222,19 +228,19 @@ const DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
222
228
|
skipEmptyLines: true,
|
|
223
229
|
});
|
|
224
230
|
/**
|
|
225
|
-
*
|
|
231
|
+
* Controls whether verbose logging is enabled by default throughout the application.
|
|
226
232
|
*
|
|
227
233
|
* @public exported from `@promptbook/core`
|
|
228
234
|
*/
|
|
229
235
|
let DEFAULT_IS_VERBOSE = false;
|
|
230
236
|
/**
|
|
231
|
-
*
|
|
237
|
+
* Controls whether auto-installation of dependencies is enabled by default.
|
|
232
238
|
*
|
|
233
239
|
* @public exported from `@promptbook/core`
|
|
234
240
|
*/
|
|
235
241
|
const DEFAULT_IS_AUTO_INSTALLED = false;
|
|
236
242
|
/**
|
|
237
|
-
*
|
|
243
|
+
* Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
|
|
238
244
|
*
|
|
239
245
|
* @private within the repository
|
|
240
246
|
*/
|
|
@@ -418,11 +424,11 @@ function normalizeTo_snake_case(text) {
|
|
|
418
424
|
}
|
|
419
425
|
|
|
420
426
|
/**
|
|
421
|
-
*
|
|
427
|
+
* Global registry for storing and managing registered entities of a given type.
|
|
422
428
|
*
|
|
423
429
|
* Note: `$` is used to indicate that this function is not a pure function - it accesses and adds variables in global scope.
|
|
424
430
|
*
|
|
425
|
-
* @private internal utility, exported are only
|
|
431
|
+
* @private internal utility, exported are only singleton instances of this class
|
|
426
432
|
*/
|
|
427
433
|
class $Register {
|
|
428
434
|
constructor(registerName) {
|
|
@@ -466,10 +472,10 @@ class $Register {
|
|
|
466
472
|
}
|
|
467
473
|
|
|
468
474
|
/**
|
|
469
|
-
*
|
|
475
|
+
* Global registry for storing metadata about all available scrapers and converters.
|
|
470
476
|
*
|
|
471
|
-
* Note: `$` is used to indicate that this interacts with the global scope
|
|
472
|
-
* @singleton Only one instance of each register is created per build, but
|
|
477
|
+
* Note: `$` is used to indicate that this interacts with the global scope.
|
|
478
|
+
* @singleton Only one instance of each register is created per build, but there can be more in different contexts (e.g., tests).
|
|
473
479
|
* @public exported from `@promptbook/core`
|
|
474
480
|
*/
|
|
475
481
|
const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
@@ -1110,7 +1116,7 @@ function assertsError(whatWasThrown) {
|
|
|
1110
1116
|
* Function isValidJsonString will tell you if the string is valid JSON or not
|
|
1111
1117
|
*
|
|
1112
1118
|
* @param value The string to check
|
|
1113
|
-
* @returns
|
|
1119
|
+
* @returns `true` if the string is a valid JSON string, false otherwise
|
|
1114
1120
|
*
|
|
1115
1121
|
* @public exported from `@promptbook/utils`
|
|
1116
1122
|
*/
|
|
@@ -1494,8 +1500,12 @@ function checkSerializableAsJson(options) {
|
|
|
1494
1500
|
*/
|
|
1495
1501
|
|
|
1496
1502
|
/**
|
|
1497
|
-
*
|
|
1503
|
+
* Creates a deep clone of the given object
|
|
1504
|
+
*
|
|
1505
|
+
* Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
|
|
1498
1506
|
*
|
|
1507
|
+
* @param objectValue The object to clone.
|
|
1508
|
+
* @returns A deep, writable clone of the input object.
|
|
1499
1509
|
* @public exported from `@promptbook/utils`
|
|
1500
1510
|
*/
|
|
1501
1511
|
function deepClone(objectValue) {
|
|
@@ -4113,7 +4123,7 @@ function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO:
|
|
|
4113
4123
|
* Function to check if a string is valid CSV
|
|
4114
4124
|
*
|
|
4115
4125
|
* @param value The string to check
|
|
4116
|
-
* @returns
|
|
4126
|
+
* @returns `true` if the string is a valid CSV string, false otherwise
|
|
4117
4127
|
*
|
|
4118
4128
|
* @public exported from `@promptbook/utils`
|
|
4119
4129
|
*/
|
|
@@ -4170,14 +4180,15 @@ const CsvFormatParser = {
|
|
|
4170
4180
|
`));
|
|
4171
4181
|
}
|
|
4172
4182
|
const mappedData = [];
|
|
4173
|
-
|
|
4183
|
+
const length = csv.data.length;
|
|
4184
|
+
for (let index = 0; index < length; index++) {
|
|
4174
4185
|
const row = csv.data[index];
|
|
4175
4186
|
if (row[outputParameterName]) {
|
|
4176
4187
|
throw new CsvFormatError(`Can not overwrite existing column "${outputParameterName}" in CSV row`);
|
|
4177
4188
|
}
|
|
4178
4189
|
const mappedRow = {
|
|
4179
4190
|
...row,
|
|
4180
|
-
[outputParameterName]: await mapCallback(row, index),
|
|
4191
|
+
[outputParameterName]: await mapCallback(row, index, length),
|
|
4181
4192
|
};
|
|
4182
4193
|
mappedData.push(mappedRow);
|
|
4183
4194
|
if (onProgress) {
|
|
@@ -4208,9 +4219,9 @@ const CsvFormatParser = {
|
|
|
4208
4219
|
`));
|
|
4209
4220
|
}
|
|
4210
4221
|
const mappedData = await Promise.all(csv.data.map(async (row, rowIndex) => {
|
|
4211
|
-
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex) => {
|
|
4222
|
+
return /* not await */ Promise.all(Object.entries(row).map(async ([key, value], columnIndex, array) => {
|
|
4212
4223
|
const index = rowIndex * Object.keys(row).length + columnIndex;
|
|
4213
|
-
return /* not await */ mapCallback({ [key]: value }, index);
|
|
4224
|
+
return /* not await */ mapCallback({ [key]: value }, index, array.length);
|
|
4214
4225
|
}));
|
|
4215
4226
|
}));
|
|
4216
4227
|
return unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS });
|
|
@@ -4281,12 +4292,12 @@ const TextFormatParser = {
|
|
|
4281
4292
|
async mapValues(options) {
|
|
4282
4293
|
const { value, mapCallback, onProgress } = options;
|
|
4283
4294
|
const lines = value.split('\n');
|
|
4284
|
-
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber) =>
|
|
4295
|
+
const mappedLines = await Promise.all(lines.map((lineContent, lineNumber, array) =>
|
|
4285
4296
|
// TODO: [🧠] Maybe option to skip empty line
|
|
4286
4297
|
/* not await */ mapCallback({
|
|
4287
4298
|
lineContent,
|
|
4288
4299
|
// TODO: [🧠] Maybe also put here `lineNumber`
|
|
4289
|
-
}, lineNumber)));
|
|
4300
|
+
}, lineNumber, array.length)));
|
|
4290
4301
|
return mappedLines.join('\n');
|
|
4291
4302
|
},
|
|
4292
4303
|
},
|
|
@@ -4307,7 +4318,7 @@ const TextFormatParser = {
|
|
|
4307
4318
|
* Function to check if a string is valid XML
|
|
4308
4319
|
*
|
|
4309
4320
|
* @param value
|
|
4310
|
-
* @returns
|
|
4321
|
+
* @returns `true` if the string is a valid XML string, false otherwise
|
|
4311
4322
|
*
|
|
4312
4323
|
* @public exported from `@promptbook/utils`
|
|
4313
4324
|
*/
|
|
@@ -4369,13 +4380,13 @@ const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser,
|
|
|
4369
4380
|
*/
|
|
4370
4381
|
|
|
4371
4382
|
/**
|
|
4372
|
-
* Maps available parameters to expected parameters
|
|
4383
|
+
* Maps available parameters to expected parameters for a pipeline task.
|
|
4373
4384
|
*
|
|
4374
4385
|
* The strategy is:
|
|
4375
|
-
* 1)
|
|
4376
|
-
* 2)
|
|
4386
|
+
* 1) First, match parameters by name where both available and expected.
|
|
4387
|
+
* 2) Then, if there are unmatched expected and available parameters, map them by order.
|
|
4377
4388
|
*
|
|
4378
|
-
* @throws {PipelineExecutionError}
|
|
4389
|
+
* @throws {PipelineExecutionError} If the number of unmatched expected and available parameters does not match, or mapping is ambiguous.
|
|
4379
4390
|
* @private within the repository used in `createPipelineExecutor`
|
|
4380
4391
|
*/
|
|
4381
4392
|
function mapAvailableToExpectedParameters(options) {
|
|
@@ -5095,7 +5106,11 @@ async function executeAttempts(options) {
|
|
|
5095
5106
|
*/
|
|
5096
5107
|
|
|
5097
5108
|
/**
|
|
5098
|
-
*
|
|
5109
|
+
* Executes a pipeline task that requires mapping or iterating over subvalues of a parameter (such as rows in a CSV).
|
|
5110
|
+
* Handles format and subformat resolution, error handling, and progress reporting.
|
|
5111
|
+
*
|
|
5112
|
+
* @param options - Options for execution, including task details and progress callback.
|
|
5113
|
+
* @returns The result of the subvalue mapping or execution attempts.
|
|
5099
5114
|
*
|
|
5100
5115
|
* @private internal utility of `createPipelineExecutor`
|
|
5101
5116
|
*/
|
|
@@ -5160,15 +5175,11 @@ async function executeFormatSubvalues(options) {
|
|
|
5160
5175
|
settings: formatSettings,
|
|
5161
5176
|
onProgress(partialResultString) {
|
|
5162
5177
|
return onProgress(Object.freeze({
|
|
5163
|
-
[task.resultingParameterName]:
|
|
5164
|
-
// <- Note: [๐ฉโ๐ฉโ๐ง] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5165
|
-
partialResultString,
|
|
5178
|
+
[task.resultingParameterName]: partialResultString,
|
|
5166
5179
|
}));
|
|
5167
5180
|
},
|
|
5168
|
-
async mapCallback(subparameters, index) {
|
|
5181
|
+
async mapCallback(subparameters, index, length) {
|
|
5169
5182
|
let mappedParameters;
|
|
5170
|
-
// TODO: [๐คนโโ๏ธ][๐ช] Limit to N concurrent executions
|
|
5171
|
-
// TODO: When done [๐] Report progress also for each subvalue here
|
|
5172
5183
|
try {
|
|
5173
5184
|
mappedParameters = mapAvailableToExpectedParameters({
|
|
5174
5185
|
expectedParameters: Object.fromEntries(task.foreach.inputSubparameterNames.map((subparameterName) => [subparameterName, null])),
|
|
@@ -5179,32 +5190,52 @@ async function executeFormatSubvalues(options) {
|
|
|
5179
5190
|
if (!(error instanceof PipelineExecutionError)) {
|
|
5180
5191
|
throw error;
|
|
5181
5192
|
}
|
|
5182
|
-
|
|
5183
|
-
|
|
5193
|
+
const highLevelError = new PipelineExecutionError(spaceTrim$1((block) => `
|
|
5194
|
+
${error.message}
|
|
5184
5195
|
|
|
5185
|
-
|
|
5186
|
-
|
|
5196
|
+
This is error in FOREACH command when mapping data
|
|
5197
|
+
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5187
5198
|
|
|
5188
|
-
|
|
5189
|
-
|
|
5190
|
-
|
|
5199
|
+
${block(pipelineIdentification)}
|
|
5200
|
+
Subparameter index: ${index}
|
|
5201
|
+
`));
|
|
5202
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5203
|
+
console.error(highLevelError);
|
|
5204
|
+
return '~';
|
|
5205
|
+
}
|
|
5206
|
+
throw highLevelError;
|
|
5191
5207
|
}
|
|
5192
5208
|
const allSubparameters = {
|
|
5193
5209
|
...parameters,
|
|
5194
5210
|
...mappedParameters,
|
|
5195
5211
|
};
|
|
5196
|
-
// Note: [๐จโ๐จโ๐ง] Now we can freeze `subparameters` because we are sure that all and only used parameters are defined and are not going to be changed
|
|
5197
5212
|
Object.freeze(allSubparameters);
|
|
5198
|
-
|
|
5199
|
-
|
|
5200
|
-
|
|
5201
|
-
|
|
5202
|
-
|
|
5203
|
-
|
|
5204
|
-
|
|
5205
|
-
|
|
5206
|
-
|
|
5207
|
-
|
|
5213
|
+
try {
|
|
5214
|
+
const subresultString = await executeAttempts({
|
|
5215
|
+
...options,
|
|
5216
|
+
priority: priority + index,
|
|
5217
|
+
parameters: allSubparameters,
|
|
5218
|
+
pipelineIdentification: spaceTrim$1((block) => `
|
|
5219
|
+
${block(pipelineIdentification)}
|
|
5220
|
+
Subparameter index: ${index}
|
|
5221
|
+
`),
|
|
5222
|
+
});
|
|
5223
|
+
return subresultString;
|
|
5224
|
+
}
|
|
5225
|
+
catch (error) {
|
|
5226
|
+
if (length > BIG_DATASET_TRESHOLD) {
|
|
5227
|
+
console.error(spaceTrim$1((block) => `
|
|
5228
|
+
Error in FOREACH command:
|
|
5229
|
+
|
|
5230
|
+
${block(pipelineIdentification)}
|
|
5231
|
+
|
|
5232
|
+
${block(pipelineIdentification)}
|
|
5233
|
+
Subparameter index: ${index}
|
|
5234
|
+
`));
|
|
5235
|
+
return '~';
|
|
5236
|
+
}
|
|
5237
|
+
throw error;
|
|
5238
|
+
}
|
|
5208
5239
|
},
|
|
5209
5240
|
});
|
|
5210
5241
|
return resultString;
|
|
@@ -5339,7 +5370,11 @@ async function getKnowledgeForTask(options) {
|
|
|
5339
5370
|
*/
|
|
5340
5371
|
|
|
5341
5372
|
/**
|
|
5342
|
-
*
|
|
5373
|
+
* Retrieves all reserved parameters for a given pipeline task, including context, knowledge, examples, and metadata.
|
|
5374
|
+
* Ensures all reserved parameters are defined and throws if any are missing.
|
|
5375
|
+
*
|
|
5376
|
+
* @param options - Options including tools, pipeline, task, and context.
|
|
5377
|
+
* @returns An object containing all reserved parameters for the task.
|
|
5343
5378
|
*
|
|
5344
5379
|
* @private internal utility of `createPipelineExecutor`
|
|
5345
5380
|
*/
|
|
@@ -5372,7 +5407,10 @@ async function getReservedParametersForTask(options) {
|
|
|
5372
5407
|
}
|
|
5373
5408
|
|
|
5374
5409
|
/**
|
|
5375
|
-
*
|
|
5410
|
+
* Executes a single task within a pipeline, handling parameter validation, error checking, and progress reporting.
|
|
5411
|
+
*
|
|
5412
|
+
* @param options - Options for execution, including the task, pipeline, parameters, and callbacks.
|
|
5413
|
+
* @returns The output parameters produced by the task.
|
|
5376
5414
|
*
|
|
5377
5415
|
* @private internal utility of `createPipelineExecutor`
|
|
5378
5416
|
*/
|
|
@@ -5506,9 +5544,12 @@ function filterJustOutputParameters(options) {
|
|
|
5506
5544
|
}
|
|
5507
5545
|
|
|
5508
5546
|
/**
|
|
5509
|
-
*
|
|
5547
|
+
* Executes an entire pipeline, resolving tasks in dependency order, handling errors, and reporting progress.
|
|
5548
|
+
*
|
|
5549
|
+
* Note: This is not a `PipelineExecutor` (which is bound to a single pipeline), but a utility function used by `createPipelineExecutor` to create a `PipelineExecutor`.
|
|
5510
5550
|
*
|
|
5511
|
-
*
|
|
5551
|
+
* @param options - Options for execution, including input parameters, pipeline, and callbacks.
|
|
5552
|
+
* @returns The result of the pipeline execution, including output parameters, errors, and usage statistics.
|
|
5512
5553
|
*
|
|
5513
5554
|
* @private internal utility of `createPipelineExecutor`
|
|
5514
5555
|
*/
|