@promptbook/website-crawler 0.92.0-11 → 0.92.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +222 -199
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/browser.index.d.ts +2 -0
- package/esm/typings/src/_packages/core.index.d.ts +6 -4
- package/esm/typings/src/_packages/types.index.d.ts +2 -2
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +3 -1
- package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
- package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
- package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +3 -3
- package/esm/typings/src/formats/_common/{FormatSubvalueDefinition.d.ts → FormatSubvalueParser.d.ts} +1 -1
- package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
- package/esm/typings/src/formats/index.d.ts +2 -2
- package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
- package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
- package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
- package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
- package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
- package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
- package/package.json +2 -2
- package/umd/index.umd.js +222 -199
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
- package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-11';
|
|
28
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-13';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -2271,75 +2271,6 @@
|
|
|
2271
2271
|
* - [♨] Are tasks prepared
|
|
2272
2272
|
*/
|
|
2273
2273
|
|
|
2274
|
-
/**
|
|
2275
|
-
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2276
|
-
*
|
|
2277
|
-
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2278
|
-
*
|
|
2279
|
-
* @public exported from `@promptbook/utils`
|
|
2280
|
-
*/
|
|
2281
|
-
function jsonParse(value) {
|
|
2282
|
-
if (value === undefined) {
|
|
2283
|
-
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2284
|
-
}
|
|
2285
|
-
else if (typeof value !== 'string') {
|
|
2286
|
-
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2287
|
-
throw new Error(spaceTrim__default["default"](`
|
|
2288
|
-
Can not parse JSON from non-string value.
|
|
2289
|
-
|
|
2290
|
-
The value type: ${typeof value}
|
|
2291
|
-
See more in console.
|
|
2292
|
-
`));
|
|
2293
|
-
}
|
|
2294
|
-
try {
|
|
2295
|
-
return JSON.parse(value);
|
|
2296
|
-
}
|
|
2297
|
-
catch (error) {
|
|
2298
|
-
if (!(error instanceof Error)) {
|
|
2299
|
-
throw error;
|
|
2300
|
-
}
|
|
2301
|
-
throw new Error(spaceTrim__default["default"]((block) => `
|
|
2302
|
-
${block(error.message)}
|
|
2303
|
-
|
|
2304
|
-
The JSON text:
|
|
2305
|
-
${block(value)}
|
|
2306
|
-
`));
|
|
2307
|
-
}
|
|
2308
|
-
}
|
|
2309
|
-
/**
|
|
2310
|
-
* TODO: !!!! Use in Promptbook.studio
|
|
2311
|
-
*/
|
|
2312
|
-
|
|
2313
|
-
/**
|
|
2314
|
-
* Recursively converts JSON strings to JSON objects
|
|
2315
|
-
|
|
2316
|
-
* @public exported from `@promptbook/utils`
|
|
2317
|
-
*/
|
|
2318
|
-
function jsonStringsToJsons(object) {
|
|
2319
|
-
if (object === null) {
|
|
2320
|
-
return object;
|
|
2321
|
-
}
|
|
2322
|
-
if (Array.isArray(object)) {
|
|
2323
|
-
return object.map(jsonStringsToJsons);
|
|
2324
|
-
}
|
|
2325
|
-
if (typeof object !== 'object') {
|
|
2326
|
-
return object;
|
|
2327
|
-
}
|
|
2328
|
-
const newObject = { ...object };
|
|
2329
|
-
for (const [key, value] of Object.entries(object)) {
|
|
2330
|
-
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2331
|
-
newObject[key] = jsonParse(value);
|
|
2332
|
-
}
|
|
2333
|
-
else {
|
|
2334
|
-
newObject[key] = jsonStringsToJsons(value);
|
|
2335
|
-
}
|
|
2336
|
-
}
|
|
2337
|
-
return newObject;
|
|
2338
|
-
}
|
|
2339
|
-
/**
|
|
2340
|
-
* TODO: Type the return type correctly
|
|
2341
|
-
*/
|
|
2342
|
-
|
|
2343
2274
|
/**
|
|
2344
2275
|
* This error indicates problems parsing the format value
|
|
2345
2276
|
*
|
|
@@ -2500,6 +2431,104 @@
|
|
|
2500
2431
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2501
2432
|
*/
|
|
2502
2433
|
|
|
2434
|
+
/**
|
|
2435
|
+
* Serializes an error into a [🚉] JSON-serializable object
|
|
2436
|
+
*
|
|
2437
|
+
* @public exported from `@promptbook/utils`
|
|
2438
|
+
*/
|
|
2439
|
+
function serializeError(error) {
|
|
2440
|
+
const { name, message, stack } = error;
|
|
2441
|
+
const { id } = error;
|
|
2442
|
+
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2443
|
+
console.error(spaceTrim__default["default"]((block) => `
|
|
2444
|
+
|
|
2445
|
+
Cannot serialize error with name "${name}"
|
|
2446
|
+
|
|
2447
|
+
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2448
|
+
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2449
|
+
|
|
2450
|
+
|
|
2451
|
+
${block(stack || message)}
|
|
2452
|
+
|
|
2453
|
+
`));
|
|
2454
|
+
}
|
|
2455
|
+
return {
|
|
2456
|
+
name: name,
|
|
2457
|
+
message,
|
|
2458
|
+
stack,
|
|
2459
|
+
id, // Include id in the serialized object
|
|
2460
|
+
};
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
/**
|
|
2464
|
+
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2465
|
+
*
|
|
2466
|
+
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2467
|
+
*
|
|
2468
|
+
* @public exported from `@promptbook/utils`
|
|
2469
|
+
*/
|
|
2470
|
+
function jsonParse(value) {
|
|
2471
|
+
if (value === undefined) {
|
|
2472
|
+
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2473
|
+
}
|
|
2474
|
+
else if (typeof value !== 'string') {
|
|
2475
|
+
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2476
|
+
throw new Error(spaceTrim__default["default"](`
|
|
2477
|
+
Can not parse JSON from non-string value.
|
|
2478
|
+
|
|
2479
|
+
The value type: ${typeof value}
|
|
2480
|
+
See more in console.
|
|
2481
|
+
`));
|
|
2482
|
+
}
|
|
2483
|
+
try {
|
|
2484
|
+
return JSON.parse(value);
|
|
2485
|
+
}
|
|
2486
|
+
catch (error) {
|
|
2487
|
+
if (!(error instanceof Error)) {
|
|
2488
|
+
throw error;
|
|
2489
|
+
}
|
|
2490
|
+
throw new Error(spaceTrim__default["default"]((block) => `
|
|
2491
|
+
${block(error.message)}
|
|
2492
|
+
|
|
2493
|
+
The JSON text:
|
|
2494
|
+
${block(value)}
|
|
2495
|
+
`));
|
|
2496
|
+
}
|
|
2497
|
+
}
|
|
2498
|
+
/**
|
|
2499
|
+
* TODO: !!!! Use in Promptbook.studio
|
|
2500
|
+
*/
|
|
2501
|
+
|
|
2502
|
+
/**
|
|
2503
|
+
* Recursively converts JSON strings to JSON objects
|
|
2504
|
+
|
|
2505
|
+
* @public exported from `@promptbook/utils`
|
|
2506
|
+
*/
|
|
2507
|
+
function jsonStringsToJsons(object) {
|
|
2508
|
+
if (object === null) {
|
|
2509
|
+
return object;
|
|
2510
|
+
}
|
|
2511
|
+
if (Array.isArray(object)) {
|
|
2512
|
+
return object.map(jsonStringsToJsons);
|
|
2513
|
+
}
|
|
2514
|
+
if (typeof object !== 'object') {
|
|
2515
|
+
return object;
|
|
2516
|
+
}
|
|
2517
|
+
const newObject = { ...object };
|
|
2518
|
+
for (const [key, value] of Object.entries(object)) {
|
|
2519
|
+
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2520
|
+
newObject[key] = jsonParse(value);
|
|
2521
|
+
}
|
|
2522
|
+
else {
|
|
2523
|
+
newObject[key] = jsonStringsToJsons(value);
|
|
2524
|
+
}
|
|
2525
|
+
}
|
|
2526
|
+
return newObject;
|
|
2527
|
+
}
|
|
2528
|
+
/**
|
|
2529
|
+
* TODO: Type the return type correctly
|
|
2530
|
+
*/
|
|
2531
|
+
|
|
2503
2532
|
/**
|
|
2504
2533
|
* Deserializes the error object
|
|
2505
2534
|
*
|
|
@@ -2665,64 +2694,6 @@
|
|
|
2665
2694
|
* TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
|
|
2666
2695
|
*/
|
|
2667
2696
|
|
|
2668
|
-
/**
|
|
2669
|
-
* Serializes an error into a [🚉] JSON-serializable object
|
|
2670
|
-
*
|
|
2671
|
-
* @public exported from `@promptbook/utils`
|
|
2672
|
-
*/
|
|
2673
|
-
function serializeError(error) {
|
|
2674
|
-
const { name, message, stack } = error;
|
|
2675
|
-
const { id } = error;
|
|
2676
|
-
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2677
|
-
console.error(spaceTrim__default["default"]((block) => `
|
|
2678
|
-
|
|
2679
|
-
Cannot serialize error with name "${name}"
|
|
2680
|
-
|
|
2681
|
-
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2682
|
-
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
${block(stack || message)}
|
|
2686
|
-
|
|
2687
|
-
`));
|
|
2688
|
-
}
|
|
2689
|
-
return {
|
|
2690
|
-
name: name,
|
|
2691
|
-
message,
|
|
2692
|
-
stack,
|
|
2693
|
-
id, // Include id in the serialized object
|
|
2694
|
-
};
|
|
2695
|
-
}
|
|
2696
|
-
|
|
2697
|
-
/**
|
|
2698
|
-
* Async version of Array.forEach
|
|
2699
|
-
*
|
|
2700
|
-
* @param array - Array to iterate over
|
|
2701
|
-
* @param options - Options for the function
|
|
2702
|
-
* @param callbackfunction - Function to call for each item
|
|
2703
|
-
* @public exported from `@promptbook/utils`
|
|
2704
|
-
* @deprecated [🪂] Use queues instead
|
|
2705
|
-
*/
|
|
2706
|
-
async function forEachAsync(array, options, callbackfunction) {
|
|
2707
|
-
const { maxParallelCount = Infinity } = options;
|
|
2708
|
-
let index = 0;
|
|
2709
|
-
let runningTasks = [];
|
|
2710
|
-
const tasks = [];
|
|
2711
|
-
for (const item of array) {
|
|
2712
|
-
const currentIndex = index++;
|
|
2713
|
-
const task = callbackfunction(item, currentIndex, array);
|
|
2714
|
-
tasks.push(task);
|
|
2715
|
-
runningTasks.push(task);
|
|
2716
|
-
/* not await */ Promise.resolve(task).then(() => {
|
|
2717
|
-
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2718
|
-
});
|
|
2719
|
-
if (maxParallelCount < runningTasks.length) {
|
|
2720
|
-
await Promise.race(runningTasks);
|
|
2721
|
-
}
|
|
2722
|
-
}
|
|
2723
|
-
await Promise.all(tasks);
|
|
2724
|
-
}
|
|
2725
|
-
|
|
2726
2697
|
/**
|
|
2727
2698
|
* Represents the uncertain value
|
|
2728
2699
|
*
|
|
@@ -2766,7 +2737,7 @@
|
|
|
2766
2737
|
*
|
|
2767
2738
|
* @public exported from `@promptbook/core`
|
|
2768
2739
|
*/
|
|
2769
|
-
$deepFreeze({
|
|
2740
|
+
const UNCERTAIN_USAGE = $deepFreeze({
|
|
2770
2741
|
price: UNCERTAIN_ZERO_VALUE,
|
|
2771
2742
|
input: {
|
|
2772
2743
|
tokensCount: UNCERTAIN_ZERO_VALUE,
|
|
@@ -2791,6 +2762,35 @@
|
|
|
2791
2762
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2792
2763
|
*/
|
|
2793
2764
|
|
|
2765
|
+
/**
|
|
2766
|
+
* Async version of Array.forEach
|
|
2767
|
+
*
|
|
2768
|
+
* @param array - Array to iterate over
|
|
2769
|
+
* @param options - Options for the function
|
|
2770
|
+
* @param callbackfunction - Function to call for each item
|
|
2771
|
+
* @public exported from `@promptbook/utils`
|
|
2772
|
+
* @deprecated [🪂] Use queues instead
|
|
2773
|
+
*/
|
|
2774
|
+
async function forEachAsync(array, options, callbackfunction) {
|
|
2775
|
+
const { maxParallelCount = Infinity } = options;
|
|
2776
|
+
let index = 0;
|
|
2777
|
+
let runningTasks = [];
|
|
2778
|
+
const tasks = [];
|
|
2779
|
+
for (const item of array) {
|
|
2780
|
+
const currentIndex = index++;
|
|
2781
|
+
const task = callbackfunction(item, currentIndex, array);
|
|
2782
|
+
tasks.push(task);
|
|
2783
|
+
runningTasks.push(task);
|
|
2784
|
+
/* not await */ Promise.resolve(task).then(() => {
|
|
2785
|
+
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2786
|
+
});
|
|
2787
|
+
if (maxParallelCount < runningTasks.length) {
|
|
2788
|
+
await Promise.race(runningTasks);
|
|
2789
|
+
}
|
|
2790
|
+
}
|
|
2791
|
+
await Promise.all(tasks);
|
|
2792
|
+
}
|
|
2793
|
+
|
|
2794
2794
|
/**
|
|
2795
2795
|
* Function `addUsage` will add multiple usages into one
|
|
2796
2796
|
*
|
|
@@ -4084,6 +4084,24 @@
|
|
|
4084
4084
|
// encoding: 'utf-8',
|
|
4085
4085
|
});
|
|
4086
4086
|
|
|
4087
|
+
/**
|
|
4088
|
+
* Converts a CSV string into an object
|
|
4089
|
+
*
|
|
4090
|
+
* Note: This is wrapper around `papaparse.parse()` with better autohealing
|
|
4091
|
+
*
|
|
4092
|
+
* @private - for now until `@promptbook/csv` is released
|
|
4093
|
+
*/
|
|
4094
|
+
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
|
|
4095
|
+
settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
|
|
4096
|
+
// Note: Autoheal invalid '\n' characters
|
|
4097
|
+
if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
|
|
4098
|
+
console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
|
|
4099
|
+
value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
4100
|
+
}
|
|
4101
|
+
const csv = papaparse.parse(value, settings);
|
|
4102
|
+
return csv;
|
|
4103
|
+
}
|
|
4104
|
+
|
|
4087
4105
|
/**
|
|
4088
4106
|
* Function to check if a string is valid CSV
|
|
4089
4107
|
*
|
|
@@ -4106,31 +4124,13 @@
|
|
|
4106
4124
|
}
|
|
4107
4125
|
}
|
|
4108
4126
|
|
|
4109
|
-
/**
|
|
4110
|
-
* Converts a CSV string into an object
|
|
4111
|
-
*
|
|
4112
|
-
* Note: This is wrapper around `papaparse.parse()` with better autohealing
|
|
4113
|
-
*
|
|
4114
|
-
* @private - for now until `@promptbook/csv` is released
|
|
4115
|
-
*/
|
|
4116
|
-
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
|
|
4117
|
-
settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
|
|
4118
|
-
// Note: Autoheal invalid '\n' characters
|
|
4119
|
-
if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
|
|
4120
|
-
console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
|
|
4121
|
-
value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
4122
|
-
}
|
|
4123
|
-
const csv = papaparse.parse(value, settings);
|
|
4124
|
-
return csv;
|
|
4125
|
-
}
|
|
4126
|
-
|
|
4127
4127
|
/**
|
|
4128
4128
|
* Definition for CSV spreadsheet
|
|
4129
4129
|
*
|
|
4130
4130
|
* @public exported from `@promptbook/core`
|
|
4131
4131
|
* <- TODO: [🏢] Export from package `@promptbook/csv`
|
|
4132
4132
|
*/
|
|
4133
|
-
const CsvFormatDefinition = {
|
|
4133
|
+
const CsvFormatParser = {
|
|
4134
4134
|
formatName: 'CSV',
|
|
4135
4135
|
aliases: ['SPREADSHEET', 'TABLE'],
|
|
4136
4136
|
isValid(value, settings, schema) {
|
|
@@ -4142,7 +4142,7 @@
|
|
|
4142
4142
|
heal(value, settings, schema) {
|
|
4143
4143
|
throw new Error('Not implemented');
|
|
4144
4144
|
},
|
|
4145
|
-
|
|
4145
|
+
subvalueParsers: [
|
|
4146
4146
|
{
|
|
4147
4147
|
subvalueName: 'ROW',
|
|
4148
4148
|
async mapValues(value, outputParameterName, settings, mapCallback) {
|
|
@@ -4203,10 +4203,10 @@
|
|
|
4203
4203
|
],
|
|
4204
4204
|
};
|
|
4205
4205
|
/**
|
|
4206
|
-
* TODO: [🍓] In `
|
|
4207
|
-
* TODO: [🍓] In `
|
|
4208
|
-
* TODO: [🍓] In `
|
|
4209
|
-
* TODO: [🍓] In `
|
|
4206
|
+
* TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
|
|
4207
|
+
* TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
|
|
4208
|
+
* TODO: [🍓] In `CsvFormatParser` implement `heal`
|
|
4209
|
+
* TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
|
|
4210
4210
|
* TODO: [🏢] Allow to expect something inside CSV objects and other formats
|
|
4211
4211
|
*/
|
|
4212
4212
|
|
|
@@ -4215,7 +4215,7 @@
|
|
|
4215
4215
|
*
|
|
4216
4216
|
* @private still in development [🏢]
|
|
4217
4217
|
*/
|
|
4218
|
-
const JsonFormatDefinition = {
|
|
4218
|
+
const JsonFormatParser = {
|
|
4219
4219
|
formatName: 'JSON',
|
|
4220
4220
|
mimeType: 'application/json',
|
|
4221
4221
|
isValid(value, settings, schema) {
|
|
@@ -4227,28 +4227,28 @@
|
|
|
4227
4227
|
heal(value, settings, schema) {
|
|
4228
4228
|
throw new Error('Not implemented');
|
|
4229
4229
|
},
|
|
4230
|
-
|
|
4230
|
+
subvalueParsers: [],
|
|
4231
4231
|
};
|
|
4232
4232
|
/**
|
|
4233
4233
|
* TODO: [🧠] Maybe proper instance of object
|
|
4234
4234
|
* TODO: [0] Make string_serialized_json
|
|
4235
4235
|
* TODO: [1] Make type for JSON Settings and Schema
|
|
4236
4236
|
* TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
|
|
4237
|
-
* TODO: [🍓] In `
|
|
4238
|
-
* TODO: [🍓] In `
|
|
4239
|
-
* TODO: [🍓] In `
|
|
4240
|
-
* TODO: [🍓] In `
|
|
4237
|
+
* TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
|
|
4238
|
+
* TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
|
|
4239
|
+
* TODO: [🍓] In `JsonFormatParser` implement `heal`
|
|
4240
|
+
* TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
|
|
4241
4241
|
* TODO: [🏢] Allow to expect something inside JSON objects and other formats
|
|
4242
4242
|
*/
|
|
4243
4243
|
|
|
4244
4244
|
/**
|
|
4245
4245
|
* Definition for any text - this will be always valid
|
|
4246
4246
|
*
|
|
4247
|
-
* Note: This is not useful for validation, but for splitting and mapping with `
|
|
4247
|
+
* Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
|
|
4248
4248
|
*
|
|
4249
4249
|
* @public exported from `@promptbook/core`
|
|
4250
4250
|
*/
|
|
4251
|
-
const TextFormatDefinition = {
|
|
4251
|
+
const TextFormatParser = {
|
|
4252
4252
|
formatName: 'TEXT',
|
|
4253
4253
|
isValid(value) {
|
|
4254
4254
|
return typeof value === 'string';
|
|
@@ -4257,9 +4257,9 @@
|
|
|
4257
4257
|
return typeof partialValue === 'string';
|
|
4258
4258
|
},
|
|
4259
4259
|
heal() {
|
|
4260
|
-
throw new UnexpectedError('It does not make sense to call `
|
|
4260
|
+
throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
|
|
4261
4261
|
},
|
|
4262
|
-
|
|
4262
|
+
subvalueParsers: [
|
|
4263
4263
|
{
|
|
4264
4264
|
subvalueName: 'LINE',
|
|
4265
4265
|
async mapValues(value, outputParameterName, settings, mapCallback) {
|
|
@@ -4279,10 +4279,10 @@
|
|
|
4279
4279
|
/**
|
|
4280
4280
|
* TODO: [1] Make type for XML Text and Schema
|
|
4281
4281
|
* TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
|
|
4282
|
-
* TODO: [🍓] In `
|
|
4283
|
-
* TODO: [🍓] In `
|
|
4284
|
-
* TODO: [🍓] In `
|
|
4285
|
-
* TODO: [🍓] In `
|
|
4282
|
+
* TODO: [🍓] In `TextFormatParser` implement simple `isValid`
|
|
4283
|
+
* TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
|
|
4284
|
+
* TODO: [🍓] In `TextFormatParser` implement `heal`
|
|
4285
|
+
* TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
|
|
4286
4286
|
* TODO: [🏢] Allow to expect something inside each item of list and other formats
|
|
4287
4287
|
*/
|
|
4288
4288
|
|
|
@@ -4315,7 +4315,7 @@
|
|
|
4315
4315
|
*
|
|
4316
4316
|
* @private still in development [🏢]
|
|
4317
4317
|
*/
|
|
4318
|
-
const XmlFormatDefinition = {
|
|
4318
|
+
const XmlFormatParser = {
|
|
4319
4319
|
formatName: 'XML',
|
|
4320
4320
|
mimeType: 'application/xml',
|
|
4321
4321
|
isValid(value, settings, schema) {
|
|
@@ -4327,17 +4327,17 @@
|
|
|
4327
4327
|
heal(value, settings, schema) {
|
|
4328
4328
|
throw new Error('Not implemented');
|
|
4329
4329
|
},
|
|
4330
|
-
|
|
4330
|
+
subvalueParsers: [],
|
|
4331
4331
|
};
|
|
4332
4332
|
/**
|
|
4333
4333
|
* TODO: [🧠] Maybe proper instance of object
|
|
4334
4334
|
* TODO: [0] Make string_serialized_xml
|
|
4335
4335
|
* TODO: [1] Make type for XML Settings and Schema
|
|
4336
4336
|
* TODO: [🧠] What to use for validating XMLs - XSD,...
|
|
4337
|
-
* TODO: [🍓] In `
|
|
4338
|
-
* TODO: [🍓] In `
|
|
4339
|
-
* TODO: [🍓] In `
|
|
4340
|
-
* TODO: [🍓] In `
|
|
4337
|
+
* TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
|
|
4338
|
+
* TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
|
|
4339
|
+
* TODO: [🍓] In `XmlFormatParser` implement `heal`
|
|
4340
|
+
* TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
|
|
4341
4341
|
* TODO: [🏢] Allow to expect something inside XML and other formats
|
|
4342
4342
|
*/
|
|
4343
4343
|
|
|
@@ -4346,12 +4346,7 @@
|
|
|
4346
4346
|
*
|
|
4347
4347
|
* @private internal index of `...` <- TODO [🏢]
|
|
4348
4348
|
*/
|
|
4349
|
-
const FORMAT_DEFINITIONS = [
|
|
4350
|
-
JsonFormatDefinition,
|
|
4351
|
-
XmlFormatDefinition,
|
|
4352
|
-
TextFormatDefinition,
|
|
4353
|
-
CsvFormatDefinition,
|
|
4354
|
-
];
|
|
4349
|
+
const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
|
|
4355
4350
|
/**
|
|
4356
4351
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4357
4352
|
*/
|
|
@@ -4521,7 +4516,7 @@
|
|
|
4521
4516
|
}
|
|
4522
4517
|
/**
|
|
4523
4518
|
* TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
|
|
4524
|
-
* TODO: [🏢] Make this logic part of `
|
|
4519
|
+
* TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
|
|
4525
4520
|
*/
|
|
4526
4521
|
|
|
4527
4522
|
/**
|
|
@@ -4723,7 +4718,7 @@
|
|
|
4723
4718
|
PAGES: countPages,
|
|
4724
4719
|
};
|
|
4725
4720
|
/**
|
|
4726
|
-
* TODO: [🧠][🤠] This should be probbably as part of `
|
|
4721
|
+
* TODO: [🧠][🤠] This should probably be part of `TextFormatParser`
|
|
4727
4722
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4728
4723
|
*/
|
|
4729
4724
|
|
|
@@ -4751,7 +4746,7 @@
|
|
|
4751
4746
|
}
|
|
4752
4747
|
/**
|
|
4753
4748
|
* TODO: [💝] Unite object for expecting amount and format
|
|
4754
|
-
* TODO: [🧠][🤠] This should be part of `
|
|
4749
|
+
* TODO: [🧠][🤠] This should be part of `TextFormatParser`
|
|
4755
4750
|
* Note: [💝] and [🤠] are interconnected together
|
|
4756
4751
|
*/
|
|
4757
4752
|
|
|
@@ -4979,7 +4974,7 @@
|
|
|
4979
4974
|
if (task.format) {
|
|
4980
4975
|
if (task.format === 'JSON') {
|
|
4981
4976
|
if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
|
|
4982
|
-
// TODO: [🏢] Do more universally via `
|
|
4977
|
+
// TODO: [🏢] Do more universally via `FormatParser`
|
|
4983
4978
|
try {
|
|
4984
4979
|
$ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
|
|
4985
4980
|
}
|
|
@@ -5117,16 +5112,16 @@
|
|
|
5117
5112
|
${block(pipelineIdentification)}
|
|
5118
5113
|
`));
|
|
5119
5114
|
}
|
|
5120
|
-
const
|
|
5121
|
-
if (
|
|
5115
|
+
const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
|
|
5116
|
+
if (subvalueParser === undefined) {
|
|
5122
5117
|
throw new UnexpectedError(
|
|
5123
5118
|
// <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
|
|
5124
5119
|
spaceTrim__default["default"]((block) => `
|
|
5125
5120
|
Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
|
|
5126
5121
|
|
|
5127
5122
|
Available subformat names for format "${formatDefinition.formatName}":
|
|
5128
|
-
${block(formatDefinition.
|
|
5129
|
-
.map((
|
|
5123
|
+
${block(formatDefinition.subvalueParsers
|
|
5124
|
+
.map((subvalueParser) => subvalueParser.subvalueName)
|
|
5130
5125
|
.map((subvalueName) => `- ${subvalueName}`)
|
|
5131
5126
|
.join('\n'))}
|
|
5132
5127
|
|
|
@@ -5140,7 +5135,7 @@
|
|
|
5140
5135
|
formatSettings = csvSettings;
|
|
5141
5136
|
// <- TODO: [🤹♂️] More universal, make similar pattern for other formats for example \n vs \r\n in text
|
|
5142
5137
|
}
|
|
5143
|
-
const resultString = await
|
|
5138
|
+
const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
|
|
5144
5139
|
let mappedParameters;
|
|
5145
5140
|
// TODO: [🤹♂️][🪂] Limit to N concurrent executions
|
|
5146
5141
|
// TODO: When done [🐚] Report progress also for each subvalue here
|
|
@@ -5202,6 +5197,27 @@
|
|
|
5202
5197
|
return RESERVED_PARAMETER_MISSING_VALUE /* <- TODO: [♨] Implement */;
|
|
5203
5198
|
}
|
|
5204
5199
|
|
|
5200
|
+
/**
|
|
5201
|
+
* Computes the cosine similarity between two embedding vectors
|
|
5202
|
+
*
|
|
5203
|
+
* Note: This is a helper function for RAG (retrieval-augmented generation)
|
|
5204
|
+
*
|
|
5205
|
+
* @param embeddingVector1
|
|
5206
|
+
* @param embeddingVector2
|
|
5207
|
+
* @returns Cosine similarity between the two vectors
|
|
5208
|
+
*
|
|
5209
|
+
* @public exported from `@promptbook/core`
|
|
5210
|
+
*/
|
|
5211
|
+
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5212
|
+
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5213
|
+
throw new TypeError('Embedding vectors must have the same length');
|
|
5214
|
+
}
|
|
5215
|
+
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5216
|
+
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5217
|
+
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5218
|
+
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5219
|
+
}
|
|
5220
|
+
|
|
5205
5221
|
/**
|
|
5206
5222
|
* @@@
|
|
5207
5223
|
*
|
|
@@ -5228,7 +5244,7 @@
|
|
|
5228
5244
|
},
|
|
5229
5245
|
content: task.content,
|
|
5230
5246
|
parameters: {
|
|
5231
|
-
/*
|
|
5247
|
+
/* !!!! */
|
|
5232
5248
|
},
|
|
5233
5249
|
};
|
|
5234
5250
|
const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
|
|
@@ -5263,16 +5279,6 @@
|
|
|
5263
5279
|
return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
|
|
5264
5280
|
// <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
|
|
5265
5281
|
}
|
|
5266
|
-
// TODO: !!!!!! Annotate + to new file
|
|
5267
|
-
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5268
|
-
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5269
|
-
throw new TypeError('Embedding vectors must have the same length');
|
|
5270
|
-
}
|
|
5271
|
-
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5272
|
-
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5273
|
-
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5274
|
-
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5275
|
-
}
|
|
5276
5282
|
/**
|
|
5277
5283
|
* TODO: !!!! Verify if this is working
|
|
5278
5284
|
* TODO: [♨] Implement Better - use keyword search
|
|
@@ -5481,6 +5487,7 @@
|
|
|
5481
5487
|
* Note: This is a flag to prevent `onProgress` call after the pipeline execution is finished
|
|
5482
5488
|
*/
|
|
5483
5489
|
let isReturned = false;
|
|
5490
|
+
console.log(`!!! preparedPipeline`, preparedPipeline);
|
|
5484
5491
|
// Note: Check that all input input parameters are defined
|
|
5485
5492
|
for (const parameter of preparedPipeline.parameters.filter(({ isInput }) => isInput)) {
|
|
5486
5493
|
if (inputParameters[parameter.name] === undefined) {
|
|
@@ -5775,6 +5782,22 @@
|
|
|
5775
5782
|
cacheDirname,
|
|
5776
5783
|
intermediateFilesStrategy,
|
|
5777
5784
|
isAutoInstalled,
|
|
5785
|
+
}).catch((error) => {
|
|
5786
|
+
assertsError(error);
|
|
5787
|
+
return exportJson({
|
|
5788
|
+
name: 'pipelineExecutorResult',
|
|
5789
|
+
message: `Unuccessful PipelineExecutorResult, last catch`,
|
|
5790
|
+
order: [],
|
|
5791
|
+
value: {
|
|
5792
|
+
isSuccessful: false,
|
|
5793
|
+
errors: [serializeError(error)],
|
|
5794
|
+
warnings: [],
|
|
5795
|
+
usage: UNCERTAIN_USAGE,
|
|
5796
|
+
executionReport: null,
|
|
5797
|
+
outputParameters: {},
|
|
5798
|
+
preparedPipeline,
|
|
5799
|
+
},
|
|
5800
|
+
});
|
|
5778
5801
|
});
|
|
5779
5802
|
};
|
|
5780
5803
|
const pipelineExecutor = (inputParameters) => createTask({
|