@promptbook/website-crawler 0.92.0-10 → 0.92.0-12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +221 -199
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/browser.index.d.ts +2 -0
- package/esm/typings/src/_packages/core.index.d.ts +6 -4
- package/esm/typings/src/_packages/types.index.d.ts +2 -2
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +3 -1
- package/esm/typings/src/execution/createPipelineExecutor/computeCosineSimilarity.d.ts +13 -0
- package/esm/typings/src/execution/utils/checkExpectations.d.ts +1 -1
- package/esm/typings/src/formats/_common/{FormatDefinition.d.ts → FormatParser.d.ts} +3 -3
- package/esm/typings/src/formats/_common/{FormatSubvalueDefinition.d.ts → FormatSubvalueParser.d.ts} +1 -1
- package/esm/typings/src/formats/csv/CsvFormatParser.d.ts +17 -0
- package/esm/typings/src/formats/index.d.ts +2 -2
- package/esm/typings/src/formats/json/{JsonFormatDefinition.d.ts → JsonFormatParser.d.ts} +6 -6
- package/esm/typings/src/formats/text/{TextFormatDefinition.d.ts → TextFormatParser.d.ts} +7 -7
- package/esm/typings/src/formats/xml/XmlFormatParser.d.ts +19 -0
- package/esm/typings/src/postprocessing/utils/extractJsonBlock.d.ts +1 -1
- package/esm/typings/src/storage/local-storage/getIndexedDbStorage.d.ts +10 -0
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromIndexedDb.d.ts +7 -0
- package/esm/typings/src/utils/expectation-counters/index.d.ts +1 -1
- package/package.json +2 -2
- package/umd/index.umd.js +221 -199
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/formats/csv/CsvFormatDefinition.d.ts +0 -17
- package/esm/typings/src/formats/xml/XmlFormatDefinition.d.ts +0 -19
package/esm/index.es.js
CHANGED
|
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
29
29
|
* @generated
|
|
30
30
|
* @see https://github.com/webgptorg/promptbook
|
|
31
31
|
*/
|
|
32
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-10';
|
|
32
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-12';
|
|
33
33
|
/**
|
|
34
34
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
35
35
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -2275,75 +2275,6 @@ function isPipelinePrepared(pipeline) {
|
|
|
2275
2275
|
* - [♨] Are tasks prepared
|
|
2276
2276
|
*/
|
|
2277
2277
|
|
|
2278
|
-
/**
|
|
2279
|
-
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2280
|
-
*
|
|
2281
|
-
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2282
|
-
*
|
|
2283
|
-
* @public exported from `@promptbook/utils`
|
|
2284
|
-
*/
|
|
2285
|
-
function jsonParse(value) {
|
|
2286
|
-
if (value === undefined) {
|
|
2287
|
-
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2288
|
-
}
|
|
2289
|
-
else if (typeof value !== 'string') {
|
|
2290
|
-
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2291
|
-
throw new Error(spaceTrim$1(`
|
|
2292
|
-
Can not parse JSON from non-string value.
|
|
2293
|
-
|
|
2294
|
-
The value type: ${typeof value}
|
|
2295
|
-
See more in console.
|
|
2296
|
-
`));
|
|
2297
|
-
}
|
|
2298
|
-
try {
|
|
2299
|
-
return JSON.parse(value);
|
|
2300
|
-
}
|
|
2301
|
-
catch (error) {
|
|
2302
|
-
if (!(error instanceof Error)) {
|
|
2303
|
-
throw error;
|
|
2304
|
-
}
|
|
2305
|
-
throw new Error(spaceTrim$1((block) => `
|
|
2306
|
-
${block(error.message)}
|
|
2307
|
-
|
|
2308
|
-
The JSON text:
|
|
2309
|
-
${block(value)}
|
|
2310
|
-
`));
|
|
2311
|
-
}
|
|
2312
|
-
}
|
|
2313
|
-
/**
|
|
2314
|
-
* TODO: !!!! Use in Promptbook.studio
|
|
2315
|
-
*/
|
|
2316
|
-
|
|
2317
|
-
/**
|
|
2318
|
-
* Recursively converts JSON strings to JSON objects
|
|
2319
|
-
|
|
2320
|
-
* @public exported from `@promptbook/utils`
|
|
2321
|
-
*/
|
|
2322
|
-
function jsonStringsToJsons(object) {
|
|
2323
|
-
if (object === null) {
|
|
2324
|
-
return object;
|
|
2325
|
-
}
|
|
2326
|
-
if (Array.isArray(object)) {
|
|
2327
|
-
return object.map(jsonStringsToJsons);
|
|
2328
|
-
}
|
|
2329
|
-
if (typeof object !== 'object') {
|
|
2330
|
-
return object;
|
|
2331
|
-
}
|
|
2332
|
-
const newObject = { ...object };
|
|
2333
|
-
for (const [key, value] of Object.entries(object)) {
|
|
2334
|
-
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2335
|
-
newObject[key] = jsonParse(value);
|
|
2336
|
-
}
|
|
2337
|
-
else {
|
|
2338
|
-
newObject[key] = jsonStringsToJsons(value);
|
|
2339
|
-
}
|
|
2340
|
-
}
|
|
2341
|
-
return newObject;
|
|
2342
|
-
}
|
|
2343
|
-
/**
|
|
2344
|
-
* TODO: Type the return type correctly
|
|
2345
|
-
*/
|
|
2346
|
-
|
|
2347
2278
|
/**
|
|
2348
2279
|
* This error indicates problems parsing the format value
|
|
2349
2280
|
*
|
|
@@ -2504,6 +2435,104 @@ const ALL_ERRORS = {
|
|
|
2504
2435
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2505
2436
|
*/
|
|
2506
2437
|
|
|
2438
|
+
/**
|
|
2439
|
+
* Serializes an error into a [🚉] JSON-serializable object
|
|
2440
|
+
*
|
|
2441
|
+
* @public exported from `@promptbook/utils`
|
|
2442
|
+
*/
|
|
2443
|
+
function serializeError(error) {
|
|
2444
|
+
const { name, message, stack } = error;
|
|
2445
|
+
const { id } = error;
|
|
2446
|
+
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2447
|
+
console.error(spaceTrim$1((block) => `
|
|
2448
|
+
|
|
2449
|
+
Cannot serialize error with name "${name}"
|
|
2450
|
+
|
|
2451
|
+
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2452
|
+
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2453
|
+
|
|
2454
|
+
|
|
2455
|
+
${block(stack || message)}
|
|
2456
|
+
|
|
2457
|
+
`));
|
|
2458
|
+
}
|
|
2459
|
+
return {
|
|
2460
|
+
name: name,
|
|
2461
|
+
message,
|
|
2462
|
+
stack,
|
|
2463
|
+
id, // Include id in the serialized object
|
|
2464
|
+
};
|
|
2465
|
+
}
|
|
2466
|
+
|
|
2467
|
+
/**
|
|
2468
|
+
* Converts a JavaScript Object Notation (JSON) string into an object.
|
|
2469
|
+
*
|
|
2470
|
+
* Note: This is wrapper around `JSON.parse()` with better error and type handling
|
|
2471
|
+
*
|
|
2472
|
+
* @public exported from `@promptbook/utils`
|
|
2473
|
+
*/
|
|
2474
|
+
function jsonParse(value) {
|
|
2475
|
+
if (value === undefined) {
|
|
2476
|
+
throw new Error(`Can not parse JSON from undefined value.`);
|
|
2477
|
+
}
|
|
2478
|
+
else if (typeof value !== 'string') {
|
|
2479
|
+
console.error('Can not parse JSON from non-string value.', { text: value });
|
|
2480
|
+
throw new Error(spaceTrim$1(`
|
|
2481
|
+
Can not parse JSON from non-string value.
|
|
2482
|
+
|
|
2483
|
+
The value type: ${typeof value}
|
|
2484
|
+
See more in console.
|
|
2485
|
+
`));
|
|
2486
|
+
}
|
|
2487
|
+
try {
|
|
2488
|
+
return JSON.parse(value);
|
|
2489
|
+
}
|
|
2490
|
+
catch (error) {
|
|
2491
|
+
if (!(error instanceof Error)) {
|
|
2492
|
+
throw error;
|
|
2493
|
+
}
|
|
2494
|
+
throw new Error(spaceTrim$1((block) => `
|
|
2495
|
+
${block(error.message)}
|
|
2496
|
+
|
|
2497
|
+
The JSON text:
|
|
2498
|
+
${block(value)}
|
|
2499
|
+
`));
|
|
2500
|
+
}
|
|
2501
|
+
}
|
|
2502
|
+
/**
|
|
2503
|
+
* TODO: !!!! Use in Promptbook.studio
|
|
2504
|
+
*/
|
|
2505
|
+
|
|
2506
|
+
/**
|
|
2507
|
+
* Recursively converts JSON strings to JSON objects
|
|
2508
|
+
|
|
2509
|
+
* @public exported from `@promptbook/utils`
|
|
2510
|
+
*/
|
|
2511
|
+
function jsonStringsToJsons(object) {
|
|
2512
|
+
if (object === null) {
|
|
2513
|
+
return object;
|
|
2514
|
+
}
|
|
2515
|
+
if (Array.isArray(object)) {
|
|
2516
|
+
return object.map(jsonStringsToJsons);
|
|
2517
|
+
}
|
|
2518
|
+
if (typeof object !== 'object') {
|
|
2519
|
+
return object;
|
|
2520
|
+
}
|
|
2521
|
+
const newObject = { ...object };
|
|
2522
|
+
for (const [key, value] of Object.entries(object)) {
|
|
2523
|
+
if (typeof value === 'string' && isValidJsonString(value)) {
|
|
2524
|
+
newObject[key] = jsonParse(value);
|
|
2525
|
+
}
|
|
2526
|
+
else {
|
|
2527
|
+
newObject[key] = jsonStringsToJsons(value);
|
|
2528
|
+
}
|
|
2529
|
+
}
|
|
2530
|
+
return newObject;
|
|
2531
|
+
}
|
|
2532
|
+
/**
|
|
2533
|
+
* TODO: Type the return type correctly
|
|
2534
|
+
*/
|
|
2535
|
+
|
|
2507
2536
|
/**
|
|
2508
2537
|
* Deserializes the error object
|
|
2509
2538
|
*
|
|
@@ -2669,64 +2698,6 @@ function createTask(options) {
|
|
|
2669
2698
|
* TODO: [🐚] Split into more files and make `PrepareTask` & `RemoteTask` + split the function
|
|
2670
2699
|
*/
|
|
2671
2700
|
|
|
2672
|
-
/**
|
|
2673
|
-
* Serializes an error into a [🚉] JSON-serializable object
|
|
2674
|
-
*
|
|
2675
|
-
* @public exported from `@promptbook/utils`
|
|
2676
|
-
*/
|
|
2677
|
-
function serializeError(error) {
|
|
2678
|
-
const { name, message, stack } = error;
|
|
2679
|
-
const { id } = error;
|
|
2680
|
-
if (!Object.keys(ALL_ERRORS).includes(name)) {
|
|
2681
|
-
console.error(spaceTrim$1((block) => `
|
|
2682
|
-
|
|
2683
|
-
Cannot serialize error with name "${name}"
|
|
2684
|
-
|
|
2685
|
-
Authors of Promptbook probably forgot to add this error into the list of errors:
|
|
2686
|
-
https://github.com/webgptorg/promptbook/blob/main/src/errors/0-index.ts
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
${block(stack || message)}
|
|
2690
|
-
|
|
2691
|
-
`));
|
|
2692
|
-
}
|
|
2693
|
-
return {
|
|
2694
|
-
name: name,
|
|
2695
|
-
message,
|
|
2696
|
-
stack,
|
|
2697
|
-
id, // Include id in the serialized object
|
|
2698
|
-
};
|
|
2699
|
-
}
|
|
2700
|
-
|
|
2701
|
-
/**
|
|
2702
|
-
* Async version of Array.forEach
|
|
2703
|
-
*
|
|
2704
|
-
* @param array - Array to iterate over
|
|
2705
|
-
* @param options - Options for the function
|
|
2706
|
-
* @param callbackfunction - Function to call for each item
|
|
2707
|
-
* @public exported from `@promptbook/utils`
|
|
2708
|
-
* @deprecated [🪂] Use queues instead
|
|
2709
|
-
*/
|
|
2710
|
-
async function forEachAsync(array, options, callbackfunction) {
|
|
2711
|
-
const { maxParallelCount = Infinity } = options;
|
|
2712
|
-
let index = 0;
|
|
2713
|
-
let runningTasks = [];
|
|
2714
|
-
const tasks = [];
|
|
2715
|
-
for (const item of array) {
|
|
2716
|
-
const currentIndex = index++;
|
|
2717
|
-
const task = callbackfunction(item, currentIndex, array);
|
|
2718
|
-
tasks.push(task);
|
|
2719
|
-
runningTasks.push(task);
|
|
2720
|
-
/* not await */ Promise.resolve(task).then(() => {
|
|
2721
|
-
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2722
|
-
});
|
|
2723
|
-
if (maxParallelCount < runningTasks.length) {
|
|
2724
|
-
await Promise.race(runningTasks);
|
|
2725
|
-
}
|
|
2726
|
-
}
|
|
2727
|
-
await Promise.all(tasks);
|
|
2728
|
-
}
|
|
2729
|
-
|
|
2730
2701
|
/**
|
|
2731
2702
|
* Represents the uncertain value
|
|
2732
2703
|
*
|
|
@@ -2770,7 +2741,7 @@ const ZERO_USAGE = $deepFreeze({
|
|
|
2770
2741
|
*
|
|
2771
2742
|
* @public exported from `@promptbook/core`
|
|
2772
2743
|
*/
|
|
2773
|
-
$deepFreeze({
|
|
2744
|
+
const UNCERTAIN_USAGE = $deepFreeze({
|
|
2774
2745
|
price: UNCERTAIN_ZERO_VALUE,
|
|
2775
2746
|
input: {
|
|
2776
2747
|
tokensCount: UNCERTAIN_ZERO_VALUE,
|
|
@@ -2795,6 +2766,35 @@ $deepFreeze({
|
|
|
2795
2766
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
2796
2767
|
*/
|
|
2797
2768
|
|
|
2769
|
+
/**
|
|
2770
|
+
* Async version of Array.forEach
|
|
2771
|
+
*
|
|
2772
|
+
* @param array - Array to iterate over
|
|
2773
|
+
* @param options - Options for the function
|
|
2774
|
+
* @param callbackfunction - Function to call for each item
|
|
2775
|
+
* @public exported from `@promptbook/utils`
|
|
2776
|
+
* @deprecated [🪂] Use queues instead
|
|
2777
|
+
*/
|
|
2778
|
+
async function forEachAsync(array, options, callbackfunction) {
|
|
2779
|
+
const { maxParallelCount = Infinity } = options;
|
|
2780
|
+
let index = 0;
|
|
2781
|
+
let runningTasks = [];
|
|
2782
|
+
const tasks = [];
|
|
2783
|
+
for (const item of array) {
|
|
2784
|
+
const currentIndex = index++;
|
|
2785
|
+
const task = callbackfunction(item, currentIndex, array);
|
|
2786
|
+
tasks.push(task);
|
|
2787
|
+
runningTasks.push(task);
|
|
2788
|
+
/* not await */ Promise.resolve(task).then(() => {
|
|
2789
|
+
runningTasks = runningTasks.filter((t) => t !== task);
|
|
2790
|
+
});
|
|
2791
|
+
if (maxParallelCount < runningTasks.length) {
|
|
2792
|
+
await Promise.race(runningTasks);
|
|
2793
|
+
}
|
|
2794
|
+
}
|
|
2795
|
+
await Promise.all(tasks);
|
|
2796
|
+
}
|
|
2797
|
+
|
|
2798
2798
|
/**
|
|
2799
2799
|
* Function `addUsage` will add multiple usages into one
|
|
2800
2800
|
*
|
|
@@ -4088,6 +4088,24 @@ const MANDATORY_CSV_SETTINGS = Object.freeze({
|
|
|
4088
4088
|
// encoding: 'utf-8',
|
|
4089
4089
|
});
|
|
4090
4090
|
|
|
4091
|
+
/**
|
|
4092
|
+
* Converts a CSV string into an object
|
|
4093
|
+
*
|
|
4094
|
+
* Note: This is wrapper around `papaparse.parse()` with better autohealing
|
|
4095
|
+
*
|
|
4096
|
+
* @private - for now until `@promptbook/csv` is released
|
|
4097
|
+
*/
|
|
4098
|
+
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
|
|
4099
|
+
settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
|
|
4100
|
+
// Note: Autoheal invalid '\n' characters
|
|
4101
|
+
if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
|
|
4102
|
+
console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
|
|
4103
|
+
value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
4104
|
+
}
|
|
4105
|
+
const csv = parse(value, settings);
|
|
4106
|
+
return csv;
|
|
4107
|
+
}
|
|
4108
|
+
|
|
4091
4109
|
/**
|
|
4092
4110
|
* Function to check if a string is valid CSV
|
|
4093
4111
|
*
|
|
@@ -4110,31 +4128,13 @@ function isValidCsvString(value) {
|
|
|
4110
4128
|
}
|
|
4111
4129
|
}
|
|
4112
4130
|
|
|
4113
|
-
/**
|
|
4114
|
-
* Converts a CSV string into an object
|
|
4115
|
-
*
|
|
4116
|
-
* Note: This is wrapper around `papaparse.parse()` with better autohealing
|
|
4117
|
-
*
|
|
4118
|
-
* @private - for now until `@promptbook/csv` is released
|
|
4119
|
-
*/
|
|
4120
|
-
function csvParse(value /* <- TODO: string_csv */, settings, schema /* <- TODO: Make CSV Schemas */) {
|
|
4121
|
-
settings = { ...settings, ...MANDATORY_CSV_SETTINGS };
|
|
4122
|
-
// Note: Autoheal invalid '\n' characters
|
|
4123
|
-
if (settings.newline && !settings.newline.includes('\r') && value.includes('\r')) {
|
|
4124
|
-
console.warn('CSV string contains carriage return characters, but in the CSV settings the `newline` setting does not include them. Autohealing the CSV string.');
|
|
4125
|
-
value = value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
4126
|
-
}
|
|
4127
|
-
const csv = parse(value, settings);
|
|
4128
|
-
return csv;
|
|
4129
|
-
}
|
|
4130
|
-
|
|
4131
4131
|
/**
|
|
4132
4132
|
* Definition for CSV spreadsheet
|
|
4133
4133
|
*
|
|
4134
4134
|
* @public exported from `@promptbook/core`
|
|
4135
4135
|
* <- TODO: [🏢] Export from package `@promptbook/csv`
|
|
4136
4136
|
*/
|
|
4137
|
-
const CsvFormatDefinition = {
|
|
4137
|
+
const CsvFormatParser = {
|
|
4138
4138
|
formatName: 'CSV',
|
|
4139
4139
|
aliases: ['SPREADSHEET', 'TABLE'],
|
|
4140
4140
|
isValid(value, settings, schema) {
|
|
@@ -4146,7 +4146,7 @@ const CsvFormatDefinition = {
|
|
|
4146
4146
|
heal(value, settings, schema) {
|
|
4147
4147
|
throw new Error('Not implemented');
|
|
4148
4148
|
},
|
|
4149
|
-
|
|
4149
|
+
subvalueParsers: [
|
|
4150
4150
|
{
|
|
4151
4151
|
subvalueName: 'ROW',
|
|
4152
4152
|
async mapValues(value, outputParameterName, settings, mapCallback) {
|
|
@@ -4207,10 +4207,10 @@ const CsvFormatDefinition = {
|
|
|
4207
4207
|
],
|
|
4208
4208
|
};
|
|
4209
4209
|
/**
|
|
4210
|
-
* TODO: [🍓] In `
|
|
4211
|
-
* TODO: [🍓] In `
|
|
4212
|
-
* TODO: [🍓] In `
|
|
4213
|
-
* TODO: [🍓] In `
|
|
4210
|
+
* TODO: [🍓] In `CsvFormatParser` implement simple `isValid`
|
|
4211
|
+
* TODO: [🍓] In `CsvFormatParser` implement partial `canBeValid`
|
|
4212
|
+
* TODO: [🍓] In `CsvFormatParser` implement `heal
|
|
4213
|
+
* TODO: [🍓] In `CsvFormatParser` implement `subvalueParsers`
|
|
4214
4214
|
* TODO: [🏢] Allow to expect something inside CSV objects and other formats
|
|
4215
4215
|
*/
|
|
4216
4216
|
|
|
@@ -4219,7 +4219,7 @@ const CsvFormatDefinition = {
|
|
|
4219
4219
|
*
|
|
4220
4220
|
* @private still in development [🏢]
|
|
4221
4221
|
*/
|
|
4222
|
-
const JsonFormatDefinition = {
|
|
4222
|
+
const JsonFormatParser = {
|
|
4223
4223
|
formatName: 'JSON',
|
|
4224
4224
|
mimeType: 'application/json',
|
|
4225
4225
|
isValid(value, settings, schema) {
|
|
@@ -4231,28 +4231,28 @@ const JsonFormatDefinition = {
|
|
|
4231
4231
|
heal(value, settings, schema) {
|
|
4232
4232
|
throw new Error('Not implemented');
|
|
4233
4233
|
},
|
|
4234
|
-
|
|
4234
|
+
subvalueParsers: [],
|
|
4235
4235
|
};
|
|
4236
4236
|
/**
|
|
4237
4237
|
* TODO: [🧠] Maybe propper instance of object
|
|
4238
4238
|
* TODO: [0] Make string_serialized_json
|
|
4239
4239
|
* TODO: [1] Make type for JSON Settings and Schema
|
|
4240
4240
|
* TODO: [🧠] What to use for validating JSONs - JSON Schema, ZoD, typescript types/interfaces,...?
|
|
4241
|
-
* TODO: [🍓] In `
|
|
4242
|
-
* TODO: [🍓] In `
|
|
4243
|
-
* TODO: [🍓] In `
|
|
4244
|
-
* TODO: [🍓] In `
|
|
4241
|
+
* TODO: [🍓] In `JsonFormatParser` implement simple `isValid`
|
|
4242
|
+
* TODO: [🍓] In `JsonFormatParser` implement partial `canBeValid`
|
|
4243
|
+
* TODO: [🍓] In `JsonFormatParser` implement `heal
|
|
4244
|
+
* TODO: [🍓] In `JsonFormatParser` implement `subvalueParsers`
|
|
4245
4245
|
* TODO: [🏢] Allow to expect something inside JSON objects and other formats
|
|
4246
4246
|
*/
|
|
4247
4247
|
|
|
4248
4248
|
/**
|
|
4249
4249
|
* Definition for any text - this will be always valid
|
|
4250
4250
|
*
|
|
4251
|
-
* Note: This is not useful for validation, but for splitting and mapping with `
|
|
4251
|
+
* Note: This is not useful for validation, but for splitting and mapping with `subvalueParsers`
|
|
4252
4252
|
*
|
|
4253
4253
|
* @public exported from `@promptbook/core`
|
|
4254
4254
|
*/
|
|
4255
|
-
const TextFormatDefinition = {
|
|
4255
|
+
const TextFormatParser = {
|
|
4256
4256
|
formatName: 'TEXT',
|
|
4257
4257
|
isValid(value) {
|
|
4258
4258
|
return typeof value === 'string';
|
|
@@ -4261,9 +4261,9 @@ const TextFormatDefinition = {
|
|
|
4261
4261
|
return typeof partialValue === 'string';
|
|
4262
4262
|
},
|
|
4263
4263
|
heal() {
|
|
4264
|
-
throw new UnexpectedError('It does not make sense to call `TextFormatDefinition.heal`');
|
|
4264
|
+
throw new UnexpectedError('It does not make sense to call `TextFormatParser.heal`');
|
|
4265
4265
|
},
|
|
4266
|
-
|
|
4266
|
+
subvalueParsers: [
|
|
4267
4267
|
{
|
|
4268
4268
|
subvalueName: 'LINE',
|
|
4269
4269
|
async mapValues(value, outputParameterName, settings, mapCallback) {
|
|
@@ -4283,10 +4283,10 @@ const TextFormatDefinition = {
|
|
|
4283
4283
|
/**
|
|
4284
4284
|
* TODO: [1] Make type for XML Text and Schema
|
|
4285
4285
|
* TODO: [🧠][🤠] Here should be all words, characters, lines, paragraphs, pages available as subvalues
|
|
4286
|
-
* TODO: [🍓] In `
|
|
4287
|
-
* TODO: [🍓] In `
|
|
4288
|
-
* TODO: [🍓] In `
|
|
4289
|
-
* TODO: [🍓] In `
|
|
4286
|
+
* TODO: [🍓] In `TextFormatParser` implement simple `isValid`
|
|
4287
|
+
* TODO: [🍓] In `TextFormatParser` implement partial `canBeValid`
|
|
4288
|
+
* TODO: [🍓] In `TextFormatParser` implement `heal
|
|
4289
|
+
* TODO: [🍓] In `TextFormatParser` implement `subvalueParsers`
|
|
4290
4290
|
* TODO: [🏢] Allow to expect something inside each item of list and other formats
|
|
4291
4291
|
*/
|
|
4292
4292
|
|
|
@@ -4319,7 +4319,7 @@ function isValidXmlString(value) {
|
|
|
4319
4319
|
*
|
|
4320
4320
|
* @private still in development [🏢]
|
|
4321
4321
|
*/
|
|
4322
|
-
const XmlFormatDefinition = {
|
|
4322
|
+
const XmlFormatParser = {
|
|
4323
4323
|
formatName: 'XML',
|
|
4324
4324
|
mimeType: 'application/xml',
|
|
4325
4325
|
isValid(value, settings, schema) {
|
|
@@ -4331,17 +4331,17 @@ const XmlFormatDefinition = {
|
|
|
4331
4331
|
heal(value, settings, schema) {
|
|
4332
4332
|
throw new Error('Not implemented');
|
|
4333
4333
|
},
|
|
4334
|
-
|
|
4334
|
+
subvalueParsers: [],
|
|
4335
4335
|
};
|
|
4336
4336
|
/**
|
|
4337
4337
|
* TODO: [🧠] Maybe propper instance of object
|
|
4338
4338
|
* TODO: [0] Make string_serialized_xml
|
|
4339
4339
|
* TODO: [1] Make type for XML Settings and Schema
|
|
4340
4340
|
* TODO: [🧠] What to use for validating XMLs - XSD,...
|
|
4341
|
-
* TODO: [🍓] In `
|
|
4342
|
-
* TODO: [🍓] In `
|
|
4343
|
-
* TODO: [🍓] In `
|
|
4344
|
-
* TODO: [🍓] In `
|
|
4341
|
+
* TODO: [🍓] In `XmlFormatParser` implement simple `isValid`
|
|
4342
|
+
* TODO: [🍓] In `XmlFormatParser` implement partial `canBeValid`
|
|
4343
|
+
* TODO: [🍓] In `XmlFormatParser` implement `heal
|
|
4344
|
+
* TODO: [🍓] In `XmlFormatParser` implement `subvalueParsers`
|
|
4345
4345
|
* TODO: [🏢] Allow to expect something inside XML and other formats
|
|
4346
4346
|
*/
|
|
4347
4347
|
|
|
@@ -4350,12 +4350,7 @@ const XmlFormatDefinition = {
|
|
|
4350
4350
|
*
|
|
4351
4351
|
* @private internal index of `...` <- TODO [🏢]
|
|
4352
4352
|
*/
|
|
4353
|
-
const FORMAT_DEFINITIONS = [
|
|
4354
|
-
JsonFormatDefinition,
|
|
4355
|
-
XmlFormatDefinition,
|
|
4356
|
-
TextFormatDefinition,
|
|
4357
|
-
CsvFormatDefinition,
|
|
4358
|
-
];
|
|
4353
|
+
const FORMAT_DEFINITIONS = [JsonFormatParser, XmlFormatParser, TextFormatParser, CsvFormatParser];
|
|
4359
4354
|
/**
|
|
4360
4355
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4361
4356
|
*/
|
|
@@ -4525,7 +4520,7 @@ function extractJsonBlock(markdown) {
|
|
|
4525
4520
|
}
|
|
4526
4521
|
/**
|
|
4527
4522
|
* TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc.
|
|
4528
|
-
* TODO: [🏢] Make this logic part of `
|
|
4523
|
+
* TODO: [🏢] Make this logic part of `JsonFormatParser` or `isValidJsonString`
|
|
4529
4524
|
*/
|
|
4530
4525
|
|
|
4531
4526
|
/**
|
|
@@ -4727,7 +4722,7 @@ const CountUtils = {
|
|
|
4727
4722
|
PAGES: countPages,
|
|
4728
4723
|
};
|
|
4729
4724
|
/**
|
|
4730
|
-
* TODO: [🧠][🤠] This should be probbably as part of `
|
|
4725
|
+
* TODO: [🧠][🤠] This should be probbably as part of `TextFormatParser`
|
|
4731
4726
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
4732
4727
|
*/
|
|
4733
4728
|
|
|
@@ -4755,7 +4750,7 @@ function checkExpectations(expectations, value) {
|
|
|
4755
4750
|
}
|
|
4756
4751
|
/**
|
|
4757
4752
|
* TODO: [💝] Unite object for expecting amount and format
|
|
4758
|
-
* TODO: [🧠][🤠] This should be part of `
|
|
4753
|
+
* TODO: [🧠][🤠] This should be part of `TextFormatParser`
|
|
4759
4754
|
* Note: [💝] and [🤠] are interconnected together
|
|
4760
4755
|
*/
|
|
4761
4756
|
|
|
@@ -4983,7 +4978,7 @@ async function executeAttempts(options) {
|
|
|
4983
4978
|
if (task.format) {
|
|
4984
4979
|
if (task.format === 'JSON') {
|
|
4985
4980
|
if (!isValidJsonString($ongoingTaskResult.$resultString || '')) {
|
|
4986
|
-
// TODO: [🏢] Do more universally via `
|
|
4981
|
+
// TODO: [🏢] Do more universally via `FormatParser`
|
|
4987
4982
|
try {
|
|
4988
4983
|
$ongoingTaskResult.$resultString = extractJsonBlock($ongoingTaskResult.$resultString || '');
|
|
4989
4984
|
}
|
|
@@ -5121,16 +5116,16 @@ async function executeFormatSubvalues(options) {
|
|
|
5121
5116
|
${block(pipelineIdentification)}
|
|
5122
5117
|
`));
|
|
5123
5118
|
}
|
|
5124
|
-
const
|
|
5125
|
-
if (
|
|
5119
|
+
const subvalueParser = formatDefinition.subvalueParsers.find((subvalueParser) => [subvalueParser.subvalueName, ...(subvalueParser.aliases || [])].includes(task.foreach.subformatName));
|
|
5120
|
+
if (subvalueParser === undefined) {
|
|
5126
5121
|
throw new UnexpectedError(
|
|
5127
5122
|
// <- TODO: [🧠][🧐] Should be formats fixed per promptbook version or behave as plugins (=> change UnexpectedError)
|
|
5128
5123
|
spaceTrim$1((block) => `
|
|
5129
5124
|
Unsupported subformat name "${task.foreach.subformatName}" for format "${task.foreach.formatName}"
|
|
5130
5125
|
|
|
5131
5126
|
Available subformat names for format "${formatDefinition.formatName}":
|
|
5132
|
-
${block(formatDefinition.
|
|
5133
|
-
.map((
|
|
5127
|
+
${block(formatDefinition.subvalueParsers
|
|
5128
|
+
.map((subvalueParser) => subvalueParser.subvalueName)
|
|
5134
5129
|
.map((subvalueName) => `- ${subvalueName}`)
|
|
5135
5130
|
.join('\n'))}
|
|
5136
5131
|
|
|
@@ -5144,7 +5139,7 @@ async function executeFormatSubvalues(options) {
|
|
|
5144
5139
|
formatSettings = csvSettings;
|
|
5145
5140
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
5146
5141
|
}
|
|
5147
|
-
const resultString = await
|
|
5142
|
+
const resultString = await subvalueParser.mapValues(parameterValue, task.foreach.outputSubparameterName, formatSettings, async (subparameters, index) => {
|
|
5148
5143
|
let mappedParameters;
|
|
5149
5144
|
// TODO: [🤹♂️][🪂] Limit to N concurrent executions
|
|
5150
5145
|
// TODO: When done [🐚] Report progress also for each subvalue here
|
|
@@ -5206,6 +5201,27 @@ async function getExamplesForTask(task) {
|
|
|
5206
5201
|
return RESERVED_PARAMETER_MISSING_VALUE /* <- TODO: [♨] Implement */;
|
|
5207
5202
|
}
|
|
5208
5203
|
|
|
5204
|
+
/**
|
|
5205
|
+
* Computes the cosine similarity between two embedding vectors
|
|
5206
|
+
*
|
|
5207
|
+
* Note: This is helping function for RAG (retrieval-augmented generation)
|
|
5208
|
+
*
|
|
5209
|
+
* @param embeddingVector1
|
|
5210
|
+
* @param embeddingVector2
|
|
5211
|
+
* @returns Cosine similarity between the two vectors
|
|
5212
|
+
*
|
|
5213
|
+
* @public exported from `@promptbook/core`
|
|
5214
|
+
*/
|
|
5215
|
+
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5216
|
+
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5217
|
+
throw new TypeError('Embedding vectors must have the same length');
|
|
5218
|
+
}
|
|
5219
|
+
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5220
|
+
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5221
|
+
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5222
|
+
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5223
|
+
}
|
|
5224
|
+
|
|
5209
5225
|
/**
|
|
5210
5226
|
* @@@
|
|
5211
5227
|
*
|
|
@@ -5232,7 +5248,7 @@ async function getKnowledgeForTask(options) {
|
|
|
5232
5248
|
},
|
|
5233
5249
|
content: task.content,
|
|
5234
5250
|
parameters: {
|
|
5235
|
-
/*
|
|
5251
|
+
/* !!!! */
|
|
5236
5252
|
},
|
|
5237
5253
|
};
|
|
5238
5254
|
const taskEmbeddingResult = await llmTools.callEmbeddingModel(taskEmbeddingPrompt);
|
|
@@ -5267,16 +5283,6 @@ async function getKnowledgeForTask(options) {
|
|
|
5267
5283
|
return knowledgePiecesLimited.map(({ content }) => `- ${content}`).join('\n');
|
|
5268
5284
|
// <- TODO: [🧠] Some smart aggregation of knowledge pieces, single-line vs multi-line vs mixed
|
|
5269
5285
|
}
|
|
5270
|
-
// TODO: !!!!!! Annotate + to new file
|
|
5271
|
-
function computeCosineSimilarity(embeddingVector1, embeddingVector2) {
|
|
5272
|
-
if (embeddingVector1.length !== embeddingVector2.length) {
|
|
5273
|
-
throw new TypeError('Embedding vectors must have the same length');
|
|
5274
|
-
}
|
|
5275
|
-
const dotProduct = embeddingVector1.reduce((sum, value, index) => sum + value * embeddingVector2[index], 0);
|
|
5276
|
-
const magnitude1 = Math.sqrt(embeddingVector1.reduce((sum, value) => sum + value * value, 0));
|
|
5277
|
-
const magnitude2 = Math.sqrt(embeddingVector2.reduce((sum, value) => sum + value * value, 0));
|
|
5278
|
-
return 1 - dotProduct / (magnitude1 * magnitude2);
|
|
5279
|
-
}
|
|
5280
5286
|
/**
|
|
5281
5287
|
* TODO: !!!! Verify if this is working
|
|
5282
5288
|
* TODO: [♨] Implement Better - use keyword search
|
|
@@ -5779,6 +5785,22 @@ function createPipelineExecutor(options) {
|
|
|
5779
5785
|
cacheDirname,
|
|
5780
5786
|
intermediateFilesStrategy,
|
|
5781
5787
|
isAutoInstalled,
|
|
5788
|
+
}).catch((error) => {
|
|
5789
|
+
assertsError(error);
|
|
5790
|
+
return exportJson({
|
|
5791
|
+
name: 'pipelineExecutorResult',
|
|
5792
|
+
message: `Unuccessful PipelineExecutorResult, last catch`,
|
|
5793
|
+
order: [],
|
|
5794
|
+
value: {
|
|
5795
|
+
isSuccessful: false,
|
|
5796
|
+
errors: [serializeError(error)],
|
|
5797
|
+
warnings: [],
|
|
5798
|
+
usage: UNCERTAIN_USAGE,
|
|
5799
|
+
executionReport: null,
|
|
5800
|
+
outputParameters: {},
|
|
5801
|
+
preparedPipeline,
|
|
5802
|
+
},
|
|
5803
|
+
});
|
|
5782
5804
|
});
|
|
5783
5805
|
};
|
|
5784
5806
|
const pipelineExecutor = (inputParameters) => createTask({
|