@retab/node 0.0.0-reserved → 0.0.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +293 -2
- package/dist/api/client.d.ts +15 -0
- package/dist/api/client.d.ts.map +1 -0
- package/dist/api/client.js +16 -0
- package/dist/api/consensus/client.d.ts +7 -0
- package/dist/api/consensus/client.d.ts.map +1 -0
- package/dist/api/consensus/client.js +14 -0
- package/dist/api/deployments/client.d.ts +20 -0
- package/dist/api/deployments/client.d.ts.map +1 -0
- package/dist/api/deployments/client.js +23 -0
- package/dist/api/documents/client.d.ts +10 -0
- package/dist/api/documents/client.d.ts.map +1 -0
- package/dist/api/documents/client.js +35 -0
- package/dist/api/models/client.d.ts +17 -0
- package/dist/api/models/client.d.ts.map +1 -0
- package/dist/api/models/client.js +15 -0
- package/dist/api/schemas/client.d.ts +12 -0
- package/dist/api/schemas/client.d.ts.map +1 -0
- package/dist/api/schemas/client.js +14 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +135 -0
- package/dist/errors.d.ts +34 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +53 -0
- package/dist/generated_types.d.ts +64373 -0
- package/dist/generated_types.d.ts.map +1 -0
- package/dist/generated_types.js +2267 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/mime.d.ts +5 -0
- package/dist/mime.d.ts.map +1 -0
- package/dist/mime.js +66 -0
- package/dist/resource.d.ts +12 -0
- package/dist/resource.d.ts.map +1 -0
- package/dist/resource.js +19 -0
- package/dist/resources/consensus/completions.d.ts +66 -0
- package/dist/resources/consensus/completions.d.ts.map +1 -0
- package/dist/resources/consensus/completions.js +84 -0
- package/dist/resources/consensus/index.d.ts +72 -0
- package/dist/resources/consensus/index.d.ts.map +1 -0
- package/dist/resources/consensus/index.js +76 -0
- package/dist/resources/consensus/responses.d.ts +69 -0
- package/dist/resources/consensus/responses.d.ts.map +1 -0
- package/dist/resources/consensus/responses.js +99 -0
- package/dist/resources/documents/extractions.d.ts +74 -0
- package/dist/resources/documents/extractions.d.ts.map +1 -0
- package/dist/resources/documents/extractions.js +196 -0
- package/dist/resources/documents/index.d.ts +21 -0
- package/dist/resources/documents/index.d.ts.map +1 -0
- package/dist/resources/documents/index.js +55 -0
- package/dist/resources/evaluations/documents.d.ts +40 -0
- package/dist/resources/evaluations/documents.d.ts.map +1 -0
- package/dist/resources/evaluations/documents.js +123 -0
- package/dist/resources/evaluations/index.d.ts +14 -0
- package/dist/resources/evaluations/index.d.ts.map +1 -0
- package/dist/resources/evaluations/index.js +17 -0
- package/dist/resources/evaluations/iterations.d.ts +50 -0
- package/dist/resources/evaluations/iterations.d.ts.map +1 -0
- package/dist/resources/evaluations/iterations.js +156 -0
- package/dist/resources/files.d.ts +82 -0
- package/dist/resources/files.d.ts.map +1 -0
- package/dist/resources/files.js +150 -0
- package/dist/resources/finetuning.d.ts +105 -0
- package/dist/resources/finetuning.d.ts.map +1 -0
- package/dist/resources/finetuning.js +181 -0
- package/dist/resources/index.d.ts +11 -0
- package/dist/resources/index.d.ts.map +1 -0
- package/dist/resources/index.js +10 -0
- package/dist/resources/models.d.ts +57 -0
- package/dist/resources/models.d.ts.map +1 -0
- package/dist/resources/models.js +72 -0
- package/dist/resources/processors/automations/endpoints.d.ts +90 -0
- package/dist/resources/processors/automations/endpoints.d.ts.map +1 -0
- package/dist/resources/processors/automations/endpoints.js +145 -0
- package/dist/resources/processors/automations/index.d.ts +7 -0
- package/dist/resources/processors/automations/index.d.ts.map +1 -0
- package/dist/resources/processors/automations/index.js +6 -0
- package/dist/resources/processors/automations/links.d.ts +90 -0
- package/dist/resources/processors/automations/links.d.ts.map +1 -0
- package/dist/resources/processors/automations/links.js +149 -0
- package/dist/resources/processors/automations/logs.d.ts +35 -0
- package/dist/resources/processors/automations/logs.d.ts.map +1 -0
- package/dist/resources/processors/automations/logs.js +60 -0
- package/dist/resources/processors/automations/mailboxes.d.ts +102 -0
- package/dist/resources/processors/automations/mailboxes.d.ts.map +1 -0
- package/dist/resources/processors/automations/mailboxes.js +157 -0
- package/dist/resources/processors/automations/outlook.d.ts +114 -0
- package/dist/resources/processors/automations/outlook.d.ts.map +1 -0
- package/dist/resources/processors/automations/outlook.js +170 -0
- package/dist/resources/processors/automations/tests.d.ts +58 -0
- package/dist/resources/processors/automations/tests.d.ts.map +1 -0
- package/dist/resources/processors/automations/tests.js +90 -0
- package/dist/resources/processors/index.d.ts +303 -0
- package/dist/resources/processors/index.d.ts.map +1 -0
- package/dist/resources/processors/index.js +261 -0
- package/dist/resources/schemas.d.ts +63 -0
- package/dist/resources/schemas.d.ts.map +1 -0
- package/dist/resources/schemas.js +183 -0
- package/dist/resources/secrets/external_api_keys.d.ts +61 -0
- package/dist/resources/secrets/external_api_keys.d.ts.map +1 -0
- package/dist/resources/secrets/external_api_keys.js +120 -0
- package/dist/resources/secrets/index.d.ts +14 -0
- package/dist/resources/secrets/index.d.ts.map +1 -0
- package/dist/resources/secrets/index.js +17 -0
- package/dist/resources/secrets/webhooks.d.ts +73 -0
- package/dist/resources/secrets/webhooks.d.ts.map +1 -0
- package/dist/resources/secrets/webhooks.js +145 -0
- package/dist/resources/usage.d.ts +223 -0
- package/dist/resources/usage.d.ts.map +1 -0
- package/dist/resources/usage.js +310 -0
- package/dist/types/ai_models.d.ts +389 -0
- package/dist/types/ai_models.d.ts.map +1 -0
- package/dist/types/ai_models.js +145 -0
- package/dist/types/automations/cron.d.ts +28 -0
- package/dist/types/automations/cron.d.ts.map +1 -0
- package/dist/types/automations/cron.js +1 -0
- package/dist/types/automations/endpoints.d.ts +13 -0
- package/dist/types/automations/endpoints.d.ts.map +1 -0
- package/dist/types/automations/endpoints.js +1 -0
- package/dist/types/automations/index.d.ts +7 -0
- package/dist/types/automations/index.d.ts.map +1 -0
- package/dist/types/automations/index.js +6 -0
- package/dist/types/automations/links.d.ts +15 -0
- package/dist/types/automations/links.d.ts.map +1 -0
- package/dist/types/automations/links.js +1 -0
- package/dist/types/automations/mailboxes.d.ts +18 -0
- package/dist/types/automations/mailboxes.d.ts.map +1 -0
- package/dist/types/automations/mailboxes.js +1 -0
- package/dist/types/automations/outlook.d.ts +37 -0
- package/dist/types/automations/outlook.d.ts.map +1 -0
- package/dist/types/automations/outlook.js +1 -0
- package/dist/types/automations/webhooks.d.ts +13 -0
- package/dist/types/automations/webhooks.d.ts.map +1 -0
- package/dist/types/automations/webhooks.js +1 -0
- package/dist/types/browser_canvas.d.ts +4 -0
- package/dist/types/browser_canvas.d.ts.map +1 -0
- package/dist/types/browser_canvas.js +2 -0
- package/dist/types/chat.d.ts +99 -0
- package/dist/types/chat.d.ts.map +1 -0
- package/dist/types/chat.js +20 -0
- package/dist/types/consensus.d.ts +10 -0
- package/dist/types/consensus.d.ts.map +1 -0
- package/dist/types/consensus.js +1 -0
- package/dist/types/db/annotations.d.ts +108 -0
- package/dist/types/db/annotations.d.ts.map +1 -0
- package/dist/types/db/annotations.js +6 -0
- package/dist/types/db/files.d.ts +133 -0
- package/dist/types/db/files.d.ts.map +1 -0
- package/dist/types/db/files.js +5 -0
- package/dist/types/documents/extractions.d.ts +1849 -0
- package/dist/types/documents/extractions.d.ts.map +1 -0
- package/dist/types/documents/extractions.js +211 -0
- package/dist/types/documents/processing.d.ts +249 -0
- package/dist/types/documents/processing.d.ts.map +1 -0
- package/dist/types/documents/processing.js +6 -0
- package/dist/types/evaluations/iterations.d.ts +41 -0
- package/dist/types/evaluations/iterations.d.ts.map +1 -0
- package/dist/types/evaluations/iterations.js +1 -0
- package/dist/types/jobs/base.d.ts +162 -0
- package/dist/types/jobs/base.d.ts.map +1 -0
- package/dist/types/jobs/base.js +6 -0
- package/dist/types/jobs/specialized.d.ts +200 -0
- package/dist/types/jobs/specialized.d.ts.map +1 -0
- package/dist/types/jobs/specialized.js +37 -0
- package/dist/types/logs.d.ts +92 -0
- package/dist/types/logs.d.ts.map +1 -0
- package/dist/types/logs.js +1 -0
- package/dist/types/mime.d.ts +426 -0
- package/dist/types/mime.d.ts.map +1 -0
- package/dist/types/mime.js +48 -0
- package/dist/types/modalities.d.ts +31 -0
- package/dist/types/modalities.d.ts.map +1 -0
- package/dist/types/modalities.js +109 -0
- package/dist/types/pagination.d.ts +5 -0
- package/dist/types/pagination.d.ts.map +1 -0
- package/dist/types/pagination.js +1 -0
- package/dist/types/schemas/enhancement.d.ts +250 -0
- package/dist/types/schemas/enhancement.d.ts.map +1 -0
- package/dist/types/schemas/enhancement.js +6 -0
- package/dist/types/schemas/generate.d.ts +160 -0
- package/dist/types/schemas/generate.d.ts.map +1 -0
- package/dist/types/schemas/generate.js +19 -0
- package/dist/types/schemas/object.d.ts +116 -0
- package/dist/types/schemas/object.d.ts.map +1 -0
- package/dist/types/schemas/object.js +861 -0
- package/dist/types/secrets/external_api_keys.d.ts +27 -0
- package/dist/types/secrets/external_api_keys.d.ts.map +1 -0
- package/dist/types/secrets/external_api_keys.js +11 -0
- package/dist/types/secrets/index.d.ts +2 -0
- package/dist/types/secrets/index.d.ts.map +1 -0
- package/dist/types/secrets/index.js +1 -0
- package/dist/types/standards.d.ts +37 -0
- package/dist/types/standards.d.ts.map +1 -0
- package/dist/types/standards.js +1 -0
- package/dist/types.d.ts +276 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +85 -0
- package/dist/utils/ai_models.d.ts +10 -0
- package/dist/utils/ai_models.d.ts.map +1 -0
- package/dist/utils/ai_models.js +183 -0
- package/dist/utils/batch_processing.d.ts +227 -0
- package/dist/utils/batch_processing.d.ts.map +1 -0
- package/dist/utils/batch_processing.js +268 -0
- package/dist/utils/benchmarking.d.ts +115 -0
- package/dist/utils/benchmarking.d.ts.map +1 -0
- package/dist/utils/benchmarking.js +355 -0
- package/dist/utils/chat.d.ts +70 -0
- package/dist/utils/chat.d.ts.map +1 -0
- package/dist/utils/chat.js +79 -0
- package/dist/utils/cost_calculation.d.ts +26 -0
- package/dist/utils/cost_calculation.d.ts.map +1 -0
- package/dist/utils/cost_calculation.js +99 -0
- package/dist/utils/datasets.d.ts +135 -0
- package/dist/utils/datasets.d.ts.map +1 -0
- package/dist/utils/datasets.js +359 -0
- package/dist/utils/display.d.ts +108 -0
- package/dist/utils/display.d.ts.map +1 -0
- package/dist/utils/display.js +244 -0
- package/dist/utils/hash.d.ts +18 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +31 -0
- package/dist/utils/hashing.d.ts +18 -0
- package/dist/utils/hashing.d.ts.map +1 -0
- package/dist/utils/hashing.js +28 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/json_schema.d.ts +18 -0
- package/dist/utils/json_schema.d.ts.map +1 -0
- package/dist/utils/json_schema.js +334 -0
- package/dist/utils/json_schema_utils.d.ts +42 -0
- package/dist/utils/json_schema_utils.d.ts.map +1 -0
- package/dist/utils/json_schema_utils.js +212 -0
- package/dist/utils/jsonl.d.ts +60 -0
- package/dist/utils/jsonl.d.ts.map +1 -0
- package/dist/utils/jsonl.js +259 -0
- package/dist/utils/mime.d.ts +6 -0
- package/dist/utils/mime.d.ts.map +1 -0
- package/dist/utils/mime.js +129 -0
- package/dist/utils/model_cards.d.ts +219 -0
- package/dist/utils/model_cards.d.ts.map +1 -0
- package/dist/utils/model_cards.js +462 -0
- package/dist/utils/prompt_optimization.d.ts +96 -0
- package/dist/utils/prompt_optimization.d.ts.map +1 -0
- package/dist/utils/prompt_optimization.js +275 -0
- package/dist/utils/responses.d.ts +35 -0
- package/dist/utils/responses.d.ts.map +1 -0
- package/dist/utils/responses.js +37 -0
- package/dist/utils/stream.d.ts +13 -0
- package/dist/utils/stream.d.ts.map +1 -0
- package/dist/utils/stream.js +64 -0
- package/dist/utils/stream_context_managers.d.ts +147 -0
- package/dist/utils/stream_context_managers.d.ts.map +1 -0
- package/dist/utils/stream_context_managers.js +380 -0
- package/dist/utils/usage.d.ts +57 -0
- package/dist/utils/usage.d.ts.map +1 -0
- package/dist/utils/usage.js +97 -0
- package/dist/utils/webhook_secrets.d.ts +59 -0
- package/dist/utils/webhook_secrets.d.ts.map +1 -0
- package/dist/utils/webhook_secrets.js +107 -0
- package/dist/utils/zod_to_json_schema.d.ts +11 -0
- package/dist/utils/zod_to_json_schema.d.ts.map +1 -0
- package/dist/utils/zod_to_json_schema.js +123 -0
- package/dist/utils.d.ts +19 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +1 -0
- package/package.json +62 -6
- package/index.js +0 -7
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmarking and evaluation utilities for model comparison
|
|
3
|
+
* Equivalent to Python's benchmarking.py
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Quality metrics for a batch of predictions compared against ground truths,
 * as produced by `calculateMetrics`. Rate-style fields are percentages (0-100).
 */
export interface EvaluationMetrics {
|
|
6
|
+
/** Percentage of examples whose JSON serialization equals that of the ground truth. */
accuracy: number;
|
|
7
|
+
/** Set to the same exact-match percentage as `accuracy` (simplified; no separate precision is computed). */
precision: number;
|
|
8
|
+
/** Set to the same exact-match percentage as `accuracy` (simplified; no separate recall is computed). */
recall: number;
|
|
9
|
+
/** Set to the same exact-match percentage as `accuracy` (simplified; no separate F1 is computed). */
f1Score: number;
|
|
10
|
+
/** Exact-match percentage; same value as `accuracy`. */
exactMatch: number;
|
|
11
|
+
/** Mean Levenshtein distance between the JSON strings of each prediction and its ground truth. */
levenshteinDistance: number;
|
|
12
|
+
/** Mean Jaccard similarity of the character sets of the two JSON strings, scaled to 0-100. */
jaccardSimilarity: number;
|
|
13
|
+
/** Mean Hamming distance over the pairs whose JSON strings have equal length; 0 when there is no such pair. */
hammingDistance: number;
|
|
14
|
+
/** Per flattened (dot-notation) ground-truth field: percentage of examples in which the predicted value matched. */
fieldAccuracy: Record<string, number>;
|
|
15
|
+
/** 100 minus the share (in percent) of distinct field paths that showed any difference, clamped at 0. */
completeness: number;
|
|
16
|
+
/** Percentage of examples that were not an exact match. */
errorRate: number;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Result of evaluating one file's predictions, as returned by
 * `SingleFileEval.evaluate()`.
 */
export interface SingleFileEvalResult {
|
|
19
|
+
/** Name the evaluator was constructed with. */
filename: string;
|
|
20
|
+
/** Metrics computed by `calculateMetrics` over all predictions of the file. */
metrics: EvaluationMetrics;
|
|
21
|
+
/** The predictions that were evaluated (passed through unchanged). */
predictions: any[];
|
|
22
|
+
/** The ground truths the predictions were compared against (passed through unchanged). */
groundTruths: any[];
|
|
23
|
+
/** One `{ index, differences }` entry per example that has at least one field difference. */
differences: Record<string, any>[];
|
|
24
|
+
/** Wall-clock duration of the evaluation, measured with `Date.now()` (milliseconds). */
executionTime: number;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Outcome of benchmarking a single model, one entry per model in the array
 * returned by `benchmark`.
 */
export interface BenchmarkResult {
|
|
27
|
+
/** Model identifier, as given in the `models` argument of `benchmark`. */
model: string;
|
|
28
|
+
/** Metrics computed by `calculateMetrics` over the model's predictions and the ground truth. */
overallMetrics: EvaluationMetrics;
|
|
29
|
+
/** Per-file evaluation results; `benchmark` stores a single entry for the test-data file. */
fileResults: SingleFileEvalResult[];
|
|
30
|
+
/** Summary statistics. NOTE(review): populated by `benchmark`; confirm the exact formulas and units there. */
aggregateStats: {
|
|
31
|
+
meanAccuracy: number;
|
|
32
|
+
stdDevAccuracy: number;
|
|
33
|
+
meanF1: number;
|
|
34
|
+
stdDevF1: number;
|
|
35
|
+
totalFiles: number;
|
|
36
|
+
totalPredictions: number;
|
|
37
|
+
};
|
|
38
|
+
/** Duration of the model's run; `plotMetricsWithUncertainty` treats it as milliseconds. */
executionTime: number;
|
|
39
|
+
}
|
|
40
|
+
/**
 * A single field-level difference between a prediction and its ground truth,
 * as reported by `computeDictDifference`.
 */
export interface DictionaryDifference {
|
|
41
|
+
/** Flattened dot-notation key of the field, without the caller-supplied path prefix. */
field: string;
|
|
42
|
+
/** Value found in the prediction; `undefined` when the field is missing there. */
predicted: any;
|
|
43
|
+
/** Value found in the ground truth; `undefined` when the field only exists in the prediction. */
groundTruth: any;
|
|
44
|
+
/**
 * 'missing': only in the ground truth; 'extra': only in the prediction;
 * 'type_mismatch': the `typeof` of the two values differs;
 * 'value_mismatch': same `typeof`, different value.
 */
differenceType: 'missing' | 'extra' | 'value_mismatch' | 'type_mismatch';
|
|
45
|
+
/** `field` prefixed with the `path` argument (joined by '.') when one was supplied. */
path: string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Calculate Levenshtein distance between two strings
|
|
49
|
+
*/
|
|
50
|
+
export declare function levenshteinDistance(str1: string, str2: string): number;
|
|
51
|
+
/**
|
|
52
|
+
* Calculate Jaccard similarity between two sets
|
|
53
|
+
*/
|
|
54
|
+
export declare function jaccardSimilarity(set1: Set<any>, set2: Set<any>): number;
|
|
55
|
+
/**
|
|
56
|
+
* Calculate Hamming distance between two strings
|
|
57
|
+
*/
|
|
58
|
+
export declare function hammingDistance(str1: string, str2: string): number;
|
|
59
|
+
/**
|
|
60
|
+
* Flatten nested object into dot-notation keys
|
|
61
|
+
*/
|
|
62
|
+
export declare function flattenObject(obj: any, prefix?: string): Record<string, any>;
|
|
63
|
+
/**
|
|
64
|
+
* Compute detailed differences between two dictionaries
|
|
65
|
+
*/
|
|
66
|
+
export declare function computeDictDifference(predicted: any, groundTruth: any, path?: string): DictionaryDifference[];
|
|
67
|
+
/**
|
|
68
|
+
* Aggregate dictionary differences across multiple examples
|
|
69
|
+
*/
|
|
70
|
+
export declare function aggregateDictDifferences(differences: DictionaryDifference[][]): Record<string, {
|
|
71
|
+
count: number;
|
|
72
|
+
percentage: number;
|
|
73
|
+
examples: DictionaryDifference[];
|
|
74
|
+
}>;
|
|
75
|
+
/**
|
|
76
|
+
* Calculate comprehensive evaluation metrics
|
|
77
|
+
*/
|
|
78
|
+
export declare function calculateMetrics(predictions: any[], groundTruths: any[]): EvaluationMetrics;
|
|
79
|
+
/**
|
|
80
|
+
* Single file evaluation class
|
|
81
|
+
*/
|
|
82
|
+
export declare class SingleFileEval {
|
|
83
|
+
private filename;
|
|
84
|
+
private predictions;
|
|
85
|
+
private groundTruths;
|
|
86
|
+
constructor(filename: string, predictions: any[], groundTruths: any[]);
|
|
87
|
+
evaluate(): Promise<SingleFileEvalResult>;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Plot metrics with uncertainty (text-based visualization)
|
|
91
|
+
*/
|
|
92
|
+
export declare function plotMetricsWithUncertainty(results: BenchmarkResult[]): void;
|
|
93
|
+
/**
|
|
94
|
+
* Benchmark multiple models
|
|
95
|
+
*/
|
|
96
|
+
export declare function benchmark(models: string[], testDataPath: string, groundTruthPath: string, evaluationFunction: (model: string, testData: any[]) => Promise<any[]>): Promise<BenchmarkResult[]>;
|
|
97
|
+
/**
|
|
98
|
+
* Save benchmark results to file
|
|
99
|
+
*/
|
|
100
|
+
export declare function saveBenchmarkResults(results: BenchmarkResult[], outputPath: string): Promise<void>;
|
|
101
|
+
declare const _default: {
|
|
102
|
+
SingleFileEval: typeof SingleFileEval;
|
|
103
|
+
calculateMetrics: typeof calculateMetrics;
|
|
104
|
+
computeDictDifference: typeof computeDictDifference;
|
|
105
|
+
aggregateDictDifferences: typeof aggregateDictDifferences;
|
|
106
|
+
levenshteinDistance: typeof levenshteinDistance;
|
|
107
|
+
jaccardSimilarity: typeof jaccardSimilarity;
|
|
108
|
+
hammingDistance: typeof hammingDistance;
|
|
109
|
+
flattenObject: typeof flattenObject;
|
|
110
|
+
plotMetricsWithUncertainty: typeof plotMetricsWithUncertainty;
|
|
111
|
+
benchmark: typeof benchmark;
|
|
112
|
+
saveBenchmarkResults: typeof saveBenchmarkResults;
|
|
113
|
+
};
|
|
114
|
+
export default _default;
|
|
115
|
+
//# sourceMappingURL=benchmarking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmarking.d.ts","sourceRoot":"","sources":["../../src/utils/benchmarking.ts"],"names":[],"mappings":"AAEA;;;GAGG;AAEH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,iBAAiB,CAAC;IAC3B,WAAW,EAAE,GAAG,EAAE,CAAC;IACnB,YAAY,EAAE,GAAG,EAAE,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,iBAAiB,CAAC;IAClC,WAAW,EAAE,oBAAoB,EAAE,CAAC;IACpC,cAAc,EAAE;QACd,YAAY,EAAE,MAAM,CAAC;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,GAAG,CAAC;IACf,WAAW,EAAE,GAAG,CAAC;IACjB,cAAc,EAAE,SAAS,GAAG,OAAO,GAAG,gBAAgB,GAAG,eAAe,CAAC;IACzE,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CA2BtE;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAMxE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAYlE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,GAAE,MAAW,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAiBhF;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,GAAG,EAAE,IAAI,GAAE,MAAW,GAAG,oBAAoB,EAAE,CA6CjH;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,WAAW,EAAE,oBAAoB,EAAE,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE;IAC9F,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,oBAAoB,EAAE,CAAC;CAClC,CAAC,CAyBD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,GAAG,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG,iBAA
iB,CAmF3F;AAED;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,YAAY,CAAQ;gBAEhB,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE;IAM/D,QAAQ,IAAI,OAAO,CAAC,oBAAoB,CAAC;CA2BhD;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,IAAI,CA0B3E;AAED;;GAEG;AACH,wBAAsB,SAAS,CAC7B,MAAM,EAAE,MAAM,EAAE,EAChB,YAAY,EAAE,MAAM,EACpB,eAAe,EAAE,MAAM,EACvB,kBAAkB,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,GACrE,OAAO,CAAC,eAAe,EAAE,CAAC,CA+D5B;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,eAAe,EAAE,EAC1B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC,CAgBf;;;;;;;;;;;;;;AAED,wBAYE"}
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import { readJSONL, writeJSONL } from './jsonl.js';
|
|
2
|
+
/**
 * Calculate the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single UTF-16 code unit insertions,
 * deletions and substitutions needed to turn one string into the other.
 *
 * Only two rows of the dynamic-programming table are kept, sized by the
 * shorter input, so memory is O(min(m, n)) instead of O(m * n). This matters
 * because callers pass whole JSON serializations.
 *
 * @param str1 - First string.
 * @param str2 - Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
export function levenshteinDistance(str1, str2) {
    if (str1 === str2) {
        return 0;
    }
    // The distance is symmetric, so lay the shorter string along the row.
    let longer = str1;
    let shorter = str2;
    if (shorter.length > longer.length) {
        longer = str2;
        shorter = str1;
    }
    if (shorter.length === 0) {
        return longer.length;
    }
    // previousRow[j] = distance between the processed prefix of `longer`
    // and the first j code units of `shorter`.
    let previousRow = new Array(shorter.length + 1);
    let currentRow = new Array(shorter.length + 1);
    for (let j = 0; j <= shorter.length; j++) {
        previousRow[j] = j;
    }
    for (let i = 1; i <= longer.length; i++) {
        currentRow[0] = i;
        const unit = longer.charCodeAt(i - 1);
        for (let j = 1; j <= shorter.length; j++) {
            if (unit === shorter.charCodeAt(j - 1)) {
                currentRow[j] = previousRow[j - 1];
            }
            else {
                currentRow[j] = Math.min(previousRow[j - 1], // substitution
                currentRow[j - 1], // insertion
                previousRow[j] // deletion
                ) + 1;
            }
        }
        // Recycle the old row as the next scratch row.
        const recycled = previousRow;
        previousRow = currentRow;
        currentRow = recycled;
    }
    return previousRow[shorter.length];
}
|
|
30
|
+
/**
 * Calculate the Jaccard similarity |A ∩ B| / |A ∪ B| of two sets.
 *
 * @param set1 - First set.
 * @param set2 - Second set.
 * @returns A ratio in [0, 1]; two empty sets are considered identical (1.0).
 */
export function jaccardSimilarity(set1, set2) {
    // Count the distinct members of set1 that also occur in set2.
    let sharedCount = 0;
    for (const member of new Set(set1)) {
        if (set2.has(member)) {
            sharedCount += 1;
        }
    }
    const combined = new Set(set1);
    for (const member of set2) {
        combined.add(member);
    }
    return combined.size === 0 ? 1.0 : sharedCount / combined.size;
}
|
|
40
|
+
/**
 * Calculate the Hamming distance between two equal-length strings, i.e. the
 * number of positions at which they differ.
 *
 * @param str1 - First string.
 * @param str2 - Second string; must have the same length as `str1`.
 * @returns The number of differing positions.
 * @throws Error when the two lengths differ.
 */
export function hammingDistance(str1, str2) {
    const length = str1.length;
    if (length !== str2.length) {
        throw new Error('Strings must be of equal length for Hamming distance');
    }
    let mismatches = 0;
    let position = 0;
    while (position < length) {
        mismatches += str1[position] !== str2[position] ? 1 : 0;
        position += 1;
    }
    return mismatches;
}
|
|
55
|
+
/**
 * Flatten a nested object into a single-level object keyed by dot-notation
 * paths.
 *
 * Only own enumerable properties are visited. Nested non-array objects are
 * recursed into; arrays, primitives and `null` are kept as leaf values. A
 * nested object without own enumerable properties contributes no keys.
 *
 * @param obj - Value to flatten; `null` and `undefined` yield an empty result.
 * @param prefix - Path prepended (joined with '.') to every key. Defaults to ''.
 * @returns A new flat object mapping each leaf path to its value.
 */
export function flattenObject(obj, prefix = '') {
    const flattened = {};
    for (const key in obj) {
        // Go through Object.prototype instead of `obj.hasOwnProperty`: the
        // input may have a null prototype or carry its own data field that
        // is literally named "hasOwnProperty".
        if (!Object.prototype.hasOwnProperty.call(obj, key)) {
            continue;
        }
        const newKey = prefix ? `${prefix}.${key}` : key;
        const value = obj[key];
        if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
            Object.assign(flattened, flattenObject(value, newKey));
        }
        else {
            flattened[newKey] = value;
        }
    }
    return flattened;
}
|
|
74
|
+
/**
 * Compute the field-level differences between a predicted object and its
 * ground truth.
 *
 * Both inputs are flattened to dot-notation keys first. Every key present on
 * either side is then classified:
 *  - 'missing': the key exists only in the ground truth;
 *  - 'extra': the key exists only in the prediction;
 *  - 'type_mismatch': both exist but their `typeof` differs;
 *  - 'value_mismatch': both exist with the same `typeof` but different values.
 *
 * Presence is decided on own properties only, so data fields named like
 * Object.prototype members (e.g. "constructor") are classified correctly.
 * Array leaves are compared by content (their JSON serialization) rather than
 * by reference, so two equal arrays are not reported as a mismatch.
 *
 * @param predicted - Predicted object.
 * @param groundTruth - Expected object.
 * @param path - Optional prefix prepended (joined with '.') to each reported `path`.
 * @returns One entry per differing field; empty when nothing differs.
 */
export function computeDictDifference(predicted, groundTruth, path = '') {
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
    const sameValue = (left, right) => {
        if (left === right) {
            return true;
        }
        if (!Array.isArray(left) || !Array.isArray(right)) {
            return false;
        }
        try {
            return JSON.stringify(left) === JSON.stringify(right);
        }
        catch (_serializationError) {
            // Not serializable (e.g. circular): fall back to "different",
            // which is what the reference comparison above already said.
            return false;
        }
    };
    const differences = [];
    const flatPredicted = flattenObject(predicted);
    const flatGroundTruth = flattenObject(groundTruth);
    const allKeys = new Set([
        ...Object.keys(flatPredicted),
        ...Object.keys(flatGroundTruth)
    ]);
    for (const key of allKeys) {
        const fullPath = path ? `${path}.${key}` : key;
        const predValue = flatPredicted[key];
        const truthValue = flatGroundTruth[key];
        if (!hasOwn(flatPredicted, key)) {
            differences.push({
                field: key,
                predicted: undefined,
                groundTruth: truthValue,
                differenceType: 'missing',
                path: fullPath,
            });
        }
        else if (!hasOwn(flatGroundTruth, key)) {
            differences.push({
                field: key,
                predicted: predValue,
                groundTruth: undefined,
                differenceType: 'extra',
                path: fullPath,
            });
        }
        else if (!sameValue(predValue, truthValue)) {
            const diffType = typeof predValue !== typeof truthValue ? 'type_mismatch' : 'value_mismatch';
            differences.push({
                field: key,
                predicted: predValue,
                groundTruth: truthValue,
                differenceType: diffType,
                path: fullPath,
            });
        }
    }
    return differences;
}
|
|
120
|
+
/**
|
|
121
|
+
* Aggregate dictionary differences across multiple examples
|
|
122
|
+
*/
|
|
123
|
+
export function aggregateDictDifferences(differences) {
|
|
124
|
+
const aggregated = {};
|
|
125
|
+
// Group differences by field path
|
|
126
|
+
for (const diffList of differences) {
|
|
127
|
+
for (const diff of diffList) {
|
|
128
|
+
if (!aggregated[diff.path]) {
|
|
129
|
+
aggregated[diff.path] = [];
|
|
130
|
+
}
|
|
131
|
+
aggregated[diff.path].push(diff);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
const totalExamples = differences.length;
|
|
135
|
+
const result = {};
|
|
136
|
+
for (const [path, diffs] of Object.entries(aggregated)) {
|
|
137
|
+
result[path] = {
|
|
138
|
+
count: diffs.length,
|
|
139
|
+
percentage: (diffs.length / totalExamples) * 100,
|
|
140
|
+
examples: diffs.slice(0, 5), // Keep first 5 examples
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
return result;
|
|
144
|
+
}
|
|
145
|
+
/**
 * Calculate evaluation metrics for predictions against their ground truths.
 *
 * Each prediction/ground-truth pair is compared through its JSON
 * serialization (exact match, Levenshtein, character-set Jaccard, Hamming)
 * and field by field on the flattened dot-notation keys of the ground truth.
 * A value that JSON cannot serialize (e.g. `undefined`) is treated as the
 * text "undefined" instead of aborting the whole evaluation.
 *
 * Returned values:
 *  - `accuracy`, `precision`, `recall`, `f1Score`, `exactMatch`: percentage of
 *    exact matches (precision/recall/F1 are simplified to that same value);
 *  - `errorRate`: percentage of non-exact matches;
 *  - `levenshteinDistance`: mean edit distance between the JSON strings;
 *  - `jaccardSimilarity`: mean character-set similarity, scaled to 0-100;
 *  - `hammingDistance`: mean over equal-length JSON string pairs, 0 if none;
 *  - `fieldAccuracy`: per ground-truth field, percentage of matching values
 *    (arrays are compared by content);
 *  - `completeness`: 100 minus the share of distinct field paths showing any
 *    difference, clamped at 0 (100 when there are neither fields nor
 *    differences).
 *
 * For empty input every numeric metric is 0 and `fieldAccuracy` is empty,
 * rather than NaN.
 *
 * @param predictions - Predicted values, one per example.
 * @param groundTruths - Expected values, aligned with `predictions`.
 * @returns The computed metrics.
 * @throws Error when the two arrays differ in length.
 */
export function calculateMetrics(predictions, groundTruths) {
    if (predictions.length !== groundTruths.length) {
        throw new Error('Predictions and ground truths must have the same length');
    }
    const n = predictions.length;
    if (n === 0) {
        // Nothing to average over: report neutral zeros instead of 0/0 = NaN.
        return {
            accuracy: 0,
            precision: 0,
            recall: 0,
            f1Score: 0,
            exactMatch: 0,
            levenshteinDistance: 0,
            jaccardSimilarity: 0,
            hammingDistance: 0,
            fieldAccuracy: {},
            completeness: 0,
            errorRate: 0,
        };
    }
    // JSON.stringify returns undefined for values it cannot represent.
    const serialize = (value) => {
        const text = JSON.stringify(value);
        return text === undefined ? 'undefined' : text;
    };
    // Leaf comparison: identity, or equal content for arrays.
    const sameValue = (left, right) => {
        if (left === right) {
            return true;
        }
        if (!Array.isArray(left) || !Array.isArray(right)) {
            return false;
        }
        try {
            return JSON.stringify(left) === JSON.stringify(right);
        }
        catch (_serializationError) {
            return false;
        }
    };
    let exactMatches = 0;
    let totalLevenshtein = 0;
    let totalJaccard = 0;
    let totalHamming = 0;
    let validHamming = 0;
    // Map/Set keep field names such as "constructor" away from Object.prototype.
    const fieldStats = new Map();
    const differingPaths = new Set();
    for (let i = 0; i < n; i++) {
        const pred = predictions[i];
        const truth = groundTruths[i];
        // String representations, serialized once and reused by all text metrics
        const predStr = serialize(pred);
        const truthStr = serialize(truth);
        // Exact match
        if (predStr === truthStr) {
            exactMatches++;
        }
        // Levenshtein distance
        totalLevenshtein += levenshteinDistance(predStr, truthStr);
        // Jaccard similarity (using character sets)
        totalJaccard += jaccardSimilarity(new Set(predStr.split('')), new Set(truthStr.split('')));
        // Hamming distance (only defined for same-length strings)
        if (predStr.length === truthStr.length) {
            totalHamming += hammingDistance(predStr, truthStr);
            validHamming++;
        }
        // Remember every path that differs in at least one example
        for (const diff of computeDictDifference(pred, truth)) {
            differingPaths.add(diff.path);
        }
        // Field-level accuracy, driven by the ground-truth fields
        const flatPred = flattenObject(pred);
        const flatTruth = flattenObject(truth);
        for (const key of Object.keys(flatTruth)) {
            let stats = fieldStats.get(key);
            if (!stats) {
                stats = { correct: 0, total: 0 };
                fieldStats.set(key, stats);
            }
            stats.total++;
            if (sameValue(flatPred[key], flatTruth[key])) {
                stats.correct++;
            }
        }
    }
    // Calculate field accuracy percentages
    const fieldAccuracyPercentages = Object.fromEntries(Array.from(fieldStats, ([field, stats]) => [field, (stats.correct / stats.total) * 100]));
    // Share of fields that never differed; guard the division when the
    // ground truth has no fields at all.
    let completeness;
    if (fieldStats.size > 0) {
        completeness = 100 - (differingPaths.size / fieldStats.size) * 100;
    }
    else {
        completeness = differingPaths.size === 0 ? 100 : 0;
    }
    const exactMatchRate = (exactMatches / n) * 100;
    return {
        accuracy: exactMatchRate,
        precision: exactMatchRate, // Simplified for exact match scenario
        recall: exactMatchRate, // Simplified for exact match scenario
        f1Score: exactMatchRate, // Simplified for exact match scenario
        exactMatch: exactMatchRate,
        levenshteinDistance: totalLevenshtein / n,
        jaccardSimilarity: (totalJaccard / n) * 100,
        hammingDistance: validHamming > 0 ? totalHamming / validHamming : 0,
        fieldAccuracy: fieldAccuracyPercentages,
        completeness: Math.max(0, completeness),
        errorRate: ((n - exactMatches) / n) * 100,
    };
}
|
|
218
|
+
/**
 * Evaluates the predictions made for a single file against its ground truths.
 */
export class SingleFileEval {
    /**
     * @param filename - Name reported back in the result.
     * @param predictions - Predicted values, one per example.
     * @param groundTruths - Expected values, aligned with `predictions`.
     */
    constructor(filename, predictions, groundTruths) {
        Object.assign(this, { filename, predictions, groundTruths });
    }
    /**
     * Compute the metrics and the per-example differences.
     *
     * @returns The file name, metrics, the untouched inputs, one
     *   `{ index, differences }` entry for each example that differs, and the
     *   elapsed time measured with `Date.now()`.
     */
    async evaluate() {
        const startedAt = Date.now();
        const { filename, predictions, groundTruths } = this;
        const metrics = calculateMetrics(predictions, groundTruths);
        const differences = [];
        for (const [index, prediction] of predictions.entries()) {
            const fieldDiffs = computeDictDifference(prediction, groundTruths[index]);
            if (fieldDiffs.length === 0) {
                continue; // identical example: nothing to report
            }
            differences.push({ index, differences: fieldDiffs });
        }
        const executionTime = Date.now() - startedAt;
        return { filename, metrics, predictions, groundTruths, differences, executionTime };
    }
}
|
|
251
|
+
/**
|
|
252
|
+
* Plot metrics with uncertainty (text-based visualization)
|
|
253
|
+
*/
|
|
254
|
+
export function plotMetricsWithUncertainty(results) {
|
|
255
|
+
console.log('\n📊 Model Performance Comparison');
|
|
256
|
+
console.log('═'.repeat(60));
|
|
257
|
+
const maxModelNameLength = Math.max(...results.map(r => r.model.length));
|
|
258
|
+
console.log(`\n${'Model'.padEnd(maxModelNameLength)} | Accuracy | F1 Score | Exec Time`);
|
|
259
|
+
console.log('─'.repeat(maxModelNameLength + 35));
|
|
260
|
+
for (const result of results) {
|
|
261
|
+
const accuracy = result.overallMetrics.accuracy.toFixed(1);
|
|
262
|
+
const f1 = result.overallMetrics.f1Score.toFixed(1);
|
|
263
|
+
const execTime = `${(result.executionTime / 1000).toFixed(1)}s`;
|
|
264
|
+
console.log(`${result.model.padEnd(maxModelNameLength)} | ${accuracy.padStart(6)}% | ${f1.padStart(6)}% | ${execTime.padStart(8)}`);
|
|
265
|
+
}
|
|
266
|
+
// Show best performing model
|
|
267
|
+
const bestModel = results.reduce((best, current) => current.overallMetrics.accuracy > best.overallMetrics.accuracy ? current : best);
|
|
268
|
+
console.log(`\n🏆 Best performing model: ${bestModel.model} (${bestModel.overallMetrics.accuracy.toFixed(1)}% accuracy)`);
|
|
269
|
+
console.log('═'.repeat(60));
|
|
270
|
+
}
|
|
271
|
+
/**
 * Benchmark multiple models against a shared test set.
 *
 * Loads the test inputs and the ground truth from JSONL files, asks
 * `evaluationFunction` for each model's predictions, scores them, prints a
 * progress line per model plus a final comparison table, and returns the
 * collected results. A model whose evaluation throws, or which returns the
 * wrong number of predictions, is logged and skipped; it does not abort the
 * run.
 *
 * @param models - Model identifiers to evaluate, in order.
 * @param testDataPath - Path of the JSONL file holding the test inputs.
 * @param groundTruthPath - Path of the JSONL file holding the expected outputs.
 * @param evaluationFunction - Awaited as `evaluationFunction(model, testData)`;
 *   must produce exactly one prediction per test record.
 * @returns One entry per successfully evaluated model (possibly empty).
 * @throws Error when the two files hold a different number of records.
 */
export async function benchmark(models, testDataPath, groundTruthPath, evaluationFunction) {
    console.log(`🚀 Starting benchmark of ${models.length} models...`);
    // Load test data and ground truth
    const testData = await readJSONL(testDataPath);
    const groundTruth = await readJSONL(groundTruthPath);
    if (testData.length !== groundTruth.length) {
        throw new Error('Test data and ground truth must have the same length');
    }
    const results = [];
    for (let i = 0; i < models.length; i++) {
        const model = models[i];
        console.log(`\n📊 Evaluating model: ${model} (${i + 1}/${models.length})`);
        const startTime = Date.now();
        try {
            // Get predictions from model
            const predictions = await evaluationFunction(model, testData);
            if (predictions.length !== groundTruth.length) {
                throw new Error(`Model ${model} returned ${predictions.length} predictions, expected ${groundTruth.length}`);
            }
            // The single-file evaluation already computes the metrics for this
            // exact (predictions, groundTruth) pair, so reuse them instead of
            // scoring the same data a second time.
            const fileEval = new SingleFileEval(testDataPath, predictions, groundTruth);
            const fileResult = await fileEval.evaluate();
            const metrics = fileResult.metrics;
            const executionTime = Date.now() - startTime;
            results.push({
                model,
                overallMetrics: metrics,
                fileResults: [fileResult],
                aggregateStats: {
                    meanAccuracy: metrics.accuracy,
                    stdDevAccuracy: 0, // Would need multiple runs to calculate
                    meanF1: metrics.f1Score,
                    stdDevF1: 0,
                    totalFiles: 1,
                    totalPredictions: predictions.length,
                },
                executionTime,
            });
            console.log(`  ✅ Accuracy: ${metrics.accuracy.toFixed(1)}%`);
            console.log(`  ⏱️  Time: ${(executionTime / 1000).toFixed(1)}s`);
        }
        catch (error) {
            // Best effort: report the failure and move on to the next model.
            console.error(`  ❌ Failed to evaluate ${model}:`, error);
        }
    }
    // Display final comparison. With nothing to compare (no models given, or
    // every model failed) the table is skipped so the caller still receives
    // the empty result list instead of an exception from the renderer.
    if (results.length > 0) {
        plotMetricsWithUncertainty(results);
    }
    else {
        console.warn('⚠️  No model was evaluated successfully; skipping comparison table.');
    }
    return results;
}
|
|
324
|
+
/**
 * Persist benchmark results as a single JSONL record.
 *
 * The record holds a timestamp, the number of models, a condensed row per
 * model and the untouched `results` under `detailed`.
 *
 * @param results - Entries as produced by `benchmark`.
 * @param outputPath - Destination path handed to `writeJSONL`.
 */
export async function saveBenchmarkResults(results, outputPath) {
    // Condensed view of one model's outcome.
    const toSummaryRow = (entry) => ({
        model: entry.model,
        accuracy: entry.overallMetrics.accuracy,
        f1Score: entry.overallMetrics.f1Score,
        executionTime: entry.executionTime,
        totalPredictions: entry.aggregateStats.totalPredictions,
    });

    const report = {
        timestamp: new Date().toISOString(),
        totalModels: results.length,
        results: results.map(entry => toSummaryRow(entry)),
        detailed: results,
    };

    await writeJSONL(outputPath, [report]);
    console.log(`📄 Benchmark results saved to ${outputPath}`);
}
|
|
343
|
+
// Default export: the module's evaluation helpers bundled into one object,
// listed under their own names.
export default {
    SingleFileEval,
    calculateMetrics,
    computeDictDifference,
    aggregateDictDifferences,
    levenshteinDistance,
    jaccardSimilarity,
    hammingDistance,
    flattenObject,
    plotMetricsWithUncertainty,
    benchmark,
    saveBenchmarkResults,
};
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chat message processing utilities
|
|
3
|
+
* Equivalent to Python's utils/chat.py
|
|
4
|
+
*/
|
|
5
|
+
/**
 * One message of a chat conversation.
 */
export interface ChatMessage {
    /** Author of the message; restricted to these five literals. */
    role: 'system' | 'user' | 'assistant' | 'function' | 'tool';
    /** Text of the message, or `null` when it carries no text. */
    content: string | null;
    /** Optional name attached to the message. */
    name?: string;
    /** Single function invocation; `arguments` is carried as a string. */
    function_call?: {
        name: string;
        arguments: string;
    };
    /** Tool invocations, each identified by `id`; only `'function'` tools are modelled. */
    tool_calls?: Array<{
        id: string;
        type: 'function';
        function: {
            name: string;
            arguments: string;
        };
    }>;
    /** Identifier of a tool call (presumably the call this message answers; confirm). */
    tool_call_id?: string;
}
|
|
23
|
+
/**
 * Shape of a chat completion request: a model identifier, the conversation
 * and optional generation, function-calling and tool-calling settings.
 *
 * NOTE(review): the snake_case field names presumably mirror an OpenAI-style
 * wire format; confirm against the Python `utils/chat.py` this was ported from.
 */
export interface ChatCompletionRequest {
    /** Identifier of the model to run. */
    model: string;
    /** Conversation to complete. */
    messages: ChatMessage[];

    // Optional numeric generation settings.
    temperature?: number;
    max_tokens?: number;
    top_p?: number;
    frequency_penalty?: number;
    presence_penalty?: number;

    /** One stop sequence or a list of them. */
    stop?: string | string[];
    stream?: boolean;

    /** Function definitions; `parameters` is an arbitrary string-keyed object. */
    functions?: Array<{
        name: string;
        description?: string;
        parameters: Record<string, any>;
    }>;
    /** `'auto'`, `'none'`, or a specific function selected by name. */
    function_call?: 'auto' | 'none' | {
        name: string;
    };

    /** Tool definitions; only `'function'` tools are modelled. */
    tools?: Array<{
        type: 'function';
        function: {
            name: string;
            description?: string;
            parameters: Record<string, any>;
        };
    }>;
    /** `'auto'`, `'none'`, or a specific function tool selected by name. */
    tool_choice?: 'auto' | 'none' | {
        type: 'function';
        function: {
            name: string;
        };
    };
}
|
|
56
|
+
/**
 * Reshape messages for a provider. `'openai'` and `'xai'` get the input array
 * back unchanged; `'anthropic'` gets `{ role, content }` objects with
 * `'system'` rewritten to `'user'`; `'gemini'` gets `{ role, parts }` objects
 * where `'assistant'` becomes `'model'` and every other role becomes `'user'`.
 */
export declare function formatMessagesForProvider(
    messages: ChatMessage[],
    provider: 'openai' | 'anthropic' | 'xai' | 'gemini',
): any[];

/**
 * Split system messages from the rest. `system` is the system contents joined
 * with newlines (or `null` when there are none); `filtered` holds the
 * remaining messages in their original order.
 */
export declare function extractSystemPrompt(messages: ChatMessage[]): {
    system: string | null;
    filtered: ChatMessage[];
};

/**
 * Validate a message list and return human-readable problems; an empty array
 * means no problem was found.
 */
export declare function validateMessages(messages: ChatMessage[]): string[];

/**
 * Rough token estimate: a fixed per-message overhead plus about one token per
 * four characters of content and serialized function/tool calls. `_model` is
 * accepted but not used by the estimate.
 */
export declare function countTokensInMessages(messages: ChatMessage[], _model?: string): number;

/** Default export bundling the four helpers above. */
declare const _default: {
    formatMessagesForProvider: typeof formatMessagesForProvider;
    extractSystemPrompt: typeof extractSystemPrompt;
    validateMessages: typeof validateMessages;
    countTokensInMessages: typeof countTokensInMessages;
};
export default _default;
|
|
70
|
+
//# sourceMappingURL=chat.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/utils/chat.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,MAAM,CAAC;IAC5D,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE;QACd,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,UAAU,CAAC,EAAE,KAAK,CAAC;QACjB,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,UAAU,CAAC;QACjB,QAAQ,EAAE;YACR,IAAI,EAAE,MAAM,CAAC;YACb,SAAS,EAAE,MAAM,CAAC;SACnB,CAAC;KACH,CAAC,CAAC;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACjC,CAAC,CAAC;IACH,aAAa,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACnD,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,UAAU,CAAC;QACjB,QAAQ,EAAE;YACR,IAAI,EAAE,MAAM,CAAC;YACb,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SACjC,CAAC;KACH,CAAC,CAAC;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAC;CAClF;AAED,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,WAAW,EAAE,EACvB,QAAQ,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,GAAG,QAAQ,GAClD,GAAG,EAAE,CAqBP;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG;IAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,QAAQ,EAAE,WAAW,EAAE,CAAA;CAAE,CAW/G;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,EAAE,CAyBlE;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,WAAW,EAAE,EAAE,MAAM,GAAE,MAAsB,GAAG,MAAM,CAwBrG;;;;;;;AAED,wBAKE"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chat message processing utilities
|
|
3
|
+
* Equivalent to Python's utils/chat.py
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Reshape chat messages into the structure a given provider expects.
 *
 * - `'anthropic'`: `{ role, content }` objects; `'system'` is rewritten to `'user'`.
 * - `'gemini'`: `{ role, parts: [{ text }] }` objects; `'assistant'` becomes
 *   `'model'`, every other role becomes `'user'`.
 * - anything else (including `'openai'` and `'xai'`): the input array itself.
 *
 * @param messages - Messages to convert.
 * @param provider - Target provider name.
 * @returns The converted messages, or `messages` unchanged.
 */
export function formatMessagesForProvider(messages, provider) {
    if (provider === 'anthropic') {
        return messages.map((msg) => {
            const role = msg.role === 'system' ? 'user' : msg.role;
            return { role, content: msg.content };
        });
    }
    if (provider === 'gemini') {
        return messages.map((msg) => {
            const role = msg.role === 'assistant' ? 'model' : 'user';
            return { role, parts: [{ text: msg.content }] };
        });
    }
    // 'openai', 'xai' and unrecognised providers all pass through untouched.
    return messages;
}
|
|
24
|
+
/**
 * Separate system messages from the rest of a conversation.
 *
 * @param messages - Conversation to split.
 * @returns `system`: the contents of all system messages joined with `'\n'`,
 *   or `null` when there is none; `filtered`: the non-system messages in
 *   their original order.
 */
export function extractSystemPrompt(messages) {
    const systemContents = [];
    const filtered = [];
    messages.forEach((msg) => {
        if (msg.role === 'system') {
            systemContents.push(msg.content);
        }
        else {
            filtered.push(msg);
        }
    });

    let system = null;
    if (systemContents.length > 0) {
        system = systemContents.join('\n');
    }
    return { system, filtered };
}
|
|
34
|
+
/**
 * Validate a list of chat messages.
 *
 * Checks that the list is a non-empty array and that every entry is an
 * object with a recognised role and some payload (`content`,
 * `function_call` or `tool_calls`).
 *
 * @param messages - Candidate message list (untrusted shape).
 * @returns Human-readable problems, in index order; empty when none is found.
 */
export function validateMessages(messages) {
    const errors = [];
    if (!Array.isArray(messages) || messages.length === 0) {
        errors.push('Messages array is required and cannot be empty');
        return errors;
    }
    const validRoles = ['system', 'user', 'assistant', 'function', 'tool'];
    for (let i = 0; i < messages.length; i++) {
        const msg = messages[i];
        // A null/primitive entry used to crash with a TypeError on `msg.role`;
        // report it like any other validation problem instead.
        if (msg === null || typeof msg !== 'object') {
            errors.push(`Message at index ${i} must be an object`);
            continue;
        }
        if (!msg.role) {
            errors.push(`Message at index ${i} is missing role`);
        }
        else if (!validRoles.includes(msg.role)) {
            // `else if`: a missing role is reported once, not a second time
            // as "invalid role: undefined".
            errors.push(`Message at index ${i} has invalid role: ${msg.role}`);
        }
        // `== null` on purpose: an absent `content` (undefined) carries no
        // payload either, exactly like an explicit null.
        if (msg.content == null && !msg.function_call && !msg.tool_calls) {
            errors.push(`Message at index ${i} must have content, function_call, or tool_calls`);
        }
    }
    return errors;
}
|
|
54
|
+
/**
 * Rough token estimate for a list of chat messages.
 *
 * This is a heuristic, not a tokenizer: each message costs a flat 4 tokens,
 * plus one token per 4 characters (rounded up) of its content and of the
 * JSON-serialized `function_call` / `tool_calls`, when present.
 *
 * @param messages - Messages to measure.
 * @param _model - Accepted for signature compatibility; not used.
 * @returns The estimated token count.
 */
export function countTokensInMessages(messages, _model = 'gpt-4o-mini') {
    const BASE_TOKENS_PER_MESSAGE = 4;
    // ~4 characters per token, rounded up.
    const estimate = (text) => Math.ceil(text.length / 4);

    let running = 0;
    for (const message of messages) {
        const { content, function_call: functionCall, tool_calls: toolCalls } = message;
        running += BASE_TOKENS_PER_MESSAGE;
        if (content) {
            running += estimate(content);
        }
        if (functionCall) {
            running += estimate(JSON.stringify(functionCall));
        }
        if (toolCalls) {
            running += estimate(JSON.stringify(toolCalls));
        }
    }
    return running;
}
|
|
74
|
+
// Default export: the four chat helpers bundled under their own names.
export default {
    formatMessagesForProvider,
    extractSystemPrompt,
    validateMessages,
    countTokensInMessages,
};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { Amount } from '../types/ai_models.js';
|
|
2
|
+
/**
 * Token counts reported for one model call. Every field is optional.
 */
interface Usage {
    /** Tokens in the prompt. */
    prompt_tokens?: number;
    /** Tokens in the completion. */
    completion_tokens?: number;
    /** Total token count as reported. */
    total_tokens?: number;
    /** Cached tokens (how they relate to `prompt_tokens` is not visible here; confirm). */
    cached_tokens?: number;
}
|
|
8
|
+
/**
 * Compute the cost of one model call from its token usage.
 *
 * @param model - Identifier of the model that was used.
 * @param usage - Token counts of the call.
 * @param currency - Optional currency for the result (the default is set by
 *   the implementation and is not visible in this declaration).
 * @returns The cost as an `Amount`.
 */
export declare function computeCostFromModel(
    model: string,
    usage: Usage,
    currency?: string,
): Amount;
|
|
12
|
+
/**
 * Itemised cost of one model call, for detailed analysis: a cost per token
 * category, the total, and the token counts that accompany them.
 */
export interface CostBreakdown {
    // Costs, each expressed as an Amount.
    prompt_cost: Amount;
    completion_cost: Amount;
    cached_cost: Amount;
    total_cost: Amount;

    // Token counts reported alongside the costs.
    prompt_tokens: number;
    completion_tokens: number;
    cached_tokens: number;
}
|
|
24
|
+
/**
 * Compute the itemised cost of one model call.
 *
 * Takes the same inputs as `computeCostFromModel` but returns a
 * `CostBreakdown` instead of a single `Amount`.
 *
 * @param model - Identifier of the model that was used.
 * @param usage - Token counts of the call.
 * @param currency - Optional currency for the amounts.
 */
export declare function computeCostFromModelWithBreakdown(
    model: string,
    usage: Usage,
    currency?: string,
): CostBreakdown;
|
|
25
|
+
export {};
|
|
26
|
+
//# sourceMappingURL=cost_calculation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cost_calculation.d.ts","sourceRoot":"","sources":["../../src/utils/cost_calculation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAW,MAAM,uBAAuB,CAAC;AA0CxD,UAAU,KAAK;IACb,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,KAAK,EACZ,QAAQ,GAAE,MAAc,GACvB,MAAM,CAwBR;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,iCAAiC,CAC/C,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,KAAK,EACZ,QAAQ,GAAE,MAAc,GACvB,aAAa,CAwCf"}
|