@retab/node 0.0.48 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +8 -215
  2. package/dist/api/client.d.ts +2 -2
  3. package/dist/api/client.d.ts.map +1 -1
  4. package/dist/api/client.js +2 -2
  5. package/dist/api/documents/client.d.ts +3 -3
  6. package/dist/api/documents/client.d.ts.map +1 -1
  7. package/dist/api/documents/client.js +3 -3
  8. package/dist/api/projects/client.d.ts +15 -0
  9. package/dist/api/projects/client.d.ts.map +1 -0
  10. package/dist/api/projects/client.js +43 -0
  11. package/dist/api/projects/documents/client.d.ts +12 -0
  12. package/dist/api/projects/documents/client.d.ts.map +1 -0
  13. package/dist/api/projects/documents/client.js +39 -0
  14. package/dist/api/projects/iterations/client.d.ts +17 -0
  15. package/dist/api/projects/iterations/client.d.ts.map +1 -0
  16. package/dist/api/projects/iterations/client.js +64 -0
  17. package/dist/client.d.ts +1 -0
  18. package/dist/client.d.ts.map +1 -1
  19. package/dist/client.js +6 -1
  20. package/dist/generated_types.d.ts +17837 -40090
  21. package/dist/generated_types.d.ts.map +1 -1
  22. package/dist/generated_types.js +309 -979
  23. package/dist/index.d.ts +7 -2
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +2 -2
  26. package/dist/types.d.ts +188 -80
  27. package/dist/types.d.ts.map +1 -1
  28. package/dist/types.js +22 -1
  29. package/package.json +6 -9
  30. package/dist/api/consensus/client.d.ts +0 -7
  31. package/dist/api/consensus/client.d.ts.map +0 -1
  32. package/dist/api/consensus/client.js +0 -14
  33. package/dist/errors.d.ts +0 -34
  34. package/dist/errors.d.ts.map +0 -1
  35. package/dist/errors.js +0 -53
  36. package/dist/resource.d.ts +0 -12
  37. package/dist/resource.d.ts.map +0 -1
  38. package/dist/resource.js +0 -19
  39. package/dist/resources/consensus/completions.d.ts +0 -66
  40. package/dist/resources/consensus/completions.d.ts.map +0 -1
  41. package/dist/resources/consensus/completions.js +0 -84
  42. package/dist/resources/consensus/index.d.ts +0 -72
  43. package/dist/resources/consensus/index.d.ts.map +0 -1
  44. package/dist/resources/consensus/index.js +0 -76
  45. package/dist/resources/consensus/responses.d.ts +0 -69
  46. package/dist/resources/consensus/responses.d.ts.map +0 -1
  47. package/dist/resources/consensus/responses.js +0 -99
  48. package/dist/resources/documents/extractions.d.ts +0 -74
  49. package/dist/resources/documents/extractions.d.ts.map +0 -1
  50. package/dist/resources/documents/extractions.js +0 -196
  51. package/dist/resources/documents/index.d.ts +0 -21
  52. package/dist/resources/documents/index.d.ts.map +0 -1
  53. package/dist/resources/documents/index.js +0 -55
  54. package/dist/resources/evaluations/documents.d.ts +0 -40
  55. package/dist/resources/evaluations/documents.d.ts.map +0 -1
  56. package/dist/resources/evaluations/documents.js +0 -123
  57. package/dist/resources/evaluations/index.d.ts +0 -14
  58. package/dist/resources/evaluations/index.d.ts.map +0 -1
  59. package/dist/resources/evaluations/index.js +0 -17
  60. package/dist/resources/evaluations/iterations.d.ts +0 -50
  61. package/dist/resources/evaluations/iterations.d.ts.map +0 -1
  62. package/dist/resources/evaluations/iterations.js +0 -156
  63. package/dist/resources/files.d.ts +0 -82
  64. package/dist/resources/files.d.ts.map +0 -1
  65. package/dist/resources/files.js +0 -150
  66. package/dist/resources/finetuning.d.ts +0 -105
  67. package/dist/resources/finetuning.d.ts.map +0 -1
  68. package/dist/resources/finetuning.js +0 -181
  69. package/dist/resources/index.d.ts +0 -11
  70. package/dist/resources/index.d.ts.map +0 -1
  71. package/dist/resources/index.js +0 -10
  72. package/dist/resources/models.d.ts +0 -57
  73. package/dist/resources/models.d.ts.map +0 -1
  74. package/dist/resources/models.js +0 -72
  75. package/dist/resources/processors/automations/endpoints.d.ts +0 -90
  76. package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
  77. package/dist/resources/processors/automations/endpoints.js +0 -145
  78. package/dist/resources/processors/automations/index.d.ts +0 -7
  79. package/dist/resources/processors/automations/index.d.ts.map +0 -1
  80. package/dist/resources/processors/automations/index.js +0 -6
  81. package/dist/resources/processors/automations/links.d.ts +0 -90
  82. package/dist/resources/processors/automations/links.d.ts.map +0 -1
  83. package/dist/resources/processors/automations/links.js +0 -149
  84. package/dist/resources/processors/automations/logs.d.ts +0 -35
  85. package/dist/resources/processors/automations/logs.d.ts.map +0 -1
  86. package/dist/resources/processors/automations/logs.js +0 -60
  87. package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
  88. package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
  89. package/dist/resources/processors/automations/mailboxes.js +0 -157
  90. package/dist/resources/processors/automations/outlook.d.ts +0 -114
  91. package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
  92. package/dist/resources/processors/automations/outlook.js +0 -170
  93. package/dist/resources/processors/automations/tests.d.ts +0 -58
  94. package/dist/resources/processors/automations/tests.d.ts.map +0 -1
  95. package/dist/resources/processors/automations/tests.js +0 -90
  96. package/dist/resources/processors/index.d.ts +0 -303
  97. package/dist/resources/processors/index.d.ts.map +0 -1
  98. package/dist/resources/processors/index.js +0 -261
  99. package/dist/resources/schemas.d.ts +0 -63
  100. package/dist/resources/schemas.d.ts.map +0 -1
  101. package/dist/resources/schemas.js +0 -183
  102. package/dist/resources/secrets/external_api_keys.d.ts +0 -61
  103. package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
  104. package/dist/resources/secrets/external_api_keys.js +0 -120
  105. package/dist/resources/secrets/index.d.ts +0 -14
  106. package/dist/resources/secrets/index.d.ts.map +0 -1
  107. package/dist/resources/secrets/index.js +0 -17
  108. package/dist/resources/secrets/webhooks.d.ts +0 -73
  109. package/dist/resources/secrets/webhooks.d.ts.map +0 -1
  110. package/dist/resources/secrets/webhooks.js +0 -145
  111. package/dist/resources/usage.d.ts +0 -223
  112. package/dist/resources/usage.d.ts.map +0 -1
  113. package/dist/resources/usage.js +0 -310
  114. package/dist/types/ai_models.d.ts +0 -389
  115. package/dist/types/ai_models.d.ts.map +0 -1
  116. package/dist/types/ai_models.js +0 -145
  117. package/dist/types/automations/cron.d.ts +0 -28
  118. package/dist/types/automations/cron.d.ts.map +0 -1
  119. package/dist/types/automations/cron.js +0 -1
  120. package/dist/types/automations/endpoints.d.ts +0 -13
  121. package/dist/types/automations/endpoints.d.ts.map +0 -1
  122. package/dist/types/automations/endpoints.js +0 -1
  123. package/dist/types/automations/index.d.ts +0 -7
  124. package/dist/types/automations/index.d.ts.map +0 -1
  125. package/dist/types/automations/index.js +0 -6
  126. package/dist/types/automations/links.d.ts +0 -15
  127. package/dist/types/automations/links.d.ts.map +0 -1
  128. package/dist/types/automations/links.js +0 -1
  129. package/dist/types/automations/mailboxes.d.ts +0 -18
  130. package/dist/types/automations/mailboxes.d.ts.map +0 -1
  131. package/dist/types/automations/mailboxes.js +0 -1
  132. package/dist/types/automations/outlook.d.ts +0 -37
  133. package/dist/types/automations/outlook.d.ts.map +0 -1
  134. package/dist/types/automations/outlook.js +0 -1
  135. package/dist/types/automations/webhooks.d.ts +0 -13
  136. package/dist/types/automations/webhooks.d.ts.map +0 -1
  137. package/dist/types/automations/webhooks.js +0 -1
  138. package/dist/types/browser_canvas.d.ts +0 -4
  139. package/dist/types/browser_canvas.d.ts.map +0 -1
  140. package/dist/types/browser_canvas.js +0 -2
  141. package/dist/types/chat.d.ts +0 -99
  142. package/dist/types/chat.d.ts.map +0 -1
  143. package/dist/types/chat.js +0 -20
  144. package/dist/types/consensus.d.ts +0 -10
  145. package/dist/types/consensus.d.ts.map +0 -1
  146. package/dist/types/consensus.js +0 -1
  147. package/dist/types/db/annotations.d.ts +0 -108
  148. package/dist/types/db/annotations.d.ts.map +0 -1
  149. package/dist/types/db/annotations.js +0 -6
  150. package/dist/types/db/files.d.ts +0 -133
  151. package/dist/types/db/files.d.ts.map +0 -1
  152. package/dist/types/db/files.js +0 -5
  153. package/dist/types/documents/extractions.d.ts +0 -1849
  154. package/dist/types/documents/extractions.d.ts.map +0 -1
  155. package/dist/types/documents/extractions.js +0 -211
  156. package/dist/types/documents/processing.d.ts +0 -249
  157. package/dist/types/documents/processing.d.ts.map +0 -1
  158. package/dist/types/documents/processing.js +0 -6
  159. package/dist/types/evaluations/iterations.d.ts +0 -41
  160. package/dist/types/evaluations/iterations.d.ts.map +0 -1
  161. package/dist/types/evaluations/iterations.js +0 -1
  162. package/dist/types/jobs/base.d.ts +0 -162
  163. package/dist/types/jobs/base.d.ts.map +0 -1
  164. package/dist/types/jobs/base.js +0 -6
  165. package/dist/types/jobs/specialized.d.ts +0 -200
  166. package/dist/types/jobs/specialized.d.ts.map +0 -1
  167. package/dist/types/jobs/specialized.js +0 -37
  168. package/dist/types/logs.d.ts +0 -92
  169. package/dist/types/logs.d.ts.map +0 -1
  170. package/dist/types/logs.js +0 -1
  171. package/dist/types/mime.d.ts +0 -426
  172. package/dist/types/mime.d.ts.map +0 -1
  173. package/dist/types/mime.js +0 -48
  174. package/dist/types/modalities.d.ts +0 -31
  175. package/dist/types/modalities.d.ts.map +0 -1
  176. package/dist/types/modalities.js +0 -109
  177. package/dist/types/pagination.d.ts +0 -5
  178. package/dist/types/pagination.d.ts.map +0 -1
  179. package/dist/types/pagination.js +0 -1
  180. package/dist/types/schemas/enhancement.d.ts +0 -250
  181. package/dist/types/schemas/enhancement.d.ts.map +0 -1
  182. package/dist/types/schemas/enhancement.js +0 -6
  183. package/dist/types/schemas/generate.d.ts +0 -160
  184. package/dist/types/schemas/generate.d.ts.map +0 -1
  185. package/dist/types/schemas/generate.js +0 -19
  186. package/dist/types/schemas/object.d.ts +0 -116
  187. package/dist/types/schemas/object.d.ts.map +0 -1
  188. package/dist/types/schemas/object.js +0 -861
  189. package/dist/types/secrets/external_api_keys.d.ts +0 -27
  190. package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
  191. package/dist/types/secrets/external_api_keys.js +0 -11
  192. package/dist/types/secrets/index.d.ts +0 -2
  193. package/dist/types/secrets/index.d.ts.map +0 -1
  194. package/dist/types/secrets/index.js +0 -1
  195. package/dist/types/standards.d.ts +0 -37
  196. package/dist/types/standards.d.ts.map +0 -1
  197. package/dist/types/standards.js +0 -1
  198. package/dist/utils/ai_models.d.ts +0 -10
  199. package/dist/utils/ai_models.d.ts.map +0 -1
  200. package/dist/utils/ai_models.js +0 -183
  201. package/dist/utils/batch_processing.d.ts +0 -227
  202. package/dist/utils/batch_processing.d.ts.map +0 -1
  203. package/dist/utils/batch_processing.js +0 -268
  204. package/dist/utils/benchmarking.d.ts +0 -115
  205. package/dist/utils/benchmarking.d.ts.map +0 -1
  206. package/dist/utils/benchmarking.js +0 -355
  207. package/dist/utils/chat.d.ts +0 -70
  208. package/dist/utils/chat.d.ts.map +0 -1
  209. package/dist/utils/chat.js +0 -79
  210. package/dist/utils/cost_calculation.d.ts +0 -26
  211. package/dist/utils/cost_calculation.d.ts.map +0 -1
  212. package/dist/utils/cost_calculation.js +0 -99
  213. package/dist/utils/datasets.d.ts +0 -135
  214. package/dist/utils/datasets.d.ts.map +0 -1
  215. package/dist/utils/datasets.js +0 -359
  216. package/dist/utils/display.d.ts +0 -108
  217. package/dist/utils/display.d.ts.map +0 -1
  218. package/dist/utils/display.js +0 -244
  219. package/dist/utils/hash.d.ts +0 -18
  220. package/dist/utils/hash.d.ts.map +0 -1
  221. package/dist/utils/hash.js +0 -31
  222. package/dist/utils/hashing.d.ts +0 -18
  223. package/dist/utils/hashing.d.ts.map +0 -1
  224. package/dist/utils/hashing.js +0 -28
  225. package/dist/utils/index.d.ts +0 -8
  226. package/dist/utils/index.d.ts.map +0 -1
  227. package/dist/utils/index.js +0 -10
  228. package/dist/utils/json_schema.d.ts +0 -18
  229. package/dist/utils/json_schema.d.ts.map +0 -1
  230. package/dist/utils/json_schema.js +0 -334
  231. package/dist/utils/json_schema_utils.d.ts +0 -42
  232. package/dist/utils/json_schema_utils.d.ts.map +0 -1
  233. package/dist/utils/json_schema_utils.js +0 -212
  234. package/dist/utils/jsonl.d.ts +0 -60
  235. package/dist/utils/jsonl.d.ts.map +0 -1
  236. package/dist/utils/jsonl.js +0 -259
  237. package/dist/utils/mime.d.ts +0 -6
  238. package/dist/utils/mime.d.ts.map +0 -1
  239. package/dist/utils/mime.js +0 -129
  240. package/dist/utils/model_cards.d.ts +0 -219
  241. package/dist/utils/model_cards.d.ts.map +0 -1
  242. package/dist/utils/model_cards.js +0 -462
  243. package/dist/utils/prompt_optimization.d.ts +0 -96
  244. package/dist/utils/prompt_optimization.d.ts.map +0 -1
  245. package/dist/utils/prompt_optimization.js +0 -275
  246. package/dist/utils/responses.d.ts +0 -35
  247. package/dist/utils/responses.d.ts.map +0 -1
  248. package/dist/utils/responses.js +0 -37
  249. package/dist/utils/stream.d.ts +0 -13
  250. package/dist/utils/stream.d.ts.map +0 -1
  251. package/dist/utils/stream.js +0 -64
  252. package/dist/utils/stream_context_managers.d.ts +0 -147
  253. package/dist/utils/stream_context_managers.d.ts.map +0 -1
  254. package/dist/utils/stream_context_managers.js +0 -380
  255. package/dist/utils/usage.d.ts +0 -57
  256. package/dist/utils/usage.d.ts.map +0 -1
  257. package/dist/utils/usage.js +0 -97
  258. package/dist/utils/webhook_secrets.d.ts +0 -59
  259. package/dist/utils/webhook_secrets.d.ts.map +0 -1
  260. package/dist/utils/webhook_secrets.js +0 -107
  261. package/dist/utils/zod_to_json_schema.d.ts +0 -11
  262. package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
  263. package/dist/utils/zod_to_json_schema.js +0 -123
@@ -1,355 +0,0 @@
1
- import { readJSONL, writeJSONL } from './jsonl.js';
2
/**
 * Compute the Levenshtein (edit) distance between two strings.
 *
 * Characters are compared as UTF-16 code units (via `charAt`), so a
 * surrogate pair counts as two positions.
 *
 * Only the previous and the current row of the dynamic-programming table
 * are kept, so memory use is proportional to `str1.length` rather than to
 * `str1.length * str2.length`. The returned distance is the same as with a
 * full table.
 *
 * @param {string} str1 - First string (spans the columns of the table).
 * @param {string} str2 - Second string (spans the rows of the table).
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions needed to turn one string into the other.
 */
export function levenshteinDistance(str1, str2) {
  const cols = str1.length;
  const rows = str2.length;

  // previous[j] = distance between the first (i - 1) units of str2 and the
  // first j units of str1. Row 0 is simply 0..cols.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  let current = new Array(cols + 1);

  for (let i = 1; i <= rows; i++) {
    current[0] = i;
    const rowChar = str2.charAt(i - 1);
    for (let j = 1; j <= cols; j++) {
      if (rowChar === str1.charAt(j - 1)) {
        // Matching units cost nothing: carry the diagonal value over.
        current[j] = previous[j - 1];
      } else {
        current[j] = Math.min(
          previous[j - 1], // substitution
          current[j - 1], // insertion
          previous[j], // deletion
        ) + 1;
      }
    }
    // Reuse the old row as scratch space for the next iteration.
    [previous, current] = [current, previous];
  }

  return previous[cols];
}
30
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<*>} set1 - First set.
 * @param {Set<*>} set2 - Second set; must provide `has`.
 * @returns {number} A value in [0, 1]; two empty sets are treated as
 *   identical and yield 1.0.
 */
export function jaccardSimilarity(set1, set2) {
  const firstMembers = new Set(set1);

  let sharedCount = 0;
  for (const member of firstMembers) {
    if (set2.has(member)) {
      sharedCount += 1;
    }
  }

  const combined = new Set(firstMembers);
  for (const member of set2) {
    combined.add(member);
  }

  return combined.size === 0 ? 1.0 : sharedCount / combined.size;
}
40
/**
 * Hamming distance: the number of positions at which two equal-length
 * strings differ (positions are UTF-16 code units).
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string, same length as `str1`.
 * @returns {number} Count of differing positions.
 * @throws {Error} When the two strings have different lengths.
 */
export function hammingDistance(str1, str2) {
  const length = str1.length;
  if (length !== str2.length) {
    throw new Error('Strings must be of equal length for Hamming distance');
  }

  let mismatches = 0;
  let position = 0;
  while (position < length) {
    mismatches += str1[position] !== str2[position] ? 1 : 0;
    position += 1;
  }
  return mismatches;
}
55
/**
 * Flatten a nested object into a single-level object with dot-notation keys.
 *
 * Only plain nesting is descended into: arrays and `null` are kept as leaf
 * values. A nested object without own enumerable keys contributes nothing
 * to the output.
 *
 * Own keys are listed with `Object.keys`, so input that carries a property
 * literally named `hasOwnProperty`, or that has no prototype at all
 * (`Object.create(null)`), is handled instead of throwing.
 *
 * @param {Object} obj - Value to flatten; `null`/`undefined` yield `{}`.
 * @param {string} [prefix=''] - Key path prepended (with a dot) to every key.
 * @returns {Object} Map from dotted key path to leaf value.
 */
export function flattenObject(obj, prefix = '') {
  const flattened = {};
  if (obj === null || obj === undefined) {
    return flattened;
  }

  for (const key of Object.keys(obj)) {
    const newKey = prefix ? `${prefix}.${key}` : key;
    const value = obj[key];
    const isNestedObject =
      value !== null && typeof value === 'object' && !Array.isArray(value);

    if (isNestedObject) {
      Object.assign(flattened, flattenObject(value, newKey));
    } else {
      flattened[newKey] = value;
    }
  }
  return flattened;
}
74
/**
 * Compute field-level differences between a predicted object and its
 * ground truth.
 *
 * Both inputs are flattened with `flattenObject`, then every key present on
 * either side is classified:
 *   - `missing`        – key only in the ground truth
 *   - `extra`          – key only in the prediction
 *   - `type_mismatch`  – both present, different `typeof`
 *   - `value_mismatch` – both present, same `typeof`, different value
 *
 * Presence is decided with an own-property check, so keys such as
 * `toString` or `constructor` are not mistaken for present because they are
 * inherited from `Object.prototype`. Array leaves are compared by their JSON
 * serialisation (the same notion of equality the exact-match metric uses),
 * so two equal arrays are no longer reported as a mismatch just because they
 * are distinct objects.
 *
 * @param {Object} predicted - Predicted object.
 * @param {Object} groundTruth - Expected object.
 * @param {string} [path=''] - Prefix prepended (with a dot) to each reported `path`.
 * @returns {Array<{field: string, predicted: *, groundTruth: *, differenceType: string, path: string}>}
 *   One entry per differing key; empty when the flattened objects agree.
 */
export function computeDictDifference(predicted, groundTruth, path = '') {
  const flatPredicted = flattenObject(predicted);
  const flatGroundTruth = flattenObject(groundTruth);

  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  // Leaves are primitives, null or arrays; arrays need a structural comparison.
  const leavesMatch = (a, b) =>
    a === b ||
    (Array.isArray(a) && Array.isArray(b) && JSON.stringify(a) === JSON.stringify(b));

  const allKeys = new Set([
    ...Object.keys(flatPredicted),
    ...Object.keys(flatGroundTruth),
  ]);

  const differences = [];
  for (const key of allKeys) {
    const fullPath = path ? `${path}.${key}` : key;
    const inPredicted = hasOwn(flatPredicted, key);
    const inGroundTruth = hasOwn(flatGroundTruth, key);
    const predValue = inPredicted ? flatPredicted[key] : undefined;
    const truthValue = inGroundTruth ? flatGroundTruth[key] : undefined;

    let differenceType = null;
    if (!inPredicted) {
      differenceType = 'missing';
    } else if (!inGroundTruth) {
      differenceType = 'extra';
    } else if (!leavesMatch(predValue, truthValue)) {
      differenceType =
        typeof predValue !== typeof truthValue ? 'type_mismatch' : 'value_mismatch';
    }

    if (differenceType !== null) {
      differences.push({
        field: key,
        predicted: predValue,
        groundTruth: truthValue,
        differenceType,
        path: fullPath,
      });
    }
  }
  return differences;
}
120
/**
 * Aggregate per-example difference lists into per-path statistics.
 *
 * Differences are grouped in a `Map`, so a field path that collides with a
 * member of `Object.prototype` (`constructor`, `toString`, `__proto__`, …)
 * is treated like any other path instead of throwing or being lost.
 *
 * @param {Array<Array<{path: string}>>} differences - One list of difference
 *   records per evaluated example.
 * @returns {Object<string, {count: number, percentage: number, examples: Array}>}
 *   For every path that differed at least once: how many records were seen,
 *   that count as a percentage of the number of examples, and up to the
 *   first five records.
 */
export function aggregateDictDifferences(differences) {
  // Group difference records by field path.
  const grouped = new Map();
  for (const diffList of differences) {
    for (const diff of diffList) {
      const pathKey = String(diff.path);
      const bucket = grouped.get(pathKey);
      if (bucket) {
        bucket.push(diff);
      } else {
        grouped.set(pathKey, [diff]);
      }
    }
  }

  const totalExamples = differences.length;
  const result = {};
  for (const [pathKey, diffs] of grouped) {
    // defineProperty always creates an own data property, even for "__proto__".
    Object.defineProperty(result, pathKey, {
      value: {
        count: diffs.length,
        percentage: (diffs.length / totalExamples) * 100,
        examples: diffs.slice(0, 5), // Keep first 5 examples
      },
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }
  return result;
}
145
/**
 * Calculate evaluation metrics for a list of predictions against the
 * matching list of ground truths.
 *
 * Per example, the JSON serialisations are compared for an exact match and
 * fed to the string metrics (Levenshtein, character-set Jaccard, and Hamming
 * when the two serialisations have equal length). Field-level accuracy is
 * computed on the flattened ground-truth keys.
 *
 * Differences from a naive implementation, all intentional:
 *   - An empty evaluation returns all-zero metrics instead of NaN.
 *   - Per-field counters live in a `Map`, so a field named `constructor` or
 *     `toString` cannot collide with `Object.prototype`.
 *   - Array leaves count as correct when their JSON serialisations match
 *     (distinct array objects are never `===`).
 *   - `completeness` is 100 when there are no tracked fields and no
 *     differences, rather than NaN.
 *
 * @param {Array<Object>} predictions - Model outputs.
 * @param {Array<Object>} groundTruths - Expected outputs, same length and order.
 * @returns {Object} Metrics object with `accuracy`, `precision`, `recall`,
 *   `f1Score`, `exactMatch`, `levenshteinDistance`, `jaccardSimilarity`,
 *   `hammingDistance`, `fieldAccuracy`, `completeness` and `errorRate`.
 * @throws {Error} When the two arrays differ in length.
 */
export function calculateMetrics(predictions, groundTruths) {
  if (predictions.length !== groundTruths.length) {
    throw new Error('Predictions and ground truths must have the same length');
  }
  const n = predictions.length;
  if (n === 0) {
    // Nothing to evaluate: avoid 0/0 and report neutral values.
    return {
      accuracy: 0,
      precision: 0,
      recall: 0,
      f1Score: 0,
      exactMatch: 0,
      levenshteinDistance: 0,
      jaccardSimilarity: 0,
      hammingDistance: 0,
      fieldAccuracy: {},
      completeness: 0,
      errorRate: 0,
    };
  }

  // Leaves are primitives, null or arrays; arrays need a structural comparison.
  const leavesMatch = (a, b) =>
    a === b ||
    (Array.isArray(a) && Array.isArray(b) && JSON.stringify(a) === JSON.stringify(b));

  let exactMatches = 0;
  let totalLevenshtein = 0;
  let totalJaccard = 0;
  let totalHamming = 0;
  let validHamming = 0;
  const fieldStats = new Map();
  const differences = [];

  for (let i = 0; i < n; i++) {
    const pred = predictions[i];
    const truth = groundTruths[i];

    // String representations, shared by the exact-match and text metrics.
    const predStr = JSON.stringify(pred);
    const truthStr = JSON.stringify(truth);

    if (predStr === truthStr) {
      exactMatches++;
    }

    totalLevenshtein += levenshteinDistance(predStr, truthStr);

    // Jaccard similarity over the sets of characters used.
    totalJaccard += jaccardSimilarity(
      new Set(predStr.split('')),
      new Set(truthStr.split('')),
    );

    // Hamming distance is only defined for same-length strings.
    if (predStr.length === truthStr.length) {
      totalHamming += hammingDistance(predStr, truthStr);
      validHamming++;
    }

    // Field-level accuracy, keyed by the flattened ground-truth fields.
    differences.push(computeDictDifference(pred, truth));
    const flatPred = flattenObject(pred);
    const flatTruth = flattenObject(truth);
    for (const key of Object.keys(flatTruth)) {
      let stats = fieldStats.get(key);
      if (!stats) {
        stats = { correct: 0, total: 0 };
        fieldStats.set(key, stats);
      }
      stats.total++;
      if (leavesMatch(flatPred[key], flatTruth[key])) {
        stats.correct++;
      }
    }
  }

  const fieldAccuracyPercentages = {};
  for (const [field, stats] of fieldStats) {
    fieldAccuracyPercentages[field] = (stats.correct / stats.total) * 100;
  }

  // Share of tracked fields that never showed a difference.
  const differingPaths = Object.keys(aggregateDictDifferences(differences)).length;
  const trackedFields = fieldStats.size;
  const completeness =
    trackedFields === 0 && differingPaths === 0
      ? 100
      : 100 - (differingPaths / trackedFields) * 100;

  const exactMatchRate = (exactMatches / n) * 100;
  return {
    accuracy: exactMatchRate,
    precision: exactMatchRate, // Simplified for exact match scenario
    recall: exactMatchRate, // Simplified for exact match scenario
    f1Score: exactMatchRate, // Simplified for exact match scenario
    exactMatch: exactMatchRate,
    levenshteinDistance: totalLevenshtein / n,
    jaccardSimilarity: (totalJaccard / n) * 100,
    hammingDistance: validHamming > 0 ? totalHamming / validHamming : 0,
    fieldAccuracy: fieldAccuracyPercentages,
    completeness: Math.max(0, completeness),
    errorRate: ((n - exactMatches) / n) * 100,
  };
}
218
/**
 * Evaluation of one file's predictions against its ground truths.
 */
export class SingleFileEval {
  /**
   * @param {string} filename - Identifier of the evaluated file.
   * @param {Array<Object>} predictions - Model outputs.
   * @param {Array<Object>} groundTruths - Expected outputs, same order.
   */
  constructor(filename, predictions, groundTruths) {
    Object.assign(this, { filename, predictions, groundTruths });
  }

  /**
   * Compute the metrics and collect the per-example differences.
   *
   * @returns {Promise<Object>} Resolves to `{ filename, metrics, predictions,
   *   groundTruths, differences, executionTime }`, where `differences` lists
   *   `{ index, differences }` for every example with at least one
   *   difference and `executionTime` is in milliseconds.
   */
  async evaluate() {
    const startedAt = Date.now();
    const { filename, predictions, groundTruths } = this;

    const metrics = calculateMetrics(predictions, groundTruths);

    const differences = [];
    for (let index = 0; index < predictions.length; index += 1) {
      const exampleDiffs = computeDictDifference(predictions[index], groundTruths[index]);
      if (exampleDiffs.length === 0) {
        continue;
      }
      differences.push({ index, differences: exampleDiffs });
    }

    return {
      filename,
      metrics,
      predictions,
      groundTruths,
      differences,
      executionTime: Date.now() - startedAt,
    };
  }
}
251
/**
 * Print a text table comparing benchmark results, followed by the model
 * with the highest accuracy.
 *
 * An empty `results` array prints a short notice instead of throwing
 * (`String.prototype.repeat` rejects a negative count and `reduce` without
 * an initial value rejects an empty array). The name column is never
 * narrower than its "Model" header, so rows stay aligned for short names.
 *
 * @param {Array<{model: string, overallMetrics: {accuracy: number, f1Score: number}, executionTime: number}>} results
 *   Benchmark results; `executionTime` is in milliseconds.
 * @returns {void}
 */
export function plotMetricsWithUncertainty(results) {
  console.log('\n📊 Model Performance Comparison');
  console.log('═'.repeat(60));

  if (results.length === 0) {
    console.log('\nNo benchmark results to display.');
    console.log('═'.repeat(60));
    return;
  }

  const nameWidth = Math.max('Model'.length, ...results.map((r) => r.model.length));
  console.log(`\n${'Model'.padEnd(nameWidth)} | Accuracy | F1 Score | Exec Time`);
  console.log('─'.repeat(nameWidth + 35));

  for (const result of results) {
    const accuracy = result.overallMetrics.accuracy.toFixed(1);
    const f1 = result.overallMetrics.f1Score.toFixed(1);
    const execTime = `${(result.executionTime / 1000).toFixed(1)}s`;
    console.log(`${result.model.padEnd(nameWidth)} | ${accuracy.padStart(6)}% | ${f1.padStart(6)}% | ${execTime.padStart(8)}`);
  }

  // Show best performing model (first one wins on ties).
  const bestModel = results.reduce(
    (best, current) =>
      current.overallMetrics.accuracy > best.overallMetrics.accuracy ? current : best,
    results[0],
  );
  console.log(`\n🏆 Best performing model: ${bestModel.model} (${bestModel.overallMetrics.accuracy.toFixed(1)}% accuracy)`);
  console.log('═'.repeat(60));
}
271
/**
 * Benchmark several models on the same JSONL test set.
 *
 * For each model, `evaluationFunction(model, testData)` is awaited and its
 * predictions are scored against the ground truth. A model whose evaluation
 * throws is logged and skipped; the remaining models are still evaluated.
 *
 * The metrics are computed once per model (by `SingleFileEval.evaluate`) and
 * reused as the overall metrics; there is a single file, so both are the
 * same numbers. The comparison table is only printed when at least one
 * model produced a result.
 *
 * @param {Array<string>} models - Model identifiers, passed to `evaluationFunction`.
 * @param {string} testDataPath - Path of the JSONL file with the test inputs.
 * @param {string} groundTruthPath - Path of the JSONL file with the expected outputs.
 * @param {function(string, Array): (Array|Promise<Array>)} evaluationFunction
 *   Produces one prediction per test record, in order.
 * @returns {Promise<Array<Object>>} One result per successfully evaluated model.
 * @throws {Error} When the two files contain a different number of records.
 */
export async function benchmark(models, testDataPath, groundTruthPath, evaluationFunction) {
  console.log(`🚀 Starting benchmark of ${models.length} models...`);

  // The two inputs are independent, so load them concurrently.
  const [testData, groundTruth] = await Promise.all([
    readJSONL(testDataPath),
    readJSONL(groundTruthPath),
  ]);
  if (testData.length !== groundTruth.length) {
    throw new Error('Test data and ground truth must have the same length');
  }

  const results = [];
  for (let i = 0; i < models.length; i++) {
    const model = models[i];
    console.log(`\n📊 Evaluating model: ${model} (${i + 1}/${models.length})`);
    const startTime = Date.now();
    try {
      // Get predictions from model
      const predictions = await evaluationFunction(model, testData);
      if (predictions.length !== groundTruth.length) {
        throw new Error(`Model ${model} returned ${predictions.length} predictions, expected ${groundTruth.length}`);
      }

      // Score once; the file-level metrics double as the overall metrics.
      const fileEval = new SingleFileEval(testDataPath, predictions, groundTruth);
      const fileResult = await fileEval.evaluate();
      const metrics = fileResult.metrics;
      const executionTime = Date.now() - startTime;

      results.push({
        model,
        overallMetrics: metrics,
        fileResults: [fileResult],
        aggregateStats: {
          meanAccuracy: metrics.accuracy,
          stdDevAccuracy: 0, // Would need multiple runs to calculate
          meanF1: metrics.f1Score,
          stdDevF1: 0,
          totalFiles: 1,
          totalPredictions: predictions.length,
        },
        executionTime,
      });
      console.log(` ✅ Accuracy: ${metrics.accuracy.toFixed(1)}%`);
      console.log(` ⏱️ Time: ${(executionTime / 1000).toFixed(1)}s`);
    } catch (error) {
      // Best effort: one failing model must not abort the whole benchmark.
      console.error(` ❌ Failed to evaluate ${model}:`, error);
    }
  }

  // Display final comparison (needs at least one row).
  if (results.length > 0) {
    plotMetricsWithUncertainty(results);
  } else {
    console.warn('⚠️ No model was evaluated successfully; skipping comparison.');
  }
  return results;
}
324
/**
 * Write benchmark results to `outputPath` as a single JSONL record.
 *
 * The record holds a timestamp, the number of models, a compact per-model
 * summary and the full results under `detailed`.
 *
 * @param {Array<Object>} results - Results as returned by `benchmark`.
 * @param {string} outputPath - Destination file path.
 * @returns {Promise<void>}
 */
export async function saveBenchmarkResults(results, outputPath) {
  const timestamp = new Date().toISOString();
  const totalModels = results.length;

  const perModel = results.map(({ model, overallMetrics, executionTime, aggregateStats }) => ({
    model,
    accuracy: overallMetrics.accuracy,
    f1Score: overallMetrics.f1Score,
    executionTime,
    totalPredictions: aggregateStats.totalPredictions,
  }));

  const summary = {
    timestamp,
    totalModels,
    results: perModel,
    detailed: results,
  };

  await writeJSONL(outputPath, [summary]);
  console.log(`📄 Benchmark results saved to ${outputPath}`);
}
343
// Default export: every public helper of this module gathered in one object,
// for callers that prefer a single namespace-style import.
const benchmarkingUtils = {
  SingleFileEval,
  calculateMetrics,
  computeDictDifference,
  aggregateDictDifferences,
  levenshteinDistance,
  jaccardSimilarity,
  hammingDistance,
  flattenObject,
  plotMetricsWithUncertainty,
  benchmark,
  saveBenchmarkResults,
};

export default benchmarkingUtils;
@@ -1,70 +0,0 @@
1
- /**
2
- * Chat message processing utilities
3
- * Equivalent to Python's utils/chat.py
4
- */
5
/**
 * One message of a chat conversation.
 *
 * `validateMessages` in this module reports a message whose `content` is
 * `null` unless it carries `function_call` or `tool_calls`.
 */
export interface ChatMessage {
    /** Role of the message; one of the five literals below. */
    role: 'system' | 'user' | 'assistant' | 'function' | 'tool';
    /** Message text, or `null`. */
    content: string | null;
    name?: string;
    /** `arguments` is a string — presumably JSON-encoded; confirm with the producer. */
    function_call?: {
        name: string;
        arguments: string;
    };
    tool_calls?: {
        id: string;
        type: 'function';
        function: {
            name: string;
            arguments: string;
        };
    }[];
    tool_call_id?: string;
}
23
/**
 * Request payload for a chat completion.
 *
 * NOTE(review): the field names look like an OpenAI-style chat completion
 * request — confirm against the API this is sent to before relying on the
 * meaning or valid range of any individual option.
 */
export interface ChatCompletionRequest {
    model: string;
    messages: ChatMessage[];
    temperature?: number;
    max_tokens?: number;
    top_p?: number;
    frequency_penalty?: number;
    presence_penalty?: number;
    /** A single stop string or a list of them. */
    stop?: string | string[];
    stream?: boolean;
    functions?: {
        name: string;
        description?: string;
        parameters: Record<string, any>;
    }[];
    function_call?: 'auto' | 'none' | {
        name: string;
    };
    tools?: {
        type: 'function';
        function: {
            name: string;
            description?: string;
            parameters: Record<string, any>;
        };
    }[];
    tool_choice?: 'auto' | 'none' | {
        type: 'function';
        function: {
            name: string;
        };
    };
}
56
/**
 * Convert messages to the shape used for `provider`.
 *
 * Per the implementation in this module: `openai` and `xai` get the input
 * array back unchanged; `anthropic` gets `{ role, content }` objects with
 * the `system` role rewritten to `user`; `gemini` gets
 * `{ role, parts: [{ text }] }` objects where `assistant` becomes `model`
 * and every other role becomes `user`.
 */
export declare function formatMessagesForProvider(
    messages: ChatMessage[],
    provider: 'openai' | 'anthropic' | 'xai' | 'gemini'
): any[];
/**
 * Split the system messages off a conversation.
 *
 * `system` is the `content` of all system messages joined with newlines, or
 * `null` when there is none; `filtered` holds the remaining messages.
 */
export declare function extractSystemPrompt(messages: ChatMessage[]): {
    system: string | null;
    filtered: ChatMessage[];
};
/**
 * Validate a list of messages.
 *
 * @returns Human-readable error strings; an empty array means no problem was found.
 */
export declare function validateMessages(messages: ChatMessage[]): string[];
/**
 * Count tokens in a list of messages.
 *
 * NOTE(review): the implementation's own comment describes this as
 * simplified token counting; treat the result as an estimate and confirm
 * how `_model` is used before depending on it.
 */
export declare function countTokensInMessages(messages: ChatMessage[], _model?: string): number;
/** Shape of the module's default export: the four functions above. */
declare const _default: {
    formatMessagesForProvider: typeof formatMessagesForProvider;
    extractSystemPrompt: typeof extractSystemPrompt;
    validateMessages: typeof validateMessages;
    countTokensInMessages: typeof countTokensInMessages;
};
export default _default;
70
- //# sourceMappingURL=chat.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/utils/chat.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,MAAM,CAAC;IAC5D,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE;QACd,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,UAAU,CAAC,EAAE,KAAK,CAAC;QACjB,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,UAAU,CAAC;QACjB,QAAQ,EAAE;YACR,IAAI,EAAE,MAAM,CAAC;YACb,SAAS,EAAE,MAAM,CAAC;SACnB,CAAC;KACH,CAAC,CAAC;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACjC,CAAC,CAAC;IACH,aAAa,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACnD,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,UAAU,CAAC;QACjB,QAAQ,EAAE;YACR,IAAI,EAAE,MAAM,CAAC;YACb,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SACjC,CAAC;KACH,CAAC,CAAC;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAC;CAClF;AAED,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,WAAW,EAAE,EACvB,QAAQ,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,GAAG,QAAQ,GAClD,GAAG,EAAE,CAqBP;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG;IAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,QAAQ,EAAE,WAAW,EAAE,CAAA;CAAE,CAW/G;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,EAAE,CAyBlE;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,WAAW,EAAE,EAAE,MAAM,GAAE,MAAsB,GAAG,MAAM,CAwBrG;;;;;;;AAED,wBAKE"}
@@ -1,79 +0,0 @@
1
- /**
2
- * Chat message processing utilities
3
- * Equivalent to Python's utils/chat.py
4
- */
5
/**
 * Reshape chat messages into the layout used by a given provider.
 *
 * - 'anthropic': returns new objects holding only `role` and `content`, with
 *   the 'system' role rewritten to 'user'.
 * - 'gemini': returns new objects with `role` ('assistant' becomes 'model',
 *   every other role becomes 'user') and a single-element `parts` array that
 *   wraps the content as `{ text }`.
 * - 'openai', 'xai' and any other value: the input array is returned as-is
 *   (same reference, no copy).
 *
 * @param {Array<object>} messages - Chat messages with `role` and `content`.
 * @param {string} provider - Provider identifier.
 * @returns {Array<object>} The provider-shaped messages.
 */
export function formatMessagesForProvider(messages, provider) {
    if (provider === 'anthropic') {
        return messages.map((entry) => {
            const role = entry.role === 'system' ? 'user' : entry.role;
            return { role, content: entry.content };
        });
    }
    if (provider === 'gemini') {
        return messages.map((entry) => {
            const role = entry.role === 'assistant' ? 'model' : 'user';
            return { role, parts: [{ text: entry.content }] };
        });
    }
    // 'openai', 'xai' and unrecognised providers receive the messages untouched.
    return messages;
}
24
/**
 * Separate system messages from the rest of a conversation.
 *
 * @param {Array<object>} messages - Chat messages with `role` and `content`.
 * @returns {{ system: (string|null), filtered: Array<object> }}
 *   `system` is the content of every system message joined with newlines, or
 *   `null` when there is no system message; `filtered` holds the remaining
 *   messages in their original order.
 */
export function extractSystemPrompt(messages) {
    const systemContents = [];
    const filtered = [];
    // Single pass: route each message to the bucket matching its role.
    messages.forEach((entry) => {
        if (entry.role === 'system') {
            systemContents.push(entry.content);
        } else {
            filtered.push(entry);
        }
    });
    const system = systemContents.length === 0 ? null : systemContents.join('\n');
    return { system, filtered };
}
34
/**
 * Validate a list of chat messages and describe every problem found.
 *
 * Checks performed:
 * - `messages` must be a non-empty array (otherwise a single error is
 *   returned immediately);
 * - every entry must be a non-null object;
 * - every entry needs a role, and that role must be one of 'system', 'user',
 *   'assistant', 'function' or 'tool' (a missing role is reported once, not
 *   also as an invalid role);
 * - every entry needs content (neither `null` nor `undefined`), or a
 *   `function_call`, or `tool_calls`.
 *
 * @param {Array<object>} messages - Messages to validate.
 * @returns {string[]} Human-readable error messages; empty when valid.
 */
export function validateMessages(messages) {
    const errors = [];
    if (!Array.isArray(messages) || messages.length === 0) {
        errors.push('Messages array is required and cannot be empty');
        return errors;
    }
    const validRoles = ['system', 'user', 'assistant', 'function', 'tool'];
    for (const [i, msg] of messages.entries()) {
        // Guard against null / primitive entries so validation reports them
        // instead of throwing on property access.
        if (msg === null || typeof msg !== 'object') {
            errors.push(`Message at index ${i} must be an object`);
            continue;
        }
        if (!msg.role) {
            errors.push(`Message at index ${i} is missing role`);
        } else if (!validRoles.includes(msg.role)) {
            errors.push(`Message at index ${i} has invalid role: ${msg.role}`);
        }
        // `== null` matches both null and an absent (undefined) content.
        if (msg.content == null && !msg.function_call && !msg.tool_calls) {
            errors.push(`Message at index ${i} must have content, function_call, or tool_calls`);
        }
    }
    return errors;
}
54
/**
 * Roughly estimate how many tokens a list of chat messages occupies.
 *
 * This is a heuristic, not a tokenizer: each message costs a flat 4 tokens,
 * plus one token per 4 characters (rounded up) of its content and of the JSON
 * serialisation of its `function_call` and `tool_calls`, when present.
 *
 * @param {Iterable<object>} messages - Chat messages to measure.
 * @param {string} [_model='gpt-4o-mini'] - Accepted but not used by the estimate.
 * @returns {number} Estimated token count.
 */
export function countTokensInMessages(messages, _model = 'gpt-4o-mini') {
    const BASE_TOKENS_PER_MESSAGE = 4;
    const CHARS_PER_TOKEN = 4;
    // Simplified token counting - in production use tiktoken
    const estimate = (text) => Math.ceil(text.length / CHARS_PER_TOKEN);
    let running = 0;
    for (const entry of messages) {
        running += BASE_TOKENS_PER_MESSAGE;
        if (entry.content) {
            running += estimate(entry.content);
        }
        if (entry.function_call) {
            running += estimate(JSON.stringify(entry.function_call));
        }
        if (entry.tool_calls) {
            running += estimate(JSON.stringify(entry.tool_calls));
        }
    }
    return running;
}
74
// Default export: the four chat helpers of this module gathered in one object.
const chatUtils = {
    formatMessagesForProvider: formatMessagesForProvider,
    extractSystemPrompt: extractSystemPrompt,
    validateMessages: validateMessages,
    countTokensInMessages: countTokensInMessages,
};
export default chatUtils;
@@ -1,26 +0,0 @@
1
- import { Amount } from '../types/ai_models.js';
2
/**
 * Token counts describing one model call. Every field is optional.
 */
interface Usage {
    /** Count of prompt tokens. */
    prompt_tokens?: number;
    /** Count of completion tokens. */
    completion_tokens?: number;
    /** Total token count. */
    total_tokens?: number;
    /** Count of cached tokens. */
    cached_tokens?: number;
}
8
/**
 * Compute the cost of a model usage.
 *
 * @param model - Model identifier whose pricing should be applied.
 * @param usage - Token counts for the call.
 * @param currency - Optional currency code to attach to the returned amount.
 * @returns The total cost as an `Amount`.
 */
export declare function computeCostFromModel(model: string, usage: Usage, currency?: string): Amount;
12
/**
 * Cost breakdown for detailed analysis: one amount per token category, the
 * overall total, and the token counts the amounts refer to.
 */
export interface CostBreakdown {
    /** Cost attributed to prompt tokens. */
    prompt_cost: Amount;
    /** Cost attributed to completion tokens. */
    completion_cost: Amount;
    /** Cost attributed to cached tokens. */
    cached_cost: Amount;
    /** Overall cost of the call. */
    total_cost: Amount;
    /** Number of prompt tokens. */
    prompt_tokens: number;
    /** Number of completion tokens. */
    completion_tokens: number;
    /** Number of cached tokens. */
    cached_tokens: number;
}
24
/**
 * Compute the cost of a model usage, itemised per token category.
 *
 * @param model - Model identifier whose pricing should be applied.
 * @param usage - Token counts for the call.
 * @param currency - Optional currency code to attach to every returned amount.
 * @returns Per-category costs, the total cost and the token counts.
 */
export declare function computeCostFromModelWithBreakdown(model: string, usage: Usage, currency?: string): CostBreakdown;
25
- export {};
26
- //# sourceMappingURL=cost_calculation.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cost_calculation.d.ts","sourceRoot":"","sources":["../../src/utils/cost_calculation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAW,MAAM,uBAAuB,CAAC;AA0CxD,UAAU,KAAK;IACb,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,KAAK,EACZ,QAAQ,GAAE,MAAc,GACvB,MAAM,CAwBR;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,iCAAiC,CAC/C,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,KAAK,EACZ,QAAQ,GAAE,MAAc,GACvB,aAAa,CAwCf"}
@@ -1,99 +0,0 @@
1
// Basic pricing data for common models (this would typically come from a config or API).
// `text.prompt` and `text.completion` are prices per 1M tokens; `text.cached_discount`
// is the multiplier applied to the prompt price for cached tokens. `ft_price_hike` is
// not read by any function in this module.
const MODEL_PRICING = {
    'gpt-4o': {
        text: { prompt: 2.5, completion: 10.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gpt-4o-mini': {
        text: { prompt: 0.15, completion: 0.6, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gpt-4o-2024-11-20': {
        text: { prompt: 2.5, completion: 10.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gpt-4o-2024-08-06': {
        text: { prompt: 2.5, completion: 10.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gpt-4o-mini-2024-07-18': {
        text: { prompt: 0.15, completion: 0.6, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'claude-3-5-sonnet-latest': {
        text: { prompt: 3.0, completion: 15.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'claude-3-5-sonnet-20241022': {
        text: { prompt: 3.0, completion: 15.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gemini-2.0-flash': {
        text: { prompt: 0.075, completion: 0.3, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
    'gemini-2.5-pro': {
        text: { prompt: 1.25, completion: 5.0, cached_discount: 1.0 },
        ft_price_hike: 1.0,
    },
};

// Prices in MODEL_PRICING are expressed per this many tokens.
const TOKENS_PER_PRICE_UNIT = 1000000;
// Amounts are rounded to 5 decimal places.
const COST_ROUNDING_FACTOR = 100000;

/**
 * Look up pricing by own key only, so that names inherited from
 * Object.prototype ('constructor', 'toString', '__proto__', ...) are treated
 * as unknown models instead of resolving to a non-pricing value.
 */
function findPricing(model) {
    return Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
        ? MODEL_PRICING[model]
        : undefined;
}

/** Round a raw cost to 5 decimal places. */
function roundCost(value) {
    return Math.round(value * COST_ROUNDING_FACTOR) / COST_ROUNDING_FACTOR;
}

/** Read the token counts from a usage object, treating missing counts as 0. */
function readTokenCounts(usage) {
    return {
        promptTokens: usage.prompt_tokens || 0,
        completionTokens: usage.completion_tokens || 0,
        cachedTokens: usage.cached_tokens || 0,
    };
}

/** Compute the unrounded cost of each token category for one pricing entry. */
function computeRawCosts(pricing, tokens) {
    const { prompt, completion, cached_discount: cachedDiscount } = pricing.text;
    // NOTE(review): cached tokens are billed in addition to prompt tokens. If
    // callers pass cached_tokens as a subset of prompt_tokens this counts them
    // twice - confirm against the usage semantics of the upstream API.
    return {
        promptCost: (tokens.promptTokens / TOKENS_PER_PRICE_UNIT) * prompt,
        completionCost: (tokens.completionTokens / TOKENS_PER_PRICE_UNIT) * completion,
        cachedCost: (tokens.cachedTokens / TOKENS_PER_PRICE_UNIT) * prompt * cachedDiscount,
    };
}

/**
 * Compute the cost of a model usage.
 *
 * @param {string} model - Model identifier; must be a key of the pricing table.
 * @param {object} usage - Token counts (`prompt_tokens`, `completion_tokens`,
 *   `cached_tokens`); missing counts are treated as 0.
 * @param {string} [currency='USD'] - Label copied into the result; no
 *   currency conversion is performed.
 * @returns {{ value: number, currency: string }} Total cost rounded to 5
 *   decimal places, or a zero amount when the model has no pricing entry.
 */
export function computeCostFromModel(model, usage, currency = 'USD') {
    const pricing = findPricing(model);
    if (!pricing) {
        // Return zero cost for unknown models
        return { value: 0, currency };
    }
    const { promptCost, completionCost, cachedCost } = computeRawCosts(pricing, readTokenCounts(usage));
    return {
        value: roundCost(promptCost + completionCost + cachedCost),
        currency,
    };
}

/**
 * Compute the cost of a model usage, itemised per token category.
 *
 * @param {string} model - Model identifier; must be a key of the pricing table.
 * @param {object} usage - Token counts (`prompt_tokens`, `completion_tokens`,
 *   `cached_tokens`); missing counts are treated as 0.
 * @param {string} [currency='USD'] - Label copied into every amount; no
 *   currency conversion is performed.
 * @returns {object} `prompt_cost`, `completion_cost`, `cached_cost` and
 *   `total_cost` amounts (each rounded to 5 decimal places; the total is
 *   rounded from the unrounded components) plus the token counts used. All
 *   amounts are zero when the model has no pricing entry.
 */
export function computeCostFromModelWithBreakdown(model, usage, currency = 'USD') {
    const pricing = findPricing(model);
    const tokens = readTokenCounts(usage);
    const counts = {
        prompt_tokens: tokens.promptTokens,
        completion_tokens: tokens.completionTokens,
        cached_tokens: tokens.cachedTokens,
    };
    if (!pricing) {
        // Build a fresh amount per field so callers mutating one entry do not
        // silently change the others.
        const zeroCost = () => ({ value: 0, currency });
        return {
            prompt_cost: zeroCost(),
            completion_cost: zeroCost(),
            cached_cost: zeroCost(),
            total_cost: zeroCost(),
            ...counts,
        };
    }
    const { promptCost, completionCost, cachedCost } = computeRawCosts(pricing, tokens);
    return {
        prompt_cost: { value: roundCost(promptCost), currency },
        completion_cost: { value: roundCost(completionCost), currency },
        cached_cost: { value: roundCost(cachedCost), currency },
        total_cost: { value: roundCost(promptCost + completionCost + cachedCost), currency },
        ...counts,
    };
}