@retab/node 0.0.48 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +8 -215
  2. package/dist/api/client.d.ts +2 -2
  3. package/dist/api/client.d.ts.map +1 -1
  4. package/dist/api/client.js +2 -2
  5. package/dist/api/documents/client.d.ts +3 -3
  6. package/dist/api/documents/client.d.ts.map +1 -1
  7. package/dist/api/documents/client.js +3 -3
  8. package/dist/api/projects/client.d.ts +15 -0
  9. package/dist/api/projects/client.d.ts.map +1 -0
  10. package/dist/api/projects/client.js +43 -0
  11. package/dist/api/projects/documents/client.d.ts +12 -0
  12. package/dist/api/projects/documents/client.d.ts.map +1 -0
  13. package/dist/api/projects/documents/client.js +39 -0
  14. package/dist/api/projects/iterations/client.d.ts +17 -0
  15. package/dist/api/projects/iterations/client.d.ts.map +1 -0
  16. package/dist/api/projects/iterations/client.js +64 -0
  17. package/dist/client.d.ts +1 -0
  18. package/dist/client.d.ts.map +1 -1
  19. package/dist/client.js +6 -1
  20. package/dist/generated_types.d.ts +17837 -40090
  21. package/dist/generated_types.d.ts.map +1 -1
  22. package/dist/generated_types.js +309 -979
  23. package/dist/index.d.ts +7 -2
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +2 -2
  26. package/dist/types.d.ts +188 -80
  27. package/dist/types.d.ts.map +1 -1
  28. package/dist/types.js +22 -1
  29. package/package.json +6 -9
  30. package/dist/api/consensus/client.d.ts +0 -7
  31. package/dist/api/consensus/client.d.ts.map +0 -1
  32. package/dist/api/consensus/client.js +0 -14
  33. package/dist/errors.d.ts +0 -34
  34. package/dist/errors.d.ts.map +0 -1
  35. package/dist/errors.js +0 -53
  36. package/dist/resource.d.ts +0 -12
  37. package/dist/resource.d.ts.map +0 -1
  38. package/dist/resource.js +0 -19
  39. package/dist/resources/consensus/completions.d.ts +0 -66
  40. package/dist/resources/consensus/completions.d.ts.map +0 -1
  41. package/dist/resources/consensus/completions.js +0 -84
  42. package/dist/resources/consensus/index.d.ts +0 -72
  43. package/dist/resources/consensus/index.d.ts.map +0 -1
  44. package/dist/resources/consensus/index.js +0 -76
  45. package/dist/resources/consensus/responses.d.ts +0 -69
  46. package/dist/resources/consensus/responses.d.ts.map +0 -1
  47. package/dist/resources/consensus/responses.js +0 -99
  48. package/dist/resources/documents/extractions.d.ts +0 -74
  49. package/dist/resources/documents/extractions.d.ts.map +0 -1
  50. package/dist/resources/documents/extractions.js +0 -196
  51. package/dist/resources/documents/index.d.ts +0 -21
  52. package/dist/resources/documents/index.d.ts.map +0 -1
  53. package/dist/resources/documents/index.js +0 -55
  54. package/dist/resources/evaluations/documents.d.ts +0 -40
  55. package/dist/resources/evaluations/documents.d.ts.map +0 -1
  56. package/dist/resources/evaluations/documents.js +0 -123
  57. package/dist/resources/evaluations/index.d.ts +0 -14
  58. package/dist/resources/evaluations/index.d.ts.map +0 -1
  59. package/dist/resources/evaluations/index.js +0 -17
  60. package/dist/resources/evaluations/iterations.d.ts +0 -50
  61. package/dist/resources/evaluations/iterations.d.ts.map +0 -1
  62. package/dist/resources/evaluations/iterations.js +0 -156
  63. package/dist/resources/files.d.ts +0 -82
  64. package/dist/resources/files.d.ts.map +0 -1
  65. package/dist/resources/files.js +0 -150
  66. package/dist/resources/finetuning.d.ts +0 -105
  67. package/dist/resources/finetuning.d.ts.map +0 -1
  68. package/dist/resources/finetuning.js +0 -181
  69. package/dist/resources/index.d.ts +0 -11
  70. package/dist/resources/index.d.ts.map +0 -1
  71. package/dist/resources/index.js +0 -10
  72. package/dist/resources/models.d.ts +0 -57
  73. package/dist/resources/models.d.ts.map +0 -1
  74. package/dist/resources/models.js +0 -72
  75. package/dist/resources/processors/automations/endpoints.d.ts +0 -90
  76. package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
  77. package/dist/resources/processors/automations/endpoints.js +0 -145
  78. package/dist/resources/processors/automations/index.d.ts +0 -7
  79. package/dist/resources/processors/automations/index.d.ts.map +0 -1
  80. package/dist/resources/processors/automations/index.js +0 -6
  81. package/dist/resources/processors/automations/links.d.ts +0 -90
  82. package/dist/resources/processors/automations/links.d.ts.map +0 -1
  83. package/dist/resources/processors/automations/links.js +0 -149
  84. package/dist/resources/processors/automations/logs.d.ts +0 -35
  85. package/dist/resources/processors/automations/logs.d.ts.map +0 -1
  86. package/dist/resources/processors/automations/logs.js +0 -60
  87. package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
  88. package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
  89. package/dist/resources/processors/automations/mailboxes.js +0 -157
  90. package/dist/resources/processors/automations/outlook.d.ts +0 -114
  91. package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
  92. package/dist/resources/processors/automations/outlook.js +0 -170
  93. package/dist/resources/processors/automations/tests.d.ts +0 -58
  94. package/dist/resources/processors/automations/tests.d.ts.map +0 -1
  95. package/dist/resources/processors/automations/tests.js +0 -90
  96. package/dist/resources/processors/index.d.ts +0 -303
  97. package/dist/resources/processors/index.d.ts.map +0 -1
  98. package/dist/resources/processors/index.js +0 -261
  99. package/dist/resources/schemas.d.ts +0 -63
  100. package/dist/resources/schemas.d.ts.map +0 -1
  101. package/dist/resources/schemas.js +0 -183
  102. package/dist/resources/secrets/external_api_keys.d.ts +0 -61
  103. package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
  104. package/dist/resources/secrets/external_api_keys.js +0 -120
  105. package/dist/resources/secrets/index.d.ts +0 -14
  106. package/dist/resources/secrets/index.d.ts.map +0 -1
  107. package/dist/resources/secrets/index.js +0 -17
  108. package/dist/resources/secrets/webhooks.d.ts +0 -73
  109. package/dist/resources/secrets/webhooks.d.ts.map +0 -1
  110. package/dist/resources/secrets/webhooks.js +0 -145
  111. package/dist/resources/usage.d.ts +0 -223
  112. package/dist/resources/usage.d.ts.map +0 -1
  113. package/dist/resources/usage.js +0 -310
  114. package/dist/types/ai_models.d.ts +0 -389
  115. package/dist/types/ai_models.d.ts.map +0 -1
  116. package/dist/types/ai_models.js +0 -145
  117. package/dist/types/automations/cron.d.ts +0 -28
  118. package/dist/types/automations/cron.d.ts.map +0 -1
  119. package/dist/types/automations/cron.js +0 -1
  120. package/dist/types/automations/endpoints.d.ts +0 -13
  121. package/dist/types/automations/endpoints.d.ts.map +0 -1
  122. package/dist/types/automations/endpoints.js +0 -1
  123. package/dist/types/automations/index.d.ts +0 -7
  124. package/dist/types/automations/index.d.ts.map +0 -1
  125. package/dist/types/automations/index.js +0 -6
  126. package/dist/types/automations/links.d.ts +0 -15
  127. package/dist/types/automations/links.d.ts.map +0 -1
  128. package/dist/types/automations/links.js +0 -1
  129. package/dist/types/automations/mailboxes.d.ts +0 -18
  130. package/dist/types/automations/mailboxes.d.ts.map +0 -1
  131. package/dist/types/automations/mailboxes.js +0 -1
  132. package/dist/types/automations/outlook.d.ts +0 -37
  133. package/dist/types/automations/outlook.d.ts.map +0 -1
  134. package/dist/types/automations/outlook.js +0 -1
  135. package/dist/types/automations/webhooks.d.ts +0 -13
  136. package/dist/types/automations/webhooks.d.ts.map +0 -1
  137. package/dist/types/automations/webhooks.js +0 -1
  138. package/dist/types/browser_canvas.d.ts +0 -4
  139. package/dist/types/browser_canvas.d.ts.map +0 -1
  140. package/dist/types/browser_canvas.js +0 -2
  141. package/dist/types/chat.d.ts +0 -99
  142. package/dist/types/chat.d.ts.map +0 -1
  143. package/dist/types/chat.js +0 -20
  144. package/dist/types/consensus.d.ts +0 -10
  145. package/dist/types/consensus.d.ts.map +0 -1
  146. package/dist/types/consensus.js +0 -1
  147. package/dist/types/db/annotations.d.ts +0 -108
  148. package/dist/types/db/annotations.d.ts.map +0 -1
  149. package/dist/types/db/annotations.js +0 -6
  150. package/dist/types/db/files.d.ts +0 -133
  151. package/dist/types/db/files.d.ts.map +0 -1
  152. package/dist/types/db/files.js +0 -5
  153. package/dist/types/documents/extractions.d.ts +0 -1849
  154. package/dist/types/documents/extractions.d.ts.map +0 -1
  155. package/dist/types/documents/extractions.js +0 -211
  156. package/dist/types/documents/processing.d.ts +0 -249
  157. package/dist/types/documents/processing.d.ts.map +0 -1
  158. package/dist/types/documents/processing.js +0 -6
  159. package/dist/types/evaluations/iterations.d.ts +0 -41
  160. package/dist/types/evaluations/iterations.d.ts.map +0 -1
  161. package/dist/types/evaluations/iterations.js +0 -1
  162. package/dist/types/jobs/base.d.ts +0 -162
  163. package/dist/types/jobs/base.d.ts.map +0 -1
  164. package/dist/types/jobs/base.js +0 -6
  165. package/dist/types/jobs/specialized.d.ts +0 -200
  166. package/dist/types/jobs/specialized.d.ts.map +0 -1
  167. package/dist/types/jobs/specialized.js +0 -37
  168. package/dist/types/logs.d.ts +0 -92
  169. package/dist/types/logs.d.ts.map +0 -1
  170. package/dist/types/logs.js +0 -1
  171. package/dist/types/mime.d.ts +0 -426
  172. package/dist/types/mime.d.ts.map +0 -1
  173. package/dist/types/mime.js +0 -48
  174. package/dist/types/modalities.d.ts +0 -31
  175. package/dist/types/modalities.d.ts.map +0 -1
  176. package/dist/types/modalities.js +0 -109
  177. package/dist/types/pagination.d.ts +0 -5
  178. package/dist/types/pagination.d.ts.map +0 -1
  179. package/dist/types/pagination.js +0 -1
  180. package/dist/types/schemas/enhancement.d.ts +0 -250
  181. package/dist/types/schemas/enhancement.d.ts.map +0 -1
  182. package/dist/types/schemas/enhancement.js +0 -6
  183. package/dist/types/schemas/generate.d.ts +0 -160
  184. package/dist/types/schemas/generate.d.ts.map +0 -1
  185. package/dist/types/schemas/generate.js +0 -19
  186. package/dist/types/schemas/object.d.ts +0 -116
  187. package/dist/types/schemas/object.d.ts.map +0 -1
  188. package/dist/types/schemas/object.js +0 -861
  189. package/dist/types/secrets/external_api_keys.d.ts +0 -27
  190. package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
  191. package/dist/types/secrets/external_api_keys.js +0 -11
  192. package/dist/types/secrets/index.d.ts +0 -2
  193. package/dist/types/secrets/index.d.ts.map +0 -1
  194. package/dist/types/secrets/index.js +0 -1
  195. package/dist/types/standards.d.ts +0 -37
  196. package/dist/types/standards.d.ts.map +0 -1
  197. package/dist/types/standards.js +0 -1
  198. package/dist/utils/ai_models.d.ts +0 -10
  199. package/dist/utils/ai_models.d.ts.map +0 -1
  200. package/dist/utils/ai_models.js +0 -183
  201. package/dist/utils/batch_processing.d.ts +0 -227
  202. package/dist/utils/batch_processing.d.ts.map +0 -1
  203. package/dist/utils/batch_processing.js +0 -268
  204. package/dist/utils/benchmarking.d.ts +0 -115
  205. package/dist/utils/benchmarking.d.ts.map +0 -1
  206. package/dist/utils/benchmarking.js +0 -355
  207. package/dist/utils/chat.d.ts +0 -70
  208. package/dist/utils/chat.d.ts.map +0 -1
  209. package/dist/utils/chat.js +0 -79
  210. package/dist/utils/cost_calculation.d.ts +0 -26
  211. package/dist/utils/cost_calculation.d.ts.map +0 -1
  212. package/dist/utils/cost_calculation.js +0 -99
  213. package/dist/utils/datasets.d.ts +0 -135
  214. package/dist/utils/datasets.d.ts.map +0 -1
  215. package/dist/utils/datasets.js +0 -359
  216. package/dist/utils/display.d.ts +0 -108
  217. package/dist/utils/display.d.ts.map +0 -1
  218. package/dist/utils/display.js +0 -244
  219. package/dist/utils/hash.d.ts +0 -18
  220. package/dist/utils/hash.d.ts.map +0 -1
  221. package/dist/utils/hash.js +0 -31
  222. package/dist/utils/hashing.d.ts +0 -18
  223. package/dist/utils/hashing.d.ts.map +0 -1
  224. package/dist/utils/hashing.js +0 -28
  225. package/dist/utils/index.d.ts +0 -8
  226. package/dist/utils/index.d.ts.map +0 -1
  227. package/dist/utils/index.js +0 -10
  228. package/dist/utils/json_schema.d.ts +0 -18
  229. package/dist/utils/json_schema.d.ts.map +0 -1
  230. package/dist/utils/json_schema.js +0 -334
  231. package/dist/utils/json_schema_utils.d.ts +0 -42
  232. package/dist/utils/json_schema_utils.d.ts.map +0 -1
  233. package/dist/utils/json_schema_utils.js +0 -212
  234. package/dist/utils/jsonl.d.ts +0 -60
  235. package/dist/utils/jsonl.d.ts.map +0 -1
  236. package/dist/utils/jsonl.js +0 -259
  237. package/dist/utils/mime.d.ts +0 -6
  238. package/dist/utils/mime.d.ts.map +0 -1
  239. package/dist/utils/mime.js +0 -129
  240. package/dist/utils/model_cards.d.ts +0 -219
  241. package/dist/utils/model_cards.d.ts.map +0 -1
  242. package/dist/utils/model_cards.js +0 -462
  243. package/dist/utils/prompt_optimization.d.ts +0 -96
  244. package/dist/utils/prompt_optimization.d.ts.map +0 -1
  245. package/dist/utils/prompt_optimization.js +0 -275
  246. package/dist/utils/responses.d.ts +0 -35
  247. package/dist/utils/responses.d.ts.map +0 -1
  248. package/dist/utils/responses.js +0 -37
  249. package/dist/utils/stream.d.ts +0 -13
  250. package/dist/utils/stream.d.ts.map +0 -1
  251. package/dist/utils/stream.js +0 -64
  252. package/dist/utils/stream_context_managers.d.ts +0 -147
  253. package/dist/utils/stream_context_managers.d.ts.map +0 -1
  254. package/dist/utils/stream_context_managers.js +0 -380
  255. package/dist/utils/usage.d.ts +0 -57
  256. package/dist/utils/usage.d.ts.map +0 -1
  257. package/dist/utils/usage.js +0 -97
  258. package/dist/utils/webhook_secrets.d.ts +0 -59
  259. package/dist/utils/webhook_secrets.d.ts.map +0 -1
  260. package/dist/utils/webhook_secrets.js +0 -107
  261. package/dist/utils/zod_to_json_schema.d.ts +0 -11
  262. package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
  263. package/dist/utils/zod_to_json_schema.js +0 -123
@@ -1,244 +0,0 @@
1
- import fs from 'fs';
2
- import { readJSONL } from './jsonl.js';
3
- /**
4
- * Count tokens in text using a simple approximation
5
- * In production, you'd want to use tiktoken equivalent for JavaScript
6
- */
7
- export function countTokens(text, _model = 'gpt-4o-mini') {
8
- // Simple approximation: ~4 characters per token for English text
9
- // This is a rough estimate; for production use tiktoken-js or similar
10
- const avgCharsPerToken = 4;
11
- return Math.ceil(text.length / avgCharsPerToken);
12
- }
13
- /**
14
- * Count tokens in content (text + images)
15
- */
16
- export function countContentTokens(content, _model = 'gpt-4o-mini') {
17
- let textTokens = 0;
18
- let imageTokens = 0;
19
- // Check for image references (simplified detection)
20
- const imagePatterns = [
21
- /data:image\/[^;]+;base64,/g,
22
- /!\[.*?\]\(.*?\)/g, // Markdown images
23
- /<img[^>]*>/g, // HTML images
24
- ];
25
- let textContent = content;
26
- // Count and remove image references
27
- for (const pattern of imagePatterns) {
28
- const matches = content.match(pattern);
29
- if (matches) {
30
- // OpenAI vision pricing: roughly 85 tokens per image for low detail
31
- imageTokens += matches.length * 85;
32
- textContent = textContent.replace(pattern, '[IMAGE]');
33
- }
34
- }
35
- // Count text tokens
36
- textTokens = countTokens(textContent, _model);
37
- return {
38
- textTokens,
39
- imageTokens,
40
- totalTokens: textTokens + imageTokens,
41
- };
42
- }
43
- /**
44
- * Calculate statistical metrics for an array of numbers
45
- */
46
- export function calculateStats(values) {
47
- if (values.length === 0) {
48
- return { min: 0, max: 0, mean: 0, median: 0, p95: 0, p99: 0, total: 0 };
49
- }
50
- const sorted = [...values].sort((a, b) => a - b);
51
- const total = values.reduce((sum, val) => sum + val, 0);
52
- const mean = total / values.length;
53
- const getPercentile = (p) => {
54
- const index = Math.ceil((p / 100) * sorted.length) - 1;
55
- return sorted[Math.max(0, index)];
56
- };
57
- return {
58
- min: sorted[0],
59
- max: sorted[sorted.length - 1],
60
- mean: Math.round(mean * 100) / 100,
61
- median: sorted[Math.floor(sorted.length / 2)],
62
- p95: getPercentile(95),
63
- p99: getPercentile(99),
64
- total,
65
- };
66
- }
67
- /**
68
- * Process dataset and compute comprehensive metrics
69
- */
70
- export async function processDatasetAndComputeMetrics(datasetPath, inputTokenPrice = 0.00015, outputTokenPrice = 0.0006, model = 'gpt-4o-mini') {
71
- if (!fs.existsSync(datasetPath)) {
72
- throw new Error(`Dataset file not found: ${datasetPath}`);
73
- }
74
- const dataset = await readJSONL(datasetPath);
75
- const inputTokenCounts = [];
76
- const outputTokenCounts = [];
77
- const totalTokenCounts = [];
78
- let systemMessages = 0;
79
- let userMessages = 0;
80
- let assistantMessages = 0;
81
- let totalMessages = 0;
82
- let systemLengths = [];
83
- let userLengths = [];
84
- let assistantLengths = [];
85
- let imageCount = 0;
86
- let hasImages = false;
87
- for (const example of dataset) {
88
- if (!example.messages || !Array.isArray(example.messages)) {
89
- continue;
90
- }
91
- let exampleInputTokens = 0;
92
- let exampleOutputTokens = 0;
93
- for (const message of example.messages) {
94
- totalMessages++;
95
- const content = message.content || '';
96
- const tokenCount = countContentTokens(content, model);
97
- // Track content lengths
98
- const contentLength = content.length;
99
- switch (message.role) {
100
- case 'system':
101
- systemMessages++;
102
- exampleInputTokens += tokenCount.totalTokens;
103
- systemLengths.push(contentLength);
104
- break;
105
- case 'user':
106
- userMessages++;
107
- exampleInputTokens += tokenCount.totalTokens;
108
- userLengths.push(contentLength);
109
- break;
110
- case 'assistant':
111
- assistantMessages++;
112
- exampleOutputTokens += tokenCount.totalTokens;
113
- assistantLengths.push(contentLength);
114
- break;
115
- }
116
- // Check for images
117
- if (tokenCount.imageTokens > 0) {
118
- hasImages = true;
119
- imageCount += tokenCount.imageTokens / 85; // Rough estimate
120
- }
121
- }
122
- inputTokenCounts.push(exampleInputTokens);
123
- outputTokenCounts.push(exampleOutputTokens);
124
- totalTokenCounts.push(exampleInputTokens + exampleOutputTokens);
125
- }
126
- const inputStats = calculateStats(inputTokenCounts);
127
- const outputStats = calculateStats(outputTokenCounts);
128
- const totalStats = calculateStats(totalTokenCounts);
129
- return {
130
- totalExamples: dataset.length,
131
- inputTokens: inputStats,
132
- outputTokens: outputStats,
133
- totalTokens: totalStats,
134
- estimatedCost: {
135
- input: (inputStats.total * inputTokenPrice) / 1000,
136
- output: (outputStats.total * outputTokenPrice) / 1000,
137
- total: ((inputStats.total * inputTokenPrice) + (outputStats.total * outputTokenPrice)) / 1000,
138
- },
139
- messageStats: {
140
- systemMessages,
141
- userMessages,
142
- assistantMessages,
143
- avgMessagesPerExample: Math.round((totalMessages / dataset.length) * 100) / 100,
144
- },
145
- contentAnalysis: {
146
- avgSystemLength: systemLengths.length > 0 ? Math.round((systemLengths.reduce((a, b) => a + b, 0) / systemLengths.length) * 100) / 100 : 0,
147
- avgUserLength: userLengths.length > 0 ? Math.round((userLengths.reduce((a, b) => a + b, 0) / userLengths.length) * 100) / 100 : 0,
148
- avgAssistantLength: assistantLengths.length > 0 ? Math.round((assistantLengths.reduce((a, b) => a + b, 0) / assistantLengths.length) * 100) / 100 : 0,
149
- hasImages,
150
- imageCount: Math.round(imageCount),
151
- },
152
- };
153
- }
154
- /**
155
- * Display metrics in a formatted table
156
- */
157
- export function displayMetrics(metrics) {
158
- console.log('\n📊 Dataset Analysis Report');
159
- console.log('═'.repeat(50));
160
- // Basic Stats
161
- console.log(`\n📈 Basic Statistics:`);
162
- console.log(` Total Examples: ${metrics.totalExamples.toLocaleString()}`);
163
- console.log(` Avg Messages/Example: ${metrics.messageStats.avgMessagesPerExample}`);
164
- // Message Distribution
165
- console.log(`\n💬 Message Distribution:`);
166
- console.log(` System Messages: ${metrics.messageStats.systemMessages.toLocaleString()}`);
167
- console.log(` User Messages: ${metrics.messageStats.userMessages.toLocaleString()}`);
168
- console.log(` Assistant Messages: ${metrics.messageStats.assistantMessages.toLocaleString()}`);
169
- // Token Statistics
170
- console.log(`\n🔢 Token Statistics:`);
171
- console.log(` Input Tokens:`);
172
- console.log(` Total: ${metrics.inputTokens.total.toLocaleString()}`);
173
- console.log(` Mean: ${metrics.inputTokens.mean.toLocaleString()}`);
174
- console.log(` Median: ${metrics.inputTokens.median.toLocaleString()}`);
175
- console.log(` Min: ${metrics.inputTokens.min.toLocaleString()}`);
176
- console.log(` Max: ${metrics.inputTokens.max.toLocaleString()}`);
177
- console.log(` 95th percentile: ${metrics.inputTokens.p95.toLocaleString()}`);
178
- console.log(` 99th percentile: ${metrics.inputTokens.p99.toLocaleString()}`);
179
- console.log(`\n Output Tokens:`);
180
- console.log(` Total: ${metrics.outputTokens.total.toLocaleString()}`);
181
- console.log(` Mean: ${metrics.outputTokens.mean.toLocaleString()}`);
182
- console.log(` Median: ${metrics.outputTokens.median.toLocaleString()}`);
183
- console.log(` Min: ${metrics.outputTokens.min.toLocaleString()}`);
184
- console.log(` Max: ${metrics.outputTokens.max.toLocaleString()}`);
185
- console.log(` 95th percentile: ${metrics.outputTokens.p95.toLocaleString()}`);
186
- console.log(` 99th percentile: ${metrics.outputTokens.p99.toLocaleString()}`);
187
- // Cost Estimation
188
- console.log(`\n💰 Cost Estimation:`);
189
- console.log(` Input Cost: $${metrics.estimatedCost.input.toFixed(4)}`);
190
- console.log(` Output Cost: $${metrics.estimatedCost.output.toFixed(4)}`);
191
- console.log(` Total Cost: $${metrics.estimatedCost.total.toFixed(4)}`);
192
- // Content Analysis
193
- console.log(`\n📝 Content Analysis:`);
194
- console.log(` Avg System Message Length: ${metrics.contentAnalysis.avgSystemLength.toLocaleString()} chars`);
195
- console.log(` Avg User Message Length: ${metrics.contentAnalysis.avgUserLength.toLocaleString()} chars`);
196
- console.log(` Avg Assistant Message Length: ${metrics.contentAnalysis.avgAssistantLength.toLocaleString()} chars`);
197
- if (metrics.contentAnalysis.hasImages) {
198
- console.log(` Images Detected: ${metrics.contentAnalysis.imageCount.toLocaleString()}`);
199
- }
200
- console.log('\n' + '═'.repeat(50));
201
- }
202
- /**
203
- * Format large numbers with appropriate units
204
- */
205
- export function formatNumber(num) {
206
- if (num >= 1000000) {
207
- return `${(num / 1000000).toFixed(1)}M`;
208
- }
209
- else if (num >= 1000) {
210
- return `${(num / 1000).toFixed(1)}K`;
211
- }
212
- return num.toLocaleString();
213
- }
214
- /**
215
- * Create a simple ASCII progress bar
216
- */
217
- export function createProgressBar(current, total, width = 40) {
218
- const percentage = Math.min(current / total, 1);
219
- const filled = Math.floor(percentage * width);
220
- const empty = width - filled;
221
- return `[${'█'.repeat(filled)}${' '.repeat(empty)}] ${(percentage * 100).toFixed(1)}% (${current}/${total})`;
222
- }
223
- /**
224
- * Display progress with a progress bar
225
- */
226
- export function displayProgress(current, total, message) {
227
- const progressBar = createProgressBar(current, total);
228
- const output = message ? `${message} ${progressBar}` : progressBar;
229
- // Clear line and write progress (works in most terminals)
230
- process.stdout.write(`\r${output}`);
231
- if (current >= total) {
232
- process.stdout.write('\n');
233
- }
234
- }
235
- export default {
236
- processDatasetAndComputeMetrics,
237
- displayMetrics,
238
- countTokens,
239
- countContentTokens,
240
- calculateStats,
241
- formatNumber,
242
- createProgressBar,
243
- displayProgress,
244
- };
@@ -1,18 +0,0 @@
1
- /**
2
- * Generate a BLAKE2b hash from bytes.
3
- * Uses the blake2 package to match Python's hashlib.blake2b with 8-byte digest.
4
- */
5
- export declare function generateBlake2bHashFromBytes(bytes: Buffer): string;
6
- /**
7
- * Generate a BLAKE2b hash from a base64 string.
8
- */
9
- export declare function generateBlake2bHashFromBase64(base64String: string): string;
10
- /**
11
- * Generate a BLAKE2b hash from a UTF-8 string.
12
- */
13
- export declare function generateBlake2bHashFromString(inputString: string): string;
14
- /**
15
- * Generate a BLAKE2b hash from a dictionary/object.
16
- */
17
- export declare function generateBlake2bHashFromDict(inputDict: Record<string, any>): string;
18
- //# sourceMappingURL=hash.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../../src/utils/hash.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,wBAAgB,4BAA4B,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAIlE;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAG1E;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAGzE;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAGlF"}
@@ -1,31 +0,0 @@
1
- import * as blake2 from 'blake2';
2
- /**
3
- * Generate a BLAKE2b hash from bytes.
4
- * Uses the blake2 package to match Python's hashlib.blake2b with 8-byte digest.
5
- */
6
- export function generateBlake2bHashFromBytes(bytes) {
7
- const hash = blake2.createHash('blake2b', { digestLength: 8 });
8
- hash.update(bytes);
9
- return hash.digest('hex');
10
- }
11
- /**
12
- * Generate a BLAKE2b hash from a base64 string.
13
- */
14
- export function generateBlake2bHashFromBase64(base64String) {
15
- const bytes = Buffer.from(base64String, 'base64');
16
- return generateBlake2bHashFromBytes(bytes);
17
- }
18
- /**
19
- * Generate a BLAKE2b hash from a UTF-8 string.
20
- */
21
- export function generateBlake2bHashFromString(inputString) {
22
- const bytes = Buffer.from(inputString, 'utf-8');
23
- return generateBlake2bHashFromBytes(bytes);
24
- }
25
- /**
26
- * Generate a BLAKE2b hash from a dictionary/object.
27
- */
28
- export function generateBlake2bHashFromDict(inputDict) {
29
- const jsonString = JSON.stringify(inputDict, Object.keys(inputDict).sort());
30
- return generateBlake2bHashFromString(jsonString.trim());
31
- }
@@ -1,18 +0,0 @@
1
- /**
2
- * Cryptographic hashing utilities
3
- * Equivalent to Python's utils/hashing.py
4
- */
5
- export declare function md5(data: string | Buffer): string;
6
- export declare function sha256(data: string | Buffer): string;
7
- export declare function sha512(data: string | Buffer): string;
8
- export declare function hmacSha256(data: string | Buffer, secret: string): string;
9
- export declare function contentHash(content: any): string;
10
- declare const _default: {
11
- md5: typeof md5;
12
- sha256: typeof sha256;
13
- sha512: typeof sha512;
14
- hmacSha256: typeof hmacSha256;
15
- contentHash: typeof contentHash;
16
- };
17
- export default _default;
18
- //# sourceMappingURL=hashing.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"hashing.d.ts","sourceRoot":"","sources":["../../src/utils/hashing.ts"],"names":[],"mappings":"AAEA;;;GAGG;AAEH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEjD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAExE;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,GAAG,GAAG,MAAM,CAGhD;;;;;;;;AAED,wBAME"}
@@ -1,28 +0,0 @@
1
- import { createHash, createHmac } from 'crypto';
2
- /**
3
- * Cryptographic hashing utilities
4
- * Equivalent to Python's utils/hashing.py
5
- */
6
- export function md5(data) {
7
- return createHash('md5').update(data).digest('hex');
8
- }
9
- export function sha256(data) {
10
- return createHash('sha256').update(data).digest('hex');
11
- }
12
- export function sha512(data) {
13
- return createHash('sha512').update(data).digest('hex');
14
- }
15
- export function hmacSha256(data, secret) {
16
- return createHmac('sha256', secret).update(data).digest('hex');
17
- }
18
- export function contentHash(content) {
19
- const normalized = typeof content === 'string' ? content : JSON.stringify(content);
20
- return sha256(normalized);
21
- }
22
- export default {
23
- md5,
24
- sha256,
25
- sha512,
26
- hmacSha256,
27
- contentHash,
28
- };
@@ -1,8 +0,0 @@
1
- export * from './stream.js';
2
- export * from './ai_models.js';
3
- export * from './json_schema_utils.js';
4
- export * from './jsonl.js';
5
- export * from './prompt_optimization.js';
6
- export { default as jsonlUtils } from './jsonl.js';
7
- export { default as promptOptimization } from './prompt_optimization.js';
8
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AACA,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,wBAAwB,CAAC;AAGvC,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AAGzC,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
@@ -1,10 +0,0 @@
1
- // Core utilities
2
- export * from './stream.js';
3
- export * from './ai_models.js';
4
- export * from './json_schema_utils.js';
5
- // New utilities for 100% feature parity
6
- export * from './jsonl.js';
7
- export * from './prompt_optimization.js';
8
- // Re-export commonly used utilities
9
- export { default as jsonlUtils } from './jsonl.js';
10
- export { default as promptOptimization } from './prompt_optimization.js';
@@ -1,18 +0,0 @@
1
- export declare function generateBlake2bHashFromString(input: string): string;
2
- export declare function generateSchemaDataId(jsonSchema: Record<string, any>): string;
3
- export declare function generateSchemaId(jsonSchema: Record<string, any>): string;
4
- export declare function cleanSchema(schema: Record<string, any>, removeCustomFields?: boolean, fieldsToRemove?: string[]): Record<string, any>;
5
- export declare function loadJsonSchema(jsonSchema: Record<string, any> | string): Record<string, any>;
6
- export declare function expandRefs(schema: Record<string, any>): Record<string, any>;
7
- export declare function createReasoningSchema(schema: Record<string, any>): Record<string, any>;
8
- export declare function validateCurrency(currencyCode: any): string | null;
9
- export declare function validateCountryCode(value: any): string | null;
10
- export declare function validateEmailRegex(value: any): string | null;
11
- export declare function validatePhoneNumber(value: any): string | null;
12
- export declare function filterAuxiliaryFieldsJson(jsonData: any): any;
13
- export declare function jsonSchemaToTypescriptInterface(schema: Record<string, any>, addFieldDescription?: boolean): string;
14
- export declare function jsonSchemaToNlpDataStructure(schema: Record<string, any>): string;
15
- export declare function jsonSchemaToStrictOpenaiSchema(schema: Record<string, any>): Record<string, any>;
16
- export declare function unflattenDict(flatDict: Record<string, any>, separator?: string): Record<string, any>;
17
- export declare function schemaToTsType(schema: Record<string, any>, definitions?: Record<string, any>, visited?: Set<string>, depth?: number, maxDepth?: number, addFieldDescription?: boolean): string;
18
- //# sourceMappingURL=json_schema.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"json_schema.d.ts","sourceRoot":"","sources":["../../src/utils/json_schema.ts"],"names":[],"mappings":"AAIA,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAGnE;AAGD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAS5E;AAED,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAIxE;AAGD,wBAAgB,WAAW,CACzB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,kBAAkB,GAAE,OAAc,EAClC,cAAc,GAAE,MAAM,EAAO,GAC5B,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAiCrB;AAGD,wBAAgB,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAQ5F;AAGD,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CA+B3E;AAGD,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAmCtF;AAGD,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQjE;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQ7D;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAO5D;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQ7D;AAGD,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,GAAG,GAAG,GAAG,CA4B5D;AAGD,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,mBAAmB,GAAE,OAAc,GAClC,MAAM,CAoCR;AAGD,wBAAgB,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAkChF;AAGD,wBAAgB,8BAA8B,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAqB/F;AAGD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,SAAS,GAAE,MAAY,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAmBzG;AAGD,wBAAgB,cAAc,CAC5B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,WAAW,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM,EACrC,OAAO,GAAE,GAAG,CAAC,MAAM,CAAa,EAChC,KAAK,GAAE,MAAU,EACjB,QAAQ,GAAE,MAAW,EACrB,mBAAmB,GAAE,OAAc,GAClC,MAAM,CAwCR"}