@retab/node 0.0.0-reserved → 0.0.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/README.md +293 -2
  2. package/dist/api/client.d.ts +15 -0
  3. package/dist/api/client.d.ts.map +1 -0
  4. package/dist/api/client.js +16 -0
  5. package/dist/api/consensus/client.d.ts +7 -0
  6. package/dist/api/consensus/client.d.ts.map +1 -0
  7. package/dist/api/consensus/client.js +14 -0
  8. package/dist/api/deployments/client.d.ts +20 -0
  9. package/dist/api/deployments/client.d.ts.map +1 -0
  10. package/dist/api/deployments/client.js +23 -0
  11. package/dist/api/documents/client.d.ts +10 -0
  12. package/dist/api/documents/client.d.ts.map +1 -0
  13. package/dist/api/documents/client.js +35 -0
  14. package/dist/api/models/client.d.ts +17 -0
  15. package/dist/api/models/client.d.ts.map +1 -0
  16. package/dist/api/models/client.js +15 -0
  17. package/dist/api/schemas/client.d.ts +12 -0
  18. package/dist/api/schemas/client.d.ts.map +1 -0
  19. package/dist/api/schemas/client.js +14 -0
  20. package/dist/client.d.ts +50 -0
  21. package/dist/client.d.ts.map +1 -0
  22. package/dist/client.js +135 -0
  23. package/dist/errors.d.ts +34 -0
  24. package/dist/errors.d.ts.map +1 -0
  25. package/dist/errors.js +53 -0
  26. package/dist/generated_types.d.ts +64373 -0
  27. package/dist/generated_types.d.ts.map +1 -0
  28. package/dist/generated_types.js +2267 -0
  29. package/dist/index.d.ts +8 -0
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +9 -0
  32. package/dist/mime.d.ts +5 -0
  33. package/dist/mime.d.ts.map +1 -0
  34. package/dist/mime.js +66 -0
  35. package/dist/resource.d.ts +12 -0
  36. package/dist/resource.d.ts.map +1 -0
  37. package/dist/resource.js +19 -0
  38. package/dist/resources/consensus/completions.d.ts +66 -0
  39. package/dist/resources/consensus/completions.d.ts.map +1 -0
  40. package/dist/resources/consensus/completions.js +84 -0
  41. package/dist/resources/consensus/index.d.ts +72 -0
  42. package/dist/resources/consensus/index.d.ts.map +1 -0
  43. package/dist/resources/consensus/index.js +76 -0
  44. package/dist/resources/consensus/responses.d.ts +69 -0
  45. package/dist/resources/consensus/responses.d.ts.map +1 -0
  46. package/dist/resources/consensus/responses.js +99 -0
  47. package/dist/resources/documents/extractions.d.ts +74 -0
  48. package/dist/resources/documents/extractions.d.ts.map +1 -0
  49. package/dist/resources/documents/extractions.js +196 -0
  50. package/dist/resources/documents/index.d.ts +21 -0
  51. package/dist/resources/documents/index.d.ts.map +1 -0
  52. package/dist/resources/documents/index.js +55 -0
  53. package/dist/resources/evaluations/documents.d.ts +40 -0
  54. package/dist/resources/evaluations/documents.d.ts.map +1 -0
  55. package/dist/resources/evaluations/documents.js +123 -0
  56. package/dist/resources/evaluations/index.d.ts +14 -0
  57. package/dist/resources/evaluations/index.d.ts.map +1 -0
  58. package/dist/resources/evaluations/index.js +17 -0
  59. package/dist/resources/evaluations/iterations.d.ts +50 -0
  60. package/dist/resources/evaluations/iterations.d.ts.map +1 -0
  61. package/dist/resources/evaluations/iterations.js +156 -0
  62. package/dist/resources/files.d.ts +82 -0
  63. package/dist/resources/files.d.ts.map +1 -0
  64. package/dist/resources/files.js +150 -0
  65. package/dist/resources/finetuning.d.ts +105 -0
  66. package/dist/resources/finetuning.d.ts.map +1 -0
  67. package/dist/resources/finetuning.js +181 -0
  68. package/dist/resources/index.d.ts +11 -0
  69. package/dist/resources/index.d.ts.map +1 -0
  70. package/dist/resources/index.js +10 -0
  71. package/dist/resources/models.d.ts +57 -0
  72. package/dist/resources/models.d.ts.map +1 -0
  73. package/dist/resources/models.js +72 -0
  74. package/dist/resources/processors/automations/endpoints.d.ts +90 -0
  75. package/dist/resources/processors/automations/endpoints.d.ts.map +1 -0
  76. package/dist/resources/processors/automations/endpoints.js +145 -0
  77. package/dist/resources/processors/automations/index.d.ts +7 -0
  78. package/dist/resources/processors/automations/index.d.ts.map +1 -0
  79. package/dist/resources/processors/automations/index.js +6 -0
  80. package/dist/resources/processors/automations/links.d.ts +90 -0
  81. package/dist/resources/processors/automations/links.d.ts.map +1 -0
  82. package/dist/resources/processors/automations/links.js +149 -0
  83. package/dist/resources/processors/automations/logs.d.ts +35 -0
  84. package/dist/resources/processors/automations/logs.d.ts.map +1 -0
  85. package/dist/resources/processors/automations/logs.js +60 -0
  86. package/dist/resources/processors/automations/mailboxes.d.ts +102 -0
  87. package/dist/resources/processors/automations/mailboxes.d.ts.map +1 -0
  88. package/dist/resources/processors/automations/mailboxes.js +157 -0
  89. package/dist/resources/processors/automations/outlook.d.ts +114 -0
  90. package/dist/resources/processors/automations/outlook.d.ts.map +1 -0
  91. package/dist/resources/processors/automations/outlook.js +170 -0
  92. package/dist/resources/processors/automations/tests.d.ts +58 -0
  93. package/dist/resources/processors/automations/tests.d.ts.map +1 -0
  94. package/dist/resources/processors/automations/tests.js +90 -0
  95. package/dist/resources/processors/index.d.ts +303 -0
  96. package/dist/resources/processors/index.d.ts.map +1 -0
  97. package/dist/resources/processors/index.js +261 -0
  98. package/dist/resources/schemas.d.ts +63 -0
  99. package/dist/resources/schemas.d.ts.map +1 -0
  100. package/dist/resources/schemas.js +183 -0
  101. package/dist/resources/secrets/external_api_keys.d.ts +61 -0
  102. package/dist/resources/secrets/external_api_keys.d.ts.map +1 -0
  103. package/dist/resources/secrets/external_api_keys.js +120 -0
  104. package/dist/resources/secrets/index.d.ts +14 -0
  105. package/dist/resources/secrets/index.d.ts.map +1 -0
  106. package/dist/resources/secrets/index.js +17 -0
  107. package/dist/resources/secrets/webhooks.d.ts +73 -0
  108. package/dist/resources/secrets/webhooks.d.ts.map +1 -0
  109. package/dist/resources/secrets/webhooks.js +145 -0
  110. package/dist/resources/usage.d.ts +223 -0
  111. package/dist/resources/usage.d.ts.map +1 -0
  112. package/dist/resources/usage.js +310 -0
  113. package/dist/types/ai_models.d.ts +389 -0
  114. package/dist/types/ai_models.d.ts.map +1 -0
  115. package/dist/types/ai_models.js +145 -0
  116. package/dist/types/automations/cron.d.ts +28 -0
  117. package/dist/types/automations/cron.d.ts.map +1 -0
  118. package/dist/types/automations/cron.js +1 -0
  119. package/dist/types/automations/endpoints.d.ts +13 -0
  120. package/dist/types/automations/endpoints.d.ts.map +1 -0
  121. package/dist/types/automations/endpoints.js +1 -0
  122. package/dist/types/automations/index.d.ts +7 -0
  123. package/dist/types/automations/index.d.ts.map +1 -0
  124. package/dist/types/automations/index.js +6 -0
  125. package/dist/types/automations/links.d.ts +15 -0
  126. package/dist/types/automations/links.d.ts.map +1 -0
  127. package/dist/types/automations/links.js +1 -0
  128. package/dist/types/automations/mailboxes.d.ts +18 -0
  129. package/dist/types/automations/mailboxes.d.ts.map +1 -0
  130. package/dist/types/automations/mailboxes.js +1 -0
  131. package/dist/types/automations/outlook.d.ts +37 -0
  132. package/dist/types/automations/outlook.d.ts.map +1 -0
  133. package/dist/types/automations/outlook.js +1 -0
  134. package/dist/types/automations/webhooks.d.ts +13 -0
  135. package/dist/types/automations/webhooks.d.ts.map +1 -0
  136. package/dist/types/automations/webhooks.js +1 -0
  137. package/dist/types/browser_canvas.d.ts +4 -0
  138. package/dist/types/browser_canvas.d.ts.map +1 -0
  139. package/dist/types/browser_canvas.js +2 -0
  140. package/dist/types/chat.d.ts +99 -0
  141. package/dist/types/chat.d.ts.map +1 -0
  142. package/dist/types/chat.js +20 -0
  143. package/dist/types/consensus.d.ts +10 -0
  144. package/dist/types/consensus.d.ts.map +1 -0
  145. package/dist/types/consensus.js +1 -0
  146. package/dist/types/db/annotations.d.ts +108 -0
  147. package/dist/types/db/annotations.d.ts.map +1 -0
  148. package/dist/types/db/annotations.js +6 -0
  149. package/dist/types/db/files.d.ts +133 -0
  150. package/dist/types/db/files.d.ts.map +1 -0
  151. package/dist/types/db/files.js +5 -0
  152. package/dist/types/documents/extractions.d.ts +1849 -0
  153. package/dist/types/documents/extractions.d.ts.map +1 -0
  154. package/dist/types/documents/extractions.js +211 -0
  155. package/dist/types/documents/processing.d.ts +249 -0
  156. package/dist/types/documents/processing.d.ts.map +1 -0
  157. package/dist/types/documents/processing.js +6 -0
  158. package/dist/types/evaluations/iterations.d.ts +41 -0
  159. package/dist/types/evaluations/iterations.d.ts.map +1 -0
  160. package/dist/types/evaluations/iterations.js +1 -0
  161. package/dist/types/jobs/base.d.ts +162 -0
  162. package/dist/types/jobs/base.d.ts.map +1 -0
  163. package/dist/types/jobs/base.js +6 -0
  164. package/dist/types/jobs/specialized.d.ts +200 -0
  165. package/dist/types/jobs/specialized.d.ts.map +1 -0
  166. package/dist/types/jobs/specialized.js +37 -0
  167. package/dist/types/logs.d.ts +92 -0
  168. package/dist/types/logs.d.ts.map +1 -0
  169. package/dist/types/logs.js +1 -0
  170. package/dist/types/mime.d.ts +426 -0
  171. package/dist/types/mime.d.ts.map +1 -0
  172. package/dist/types/mime.js +48 -0
  173. package/dist/types/modalities.d.ts +31 -0
  174. package/dist/types/modalities.d.ts.map +1 -0
  175. package/dist/types/modalities.js +109 -0
  176. package/dist/types/pagination.d.ts +5 -0
  177. package/dist/types/pagination.d.ts.map +1 -0
  178. package/dist/types/pagination.js +1 -0
  179. package/dist/types/schemas/enhancement.d.ts +250 -0
  180. package/dist/types/schemas/enhancement.d.ts.map +1 -0
  181. package/dist/types/schemas/enhancement.js +6 -0
  182. package/dist/types/schemas/generate.d.ts +160 -0
  183. package/dist/types/schemas/generate.d.ts.map +1 -0
  184. package/dist/types/schemas/generate.js +19 -0
  185. package/dist/types/schemas/object.d.ts +116 -0
  186. package/dist/types/schemas/object.d.ts.map +1 -0
  187. package/dist/types/schemas/object.js +861 -0
  188. package/dist/types/secrets/external_api_keys.d.ts +27 -0
  189. package/dist/types/secrets/external_api_keys.d.ts.map +1 -0
  190. package/dist/types/secrets/external_api_keys.js +11 -0
  191. package/dist/types/secrets/index.d.ts +2 -0
  192. package/dist/types/secrets/index.d.ts.map +1 -0
  193. package/dist/types/secrets/index.js +1 -0
  194. package/dist/types/standards.d.ts +37 -0
  195. package/dist/types/standards.d.ts.map +1 -0
  196. package/dist/types/standards.js +1 -0
  197. package/dist/types.d.ts +276 -0
  198. package/dist/types.d.ts.map +1 -0
  199. package/dist/types.js +85 -0
  200. package/dist/utils/ai_models.d.ts +10 -0
  201. package/dist/utils/ai_models.d.ts.map +1 -0
  202. package/dist/utils/ai_models.js +183 -0
  203. package/dist/utils/batch_processing.d.ts +227 -0
  204. package/dist/utils/batch_processing.d.ts.map +1 -0
  205. package/dist/utils/batch_processing.js +268 -0
  206. package/dist/utils/benchmarking.d.ts +115 -0
  207. package/dist/utils/benchmarking.d.ts.map +1 -0
  208. package/dist/utils/benchmarking.js +355 -0
  209. package/dist/utils/chat.d.ts +70 -0
  210. package/dist/utils/chat.d.ts.map +1 -0
  211. package/dist/utils/chat.js +79 -0
  212. package/dist/utils/cost_calculation.d.ts +26 -0
  213. package/dist/utils/cost_calculation.d.ts.map +1 -0
  214. package/dist/utils/cost_calculation.js +99 -0
  215. package/dist/utils/datasets.d.ts +135 -0
  216. package/dist/utils/datasets.d.ts.map +1 -0
  217. package/dist/utils/datasets.js +359 -0
  218. package/dist/utils/display.d.ts +108 -0
  219. package/dist/utils/display.d.ts.map +1 -0
  220. package/dist/utils/display.js +244 -0
  221. package/dist/utils/hash.d.ts +18 -0
  222. package/dist/utils/hash.d.ts.map +1 -0
  223. package/dist/utils/hash.js +31 -0
  224. package/dist/utils/hashing.d.ts +18 -0
  225. package/dist/utils/hashing.d.ts.map +1 -0
  226. package/dist/utils/hashing.js +28 -0
  227. package/dist/utils/index.d.ts +8 -0
  228. package/dist/utils/index.d.ts.map +1 -0
  229. package/dist/utils/index.js +10 -0
  230. package/dist/utils/json_schema.d.ts +18 -0
  231. package/dist/utils/json_schema.d.ts.map +1 -0
  232. package/dist/utils/json_schema.js +334 -0
  233. package/dist/utils/json_schema_utils.d.ts +42 -0
  234. package/dist/utils/json_schema_utils.d.ts.map +1 -0
  235. package/dist/utils/json_schema_utils.js +212 -0
  236. package/dist/utils/jsonl.d.ts +60 -0
  237. package/dist/utils/jsonl.d.ts.map +1 -0
  238. package/dist/utils/jsonl.js +259 -0
  239. package/dist/utils/mime.d.ts +6 -0
  240. package/dist/utils/mime.d.ts.map +1 -0
  241. package/dist/utils/mime.js +129 -0
  242. package/dist/utils/model_cards.d.ts +219 -0
  243. package/dist/utils/model_cards.d.ts.map +1 -0
  244. package/dist/utils/model_cards.js +462 -0
  245. package/dist/utils/prompt_optimization.d.ts +96 -0
  246. package/dist/utils/prompt_optimization.d.ts.map +1 -0
  247. package/dist/utils/prompt_optimization.js +275 -0
  248. package/dist/utils/responses.d.ts +35 -0
  249. package/dist/utils/responses.d.ts.map +1 -0
  250. package/dist/utils/responses.js +37 -0
  251. package/dist/utils/stream.d.ts +13 -0
  252. package/dist/utils/stream.d.ts.map +1 -0
  253. package/dist/utils/stream.js +64 -0
  254. package/dist/utils/stream_context_managers.d.ts +147 -0
  255. package/dist/utils/stream_context_managers.d.ts.map +1 -0
  256. package/dist/utils/stream_context_managers.js +380 -0
  257. package/dist/utils/usage.d.ts +57 -0
  258. package/dist/utils/usage.d.ts.map +1 -0
  259. package/dist/utils/usage.js +97 -0
  260. package/dist/utils/webhook_secrets.d.ts +59 -0
  261. package/dist/utils/webhook_secrets.d.ts.map +1 -0
  262. package/dist/utils/webhook_secrets.js +107 -0
  263. package/dist/utils/zod_to_json_schema.d.ts +11 -0
  264. package/dist/utils/zod_to_json_schema.d.ts.map +1 -0
  265. package/dist/utils/zod_to_json_schema.js +123 -0
  266. package/dist/utils.d.ts +19 -0
  267. package/dist/utils.d.ts.map +1 -0
  268. package/dist/utils.js +1 -0
  269. package/package.json +62 -6
  270. package/index.js +0 -7
@@ -0,0 +1,183 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import yaml from 'js-yaml';
4
+ import { fileURLToPath } from 'url';
5
+ import { ModelCardSchema } from '../types/ai_models.js';
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
8
+ const MODEL_CARDS_DIR = path.join(__dirname, '_model_cards');
9
/**
 * Recursively merge a model-card override onto a base card.
 *
 * Neither argument is mutated; a new object is returned. The `inherits` key of
 * the override is never copied into the result. Only plain objects are merged
 * key by key; every other value (arrays, primitives, null, class instances)
 * from the override replaces the base value wholesale.
 *
 * @param {object} base - Card (or nested section) providing the defaults.
 * @param {object} override - Card (or nested section) whose values win.
 * @returns {object} A new merged object.
 */
function mergeModelCards(base, override) {
    // Arrays also report typeof 'object'; merging them key by key would turn
    // e.g. a `features` list into a `{0: ..., 1: ...}` object, so only objects
    // whose prototype is Object.prototype (or null) are treated as mergeable.
    const isPlainObject = (candidate) => {
        if (typeof candidate !== 'object' || candidate === null) {
            return false;
        }
        const proto = Object.getPrototypeOf(candidate);
        return proto === Object.prototype || proto === null;
    };
    const result = { ...base };
    for (const [key, value] of Object.entries(override)) {
        if (key === 'inherits') {
            continue;
        }
        if (isPlainObject(value) && key in result && isPlainObject(result[key])) {
            result[key] = mergeModelCards(result[key], value);
        }
        else {
            result[key] = value;
        }
    }
    return result;
}
24
/**
 * Load one YAML file of model cards, resolve `inherits`, and validate every card.
 *
 * Cards without an `inherits` key are base cards. A card with `inherits` is
 * merged on top of the base card of that name (via mergeModelCards) before
 * validation. Only base cards can be inherited from; chained inheritance is
 * not resolved.
 *
 * @param {string} yamlFile - Path of the YAML file to read.
 * @returns {Array} Cards in file order, each passed through ModelCardSchema.parse.
 * @throws {Error} If the file cannot be read, does not contain a list, a card
 *   inherits from an unknown base card, or a card fails schema validation.
 */
function loadModelCards(yamlFile) {
    const yamlContent = fs.readFileSync(yamlFile, 'utf-8');
    // An empty YAML document loads as a nullish value; treat it as "no cards"
    // rather than failing on iteration below.
    const rawCards = yaml.load(yamlContent) ?? [];
    if (!Array.isArray(rawCards)) {
        throw new Error(`Model card file must contain a list of cards: ${yamlFile}`);
    }
    // First pass: index base cards by model name. A Map keeps names such as
    // "constructor" from resolving to inherited Object.prototype members.
    const baseCards = new Map();
    for (const card of rawCards) {
        if (!('inherits' in card)) {
            baseCards.set(card.model, card);
        }
    }
    // Second pass: validate base cards as-is, derived cards after merging.
    return rawCards.map((card) => {
        if (!('inherits' in card)) {
            return ModelCardSchema.parse(card);
        }
        const parent = baseCards.get(card.inherits);
        if (parent === undefined) {
            // Without this check the card would silently be merged onto an empty object.
            throw new Error(`Model card "${card.model}" inherits from unknown base card "${card.inherits}" in ${yamlFile}`);
        }
        return ModelCardSchema.parse(mergeModelCards(parent, card));
    });
}
47
// Seed the model cards directory with default provider files the first time the
// module is imported. This runs at import time and writes next to the compiled
// module, so a failure (for example a read-only install location) is reported
// with a warning instead of aborting the import; the loader below skips any
// file that does not exist.
if (!fs.existsSync(MODEL_CARDS_DIR)) {
    // [file name, cards] in the order the files are written. Every card keeps
    // its own array literals: yaml.dump would emit anchors/aliases for shared
    // references.
    const defaultCardFiles = [
        ['openai.yaml', [
            {
                model: 'gpt-4o',
                pricing: {
                    text: { prompt: 2.5, completion: 10.0 },
                },
                capabilities: {
                    modalities: ['text', 'image'],
                    endpoints: ['chat_completions'],
                    features: ['streaming', 'function_calling', 'structured_outputs'],
                },
            },
            {
                model: 'gpt-4o-mini',
                pricing: {
                    text: { prompt: 0.15, completion: 0.6 },
                },
                capabilities: {
                    modalities: ['text', 'image'],
                    endpoints: ['chat_completions'],
                    features: ['streaming', 'function_calling', 'structured_outputs'],
                },
            },
        ]],
        ['anthropic.yaml', [
            {
                model: 'claude-3-5-sonnet-latest',
                pricing: {
                    text: { prompt: 3.0, completion: 15.0 },
                },
                capabilities: {
                    modalities: ['text', 'image'],
                    endpoints: ['chat_completions'],
                    features: ['streaming'],
                },
            },
        ]],
        // Empty files for the remaining providers
        ['xai.yaml', []],
        ['gemini.yaml', []],
        ['auto.yaml', []],
    ];
    try {
        fs.mkdirSync(MODEL_CARDS_DIR, { recursive: true });
        for (const [fileName, cards] of defaultCardFiles) {
            fs.writeFileSync(path.join(MODEL_CARDS_DIR, fileName), yaml.dump(cards));
        }
    }
    catch (error) {
        console.warn('Failed to create default model cards:', error);
    }
}
95
// Load all model cards.
// `modelCards` holds every validated card in provider-file order;
// `modelCardsDict` indexes the same cards by `card.model` (a later card with
// the same name replaces an earlier one).
const modelCards = [];
const modelCardsDict = {};
for (const file of ['openai.yaml', 'anthropic.yaml', 'xai.yaml', 'gemini.yaml', 'auto.yaml']) {
    const filePath = path.join(MODEL_CARDS_DIR, file);
    // Failures are isolated per file so that one unreadable or invalid provider
    // file does not discard the cards of the other providers.
    try {
        if (fs.existsSync(filePath)) {
            modelCards.push(...loadModelCards(filePath));
        }
    }
    catch (error) {
        console.warn(`Failed to load model cards from ${file}:`, error);
    }
}
// Built outside the try so the index always matches whatever was loaded.
for (const card of modelCards) {
    modelCardsDict[card.model] = card;
}
113
/**
 * Resolve the underlying model name of a model id.
 *
 * Ids starting with `ft:` are split on `:` and the second segment is returned;
 * any other id is returned unchanged.
 *
 * @param {string} modelId - Model id, optionally in `ft:<model>:...` form.
 * @returns {string} The second `:`-separated segment for `ft:` ids, else `modelId`.
 */
export function getModelFromModelId(modelId) {
    if (!modelId.startsWith('ft:')) {
        return modelId;
    }
    const [, baseModel] = modelId.split(':');
    return baseModel;
}
120
/**
 * Return a freshly parsed model card for a model id.
 *
 * The id is reduced to its base model name with getModelFromModelId and looked
 * up in modelCardsDict. When the id differs from the base name (a fine-tuned
 * id), the returned card carries the full id as `model` and the first
 * `'fine_tuning'` entry is removed from `capabilities.features`.
 *
 * @param {string} model - Model id, optionally fine-tuned.
 * @returns {object} Result of ModelCardSchema.parse on a shallow copy of the stored card.
 * @throws {Error} If no card is registered for the base model name.
 */
export function getModelCard(model) {
    const baseModel = getModelFromModelId(model);
    if (!(baseModel in modelCardsDict)) {
        throw new Error(`No model card found for model: ${baseModel}`);
    }
    const card = ModelCardSchema.parse({ ...modelCardsDict[baseModel] });
    const isFineTuned = baseModel !== model;
    if (isFineTuned) {
        // Report the card under the fine-tuned id
        card.model = model;
        // Drop the fine-tuning feature, if listed
        const { features } = card.capabilities;
        const position = features.indexOf('fine_tuning');
        if (position !== -1) {
            features.splice(position, 1);
        }
    }
    return card;
}
138
/**
 * Map a model id to its provider name by prefix of the base model name.
 *
 * @param {string} modelId - Model id, optionally in `ft:` form.
 * @returns {string} One of 'OpenAI', 'Anthropic', 'xAI', 'Gemini', 'Retab'.
 * @throws {Error} If the base model name matches none of the known prefixes.
 */
export function getProviderForModel(modelId) {
    const modelName = getModelFromModelId(modelId);
    // Checked in order; the first provider with a matching prefix wins.
    const providerPrefixes = [
        ['OpenAI', ['gpt-', 'o1', 'o3', 'o4']],
        ['Anthropic', ['claude-']],
        ['xAI', ['grok-']],
        ['Gemini', ['gemini-']],
        ['Retab', ['auto-']],
    ];
    const match = providerPrefixes.find(([, prefixes]) => prefixes.some((prefix) => modelName.startsWith(prefix)));
    if (match === undefined) {
        throw new Error(`Unknown provider for model: ${modelName}`);
    }
    return match[0];
}
162
/**
 * Assert that a model id can be used for extraction.
 *
 * The id must be a non-empty string for which getProviderForModel resolves a
 * provider.
 *
 * @param {string} model - Model id to validate.
 * @returns {void}
 * @throws {Error} 'Valid model must be provided for extraction' when `model` is
 *   falsy or not a string; 'Invalid model for extraction: <model>' when no
 *   provider can be resolved (the underlying error is attached as `cause`).
 */
export function assertValidModelExtraction(model) {
    if (!model || typeof model !== 'string') {
        throw new Error('Valid model must be provided for extraction');
    }
    // Additional validation logic can be added here
    try {
        getProviderForModel(model);
    }
    catch (error) {
        // Keep the original failure reachable for debugging; message and type are unchanged.
        throw new Error(`Invalid model for extraction: ${model}`, { cause: error });
    }
}
174
/**
 * Assert that a model id is one of the models accepted for schema generation.
 *
 * @param {string} model - Model id to validate.
 * @returns {void}
 * @throws {Error} When `model` is empty or not a string, or is not one of
 *   'gpt-4o-2024-11-20', 'gpt-4o-mini', 'gpt-4o'.
 */
export function assertValidModelSchemaGeneration(model) {
    if (typeof model !== 'string' || model.length === 0) {
        throw new Error('Valid model must be provided for schema generation');
    }
    switch (model) {
        case 'gpt-4o-2024-11-20':
        case 'gpt-4o-mini':
        case 'gpt-4o':
            return;
        default:
            throw new Error(`Model ${model} not valid for schema generation`);
    }
}
183
+ export { modelCards, modelCardsDict };
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Batch processing utilities for OpenAI Batch API and other providers
3
+ * Equivalent to Python's batch processing functionality
4
+ */
5
+ export interface BatchRequest { // One request entry of a batch; arrays of these are produced by the batchUtils request builders and written by batchUtils.saveBatchRequests.
6
+ custom_id: string; // Identifier of this request; BatchResponse also carries a custom_id (presumably the same value echoed back — confirm).
7
+ method: 'POST' | 'GET' | 'PUT' | 'DELETE'; // HTTP verb of the individual request.
8
+ url: string; // Target of the individual request. NOTE(review): whether this is a full URL or an API path is not visible in this declaration.
9
+ body?: Record<string, any>; // Optional request payload.
10
+ headers?: Record<string, string>; // Optional per-request headers.
11
+ }
12
+ export interface BatchResponse { // One result entry of a batch; arrays of these are returned by batchUtils.parseBatchResults.
13
+ id: string; // Identifier of this result entry.
14
+ custom_id: string; // Presumably matches BatchRequest.custom_id of the originating request — confirm.
15
+ response: { // Outcome of the individual request.
16
+ status_code: number;
17
+ request_id: string;
18
+ body: Record<string, any>; // Untyped response payload.
19
+ };
20
+ error?: { // Optional error details for this entry.
21
+ code: string;
22
+ message: string;
23
+ };
24
+ }
25
+ export interface BatchJob { // Batch job record; this is the resolved type of createBatch, getBatch, cancelBatch and processBatch on OpenAIBatchProcessor.
26
+ id: string;
27
+ object: 'batch'; // Constant discriminator.
28
+ endpoint: string;
29
+ errors?: { // Optional list of job-level errors.
30
+ object: 'list';
31
+ data: Array<{
32
+ code: string;
33
+ message: string;
34
+ param?: string;
35
+ line?: number; // NOTE(review): presumably the line number in the input file — confirm.
36
+ }>;
37
+ };
38
+ input_file_id: string; // createBatch sends its inputFileId argument under this name.
39
+ completion_window: '24h'; // Only '24h' is representable in this type.
40
+ status: 'validating' | 'failed' | 'in_progress' | 'finalizing' | 'completed' | 'expired' | 'cancelling' | 'cancelled';
41
+ output_file_id?: string;
42
+ error_file_id?: string;
43
+ created_at: number; // NOTE(review): the *_at fields are numbers, presumably Unix timestamps — unit not visible here.
44
+ in_progress_at?: number;
45
+ expires_at?: number;
46
+ finalizing_at?: number;
47
+ completed_at?: number;
48
+ failed_at?: number;
49
+ expired_at?: number;
50
+ cancelling_at?: number;
51
+ cancelled_at?: number;
52
+ request_counts: { // Per-job request tallies; BatchProgressInfo.progress uses the same three field names.
53
+ total: number;
54
+ completed: number;
55
+ failed: number;
56
+ };
57
+ metadata?: Record<string, string>; // Optional string-to-string metadata; createBatch forwards its metadata argument under this name.
58
+ }
59
+ export interface BatchProcessingOptions { // Constructor options of OpenAIBatchProcessor.
60
+ apiKey: string; // Sent as `Authorization: Bearer <apiKey>` on requests.
61
+ baseUrl?: string; // The constructor falls back to 'https://api.openai.com/v1' when this is falsy.
62
+ timeout?: number; // Passed to axios as `timeout`; the constructor falls back to 300000 (5 minutes) when this is falsy.
63
+ maxRetries?: number; // NOTE(review): not read by the constructor shown in batch_processing.js — confirm whether it is used anywhere.
64
+ completionWindow?: '24h'; // NOTE(review): not read by the constructor; createBatch takes its own completionWindow argument.
65
+ metadata?: Record<string, string>; // NOTE(review): not read by the constructor; createBatch takes its own metadata argument.
66
+ }
67
+ export interface BatchProgressInfo { // Value yielded by OpenAIBatchProcessor.monitorBatch.
68
+ jobId: string;
69
+ status: string; // Typed as plain string here, unlike the literal union on BatchJob.status.
70
+ progress: { // Same three counters as BatchJob.request_counts, plus a percentage.
71
+ total: number;
72
+ completed: number;
73
+ failed: number;
74
+ percentage: number; // NOTE(review): scale (0-1 vs 0-100) is not visible in this declaration.
75
+ };
76
+ timeElapsed: number; // NOTE(review): unit not visible in this declaration.
77
+ estimatedTimeRemaining?: number; // Optional; same caveat about units.
78
+ }
79
+ /**
80
+ * OpenAI Batch API client
81
+ */
82
+ export declare class OpenAIBatchProcessor { // Declaration of the batch client implemented in batch_processing.js; requests are made with axios against `baseUrl`.
83
+ private apiKey; // Set from options.apiKey.
84
+ private baseUrl; // Set from options.baseUrl, falling back to 'https://api.openai.com/v1'.
85
+ private timeout; // Set from options.timeout, falling back to 300000.
86
+ constructor(options: BatchProcessingOptions);
87
+ /**
88
+ * Upload a local file for batch processing: throws `File not found: <path>` when filePath does not exist, otherwise POSTs the file and `purpose` (default 'batch') as form data to `{baseUrl}/files` and resolves with the `id` and `filename` of the response.
89
+ */
90
+ uploadFile(filePath: string, purpose?: 'batch'): Promise<{
91
+ id: string;
92
+ filename: string;
93
+ }>;
94
+ /**
95
+ * Create a batch job: POSTs input_file_id, endpoint, completion_window (default '24h') and metadata to `{baseUrl}/batches` and resolves with the response body.
96
+ */
97
+ createBatch(inputFileId: string, endpoint: string, completionWindow?: '24h', metadata?: Record<string, string>): Promise<BatchJob>;
98
+ /**
99
+ * Get a batch job: GETs `{baseUrl}/batches/{batchId}` and resolves with the response body.
100
+ */
101
+ getBatch(batchId: string): Promise<BatchJob>;
102
+ /**
103
+ * Cancel batch job
104
+ */
105
+ cancelBatch(batchId: string): Promise<BatchJob>;
106
+ /**
107
+ * List batch jobs
108
+ */
109
+ listBatches(after?: string, limit?: number): Promise<{
110
+ data: BatchJob[];
111
+ }>;
112
+ /**
113
+ * Download file content
114
+ */
115
+ downloadFile(fileId: string): Promise<string>;
116
+ /**
117
+ * Monitor batch job progress; an async generator that yields BatchProgressInfo values.
118
+ */
119
+ monitorBatch(batchId: string, pollInterval?: number): AsyncGenerator<BatchProgressInfo, void, unknown>;
120
+ /**
121
+ * Process batch end-to-end
122
+ */
123
+ processBatch(requestsFilePath: string, outputFilePath: string, endpoint?: string, options?: {
124
+ completionWindow?: '24h';
125
+ metadata?: Record<string, string>;
126
+ pollInterval?: number; // NOTE(review): unit and default are not visible in this declaration.
127
+ showProgress?: boolean;
128
+ }): Promise<BatchJob>;
129
+ }
130
+ /**
131
+ * Utility functions for batch processing
132
+ */
133
+ export declare const batchUtils: { // Helper functions for building, saving and reading batch data; implementations live in batch_processing.js.
134
+ /**
135
+ * Create batch requests for chat completions
136
+ */
137
+ createChatCompletionRequests: (messages: Array<{
138
+ messages: any[];
139
+ customId?: string; // Optional. NOTE(review): how a missing customId is filled in is not visible in this declaration.
140
+ }>, model?: string, options?: {
141
+ temperature?: number;
142
+ maxTokens?: number;
143
+ responseFormat?: {
144
+ type: string;
145
+ };
146
+ }) => BatchRequest[];
147
+ /**
148
+ * Create batch requests for document extraction
149
+ */
150
+ createExtractionRequests: (documents: Array<{
151
+ document: any; // Untyped in this declaration.
152
+ schema: any; // Untyped in this declaration.
153
+ customId?: string;
154
+ }>, model?: string) => BatchRequest[];
155
+ /**
156
+ * Save batch requests to JSONL file
157
+ */
158
+ saveBatchRequests: (requests: BatchRequest[], filePath: string) => Promise<void>;
159
+ /**
160
+ * Parse batch results from JSONL file
161
+ */
162
+ parseBatchResults: (filePath: string) => Promise<BatchResponse[]>;
163
+ /**
164
+ * Extract successful results from batch responses
165
+ */
166
+ extractSuccessfulResults: (responses: BatchResponse[]) => Array<{
167
+ customId: string; // camelCase here, whereas BatchResponse uses custom_id.
168
+ result: any;
169
+ }>;
170
+ /**
171
+ * Extract failed results from batch responses
172
+ */
173
+ extractFailedResults: (responses: BatchResponse[]) => Array<{
174
+ customId: string; // camelCase here, whereas BatchResponse uses custom_id.
175
+ error: any;
176
+ }>;
177
+ };
178
+ declare const _default: { // Type of the module's default export: an object bundling the OpenAIBatchProcessor class and the batchUtils helpers.
179
+ OpenAIBatchProcessor: typeof OpenAIBatchProcessor;
180
+ batchUtils: { // Spelled out inline; lists the same six members as the exported `batchUtils` declaration in this file.
181
+ /**
182
+ * Create batch requests for chat completions
183
+ */
184
+ createChatCompletionRequests: (messages: Array<{
185
+ messages: any[];
186
+ customId?: string;
187
+ }>, model?: string, options?: {
188
+ temperature?: number;
189
+ maxTokens?: number;
190
+ responseFormat?: {
191
+ type: string;
192
+ };
193
+ }) => BatchRequest[];
194
+ /**
195
+ * Create batch requests for document extraction
196
+ */
197
+ createExtractionRequests: (documents: Array<{
198
+ document: any;
199
+ schema: any;
200
+ customId?: string;
201
+ }>, model?: string) => BatchRequest[];
202
+ /**
203
+ * Save batch requests to JSONL file
204
+ */
205
+ saveBatchRequests: (requests: BatchRequest[], filePath: string) => Promise<void>;
206
+ /**
207
+ * Parse batch results from JSONL file
208
+ */
209
+ parseBatchResults: (filePath: string) => Promise<BatchResponse[]>;
210
+ /**
211
+ * Extract successful results from batch responses
212
+ */
213
+ extractSuccessfulResults: (responses: BatchResponse[]) => Array<{
214
+ customId: string;
215
+ result: any;
216
+ }>;
217
+ /**
218
+ * Extract failed results from batch responses
219
+ */
220
+ extractFailedResults: (responses: BatchResponse[]) => Array<{
221
+ customId: string;
222
+ error: any;
223
+ }>;
224
+ };
225
+ };
226
+ export default _default; // Default export alongside the named exports declared in this file.
227
+ //# sourceMappingURL=batch_processing.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batch_processing.d.ts","sourceRoot":"","sources":["../../src/utils/batch_processing.ts"],"names":[],"mappings":"AAIA;;;GAGG;AAEH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,QAAQ,CAAC;IAC1C,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAC3B,CAAC;IACF,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,OAAO,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE;QACP,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,KAAK,CAAC;YACV,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,MAAM,CAAC;YAChB,KAAK,CAAC,EAAE,MAAM,CAAC;YACf,IAAI,CAAC,EAAE,MAAM,CAAC;SACf,CAAC,CAAC;KACJ,CAAC;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,KAAK,CAAC;IACzB,MAAM,EAAE,YAAY,GAAG,QAAQ,GAAG,aAAa,GAAG,YAAY,GAAG,WAAW,GAAG,SAAS,GAAG,YAAY,GAAG,WAAW,CAAC;IACtH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,KAAK,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,C
AAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,CAAC,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;gBACZ,OAAO,EAAE,sBAAsB;IAM3C;;OAEG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,OAAiB,GAAG,OAAO,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IA0BzG;;OAEG;IACG,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAChB,gBAAgB,GAAE,KAAa,EAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAChC,OAAO,CAAC,QAAQ,CAAC;IAqBpB;;OAEG;IACG,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IAWlD;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IAgBrD;;OAEG;IACG,WAAW,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAW,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,QAAQ,EAAE,CAAA;KAAE,CAAC;IAepF;;OAEG;IACG,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAWnD;;OAEG;IACI,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,GAAE,MAAc,GAAG,cAAc,CAAC,iBAAiB,EAAE,IAAI,EAAE,OAAO,CAAC;IA0CpH;;OAEG;IACG,YAAY,CAChB,gBAAgB,EAAE,MAAM,EACxB,cAAc,EAAE,MAAM,EACtB,QAAQ,GAAE,MAA+B,EACzC,OAAO,GAAE;QACP,gBAAgB,CAAC,EAAE,KAAK,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAClC,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;KACnB,GACL,OAAO,CAAC,QAAQ,CAAC;CAuDrB;AAED;;GAEG;AACH,eAAO,MAAM,UAAU;IACrB;;OAEG;6CAES,KAAK,CAAC;QAAE,QAAQ,EAAE,GAAG,EAAE,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,UAChD,MAAM,YACJ;QACP,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,cAAc,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;KACnC,KACA,YAAY,EAAE;IAejB;;OAEG;0CAEU,KAAK,CAAC;QAAE,QAAQ,EAAE,GAAG,CAAC;QAAC,MAAM,EAAE,GAAG,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,UAC5D,MAAM,KACZ,YAAY,EAAE;IAajB;;OAEG;kCACiC,YAAY,EAAE,YAAY,MAAM,KAAG,OAAO,CAAC,IAAI,CAAC;IAKpF;;OAEG;kCACiC,MAAM,KAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IAKrE;;OAEG;0CACmC,aAAa,EAAE,KAAG,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,GAAG,CAAA;KAAE,CAAC;IAShG;;OAEG;sCAC+B,aAAa,EAAE,KAAG,KAAK,CAAC;QAAE,QAAQ,EAAE,MAA
M,CAAC;QAAC,KAAK,EAAE,GAAG,CAAA;KAAE,CAAC;CAQ5F,CAAC;;;;QApFA;;WAEG;iDAES,KAAK,CAAC;YAAE,QAAQ,EAAE,GAAG,EAAE,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,UAChD,MAAM,YACJ;YACP,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,SAAS,CAAC,EAAE,MAAM,CAAC;YACnB,cAAc,CAAC,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAA;aAAE,CAAC;SACnC,KACA,YAAY,EAAE;QAejB;;WAEG;8CAEU,KAAK,CAAC;YAAE,QAAQ,EAAE,GAAG,CAAC;YAAC,MAAM,EAAE,GAAG,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,UAC5D,MAAM,KACZ,YAAY,EAAE;QAajB;;WAEG;sCACiC,YAAY,EAAE,YAAY,MAAM,KAAG,OAAO,CAAC,IAAI,CAAC;QAKpF;;WAEG;sCACiC,MAAM,KAAG,OAAO,CAAC,aAAa,EAAE,CAAC;QAKrE;;WAEG;8CACmC,aAAa,EAAE,KAAG,KAAK,CAAC;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,GAAG,CAAA;SAAE,CAAC;QAShG;;WAEG;0CAC+B,aAAa,EAAE,KAAG,KAAK,CAAC;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,GAAG,CAAA;SAAE,CAAC;;;AAU7F,wBAGE"}
@@ -0,0 +1,268 @@
1
import fs from 'fs';
import path from 'path';
import axios from 'axios';
import { readJSONL, writeJSONL } from './jsonl.js';
4
/** Batch statuses after which polling stops because the job will not progress further. */
const TERMINAL_BATCH_STATUSES = ['completed', 'failed', 'cancelled', 'expired'];

/**
 * Build the axios request config shared by every call made by the processor.
 *
 * @param {OpenAIBatchProcessor} processor - Supplies `apiKey` and `timeout`.
 * @param {Record<string, string>} [extraHeaders] - Headers added next to `Authorization`.
 * @param {object} [extraConfig] - Additional axios config entries (e.g. `responseType`).
 * @returns {object} Config object to pass as the last argument of an axios call.
 */
function buildRequestConfig(processor, extraHeaders = {}, extraConfig = {}) {
  return {
    ...extraConfig,
    headers: {
      'Authorization': `Bearer ${processor.apiKey}`,
      ...extraHeaders,
    },
    timeout: processor.timeout,
  };
}

/**
 * OpenAI Batch API client.
 *
 * Wraps the `/files` and `/batches` HTTP endpoints under `baseUrl` and offers
 * an end-to-end helper that uploads a request file, creates a batch, polls it
 * until it reaches a terminal status and saves the results.
 */
export class OpenAIBatchProcessor {
  /**
   * @param {object} options
   * @param {string} options.apiKey - Sent as a Bearer token on every request.
   * @param {string} [options.baseUrl] - Defaults to `https://api.openai.com/v1`.
   * @param {number} [options.timeout] - Per-request timeout in milliseconds
   *   (default 300000, i.e. 5 minutes). An explicit `0` is preserved.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.baseUrl = options.baseUrl || 'https://api.openai.com/v1';
    // `??` instead of `||` so that an explicit 0 is not replaced by the default.
    this.timeout = options.timeout ?? 300000; // 5 minutes
  }

  /**
   * Upload file for batch processing.
   *
   * @param {string} filePath - Path of the local file to upload.
   * @param {string} [purpose='batch'] - Value of the `purpose` form field.
   * @returns {Promise<{id: string, filename: string}>} `id` and `filename` from the response body.
   * @throws {Error} When `filePath` does not exist.
   */
  async uploadFile(filePath, purpose = 'batch') {
    if (!fs.existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`);
    }
    const fileBuffer = await fs.promises.readFile(filePath);
    const formData = new FormData();
    // path.basename handles the platform's separators, unlike split('/').
    formData.append('file', new Blob([fileBuffer]), path.basename(filePath));
    formData.append('purpose', purpose);
    const response = await axios.post(
      `${this.baseUrl}/files`,
      formData,
      buildRequestConfig(this, { 'Content-Type': 'multipart/form-data' }),
    );
    return {
      id: response.data.id,
      filename: response.data.filename,
    };
  }

  /**
   * Create batch job.
   *
   * @param {string} inputFileId - Sent as `input_file_id`.
   * @param {string} endpoint - Sent as `endpoint`.
   * @param {string} [completionWindow='24h'] - Sent as `completion_window`.
   * @param {Record<string, string>} [metadata] - Sent as `metadata`.
   * @returns {Promise<object>} The response body.
   */
  async createBatch(inputFileId, endpoint, completionWindow = '24h', metadata) {
    const payload = {
      input_file_id: inputFileId,
      endpoint,
      completion_window: completionWindow,
      metadata,
    };
    const response = await axios.post(
      `${this.baseUrl}/batches`,
      payload,
      buildRequestConfig(this, { 'Content-Type': 'application/json' }),
    );
    return response.data;
  }

  /**
   * Get batch job status.
   *
   * @param {string} batchId
   * @returns {Promise<object>} The response body.
   */
  async getBatch(batchId) {
    const response = await axios.get(
      `${this.baseUrl}/batches/${batchId}`,
      buildRequestConfig(this),
    );
    return response.data;
  }

  /**
   * Cancel batch job.
   *
   * @param {string} batchId
   * @returns {Promise<object>} The response body.
   */
  async cancelBatch(batchId) {
    const response = await axios.post(
      `${this.baseUrl}/batches/${batchId}/cancel`,
      {},
      buildRequestConfig(this, { 'Content-Type': 'application/json' }),
    );
    return response.data;
  }

  /**
   * List batch jobs.
   *
   * @param {string} [after] - Added as the `after` query parameter when truthy.
   * @param {number} [limit=20] - Added as the `limit` query parameter.
   * @returns {Promise<object>} The response body.
   */
  async listBatches(after, limit = 20) {
    const params = new URLSearchParams();
    if (after) {
      params.append('after', after);
    }
    params.append('limit', String(limit));
    const response = await axios.get(
      `${this.baseUrl}/batches?${params}`,
      buildRequestConfig(this),
    );
    return response.data;
  }

  /**
   * Download file content as raw text.
   *
   * @param {string} fileId
   * @returns {Promise<string>} The unparsed response body.
   */
  async downloadFile(fileId) {
    const response = await axios.get(
      `${this.baseUrl}/files/${fileId}/content`,
      buildRequestConfig(this, {}, {
        // Without these, axios tries to JSON-parse the body: a JSONL file with
        // exactly one line would come back as an object instead of text.
        responseType: 'text',
        transformResponse: [(data) => data],
      }),
    );
    return response.data;
  }

  /**
   * Monitor batch job progress.
   *
   * Polls `getBatch` and yields one update per poll; the generator finishes
   * after yielding an update whose status is terminal.
   *
   * @param {string} batchId
   * @param {number} [pollInterval=30000] - Delay between polls in milliseconds.
   * @yields {{jobId: string, status: string, progress: {total: number, completed: number, failed: number, percentage: number}, timeElapsed: number, estimatedTimeRemaining: (number|undefined)}}
   */
  async *monitorBatch(batchId, pollInterval = 30000) {
    const startTime = Date.now();
    let lastCompleted = 0;
    while (true) {
      const batch = await this.getBatch(batchId);
      const timeElapsed = Date.now() - startTime;
      // Tolerate a missing `request_counts` instead of throwing.
      const { total = 0, completed = 0, failed = 0 } = batch.request_counts ?? {};
      const progress = {
        total,
        completed,
        failed,
        percentage: total > 0 ? (completed / total) * 100 : 0,
      };
      // Estimate time remaining only when progress was made since the last
      // poll and some time has elapsed (avoids dividing by a zero duration).
      let estimatedTimeRemaining;
      if (completed > lastCompleted && completed > 0 && timeElapsed > 0) {
        const completionRate = completed / timeElapsed;
        estimatedTimeRemaining = (total - completed) / completionRate;
      }
      yield {
        jobId: batchId,
        status: batch.status,
        progress,
        timeElapsed,
        estimatedTimeRemaining,
      };
      // Break if job is complete
      if (TERMINAL_BATCH_STATUSES.includes(batch.status)) {
        break;
      }
      lastCompleted = completed;
      await new Promise((resolve) => setTimeout(resolve, pollInterval));
    }
  }

  /**
   * Process batch end-to-end: upload the request file, create the batch, wait
   * for a terminal status, then write the output file (or log the error file).
   *
   * @param {string} requestsFilePath - Local file passed to `uploadFile`.
   * @param {string} outputFilePath - Where the downloaded results are written.
   * @param {string} [endpoint='/v1/chat/completions']
   * @param {object} [options]
   * @param {string} [options.completionWindow='24h']
   * @param {Record<string, string>} [options.metadata]
   * @param {number} [options.pollInterval=30000] - Milliseconds between polls.
   * @param {boolean} [options.showProgress=true] - Controls only the per-poll
   *   progress logging; the batch is always awaited.
   * @returns {Promise<object>} The batch object fetched after polling ended.
   */
  async processBatch(requestsFilePath, outputFilePath, endpoint = '/v1/chat/completions', options = {}) {
    const {
      completionWindow = '24h',
      metadata,
      pollInterval = 30000,
      showProgress = true,
    } = options;
    console.log('🚀 Starting batch processing...');
    // Upload input file
    console.log('📤 Uploading input file...');
    const uploadResult = await this.uploadFile(requestsFilePath);
    console.log(`✅ File uploaded: ${uploadResult.id}`);
    // Create batch job
    console.log('⚙️ Creating batch job...');
    const batch = await this.createBatch(uploadResult.id, endpoint, completionWindow, metadata);
    console.log(`✅ Batch created: ${batch.id}`);
    // Always wait for a terminal status; `showProgress` only toggles logging.
    // Skipping the wait would inspect a batch that has barely started.
    if (showProgress) {
      console.log('📊 Monitoring batch progress...');
    }
    for await (const update of this.monitorBatch(batch.id, pollInterval)) {
      if (!showProgress) {
        continue;
      }
      const { percentage, completed, total } = update.progress;
      const timeStr = `${Math.round(update.timeElapsed / 1000)}s`;
      const etaStr = update.estimatedTimeRemaining !== undefined
        ? ` (ETA: ${Math.round(update.estimatedTimeRemaining / 1000)}s)`
        : '';
      console.log(` Status: ${update.status} - ${percentage.toFixed(1)}% (${completed}/${total}) - ${timeStr}${etaStr}`);
    }
    // Get final batch status
    const finalBatch = await this.getBatch(batch.id);
    if (finalBatch.status === 'completed' && finalBatch.output_file_id) {
      console.log('📥 Downloading results...');
      const results = await this.downloadFile(finalBatch.output_file_id);
      await fs.promises.writeFile(outputFilePath, results);
      console.log(`✅ Results saved to: ${outputFilePath}`);
    } else {
      console.error(`❌ Batch failed with status: ${finalBatch.status}`);
      if (finalBatch.error_file_id) {
        const errors = await this.downloadFile(finalBatch.error_file_id);
        console.error('Error details:', errors);
      }
    }
    return finalBatch;
  }
}
192
/**
 * Tell whether a batch result entry represents a successful request:
 * no `error` and a `response` whose `status_code` is 200.
 * A missing `response` counts as a failure instead of throwing.
 */
function isSuccessfulBatchResponse(entry) {
  return !entry.error && entry.response?.status_code === 200;
}

/**
 * Utility functions for batch processing
 */
export const batchUtils = {
  /**
   * Create batch requests for chat completions.
   *
   * @param {Array<{messages: any[], customId?: string}>} messages - One entry per request.
   * @param {string} [model='gpt-4o-mini']
   * @param {{temperature?: number, maxTokens?: number, responseFormat?: object}} [options]
   * @returns {object[]} Request objects targeting `/v1/chat/completions`;
   *   `custom_id` falls back to `request_<index>`.
   */
  createChatCompletionRequests: (messages, model = 'gpt-4o-mini', options = {}) =>
    messages.map((msg, index) => ({
      custom_id: msg.customId || `request_${index}`,
      method: 'POST',
      url: '/v1/chat/completions',
      body: {
        model,
        messages: msg.messages,
        temperature: options.temperature ?? 0.0,
        max_tokens: options.maxTokens,
        response_format: options.responseFormat,
      },
    })),

  /**
   * Create batch requests for document extraction.
   *
   * @param {Array<{document: any, schema: any, customId?: string}>} documents
   * @param {string} [model='gpt-4o-mini']
   * @returns {object[]} Request objects targeting `/v1/documents/extractions`;
   *   `custom_id` falls back to `extraction_<index>`.
   */
  createExtractionRequests: (documents, model = 'gpt-4o-mini') =>
    documents.map((doc, index) => ({
      custom_id: doc.customId || `extraction_${index}`,
      method: 'POST',
      url: '/v1/documents/extractions',
      body: {
        json_schema: doc.schema,
        documents: [doc.document],
        model,
      },
    })),

  /**
   * Save batch requests to JSONL file via `writeJSONL`, then log how many were saved.
   *
   * @param {object[]} requests
   * @param {string} filePath
   * @returns {Promise<void>}
   */
  saveBatchRequests: async (requests, filePath) => {
    await writeJSONL(filePath, requests);
    console.log(`📄 Saved ${requests.length} batch requests to ${filePath}`);
  },

  /**
   * Parse batch results from JSONL file via `readJSONL`.
   *
   * @param {string} filePath
   * @returns {Promise<any[]>} Whatever `readJSONL` resolves with.
   */
  parseBatchResults: async (filePath) => readJSONL(filePath),

  /**
   * Extract successful results from batch responses.
   *
   * @param {object[]} responses - Parsed batch result entries.
   * @returns {Array<{customId: string, result: any}>} One item per entry with
   *   no `error` and a 200 `status_code`; `result` is the response body.
   */
  extractSuccessfulResults: (responses) =>
    responses
      .filter(isSuccessfulBatchResponse)
      .map((entry) => ({
        customId: entry.custom_id,
        result: entry.response.body,
      })),

  /**
   * Extract failed results from batch responses (the complement of
   * `extractSuccessfulResults`).
   *
   * @param {object[]} responses - Parsed batch result entries.
   * @returns {Array<{customId: string, error: any}>} `error` is the entry's own
   *   error when present, otherwise a synthetic `http_error` describing the
   *   status code (`unknown` when the entry carries no response).
   */
  extractFailedResults: (responses) =>
    responses
      .filter((entry) => !isSuccessfulBatchResponse(entry))
      .map((entry) => ({
        customId: entry.custom_id,
        error: entry.error || {
          code: 'http_error',
          message: `HTTP ${entry.response?.status_code ?? 'unknown'}`,
        },
      })),
};
265
// Default export: the processor class and the utility object bundled together.
export default { OpenAIBatchProcessor, batchUtils };