@retab/node 0.0.0-reserved → 0.0.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +293 -2
- package/dist/api/client.d.ts +15 -0
- package/dist/api/client.d.ts.map +1 -0
- package/dist/api/client.js +16 -0
- package/dist/api/consensus/client.d.ts +7 -0
- package/dist/api/consensus/client.d.ts.map +1 -0
- package/dist/api/consensus/client.js +14 -0
- package/dist/api/deployments/client.d.ts +20 -0
- package/dist/api/deployments/client.d.ts.map +1 -0
- package/dist/api/deployments/client.js +23 -0
- package/dist/api/documents/client.d.ts +10 -0
- package/dist/api/documents/client.d.ts.map +1 -0
- package/dist/api/documents/client.js +35 -0
- package/dist/api/models/client.d.ts +17 -0
- package/dist/api/models/client.d.ts.map +1 -0
- package/dist/api/models/client.js +15 -0
- package/dist/api/schemas/client.d.ts +12 -0
- package/dist/api/schemas/client.d.ts.map +1 -0
- package/dist/api/schemas/client.js +14 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +135 -0
- package/dist/errors.d.ts +34 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +53 -0
- package/dist/generated_types.d.ts +64373 -0
- package/dist/generated_types.d.ts.map +1 -0
- package/dist/generated_types.js +2267 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/mime.d.ts +5 -0
- package/dist/mime.d.ts.map +1 -0
- package/dist/mime.js +66 -0
- package/dist/resource.d.ts +12 -0
- package/dist/resource.d.ts.map +1 -0
- package/dist/resource.js +19 -0
- package/dist/resources/consensus/completions.d.ts +66 -0
- package/dist/resources/consensus/completions.d.ts.map +1 -0
- package/dist/resources/consensus/completions.js +84 -0
- package/dist/resources/consensus/index.d.ts +72 -0
- package/dist/resources/consensus/index.d.ts.map +1 -0
- package/dist/resources/consensus/index.js +76 -0
- package/dist/resources/consensus/responses.d.ts +69 -0
- package/dist/resources/consensus/responses.d.ts.map +1 -0
- package/dist/resources/consensus/responses.js +99 -0
- package/dist/resources/documents/extractions.d.ts +74 -0
- package/dist/resources/documents/extractions.d.ts.map +1 -0
- package/dist/resources/documents/extractions.js +196 -0
- package/dist/resources/documents/index.d.ts +21 -0
- package/dist/resources/documents/index.d.ts.map +1 -0
- package/dist/resources/documents/index.js +55 -0
- package/dist/resources/evaluations/documents.d.ts +40 -0
- package/dist/resources/evaluations/documents.d.ts.map +1 -0
- package/dist/resources/evaluations/documents.js +123 -0
- package/dist/resources/evaluations/index.d.ts +14 -0
- package/dist/resources/evaluations/index.d.ts.map +1 -0
- package/dist/resources/evaluations/index.js +17 -0
- package/dist/resources/evaluations/iterations.d.ts +50 -0
- package/dist/resources/evaluations/iterations.d.ts.map +1 -0
- package/dist/resources/evaluations/iterations.js +156 -0
- package/dist/resources/files.d.ts +82 -0
- package/dist/resources/files.d.ts.map +1 -0
- package/dist/resources/files.js +150 -0
- package/dist/resources/finetuning.d.ts +105 -0
- package/dist/resources/finetuning.d.ts.map +1 -0
- package/dist/resources/finetuning.js +181 -0
- package/dist/resources/index.d.ts +11 -0
- package/dist/resources/index.d.ts.map +1 -0
- package/dist/resources/index.js +10 -0
- package/dist/resources/models.d.ts +57 -0
- package/dist/resources/models.d.ts.map +1 -0
- package/dist/resources/models.js +72 -0
- package/dist/resources/processors/automations/endpoints.d.ts +90 -0
- package/dist/resources/processors/automations/endpoints.d.ts.map +1 -0
- package/dist/resources/processors/automations/endpoints.js +145 -0
- package/dist/resources/processors/automations/index.d.ts +7 -0
- package/dist/resources/processors/automations/index.d.ts.map +1 -0
- package/dist/resources/processors/automations/index.js +6 -0
- package/dist/resources/processors/automations/links.d.ts +90 -0
- package/dist/resources/processors/automations/links.d.ts.map +1 -0
- package/dist/resources/processors/automations/links.js +149 -0
- package/dist/resources/processors/automations/logs.d.ts +35 -0
- package/dist/resources/processors/automations/logs.d.ts.map +1 -0
- package/dist/resources/processors/automations/logs.js +60 -0
- package/dist/resources/processors/automations/mailboxes.d.ts +102 -0
- package/dist/resources/processors/automations/mailboxes.d.ts.map +1 -0
- package/dist/resources/processors/automations/mailboxes.js +157 -0
- package/dist/resources/processors/automations/outlook.d.ts +114 -0
- package/dist/resources/processors/automations/outlook.d.ts.map +1 -0
- package/dist/resources/processors/automations/outlook.js +170 -0
- package/dist/resources/processors/automations/tests.d.ts +58 -0
- package/dist/resources/processors/automations/tests.d.ts.map +1 -0
- package/dist/resources/processors/automations/tests.js +90 -0
- package/dist/resources/processors/index.d.ts +303 -0
- package/dist/resources/processors/index.d.ts.map +1 -0
- package/dist/resources/processors/index.js +261 -0
- package/dist/resources/schemas.d.ts +63 -0
- package/dist/resources/schemas.d.ts.map +1 -0
- package/dist/resources/schemas.js +183 -0
- package/dist/resources/secrets/external_api_keys.d.ts +61 -0
- package/dist/resources/secrets/external_api_keys.d.ts.map +1 -0
- package/dist/resources/secrets/external_api_keys.js +120 -0
- package/dist/resources/secrets/index.d.ts +14 -0
- package/dist/resources/secrets/index.d.ts.map +1 -0
- package/dist/resources/secrets/index.js +17 -0
- package/dist/resources/secrets/webhooks.d.ts +73 -0
- package/dist/resources/secrets/webhooks.d.ts.map +1 -0
- package/dist/resources/secrets/webhooks.js +145 -0
- package/dist/resources/usage.d.ts +223 -0
- package/dist/resources/usage.d.ts.map +1 -0
- package/dist/resources/usage.js +310 -0
- package/dist/types/ai_models.d.ts +389 -0
- package/dist/types/ai_models.d.ts.map +1 -0
- package/dist/types/ai_models.js +145 -0
- package/dist/types/automations/cron.d.ts +28 -0
- package/dist/types/automations/cron.d.ts.map +1 -0
- package/dist/types/automations/cron.js +1 -0
- package/dist/types/automations/endpoints.d.ts +13 -0
- package/dist/types/automations/endpoints.d.ts.map +1 -0
- package/dist/types/automations/endpoints.js +1 -0
- package/dist/types/automations/index.d.ts +7 -0
- package/dist/types/automations/index.d.ts.map +1 -0
- package/dist/types/automations/index.js +6 -0
- package/dist/types/automations/links.d.ts +15 -0
- package/dist/types/automations/links.d.ts.map +1 -0
- package/dist/types/automations/links.js +1 -0
- package/dist/types/automations/mailboxes.d.ts +18 -0
- package/dist/types/automations/mailboxes.d.ts.map +1 -0
- package/dist/types/automations/mailboxes.js +1 -0
- package/dist/types/automations/outlook.d.ts +37 -0
- package/dist/types/automations/outlook.d.ts.map +1 -0
- package/dist/types/automations/outlook.js +1 -0
- package/dist/types/automations/webhooks.d.ts +13 -0
- package/dist/types/automations/webhooks.d.ts.map +1 -0
- package/dist/types/automations/webhooks.js +1 -0
- package/dist/types/browser_canvas.d.ts +4 -0
- package/dist/types/browser_canvas.d.ts.map +1 -0
- package/dist/types/browser_canvas.js +2 -0
- package/dist/types/chat.d.ts +99 -0
- package/dist/types/chat.d.ts.map +1 -0
- package/dist/types/chat.js +20 -0
- package/dist/types/consensus.d.ts +10 -0
- package/dist/types/consensus.d.ts.map +1 -0
- package/dist/types/consensus.js +1 -0
- package/dist/types/db/annotations.d.ts +108 -0
- package/dist/types/db/annotations.d.ts.map +1 -0
- package/dist/types/db/annotations.js +6 -0
- package/dist/types/db/files.d.ts +133 -0
- package/dist/types/db/files.d.ts.map +1 -0
- package/dist/types/db/files.js +5 -0
- package/dist/types/documents/extractions.d.ts +1849 -0
- package/dist/types/documents/extractions.d.ts.map +1 -0
- package/dist/types/documents/extractions.js +211 -0
- package/dist/types/documents/processing.d.ts +249 -0
- package/dist/types/documents/processing.d.ts.map +1 -0
- package/dist/types/documents/processing.js +6 -0
- package/dist/types/evaluations/iterations.d.ts +41 -0
- package/dist/types/evaluations/iterations.d.ts.map +1 -0
- package/dist/types/evaluations/iterations.js +1 -0
- package/dist/types/jobs/base.d.ts +162 -0
- package/dist/types/jobs/base.d.ts.map +1 -0
- package/dist/types/jobs/base.js +6 -0
- package/dist/types/jobs/specialized.d.ts +200 -0
- package/dist/types/jobs/specialized.d.ts.map +1 -0
- package/dist/types/jobs/specialized.js +37 -0
- package/dist/types/logs.d.ts +92 -0
- package/dist/types/logs.d.ts.map +1 -0
- package/dist/types/logs.js +1 -0
- package/dist/types/mime.d.ts +426 -0
- package/dist/types/mime.d.ts.map +1 -0
- package/dist/types/mime.js +48 -0
- package/dist/types/modalities.d.ts +31 -0
- package/dist/types/modalities.d.ts.map +1 -0
- package/dist/types/modalities.js +109 -0
- package/dist/types/pagination.d.ts +5 -0
- package/dist/types/pagination.d.ts.map +1 -0
- package/dist/types/pagination.js +1 -0
- package/dist/types/schemas/enhancement.d.ts +250 -0
- package/dist/types/schemas/enhancement.d.ts.map +1 -0
- package/dist/types/schemas/enhancement.js +6 -0
- package/dist/types/schemas/generate.d.ts +160 -0
- package/dist/types/schemas/generate.d.ts.map +1 -0
- package/dist/types/schemas/generate.js +19 -0
- package/dist/types/schemas/object.d.ts +116 -0
- package/dist/types/schemas/object.d.ts.map +1 -0
- package/dist/types/schemas/object.js +861 -0
- package/dist/types/secrets/external_api_keys.d.ts +27 -0
- package/dist/types/secrets/external_api_keys.d.ts.map +1 -0
- package/dist/types/secrets/external_api_keys.js +11 -0
- package/dist/types/secrets/index.d.ts +2 -0
- package/dist/types/secrets/index.d.ts.map +1 -0
- package/dist/types/secrets/index.js +1 -0
- package/dist/types/standards.d.ts +37 -0
- package/dist/types/standards.d.ts.map +1 -0
- package/dist/types/standards.js +1 -0
- package/dist/types.d.ts +276 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +85 -0
- package/dist/utils/ai_models.d.ts +10 -0
- package/dist/utils/ai_models.d.ts.map +1 -0
- package/dist/utils/ai_models.js +183 -0
- package/dist/utils/batch_processing.d.ts +227 -0
- package/dist/utils/batch_processing.d.ts.map +1 -0
- package/dist/utils/batch_processing.js +268 -0
- package/dist/utils/benchmarking.d.ts +115 -0
- package/dist/utils/benchmarking.d.ts.map +1 -0
- package/dist/utils/benchmarking.js +355 -0
- package/dist/utils/chat.d.ts +70 -0
- package/dist/utils/chat.d.ts.map +1 -0
- package/dist/utils/chat.js +79 -0
- package/dist/utils/cost_calculation.d.ts +26 -0
- package/dist/utils/cost_calculation.d.ts.map +1 -0
- package/dist/utils/cost_calculation.js +99 -0
- package/dist/utils/datasets.d.ts +135 -0
- package/dist/utils/datasets.d.ts.map +1 -0
- package/dist/utils/datasets.js +359 -0
- package/dist/utils/display.d.ts +108 -0
- package/dist/utils/display.d.ts.map +1 -0
- package/dist/utils/display.js +244 -0
- package/dist/utils/hash.d.ts +18 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +31 -0
- package/dist/utils/hashing.d.ts +18 -0
- package/dist/utils/hashing.d.ts.map +1 -0
- package/dist/utils/hashing.js +28 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/json_schema.d.ts +18 -0
- package/dist/utils/json_schema.d.ts.map +1 -0
- package/dist/utils/json_schema.js +334 -0
- package/dist/utils/json_schema_utils.d.ts +42 -0
- package/dist/utils/json_schema_utils.d.ts.map +1 -0
- package/dist/utils/json_schema_utils.js +212 -0
- package/dist/utils/jsonl.d.ts +60 -0
- package/dist/utils/jsonl.d.ts.map +1 -0
- package/dist/utils/jsonl.js +259 -0
- package/dist/utils/mime.d.ts +6 -0
- package/dist/utils/mime.d.ts.map +1 -0
- package/dist/utils/mime.js +129 -0
- package/dist/utils/model_cards.d.ts +219 -0
- package/dist/utils/model_cards.d.ts.map +1 -0
- package/dist/utils/model_cards.js +462 -0
- package/dist/utils/prompt_optimization.d.ts +96 -0
- package/dist/utils/prompt_optimization.d.ts.map +1 -0
- package/dist/utils/prompt_optimization.js +275 -0
- package/dist/utils/responses.d.ts +35 -0
- package/dist/utils/responses.d.ts.map +1 -0
- package/dist/utils/responses.js +37 -0
- package/dist/utils/stream.d.ts +13 -0
- package/dist/utils/stream.d.ts.map +1 -0
- package/dist/utils/stream.js +64 -0
- package/dist/utils/stream_context_managers.d.ts +147 -0
- package/dist/utils/stream_context_managers.d.ts.map +1 -0
- package/dist/utils/stream_context_managers.js +380 -0
- package/dist/utils/usage.d.ts +57 -0
- package/dist/utils/usage.d.ts.map +1 -0
- package/dist/utils/usage.js +97 -0
- package/dist/utils/webhook_secrets.d.ts +59 -0
- package/dist/utils/webhook_secrets.d.ts.map +1 -0
- package/dist/utils/webhook_secrets.js +107 -0
- package/dist/utils/zod_to_json_schema.d.ts +11 -0
- package/dist/utils/zod_to_json_schema.d.ts.map +1 -0
- package/dist/utils/zod_to_json_schema.js +123 -0
- package/dist/utils.d.ts +19 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +1 -0
- package/package.json +62 -6
- package/index.js +0 -7
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import yaml from 'js-yaml';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
import { ModelCardSchema } from '../types/ai_models.js';
|
|
6
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
const __dirname = path.dirname(__filename);
|
|
8
|
+
const MODEL_CARDS_DIR = path.join(__dirname, '_model_cards');
|
|
9
|
+
/**
 * Deep-merge a model card (`override`) onto the card it inherits from (`base`).
 *
 * Nested plain objects are merged key by key; every other value (scalars,
 * `null`, and arrays) in `override` replaces the value in `base` wholesale.
 * The `inherits` key of `override` is bookkeeping only and is never copied
 * into the result. Neither input is mutated.
 *
 * @param {object} base - Parent card (or nested object of it).
 * @param {object} override - Child card (or nested object of it).
 * @returns {object} A new object holding the merged fields.
 */
function mergeModelCards(base, override) {
    // Arrays are `typeof 'object'` too, but merging them index by index would
    // turn a list such as `features` into a plain object mixing parent and
    // child entries, so only non-array objects are merged recursively.
    const isPlainObject = (candidate) => typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);
    const result = { ...base };
    for (const [key, value] of Object.entries(override)) {
        if (key === 'inherits') {
            continue;
        }
        const inherited = Object.prototype.hasOwnProperty.call(result, key) ? result[key] : undefined;
        if (isPlainObject(value) && isPlainObject(inherited)) {
            result[key] = mergeModelCards(inherited, value);
        }
        else {
            result[key] = value;
        }
    }
    return result;
}
|
|
24
|
+
/**
 * Load and validate every model card declared in one YAML file.
 *
 * Cards without an `inherits` key are base cards. A card with `inherits: <name>`
 * is merged onto the base card of that name (from the same file) with
 * `mergeModelCards` before validation. Only base cards can be inherited from.
 *
 * @param {string} yamlFile - Path of the YAML file to read.
 * @returns {Array} Cards validated by `ModelCardSchema.parse`, in file order.
 * @throws {Error} If the file does not contain a list, or a card inherits from
 *   a name that is not a base card in this file. Errors from reading the file,
 *   parsing the YAML, or schema validation propagate unchanged.
 */
function loadModelCards(yamlFile) {
    const rawCards = yaml.load(fs.readFileSync(yamlFile, 'utf-8'));
    // An empty YAML document parses to undefined; treat it as "no cards"
    // instead of failing on a non-iterable value.
    if (rawCards === undefined || rawCards === null) {
        return [];
    }
    if (!Array.isArray(rawCards)) {
        throw new Error(`Model card file ${yamlFile} must contain a list of model cards`);
    }
    // First pass: index the base cards by model name. A Map avoids accidental
    // hits on Object.prototype keys when resolving `inherits`.
    const baseCards = new Map();
    for (const card of rawCards) {
        if (!('inherits' in card)) {
            baseCards.set(card.model, card);
        }
    }
    // Second pass: resolve inheritance and validate, preserving file order.
    const finalCards = [];
    for (const card of rawCards) {
        if (!('inherits' in card)) {
            finalCards.push(ModelCardSchema.parse(card));
            continue;
        }
        const parent = baseCards.get(card.inherits);
        if (parent === undefined) {
            // Fail loudly: merging onto nothing would only surface later as an
            // unrelated-looking schema validation error.
            throw new Error(`Model card "${card.model}" in ${yamlFile} inherits from unknown base card "${card.inherits}"`);
        }
        finalCards.push(ModelCardSchema.parse(mergeModelCards(parent, card)));
    }
    return finalCards;
}
|
|
47
|
+
// Create model cards directory structure if it doesn't exist
|
|
48
|
+
// First-run bootstrap: when the model-card directory is missing, create it and
// seed one YAML file per provider (OpenAI and Anthropic get default cards, the
// remaining providers get an empty list).
if (!fs.existsSync(MODEL_CARDS_DIR)) {
    fs.mkdirSync(MODEL_CARDS_DIR, { recursive: true });
    // Serialise a list of cards into <MODEL_CARDS_DIR>/<fileName>.
    const writeCards = (fileName, cards) => {
        fs.writeFileSync(path.join(MODEL_CARDS_DIR, fileName), yaml.dump(cards));
    };
    // Builds a fresh capabilities object per card. The objects are deliberately
    // not shared between cards so the serialised YAML stays fully expanded
    // (NOTE(review): js-yaml emits anchors/aliases for repeated references by
    // default — confirm before sharing instances).
    const chatCapabilities = (features) => ({
        modalities: ['text', 'image'],
        endpoints: ['chat_completions'],
        features,
    });
    writeCards('openai.yaml', [
        {
            model: 'gpt-4o',
            pricing: { text: { prompt: 2.5, completion: 10.0 } },
            capabilities: chatCapabilities(['streaming', 'function_calling', 'structured_outputs']),
        },
        {
            model: 'gpt-4o-mini',
            pricing: { text: { prompt: 0.15, completion: 0.6 } },
            capabilities: chatCapabilities(['streaming', 'function_calling', 'structured_outputs']),
        },
    ]);
    writeCards('anthropic.yaml', [
        {
            model: 'claude-3-5-sonnet-latest',
            pricing: { text: { prompt: 3.0, completion: 15.0 } },
            capabilities: chatCapabilities(['streaming']),
        },
    ]);
    // Providers without default cards still get a (empty) file.
    for (const emptyProviderFile of ['xai.yaml', 'gemini.yaml', 'auto.yaml']) {
        writeCards(emptyProviderFile, []);
    }
}
|
|
95
|
+
// Load all model cards
|
|
96
|
+
// All validated cards in load order, plus an index of the same cards by model
// name. Both are filled in place so the exported bindings are never reassigned.
const modelCards = [];
const modelCardsDict = {};
try {
    const cardFiles = ['openai.yaml', 'anthropic.yaml', 'xai.yaml', 'gemini.yaml', 'auto.yaml'];
    for (const file of cardFiles) {
        const filePath = path.join(MODEL_CARDS_DIR, file);
        if (!fs.existsSync(filePath)) {
            continue;
        }
        // Index each card as soon as its file has loaded: if a later file
        // fails, the list and the dictionary still describe the same cards
        // instead of the dictionary being left empty.
        for (const card of loadModelCards(filePath)) {
            modelCards.push(card);
            modelCardsDict[card.model] = card;
        }
    }
}
catch (error) {
    // Best effort: a broken card file must not prevent the module from loading.
    console.warn('Failed to load model cards:', error);
}
|
|
113
|
+
/**
 * Resolve the base model name behind a model id.
 *
 * Ids starting with `ft:` are treated as fine-tuned ids of the form
 * `ft:<base-model>:...`; the segment between the first and second colon is
 * returned. Any other id is returned unchanged.
 *
 * @param {string} modelId - Model id, possibly a fine-tuned one.
 * @returns {string} The base model name.
 */
export function getModelFromModelId(modelId) {
    const isFineTuned = modelId.startsWith('ft:');
    return isFineTuned ? modelId.split(':')[1] : modelId;
}
|
|
120
|
+
/**
 * Look up the model card for a model id.
 *
 * Fine-tuned ids (`ft:<base>:...`) resolve to the card of their base model; the
 * returned card is then renamed to the full fine-tuned id and loses the
 * `fine_tuning` feature, if it had it.
 *
 * @param {string} model - Model id, possibly a fine-tuned one.
 * @returns {object} A card freshly validated by `ModelCardSchema.parse`.
 * @throws {Error} If no card is registered for the (base) model name.
 */
export function getModelCard(model) {
    const modelName = getModelFromModelId(model);
    // Own-property check: the `in` operator would also match names inherited
    // from Object.prototype (e.g. "constructor", "toString").
    if (!Object.prototype.hasOwnProperty.call(modelCardsDict, modelName)) {
        throw new Error(`No model card found for model: ${modelName}`);
    }
    const modelCard = ModelCardSchema.parse({ ...modelCardsDict[modelName] });
    if (modelName !== model) {
        // Fine-tuned model -> report it under its full id.
        modelCard.model = model;
        // Remove the fine-tuning feature (if present).
        // NOTE(review): this mutates the parsed card in place; it assumes
        // ModelCardSchema.parse returns fresh nested arrays so the cached card
        // in modelCardsDict is unaffected — confirm.
        const features = modelCard.capabilities.features;
        const index = features.indexOf('fine_tuning');
        if (index > -1) {
            features.splice(index, 1);
        }
    }
    return modelCard;
}
|
|
138
|
+
/**
 * Infer the provider of a model from the prefix of its base model name.
 *
 * Fine-tuned ids are first reduced to their base model name via
 * `getModelFromModelId`. Prefix groups are checked in the order listed below.
 *
 * @param {string} modelId - Model id, possibly a fine-tuned one.
 * @returns {string} One of 'OpenAI', 'Anthropic', 'xAI', 'Gemini', 'Retab'.
 * @throws {Error} If the name matches none of the known prefixes.
 */
export function getProviderForModel(modelId) {
    const modelName = getModelFromModelId(modelId);
    // [provider, name prefixes that identify it]
    const providerPrefixes = [
        ['OpenAI', ['gpt-', 'o1', 'o3', 'o4']],
        ['Anthropic', ['claude-']],
        ['xAI', ['grok-']],
        ['Gemini', ['gemini-']],
        ['Retab', ['auto-']],
    ];
    for (const [provider, prefixes] of providerPrefixes) {
        if (prefixes.some((prefix) => modelName.startsWith(prefix))) {
            return provider;
        }
    }
    throw new Error(`Unknown provider for model: ${modelName}`);
}
|
|
162
|
+
/**
 * Assert that `model` can be used for extraction.
 *
 * A model is accepted when it is a non-empty string and
 * `getProviderForModel` recognises its provider.
 *
 * @param {string} model - Model id to validate.
 * @returns {void}
 * @throws {Error} If `model` is missing, not a string, or has no known
 *   provider. In the latter case the underlying error is attached as `cause`.
 */
export function assertValidModelExtraction(model) {
    if (!model || typeof model !== 'string') {
        throw new Error('Valid model must be provided for extraction');
    }
    // Additional validation logic can be added here
    try {
        getProviderForModel(model);
    }
    catch (error) {
        // Keep the original failure reachable for debugging instead of dropping it.
        throw new Error(`Invalid model for extraction: ${model}`, { cause: error });
    }
}
|
|
174
|
+
/**
 * Assert that `model` can be used for schema generation.
 *
 * Only an exact match against the fixed allow-list below is accepted.
 *
 * @param {string} model - Model id to validate.
 * @returns {void}
 * @throws {Error} If `model` is missing, not a string, or not in the allow-list.
 */
export function assertValidModelSchemaGeneration(model) {
    const isNonEmptyString = typeof model === 'string' && model.length > 0;
    if (!isNonEmptyString) {
        throw new Error('Valid model must be provided for schema generation');
    }
    const allowedModels = new Set(['gpt-4o-2024-11-20', 'gpt-4o-mini', 'gpt-4o']);
    if (allowedModels.has(model)) {
        return;
    }
    throw new Error(`Model ${model} not valid for schema generation`);
}
|
|
183
|
+
export { modelCards, modelCardsDict };
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Batch processing utilities for OpenAI Batch API and other providers
|
|
3
|
+
* Equivalent to Python's batch processing functionality
|
|
4
|
+
*/
|
|
5
|
+
/**
 * A single request entry of a batch (the element type produced by the
 * `batchUtils.create*Requests` helpers and written by `saveBatchRequests`).
 */
export interface BatchRequest {
    /** Caller-chosen id; presumably echoed back as `BatchResponse.custom_id` — confirm. */
    custom_id: string;
    /** HTTP method of the request. */
    method: 'POST' | 'GET' | 'PUT' | 'DELETE';
    /** Target of the request (NOTE(review): likely an endpoint path rather than a full URL — confirm). */
    url: string;
    /** Optional request payload. */
    body?: Record<string, any>;
    /** Optional extra request headers. */
    headers?: Record<string, string>;
}
|
|
12
|
+
/**
 * A single result entry of a batch (the element type returned by
 * `batchUtils.parseBatchResults`).
 */
export interface BatchResponse {
    /** Identifier of this result entry. */
    id: string;
    /** Caller-chosen id; presumably copied from the originating `BatchRequest` — confirm. */
    custom_id: string;
    /** The response recorded for the request. */
    response: {
        /** HTTP status code of the response. */
        status_code: number;
        /** Identifier of the underlying request. */
        request_id: string;
        /** Response payload. */
        body: Record<string, any>;
    };
    /** Error details; optional, presumably set only when the request failed — confirm. */
    error?: {
        code: string;
        message: string;
    };
}
|
|
25
|
+
/**
 * A batch job as returned by the `OpenAIBatchProcessor` methods
 * (`createBatch`, `getBatch`, `cancelBatch`, `listBatches`, `processBatch`).
 */
export interface BatchJob {
    /** Identifier of the batch job. */
    id: string;
    /** Object type discriminator; always `'batch'`. */
    object: 'batch';
    /** Endpoint the batch was created for. */
    endpoint: string;
    /** Optional list of errors attached to the job. */
    errors?: {
        object: 'list';
        data: Array<{
            code: string;
            message: string;
            /** Optional name of the offending parameter. */
            param?: string;
            /** Optional line number (NOTE(review): presumably a line of the input file — confirm). */
            line?: number;
        }>;
    };
    /** Id of the uploaded file holding the requests. */
    input_file_id: string;
    /** Completion window; `'24h'` is the only value this type allows. */
    completion_window: '24h';
    /** Current lifecycle state of the job. */
    status: 'validating' | 'failed' | 'in_progress' | 'finalizing' | 'completed' | 'expired' | 'cancelling' | 'cancelled';
    /** Id of the file holding the results, when available. */
    output_file_id?: string;
    /** Id of the file holding per-request errors, when available. */
    error_file_id?: string;
    // Lifecycle timestamps, one per state transition.
    // NOTE(review): unit is not visible here (presumably Unix seconds) — confirm.
    created_at: number;
    in_progress_at?: number;
    expires_at?: number;
    finalizing_at?: number;
    completed_at?: number;
    failed_at?: number;
    expired_at?: number;
    cancelling_at?: number;
    cancelled_at?: number;
    /** Per-job request counters. */
    request_counts: {
        total: number;
        completed: number;
        failed: number;
    };
    /** Optional free-form string metadata attached to the job. */
    metadata?: Record<string, string>;
}
|
|
59
|
+
/**
 * Constructor options of `OpenAIBatchProcessor`.
 */
export interface BatchProcessingOptions {
    /** API key used to authenticate requests. */
    apiKey: string;
    /** Base URL of the API; optional. */
    baseUrl?: string;
    /** Request timeout; optional (NOTE(review): presumably milliseconds — confirm). */
    timeout?: number;
    /** Maximum number of retries; optional (NOTE(review): confirm where this is consumed). */
    maxRetries?: number;
    /** Completion window; `'24h'` is the only value this type allows. */
    completionWindow?: '24h';
    /** Optional free-form string metadata. */
    metadata?: Record<string, string>;
}
|
|
67
|
+
/**
 * Progress snapshot yielded by `OpenAIBatchProcessor.monitorBatch`.
 */
export interface BatchProgressInfo {
    /** Id of the monitored batch job. */
    jobId: string;
    /** Job status at the time of the snapshot. */
    status: string;
    /** Request counters plus a derived completion percentage. */
    progress: {
        total: number;
        completed: number;
        failed: number;
        /** NOTE(review): scale (0–1 vs 0–100) is not visible here — confirm. */
        percentage: number;
    };
    /** Time elapsed since monitoring started (NOTE(review): unit not visible here — confirm). */
    timeElapsed: number;
    /** Optional estimate of the remaining time, in the same unit as `timeElapsed` presumably — confirm. */
    estimatedTimeRemaining?: number;
}
|
|
79
|
+
/**
|
|
80
|
+
* OpenAI Batch API client
|
|
81
|
+
*/
|
|
82
|
+
/**
 * Client for a batch API: uploads request files, creates and inspects batch
 * jobs, and downloads result files.
 */
export declare class OpenAIBatchProcessor {
    private apiKey;
    private baseUrl;
    private timeout;
    /**
     * @param options - API key plus optional connection settings.
     */
    constructor(options: BatchProcessingOptions);
    /**
     * Upload file for batch processing
     *
     * @param filePath - Path of the local file to upload.
     * @param purpose - Upload purpose; `'batch'` is the only value this type allows.
     * @returns The id and filename of the uploaded file.
     */
    uploadFile(filePath: string, purpose?: 'batch'): Promise<{
        id: string;
        filename: string;
    }>;
    /**
     * Create batch job
     *
     * @param inputFileId - Id of a previously uploaded requests file.
     * @param endpoint - Endpoint the batch targets.
     * @param completionWindow - Completion window; `'24h'` is the only value this type allows.
     * @param metadata - Optional string metadata to attach to the job.
     * @returns The created job.
     */
    createBatch(inputFileId: string, endpoint: string, completionWindow?: '24h', metadata?: Record<string, string>): Promise<BatchJob>;
    /**
     * Get batch job status
     *
     * @param batchId - Id of the batch job.
     */
    getBatch(batchId: string): Promise<BatchJob>;
    /**
     * Cancel batch job
     *
     * @param batchId - Id of the batch job.
     */
    cancelBatch(batchId: string): Promise<BatchJob>;
    /**
     * List batch jobs
     *
     * @param after - Optional pagination cursor (NOTE(review): presumably the id of the last job already seen — confirm).
     * @param limit - Optional maximum number of jobs to return.
     */
    listBatches(after?: string, limit?: number): Promise<{
        data: BatchJob[];
    }>;
    /**
     * Download file content
     *
     * @param fileId - Id of the file to download.
     * @returns The file content as a string.
     */
    downloadFile(fileId: string): Promise<string>;
    /**
     * Monitor batch job progress
     *
     * @param batchId - Id of the batch job.
     * @param pollInterval - Optional polling interval (NOTE(review): unit not visible here — confirm).
     * @returns An async generator yielding progress snapshots.
     */
    monitorBatch(batchId: string, pollInterval?: number): AsyncGenerator<BatchProgressInfo, void, unknown>;
    /**
     * Process batch end-to-end
     *
     * @param requestsFilePath - Path of the local requests file.
     * @param outputFilePath - Path where results are written.
     * @param endpoint - Optional endpoint the batch targets.
     * @param options - Optional job and monitoring settings.
     * @returns The batch job (NOTE(review): presumably in its final state — confirm).
     */
    processBatch(requestsFilePath: string, outputFilePath: string, endpoint?: string, options?: {
        completionWindow?: '24h';
        metadata?: Record<string, string>;
        pollInterval?: number;
        showProgress?: boolean;
    }): Promise<BatchJob>;
}
|
|
130
|
+
/**
|
|
131
|
+
* Utility functions for batch processing
|
|
132
|
+
*/
|
|
133
|
+
/**
 * Helper functions for building batch request files and reading batch results.
 */
export declare const batchUtils: {
    /**
     * Create batch requests for chat completions
     *
     * Takes one entry per conversation (its messages and an optional custom id),
     * an optional model and optional generation settings, and returns one
     * `BatchRequest` per entry.
     */
    createChatCompletionRequests: (messages: Array<{
        messages: any[];
        customId?: string;
    }>, model?: string, options?: {
        temperature?: number;
        maxTokens?: number;
        responseFormat?: {
            type: string;
        };
    }) => BatchRequest[];
    /**
     * Create batch requests for document extraction
     *
     * Takes one entry per document (the document, its schema and an optional
     * custom id) and an optional model, and returns one `BatchRequest` per entry.
     */
    createExtractionRequests: (documents: Array<{
        document: any;
        schema: any;
        customId?: string;
    }>, model?: string) => BatchRequest[];
    /**
     * Save batch requests to JSONL file
     */
    saveBatchRequests: (requests: BatchRequest[], filePath: string) => Promise<void>;
    /**
     * Parse batch results from JSONL file
     */
    parseBatchResults: (filePath: string) => Promise<BatchResponse[]>;
    /**
     * Extract successful results from batch responses
     *
     * Returns `{ customId, result }` pairs.
     */
    extractSuccessfulResults: (responses: BatchResponse[]) => Array<{
        customId: string;
        result: any;
    }>;
    /**
     * Extract failed results from batch responses
     *
     * Returns `{ customId, error }` pairs.
     */
    extractFailedResults: (responses: BatchResponse[]) => Array<{
        customId: string;
        error: any;
    }>;
};
|
|
178
|
+
/**
 * Shape of the module's default export: the `OpenAIBatchProcessor` class and a
 * `batchUtils` object with the same members as the named `batchUtils` export.
 */
declare const _default: {
    OpenAIBatchProcessor: typeof OpenAIBatchProcessor;
    batchUtils: {
        /**
         * Create batch requests for chat completions
         */
        createChatCompletionRequests: (messages: Array<{
            messages: any[];
            customId?: string;
        }>, model?: string, options?: {
            temperature?: number;
            maxTokens?: number;
            responseFormat?: {
                type: string;
            };
        }) => BatchRequest[];
        /**
         * Create batch requests for document extraction
         */
        createExtractionRequests: (documents: Array<{
            document: any;
            schema: any;
            customId?: string;
        }>, model?: string) => BatchRequest[];
        /**
         * Save batch requests to JSONL file
         */
        saveBatchRequests: (requests: BatchRequest[], filePath: string) => Promise<void>;
        /**
         * Parse batch results from JSONL file
         */
        parseBatchResults: (filePath: string) => Promise<BatchResponse[]>;
        /**
         * Extract successful results from batch responses
         */
        extractSuccessfulResults: (responses: BatchResponse[]) => Array<{
            customId: string;
            result: any;
        }>;
        /**
         * Extract failed results from batch responses
         */
        extractFailedResults: (responses: BatchResponse[]) => Array<{
            customId: string;
            error: any;
        }>;
    };
};
|
|
226
|
+
export default _default;
|
|
227
|
+
//# sourceMappingURL=batch_processing.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch_processing.d.ts","sourceRoot":"","sources":["../../src/utils/batch_processing.ts"],"names":[],"mappings":"AAIA;;;GAGG;AAEH,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,QAAQ,CAAC;IAC1C,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAC3B,CAAC;IACF,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,OAAO,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE;QACP,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,KAAK,CAAC;YACV,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,MAAM,CAAC;YAChB,KAAK,CAAC,EAAE,MAAM,CAAC;YACf,IAAI,CAAC,EAAE,MAAM,CAAC;SACf,CAAC,CAAC;KACJ,CAAC;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,iBAAiB,EAAE,KAAK,CAAC;IACzB,MAAM,EAAE,YAAY,GAAG,QAAQ,GAAG,aAAa,GAAG,YAAY,GAAG,WAAW,GAAG,SAAS,GAAG,YAAY,GAAG,WAAW,CAAC;IACtH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,KAAK,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAA
C;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,CAAC,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;gBACZ,OAAO,EAAE,sBAAsB;IAM3C;;OAEG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,OAAiB,GAAG,OAAO,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IA0BzG;;OAEG;IACG,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAChB,gBAAgB,GAAE,KAAa,EAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAChC,OAAO,CAAC,QAAQ,CAAC;IAqBpB;;OAEG;IACG,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IAWlD;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;IAgBrD;;OAEG;IACG,WAAW,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAW,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,QAAQ,EAAE,CAAA;KAAE,CAAC;IAepF;;OAEG;IACG,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAWnD;;OAEG;IACI,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,GAAE,MAAc,GAAG,cAAc,CAAC,iBAAiB,EAAE,IAAI,EAAE,OAAO,CAAC;IA0CpH;;OAEG;IACG,YAAY,CAChB,gBAAgB,EAAE,MAAM,EACxB,cAAc,EAAE,MAAM,EACtB,QAAQ,GAAE,MAA+B,EACzC,OAAO,GAAE;QACP,gBAAgB,CAAC,EAAE,KAAK,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAClC,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;KACnB,GACL,OAAO,CAAC,QAAQ,CAAC;CAuDrB;AAED;;GAEG;AACH,eAAO,MAAM,UAAU;IACrB;;OAEG;6CAES,KAAK,CAAC;QAAE,QAAQ,EAAE,GAAG,EAAE,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,UAChD,MAAM,YACJ;QACP,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,cAAc,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;KACnC,KACA,YAAY,EAAE;IAejB;;OAEG;0CAEU,KAAK,CAAC;QAAE,QAAQ,EAAE,GAAG,CAAC;QAAC,MAAM,EAAE,GAAG,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,UAC5D,MAAM,KACZ,YAAY,EAAE;IAajB;;OAEG;kCACiC,YAAY,EAAE,YAAY,MAAM,KAAG,OAAO,CAAC,IAAI,CAAC;IAKpF;;OAEG;kCACiC,MAAM,KAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IAKrE;;OAEG;0CACmC,aAAa,EAAE,KAAG,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,GAAG,CAAA;KAAE,CAAC;IAShG;;OAEG;sCAC+B,aAAa,EAAE,KAAG,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,
CAAC;QAAC,KAAK,EAAE,GAAG,CAAA;KAAE,CAAC;CAQ5F,CAAC;;;;QApFA;;WAEG;iDAES,KAAK,CAAC;YAAE,QAAQ,EAAE,GAAG,EAAE,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,UAChD,MAAM,YACJ;YACP,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,SAAS,CAAC,EAAE,MAAM,CAAC;YACnB,cAAc,CAAC,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAA;aAAE,CAAC;SACnC,KACA,YAAY,EAAE;QAejB;;WAEG;8CAEU,KAAK,CAAC;YAAE,QAAQ,EAAE,GAAG,CAAC;YAAC,MAAM,EAAE,GAAG,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,UAC5D,MAAM,KACZ,YAAY,EAAE;QAajB;;WAEG;sCACiC,YAAY,EAAE,YAAY,MAAM,KAAG,OAAO,CAAC,IAAI,CAAC;QAKpF;;WAEG;sCACiC,MAAM,KAAG,OAAO,CAAC,aAAa,EAAE,CAAC;QAKrE;;WAEG;8CACmC,aAAa,EAAE,KAAG,KAAK,CAAC;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,GAAG,CAAA;SAAE,CAAC;QAShG;;WAEG;0CAC+B,aAAa,EAAE,KAAG,KAAK,CAAC;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,GAAG,CAAA;SAAE,CAAC;;;AAU7F,wBAGE"}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import axios from 'axios';
|
|
3
|
+
import { readJSONL, writeJSONL } from './jsonl.js';
|
|
4
|
+
/**
 * OpenAI Batch API client.
 *
 * Thin wrapper around the `/files` and `/batches` REST endpoints (called
 * through axios) plus an end-to-end helper, `processBatch`, that uploads a
 * request file, creates a batch, polls it until it reaches a terminal status
 * and writes the output file to disk.
 */
export class OpenAIBatchProcessor {
    /**
     * @param {object} options
     * @param {string} options.apiKey - Sent as `Authorization: Bearer <apiKey>` on every request.
     * @param {string} [options.baseUrl] - API root; defaults to `https://api.openai.com/v1`.
     * @param {number} [options.timeout] - Per-request timeout in milliseconds; defaults to 300000 (5 minutes).
     */
    constructor(options) {
        this.apiKey = options.apiKey;
        this.baseUrl = options.baseUrl || 'https://api.openai.com/v1';
        // `??` instead of `||`: an explicit 0 (axios' "no timeout") must not be
        // silently replaced by the 5 minute default.
        this.timeout = options.timeout ?? 300000; // 5 minutes
    }

    /**
     * Upload file for batch processing.
     *
     * @param {string} filePath - Path of the local file to upload.
     * @param {string} [purpose='batch'] - Value of the `purpose` form field.
     * @returns {Promise<{id: string, filename: string}>} `id` and `filename` from the API response.
     * @throws {Error} If `filePath` does not exist, or if the HTTP request fails.
     */
    async uploadFile(filePath, purpose = 'batch') {
        if (!fs.existsSync(filePath)) {
            throw new Error(`File not found: ${filePath}`);
        }
        const formData = new FormData();
        const fileBuffer = fs.readFileSync(filePath);
        const blob = new Blob([fileBuffer]);
        // Split on both separators so Windows-style paths also yield just the
        // file name instead of the whole path.
        const fileName = filePath.split(/[\\/]/).pop();
        formData.append('file', blob, fileName);
        formData.append('purpose', purpose);
        const response = await axios.post(`${this.baseUrl}/files`, formData, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
                'Content-Type': 'multipart/form-data',
            },
            timeout: this.timeout,
        });
        return {
            id: response.data.id,
            filename: response.data.filename,
        };
    }

    /**
     * Create batch job.
     *
     * @param {string} inputFileId - Id of a previously uploaded input file.
     * @param {string} endpoint - Endpoint the batched requests target.
     * @param {string} [completionWindow='24h'] - Sent as `completion_window`.
     * @param {Object<string, string>} [metadata] - Optional metadata forwarded as-is.
     * @returns {Promise<object>} The response body (the created batch).
     */
    async createBatch(inputFileId, endpoint, completionWindow = '24h', metadata) {
        const response = await axios.post(`${this.baseUrl}/batches`, {
            input_file_id: inputFileId,
            endpoint,
            completion_window: completionWindow,
            metadata,
        }, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
                'Content-Type': 'application/json',
            },
            timeout: this.timeout,
        });
        return response.data;
    }

    /**
     * Get batch job status.
     *
     * @param {string} batchId
     * @returns {Promise<object>} The response body (the batch).
     */
    async getBatch(batchId) {
        const response = await axios.get(`${this.baseUrl}/batches/${batchId}`, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
            },
            timeout: this.timeout,
        });
        return response.data;
    }

    /**
     * Cancel batch job.
     *
     * @param {string} batchId
     * @returns {Promise<object>} The response body (the batch).
     */
    async cancelBatch(batchId) {
        const response = await axios.post(`${this.baseUrl}/batches/${batchId}/cancel`, {}, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
                'Content-Type': 'application/json',
            },
            timeout: this.timeout,
        });
        return response.data;
    }

    /**
     * List batch jobs.
     *
     * @param {string} [after] - Pagination cursor; omitted from the query when falsy.
     * @param {number} [limit=20] - Page size.
     * @returns {Promise<object>} The response body.
     */
    async listBatches(after, limit = 20) {
        const params = new URLSearchParams();
        if (after) {
            params.append('after', after);
        }
        params.append('limit', limit.toString());
        const response = await axios.get(`${this.baseUrl}/batches?${params}`, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
            },
            timeout: this.timeout,
        });
        return response.data;
    }

    /**
     * Download file content as raw text.
     *
     * @param {string} fileId
     * @returns {Promise<string>} The file content, unparsed.
     */
    async downloadFile(fileId) {
        const response = await axios.get(`${this.baseUrl}/files/${fileId}/content`, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
            },
            timeout: this.timeout,
            // Keep the payload as a string. With axios' default JSON handling a
            // JSONL file that happens to contain a single line would be parsed
            // into an object, which callers writing it to disk cannot handle.
            responseType: 'text',
            transformResponse: [(data) => data],
        });
        return response.data;
    }

    /**
     * Monitor batch job progress.
     *
     * Polls `getBatch` and yields one progress snapshot per poll. The generator
     * finishes after yielding a snapshot whose status is `completed`, `failed`,
     * `cancelled` or `expired`.
     *
     * @param {string} batchId
     * @param {number} [pollInterval=30000] - Delay between polls in milliseconds.
     * @yields {{jobId: string, status: string, progress: {total: number, completed: number, failed: number, percentage: number}, timeElapsed: number, estimatedTimeRemaining: (number|undefined)}}
     */
    async *monitorBatch(batchId, pollInterval = 30000) {
        const terminalStatuses = ['completed', 'failed', 'cancelled', 'expired'];
        const startTime = Date.now();
        let lastProgress = 0;
        while (true) {
            const batch = await this.getBatch(batchId);
            const timeElapsed = Date.now() - startTime;
            // Tolerate a missing or partial `request_counts` instead of throwing.
            const counts = batch.request_counts ?? {};
            const total = counts.total ?? 0;
            const completed = counts.completed ?? 0;
            const failed = counts.failed ?? 0;
            const progress = {
                total,
                completed,
                failed,
                percentage: total > 0 ? (completed / total) * 100 : 0,
            };
            // Estimate time remaining. Only refreshed on polls where the
            // completed count advanced; otherwise it stays undefined.
            let estimatedTimeRemaining;
            if (completed > lastProgress) {
                const completionRate = completed / timeElapsed;
                estimatedTimeRemaining = (total - completed) / completionRate;
            }
            yield {
                jobId: batchId,
                status: batch.status,
                progress,
                timeElapsed,
                estimatedTimeRemaining,
            };
            // Break if job is complete
            if (terminalStatuses.includes(batch.status)) {
                break;
            }
            lastProgress = completed;
            await new Promise(resolve => setTimeout(resolve, pollInterval));
        }
    }

    /**
     * Process batch end-to-end: upload, create, wait, download.
     *
     * Failures of the batch itself are logged, not thrown; inspect the
     * returned batch's `status` to detect them.
     *
     * @param {string} requestsFilePath - Local request file to upload.
     * @param {string} outputFilePath - Where the output file content is written on success.
     * @param {string} [endpoint='/v1/chat/completions']
     * @param {object} [options]
     * @param {string} [options.completionWindow='24h']
     * @param {Object<string, string>} [options.metadata]
     * @param {number} [options.pollInterval=30000] - Delay between status polls in milliseconds.
     * @param {boolean} [options.showProgress=true] - Whether to log a line per poll.
     * @returns {Promise<object>} The batch as fetched after polling finished.
     */
    async processBatch(requestsFilePath, outputFilePath, endpoint = '/v1/chat/completions', options = {}) {
        const { completionWindow = '24h', metadata, pollInterval = 30000, showProgress = true, } = options;
        console.log('🚀 Starting batch processing...');
        // Upload input file
        console.log('📤 Uploading input file...');
        const uploadResult = await this.uploadFile(requestsFilePath);
        console.log(`✅ File uploaded: ${uploadResult.id}`);
        // Create batch job
        console.log('⚙️ Creating batch job...');
        const batch = await this.createBatch(uploadResult.id, endpoint, completionWindow, metadata);
        console.log(`✅ Batch created: ${batch.id}`);
        // Monitor progress. The wait happens regardless of `showProgress`;
        // the flag only controls logging. Skipping the wait would inspect a
        // batch that has barely been created and misreport it as failed.
        if (showProgress) {
            console.log('📊 Monitoring batch progress...');
        }
        for await (const progress of this.monitorBatch(batch.id, pollInterval)) {
            if (!showProgress) {
                continue;
            }
            const { percentage, completed, total } = progress.progress;
            const timeStr = `${Math.round(progress.timeElapsed / 1000)}s`;
            const etaStr = progress.estimatedTimeRemaining ?
                ` (ETA: ${Math.round(progress.estimatedTimeRemaining / 1000)}s)` : '';
            console.log(` Status: ${progress.status} - ${percentage.toFixed(1)}% (${completed}/${total}) - ${timeStr}${etaStr}`);
        }
        // Get final batch status
        const finalBatch = await this.getBatch(batch.id);
        if (finalBatch.status === 'completed' && finalBatch.output_file_id) {
            console.log('📥 Downloading results...');
            const results = await this.downloadFile(finalBatch.output_file_id);
            fs.writeFileSync(outputFilePath, results);
            console.log(`✅ Results saved to: ${outputFilePath}`);
        }
        else {
            console.error(`❌ Batch failed with status: ${finalBatch.status}`);
            if (finalBatch.error_file_id) {
                const errors = await this.downloadFile(finalBatch.error_file_id);
                console.error('Error details:', errors);
            }
        }
        return finalBatch;
    }
}
|
|
192
|
+
/**
 * Utility functions for batch processing: building request lines, reading and
 * writing them as JSONL, and splitting result lines into successes and
 * failures.
 */
export const batchUtils = {
    /**
     * Create batch requests for chat completions.
     *
     * @param {Array<{messages: any[], customId?: string}>} messages - One entry per request.
     * @param {string} [model='gpt-4o-mini']
     * @param {{temperature?: number, maxTokens?: number, responseFormat?: {type: string}}} [options]
     * @returns {object[]} One request object per entry; `custom_id` falls back to `request_<index>`.
     */
    createChatCompletionRequests: (messages, model = 'gpt-4o-mini', options = {}) => {
        return messages.map((msg, index) => ({
            custom_id: msg.customId || `request_${index}`,
            method: 'POST',
            url: '/v1/chat/completions',
            body: {
                model,
                messages: msg.messages,
                temperature: options.temperature || 0.0,
                max_tokens: options.maxTokens,
                response_format: options.responseFormat,
            },
        }));
    },
    /**
     * Create batch requests for document extraction.
     *
     * @param {Array<{document: any, schema: any, customId?: string}>} documents - One entry per request.
     * @param {string} [model='gpt-4o-mini']
     * @returns {object[]} One request object per entry; `custom_id` falls back to `extraction_<index>`.
     */
    createExtractionRequests: (documents, model = 'gpt-4o-mini') => {
        return documents.map((doc, index) => ({
            custom_id: doc.customId || `extraction_${index}`,
            method: 'POST',
            url: '/v1/documents/extractions',
            body: {
                json_schema: doc.schema,
                documents: [doc.document],
                model,
            },
        }));
    },
    /**
     * Save batch requests to JSONL file.
     *
     * @param {object[]} requests
     * @param {string} filePath
     * @returns {Promise<void>}
     */
    saveBatchRequests: async (requests, filePath) => {
        await writeJSONL(filePath, requests);
        console.log(`📄 Saved ${requests.length} batch requests to ${filePath}`);
    },
    /**
     * Parse batch results from JSONL file.
     *
     * @param {string} filePath
     * @returns {Promise<object[]>} Whatever `readJSONL` yields for the file.
     */
    parseBatchResults: async (filePath) => {
        const results = await readJSONL(filePath);
        return results;
    },
    /**
     * Extract successful results from batch responses.
     *
     * An entry counts as successful when it has no `error` and its
     * `response.status_code` is 200.
     *
     * @param {object[]} responses - Parsed result lines.
     * @returns {Array<{customId: string, result: any}>}
     */
    extractSuccessfulResults: (responses) => {
        return responses
            // `?.` so an entry without a `response` object is simply not a
            // success instead of raising a TypeError.
            .filter(response => !response.error && response.response?.status_code === 200)
            .map(response => ({
            customId: response.custom_id,
            result: response.response.body,
        }));
    },
    /**
     * Extract failed results from batch responses.
     *
     * The complement of `extractSuccessfulResults`: entries with an `error`,
     * a non-200 status, or no `response` at all. When no `error` is present a
     * synthetic `http_error` is produced.
     *
     * @param {object[]} responses - Parsed result lines.
     * @returns {Array<{customId: string, error: any}>}
     */
    extractFailedResults: (responses) => {
        return responses
            .filter(response => response.error || response.response?.status_code !== 200)
            .map(response => ({
            customId: response.custom_id,
            error: response.error || {
                code: 'http_error',
                message: `HTTP ${response.response?.status_code ?? 'unknown'}`,
            },
        }));
    },
};
|
|
265
|
+
/**
 * Default export: the batch client class and the helper collection bundled
 * in one object, mirroring the named exports of this module.
 */
export default { OpenAIBatchProcessor, batchUtils };
|