ai-functions 0.2.19 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -0
- package/.turbo/turbo-test.log +105 -0
- package/README.md +232 -37
- package/TODO.md +138 -0
- package/dist/ai-promise.d.ts +219 -0
- package/dist/ai-promise.d.ts.map +1 -0
- package/dist/ai-promise.js +610 -0
- package/dist/ai-promise.js.map +1 -0
- package/dist/ai.d.ts +285 -0
- package/dist/ai.d.ts.map +1 -0
- package/dist/ai.js +842 -0
- package/dist/ai.js.map +1 -0
- package/dist/batch/anthropic.d.ts +23 -0
- package/dist/batch/anthropic.d.ts.map +1 -0
- package/dist/batch/anthropic.js +257 -0
- package/dist/batch/anthropic.js.map +1 -0
- package/dist/batch/bedrock.d.ts +64 -0
- package/dist/batch/bedrock.d.ts.map +1 -0
- package/dist/batch/bedrock.js +586 -0
- package/dist/batch/bedrock.js.map +1 -0
- package/dist/batch/cloudflare.d.ts +37 -0
- package/dist/batch/cloudflare.d.ts.map +1 -0
- package/dist/batch/cloudflare.js +289 -0
- package/dist/batch/cloudflare.js.map +1 -0
- package/dist/batch/google.d.ts +41 -0
- package/dist/batch/google.d.ts.map +1 -0
- package/dist/batch/google.js +360 -0
- package/dist/batch/google.js.map +1 -0
- package/dist/batch/index.d.ts +31 -0
- package/dist/batch/index.d.ts.map +1 -0
- package/dist/batch/index.js +31 -0
- package/dist/batch/index.js.map +1 -0
- package/dist/batch/memory.d.ts +44 -0
- package/dist/batch/memory.d.ts.map +1 -0
- package/dist/batch/memory.js +188 -0
- package/dist/batch/memory.js.map +1 -0
- package/dist/batch/openai.d.ts +37 -0
- package/dist/batch/openai.d.ts.map +1 -0
- package/dist/batch/openai.js +403 -0
- package/dist/batch/openai.js.map +1 -0
- package/dist/batch-map.d.ts +125 -0
- package/dist/batch-map.d.ts.map +1 -0
- package/dist/batch-map.js +406 -0
- package/dist/batch-map.js.map +1 -0
- package/dist/batch-queue.d.ts +273 -0
- package/dist/batch-queue.d.ts.map +1 -0
- package/dist/batch-queue.js +271 -0
- package/dist/batch-queue.js.map +1 -0
- package/dist/context.d.ts +133 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +267 -0
- package/dist/context.js.map +1 -0
- package/dist/embeddings.d.ts +123 -0
- package/dist/embeddings.d.ts.map +1 -0
- package/dist/embeddings.js +170 -0
- package/dist/embeddings.js.map +1 -0
- package/dist/eval/index.d.ts +8 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +8 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/models.d.ts +66 -0
- package/dist/eval/models.d.ts.map +1 -0
- package/dist/eval/models.js +120 -0
- package/dist/eval/models.js.map +1 -0
- package/dist/eval/runner.d.ts +64 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +148 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/generate.d.ts +168 -0
- package/dist/generate.d.ts.map +1 -0
- package/dist/generate.js +174 -0
- package/dist/generate.js.map +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +54 -0
- package/dist/index.js.map +1 -0
- package/dist/primitives.d.ts +292 -0
- package/dist/primitives.d.ts.map +1 -0
- package/dist/primitives.js +471 -0
- package/dist/primitives.js.map +1 -0
- package/dist/providers/cloudflare.d.ts +9 -0
- package/dist/providers/cloudflare.d.ts.map +1 -0
- package/dist/providers/cloudflare.js +9 -0
- package/dist/providers/cloudflare.js.map +1 -0
- package/dist/providers/index.d.ts +9 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +9 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/schema.d.ts +54 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +109 -0
- package/dist/schema.js.map +1 -0
- package/dist/template.d.ts +73 -0
- package/dist/template.d.ts.map +1 -0
- package/dist/template.js +129 -0
- package/dist/template.js.map +1 -0
- package/dist/types.d.ts +481 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/evalite.config.ts +19 -0
- package/evals/README.md +212 -0
- package/evals/classification.eval.ts +108 -0
- package/evals/marketing.eval.ts +370 -0
- package/evals/math.eval.ts +94 -0
- package/evals/run-evals.ts +166 -0
- package/evals/structured-output.eval.ts +143 -0
- package/evals/writing.eval.ts +117 -0
- package/examples/batch-blog-posts.ts +160 -0
- package/package.json +59 -43
- package/src/ai-promise.ts +784 -0
- package/src/ai.ts +1183 -0
- package/src/batch/anthropic.ts +375 -0
- package/src/batch/bedrock.ts +801 -0
- package/src/batch/cloudflare.ts +421 -0
- package/src/batch/google.ts +491 -0
- package/src/batch/index.ts +31 -0
- package/src/batch/memory.ts +253 -0
- package/src/batch/openai.ts +557 -0
- package/src/batch-map.ts +534 -0
- package/src/batch-queue.ts +493 -0
- package/src/context.ts +332 -0
- package/src/embeddings.ts +244 -0
- package/src/eval/index.ts +8 -0
- package/src/eval/models.ts +158 -0
- package/src/eval/runner.ts +217 -0
- package/src/generate.ts +245 -0
- package/src/index.ts +154 -0
- package/src/primitives.ts +612 -0
- package/src/providers/cloudflare.ts +15 -0
- package/src/providers/index.ts +14 -0
- package/src/schema.ts +147 -0
- package/src/template.ts +209 -0
- package/src/types.ts +540 -0
- package/test/README.md +105 -0
- package/test/ai-proxy.test.ts +192 -0
- package/test/async-iterators.test.ts +327 -0
- package/test/batch-background.test.ts +482 -0
- package/test/batch-blog-posts.test.ts +387 -0
- package/test/blog-generation.test.ts +510 -0
- package/test/browse-read.test.ts +611 -0
- package/test/core-functions.test.ts +694 -0
- package/test/decide.test.ts +393 -0
- package/test/define.test.ts +274 -0
- package/test/e2e-bedrock-manual.ts +163 -0
- package/test/e2e-bedrock.test.ts +191 -0
- package/test/e2e-flex-gateway.ts +157 -0
- package/test/e2e-flex-manual.ts +183 -0
- package/test/e2e-flex.test.ts +209 -0
- package/test/e2e-google-manual.ts +178 -0
- package/test/e2e-google.test.ts +216 -0
- package/test/embeddings.test.ts +284 -0
- package/test/evals/define-function.eval.test.ts +379 -0
- package/test/evals/primitives.eval.test.ts +384 -0
- package/test/function-types.test.ts +492 -0
- package/test/generate-core.test.ts +319 -0
- package/test/generate.test.ts +163 -0
- package/test/implicit-batch.test.ts +422 -0
- package/test/schema.test.ts +109 -0
- package/test/tagged-templates.test.ts +302 -0
- package/tsconfig.json +8 -6
- package/vitest.config.ts +42 -0
- package/LICENSE +0 -21
- package/db/cache.ts +0 -6
- package/db/mongo.ts +0 -75
- package/dist/mjs/db/cache.d.ts +0 -1
- package/dist/mjs/db/cache.js +0 -5
- package/dist/mjs/db/mongo.d.ts +0 -31
- package/dist/mjs/db/mongo.js +0 -48
- package/dist/mjs/examples/data.d.ts +0 -1105
- package/dist/mjs/examples/data.js +0 -1105
- package/dist/mjs/functions/ai.d.ts +0 -20
- package/dist/mjs/functions/ai.js +0 -83
- package/dist/mjs/functions/ai.test.d.ts +0 -1
- package/dist/mjs/functions/ai.test.js +0 -29
- package/dist/mjs/functions/gpt.d.ts +0 -4
- package/dist/mjs/functions/gpt.js +0 -10
- package/dist/mjs/functions/list.d.ts +0 -7
- package/dist/mjs/functions/list.js +0 -72
- package/dist/mjs/index.d.ts +0 -3
- package/dist/mjs/index.js +0 -3
- package/dist/mjs/queue/kafka.d.ts +0 -0
- package/dist/mjs/queue/kafka.js +0 -1
- package/dist/mjs/queue/memory.d.ts +0 -0
- package/dist/mjs/queue/memory.js +0 -1
- package/dist/mjs/queue/mongo.d.ts +0 -30
- package/dist/mjs/queue/mongo.js +0 -42
- package/dist/mjs/streams/kafka.d.ts +0 -0
- package/dist/mjs/streams/kafka.js +0 -1
- package/dist/mjs/streams/memory.d.ts +0 -0
- package/dist/mjs/streams/memory.js +0 -1
- package/dist/mjs/streams/mongo.d.ts +0 -0
- package/dist/mjs/streams/mongo.js +0 -1
- package/dist/mjs/streams/types.d.ts +0 -0
- package/dist/mjs/streams/types.js +0 -1
- package/dist/mjs/types.d.ts +0 -11
- package/dist/mjs/types.js +0 -1
- package/dist/mjs/utils/completion.d.ts +0 -9
- package/dist/mjs/utils/completion.js +0 -20
- package/dist/mjs/utils/schema.d.ts +0 -10
- package/dist/mjs/utils/schema.js +0 -72
- package/dist/mjs/utils/schema.test.d.ts +0 -1
- package/dist/mjs/utils/schema.test.js +0 -60
- package/dist/mjs/utils/state.d.ts +0 -1
- package/dist/mjs/utils/state.js +0 -19
- package/examples/data.ts +0 -1105
- package/fixup +0 -11
- package/functions/ai.test.ts +0 -41
- package/functions/ai.ts +0 -115
- package/functions/gpt.ts +0 -12
- package/functions/list.ts +0 -84
- package/index.ts +0 -3
- package/queue/kafka.ts +0 -0
- package/queue/memory.ts +0 -0
- package/queue/mongo.ts +0 -88
- package/streams/kafka.ts +0 -0
- package/streams/memory.ts +0 -0
- package/streams/mongo.ts +0 -0
- package/streams/types.ts +0 -0
- package/tsconfig-backup.json +0 -105
- package/tsconfig-base.json +0 -26
- package/tsconfig-cjs.json +0 -8
- package/types.ts +0 -12
- package/utils/completion.ts +0 -28
- package/utils/schema.test.ts +0 -69
- package/utils/schema.ts +0 -74
- package/utils/state.ts +0 -23
package/src/batch/bedrock.ts
@@ -0,0 +1,801 @@
+/**
+ * AWS Bedrock Batch Inference Adapter
+ *
+ * Implements batch processing using AWS Bedrock's batch inference API.
+ * Bedrock batch inference provides cost-effective processing for large workloads.
+ *
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html
+ *
+ * @packageDocumentation
+ */
+
+import {
+  registerBatchAdapter,
+  registerFlexAdapter,
+  type BatchAdapter,
+  type FlexAdapter,
+  type BatchItem,
+  type BatchJob,
+  type BatchQueueOptions,
+  type BatchResult,
+  type BatchSubmitResult,
+  type BatchStatus,
+} from '../batch-queue.js'
+import { schema as convertSchema } from '../schema.js'
+
+// ============================================================================
+// Types
+// ============================================================================
+
+interface BedrockBatchRequest {
+  recordId: string
+  modelInput: {
+    anthropic_version?: string
+    max_tokens: number
+    messages: Array<{ role: string; content: string }>
+    system?: string
+    temperature?: number
+  }
+}
+
+interface BedrockBatchResponse {
+  recordId: string
+  modelOutput?: {
+    content: Array<{ type: string; text?: string }>
+    usage: {
+      input_tokens: number
+      output_tokens: number
+    }
+    stop_reason: string
+  }
+  error?: {
+    errorCode: string
+    errorMessage: string
+  }
+}
+
+interface BedrockBatchJobStatus {
+  jobArn: string
+  jobName: string
+  status: 'Submitted' | 'InProgress' | 'Completed' | 'Failed' | 'Stopping' | 'Stopped'
+  modelId: string
+  inputDataConfig: {
+    s3InputDataConfig: {
+      s3Uri: string
+    }
+  }
+  outputDataConfig: {
+    s3OutputDataConfig: {
+      s3Uri: string
+    }
+  }
+  creationTime: string
+  lastModifiedTime: string
+  endTime?: string
+  failureMessage?: string
+  statistics?: {
+    inputRecordCount: number
+    outputRecordCount: number
+    errorCount: number
+  }
+}
+
+// ============================================================================
+// AWS Configuration
+// ============================================================================
+
+let awsRegion: string | undefined
+let awsAccessKeyId: string | undefined
+let awsSecretAccessKey: string | undefined
+let awsSessionToken: string | undefined
+let s3Bucket: string | undefined
+let roleArn: string | undefined
+
+// AI Gateway configuration (optional - for routing through Cloudflare AI Gateway)
+let gatewayUrl: string | undefined
+let gatewayToken: string | undefined
+
+/**
+ * Configure AWS credentials and settings
+ */
+export function configureAWSBedrock(options: {
+  region?: string
+  accessKeyId?: string
+  secretAccessKey?: string
+  sessionToken?: string
+  s3Bucket?: string
+  roleArn?: string
+  /** Optional: Cloudflare AI Gateway URL for routing requests */
+  gatewayUrl?: string
+  /** Optional: Cloudflare AI Gateway token */
+  gatewayToken?: string
+}): void {
+  if (options.region) awsRegion = options.region
+  if (options.accessKeyId) awsAccessKeyId = options.accessKeyId
+  if (options.secretAccessKey) awsSecretAccessKey = options.secretAccessKey
+  if (options.sessionToken) awsSessionToken = options.sessionToken
+  if (options.s3Bucket) s3Bucket = options.s3Bucket
+  if (options.roleArn) roleArn = options.roleArn
+  if (options.gatewayUrl) gatewayUrl = options.gatewayUrl
+  if (options.gatewayToken) gatewayToken = options.gatewayToken
+}
+
+function getConfig() {
+  const region = awsRegion || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || 'us-east-1'
+  const accessKeyId = awsAccessKeyId || process.env.AWS_ACCESS_KEY_ID
+  const secretAccessKey = awsSecretAccessKey || process.env.AWS_SECRET_ACCESS_KEY
+  const sessionToken = awsSessionToken || process.env.AWS_SESSION_TOKEN
+  const bucket = s3Bucket || process.env.BEDROCK_BATCH_S3_BUCKET
+  const role = roleArn || process.env.BEDROCK_BATCH_ROLE_ARN
+
+  // Check for AI Gateway configuration
+  const gwUrl = gatewayUrl || process.env.AI_GATEWAY_URL
+  const gwToken = gatewayToken || process.env.AI_GATEWAY_TOKEN
+
+  // If using gateway, we don't need AWS credentials
+  if (gwUrl && gwToken) {
+    return {
+      region,
+      accessKeyId: accessKeyId || '',
+      secretAccessKey: secretAccessKey || '',
+      sessionToken,
+      bucket: bucket || '',
+      role,
+      gatewayUrl: gwUrl,
+      gatewayToken: gwToken,
+    }
+  }
+
+  if (!accessKeyId || !secretAccessKey) {
+    throw new Error('AWS credentials not configured. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY, or use AI_GATEWAY_URL and AI_GATEWAY_TOKEN')
+  }
+
+  if (!bucket) {
+    throw new Error('S3 bucket for Bedrock batch not configured. Set BEDROCK_BATCH_S3_BUCKET')
+  }
+
+  return { region, accessKeyId, secretAccessKey, sessionToken, bucket, role, gatewayUrl: undefined, gatewayToken: undefined }
+}
+
+// ============================================================================
+// AWS Signature V4 (Simplified)
+// ============================================================================
+
+async function signRequest(
+  method: string,
+  url: string,
+  body: string,
+  config: ReturnType<typeof getConfig>,
+  service: string
+): Promise<Headers> {
+  // In production, use @aws-sdk/signature-v4 or similar
+  // This is a simplified implementation for demonstration
+
+  const headers = new Headers({
+    'Content-Type': 'application/json',
+    'X-Amz-Date': new Date().toISOString().replace(/[:-]|\.\d{3}/g, ''),
+  })
+
+  if (config.sessionToken) {
+    headers.set('X-Amz-Security-Token', config.sessionToken)
+  }
+
+  // For actual implementation, compute proper AWS Signature V4
+  // This requires crypto operations that vary by environment
+
+  // Fallback: Use AWS SDK if available
+  try {
+    // Dynamic import to avoid build-time dependency
+    // @ts-expect-error - Optional dependency
+    const signatureV4Module = await import('@smithy/signature-v4')
+    // @ts-expect-error - Optional dependency
+    const sha256Module = await import('@aws-crypto/sha256-js')
+
+    const SignatureV4 = signatureV4Module.SignatureV4
+    const Sha256 = sha256Module.Sha256
+
+    const signer = new SignatureV4({
+      service,
+      region: config.region,
+      credentials: {
+        accessKeyId: config.accessKeyId,
+        secretAccessKey: config.secretAccessKey,
+        sessionToken: config.sessionToken,
+      },
+      sha256: Sha256,
+    })
+
+    const signedRequest = await signer.sign({
+      method,
+      headers: Object.fromEntries(headers.entries()),
+      hostname: new URL(url).hostname,
+      path: new URL(url).pathname,
+      body,
+    })
+
+    return new Headers(signedRequest.headers as Record<string, string>)
+  } catch {
+    // AWS SDK not available - return basic headers
+    // In production, the SDK should always be available
+    console.warn('AWS SDK not available for request signing. Install @smithy/signature-v4 and @aws-crypto/sha256-js')
+    return headers
+  }
+}
+
+// ============================================================================
+// In-memory job tracking
+// ============================================================================
+
+const pendingJobs = new Map<string, {
+  items: BatchItem[]
+  options: BatchQueueOptions
+  jobArn?: string
+  results: BatchResult[]
+  status: BatchStatus
+  createdAt: Date
+  completedAt?: Date
+}>()
+
+let jobCounter = 0
+
+// ============================================================================
+// Bedrock Batch Adapter
+// ============================================================================
+
+/**
+ * AWS Bedrock batch adapter
+ *
+ * Bedrock batch inference:
+ * 1. Uploads input JSONL to S3
+ * 2. Creates a batch inference job
+ * 3. Results are written to S3
+ * 4. Download and parse results
+ *
+ * Note: This requires S3 bucket access and proper IAM roles.
+ */
+const bedrockAdapter: BatchAdapter = {
+  async submit(items: BatchItem[], options: BatchQueueOptions): Promise<BatchSubmitResult> {
+    const config = getConfig()
+    const jobId = `bedrock_batch_${++jobCounter}_${Date.now()}`
+
+    // Default to Claude on Bedrock
+    const model = options.model || 'anthropic.claude-3-sonnet-20240229-v1:0'
+
+    // Store job state
+    pendingJobs.set(jobId, {
+      items,
+      options,
+      results: [],
+      status: 'pending',
+      createdAt: new Date(),
+    })
+
+    // For true Bedrock batch processing:
+    // 1. Create JSONL file with requests
+    // 2. Upload to S3
+    // 3. Create batch inference job via Bedrock API
+    // 4. Poll for completion
+    // 5. Download and parse results from S3
+
+    // For now, we implement a concurrent processing approach
+    // (similar to Cloudflare) that works without S3 setup
+    const completion = processBedrockRequestsConcurrently(jobId, items, config, model, options)
+
+    const job: BatchJob = {
+      id: jobId,
+      provider: 'bedrock',
+      status: 'pending',
+      totalItems: items.length,
+      completedItems: 0,
+      failedItems: 0,
+      createdAt: new Date(),
+      webhookUrl: options.webhookUrl,
+    }
+
+    return { job, completion }
+  },
+
+  async getStatus(batchId: string): Promise<BatchJob> {
+    const job = pendingJobs.get(batchId)
+    if (!job) {
+      throw new Error(`Batch not found: ${batchId}`)
+    }
+
+    const completedItems = job.results.filter((r) => r.status === 'completed').length
+    const failedItems = job.results.filter((r) => r.status === 'failed').length
+
+    return {
+      id: batchId,
+      provider: 'bedrock',
+      status: job.status,
+      totalItems: job.items.length,
+      completedItems,
+      failedItems,
+      createdAt: job.createdAt,
+      completedAt: job.completedAt,
+    }
+  },
+
+  async cancel(batchId: string): Promise<void> {
+    const job = pendingJobs.get(batchId)
+    if (job) {
+      job.status = 'cancelled'
+
+      // If we have a Bedrock job ARN, cancel it
+      if (job.jobArn) {
+        const config = getConfig()
+        const url = `https://bedrock.${config.region}.amazonaws.com/model-invocation-job/${encodeURIComponent(job.jobArn)}/stop`
+
+        try {
+          await fetch(url, {
+            method: 'POST',
+            headers: await signRequest('POST', url, '', config, 'bedrock'),
+          })
+        } catch (error) {
+          console.warn('Failed to cancel Bedrock job:', error)
+        }
+      }
+    }
+  },
+
+  async getResults(batchId: string): Promise<BatchResult[]> {
+    const job = pendingJobs.get(batchId)
+    if (!job) {
+      throw new Error(`Batch not found: ${batchId}`)
+    }
+    return job.results
+  },
+
+  async waitForCompletion(batchId: string, pollInterval = 5000): Promise<BatchResult[]> {
+    const job = pendingJobs.get(batchId)
+    if (!job) {
+      throw new Error(`Batch not found: ${batchId}`)
+    }
+
+    while (job.status !== 'completed' && job.status !== 'failed' && job.status !== 'cancelled') {
+      await new Promise((resolve) => setTimeout(resolve, pollInterval))
+    }
+
+    return job.results
+  },
+}
+
+// ============================================================================
+// Processing (Concurrent Mode)
+// ============================================================================
+
+/**
+ * Process Bedrock requests concurrently
+ * This is a fallback when true batch inference isn't configured
+ */
+async function processBedrockRequestsConcurrently(
+  jobId: string,
+  items: BatchItem[],
+  config: ReturnType<typeof getConfig>,
+  model: string,
+  options: BatchQueueOptions
+): Promise<BatchResult[]> {
+  const job = pendingJobs.get(jobId)
+  if (!job) {
+    throw new Error(`Job not found: ${jobId}`)
+  }
+
+  job.status = 'in_progress'
+
+  // Process with concurrency limit
+  const CONCURRENCY = 5 // Bedrock has stricter rate limits
+  const results: BatchResult[] = []
+
+  for (let i = 0; i < items.length; i += CONCURRENCY) {
+    const batch = items.slice(i, i + CONCURRENCY)
+
+    const batchResults = await Promise.all(
+      batch.map(async (item) => {
+        try {
+          return await processBedrockItem(item, config, model)
+        } catch (error) {
+          return {
+            id: item.id,
+            customId: item.id,
+            status: 'failed' as const,
+            error: error instanceof Error ? error.message : 'Unknown error',
+          }
+        }
+      })
+    )
+
+    results.push(...batchResults)
+    job.results = results
+
+    // Respect rate limits
+    if (i + CONCURRENCY < items.length) {
+      await new Promise((resolve) => setTimeout(resolve, 1000))
+    }
+  }
+
+  job.status = results.every((r) => r.status === 'completed') ? 'completed' : 'failed'
+  job.completedAt = new Date()
+
+  return results
+}
+
+async function processBedrockItem(
+  item: BatchItem,
+  config: ReturnType<typeof getConfig>,
+  model: string
+): Promise<BatchResult> {
+  // Check if using AI Gateway
+  if (config.gatewayUrl && config.gatewayToken) {
+    return processBedrockItemViaGateway(item, config, model)
+  }
+
+  const url = `https://bedrock-runtime.${config.region}.amazonaws.com/model/${encodeURIComponent(model)}/invoke`
+
+  // Build the request body based on the model type
+  let body: Record<string, unknown>
+
+  if (model.includes('anthropic')) {
+    // Anthropic models on Bedrock
+    body = {
+      anthropic_version: 'bedrock-2023-05-31',
+      max_tokens: item.options?.maxTokens || 4096,
+      messages: [{ role: 'user', content: item.prompt }],
+      system: item.options?.system,
+      temperature: item.options?.temperature,
+    }
+  } else if (model.includes('amazon')) {
+    // Amazon Titan models
+    body = {
+      inputText: item.prompt,
+      textGenerationConfig: {
+        maxTokenCount: item.options?.maxTokens || 4096,
+        temperature: item.options?.temperature || 0.7,
+      },
+    }
+  } else if (model.includes('meta')) {
+    // Meta Llama models
+    body = {
+      prompt: item.prompt,
+      max_gen_len: item.options?.maxTokens || 4096,
+      temperature: item.options?.temperature || 0.7,
+    }
+  } else if (model.includes('mistral')) {
+    // Mistral models
+    body = {
+      prompt: `<s>[INST] ${item.prompt} [/INST]`,
+      max_tokens: item.options?.maxTokens || 4096,
+      temperature: item.options?.temperature || 0.7,
+    }
+  } else {
+    // Generic format (Claude-style)
+    body = {
+      anthropic_version: 'bedrock-2023-05-31',
+      max_tokens: item.options?.maxTokens || 4096,
+      messages: [{ role: 'user', content: item.prompt }],
+      temperature: item.options?.temperature,
+    }
+  }
+
+  const bodyStr = JSON.stringify(body)
+  const headers = await signRequest('POST', url, bodyStr, config, 'bedrock')
+
+  const response = await fetch(url, {
+    method: 'POST',
+    headers,
+    body: bodyStr,
+  })
+
+  if (!response.ok) {
+    const error = await response.text()
+    throw new Error(`Bedrock API error: ${response.status} ${error}`)
+  }
+
+  const data = await response.json() as {
+    // Anthropic format
+    content?: Array<{ type: string; text?: string }>
+    usage?: { input_tokens: number; output_tokens: number }
+    // Titan format
+    results?: Array<{ outputText: string; tokenCount: number }>
+    // Llama/Mistral format
+    generation?: string
+    generation_token_count?: number
+    prompt_token_count?: number
+  }
+
+  // Extract content based on model response format
+  let content: string | undefined
+  let usage: { promptTokens: number; completionTokens: number; totalTokens: number } | undefined
+
+  if (data.content) {
+    // Anthropic format
+    const textContent = data.content.find((c) => c.type === 'text')
+    content = textContent?.text
+    if (data.usage) {
+      usage = {
+        promptTokens: data.usage.input_tokens,
+        completionTokens: data.usage.output_tokens,
+        totalTokens: data.usage.input_tokens + data.usage.output_tokens,
+      }
+    }
+  } else if (data.results?.[0]) {
+    // Titan format
+    content = data.results[0].outputText
+    usage = {
+      promptTokens: 0, // Titan doesn't return this
+      completionTokens: data.results[0].tokenCount || 0,
+      totalTokens: data.results[0].tokenCount || 0,
+    }
+  } else if (data.generation) {
+    // Llama/Mistral format
+    content = data.generation
+    if (data.generation_token_count !== undefined) {
+      usage = {
+        promptTokens: data.prompt_token_count || 0,
+        completionTokens: data.generation_token_count,
+        totalTokens: (data.prompt_token_count || 0) + data.generation_token_count,
+      }
+    }
+  }
+
+  let result: unknown = content
+
+  // Try to parse JSON if schema was provided
+  if (item.schema && content) {
+    try {
+      result = JSON.parse(content)
+    } catch {
+      // Keep as string
+    }
+  }
+
+  return {
+    id: item.id,
+    customId: item.id,
+    status: 'completed',
+    result,
+    usage,
+  }
+}
+
+/**
+ * Process a Bedrock item via Cloudflare AI Gateway
+ *
+ * NOTE: Unlike OpenAI and Google, Bedrock via AI Gateway still requires AWS Signature V4 signing.
+ * The gateway routes the request but doesn't handle authentication.
+ * @see https://developers.cloudflare.com/ai-gateway/usage/providers/bedrock/
+ *
+ * Gateway URL format: {gateway_url}/aws-bedrock/bedrock-runtime/{region}/model/{model}/invoke
+ */
+async function processBedrockItemViaGateway(
+  item: BatchItem,
+  config: ReturnType<typeof getConfig>,
+  model: string
+): Promise<BatchResult> {
+  // AI Gateway URL for Bedrock - requires full path including region
+  // Format: {gateway_url}/aws-bedrock/bedrock-runtime/{region}/model/{model}/invoke
+  const url = `${config.gatewayUrl}/aws-bedrock/bedrock-runtime/${config.region}/model/${encodeURIComponent(model)}/invoke`
+
+  // Build the request body (Anthropic format for Claude models)
+  const body: Record<string, unknown> = {
+    anthropic_version: 'bedrock-2023-05-31',
+    max_tokens: item.options?.maxTokens || 4096,
+    messages: [{ role: 'user', content: item.prompt }],
+    system: item.options?.system,
+    temperature: item.options?.temperature,
+  }
+
+  const bodyStr = JSON.stringify(body)
+
+  // NOTE: Bedrock via Gateway still requires AWS SigV4 signing
+  // We need both the gateway token AND AWS credentials
+  if (!config.accessKeyId || !config.secretAccessKey) {
+    throw new Error(
+      'Bedrock via AI Gateway still requires AWS credentials for SigV4 signing. ' +
+      'Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.'
+    )
+  }
+
+  const headers = await signRequest('POST', url, bodyStr, config, 'bedrock')
+  headers.set('cf-aig-authorization', `Bearer ${config.gatewayToken}`)
+
+  const response = await fetch(url, {
+    method: 'POST',
+    headers,
+    body: bodyStr,
+  })
+
+  if (!response.ok) {
+    const error = await response.text()
+    throw new Error(`Bedrock via Gateway error: ${response.status} ${error}`)
+  }
+
+  const data = await response.json() as {
+    content?: Array<{ type: string; text?: string }>
+    usage?: { input_tokens: number; output_tokens: number }
+  }
+
+  // Extract content (Anthropic format)
+  const textContent = data.content?.find((c) => c.type === 'text')
+  let content = textContent?.text
+  let usage: { promptTokens: number; completionTokens: number; totalTokens: number } | undefined
+
+  if (data.usage) {
+    usage = {
+      promptTokens: data.usage.input_tokens,
+      completionTokens: data.usage.output_tokens,
+      totalTokens: data.usage.input_tokens + data.usage.output_tokens,
+    }
+  }
+
+  let result: unknown = content
+
+  // Try to parse JSON if schema was provided
+  if (item.schema && content) {
+    try {
+      result = JSON.parse(content)
+    } catch {
+      // Keep as string
+    }
+  }
+
+  return {
+    id: item.id,
+    customId: item.id,
+    status: 'completed',
+    result,
+    usage,
+  }
+}
+
+// ============================================================================
+// True Batch Inference (S3-based)
+// ============================================================================
+
+/**
+ * Create and submit a true Bedrock batch inference job
+ * This requires S3 bucket access and proper IAM setup
+ */
+export async function createBedrockBatchJob(
+  items: BatchItem[],
+  model: string,
+  options: {
+    jobName: string
+    s3InputPrefix?: string
+    s3OutputPrefix?: string
+    roleArn: string
+  }
+): Promise<{ jobArn: string }> {
+  const config = getConfig()
+
+  // Build JSONL content
+  const jsonlLines = items.map((item) => {
+    const request: BedrockBatchRequest = {
+      recordId: item.id,
+      modelInput: {
+        anthropic_version: 'bedrock-2023-05-31',
+        max_tokens: item.options?.maxTokens || 4096,
+        messages: [{ role: 'user', content: item.prompt }],
+        system: item.options?.system,
+        temperature: item.options?.temperature,
+      },
+    }
+    return JSON.stringify(request)
+  })
+
+  const inputKey = `${options.s3InputPrefix || 'bedrock-batch/input'}/${options.jobName}.jsonl`
+  const outputPrefix = `${options.s3OutputPrefix || 'bedrock-batch/output'}/${options.jobName}/`
+
+  // Upload to S3
+  // In production, use @aws-sdk/client-s3
+  const s3Url = `https://${config.bucket}.s3.${config.region}.amazonaws.com/${inputKey}`
+  const content = jsonlLines.join('\n')
+
+  const s3Response = await fetch(s3Url, {
+    method: 'PUT',
+    headers: await signRequest('PUT', s3Url, content, config, 's3'),
+    body: content,
+  })
+
+  if (!s3Response.ok) {
+    throw new Error(`Failed to upload to S3: ${s3Response.status}`)
+  }
+
+  // Create batch inference job
+  const jobUrl = `https://bedrock.${config.region}.amazonaws.com/model-invocation-job`
+  const jobBody = JSON.stringify({
+    jobName: options.jobName,
+    modelId: model,
+    roleArn: options.roleArn,
+    inputDataConfig: {
+      s3InputDataConfig: {
+        s3Uri: `s3://${config.bucket}/${inputKey}`,
+      },
+    },
+    outputDataConfig: {
+      s3OutputDataConfig: {
+        s3Uri: `s3://${config.bucket}/${outputPrefix}`,
+      },
+    },
+  })
+
+  const jobResponse = await fetch(jobUrl, {
+    method: 'POST',
+    headers: await signRequest('POST', jobUrl, jobBody, config, 'bedrock'),
+    body: jobBody,
+  })
+
+  if (!jobResponse.ok) {
+    const error = await jobResponse.text()
+    throw new Error(`Failed to create Bedrock batch job: ${jobResponse.status} ${error}`)
+  }
+
+  const jobData = await jobResponse.json() as { jobArn: string }
+  return jobData
+}
+
+// ============================================================================
+// Register Adapter
+// ============================================================================
+
+// ============================================================================
+// Bedrock Flex Adapter
+// ============================================================================
+
+/**
+ * AWS Bedrock Flex Adapter
+ *
+ * Flex processing uses concurrent requests for medium-sized batches (5-500 items).
+ * This provides a balance between:
+ * - Immediate execution (fast but full price, <5 items)
+ * - Full batch inference (50% discount but 24hr turnaround, 500+ items)
+ *
+ * Flex tier uses concurrent API calls with rate limiting, providing results
+ * in minutes rather than hours while still benefiting from efficient processing.
+ */
+const bedrockFlexAdapter: FlexAdapter = {
+  async submitFlex(items: BatchItem[], options: { model?: string }): Promise<BatchResult[]> {
+    const config = getConfig()
+    const model = options.model || 'anthropic.claude-3-sonnet-20240229-v1:0'
+    const CONCURRENCY = 8 // Bedrock has stricter rate limits than OpenAI
+
+    const results: BatchResult[] = []
+
+    // Process items concurrently with rate limiting
+    for (let i = 0; i < items.length; i += CONCURRENCY) {
+      const batch = items.slice(i, i + CONCURRENCY)
+
+      const batchResults = await Promise.all(
+        batch.map(async (item) => {
+          try {
+            return await processBedrockItem(item, config, model)
+          } catch (error) {
+            return {
+              id: item.id,
+              customId: item.id,
+              status: 'failed' as const,
+              error: error instanceof Error ? error.message : 'Unknown error',
+            }
+          }
+        })
+      )
+
+      results.push(...batchResults)
+
+      // Add delay between batches to respect rate limits
+      if (i + CONCURRENCY < items.length) {
+        await new Promise((resolve) => setTimeout(resolve, 500))
+      }
+    }
+
+    return results
+  },
+}
+
+// ============================================================================
+// Register Adapters
+// ============================================================================
+
+registerBatchAdapter('bedrock', bedrockAdapter)
+registerFlexAdapter('bedrock', bedrockFlexAdapter)
+
+export { bedrockAdapter, bedrockFlexAdapter }
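
For orientation, here is a minimal usage sketch of the two exports visible in this diff, `configureAWSBedrock` and `createBedrockBatchJob`. The import path, the item shape (the exact `BatchItem` type lives in `batch-queue.ts`, which is not shown here), and the bucket, role ARN, and job name are all assumptions for illustration, not values from the package.

```ts
// Hypothetical usage sketch; placeholder names are marked in comments.
// Import path is assumed - the package may expose these exports differently.
import { configureAWSBedrock, createBedrockBatchJob } from 'ai-functions/batch/bedrock'

// Point the adapter at an AWS account (or set gatewayUrl/gatewayToken to route
// through a Cloudflare AI Gateway) before submitting work.
configureAWSBedrock({
  region: 'us-east-1',
  s3Bucket: 'my-bedrock-batch-bucket', // placeholder bucket
  roleArn: 'arn:aws:iam::123456789012:role/BedrockBatchRole', // placeholder role
})

// Items mirror how the adapter reads them: an id, a prompt, and optional per-item settings.
const items = [
  { id: 'post-1', prompt: 'Write a short product update about batch inference.', options: { maxTokens: 1024 } },
  { id: 'post-2', prompt: 'Summarize the benefits of flex processing.', options: { temperature: 0.5 } },
]

// Submit a true S3-backed batch inference job and get back the Bedrock job ARN.
const { jobArn } = await createBedrockBatchJob(items, 'anthropic.claude-3-sonnet-20240229-v1:0', {
  jobName: 'blog-posts-batch', // placeholder job name
  roleArn: 'arn:aws:iam::123456789012:role/BedrockBatchRole', // placeholder role
})

console.log('Submitted Bedrock batch job:', jobArn)
```

For smaller workloads, the diff also registers `bedrock` as a batch and flex provider with the shared batch queue, which processes items with concurrent `InvokeModel` calls and rate-limit delays instead of the S3-based job flow.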