ai-functions 2.1.3 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +55 -1
- package/README.md +38 -0
- package/dist/ai-promise.d.ts +3 -3
- package/dist/ai-promise.d.ts.map +1 -1
- package/dist/ai-promise.js +135 -64
- package/dist/ai-promise.js.map +1 -1
- package/dist/ai-schemas.d.ts +56 -0
- package/dist/ai-schemas.d.ts.map +1 -0
- package/dist/ai-schemas.js +53 -0
- package/dist/ai-schemas.js.map +1 -0
- package/dist/ai.d.ts +16 -242
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +51 -858
- package/dist/ai.js.map +1 -1
- package/dist/batch/anthropic.d.ts +6 -4
- package/dist/batch/anthropic.d.ts.map +1 -1
- package/dist/batch/anthropic.js +83 -145
- package/dist/batch/anthropic.js.map +1 -1
- package/dist/batch/bedrock.d.ts +8 -30
- package/dist/batch/bedrock.d.ts.map +1 -1
- package/dist/batch/bedrock.js +155 -338
- package/dist/batch/bedrock.js.map +1 -1
- package/dist/batch/cloudflare.d.ts +8 -20
- package/dist/batch/cloudflare.d.ts.map +1 -1
- package/dist/batch/cloudflare.js +68 -189
- package/dist/batch/cloudflare.js.map +1 -1
- package/dist/batch/google.d.ts +6 -20
- package/dist/batch/google.d.ts.map +1 -1
- package/dist/batch/google.js +70 -238
- package/dist/batch/google.js.map +1 -1
- package/dist/batch/index.d.ts +4 -1
- package/dist/batch/index.d.ts.map +1 -1
- package/dist/batch/index.js +4 -1
- package/dist/batch/index.js.map +1 -1
- package/dist/batch/memory.d.ts +1 -1
- package/dist/batch/memory.d.ts.map +1 -1
- package/dist/batch/memory.js +14 -10
- package/dist/batch/memory.js.map +1 -1
- package/dist/batch/openai.d.ts +11 -14
- package/dist/batch/openai.d.ts.map +1 -1
- package/dist/batch/openai.js +52 -156
- package/dist/batch/openai.js.map +1 -1
- package/dist/batch/provider.d.ts +111 -0
- package/dist/batch/provider.d.ts.map +1 -0
- package/dist/batch/provider.js +233 -0
- package/dist/batch/provider.js.map +1 -0
- package/dist/batch-map.d.ts.map +1 -1
- package/dist/batch-map.js +23 -17
- package/dist/batch-map.js.map +1 -1
- package/dist/batch-queue.d.ts +65 -0
- package/dist/batch-queue.d.ts.map +1 -1
- package/dist/batch-queue.js +169 -14
- package/dist/batch-queue.js.map +1 -1
- package/dist/budget.d.ts.map +1 -1
- package/dist/budget.js +27 -14
- package/dist/budget.js.map +1 -1
- package/dist/cache.d.ts +23 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +36 -15
- package/dist/cache.js.map +1 -1
- package/dist/context.d.ts +26 -8
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +64 -62
- package/dist/context.js.map +1 -1
- package/dist/digital-objects-registry.d.ts +229 -0
- package/dist/digital-objects-registry.d.ts.map +1 -0
- package/dist/digital-objects-registry.js +617 -0
- package/dist/digital-objects-registry.js.map +1 -0
- package/dist/embeddings.d.ts +2 -2
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/errors.d.ts +22 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +35 -0
- package/dist/errors.js.map +1 -0
- package/dist/eval/runner.d.ts +8 -0
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js +41 -35
- package/dist/eval/runner.js.map +1 -1
- package/dist/eval-log/in-memory.d.ts +34 -0
- package/dist/eval-log/in-memory.d.ts.map +1 -0
- package/dist/eval-log/in-memory.js +84 -0
- package/dist/eval-log/in-memory.js.map +1 -0
- package/dist/eval-log/index.d.ts +29 -0
- package/dist/eval-log/index.d.ts.map +1 -0
- package/dist/eval-log/index.js +39 -0
- package/dist/eval-log/index.js.map +1 -0
- package/dist/eval-log/types.d.ts +101 -0
- package/dist/eval-log/types.d.ts.map +1 -0
- package/dist/eval-log/types.js +16 -0
- package/dist/eval-log/types.js.map +1 -0
- package/dist/function-registry.d.ts +116 -0
- package/dist/function-registry.d.ts.map +1 -0
- package/dist/function-registry.js +546 -0
- package/dist/function-registry.js.map +1 -0
- package/dist/generate.d.ts +9 -3
- package/dist/generate.d.ts.map +1 -1
- package/dist/generate.js +18 -18
- package/dist/generate.js.map +1 -1
- package/dist/index.d.ts +18 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +35 -18
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +118 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +187 -0
- package/dist/logger.js.map +1 -0
- package/dist/middleware/budget.d.ts +84 -0
- package/dist/middleware/budget.d.ts.map +1 -0
- package/dist/middleware/budget.js +110 -0
- package/dist/middleware/budget.js.map +1 -0
- package/dist/middleware/cache.d.ts +103 -0
- package/dist/middleware/cache.d.ts.map +1 -0
- package/dist/middleware/cache.js +228 -0
- package/dist/middleware/cache.js.map +1 -0
- package/dist/middleware/embed-cache.d.ts +99 -0
- package/dist/middleware/embed-cache.d.ts.map +1 -0
- package/dist/middleware/embed-cache.js +128 -0
- package/dist/middleware/embed-cache.js.map +1 -0
- package/dist/middleware/index.d.ts +11 -0
- package/dist/middleware/index.d.ts.map +1 -0
- package/dist/middleware/index.js +11 -0
- package/dist/middleware/index.js.map +1 -0
- package/dist/middleware/trace.d.ts +103 -0
- package/dist/middleware/trace.d.ts.map +1 -0
- package/dist/middleware/trace.js +176 -0
- package/dist/middleware/trace.js.map +1 -0
- package/dist/primitives.d.ts +120 -1
- package/dist/primitives.d.ts.map +1 -1
- package/dist/primitives.js +398 -26
- package/dist/primitives.js.map +1 -1
- package/dist/retry.d.ts +66 -1
- package/dist/retry.d.ts.map +1 -1
- package/dist/retry.js +115 -8
- package/dist/retry.js.map +1 -1
- package/dist/schema.js +2 -2
- package/dist/schema.js.map +1 -1
- package/dist/telemetry.d.ts +128 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +285 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/template.d.ts.map +1 -1
- package/dist/template.js +6 -1
- package/dist/template.js.map +1 -1
- package/dist/tool-orchestration.d.ts +66 -4
- package/dist/tool-orchestration.d.ts.map +1 -1
- package/dist/tool-orchestration.js +123 -23
- package/dist/tool-orchestration.js.map +1 -1
- package/dist/type-guards.d.ts +28 -0
- package/dist/type-guards.d.ts.map +1 -0
- package/dist/type-guards.js +29 -0
- package/dist/type-guards.js.map +1 -0
- package/dist/types.d.ts +135 -17
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +36 -1
- package/dist/types.js.map +1 -1
- package/dist/wrap-for-v3.d.ts +80 -0
- package/dist/wrap-for-v3.d.ts.map +1 -0
- package/dist/wrap-for-v3.js +89 -0
- package/dist/wrap-for-v3.js.map +1 -0
- package/examples/00-quickstart.ts +232 -0
- package/examples/01-rag-chatbot.ts +212 -0
- package/examples/02-multi-agent-research.ts +290 -0
- package/examples/03-email-classification.ts +379 -0
- package/examples/04-content-moderation.ts +400 -0
- package/examples/05-document-extraction.ts +455 -0
- package/examples/06-streaming-chat-nextjs.ts +437 -0
- package/examples/07-cloudflare-worker.ts +483 -0
- package/examples/08-batch-processing.ts +491 -0
- package/examples/09-budget-constrained.ts +527 -0
- package/examples/10-tool-orchestration.ts +565 -0
- package/examples/11-retry-resilience.ts +403 -0
- package/examples/12-caching-strategies.ts +422 -0
- package/examples/README.md +145 -0
- package/package.json +28 -25
- package/src/ai-promise.ts +226 -140
- package/src/ai-schemas.ts +122 -0
- package/src/ai.ts +69 -1176
- package/src/batch/anthropic.ts +96 -161
- package/src/batch/bedrock.ts +203 -454
- package/src/batch/cloudflare.ts +99 -282
- package/src/batch/google.ts +91 -297
- package/src/batch/index.ts +4 -1
- package/src/batch/memory.ts +15 -10
- package/src/batch/openai.ts +65 -193
- package/src/batch/provider.ts +336 -0
- package/src/batch-map.ts +29 -24
- package/src/batch-queue.ts +200 -11
- package/src/budget.ts +31 -18
- package/src/cache.ts +45 -17
- package/src/context.ts +106 -77
- package/src/digital-objects-registry.ts +750 -0
- package/src/errors.ts +37 -0
- package/src/eval/runner.ts +60 -36
- package/src/eval-log/in-memory.ts +90 -0
- package/src/eval-log/index.ts +46 -0
- package/src/eval-log/types.ts +110 -0
- package/src/function-registry.ts +671 -0
- package/src/generate.ts +33 -28
- package/src/index.ts +119 -21
- package/src/logger.ts +232 -0
- package/src/middleware/budget.ts +171 -0
- package/src/middleware/cache.ts +299 -0
- package/src/middleware/embed-cache.ts +195 -0
- package/src/middleware/index.ts +23 -0
- package/src/middleware/trace.ts +248 -0
- package/src/primitives.ts +589 -62
- package/src/retry.ts +144 -18
- package/src/schema.ts +8 -8
- package/src/telemetry.ts +403 -0
- package/src/template.ts +8 -4
- package/src/tool-orchestration.ts +213 -48
- package/src/type-guards.ts +31 -0
- package/src/types.ts +164 -25
- package/src/wrap-for-v3.ts +105 -0
- package/test/ai-promise.test.ts +1080 -0
- package/test/ai-proxy.test.ts +1 -1
- package/test/batch-autosubmit-errors.test.ts +49 -37
- package/test/batch-blog-posts.test.ts +87 -129
- package/test/core-functions.test.ts +183 -579
- package/test/decide.test.ts +154 -322
- package/test/define.test.ts +211 -8
- package/test/digital-objects-registry.test.ts +760 -0
- package/test/embedding-cache-middleware.test.ts +140 -0
- package/test/generate-core.test.ts +140 -229
- package/test/implicit-batch.test.ts +22 -65
- package/test/retry-policy-integration.test.ts +117 -0
- package/test/schema.test.ts +55 -19
- package/test/template.test.ts +1164 -0
- package/test/tool-orchestration.test.ts +270 -0
- package/test/wrap-for-v3.test.ts +612 -0
- package/vitest.config.js +6 -0
- package/vitest.config.ts +20 -0
- package/LICENSE +0 -21
- package/dist/rpc/auth.d.ts +0 -69
- package/dist/rpc/auth.d.ts.map +0 -1
- package/dist/rpc/auth.js +0 -136
- package/dist/rpc/auth.js.map +0 -1
- package/dist/rpc/client.d.ts +0 -62
- package/dist/rpc/client.d.ts.map +0 -1
- package/dist/rpc/client.js +0 -103
- package/dist/rpc/client.js.map +0 -1
- package/dist/rpc/deferred.d.ts +0 -60
- package/dist/rpc/deferred.d.ts.map +0 -1
- package/dist/rpc/deferred.js +0 -96
- package/dist/rpc/deferred.js.map +0 -1
- package/dist/rpc/index.d.ts +0 -22
- package/dist/rpc/index.d.ts.map +0 -1
- package/dist/rpc/index.js +0 -38
- package/dist/rpc/index.js.map +0 -1
- package/dist/rpc/local.d.ts +0 -42
- package/dist/rpc/local.d.ts.map +0 -1
- package/dist/rpc/local.js +0 -50
- package/dist/rpc/local.js.map +0 -1
- package/dist/rpc/server.d.ts +0 -165
- package/dist/rpc/server.d.ts.map +0 -1
- package/dist/rpc/server.js +0 -405
- package/dist/rpc/server.js.map +0 -1
- package/dist/rpc/session.d.ts +0 -32
- package/dist/rpc/session.d.ts.map +0 -1
- package/dist/rpc/session.js +0 -43
- package/dist/rpc/session.js.map +0 -1
- package/dist/rpc/transport.d.ts +0 -306
- package/dist/rpc/transport.d.ts.map +0 -1
- package/dist/rpc/transport.js +0 -731
- package/dist/rpc/transport.js.map +0 -1
- package/src/batch/anthropic.js +0 -256
- package/src/batch/bedrock.js +0 -584
- package/src/batch/cloudflare.js +0 -287
- package/src/batch/google.js +0 -359
- package/src/batch/index.js +0 -30
- package/src/batch/memory.js +0 -187
- package/src/batch/openai.js +0 -402
- package/src/eval/index.js +0 -7
- package/src/eval/models.js +0 -119
- package/src/eval/runner.js +0 -147
- package/test/schema.test.js +0 -96
package/src/batch/bedrock.ts
CHANGED
|
@@ -1,30 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AWS Bedrock Batch Inference Adapter
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Bedrock has a true batch inference API (S3-driven) and a runtime invoke API.
|
|
5
|
+
* The "batch" adapter here uses concurrent runtime invocations as a fallback
|
|
6
|
+
* (no S3 setup required); `createBedrockBatchJob` is exported separately for
|
|
7
|
+
* callers who want to drive the real S3-based batch flow directly.
|
|
6
8
|
*
|
|
7
9
|
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html
|
|
8
10
|
*
|
|
9
11
|
* @packageDocumentation
|
|
10
12
|
*/
|
|
11
13
|
|
|
14
|
+
import { getLogger } from '../logger.js'
|
|
12
15
|
import {
|
|
16
|
+
LocalJobStore,
|
|
17
|
+
processConcurrently,
|
|
13
18
|
registerBatchAdapter,
|
|
14
19
|
registerFlexAdapter,
|
|
20
|
+
tryParseJson,
|
|
15
21
|
type BatchAdapter,
|
|
16
|
-
type FlexAdapter,
|
|
17
22
|
type BatchItem,
|
|
18
23
|
type BatchJob,
|
|
19
24
|
type BatchQueueOptions,
|
|
20
25
|
type BatchResult,
|
|
21
26
|
type BatchSubmitResult,
|
|
22
|
-
type
|
|
23
|
-
} from '
|
|
24
|
-
import { schema as convertSchema } from '../schema.js'
|
|
27
|
+
type FlexAdapter,
|
|
28
|
+
} from './provider.js'
|
|
25
29
|
|
|
26
30
|
// ============================================================================
|
|
27
|
-
//
|
|
31
|
+
// Provider-specific types
|
|
28
32
|
// ============================================================================
|
|
29
33
|
|
|
30
34
|
interface BedrockBatchRequest {
|
|
@@ -38,50 +42,8 @@ interface BedrockBatchRequest {
|
|
|
38
42
|
}
|
|
39
43
|
}
|
|
40
44
|
|
|
41
|
-
interface BedrockBatchResponse {
|
|
42
|
-
recordId: string
|
|
43
|
-
modelOutput?: {
|
|
44
|
-
content: Array<{ type: string; text?: string }>
|
|
45
|
-
usage: {
|
|
46
|
-
input_tokens: number
|
|
47
|
-
output_tokens: number
|
|
48
|
-
}
|
|
49
|
-
stop_reason: string
|
|
50
|
-
}
|
|
51
|
-
error?: {
|
|
52
|
-
errorCode: string
|
|
53
|
-
errorMessage: string
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
interface BedrockBatchJobStatus {
|
|
58
|
-
jobArn: string
|
|
59
|
-
jobName: string
|
|
60
|
-
status: 'Submitted' | 'InProgress' | 'Completed' | 'Failed' | 'Stopping' | 'Stopped'
|
|
61
|
-
modelId: string
|
|
62
|
-
inputDataConfig: {
|
|
63
|
-
s3InputDataConfig: {
|
|
64
|
-
s3Uri: string
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
outputDataConfig: {
|
|
68
|
-
s3OutputDataConfig: {
|
|
69
|
-
s3Uri: string
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
creationTime: string
|
|
73
|
-
lastModifiedTime: string
|
|
74
|
-
endTime?: string
|
|
75
|
-
failureMessage?: string
|
|
76
|
-
statistics?: {
|
|
77
|
-
inputRecordCount: number
|
|
78
|
-
outputRecordCount: number
|
|
79
|
-
errorCount: number
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
45
|
// ============================================================================
|
|
84
|
-
// AWS
|
|
46
|
+
// AWS configuration
|
|
85
47
|
// ============================================================================
|
|
86
48
|
|
|
87
49
|
let awsRegion: string | undefined
|
|
@@ -91,13 +53,10 @@ let awsSessionToken: string | undefined
|
|
|
91
53
|
let s3Bucket: string | undefined
|
|
92
54
|
let roleArn: string | undefined
|
|
93
55
|
|
|
94
|
-
// AI Gateway configuration (optional - for routing through Cloudflare AI Gateway)
|
|
95
56
|
let gatewayUrl: string | undefined
|
|
96
57
|
let gatewayToken: string | undefined
|
|
97
58
|
|
|
98
|
-
/**
|
|
99
|
-
* Configure AWS credentials and settings
|
|
100
|
-
*/
|
|
59
|
+
/** Configure AWS credentials and settings. */
|
|
101
60
|
export function configureAWSBedrock(options: {
|
|
102
61
|
region?: string
|
|
103
62
|
accessKeyId?: string
|
|
@@ -120,19 +79,29 @@ export function configureAWSBedrock(options: {
|
|
|
120
79
|
if (options.gatewayToken) gatewayToken = options.gatewayToken
|
|
121
80
|
}
|
|
122
81
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
82
|
+
interface BedrockConfig {
|
|
83
|
+
region: string
|
|
84
|
+
accessKeyId: string
|
|
85
|
+
secretAccessKey: string
|
|
86
|
+
sessionToken?: string | undefined
|
|
87
|
+
bucket: string
|
|
88
|
+
role: string | undefined
|
|
89
|
+
gatewayUrl: string | undefined
|
|
90
|
+
gatewayToken: string | undefined
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function getConfig(): BedrockConfig {
|
|
94
|
+
const region =
|
|
95
|
+
awsRegion || process.env['AWS_REGION'] || process.env['AWS_DEFAULT_REGION'] || 'us-east-1'
|
|
96
|
+
const accessKeyId = awsAccessKeyId || process.env['AWS_ACCESS_KEY_ID']
|
|
97
|
+
const secretAccessKey = awsSecretAccessKey || process.env['AWS_SECRET_ACCESS_KEY']
|
|
98
|
+
const sessionToken = awsSessionToken || process.env['AWS_SESSION_TOKEN']
|
|
99
|
+
const bucket = s3Bucket || process.env['BEDROCK_BATCH_S3_BUCKET']
|
|
100
|
+
const role = roleArn || process.env['BEDROCK_BATCH_ROLE_ARN']
|
|
130
101
|
|
|
131
|
-
|
|
132
|
-
const
|
|
133
|
-
const gwToken = gatewayToken || process.env.AI_GATEWAY_TOKEN
|
|
102
|
+
const gwUrl = gatewayUrl || process.env['AI_GATEWAY_URL']
|
|
103
|
+
const gwToken = gatewayToken || process.env['AI_GATEWAY_TOKEN']
|
|
134
104
|
|
|
135
|
-
// If using gateway, we don't need AWS credentials
|
|
136
105
|
if (gwUrl && gwToken) {
|
|
137
106
|
return {
|
|
138
107
|
region,
|
|
@@ -147,30 +116,38 @@ function getConfig() {
|
|
|
147
116
|
}
|
|
148
117
|
|
|
149
118
|
if (!accessKeyId || !secretAccessKey) {
|
|
150
|
-
throw new Error(
|
|
119
|
+
throw new Error(
|
|
120
|
+
'AWS credentials not configured. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY, or use AI_GATEWAY_URL and AI_GATEWAY_TOKEN'
|
|
121
|
+
)
|
|
151
122
|
}
|
|
152
123
|
|
|
153
124
|
if (!bucket) {
|
|
154
125
|
throw new Error('S3 bucket for Bedrock batch not configured. Set BEDROCK_BATCH_S3_BUCKET')
|
|
155
126
|
}
|
|
156
127
|
|
|
157
|
-
return {
|
|
128
|
+
return {
|
|
129
|
+
region,
|
|
130
|
+
accessKeyId,
|
|
131
|
+
secretAccessKey,
|
|
132
|
+
sessionToken,
|
|
133
|
+
bucket,
|
|
134
|
+
role,
|
|
135
|
+
gatewayUrl: undefined,
|
|
136
|
+
gatewayToken: undefined,
|
|
137
|
+
}
|
|
158
138
|
}
|
|
159
139
|
|
|
160
140
|
// ============================================================================
|
|
161
|
-
// AWS
|
|
141
|
+
// AWS SigV4 (delegated to optional @smithy/signature-v4 if available)
|
|
162
142
|
// ============================================================================
|
|
163
143
|
|
|
164
144
|
async function signRequest(
|
|
165
145
|
method: string,
|
|
166
146
|
url: string,
|
|
167
147
|
body: string,
|
|
168
|
-
config:
|
|
148
|
+
config: BedrockConfig,
|
|
169
149
|
service: string
|
|
170
150
|
): Promise<Headers> {
|
|
171
|
-
// In production, use @aws-sdk/signature-v4 or similar
|
|
172
|
-
// This is a simplified implementation for demonstration
|
|
173
|
-
|
|
174
151
|
const headers = new Headers({
|
|
175
152
|
'Content-Type': 'application/json',
|
|
176
153
|
'X-Amz-Date': new Date().toISOString().replace(/[:-]|\.\d{3}/g, ''),
|
|
@@ -180,21 +157,14 @@ async function signRequest(
|
|
|
180
157
|
headers.set('X-Amz-Security-Token', config.sessionToken)
|
|
181
158
|
}
|
|
182
159
|
|
|
183
|
-
// For actual implementation, compute proper AWS Signature V4
|
|
184
|
-
// This requires crypto operations that vary by environment
|
|
185
|
-
|
|
186
|
-
// Fallback: Use AWS SDK if available
|
|
187
160
|
try {
|
|
188
|
-
//
|
|
161
|
+
// Optional dependency — present in production, absent in dev/test.
|
|
189
162
|
// @ts-expect-error - Optional dependency
|
|
190
163
|
const signatureV4Module = await import('@smithy/signature-v4')
|
|
191
164
|
// @ts-expect-error - Optional dependency
|
|
192
165
|
const sha256Module = await import('@aws-crypto/sha256-js')
|
|
193
166
|
|
|
194
|
-
const
|
|
195
|
-
const Sha256 = sha256Module.Sha256
|
|
196
|
-
|
|
197
|
-
const signer = new SignatureV4({
|
|
167
|
+
const signer = new signatureV4Module.SignatureV4({
|
|
198
168
|
service,
|
|
199
169
|
region: config.region,
|
|
200
170
|
credentials: {
|
|
@@ -202,7 +172,7 @@ async function signRequest(
|
|
|
202
172
|
secretAccessKey: config.secretAccessKey,
|
|
203
173
|
sessionToken: config.sessionToken,
|
|
204
174
|
},
|
|
205
|
-
sha256: Sha256,
|
|
175
|
+
sha256: sha256Module.Sha256,
|
|
206
176
|
})
|
|
207
177
|
|
|
208
178
|
const signedRequest = await signer.sign({
|
|
@@ -215,301 +185,232 @@ async function signRequest(
|
|
|
215
185
|
|
|
216
186
|
return new Headers(signedRequest.headers as Record<string, string>)
|
|
217
187
|
} catch {
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
188
|
+
getLogger().warn(
|
|
189
|
+
'AWS SDK not available for request signing. Install @smithy/signature-v4 and @aws-crypto/sha256-js'
|
|
190
|
+
)
|
|
221
191
|
return headers
|
|
222
192
|
}
|
|
223
193
|
}
|
|
224
194
|
|
|
225
195
|
// ============================================================================
|
|
226
|
-
//
|
|
196
|
+
// Local job tracking
|
|
227
197
|
// ============================================================================
|
|
228
198
|
|
|
229
|
-
const
|
|
230
|
-
items: BatchItem[]
|
|
231
|
-
options: BatchQueueOptions
|
|
232
|
-
jobArn?: string
|
|
233
|
-
results: BatchResult[]
|
|
234
|
-
status: BatchStatus
|
|
235
|
-
createdAt: Date
|
|
236
|
-
completedAt?: Date
|
|
237
|
-
}>()
|
|
238
|
-
|
|
239
|
-
let jobCounter = 0
|
|
199
|
+
const jobs = new LocalJobStore('bedrock_batch')
|
|
240
200
|
|
|
241
201
|
// ============================================================================
|
|
242
|
-
// Bedrock
|
|
202
|
+
// Bedrock batch adapter (BatchProvider port)
|
|
243
203
|
// ============================================================================
|
|
244
204
|
|
|
245
|
-
/**
|
|
246
|
-
* AWS Bedrock batch adapter
|
|
247
|
-
*
|
|
248
|
-
* Bedrock batch inference:
|
|
249
|
-
* 1. Uploads input JSONL to S3
|
|
250
|
-
* 2. Creates a batch inference job
|
|
251
|
-
* 3. Results are written to S3
|
|
252
|
-
* 4. Download and parse results
|
|
253
|
-
*
|
|
254
|
-
* Note: This requires S3 bucket access and proper IAM roles.
|
|
255
|
-
*/
|
|
256
205
|
const bedrockAdapter: BatchAdapter = {
|
|
257
206
|
async submit(items: BatchItem[], options: BatchQueueOptions): Promise<BatchSubmitResult> {
|
|
258
207
|
const config = getConfig()
|
|
259
|
-
const jobId = `bedrock_batch_${++jobCounter}_${Date.now()}`
|
|
260
|
-
|
|
261
|
-
// Default to Claude on Bedrock
|
|
262
208
|
const model = options.model || 'anthropic.claude-3-sonnet-20240229-v1:0'
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
results
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
209
|
+
const { id, state } = jobs.create(items, options)
|
|
210
|
+
|
|
211
|
+
// Drive the job state machine in the background.
|
|
212
|
+
const completion = (async () => {
|
|
213
|
+
state.status = 'in_progress'
|
|
214
|
+
const results = await processConcurrently(
|
|
215
|
+
items,
|
|
216
|
+
(item) => processBedrockItem(item, config, model),
|
|
217
|
+
{
|
|
218
|
+
concurrency: 5, // Bedrock has stricter rate limits.
|
|
219
|
+
delayBetweenWaves: 1000,
|
|
220
|
+
onWaveComplete: (partial) => {
|
|
221
|
+
state.results = partial
|
|
222
|
+
},
|
|
223
|
+
}
|
|
224
|
+
)
|
|
225
|
+
state.results = results
|
|
226
|
+
state.status = results.every((r) => r.status === 'completed') ? 'completed' : 'failed'
|
|
227
|
+
state.completedAt = new Date()
|
|
228
|
+
return results
|
|
229
|
+
})()
|
|
283
230
|
|
|
284
231
|
const job: BatchJob = {
|
|
285
|
-
id
|
|
232
|
+
id,
|
|
286
233
|
provider: 'bedrock',
|
|
287
234
|
status: 'pending',
|
|
288
235
|
totalItems: items.length,
|
|
289
236
|
completedItems: 0,
|
|
290
237
|
failedItems: 0,
|
|
291
|
-
createdAt:
|
|
292
|
-
webhookUrl: options.webhookUrl,
|
|
238
|
+
createdAt: state.createdAt,
|
|
239
|
+
...(options.webhookUrl !== undefined && { webhookUrl: options.webhookUrl }),
|
|
293
240
|
}
|
|
294
241
|
|
|
295
242
|
return { job, completion }
|
|
296
243
|
},
|
|
297
244
|
|
|
298
245
|
async getStatus(batchId: string): Promise<BatchJob> {
|
|
299
|
-
|
|
300
|
-
if (!job) {
|
|
301
|
-
throw new Error(`Batch not found: ${batchId}`)
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
const completedItems = job.results.filter((r) => r.status === 'completed').length
|
|
305
|
-
const failedItems = job.results.filter((r) => r.status === 'failed').length
|
|
306
|
-
|
|
307
|
-
return {
|
|
308
|
-
id: batchId,
|
|
309
|
-
provider: 'bedrock',
|
|
310
|
-
status: job.status,
|
|
311
|
-
totalItems: job.items.length,
|
|
312
|
-
completedItems,
|
|
313
|
-
failedItems,
|
|
314
|
-
createdAt: job.createdAt,
|
|
315
|
-
completedAt: job.completedAt,
|
|
316
|
-
}
|
|
246
|
+
return jobs.snapshot(batchId, 'bedrock')
|
|
317
247
|
},
|
|
318
248
|
|
|
319
249
|
async cancel(batchId: string): Promise<void> {
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
|
|
250
|
+
if (!jobs.has(batchId)) return
|
|
251
|
+
const state = jobs.get(batchId)
|
|
252
|
+
state.status = 'cancelled'
|
|
253
|
+
|
|
254
|
+
const jobArn = state.meta?.['jobArn'] as string | undefined
|
|
255
|
+
if (jobArn) {
|
|
256
|
+
const config = getConfig()
|
|
257
|
+
const url = `https://bedrock.${
|
|
258
|
+
config.region
|
|
259
|
+
}.amazonaws.com/model-invocation-job/${encodeURIComponent(jobArn)}/stop`
|
|
260
|
+
try {
|
|
261
|
+
await fetch(url, {
|
|
262
|
+
method: 'POST',
|
|
263
|
+
headers: await signRequest('POST', url, '', config, 'bedrock'),
|
|
264
|
+
})
|
|
265
|
+
} catch (error) {
|
|
266
|
+
getLogger().warn('Failed to cancel Bedrock job:', error)
|
|
337
267
|
}
|
|
338
268
|
}
|
|
339
269
|
},
|
|
340
270
|
|
|
341
271
|
async getResults(batchId: string): Promise<BatchResult[]> {
|
|
342
|
-
|
|
343
|
-
if (!job) {
|
|
344
|
-
throw new Error(`Batch not found: ${batchId}`)
|
|
345
|
-
}
|
|
346
|
-
return job.results
|
|
272
|
+
return jobs.get(batchId).results
|
|
347
273
|
},
|
|
348
274
|
|
|
349
275
|
async waitForCompletion(batchId: string, pollInterval = 5000): Promise<BatchResult[]> {
|
|
350
|
-
|
|
351
|
-
if (!job) {
|
|
352
|
-
throw new Error(`Batch not found: ${batchId}`)
|
|
353
|
-
}
|
|
354
|
-
|
|
355
|
-
while (job.status !== 'completed' && job.status !== 'failed' && job.status !== 'cancelled') {
|
|
356
|
-
await new Promise((resolve) => setTimeout(resolve, pollInterval))
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
return job.results
|
|
276
|
+
return jobs.waitForCompletion(batchId, pollInterval)
|
|
360
277
|
},
|
|
361
278
|
}
|
|
362
279
|
|
|
363
280
|
// ============================================================================
|
|
364
|
-
//
|
|
281
|
+
// Per-item processing
|
|
365
282
|
// ============================================================================
|
|
366
283
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
config: ReturnType<typeof getConfig>,
|
|
375
|
-
model: string,
|
|
376
|
-
options: BatchQueueOptions
|
|
377
|
-
): Promise<BatchResult[]> {
|
|
378
|
-
const job = pendingJobs.get(jobId)
|
|
379
|
-
if (!job) {
|
|
380
|
-
throw new Error(`Job not found: ${jobId}`)
|
|
284
|
+
async function processBedrockItem(
|
|
285
|
+
item: BatchItem,
|
|
286
|
+
config: BedrockConfig,
|
|
287
|
+
model: string
|
|
288
|
+
): Promise<BatchResult> {
|
|
289
|
+
if (config.gatewayUrl && config.gatewayToken) {
|
|
290
|
+
return processBedrockItemViaGateway(item, config, model)
|
|
381
291
|
}
|
|
382
292
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
const CONCURRENCY = 5 // Bedrock has stricter rate limits
|
|
387
|
-
const results: BatchResult[] = []
|
|
388
|
-
|
|
389
|
-
for (let i = 0; i < items.length; i += CONCURRENCY) {
|
|
390
|
-
const batch = items.slice(i, i + CONCURRENCY)
|
|
391
|
-
|
|
392
|
-
const batchResults = await Promise.all(
|
|
393
|
-
batch.map(async (item) => {
|
|
394
|
-
try {
|
|
395
|
-
return await processBedrockItem(item, config, model)
|
|
396
|
-
} catch (error) {
|
|
397
|
-
return {
|
|
398
|
-
id: item.id,
|
|
399
|
-
customId: item.id,
|
|
400
|
-
status: 'failed' as const,
|
|
401
|
-
error: error instanceof Error ? error.message : 'Unknown error',
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
})
|
|
405
|
-
)
|
|
293
|
+
const url = `https://bedrock-runtime.${config.region}.amazonaws.com/model/${encodeURIComponent(
|
|
294
|
+
model
|
|
295
|
+
)}/invoke`
|
|
406
296
|
|
|
407
|
-
|
|
408
|
-
|
|
297
|
+
const body = buildBedrockRequestBody(item, model)
|
|
298
|
+
const bodyStr = JSON.stringify(body)
|
|
299
|
+
const headers = await signRequest('POST', url, bodyStr, config, 'bedrock')
|
|
409
300
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
}
|
|
301
|
+
const response = await fetch(url, { method: 'POST', headers, body: bodyStr })
|
|
302
|
+
if (!response.ok) {
|
|
303
|
+
const error = await response.text()
|
|
304
|
+
throw new Error(`Bedrock API error: ${response.status} ${error}`)
|
|
414
305
|
}
|
|
415
306
|
|
|
416
|
-
|
|
417
|
-
job.completedAt = new Date()
|
|
418
|
-
|
|
419
|
-
return results
|
|
307
|
+
return parseBedrockResponse(item, await response.json())
|
|
420
308
|
}
|
|
421
309
|
|
|
422
|
-
|
|
310
|
+
/**
|
|
311
|
+
* Process a Bedrock item via Cloudflare AI Gateway.
|
|
312
|
+
*
|
|
313
|
+
* Note: AI Gateway routes the request but doesn't handle authentication —
|
|
314
|
+
* Bedrock still requires AWS SigV4 signing.
|
|
315
|
+
* @see https://developers.cloudflare.com/ai-gateway/usage/providers/bedrock/
|
|
316
|
+
*/
|
|
317
|
+
async function processBedrockItemViaGateway(
|
|
423
318
|
item: BatchItem,
|
|
424
|
-
config:
|
|
319
|
+
config: BedrockConfig,
|
|
425
320
|
model: string
|
|
426
321
|
): Promise<BatchResult> {
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
322
|
+
const url = `${config.gatewayUrl}/aws-bedrock/bedrock-runtime/${
|
|
323
|
+
config.region
|
|
324
|
+
}/model/${encodeURIComponent(model)}/invoke`
|
|
325
|
+
|
|
326
|
+
const body: Record<string, unknown> = {
|
|
327
|
+
anthropic_version: 'bedrock-2023-05-31',
|
|
328
|
+
max_tokens: item.options?.maxTokens || 4096,
|
|
329
|
+
messages: [{ role: 'user', content: item.prompt }],
|
|
330
|
+
...(item.options?.system !== undefined && { system: item.options.system }),
|
|
331
|
+
...(item.options?.temperature !== undefined && { temperature: item.options.temperature }),
|
|
430
332
|
}
|
|
431
333
|
|
|
432
|
-
const
|
|
334
|
+
const bodyStr = JSON.stringify(body)
|
|
433
335
|
|
|
434
|
-
|
|
435
|
-
|
|
336
|
+
if (!config.accessKeyId || !config.secretAccessKey) {
|
|
337
|
+
throw new Error(
|
|
338
|
+
'Bedrock via AI Gateway still requires AWS credentials for SigV4 signing. ' +
|
|
339
|
+
'Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.'
|
|
340
|
+
)
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
const headers = await signRequest('POST', url, bodyStr, config, 'bedrock')
|
|
344
|
+
headers.set('cf-aig-authorization', `Bearer ${config.gatewayToken}`)
|
|
345
|
+
|
|
346
|
+
const response = await fetch(url, { method: 'POST', headers, body: bodyStr })
|
|
347
|
+
if (!response.ok) {
|
|
348
|
+
const error = await response.text()
|
|
349
|
+
throw new Error(`Bedrock via Gateway error: ${response.status} ${error}`)
|
|
350
|
+
}
|
|
436
351
|
|
|
352
|
+
return parseBedrockResponse(item, await response.json())
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
/** Build the Bedrock invoke body for the model family. */
|
|
356
|
+
function buildBedrockRequestBody(item: BatchItem, model: string): Record<string, unknown> {
|
|
437
357
|
if (model.includes('anthropic')) {
|
|
438
|
-
|
|
439
|
-
body = {
|
|
358
|
+
return {
|
|
440
359
|
anthropic_version: 'bedrock-2023-05-31',
|
|
441
360
|
max_tokens: item.options?.maxTokens || 4096,
|
|
442
361
|
messages: [{ role: 'user', content: item.prompt }],
|
|
443
|
-
system: item.options
|
|
444
|
-
temperature: item.options
|
|
362
|
+
...(item.options?.system !== undefined && { system: item.options.system }),
|
|
363
|
+
...(item.options?.temperature !== undefined && { temperature: item.options.temperature }),
|
|
445
364
|
}
|
|
446
|
-
}
|
|
447
|
-
|
|
448
|
-
|
|
365
|
+
}
|
|
366
|
+
if (model.includes('amazon')) {
|
|
367
|
+
return {
|
|
449
368
|
inputText: item.prompt,
|
|
450
369
|
textGenerationConfig: {
|
|
451
370
|
maxTokenCount: item.options?.maxTokens || 4096,
|
|
452
371
|
temperature: item.options?.temperature || 0.7,
|
|
453
372
|
},
|
|
454
373
|
}
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
|
|
374
|
+
}
|
|
375
|
+
if (model.includes('meta')) {
|
|
376
|
+
return {
|
|
458
377
|
prompt: item.prompt,
|
|
459
378
|
max_gen_len: item.options?.maxTokens || 4096,
|
|
460
379
|
temperature: item.options?.temperature || 0.7,
|
|
461
380
|
}
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
|
|
381
|
+
}
|
|
382
|
+
if (model.includes('mistral')) {
|
|
383
|
+
return {
|
|
465
384
|
prompt: `<s>[INST] ${item.prompt} [/INST]`,
|
|
466
385
|
max_tokens: item.options?.maxTokens || 4096,
|
|
467
386
|
temperature: item.options?.temperature || 0.7,
|
|
468
387
|
}
|
|
469
|
-
} else {
|
|
470
|
-
// Generic format (Claude-style)
|
|
471
|
-
body = {
|
|
472
|
-
anthropic_version: 'bedrock-2023-05-31',
|
|
473
|
-
max_tokens: item.options?.maxTokens || 4096,
|
|
474
|
-
messages: [{ role: 'user', content: item.prompt }],
|
|
475
|
-
temperature: item.options?.temperature,
|
|
476
|
-
}
|
|
477
388
|
}
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
headers,
|
|
485
|
-
body: bodyStr,
|
|
486
|
-
})
|
|
487
|
-
|
|
488
|
-
if (!response.ok) {
|
|
489
|
-
const error = await response.text()
|
|
490
|
-
throw new Error(`Bedrock API error: ${response.status} ${error}`)
|
|
389
|
+
// Default: Claude-style.
|
|
390
|
+
return {
|
|
391
|
+
anthropic_version: 'bedrock-2023-05-31',
|
|
392
|
+
max_tokens: item.options?.maxTokens || 4096,
|
|
393
|
+
messages: [{ role: 'user', content: item.prompt }],
|
|
394
|
+
...(item.options?.temperature !== undefined && { temperature: item.options.temperature }),
|
|
491
395
|
}
|
|
396
|
+
}
|
|
492
397
|
|
|
493
|
-
|
|
494
|
-
|
|
398
|
+
/** Parse a Bedrock invoke response across model families. */
|
|
399
|
+
function parseBedrockResponse(item: BatchItem, raw: unknown): BatchResult {
|
|
400
|
+
const data = raw as {
|
|
495
401
|
content?: Array<{ type: string; text?: string }>
|
|
496
402
|
usage?: { input_tokens: number; output_tokens: number }
|
|
497
|
-
// Titan format
|
|
498
403
|
results?: Array<{ outputText: string; tokenCount: number }>
|
|
499
|
-
// Llama/Mistral format
|
|
500
404
|
generation?: string
|
|
501
405
|
generation_token_count?: number
|
|
502
406
|
prompt_token_count?: number
|
|
503
407
|
}
|
|
504
408
|
|
|
505
|
-
// Extract content based on model response format
|
|
506
409
|
let content: string | undefined
|
|
507
410
|
let usage: { promptTokens: number; completionTokens: number; totalTokens: number } | undefined
|
|
508
411
|
|
|
509
412
|
if (data.content) {
|
|
510
|
-
|
|
511
|
-
const textContent = data.content.find((c) => c.type === 'text')
|
|
512
|
-
content = textContent?.text
|
|
413
|
+
content = data.content.find((c) => c.type === 'text')?.text
|
|
513
414
|
if (data.usage) {
|
|
514
415
|
usage = {
|
|
515
416
|
promptTokens: data.usage.input_tokens,
|
|
@@ -518,15 +419,13 @@ async function processBedrockItem(
|
|
|
518
419
|
}
|
|
519
420
|
}
|
|
520
421
|
} else if (data.results?.[0]) {
|
|
521
|
-
// Titan format
|
|
522
422
|
content = data.results[0].outputText
|
|
523
423
|
usage = {
|
|
524
|
-
promptTokens: 0,
|
|
424
|
+
promptTokens: 0,
|
|
525
425
|
completionTokens: data.results[0].tokenCount || 0,
|
|
526
426
|
totalTokens: data.results[0].tokenCount || 0,
|
|
527
427
|
}
|
|
528
428
|
} else if (data.generation) {
|
|
529
|
-
// Llama/Mistral format
|
|
530
429
|
content = data.generation
|
|
531
430
|
if (data.generation_token_count !== undefined) {
|
|
532
431
|
usage = {
|
|
@@ -537,123 +436,22 @@ async function processBedrockItem(
|
|
|
537
436
|
}
|
|
538
437
|
}
|
|
539
438
|
|
|
540
|
-
let result: unknown = content
|
|
541
|
-
|
|
542
|
-
// Try to parse JSON if schema was provided
|
|
543
|
-
if (item.schema && content) {
|
|
544
|
-
try {
|
|
545
|
-
result = JSON.parse(content)
|
|
546
|
-
} catch {
|
|
547
|
-
// Keep as string
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
|
|
551
|
-
return {
|
|
552
|
-
id: item.id,
|
|
553
|
-
customId: item.id,
|
|
554
|
-
status: 'completed',
|
|
555
|
-
result,
|
|
556
|
-
usage,
|
|
557
|
-
}
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
/**
|
|
561
|
-
* Process a Bedrock item via Cloudflare AI Gateway
|
|
562
|
-
*
|
|
563
|
-
* NOTE: Unlike OpenAI and Google, Bedrock via AI Gateway still requires AWS Signature V4 signing.
|
|
564
|
-
* The gateway routes the request but doesn't handle authentication.
|
|
565
|
-
* @see https://developers.cloudflare.com/ai-gateway/usage/providers/bedrock/
|
|
566
|
-
*
|
|
567
|
-
* Gateway URL format: {gateway_url}/aws-bedrock/bedrock-runtime/{region}/model/{model}/invoke
|
|
568
|
-
*/
|
|
569
|
-
async function processBedrockItemViaGateway(
|
|
570
|
-
item: BatchItem,
|
|
571
|
-
config: ReturnType<typeof getConfig>,
|
|
572
|
-
model: string
|
|
573
|
-
): Promise<BatchResult> {
|
|
574
|
-
// AI Gateway URL for Bedrock - requires full path including region
|
|
575
|
-
// Format: {gateway_url}/aws-bedrock/bedrock-runtime/{region}/model/{model}/invoke
|
|
576
|
-
const url = `${config.gatewayUrl}/aws-bedrock/bedrock-runtime/${config.region}/model/${encodeURIComponent(model)}/invoke`
|
|
577
|
-
|
|
578
|
-
// Build the request body (Anthropic format for Claude models)
|
|
579
|
-
const body: Record<string, unknown> = {
|
|
580
|
-
anthropic_version: 'bedrock-2023-05-31',
|
|
581
|
-
max_tokens: item.options?.maxTokens || 4096,
|
|
582
|
-
messages: [{ role: 'user', content: item.prompt }],
|
|
583
|
-
system: item.options?.system,
|
|
584
|
-
temperature: item.options?.temperature,
|
|
585
|
-
}
|
|
586
|
-
|
|
587
|
-
const bodyStr = JSON.stringify(body)
|
|
588
|
-
|
|
589
|
-
// NOTE: Bedrock via Gateway still requires AWS SigV4 signing
|
|
590
|
-
// We need both the gateway token AND AWS credentials
|
|
591
|
-
if (!config.accessKeyId || !config.secretAccessKey) {
|
|
592
|
-
throw new Error(
|
|
593
|
-
'Bedrock via AI Gateway still requires AWS credentials for SigV4 signing. ' +
|
|
594
|
-
'Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.'
|
|
595
|
-
)
|
|
596
|
-
}
|
|
597
|
-
|
|
598
|
-
const headers = await signRequest('POST', url, bodyStr, config, 'bedrock')
|
|
599
|
-
headers.set('cf-aig-authorization', `Bearer ${config.gatewayToken}`)
|
|
600
|
-
|
|
601
|
-
const response = await fetch(url, {
|
|
602
|
-
method: 'POST',
|
|
603
|
-
headers,
|
|
604
|
-
body: bodyStr,
|
|
605
|
-
})
|
|
606
|
-
|
|
607
|
-
if (!response.ok) {
|
|
608
|
-
const error = await response.text()
|
|
609
|
-
throw new Error(`Bedrock via Gateway error: ${response.status} ${error}`)
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
const data = await response.json() as {
|
|
613
|
-
content?: Array<{ type: string; text?: string }>
|
|
614
|
-
usage?: { input_tokens: number; output_tokens: number }
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
// Extract content (Anthropic format)
|
|
618
|
-
const textContent = data.content?.find((c) => c.type === 'text')
|
|
619
|
-
let content = textContent?.text
|
|
620
|
-
let usage: { promptTokens: number; completionTokens: number; totalTokens: number } | undefined
|
|
621
|
-
|
|
622
|
-
if (data.usage) {
|
|
623
|
-
usage = {
|
|
624
|
-
promptTokens: data.usage.input_tokens,
|
|
625
|
-
completionTokens: data.usage.output_tokens,
|
|
626
|
-
totalTokens: data.usage.input_tokens + data.usage.output_tokens,
|
|
627
|
-
}
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
let result: unknown = content
|
|
631
|
-
|
|
632
|
-
// Try to parse JSON if schema was provided
|
|
633
|
-
if (item.schema && content) {
|
|
634
|
-
try {
|
|
635
|
-
result = JSON.parse(content)
|
|
636
|
-
} catch {
|
|
637
|
-
// Keep as string
|
|
638
|
-
}
|
|
639
|
-
}
|
|
640
|
-
|
|
641
439
|
return {
|
|
642
440
|
id: item.id,
|
|
643
441
|
customId: item.id,
|
|
644
442
|
status: 'completed',
|
|
645
|
-
result,
|
|
646
|
-
usage,
|
|
443
|
+
result: tryParseJson(content, !!item.schema),
|
|
444
|
+
...(usage && { usage }),
|
|
647
445
|
}
|
|
648
446
|
}
|
|
649
447
|
|
|
650
448
|
// ============================================================================
|
|
651
|
-
// True
|
|
449
|
+
// True S3-based batch inference (separate from the BatchProvider adapter)
|
|
652
450
|
// ============================================================================
|
|
653
451
|
|
|
654
452
|
/**
|
|
655
|
-
* Create and submit a true Bedrock batch inference job
|
|
656
|
-
*
|
|
453
|
+
* Create and submit a true Bedrock batch inference job.
|
|
454
|
+
* Requires S3 bucket access and proper IAM setup.
|
|
657
455
|
*/
|
|
658
456
|
export async function createBedrockBatchJob(
|
|
659
457
|
items: BatchItem[],
|
|
@@ -667,7 +465,6 @@ export async function createBedrockBatchJob(
|
|
|
667
465
|
): Promise<{ jobArn: string }> {
|
|
668
466
|
const config = getConfig()
|
|
669
467
|
|
|
670
|
-
// Build JSONL content
|
|
671
468
|
const jsonlLines = items.map((item) => {
|
|
672
469
|
const request: BedrockBatchRequest = {
|
|
673
470
|
recordId: item.id,
|
|
@@ -675,8 +472,10 @@ export async function createBedrockBatchJob(
|
|
|
675
472
|
anthropic_version: 'bedrock-2023-05-31',
|
|
676
473
|
max_tokens: item.options?.maxTokens || 4096,
|
|
677
474
|
messages: [{ role: 'user', content: item.prompt }],
|
|
678
|
-
system: item.options
|
|
679
|
-
|
|
475
|
+
...(item.options?.system !== undefined && { system: item.options.system }),
|
|
476
|
+
...(item.options?.temperature !== undefined && {
|
|
477
|
+
temperature: item.options.temperature,
|
|
478
|
+
}),
|
|
680
479
|
},
|
|
681
480
|
}
|
|
682
481
|
return JSON.stringify(request)
|
|
@@ -685,8 +484,6 @@ export async function createBedrockBatchJob(
|
|
|
685
484
|
const inputKey = `${options.s3InputPrefix || 'bedrock-batch/input'}/${options.jobName}.jsonl`
|
|
686
485
|
const outputPrefix = `${options.s3OutputPrefix || 'bedrock-batch/output'}/${options.jobName}/`
|
|
687
486
|
|
|
688
|
-
// Upload to S3
|
|
689
|
-
// In production, use @aws-sdk/client-s3
|
|
690
487
|
const s3Url = `https://${config.bucket}.s3.${config.region}.amazonaws.com/${inputKey}`
|
|
691
488
|
const content = jsonlLines.join('\n')
|
|
692
489
|
|
|
@@ -700,21 +497,16 @@ export async function createBedrockBatchJob(
|
|
|
700
497
|
throw new Error(`Failed to upload to S3: ${s3Response.status}`)
|
|
701
498
|
}
|
|
702
499
|
|
|
703
|
-
// Create batch inference job
|
|
704
500
|
const jobUrl = `https://bedrock.${config.region}.amazonaws.com/model-invocation-job`
|
|
705
501
|
const jobBody = JSON.stringify({
|
|
706
502
|
jobName: options.jobName,
|
|
707
503
|
modelId: model,
|
|
708
504
|
roleArn: options.roleArn,
|
|
709
505
|
inputDataConfig: {
|
|
710
|
-
s3InputDataConfig: {
|
|
711
|
-
s3Uri: `s3://${config.bucket}/${inputKey}`,
|
|
712
|
-
},
|
|
506
|
+
s3InputDataConfig: { s3Uri: `s3://${config.bucket}/${inputKey}` },
|
|
713
507
|
},
|
|
714
508
|
outputDataConfig: {
|
|
715
|
-
s3OutputDataConfig: {
|
|
716
|
-
s3Uri: `s3://${config.bucket}/${outputPrefix}`,
|
|
717
|
-
},
|
|
509
|
+
s3OutputDataConfig: { s3Uri: `s3://${config.bucket}/${outputPrefix}` },
|
|
718
510
|
},
|
|
719
511
|
})
|
|
720
512
|
|
|
@@ -729,70 +521,27 @@ export async function createBedrockBatchJob(
|
|
|
729
521
|
throw new Error(`Failed to create Bedrock batch job: ${jobResponse.status} ${error}`)
|
|
730
522
|
}
|
|
731
523
|
|
|
732
|
-
const jobData = await jobResponse.json() as { jobArn: string }
|
|
524
|
+
const jobData = (await jobResponse.json()) as { jobArn: string }
|
|
733
525
|
return jobData
|
|
734
526
|
}
|
|
735
527
|
|
|
736
528
|
// ============================================================================
|
|
737
|
-
//
|
|
738
|
-
// ============================================================================
|
|
739
|
-
|
|
740
|
-
// ============================================================================
|
|
741
|
-
// Bedrock Flex Adapter
|
|
529
|
+
// Bedrock flex adapter (FlexAdapter port)
|
|
742
530
|
// ============================================================================
|
|
743
531
|
|
|
744
|
-
/**
|
|
745
|
-
* AWS Bedrock Flex Adapter
|
|
746
|
-
*
|
|
747
|
-
* Flex processing uses concurrent requests for medium-sized batches (5-500 items).
|
|
748
|
-
* This provides a balance between:
|
|
749
|
-
* - Immediate execution (fast but full price, <5 items)
|
|
750
|
-
* - Full batch inference (50% discount but 24hr turnaround, 500+ items)
|
|
751
|
-
*
|
|
752
|
-
* Flex tier uses concurrent API calls with rate limiting, providing results
|
|
753
|
-
* in minutes rather than hours while still benefiting from efficient processing.
|
|
754
|
-
*/
|
|
755
532
|
const bedrockFlexAdapter: FlexAdapter = {
|
|
756
533
|
async submitFlex(items: BatchItem[], options: { model?: string }): Promise<BatchResult[]> {
|
|
757
534
|
const config = getConfig()
|
|
758
535
|
const model = options.model || 'anthropic.claude-3-sonnet-20240229-v1:0'
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
// Process items concurrently with rate limiting
|
|
764
|
-
for (let i = 0; i < items.length; i += CONCURRENCY) {
|
|
765
|
-
const batch = items.slice(i, i + CONCURRENCY)
|
|
766
|
-
|
|
767
|
-
const batchResults = await Promise.all(
|
|
768
|
-
batch.map(async (item) => {
|
|
769
|
-
try {
|
|
770
|
-
return await processBedrockItem(item, config, model)
|
|
771
|
-
} catch (error) {
|
|
772
|
-
return {
|
|
773
|
-
id: item.id,
|
|
774
|
-
customId: item.id,
|
|
775
|
-
status: 'failed' as const,
|
|
776
|
-
error: error instanceof Error ? error.message : 'Unknown error',
|
|
777
|
-
}
|
|
778
|
-
}
|
|
779
|
-
})
|
|
780
|
-
)
|
|
781
|
-
|
|
782
|
-
results.push(...batchResults)
|
|
783
|
-
|
|
784
|
-
// Add delay between batches to respect rate limits
|
|
785
|
-
if (i + CONCURRENCY < items.length) {
|
|
786
|
-
await new Promise((resolve) => setTimeout(resolve, 500))
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
|
|
790
|
-
return results
|
|
536
|
+
return processConcurrently(items, (item) => processBedrockItem(item, config, model), {
|
|
537
|
+
concurrency: 8,
|
|
538
|
+
delayBetweenWaves: 500,
|
|
539
|
+
})
|
|
791
540
|
},
|
|
792
541
|
}
|
|
793
542
|
|
|
794
543
|
// ============================================================================
|
|
795
|
-
// Register
|
|
544
|
+
// Register adapters
|
|
796
545
|
// ============================================================================
|
|
797
546
|
|
|
798
547
|
registerBatchAdapter('bedrock', bedrockAdapter)
|