@aeye/models 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ReplicateScrape.md +260 -23
- package/dist/scripts/scrape.js +1 -1
- package/dist/scripts/scrape.js.map +1 -1
- package/dist/scripts/scrapers/aws.d.ts.map +1 -1
- package/dist/scripts/scrapers/aws.js +1 -245
- package/dist/scripts/scrapers/aws.js.map +1 -1
- package/dist/scripts/scrapers/replicate.d.ts +4 -0
- package/dist/scripts/scrapers/replicate.d.ts.map +1 -1
- package/dist/scripts/scrapers/replicate.js +343 -10
- package/dist/scripts/scrapers/replicate.js.map +1 -1
- package/dist/src/models/replicate.d.ts.map +1 -1
- package/dist/src/models/replicate.js +2042 -1880
- package/dist/src/models/replicate.js.map +1 -1
- package/package.json +3 -2
- package/scripts/scrape.ts +1 -1
- package/scripts/scrapers/extract.md +274 -0
- package/scripts/scrapers/replicate.ts +382 -11
- package/src/models/replicate.ts +2042 -1880
|
@@ -7,10 +7,12 @@
|
|
|
7
7
|
import * as fs from 'fs/promises';
|
|
8
8
|
import * as path from 'path';
|
|
9
9
|
import * as url from 'url';
|
|
10
|
+
import { z } from 'zod';
|
|
10
11
|
import Replicate from 'replicate';
|
|
11
12
|
import type { ModelInfo, ModelCapability } from '@aeye/ai';
|
|
12
|
-
import { detectTier } from '@aeye/ai';
|
|
13
|
+
import { AI, detectTier } from '@aeye/ai';
|
|
13
14
|
import { writeModelTS } from '../codegen';
|
|
15
|
+
import { OpenRouterProvider } from 'packages/openrouter/src/openrouter';
|
|
14
16
|
|
|
15
17
|
const __filename = url.fileURLToPath(import.meta.url);
|
|
16
18
|
const __dirname = path.dirname(__filename);
|
|
@@ -177,6 +179,234 @@ function convertReplicateModel(model: ReplicateModelData): ModelInfo {
|
|
|
177
179
|
};
|
|
178
180
|
}
|
|
179
181
|
|
|
182
|
+
/**
 * AI client used by this script, configured with a single provider
 * registered under the key `openrouter`.
 *
 * NOTE(review): the API key is taken from the OPENROUTER_API_KEY environment
 * variable with a non-null assertion, so a missing variable is not detected
 * at this point.
 */
const ai = AI.with()
  .providers({
    openrouter: new OpenRouterProvider({
      apiKey: process.env.OPENROUTER_API_KEY!,
      hooks: {
        chat: {
          // Debug aid: print the second hook argument before each chat request.
          beforeRequest(_first, request) {
            console.log('OpenRouter Chat Request:', request);
          },
        },
      },
    }),
  })
  .create({});
|
|
198
|
+
|
|
199
|
+
// Contents of extract.md (located next to this script), read once at module
// load via top-level await; passed to the `extract` prompt as `typeInfo`.
const typeInfo = await fs.readFile(path.join(__dirname, 'extract.md'), 'utf-8');
|
|
200
|
+
|
|
201
|
+
/**
 * Prompt definition that asks an LLM to derive two things from raw Replicate
 * model data:
 *
 *  - `transformerImplementation`: TypeScript source text for a ModelTransformer
 *  - `modelInfo`: structured metadata (capabilities, tier, pricing, limits, ...)
 *
 * `input` supplies the values for the `{{modelData}}` and `{{typeInfo}}`
 * placeholders used in `content` (presumably substituted by the prompt
 * library - confirm against the `ai.prompt` documentation). `config` forwards
 * the caller-supplied `model` id so the LLM can be chosen per call.
 *
 * The example transformer inside the prompt is presented to the model as
 * "the exact format to follow", so it must itself be valid TypeScript: it is
 * written as an immediately-invoked arrow function, `(() => { ... })()`.
 */
const extract = ai.prompt({
  name: 'extract',
  description: 'Extract model information & a transformer definition from Replicate model data',
  content: `Extract model information & a transformer definition from the following data.

<modelData>
{{modelData}}
</modelData>

Create a TypeScript implementation of a ModelTransformer for the model.
Here's an example transformer implementation for reference. This is the exact format to follow:
<exampleTransformer>
{
// this key comes from modelData.owner/modelData.name
"google/nano-banana": (() => {
// you must use this ModelTransformer type to avoid TypeScript errors
const transformer: ModelTransformer = {
imageGenerate: {
// only use variable known to be in this request type based on the types below
convertRequest: async (request, ctx) => ({
prompt: request.prompt,
...request.extra,
}),
// only use known output schema from the model and only use properties expected on the response type based on the types below
parseResponse: async (response, ctx) => ({
images: [{ url: await toURL(response) }],
}),
},
imageEdit: {
convertRequest: async (request, ctx) => ({
prompt: request.prompt,
image_input: [await toURL(request.image)],
...request.extra,
}),
parseResponse: async (response, ctx) => ({
images: [{ url: await toURL(response) }],
}),
},
};
return transformer;
})(),
}
</exampleTransformer>

<availableFunctions>
- \`toURL(resource, mimeType?: string, fallback?: string): Promise<string>\` converts to URL string
- \`toBase64(resource, mimeType?: string, fallback?: string): Promise<string>\`
- \`toText(resource, fallback?: string): Promise<string>\`
- \`toStream(resource, fallback?: Readable): Promise<Readable>\`
- \`toFile(resource, mimeType?: string, filename?: string): Promise<File>\`
</availableFunctions>

Here's how the replicate code looks around the transformer:
<replicateCode>
// =============================================================
// Execute
// =============================================================
const { convertRequest, parseResponse } = transformer;
const client = new Replicate({ /* config */ });
const input = await convertRequest(request, ctx);
const output = await client.run(modelId, { input, signal });
const response = await parseResponse(output, ctx);

// =============================================================
// Stream
// =============================================================
const { convertRequest, parseChunk } = transformer;
const client = new Replicate({ /* config */ });
const input = await convertRequest(request, ctx);
for await (const event of client.stream(modelId, { input, signal })) {
if (signal?.aborted) {
throw new Error('Request aborted');
}

// Parse chunk using transformer
const chunk = await parseChunk(event, ctx);
yield chunk;
}
</replicateCode>

Here is type information you need to use to understand the shape of all the types:
It is EXTREMELY important you only use properties that are guaranteed to be present based on the types below when it comes to transformer, request, response, and chunk types.
The request parameters in the converted request should only be the properties that are known to be in the input schema for the model. The rest may be passed in via \`extra\` property.
<typeInfo>
{{typeInfo}}
</typeInfo>
`,
  strict: false,
  // Shape of the structured answer requested from the model.
  schema: z.object({
    transformerImplementation: z.string().describe('The TypeScript implementation of the model handler (without imports or ``` marks)'),
    modelInfo: z.object({
      capabilities: z.array(z.enum([
        'chat',
        'tools',
        'vision',
        'json',
        'structured',
        'streaming',
        'reasoning',
        'image',
        'audio',
        'hearing',
        'embedding',
        'zdr'
      ])).describe(`Set of capabilities this model supports:

- chat: Text input & output
- tools: Ability to make tool calls
- vision: Image inputs
- json: Supports JSON response format
- structured: Supports structured output (strict JSON response format)
- streaming: Supports streaming responses
- reasoning: Enhanced reasoning capabilities
- image: Image output
- audio: Audio output
- hearing: Audio input
- embedding: Text -> embedding generation
- zdr: Supports zero-data-retention through a parameter`),
      tier: z.enum(['flagship', 'efficient', 'legacy', 'experimental']).describe(`Model performance and quality tiers.
Used for categorizing models by their capabilities and cost.

- flagship: Top-tier models with best performance
- efficient: Smaller, faster, more cost-effective models
- legacy: Older models, may be deprecated
- experimental: Preview/beta models`),
      pricing: z.object({
        text: z.object({
          input: z.number(),
          output: z.number(),
          cached: z.number(),
        }).optional(),
        audio: z.object({
          input: z.number().optional(),
          output: z.number().optional(),
          perSecond: z.number().optional(),
        }).optional(),
        image: z.object({
          input: z.number().optional(),
          output: z.array(z.object({
            quality: z.string().describe('Quality level - like low, medium, high'),
            sizes: z.array(z.object({
              width: z.number(),
              height: z.number(),
              cost: z.number().describe('Cost per image in USD'),
            })).describe('Supported image sizes and their costs'),
          })).optional(),
        }).optional(),
        reasoning: z.object({
          input: z.number().optional(),
          output: z.number().optional(),
          cached: z.number().optional(),
        }).optional(),
        embedding: z.object({
          cost: z.number().optional(),
        }).optional(),
        perRequest: z.number().optional().describe('Flat cost per request in USD'),
      }).describe('Pricing information. input/output/cached are per 1 million tokens unless otherwise specified.'),
      contextWindow: z.number(),
      maxOutputTokens: z.number().optional(),
      metrics: z.object({
        tokensPerSecond: z.number().optional().describe('Processing speed in tokens per second'),
        timeToFirstToken: z.number().optional().describe('Time to first token in milliseconds'),
        averageRequestDuration: z.number().optional().describe('Average request duration in milliseconds'),
        accuracyScore: z.number().optional().describe('Accuracy score based on benchmark tests 0-1'),
      }).optional().describe('Performance metrics if available'),
      tokenizer: z.enum([
        'Other', 'GPT', 'Mistral', 'Llama3', 'Qwen3', 'Qwen', 'Gemini', 'DeepSeek', 'Claude', 'Grok', 'Llama4', 'Llama2', 'Cohere', 'Nova', 'Router'
      ]).optional().describe('The tokenizer type used by this model if known, otherwise leave undefined'),
      supportedParameters: z.array(z.enum([
        // Chat Request
        'maxTokens',            // max_tokens / max_completion_tokens
        'temperature',          // temperature
        'topP',                 // top_p
        'frequencyPenalty',     // frequency_penalty
        'presencePenalty',      // presence_penalty
        'stop',                 // stop
        'seed',                 // seed
        'responseFormat',       // response_format
        'structuredOutput',     // structured_outputs
        'tools',                // tools
        'toolChoice',           // tool_choice
        'logitBias',            // logit_bias
        'logProbabilities',     // logprobs
        'reason',               // reasoning
        // Image
        'imageBackground',      // background
        'imageMultiple',        // n
        'imageFormat',          // output_format
        'imageStream',          // stream / partial_images
        'imageStyle',
        // Embedding
        'embeddingDimensions',  // dimensions
        // Transcription
        'transcribeStream',     // stream
        'transcribePrompt',     // prompt
        // Speech
        'speechInstructions',   // instructions
      ])).optional().describe('The supported parameters for this model'),
    }).describe('The extracted model information'),
  }),
  // Only `modelData` is used for the template; `model` is consumed by `config`.
  input: ({ modelData }: { model: string, modelData: ReplicateModelData }) => ({
    modelData: JSON.stringify(modelData, null, 2),
    typeInfo,
  }),
  config: (input) => ({
    model: input?.model,
  }),
})
|
|
409
|
+
|
|
180
410
|
/**
|
|
181
411
|
* Fetch all models from Replicate collections
|
|
182
412
|
*/
|
|
@@ -196,10 +426,17 @@ async function fetchAllModels(apiKey?: string): Promise<ReplicateModelData[]> {
|
|
|
196
426
|
'image-to-text',
|
|
197
427
|
'text-to-speech',
|
|
198
428
|
'speech-to-text',
|
|
199
|
-
'image-to-image',
|
|
200
|
-
'text-to-video',
|
|
201
|
-
'image-restoration',
|
|
202
429
|
'super-resolution',
|
|
430
|
+
'image-editing',
|
|
431
|
+
'ai-face-generator',
|
|
432
|
+
'ai-music-generation',
|
|
433
|
+
'text-recognition-ocr',
|
|
434
|
+
'flux',
|
|
435
|
+
'ai-image-restoration',
|
|
436
|
+
'text-classification',
|
|
437
|
+
'sketch-to-image',
|
|
438
|
+
'embedding-models',
|
|
439
|
+
'vision-models',
|
|
203
440
|
];
|
|
204
441
|
|
|
205
442
|
console.log(`Fetching models from ${collections.length} collections...`);
|
|
@@ -230,7 +467,10 @@ async function fetchAllModels(apiKey?: string): Promise<ReplicateModelData[]> {
|
|
|
230
467
|
}
|
|
231
468
|
}
|
|
232
469
|
|
|
470
|
+
allModels.sort((a, b) => a.name.localeCompare(b.name));
|
|
471
|
+
|
|
233
472
|
console.log(`\n✓ Fetched ${allModels.length} unique Replicate models`);
|
|
473
|
+
|
|
234
474
|
return allModels;
|
|
235
475
|
}
|
|
236
476
|
|
|
@@ -289,7 +529,7 @@ async function fetchSchemasParallel(
|
|
|
289
529
|
return schemasCache;
|
|
290
530
|
}
|
|
291
531
|
|
|
292
|
-
console.log(` ${schemasCache.length} models already have schemas`);
|
|
532
|
+
console.log(` ${Object.keys(schemasCache).length - modelsToFetch.length} models already have schemas`);
|
|
293
533
|
console.log(` Fetching schemas for ${modelsToFetch.length} models...\n`);
|
|
294
534
|
|
|
295
535
|
// Process in batches with concurrency control
|
|
@@ -324,7 +564,15 @@ async function fetchSchemasParallel(
|
|
|
324
564
|
export async function scrapeReplicate(
|
|
325
565
|
outputDir: string,
|
|
326
566
|
cacheDir?: string,
|
|
327
|
-
options: {
|
|
567
|
+
options: {
|
|
568
|
+
concurrency?: number,
|
|
569
|
+
transformers?: boolean,
|
|
570
|
+
transformerModel: string,
|
|
571
|
+
models?: string[],
|
|
572
|
+
n?: number,
|
|
573
|
+
} = {
|
|
574
|
+
transformerModel: 'google/gemini-2.5-flash',
|
|
575
|
+
}
|
|
328
576
|
): Promise<void> {
|
|
329
577
|
const { concurrency = 50 } = options;
|
|
330
578
|
|
|
@@ -339,6 +587,9 @@ export async function scrapeReplicate(
|
|
|
339
587
|
// Fetch all models from collections
|
|
340
588
|
const models = await fetchAllModels(apiKey);
|
|
341
589
|
|
|
590
|
+
// Convert to ModelInfo format
|
|
591
|
+
const modelInfos = models.map(convertReplicateModel);
|
|
592
|
+
|
|
342
593
|
// Create output directory
|
|
343
594
|
await fs.mkdir(outputDir, { recursive: true });
|
|
344
595
|
|
|
@@ -354,12 +605,14 @@ export async function scrapeReplicate(
|
|
|
354
605
|
);
|
|
355
606
|
console.log(`✓ Saved raw Replicate models to replicate-models.json`);
|
|
356
607
|
|
|
608
|
+
let schemasCache: Record<string, ReplicateModelData> = {};
|
|
609
|
+
|
|
357
610
|
// Create cache directory if specified
|
|
358
611
|
if (cacheDir) {
|
|
359
612
|
await fs.mkdir(cacheDir, { recursive: true });
|
|
360
613
|
|
|
361
614
|
// Cache detailed model schemas (for transformer generation)
|
|
362
|
-
|
|
615
|
+
schemasCache = await fetchSchemasParallel(models, apiKey, concurrency);
|
|
363
616
|
|
|
364
617
|
// Save schemas cache
|
|
365
618
|
await fs.writeFile(
|
|
@@ -405,10 +658,112 @@ export async function scrapeReplicate(
|
|
|
405
658
|
);
|
|
406
659
|
console.log(`✓ Saved schema chunk ${chunkIndex} with ${Object.keys(currentChunk).length} models`);
|
|
407
660
|
}
|
|
408
|
-
}
|
|
409
661
|
|
|
410
|
-
|
|
411
|
-
|
|
662
|
+
// Calculate number of unique schemas
|
|
663
|
+
const uniqueSchemas = new Set<string>();
|
|
664
|
+
for (const modelData of Object.values(schemasCache)) {
|
|
665
|
+
if (modelData.latest_version) {
|
|
666
|
+
uniqueSchemas.add(JSON.stringify(modelData.latest_version.openapi_schema.components.schemas));
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
console.log(`\n✓ Found ${uniqueSchemas.size} unique model schemas across ${Object.keys(schemasCache).length} models\n`);
|
|
671
|
+
|
|
672
|
+
// Generate Model Transformers
|
|
673
|
+
if (options.transformers) {
  console.log('=== Generating Model Transformers ===\n');

  // All generated transformer modules are written to <cacheDir>/replicate.
  const transformersDir = path.join(cacheDir, 'replicate');
  await fs.mkdir(transformersDir, { recursive: true });

  // Builds the "owner/name" identifier used as cache key and CLI filter value.
  const toModelId = (model: { owner: string; name: string }): string => `${model.owner}/${model.name}`;

  // File name of the generated module for a model id (non-word chars become '-').
  const toTransformerFile = (modelId: string): string => modelId.replace(/[^\w]/g, '-') + '.ts';

  // JSON.stringify renders a Set as `{}`; emit Sets as arrays so the
  // capabilities / supportedParameters survive in the generated file.
  const serializeSets = (_key: string, value: unknown): unknown =>
    value instanceof Set ? Array.from(value) : value;

  let transformingModels = models;

  // Explicitly requested models are always regenerated (existing files are overwritten).
  const requestedIds = options.models ?? [];
  const overwrite = requestedIds.length > 0;

  if (overwrite) {
    const requested = new Set(requestedIds);
    transformingModels = transformingModels.filter((model) => requested.has(toModelId(model)));
  } else {
    // No explicit selection: skip models whose transformer file already exists.
    // Compare full file names (including ".ts") on both sides.
    const existingFiles = new Set(await fs.readdir(transformersDir));
    transformingModels = transformingModels.filter(
      (model) => !existingFiles.has(toTransformerFile(toModelId(model))),
    );
  }

  // Limit to the first N of the *already filtered* models if specified.
  if (options.n && options.n > 0) {
    transformingModels = transformingModels.slice(0, options.n);
  }

  // Call extractor for each model (sequentially, one request at a time).
  console.log(`Generating transformers for ${transformingModels.length} models...\n`);

  for (const model of transformingModels) {
    const modelId = toModelId(model);
    const transformedFile = toTransformerFile(modelId);
    // Prefer the detailed (schema-bearing) record when one was fetched.
    const modelData = schemasCache[modelId] || model;

    console.log(`\n--- Generating transformer for model: ${modelId} ---`);

    const result = await extract.get('result', { modelData, model: options.transformerModel });
    if (!result) {
      console.log(`  ✗ Failed to generate transformer for model: ${modelId}`);
      continue;
    }

    const converted = convertReplicateModel(modelData);

    // Identity and metadata come from the scraped data; everything else
    // comes from the extractor's structured answer.
    const translatedModelInfo: ModelInfo<'replicate'> = {
      id: converted.id,
      provider: 'replicate',
      name: converted.name,
      contextWindow: result.modelInfo.contextWindow,
      maxOutputTokens: result.modelInfo.maxOutputTokens,
      pricing: result.modelInfo.pricing,
      tier: result.modelInfo.tier,
      capabilities: new Set(result.modelInfo.capabilities),
      metrics: result.modelInfo.metrics,
      tokenizer: result.modelInfo.tokenizer,
      supportedParameters: new Set(result.modelInfo.supportedParameters),
      metadata: converted.metadata,
    };

    // Replace the heuristic entry in the main list, or append when absent.
    const replaceIndex = modelInfos.findIndex((m) => m.id === translatedModelInfo.id);
    if (replaceIndex >= 0) {
      modelInfos[replaceIndex] = translatedModelInfo;
    } else {
      console.log(`  ⚠ ModelInfo for ${modelId} not found in main list, adding new entry`);
      modelInfos.push(translatedModelInfo);
    }

    await fs.writeFile(
      path.join(transformersDir, transformedFile),
      `
import { toURL, toBase64, toText, toStream, toFile } from '@aeye/core';
import { ModelTransformer } from '@aeye/ai';

const transformer = ${result.transformerImplementation};

const modelInfo = ${JSON.stringify(translatedModelInfo, serializeSets, 2)};

export { transformer, modelInfo };`.trim()
    );
  }
}
|
|
766
|
+
}
|
|
412
767
|
|
|
413
768
|
// Save JSON for reference
|
|
414
769
|
await fs.writeFile(
|
|
@@ -441,7 +796,23 @@ if (process.argv[1].endsWith('replicate.ts')) {
|
|
|
441
796
|
? parseInt(concurrencyArg.split('=')[1], 10)
|
|
442
797
|
: 50;
|
|
443
798
|
|
|
444
|
-
|
|
799
|
+
const transformers = args.includes('--transformers');
|
|
800
|
+
|
|
801
|
+
const transformerModelArg = args.find((arg) => arg.startsWith('--transformer-model='));
|
|
802
|
+
const transformerModel = transformerModelArg
|
|
803
|
+
? transformerModelArg.split('=', 2)[1]
|
|
804
|
+
: 'google/gemini-2.5-pro';
|
|
805
|
+
|
|
806
|
+
const modelsArg = args.find((arg) => arg.startsWith('--models='));
|
|
807
|
+
let models: string[] | undefined = undefined;
|
|
808
|
+
if (modelsArg) {
|
|
809
|
+
models = modelsArg.substring(9).split(',').map((m) => m.trim());
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
const nArg = args.find((arg) => arg.startsWith('--n='));;
|
|
813
|
+
const n = nArg ? parseInt(nArg.split('=')[1], 10) : undefined;
|
|
814
|
+
|
|
815
|
+
scrapeReplicate(outputDir, cacheDir, { concurrency, transformers, transformerModel, models, n }).catch((error) => {
|
|
445
816
|
console.error('✗ Replicate scraping failed:', error);
|
|
446
817
|
process.exit(1);
|
|
447
818
|
});
|