@aeye/models 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/ReplicateScrape.md +54 -0
  2. package/dist/scripts/codegen.d.ts +21 -0
  3. package/dist/scripts/codegen.d.ts.map +1 -0
  4. package/dist/scripts/codegen.js +102 -0
  5. package/dist/scripts/codegen.js.map +1 -0
  6. package/dist/scripts/scrape.d.ts +19 -0
  7. package/dist/scripts/scrape.d.ts.map +1 -0
  8. package/dist/scripts/scrape.js +146 -0
  9. package/dist/scripts/scrape.js.map +1 -0
  10. package/dist/scripts/scrapers/__tests__/aws.test.d.ts +8 -0
  11. package/dist/scripts/scrapers/__tests__/aws.test.d.ts.map +1 -0
  12. package/dist/scripts/scrapers/__tests__/aws.test.js +73 -0
  13. package/dist/scripts/scrapers/__tests__/aws.test.js.map +1 -0
  14. package/dist/scripts/scrapers/aws.d.ts +12 -0
  15. package/dist/scripts/scrapers/aws.d.ts.map +1 -0
  16. package/dist/scripts/scrapers/aws.js +314 -0
  17. package/dist/scripts/scrapers/aws.js.map +1 -0
  18. package/dist/scripts/scrapers/openai.d.ts +12 -0
  19. package/dist/scripts/scrapers/openai.d.ts.map +1 -0
  20. package/dist/scripts/scrapers/openai.js +490 -0
  21. package/dist/scripts/scrapers/openai.js.map +1 -0
  22. package/dist/scripts/scrapers/openrouter.d.ts +13 -0
  23. package/dist/scripts/scrapers/openrouter.d.ts.map +1 -0
  24. package/dist/scripts/scrapers/openrouter.js +156 -0
  25. package/dist/scripts/scrapers/openrouter.js.map +1 -0
  26. package/dist/scripts/scrapers/replicate.d.ts +12 -0
  27. package/dist/scripts/scrapers/replicate.d.ts.map +1 -0
  28. package/dist/scripts/scrapers/replicate.js +305 -0
  29. package/dist/scripts/scrapers/replicate.js.map +1 -0
  30. package/dist/src/index.d.ts +11 -0
  31. package/dist/src/index.d.ts.map +1 -0
  32. package/dist/src/index.js +11 -0
  33. package/dist/src/index.js.map +1 -0
  34. package/dist/src/models/aws.d.ts +11 -0
  35. package/dist/src/models/aws.d.ts.map +1 -0
  36. package/dist/src/models/aws.js +2632 -0
  37. package/dist/src/models/aws.js.map +1 -0
  38. package/dist/src/models/index.d.ts +15 -0
  39. package/dist/src/models/index.d.ts.map +1 -0
  40. package/dist/src/models/index.js +18 -0
  41. package/dist/src/models/index.js.map +1 -0
  42. package/dist/src/models/openai.d.ts +11 -0
  43. package/dist/src/models/openai.d.ts.map +1 -0
  44. package/dist/src/models/openai.js +2207 -0
  45. package/dist/src/models/openai.js.map +1 -0
  46. package/dist/src/models/openrouter.d.ts +11 -0
  47. package/dist/src/models/openrouter.d.ts.map +1 -0
  48. package/dist/src/models/openrouter.js +9786 -0
  49. package/dist/src/models/openrouter.js.map +1 -0
  50. package/dist/src/models/replicate.d.ts +11 -0
  51. package/dist/src/models/replicate.d.ts.map +1 -0
  52. package/dist/src/models/replicate.js +4106 -0
  53. package/dist/src/models/replicate.js.map +1 -0
  54. package/dist/src/transformers/index.d.ts +23 -0
  55. package/dist/src/transformers/index.d.ts.map +1 -0
  56. package/dist/src/transformers/index.js +24 -0
  57. package/dist/src/transformers/index.js.map +1 -0
  58. package/package.json +50 -0
  59. package/scripts/codegen.ts +117 -0
  60. package/scripts/scrape.ts +182 -0
  61. package/scripts/scrapers/__tests__/aws.test.ts +86 -0
  62. package/scripts/scrapers/aws.ts +370 -0
  63. package/scripts/scrapers/openai.ts +619 -0
  64. package/scripts/scrapers/openrouter.ts +214 -0
  65. package/scripts/scrapers/replicate.ts +448 -0
  66. package/scripts/tsconfig.json +24 -0
  67. package/src/index.ts +11 -0
  68. package/src/models/aws.ts +2634 -0
  69. package/src/models/index.ts +21 -0
  70. package/src/models/openai.ts +2209 -0
  71. package/src/models/openrouter.ts +9788 -0
  72. package/src/models/replicate.ts +4108 -0
  73. package/src/transformers/index.ts +26 -0
  74. package/tsconfig.json +14 -0
@@ -0,0 +1,214 @@
1
+ /**
2
+ * OpenRouter Model Scraper
3
+ *
4
+ * Fetches model information from OpenRouter API endpoints and scrapes performance metrics
5
+ */
6
+
7
+ import * as fs from 'fs/promises';
8
+ import * as path from 'path';
9
+ import * as puppeteer from 'puppeteer';
10
+ import * as url from 'url';
11
+ import { fetchModels, fetchZDRModels, convertOpenRouterModel } from '@aeye/openrouter';
12
+ import { writeModelTS } from '../codegen';
13
+
14
+ const __filename = url.fileURLToPath(import.meta.url);
15
+ const __dirname = path.dirname(__filename);
16
+
17
+ /**
18
+ * Scrape performance metrics from OpenRouter model page
19
+ */
20
+ async function scrapeModelMetrics(modelId: string, browser: puppeteer.Browser): Promise<{
21
+ modelId: string;
22
+ metrics: {
23
+ latency?: number;
24
+ throughput?: number;
25
+ uptime?: number;
26
+ } | null;
27
+ }> {
28
+ const page = await browser.newPage();
29
+
30
+ try {
31
+ // OpenRouter model URLs use the canonical slug format
32
+ const url = `https://openrouter.ai/${modelId}`;
33
+ const response = await page.goto(url, {
34
+ waitUntil: ['domcontentloaded', 'networkidle2'],
35
+ timeout: 30000,
36
+ });
37
+
38
+ if (!response || response.status() === 404) {
39
+ return { modelId, metrics: null };
40
+ }
41
+
42
+ const bodyText = await page.$eval('body', el => el.textContent || '');
43
+ const metrics: { latency?: number; throughput?: number; uptime?: number } = {};
44
+
45
+ // Latency0.62sThroughput47.46tpsUptime100.0%Uptime 100.0
46
+
47
+ // Look for latency (in ms)
48
+ const latencyMatch = bodyText.match(/latency[:\s]*([0-9.]+)\s*s/i);
49
+ if (latencyMatch) {
50
+ metrics.latency = parseFloat(latencyMatch[1]);
51
+ }
52
+
53
+ // Look for throughput (tokens/second)
54
+ const throughputMatch = bodyText.match(/throughput[:\s]*([0-9.]+)\s*tps/i);
55
+ if (throughputMatch) {
56
+ metrics.throughput = parseFloat(throughputMatch[1]);
57
+ }
58
+
59
+ // Look for uptime (percentage)
60
+ const uptimeMatch = bodyText.match(/uptime[:\s]*([0-9.]+)%/i);
61
+ if (uptimeMatch) {
62
+ metrics.uptime = parseFloat(uptimeMatch[1]);
63
+ }
64
+
65
+ if (!latencyMatch || !throughputMatch || !uptimeMatch) {
66
+ console.log(`⚠ No metrics found on page ${modelId}`);
67
+ await fs.writeFile(`./data/pages/openrouter-${modelId.replace(/[^a-z]/gi, '')}.html`, bodyText);
68
+ }
69
+
70
+ return { modelId, metrics };
71
+ } catch (error) {
72
+ console.log(` ✗ Error scraping metrics for ${modelId}:`, error instanceof Error ? error.message : error);
73
+ return { modelId, metrics: null };
74
+ } finally {
75
+ await page.close();
76
+ }
77
+ }
78
+
79
+ /**
80
+ * Scrape metrics for multiple models in parallel with concurrency control
81
+ */
82
+ async function scrapeMetricsParallel(
83
+ modelIds: string[],
84
+ concurrency: number = 5
85
+ ): Promise<Map<string, { latency?: number; throughput?: number; uptime?: number }>> {
86
+ console.log(`\nScraping performance metrics from OpenRouter model pages (concurrency: ${concurrency})...`);
87
+
88
+ const browser = await puppeteer.launch({ headless: true });
89
+ const results = new Map<string, { latency?: number; throughput?: number; uptime?: number }>();
90
+
91
+ try {
92
+ // Process in batches with concurrency control
93
+ for (let i = 0; i < modelIds.length; i += concurrency) {
94
+ const batch = modelIds.slice(i, i + concurrency);
95
+ console.log(` Processing batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(modelIds.length / concurrency)} (${batch.length} models)...`);
96
+
97
+ const batchResults = await Promise.all(
98
+ batch.map((modelId) => scrapeModelMetrics(modelId, browser))
99
+ );
100
+
101
+ // Store results
102
+ for (const { modelId, metrics } of batchResults) {
103
+ if (metrics) {
104
+ results.set(modelId, metrics);
105
+ }
106
+ }
107
+
108
+ console.log(` ✓ Scraped ${batchResults.filter((r) => r.metrics).length}/${batch.length} models`);
109
+ }
110
+
111
+ console.log(`✓ Scraped metrics for ${results.size}/${modelIds.length} models\n`);
112
+ } finally {
113
+ await browser.close();
114
+ }
115
+
116
+ return results;
117
+ }
118
+
119
+ /**
120
+ * Main scraper function
121
+ */
122
+ export async function scrapeOpenRouter(
123
+ outputDir: string,
124
+ options: { metrics?: boolean; concurrency?: number } = {}
125
+ ): Promise<void> {
126
+ const { metrics: scrapeMetrics = false, concurrency = 5 } = options;
127
+
128
+ console.log('\n=== OpenRouter Scraper ===\n');
129
+
130
+ // Fetch models using existing functions
131
+ const [models, zdrModelIds] = await Promise.all([
132
+ fetchModels(process.env.OPENROUTER_API_KEY),
133
+ fetchZDRModels(process.env.OPENROUTER_API_KEY),
134
+ ]);
135
+
136
+ console.log(`✓ Fetched ${models.length} OpenRouter models`);
137
+ console.log(`✓ Fetched ${zdrModelIds.size} ZDR model IDs`);
138
+
139
+ // Save raw data
140
+ await fs.mkdir(outputDir, { recursive: true });
141
+
142
+ await fs.writeFile(
143
+ path.join(outputDir, 'openrouter-models.json'),
144
+ JSON.stringify({ data: models }, (key, value) => {
145
+ if (value instanceof Set) {
146
+ return Array.from(value);
147
+ }
148
+ return value;
149
+ }, 2)
150
+ );
151
+ console.log(`✓ Saved raw OpenRouter models to openrouter-models.json`);
152
+
153
+ if (zdrModelIds.size > 0) {
154
+ await fs.writeFile(
155
+ path.join(outputDir, 'openrouter-zdr.json'),
156
+ JSON.stringify({ data: Array.from(zdrModelIds) }, (key, value) => {
157
+ if (value instanceof Set) {
158
+ return Array.from(value);
159
+ }
160
+ return value;
161
+ }, 2)
162
+ );
163
+ console.log(`✓ Saved ZDR model IDs to openrouter-zdr.json`);
164
+ }
165
+
166
+ // Scrape performance metrics if requested
167
+ let metricsMap = new Map<string, { latency?: number; throughput?: number; uptime?: number }>();
168
+
169
+ if (scrapeMetrics) {
170
+ const modelIds = models.map((m) => m.id);
171
+ metricsMap = await scrapeMetricsParallel(modelIds, concurrency);
172
+ }
173
+
174
+ // Convert to ModelInfo format
175
+ const modelInfos = models.map((model) =>
176
+ convertOpenRouterModel(model, zdrModelIds, metricsMap.get(model.id))
177
+ );
178
+
179
+ // Save JSON for reference
180
+ await fs.writeFile(
181
+ path.join(outputDir, 'openrouter-modelinfo.json'),
182
+ JSON.stringify(modelInfos, (key, value) => {
183
+ if (value instanceof Set) {
184
+ return Array.from(value);
185
+ }
186
+ return value;
187
+ }, 2)
188
+ );
189
+ console.log(`✓ Saved ${modelInfos.length} models to JSON`);
190
+
191
+ // Generate TypeScript file
192
+ const srcDir = path.join(__dirname, '../../src/models');
193
+ await writeModelTS(modelInfos, 'openrouterModels', path.join(srcDir, 'openrouter.ts'));
194
+ console.log(`✓ Generated TypeScript file: src/models/openrouter.ts`);
195
+
196
+ console.log('\n✓ OpenRouter scraping complete\n');
197
+ }
198
+
199
+ // CLI execution
200
+ if (process.argv[1].endsWith('openrouter.ts')) {
201
+ const args = process.argv.slice(2);
202
+ const outputDir = args.find((arg) => !arg.startsWith('--')) || path.join(__dirname, '../../data');
203
+ const scrapeMetrics = args.includes('--metrics');
204
+
205
+ const concurrencyArg = args.find((arg) => arg.startsWith('--concurrency='));
206
+ const concurrency = concurrencyArg
207
+ ? parseInt(concurrencyArg.split('=')[1], 10)
208
+ : 5;
209
+
210
+ scrapeOpenRouter(outputDir, { metrics: scrapeMetrics, concurrency }).catch((error) => {
211
+ console.error('✗ OpenRouter scraping failed:', error);
212
+ process.exit(1);
213
+ });
214
+ }
@@ -0,0 +1,448 @@
1
+ /**
2
+ * Replicate Model Scraper
3
+ *
4
+ * Fetches model information from Replicate API using the Replicate npm module
5
+ */
6
+
7
+ import * as fs from 'fs/promises';
8
+ import * as path from 'path';
9
+ import * as url from 'url';
10
+ import Replicate from 'replicate';
11
+ import type { ModelInfo, ModelCapability } from '@aeye/ai';
12
+ import { detectTier } from '@aeye/ai';
13
+ import { writeModelTS } from '../codegen';
14
+
15
+ const __filename = url.fileURLToPath(import.meta.url);
16
+ const __dirname = path.dirname(__filename);
17
+
18
/**
 * Shape of a model record as returned by the Replicate API and consumed by
 * this scraper. Only the fields the scraper reads are guaranteed here.
 */
interface ReplicateModelData {
  url: string; // canonical model page URL
  owner: string; // account or org that owns the model
  name: string; // model slug, unique within the owner
  description: string | null;
  visibility: string; // e.g. "public" — TODO confirm the full value set
  github_url: string | null;
  paper_url: string | null;
  license_url: string | null;
  run_count: number; // total public run count
  cover_image_url: string | null;
  // Example prediction shown on the model page, when one is configured
  default_example: {
    model: string;
    version: string;
    input: Record<string, unknown>;
    output: unknown;
  } | null;
  // Most recent published version; null for models with no versions yet.
  // The nested OpenAPI schema drives capability detection and the
  // transformer-generation cache.
  latest_version: {
    id: string;
    created_at: string;
    cog_version: string;
    openapi_schema: {
      info: {
        title: string;
        version: string;
      };
      paths: Record<string, unknown>;
      components: {
        schemas: {
          // Prediction input schema (JSON-schema-style properties)
          Input?: {
            type: string;
            properties: Record<string, unknown>;
            required?: string[];
          };
          // Prediction output schema; may be an object or an array type
          Output?: {
            type: string;
            properties?: Record<string, unknown>;
            items?: unknown;
          };
        };
      };
    };
  } | null;
}
62
+
63
+ /**
64
+ * Detect capabilities from model name, description, and schema
65
+ */
66
+ function detectCapabilities(model: ReplicateModelData): Set<ModelCapability> {
67
+ const capabilities = new Set<ModelCapability>();
68
+
69
+ const lowerName = model.name.toLowerCase();
70
+ const lowerDesc = (model.description || '').toLowerCase();
71
+
72
+ // Image generation
73
+ if (
74
+ lowerName.includes('stable-diffusion') ||
75
+ lowerName.includes('sdxl') ||
76
+ lowerName.includes('flux') ||
77
+ lowerName.includes('imagen') ||
78
+ lowerName.includes('midjourney') ||
79
+ lowerName.includes('dalle') ||
80
+ lowerDesc.includes('image generation') ||
81
+ lowerDesc.includes('image edit') ||
82
+ lowerDesc.includes('text-to-image')
83
+ ) {
84
+ capabilities.add('image');
85
+ }
86
+
87
+ // Vision/image input
88
+ if (
89
+ lowerName.includes('vision') ||
90
+ lowerName.includes('image-to-text') ||
91
+ lowerDesc.includes('image analysis') ||
92
+ lowerDesc.includes('image understanding') ||
93
+ lowerDesc.includes('image edit')
94
+ ) {
95
+ capabilities.add('vision');
96
+ }
97
+
98
+ // Transcription/hearing
99
+ if (
100
+ lowerName.includes('whisper') ||
101
+ lowerName.includes('transcribe') ||
102
+ lowerDesc.includes('speech-to-text') ||
103
+ lowerDesc.includes('transcription')
104
+ ) {
105
+ capabilities.add('hearing');
106
+ }
107
+
108
+ // Speech/audio output
109
+ if (
110
+ lowerName.includes('tts') ||
111
+ lowerName.includes('speech') ||
112
+ lowerName.includes('voice') ||
113
+ lowerDesc.includes('text-to-speech')
114
+ ) {
115
+ capabilities.add('audio');
116
+ }
117
+
118
+ // Embeddings
119
+ if (lowerName.includes('embed') || lowerDesc.includes('embedding')) {
120
+ capabilities.add('embedding');
121
+ }
122
+
123
+ // Chat/language models
124
+ if (
125
+ lowerName.includes('llm') ||
126
+ lowerName.includes('chat') ||
127
+ lowerName.includes('gpt') ||
128
+ lowerName.includes('llama') ||
129
+ lowerName.includes('mistral') ||
130
+ lowerName.includes('gemma') ||
131
+ lowerName.includes('vicuna') ||
132
+ lowerDesc.includes('language model') ||
133
+ lowerDesc.includes('conversational')
134
+ ) {
135
+ capabilities.add('chat');
136
+ capabilities.add('streaming');
137
+ }
138
+
139
+ // If no capabilities detected but has a schema, add chat as default
140
+ if (capabilities.size === 0 && model.latest_version?.openapi_schema) {
141
+ capabilities.add('chat');
142
+ }
143
+
144
+ return capabilities;
145
+ }
146
+
147
/**
 * Convert Replicate model to ModelInfo.
 *
 * Maps the raw API record onto the shared ModelInfo shape; capability and
 * tier values are heuristic (see detectCapabilities / detectTier).
 */
function convertReplicateModel(model: ReplicateModelData): ModelInfo {
  const modelId = `${model.owner}/${model.name}`;
  const capabilities = detectCapabilities(model);
  const tier = detectTier(model.name);

  return {
    provider: 'replicate',
    id: modelId,
    name: model.name,
    capabilities: capabilities, // NOTE: a Set<ModelCapability>, not an array — JSON writers serialize it via a Set-aware replacer
    tier,
    pricing: {}, // no per-token pricing recorded for Replicate models here
    contextWindow: 0, // Not consistently available in Replicate API
    maxOutputTokens: undefined,
    metadata: {
      owner: model.owner,
      description: model.description,
      runCount: model.run_count,
      githubUrl: model.github_url,
      // paperUrl: model.paper_url,
      // coverImageUrl: model.cover_image_url,
      visibility: model.visibility,
      source: 'replicate',
      latestVersionId: model.latest_version?.id,
      cogVersion: model.latest_version?.cog_version,
      // schema: model.latest_version?.openapi_schema,
    },
  };
}
179
+
180
+ /**
181
+ * Fetch all models from Replicate collections
182
+ */
183
+ async function fetchAllModels(apiKey?: string): Promise<ReplicateModelData[]> {
184
+ console.log('Fetching Replicate models...');
185
+
186
+ const client = new Replicate({
187
+ auth: apiKey || process.env.REPLICATE_API_KEY,
188
+ });
189
+
190
+ const allModels: ReplicateModelData[] = [];
191
+ const seenModels = new Set<string>();
192
+
193
+ // Collections to scrape
194
+ const collections = [
195
+ 'text-to-image',
196
+ 'image-to-text',
197
+ 'text-to-speech',
198
+ 'speech-to-text',
199
+ 'image-to-image',
200
+ 'text-to-video',
201
+ 'image-restoration',
202
+ 'super-resolution',
203
+ ];
204
+
205
+ console.log(`Fetching models from ${collections.length} collections...`);
206
+
207
+ for (const collectionSlug of collections) {
208
+ try {
209
+ console.log(` Fetching collection: ${collectionSlug}...`);
210
+
211
+ const collection = await client.collections.get(collectionSlug);
212
+
213
+ if (collection.models) {
214
+ for (const model of collection.models) {
215
+ const modelId = `${(model as any).owner}/${(model as any).name}`;
216
+
217
+ // Skip duplicates
218
+ if (seenModels.has(modelId)) {
219
+ continue;
220
+ }
221
+
222
+ seenModels.add(modelId);
223
+ allModels.push(model as ReplicateModelData);
224
+ }
225
+
226
+ console.log(` ✓ Found ${collection.models.length} models in ${collectionSlug}`);
227
+ }
228
+ } catch (error) {
229
+ console.log(` ⚠ Failed to fetch collection ${collectionSlug}:`, error instanceof Error ? error.message : error);
230
+ }
231
+ }
232
+
233
+ console.log(`\n✓ Fetched ${allModels.length} unique Replicate models`);
234
+ return allModels;
235
+ }
236
+
237
+ /**
238
+ * Fetch model details including schema (for cache)
239
+ */
240
+ async function fetchModelSchema(
241
+ modelId: string,
242
+ client: Replicate
243
+ ): Promise<{
244
+ modelId: string;
245
+ data: ReplicateModelData | null;
246
+ }> {
247
+ try {
248
+ const [owner, name] = modelId.split('/');
249
+ const model = await client.models.get(owner, name);
250
+ return { modelId, data: model as ReplicateModelData };
251
+ } catch (error) {
252
+ console.log(` ⚠ Failed to fetch model ${modelId}:`, error instanceof Error ? error.message : error);
253
+ return { modelId, data: null };
254
+ }
255
+ }
256
+
257
+ /**
258
+ * Fetch schemas in parallel with concurrency control
259
+ */
260
+ async function fetchSchemasParallel(
261
+ models: ReplicateModelData[],
262
+ apiKey: string,
263
+ concurrency: number = 50
264
+ ): Promise<Record<string, ReplicateModelData>> {
265
+ console.log(`\nCaching detailed model schemas (concurrency: ${concurrency})...`);
266
+
267
+ const client = new Replicate({
268
+ auth: apiKey,
269
+ });
270
+
271
+ const schemasCache: Record<string, ReplicateModelData> = {};
272
+
273
+ // First, add models that already have schemas
274
+ for (const model of models) {
275
+ const modelId = `${model.owner}/${model.name}`;
276
+ if (model.latest_version?.openapi_schema) {
277
+ schemasCache[modelId] = model;
278
+ }
279
+ }
280
+
281
+ // Find models that need schema fetching
282
+ const modelsToFetch = models.filter((model) => {
283
+ const modelId = `${model.owner}/${model.name}`;
284
+ return !model.latest_version?.openapi_schema;
285
+ });
286
+
287
+ if (modelsToFetch.length === 0) {
288
+ console.log(`✓ All ${models.length} models already have schemas\n`);
289
+ return schemasCache;
290
+ }
291
+
292
+ console.log(` ${schemasCache.length} models already have schemas`);
293
+ console.log(` Fetching schemas for ${modelsToFetch.length} models...\n`);
294
+
295
+ // Process in batches with concurrency control
296
+ const modelIdsToFetch = modelsToFetch.map((m) => `${m.owner}/${m.name}`);
297
+
298
+ for (let i = 0; i < modelIdsToFetch.length; i += concurrency) {
299
+ const batch = modelIdsToFetch.slice(i, i + concurrency);
300
+ console.log(` Processing batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(modelIdsToFetch.length / concurrency)} (${batch.length} models)...`);
301
+
302
+ const batchResults = await Promise.all(
303
+ batch.map((modelId) => fetchModelSchema(modelId, client))
304
+ );
305
+
306
+ // Store results
307
+ for (const { modelId, data } of batchResults) {
308
+ if (data) {
309
+ schemasCache[modelId] = data;
310
+ }
311
+ }
312
+
313
+ console.log(` ✓ Fetched ${batchResults.filter((r) => r.data).length}/${batch.length} schemas`);
314
+ }
315
+
316
+ console.log(`\n✓ Cached ${Object.keys(schemasCache).length} model schemas\n`);
317
+
318
+ return schemasCache;
319
+ }
320
+
321
+ /**
322
+ * Main scraper function
323
+ */
324
+ export async function scrapeReplicate(
325
+ outputDir: string,
326
+ cacheDir?: string,
327
+ options: { concurrency?: number } = {}
328
+ ): Promise<void> {
329
+ const { concurrency = 50 } = options;
330
+
331
+ console.log('\n=== Replicate Scraper ===\n');
332
+
333
+ const apiKey = process.env.REPLICATE_API_KEY;
334
+ if (!apiKey) {
335
+ console.error('✗ REPLICATE_API_KEY environment variable is required');
336
+ process.exit(1);
337
+ }
338
+
339
+ // Fetch all models from collections
340
+ const models = await fetchAllModels(apiKey);
341
+
342
+ // Create output directory
343
+ await fs.mkdir(outputDir, { recursive: true });
344
+
345
+ // Save raw models data
346
+ await fs.writeFile(
347
+ path.join(outputDir, 'replicate-models.json'),
348
+ JSON.stringify({ data: models }, (key, value) => {
349
+ if (value instanceof Set) {
350
+ return Array.from(value);
351
+ }
352
+ return value;
353
+ }, 2)
354
+ );
355
+ console.log(`✓ Saved raw Replicate models to replicate-models.json`);
356
+
357
+ // Create cache directory if specified
358
+ if (cacheDir) {
359
+ await fs.mkdir(cacheDir, { recursive: true });
360
+
361
+ // Cache detailed model schemas (for transformer generation)
362
+ const schemasCache = await fetchSchemasParallel(models, apiKey, concurrency);
363
+
364
+ // Save schemas cache
365
+ await fs.writeFile(
366
+ path.join(cacheDir, 'replicate-schemas.json'),
367
+ JSON.stringify(schemasCache, (key, value) => {
368
+ if (value instanceof Set) {
369
+ return Array.from(value);
370
+ }
371
+ return value;
372
+ }, 2)
373
+ );
374
+
375
+ console.log(`✓ Saved ${Object.keys(schemasCache).length} model schemas to ${cacheDir}/replicate-schemas.json`);
376
+ console.log(' (This cache file is for transformer generation and should not be committed)');
377
+
378
+ // Chunk it up to 80,000 character files for easier loading later
379
+ const chunkSize = 80000;
380
+ const schemaEntries = Object.entries(schemasCache);
381
+ let currentChunk: Record<string, ReplicateModelData> = {};
382
+ let currentSize = 0;
383
+ let chunkIndex = 1;
384
+ for (const [modelId, modelData] of schemaEntries) {
385
+ const entryString = JSON.stringify({ [modelId]: modelData });
386
+ if (currentSize + entryString.length > chunkSize && Object.keys(currentChunk).length > 0) {
387
+ // Save current chunk
388
+ await fs.writeFile(
389
+ path.join(cacheDir, `replicate-schemas-chunk-${chunkIndex}.json`),
390
+ JSON.stringify(currentChunk, null, 2)
391
+ );
392
+ console.log(`✓ Saved schema chunk ${chunkIndex} with ${Object.keys(currentChunk).length} models`);
393
+ chunkIndex++;
394
+ currentChunk = {};
395
+ currentSize = 0;
396
+ }
397
+ currentChunk[modelId] = modelData;
398
+ currentSize += entryString.length;
399
+ }
400
+ // Save any remaining chunk
401
+ if (Object.keys(currentChunk).length > 0) {
402
+ await fs.writeFile(
403
+ path.join(cacheDir, `replicate-schemas-chunk-${chunkIndex}.json`),
404
+ JSON.stringify(currentChunk, null, 2)
405
+ );
406
+ console.log(`✓ Saved schema chunk ${chunkIndex} with ${Object.keys(currentChunk).length} models`);
407
+ }
408
+ }
409
+
410
+ // Convert to ModelInfo format
411
+ const modelInfos = models.map(convertReplicateModel);
412
+
413
+ // Save JSON for reference
414
+ await fs.writeFile(
415
+ path.join(outputDir, 'replicate-modelinfo.json'),
416
+ JSON.stringify(modelInfos, (key, value) => {
417
+ if (value instanceof Set) {
418
+ return Array.from(value);
419
+ }
420
+ return value;
421
+ }, 2)
422
+ );
423
+ console.log(`✓ Saved ${modelInfos.length} models to JSON`);
424
+
425
+ // Generate TypeScript file
426
+ const srcDir = path.join(__dirname, '../../src/models');
427
+ await writeModelTS(modelInfos, 'replicateModels', path.join(srcDir, 'replicate.ts'));
428
+ console.log(`✓ Generated TypeScript file: src/models/replicate.ts`);
429
+
430
+ console.log('\n✓ Replicate scraping complete\n');
431
+ }
432
+
433
+ // CLI execution
434
+ if (process.argv[1].endsWith('replicate.ts')) {
435
+ const args = process.argv.slice(2);
436
+ const outputDir = args.find((arg) => !arg.startsWith('--')) || path.join(__dirname, '../../data');
437
+ const cacheDir = args.find((arg, i) => i > 0 && !arg.startsWith('--') && !args[i - 1].startsWith('--')) || path.join(__dirname, '../../cache');
438
+
439
+ const concurrencyArg = args.find((arg) => arg.startsWith('--concurrency='));
440
+ const concurrency = concurrencyArg
441
+ ? parseInt(concurrencyArg.split('=')[1], 10)
442
+ : 50;
443
+
444
+ scrapeReplicate(outputDir, cacheDir, { concurrency }).catch((error) => {
445
+ console.error('✗ Replicate scraping failed:', error);
446
+ process.exit(1);
447
+ });
448
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "extends": "../../../tsconfig.base.json",
3
+ "compilerOptions": {
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "noEmit": true,
7
+ "baseUrl": "../../..",
8
+ "paths": {
9
+ "@aeye/core": ["./packages/core/src"],
10
+ "@aeye/ai": ["./packages/ai/src"],
11
+ "@aeye/openai": ["./packages/openai/src"],
12
+ "@aeye/openrouter": ["./packages/openrouter/src"],
13
+ "@aeye/replicate": ["./packages/replicate/src"],
14
+ "@aeye/models": ["./packages/models/src"]
15
+ }
16
+ },
17
+ "include": ["scripts/**/*"],
18
+ "exclude": ["node_modules"],
19
+ "references": [
20
+ { "path": "../../ai" },
21
+ { "path": "../../openrouter" },
22
+ { "path": "../../core" }
23
+ ]
24
+ }
package/src/index.ts ADDED
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Generated main index
3
+ *
4
+ * This file is auto-generated by the scraper scripts.
5
+ * Do not edit manually - your changes will be overwritten.
6
+ *
7
+ * To regenerate, run: npm run scrape
8
+ */
9
+
10
+ export { models, openaiModels, openrouterModels, replicateModels } from './models';
11
+ export { transformers } from './transformers';