@aeye/models 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ReplicateScrape.md +54 -0
- package/dist/scripts/codegen.d.ts +21 -0
- package/dist/scripts/codegen.d.ts.map +1 -0
- package/dist/scripts/codegen.js +102 -0
- package/dist/scripts/codegen.js.map +1 -0
- package/dist/scripts/scrape.d.ts +19 -0
- package/dist/scripts/scrape.d.ts.map +1 -0
- package/dist/scripts/scrape.js +146 -0
- package/dist/scripts/scrape.js.map +1 -0
- package/dist/scripts/scrapers/__tests__/aws.test.d.ts +8 -0
- package/dist/scripts/scrapers/__tests__/aws.test.d.ts.map +1 -0
- package/dist/scripts/scrapers/__tests__/aws.test.js +73 -0
- package/dist/scripts/scrapers/__tests__/aws.test.js.map +1 -0
- package/dist/scripts/scrapers/aws.d.ts +12 -0
- package/dist/scripts/scrapers/aws.d.ts.map +1 -0
- package/dist/scripts/scrapers/aws.js +314 -0
- package/dist/scripts/scrapers/aws.js.map +1 -0
- package/dist/scripts/scrapers/openai.d.ts +12 -0
- package/dist/scripts/scrapers/openai.d.ts.map +1 -0
- package/dist/scripts/scrapers/openai.js +490 -0
- package/dist/scripts/scrapers/openai.js.map +1 -0
- package/dist/scripts/scrapers/openrouter.d.ts +13 -0
- package/dist/scripts/scrapers/openrouter.d.ts.map +1 -0
- package/dist/scripts/scrapers/openrouter.js +156 -0
- package/dist/scripts/scrapers/openrouter.js.map +1 -0
- package/dist/scripts/scrapers/replicate.d.ts +12 -0
- package/dist/scripts/scrapers/replicate.d.ts.map +1 -0
- package/dist/scripts/scrapers/replicate.js +305 -0
- package/dist/scripts/scrapers/replicate.js.map +1 -0
- package/dist/src/index.d.ts +11 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +11 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/models/aws.d.ts +11 -0
- package/dist/src/models/aws.d.ts.map +1 -0
- package/dist/src/models/aws.js +2632 -0
- package/dist/src/models/aws.js.map +1 -0
- package/dist/src/models/index.d.ts +15 -0
- package/dist/src/models/index.d.ts.map +1 -0
- package/dist/src/models/index.js +18 -0
- package/dist/src/models/index.js.map +1 -0
- package/dist/src/models/openai.d.ts +11 -0
- package/dist/src/models/openai.d.ts.map +1 -0
- package/dist/src/models/openai.js +2207 -0
- package/dist/src/models/openai.js.map +1 -0
- package/dist/src/models/openrouter.d.ts +11 -0
- package/dist/src/models/openrouter.d.ts.map +1 -0
- package/dist/src/models/openrouter.js +9786 -0
- package/dist/src/models/openrouter.js.map +1 -0
- package/dist/src/models/replicate.d.ts +11 -0
- package/dist/src/models/replicate.d.ts.map +1 -0
- package/dist/src/models/replicate.js +4106 -0
- package/dist/src/models/replicate.js.map +1 -0
- package/dist/src/transformers/index.d.ts +23 -0
- package/dist/src/transformers/index.d.ts.map +1 -0
- package/dist/src/transformers/index.js +24 -0
- package/dist/src/transformers/index.js.map +1 -0
- package/package.json +50 -0
- package/scripts/codegen.ts +117 -0
- package/scripts/scrape.ts +182 -0
- package/scripts/scrapers/__tests__/aws.test.ts +86 -0
- package/scripts/scrapers/aws.ts +370 -0
- package/scripts/scrapers/openai.ts +619 -0
- package/scripts/scrapers/openrouter.ts +214 -0
- package/scripts/scrapers/replicate.ts +448 -0
- package/scripts/tsconfig.json +24 -0
- package/src/index.ts +11 -0
- package/src/models/aws.ts +2634 -0
- package/src/models/index.ts +21 -0
- package/src/models/openai.ts +2209 -0
- package/src/models/openrouter.ts +9788 -0
- package/src/models/replicate.ts +4108 -0
- package/src/transformers/index.ts +26 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenRouter Model Scraper
|
|
3
|
+
*
|
|
4
|
+
* Fetches model information from OpenRouter API endpoints and scrapes performance metrics
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as fs from 'fs/promises';
|
|
8
|
+
import * as path from 'path';
|
|
9
|
+
import * as puppeteer from 'puppeteer';
|
|
10
|
+
import * as url from 'url';
|
|
11
|
+
import { fetchModels, fetchZDRModels, convertOpenRouterModel } from '@aeye/openrouter';
|
|
12
|
+
import { writeModelTS } from '../codegen';
|
|
13
|
+
|
|
14
|
+
const __filename = url.fileURLToPath(import.meta.url);
|
|
15
|
+
const __dirname = path.dirname(__filename);
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Scrape performance metrics from OpenRouter model page
|
|
19
|
+
*/
|
|
20
|
+
async function scrapeModelMetrics(modelId: string, browser: puppeteer.Browser): Promise<{
|
|
21
|
+
modelId: string;
|
|
22
|
+
metrics: {
|
|
23
|
+
latency?: number;
|
|
24
|
+
throughput?: number;
|
|
25
|
+
uptime?: number;
|
|
26
|
+
} | null;
|
|
27
|
+
}> {
|
|
28
|
+
const page = await browser.newPage();
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
// OpenRouter model URLs use the canonical slug format
|
|
32
|
+
const url = `https://openrouter.ai/${modelId}`;
|
|
33
|
+
const response = await page.goto(url, {
|
|
34
|
+
waitUntil: ['domcontentloaded', 'networkidle2'],
|
|
35
|
+
timeout: 30000,
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
if (!response || response.status() === 404) {
|
|
39
|
+
return { modelId, metrics: null };
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const bodyText = await page.$eval('body', el => el.textContent || '');
|
|
43
|
+
const metrics: { latency?: number; throughput?: number; uptime?: number } = {};
|
|
44
|
+
|
|
45
|
+
// Latency0.62sThroughput47.46tpsUptime100.0%Uptime 100.0
|
|
46
|
+
|
|
47
|
+
// Look for latency (in ms)
|
|
48
|
+
const latencyMatch = bodyText.match(/latency[:\s]*([0-9.]+)\s*s/i);
|
|
49
|
+
if (latencyMatch) {
|
|
50
|
+
metrics.latency = parseFloat(latencyMatch[1]);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Look for throughput (tokens/second)
|
|
54
|
+
const throughputMatch = bodyText.match(/throughput[:\s]*([0-9.]+)\s*tps/i);
|
|
55
|
+
if (throughputMatch) {
|
|
56
|
+
metrics.throughput = parseFloat(throughputMatch[1]);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Look for uptime (percentage)
|
|
60
|
+
const uptimeMatch = bodyText.match(/uptime[:\s]*([0-9.]+)%/i);
|
|
61
|
+
if (uptimeMatch) {
|
|
62
|
+
metrics.uptime = parseFloat(uptimeMatch[1]);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
if (!latencyMatch || !throughputMatch || !uptimeMatch) {
|
|
66
|
+
console.log(`⚠ No metrics found on page ${modelId}`);
|
|
67
|
+
await fs.writeFile(`./data/pages/openrouter-${modelId.replace(/[^a-z]/gi, '')}.html`, bodyText);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
return { modelId, metrics };
|
|
71
|
+
} catch (error) {
|
|
72
|
+
console.log(` ✗ Error scraping metrics for ${modelId}:`, error instanceof Error ? error.message : error);
|
|
73
|
+
return { modelId, metrics: null };
|
|
74
|
+
} finally {
|
|
75
|
+
await page.close();
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Scrape metrics for multiple models in parallel with concurrency control
|
|
81
|
+
*/
|
|
82
|
+
async function scrapeMetricsParallel(
|
|
83
|
+
modelIds: string[],
|
|
84
|
+
concurrency: number = 5
|
|
85
|
+
): Promise<Map<string, { latency?: number; throughput?: number; uptime?: number }>> {
|
|
86
|
+
console.log(`\nScraping performance metrics from OpenRouter model pages (concurrency: ${concurrency})...`);
|
|
87
|
+
|
|
88
|
+
const browser = await puppeteer.launch({ headless: true });
|
|
89
|
+
const results = new Map<string, { latency?: number; throughput?: number; uptime?: number }>();
|
|
90
|
+
|
|
91
|
+
try {
|
|
92
|
+
// Process in batches with concurrency control
|
|
93
|
+
for (let i = 0; i < modelIds.length; i += concurrency) {
|
|
94
|
+
const batch = modelIds.slice(i, i + concurrency);
|
|
95
|
+
console.log(` Processing batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(modelIds.length / concurrency)} (${batch.length} models)...`);
|
|
96
|
+
|
|
97
|
+
const batchResults = await Promise.all(
|
|
98
|
+
batch.map((modelId) => scrapeModelMetrics(modelId, browser))
|
|
99
|
+
);
|
|
100
|
+
|
|
101
|
+
// Store results
|
|
102
|
+
for (const { modelId, metrics } of batchResults) {
|
|
103
|
+
if (metrics) {
|
|
104
|
+
results.set(modelId, metrics);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
console.log(` ✓ Scraped ${batchResults.filter((r) => r.metrics).length}/${batch.length} models`);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
console.log(`✓ Scraped metrics for ${results.size}/${modelIds.length} models\n`);
|
|
112
|
+
} finally {
|
|
113
|
+
await browser.close();
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return results;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Main scraper function
|
|
121
|
+
*/
|
|
122
|
+
export async function scrapeOpenRouter(
|
|
123
|
+
outputDir: string,
|
|
124
|
+
options: { metrics?: boolean; concurrency?: number } = {}
|
|
125
|
+
): Promise<void> {
|
|
126
|
+
const { metrics: scrapeMetrics = false, concurrency = 5 } = options;
|
|
127
|
+
|
|
128
|
+
console.log('\n=== OpenRouter Scraper ===\n');
|
|
129
|
+
|
|
130
|
+
// Fetch models using existing functions
|
|
131
|
+
const [models, zdrModelIds] = await Promise.all([
|
|
132
|
+
fetchModels(process.env.OPENROUTER_API_KEY),
|
|
133
|
+
fetchZDRModels(process.env.OPENROUTER_API_KEY),
|
|
134
|
+
]);
|
|
135
|
+
|
|
136
|
+
console.log(`✓ Fetched ${models.length} OpenRouter models`);
|
|
137
|
+
console.log(`✓ Fetched ${zdrModelIds.size} ZDR model IDs`);
|
|
138
|
+
|
|
139
|
+
// Save raw data
|
|
140
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
141
|
+
|
|
142
|
+
await fs.writeFile(
|
|
143
|
+
path.join(outputDir, 'openrouter-models.json'),
|
|
144
|
+
JSON.stringify({ data: models }, (key, value) => {
|
|
145
|
+
if (value instanceof Set) {
|
|
146
|
+
return Array.from(value);
|
|
147
|
+
}
|
|
148
|
+
return value;
|
|
149
|
+
}, 2)
|
|
150
|
+
);
|
|
151
|
+
console.log(`✓ Saved raw OpenRouter models to openrouter-models.json`);
|
|
152
|
+
|
|
153
|
+
if (zdrModelIds.size > 0) {
|
|
154
|
+
await fs.writeFile(
|
|
155
|
+
path.join(outputDir, 'openrouter-zdr.json'),
|
|
156
|
+
JSON.stringify({ data: Array.from(zdrModelIds) }, (key, value) => {
|
|
157
|
+
if (value instanceof Set) {
|
|
158
|
+
return Array.from(value);
|
|
159
|
+
}
|
|
160
|
+
return value;
|
|
161
|
+
}, 2)
|
|
162
|
+
);
|
|
163
|
+
console.log(`✓ Saved ZDR model IDs to openrouter-zdr.json`);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// Scrape performance metrics if requested
|
|
167
|
+
let metricsMap = new Map<string, { latency?: number; throughput?: number; uptime?: number }>();
|
|
168
|
+
|
|
169
|
+
if (scrapeMetrics) {
|
|
170
|
+
const modelIds = models.map((m) => m.id);
|
|
171
|
+
metricsMap = await scrapeMetricsParallel(modelIds, concurrency);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Convert to ModelInfo format
|
|
175
|
+
const modelInfos = models.map((model) =>
|
|
176
|
+
convertOpenRouterModel(model, zdrModelIds, metricsMap.get(model.id))
|
|
177
|
+
);
|
|
178
|
+
|
|
179
|
+
// Save JSON for reference
|
|
180
|
+
await fs.writeFile(
|
|
181
|
+
path.join(outputDir, 'openrouter-modelinfo.json'),
|
|
182
|
+
JSON.stringify(modelInfos, (key, value) => {
|
|
183
|
+
if (value instanceof Set) {
|
|
184
|
+
return Array.from(value);
|
|
185
|
+
}
|
|
186
|
+
return value;
|
|
187
|
+
}, 2)
|
|
188
|
+
);
|
|
189
|
+
console.log(`✓ Saved ${modelInfos.length} models to JSON`);
|
|
190
|
+
|
|
191
|
+
// Generate TypeScript file
|
|
192
|
+
const srcDir = path.join(__dirname, '../../src/models');
|
|
193
|
+
await writeModelTS(modelInfos, 'openrouterModels', path.join(srcDir, 'openrouter.ts'));
|
|
194
|
+
console.log(`✓ Generated TypeScript file: src/models/openrouter.ts`);
|
|
195
|
+
|
|
196
|
+
console.log('\n✓ OpenRouter scraping complete\n');
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// CLI execution
|
|
200
|
+
if (process.argv[1].endsWith('openrouter.ts')) {
|
|
201
|
+
const args = process.argv.slice(2);
|
|
202
|
+
const outputDir = args.find((arg) => !arg.startsWith('--')) || path.join(__dirname, '../../data');
|
|
203
|
+
const scrapeMetrics = args.includes('--metrics');
|
|
204
|
+
|
|
205
|
+
const concurrencyArg = args.find((arg) => arg.startsWith('--concurrency='));
|
|
206
|
+
const concurrency = concurrencyArg
|
|
207
|
+
? parseInt(concurrencyArg.split('=')[1], 10)
|
|
208
|
+
: 5;
|
|
209
|
+
|
|
210
|
+
scrapeOpenRouter(outputDir, { metrics: scrapeMetrics, concurrency }).catch((error) => {
|
|
211
|
+
console.error('✗ OpenRouter scraping failed:', error);
|
|
212
|
+
process.exit(1);
|
|
213
|
+
});
|
|
214
|
+
}
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Replicate Model Scraper
|
|
3
|
+
*
|
|
4
|
+
* Fetches model information from Replicate API using the Replicate npm module
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as fs from 'fs/promises';
|
|
8
|
+
import * as path from 'path';
|
|
9
|
+
import * as url from 'url';
|
|
10
|
+
import Replicate from 'replicate';
|
|
11
|
+
import type { ModelInfo, ModelCapability } from '@aeye/ai';
|
|
12
|
+
import { detectTier } from '@aeye/ai';
|
|
13
|
+
import { writeModelTS } from '../codegen';
|
|
14
|
+
|
|
15
|
+
const __filename = url.fileURLToPath(import.meta.url);
|
|
16
|
+
const __dirname = path.dirname(__filename);
|
|
17
|
+
|
|
18
|
+
/**
 * Shape of a model object returned by the Replicate API
 * (`client.models.get` / `client.collections.get`).
 * Only the fields this scraper reads are modeled here; the API may
 * return more.
 */
interface ReplicateModelData {
  url: string;
  owner: string; // account/org owning the model; combined with `name` as "owner/name" ids
  name: string;
  description: string | null;
  visibility: string; // e.g. "public" — assumes Replicate's value set; TODO confirm
  github_url: string | null;
  paper_url: string | null;
  license_url: string | null;
  run_count: number; // surfaced into ModelInfo metadata as runCount
  cover_image_url: string | null;
  // Example prediction bundled with the model page, when one exists.
  default_example: {
    model: string;
    version: string;
    input: Record<string, unknown>;
    output: unknown;
  } | null;
  // Most recent published version; null when the model has no versions.
  // Presence of `openapi_schema` is what fetchSchemasParallel keys on.
  latest_version: {
    id: string;
    created_at: string;
    cog_version: string;
    // OpenAPI document describing the model's prediction Input/Output types.
    openapi_schema: {
      info: {
        title: string;
        version: string;
      };
      paths: Record<string, unknown>;
      components: {
        schemas: {
          Input?: {
            type: string;
            properties: Record<string, unknown>;
            required?: string[];
          };
          Output?: {
            type: string;
            properties?: Record<string, unknown>;
            items?: unknown;
          };
        };
      };
    };
  } | null;
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Detect capabilities from model name, description, and schema
|
|
65
|
+
*/
|
|
66
|
+
function detectCapabilities(model: ReplicateModelData): Set<ModelCapability> {
|
|
67
|
+
const capabilities = new Set<ModelCapability>();
|
|
68
|
+
|
|
69
|
+
const lowerName = model.name.toLowerCase();
|
|
70
|
+
const lowerDesc = (model.description || '').toLowerCase();
|
|
71
|
+
|
|
72
|
+
// Image generation
|
|
73
|
+
if (
|
|
74
|
+
lowerName.includes('stable-diffusion') ||
|
|
75
|
+
lowerName.includes('sdxl') ||
|
|
76
|
+
lowerName.includes('flux') ||
|
|
77
|
+
lowerName.includes('imagen') ||
|
|
78
|
+
lowerName.includes('midjourney') ||
|
|
79
|
+
lowerName.includes('dalle') ||
|
|
80
|
+
lowerDesc.includes('image generation') ||
|
|
81
|
+
lowerDesc.includes('image edit') ||
|
|
82
|
+
lowerDesc.includes('text-to-image')
|
|
83
|
+
) {
|
|
84
|
+
capabilities.add('image');
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Vision/image input
|
|
88
|
+
if (
|
|
89
|
+
lowerName.includes('vision') ||
|
|
90
|
+
lowerName.includes('image-to-text') ||
|
|
91
|
+
lowerDesc.includes('image analysis') ||
|
|
92
|
+
lowerDesc.includes('image understanding') ||
|
|
93
|
+
lowerDesc.includes('image edit')
|
|
94
|
+
) {
|
|
95
|
+
capabilities.add('vision');
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Transcription/hearing
|
|
99
|
+
if (
|
|
100
|
+
lowerName.includes('whisper') ||
|
|
101
|
+
lowerName.includes('transcribe') ||
|
|
102
|
+
lowerDesc.includes('speech-to-text') ||
|
|
103
|
+
lowerDesc.includes('transcription')
|
|
104
|
+
) {
|
|
105
|
+
capabilities.add('hearing');
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Speech/audio output
|
|
109
|
+
if (
|
|
110
|
+
lowerName.includes('tts') ||
|
|
111
|
+
lowerName.includes('speech') ||
|
|
112
|
+
lowerName.includes('voice') ||
|
|
113
|
+
lowerDesc.includes('text-to-speech')
|
|
114
|
+
) {
|
|
115
|
+
capabilities.add('audio');
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Embeddings
|
|
119
|
+
if (lowerName.includes('embed') || lowerDesc.includes('embedding')) {
|
|
120
|
+
capabilities.add('embedding');
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Chat/language models
|
|
124
|
+
if (
|
|
125
|
+
lowerName.includes('llm') ||
|
|
126
|
+
lowerName.includes('chat') ||
|
|
127
|
+
lowerName.includes('gpt') ||
|
|
128
|
+
lowerName.includes('llama') ||
|
|
129
|
+
lowerName.includes('mistral') ||
|
|
130
|
+
lowerName.includes('gemma') ||
|
|
131
|
+
lowerName.includes('vicuna') ||
|
|
132
|
+
lowerDesc.includes('language model') ||
|
|
133
|
+
lowerDesc.includes('conversational')
|
|
134
|
+
) {
|
|
135
|
+
capabilities.add('chat');
|
|
136
|
+
capabilities.add('streaming');
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// If no capabilities detected but has a schema, add chat as default
|
|
140
|
+
if (capabilities.size === 0 && model.latest_version?.openapi_schema) {
|
|
141
|
+
capabilities.add('chat');
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
return capabilities;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Convert Replicate model to ModelInfo
|
|
149
|
+
*/
|
|
150
|
+
function convertReplicateModel(model: ReplicateModelData): ModelInfo {
|
|
151
|
+
const modelId = `${model.owner}/${model.name}`;
|
|
152
|
+
const capabilities = detectCapabilities(model);
|
|
153
|
+
const tier = detectTier(model.name);
|
|
154
|
+
|
|
155
|
+
return {
|
|
156
|
+
provider: 'replicate',
|
|
157
|
+
id: modelId,
|
|
158
|
+
name: model.name,
|
|
159
|
+
capabilities: capabilities, // Already an array
|
|
160
|
+
tier,
|
|
161
|
+
pricing: {},
|
|
162
|
+
contextWindow: 0, // Not consistently available in Replicate API
|
|
163
|
+
maxOutputTokens: undefined,
|
|
164
|
+
metadata: {
|
|
165
|
+
owner: model.owner,
|
|
166
|
+
description: model.description,
|
|
167
|
+
runCount: model.run_count,
|
|
168
|
+
githubUrl: model.github_url,
|
|
169
|
+
// paperUrl: model.paper_url,
|
|
170
|
+
// coverImageUrl: model.cover_image_url,
|
|
171
|
+
visibility: model.visibility,
|
|
172
|
+
source: 'replicate',
|
|
173
|
+
latestVersionId: model.latest_version?.id,
|
|
174
|
+
cogVersion: model.latest_version?.cog_version,
|
|
175
|
+
// schema: model.latest_version?.openapi_schema,
|
|
176
|
+
},
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Fetch all models from Replicate collections
|
|
182
|
+
*/
|
|
183
|
+
async function fetchAllModels(apiKey?: string): Promise<ReplicateModelData[]> {
|
|
184
|
+
console.log('Fetching Replicate models...');
|
|
185
|
+
|
|
186
|
+
const client = new Replicate({
|
|
187
|
+
auth: apiKey || process.env.REPLICATE_API_KEY,
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
const allModels: ReplicateModelData[] = [];
|
|
191
|
+
const seenModels = new Set<string>();
|
|
192
|
+
|
|
193
|
+
// Collections to scrape
|
|
194
|
+
const collections = [
|
|
195
|
+
'text-to-image',
|
|
196
|
+
'image-to-text',
|
|
197
|
+
'text-to-speech',
|
|
198
|
+
'speech-to-text',
|
|
199
|
+
'image-to-image',
|
|
200
|
+
'text-to-video',
|
|
201
|
+
'image-restoration',
|
|
202
|
+
'super-resolution',
|
|
203
|
+
];
|
|
204
|
+
|
|
205
|
+
console.log(`Fetching models from ${collections.length} collections...`);
|
|
206
|
+
|
|
207
|
+
for (const collectionSlug of collections) {
|
|
208
|
+
try {
|
|
209
|
+
console.log(` Fetching collection: ${collectionSlug}...`);
|
|
210
|
+
|
|
211
|
+
const collection = await client.collections.get(collectionSlug);
|
|
212
|
+
|
|
213
|
+
if (collection.models) {
|
|
214
|
+
for (const model of collection.models) {
|
|
215
|
+
const modelId = `${(model as any).owner}/${(model as any).name}`;
|
|
216
|
+
|
|
217
|
+
// Skip duplicates
|
|
218
|
+
if (seenModels.has(modelId)) {
|
|
219
|
+
continue;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
seenModels.add(modelId);
|
|
223
|
+
allModels.push(model as ReplicateModelData);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
console.log(` ✓ Found ${collection.models.length} models in ${collectionSlug}`);
|
|
227
|
+
}
|
|
228
|
+
} catch (error) {
|
|
229
|
+
console.log(` ⚠ Failed to fetch collection ${collectionSlug}:`, error instanceof Error ? error.message : error);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
console.log(`\n✓ Fetched ${allModels.length} unique Replicate models`);
|
|
234
|
+
return allModels;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* Fetch model details including schema (for cache)
|
|
239
|
+
*/
|
|
240
|
+
async function fetchModelSchema(
|
|
241
|
+
modelId: string,
|
|
242
|
+
client: Replicate
|
|
243
|
+
): Promise<{
|
|
244
|
+
modelId: string;
|
|
245
|
+
data: ReplicateModelData | null;
|
|
246
|
+
}> {
|
|
247
|
+
try {
|
|
248
|
+
const [owner, name] = modelId.split('/');
|
|
249
|
+
const model = await client.models.get(owner, name);
|
|
250
|
+
return { modelId, data: model as ReplicateModelData };
|
|
251
|
+
} catch (error) {
|
|
252
|
+
console.log(` ⚠ Failed to fetch model ${modelId}:`, error instanceof Error ? error.message : error);
|
|
253
|
+
return { modelId, data: null };
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Fetch schemas in parallel with concurrency control
|
|
259
|
+
*/
|
|
260
|
+
async function fetchSchemasParallel(
|
|
261
|
+
models: ReplicateModelData[],
|
|
262
|
+
apiKey: string,
|
|
263
|
+
concurrency: number = 50
|
|
264
|
+
): Promise<Record<string, ReplicateModelData>> {
|
|
265
|
+
console.log(`\nCaching detailed model schemas (concurrency: ${concurrency})...`);
|
|
266
|
+
|
|
267
|
+
const client = new Replicate({
|
|
268
|
+
auth: apiKey,
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
const schemasCache: Record<string, ReplicateModelData> = {};
|
|
272
|
+
|
|
273
|
+
// First, add models that already have schemas
|
|
274
|
+
for (const model of models) {
|
|
275
|
+
const modelId = `${model.owner}/${model.name}`;
|
|
276
|
+
if (model.latest_version?.openapi_schema) {
|
|
277
|
+
schemasCache[modelId] = model;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Find models that need schema fetching
|
|
282
|
+
const modelsToFetch = models.filter((model) => {
|
|
283
|
+
const modelId = `${model.owner}/${model.name}`;
|
|
284
|
+
return !model.latest_version?.openapi_schema;
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
if (modelsToFetch.length === 0) {
|
|
288
|
+
console.log(`✓ All ${models.length} models already have schemas\n`);
|
|
289
|
+
return schemasCache;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
console.log(` ${schemasCache.length} models already have schemas`);
|
|
293
|
+
console.log(` Fetching schemas for ${modelsToFetch.length} models...\n`);
|
|
294
|
+
|
|
295
|
+
// Process in batches with concurrency control
|
|
296
|
+
const modelIdsToFetch = modelsToFetch.map((m) => `${m.owner}/${m.name}`);
|
|
297
|
+
|
|
298
|
+
for (let i = 0; i < modelIdsToFetch.length; i += concurrency) {
|
|
299
|
+
const batch = modelIdsToFetch.slice(i, i + concurrency);
|
|
300
|
+
console.log(` Processing batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(modelIdsToFetch.length / concurrency)} (${batch.length} models)...`);
|
|
301
|
+
|
|
302
|
+
const batchResults = await Promise.all(
|
|
303
|
+
batch.map((modelId) => fetchModelSchema(modelId, client))
|
|
304
|
+
);
|
|
305
|
+
|
|
306
|
+
// Store results
|
|
307
|
+
for (const { modelId, data } of batchResults) {
|
|
308
|
+
if (data) {
|
|
309
|
+
schemasCache[modelId] = data;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
console.log(` ✓ Fetched ${batchResults.filter((r) => r.data).length}/${batch.length} schemas`);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
console.log(`\n✓ Cached ${Object.keys(schemasCache).length} model schemas\n`);
|
|
317
|
+
|
|
318
|
+
return schemasCache;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* Main scraper function
|
|
323
|
+
*/
|
|
324
|
+
export async function scrapeReplicate(
|
|
325
|
+
outputDir: string,
|
|
326
|
+
cacheDir?: string,
|
|
327
|
+
options: { concurrency?: number } = {}
|
|
328
|
+
): Promise<void> {
|
|
329
|
+
const { concurrency = 50 } = options;
|
|
330
|
+
|
|
331
|
+
console.log('\n=== Replicate Scraper ===\n');
|
|
332
|
+
|
|
333
|
+
const apiKey = process.env.REPLICATE_API_KEY;
|
|
334
|
+
if (!apiKey) {
|
|
335
|
+
console.error('✗ REPLICATE_API_KEY environment variable is required');
|
|
336
|
+
process.exit(1);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
// Fetch all models from collections
|
|
340
|
+
const models = await fetchAllModels(apiKey);
|
|
341
|
+
|
|
342
|
+
// Create output directory
|
|
343
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
344
|
+
|
|
345
|
+
// Save raw models data
|
|
346
|
+
await fs.writeFile(
|
|
347
|
+
path.join(outputDir, 'replicate-models.json'),
|
|
348
|
+
JSON.stringify({ data: models }, (key, value) => {
|
|
349
|
+
if (value instanceof Set) {
|
|
350
|
+
return Array.from(value);
|
|
351
|
+
}
|
|
352
|
+
return value;
|
|
353
|
+
}, 2)
|
|
354
|
+
);
|
|
355
|
+
console.log(`✓ Saved raw Replicate models to replicate-models.json`);
|
|
356
|
+
|
|
357
|
+
// Create cache directory if specified
|
|
358
|
+
if (cacheDir) {
|
|
359
|
+
await fs.mkdir(cacheDir, { recursive: true });
|
|
360
|
+
|
|
361
|
+
// Cache detailed model schemas (for transformer generation)
|
|
362
|
+
const schemasCache = await fetchSchemasParallel(models, apiKey, concurrency);
|
|
363
|
+
|
|
364
|
+
// Save schemas cache
|
|
365
|
+
await fs.writeFile(
|
|
366
|
+
path.join(cacheDir, 'replicate-schemas.json'),
|
|
367
|
+
JSON.stringify(schemasCache, (key, value) => {
|
|
368
|
+
if (value instanceof Set) {
|
|
369
|
+
return Array.from(value);
|
|
370
|
+
}
|
|
371
|
+
return value;
|
|
372
|
+
}, 2)
|
|
373
|
+
);
|
|
374
|
+
|
|
375
|
+
console.log(`✓ Saved ${Object.keys(schemasCache).length} model schemas to ${cacheDir}/replicate-schemas.json`);
|
|
376
|
+
console.log(' (This cache file is for transformer generation and should not be committed)');
|
|
377
|
+
|
|
378
|
+
// Chunk it up to 80,000 character files for easier loading later
|
|
379
|
+
const chunkSize = 80000;
|
|
380
|
+
const schemaEntries = Object.entries(schemasCache);
|
|
381
|
+
let currentChunk: Record<string, ReplicateModelData> = {};
|
|
382
|
+
let currentSize = 0;
|
|
383
|
+
let chunkIndex = 1;
|
|
384
|
+
for (const [modelId, modelData] of schemaEntries) {
|
|
385
|
+
const entryString = JSON.stringify({ [modelId]: modelData });
|
|
386
|
+
if (currentSize + entryString.length > chunkSize && Object.keys(currentChunk).length > 0) {
|
|
387
|
+
// Save current chunk
|
|
388
|
+
await fs.writeFile(
|
|
389
|
+
path.join(cacheDir, `replicate-schemas-chunk-${chunkIndex}.json`),
|
|
390
|
+
JSON.stringify(currentChunk, null, 2)
|
|
391
|
+
);
|
|
392
|
+
console.log(`✓ Saved schema chunk ${chunkIndex} with ${Object.keys(currentChunk).length} models`);
|
|
393
|
+
chunkIndex++;
|
|
394
|
+
currentChunk = {};
|
|
395
|
+
currentSize = 0;
|
|
396
|
+
}
|
|
397
|
+
currentChunk[modelId] = modelData;
|
|
398
|
+
currentSize += entryString.length;
|
|
399
|
+
}
|
|
400
|
+
// Save any remaining chunk
|
|
401
|
+
if (Object.keys(currentChunk).length > 0) {
|
|
402
|
+
await fs.writeFile(
|
|
403
|
+
path.join(cacheDir, `replicate-schemas-chunk-${chunkIndex}.json`),
|
|
404
|
+
JSON.stringify(currentChunk, null, 2)
|
|
405
|
+
);
|
|
406
|
+
console.log(`✓ Saved schema chunk ${chunkIndex} with ${Object.keys(currentChunk).length} models`);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// Convert to ModelInfo format
|
|
411
|
+
const modelInfos = models.map(convertReplicateModel);
|
|
412
|
+
|
|
413
|
+
// Save JSON for reference
|
|
414
|
+
await fs.writeFile(
|
|
415
|
+
path.join(outputDir, 'replicate-modelinfo.json'),
|
|
416
|
+
JSON.stringify(modelInfos, (key, value) => {
|
|
417
|
+
if (value instanceof Set) {
|
|
418
|
+
return Array.from(value);
|
|
419
|
+
}
|
|
420
|
+
return value;
|
|
421
|
+
}, 2)
|
|
422
|
+
);
|
|
423
|
+
console.log(`✓ Saved ${modelInfos.length} models to JSON`);
|
|
424
|
+
|
|
425
|
+
// Generate TypeScript file
|
|
426
|
+
const srcDir = path.join(__dirname, '../../src/models');
|
|
427
|
+
await writeModelTS(modelInfos, 'replicateModels', path.join(srcDir, 'replicate.ts'));
|
|
428
|
+
console.log(`✓ Generated TypeScript file: src/models/replicate.ts`);
|
|
429
|
+
|
|
430
|
+
console.log('\n✓ Replicate scraping complete\n');
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// CLI execution
|
|
434
|
+
if (process.argv[1].endsWith('replicate.ts')) {
|
|
435
|
+
const args = process.argv.slice(2);
|
|
436
|
+
const outputDir = args.find((arg) => !arg.startsWith('--')) || path.join(__dirname, '../../data');
|
|
437
|
+
const cacheDir = args.find((arg, i) => i > 0 && !arg.startsWith('--') && !args[i - 1].startsWith('--')) || path.join(__dirname, '../../cache');
|
|
438
|
+
|
|
439
|
+
const concurrencyArg = args.find((arg) => arg.startsWith('--concurrency='));
|
|
440
|
+
const concurrency = concurrencyArg
|
|
441
|
+
? parseInt(concurrencyArg.split('=')[1], 10)
|
|
442
|
+
: 50;
|
|
443
|
+
|
|
444
|
+
scrapeReplicate(outputDir, cacheDir, { concurrency }).catch((error) => {
|
|
445
|
+
console.error('✗ Replicate scraping failed:', error);
|
|
446
|
+
process.exit(1);
|
|
447
|
+
});
|
|
448
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../../tsconfig.base.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "bundler",
|
|
6
|
+
"noEmit": true,
|
|
7
|
+
"baseUrl": "../../..",
|
|
8
|
+
"paths": {
|
|
9
|
+
"@aeye/core": ["./packages/core/src"],
|
|
10
|
+
"@aeye/ai": ["./packages/ai/src"],
|
|
11
|
+
"@aeye/openai": ["./packages/openai/src"],
|
|
12
|
+
"@aeye/openrouter": ["./packages/openrouter/src"],
|
|
13
|
+
"@aeye/replicate": ["./packages/replicate/src"],
|
|
14
|
+
"@aeye/models": ["./packages/models/src"]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"include": ["scripts/**/*"],
|
|
18
|
+
"exclude": ["node_modules"],
|
|
19
|
+
"references": [
|
|
20
|
+
{ "path": "../../ai" },
|
|
21
|
+
{ "path": "../../openrouter" },
|
|
22
|
+
{ "path": "../../core" }
|
|
23
|
+
]
|
|
24
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Generated main index
 *
 * This file is auto-generated by the scraper scripts.
 * Do not edit manually - your changes will be overwritten.
 *
 * To regenerate, run: npm run scrape
 */

// Re-export the per-provider model tables plus the combined `models` map.
// NOTE(review): there is no `awsModels` export here even though
// src/models/aws.ts is part of this package — confirm whether AWS models
// are intentionally omitted or folded into `models`.
export { models, openaiModels, openrouterModels, replicateModels } from './models';
export { transformers } from './transformers';
|