@tikoci/rosetta 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +135 -197
- package/package.json +1 -1
- package/src/db.ts +31 -1
- package/src/extract-test-results.ts +359 -0
- package/src/mcp-http.test.ts +443 -0
- package/src/mcp.ts +92 -17
- package/src/query.test.ts +47 -3
- package/src/query.ts +38 -3
- package/src/release.test.ts +106 -0
- package/src/setup.ts +22 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* extract-test-results.ts — Scrape MikroTik product pages for test results + block diagram URLs.
|
|
3
|
+
*
|
|
4
|
+
* Fetches each product page from mikrotik.com and extracts:
|
|
5
|
+
* - Ethernet test results (bridging/routing throughput at various packet sizes)
|
|
6
|
+
* - IPSec test results (tunnel throughput with various ciphers)
|
|
7
|
+
* - Block diagram PNG URL
|
|
8
|
+
* - Product page URL slug
|
|
9
|
+
*
|
|
10
|
+
* Idempotent: deletes all existing test results, updates device rows.
|
|
11
|
+
* Requires devices table to be populated first (via extract-devices.ts).
|
|
12
|
+
*
|
|
13
|
+
* Usage: bun run src/extract-test-results.ts [--concurrency N] [--delay MS]
|
|
14
|
+
*
|
|
15
|
+
* Product page URL slug discovery: generates candidate slugs from each device's
* product name and product code, then requests each candidate in turn and uses
* the first one that returns a successful (2xx) response.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { parseHTML } from "linkedom";
|
|
20
|
+
import { db, initDb } from "./db.ts";
|
|
21
|
+
|
|
22
|
+
// ── CLI flags ──
|
|
23
|
+
|
|
24
|
+
const args = process.argv.slice(2);

/**
 * Read a numeric command-line flag given as `--name value`.
 *
 * @param name - Flag name without the leading `--`.
 * @param fallback - Value returned when the flag is absent, has no value,
 *   or its value is not a finite number.
 * @returns The parsed number, or `fallback`.
 */
function getFlag(name: string, fallback: number): number {
  const idx = args.indexOf(`--${name}`);
  if (idx === -1) return fallback;

  const raw = args[idx + 1];
  if (!raw) return fallback;

  // Number("abc") and Number("--other-flag") are NaN; never hand NaN/Infinity to callers.
  const value = Number(raw);
  return Number.isFinite(value) ? value : fallback;
}

// Batch size must be a positive integer: the fetch loop advances by this amount,
// so 0 would never make progress.
const CONCURRENCY = Math.max(1, Math.floor(getFlag("concurrency", 4)));
// A negative pause is meaningless; clamp to zero.
const DELAY_MS = Math.max(0, getFlag("delay", 500));
const PRODUCT_BASE = "https://mikrotik.com/product/";
|
|
34
|
+
|
|
35
|
+
// ── Types ──
|
|
36
|
+
|
|
37
|
+
/** One throughput measurement read from a product page performance table. */
interface TestResultRow {
  /** Text of the table row's first cell. */
  mode: string;
  /** Text of the table row's second cell. */
  configuration: string;
  /** Packet size in bytes that this measurement belongs to. */
  packet_size: number;
  /** Value of the kpps column, or `null` when the cell is not numeric. */
  throughput_kpps: null | number;
  /** Value of the Mbps column, or `null` when the cell is missing or not numeric. */
  throughput_mbps: null | number;
}
|
|
44
|
+
|
|
45
|
+
/** Everything extracted from a single product page. */
interface ProductPageData {
  /** URL slug the page was fetched with. */
  slug: string;
  /** Rows from performance tables not identified as IPsec. */
  ethernet_results: Array<TestResultRow>;
  /** Rows from performance tables whose header description mentions "ipsec". */
  ipsec_results: Array<TestResultRow>;
  /** Absolute URL of the "Block Diagram" link, or `null` when none was found. */
  block_diagram_url: null | string;
}
|
|
51
|
+
|
|
52
|
+
// ── HTML Parsing ──
|
|
53
|
+
|
|
54
|
+
/**
 * Decode HTML entities (for example `&amp;`) into plain text.
 *
 * The input is assigned to a scratch element's `innerHTML` and read back
 * through `textContent`, so any markup in the input is reduced to its text.
 */
function decodeEntities(html: string): string {
  const scratch = parseHTML("<div></div>").document.createElement("div");
  scratch.innerHTML = html;
  const text = scratch.textContent;
  return text ? text : "";
}
|
|
61
|
+
|
|
62
|
+
/**
 * Parse a performance-table element into test result rows.
 *
 * Expected layout:
 * - `<thead>` row 1: `[product_code, test_description]`; a description containing
 *   "ipsec" marks the table as IPsec, anything else as ethernet.
 * - `<thead>` row 2: `[Mode, Configuration, "<N> byte", ...]`; each size spans two
 *   data columns (kpps, then Mbps).
 * - `<tbody>` rows: `[mode, configuration, kpps, Mbps, kpps, Mbps, ...]`.
 *
 * @param table - The `<table>` element to read.
 * @returns The detected test type ("ipsec", "ethernet", or "unknown" when the
 *   header is missing or has fewer than two rows) and one row per
 *   (table row, packet size) pair. Non-numeric cells become `null`.
 */
function parsePerformanceTable(table: Element): { testType: string; rows: TestResultRow[] } {
  const rows: TestResultRow[] = [];

  /** Trimmed text of a cell ("" when it has none). */
  const cellText = (cell: Element): string => (cell.textContent || "").trim();

  /** Numeric value of a cell, or null when it does not start with a number. */
  const cellNumber = (cell: Element): number | null => {
    // parseFloat stops at the first comma, so "9,859.7" would be read as 9.
    // Strip thousands separators before parsing.
    const value = Number.parseFloat(cellText(cell).replace(/,/g, ""));
    return Number.isNaN(value) ? null : value;
  };

  const thead = table.querySelector("thead");
  if (!thead) return { testType: "unknown", rows };

  const headerRows = thead.querySelectorAll("tr");
  if (headerRows.length < 2) return { testType: "unknown", rows };

  // Test type comes from the description in the second cell of the first header row.
  const headerCells = headerRows[0].querySelectorAll("td");
  const testDesc = headerCells.length >= 2 ? cellText(headerCells[1]).toLowerCase() : "";
  const testType = testDesc.includes("ipsec") ? "ipsec" : "ethernet";

  // Packet sizes come from the second header row; cells such as "Mode" do not match.
  const packetSizes: number[] = [];
  for (const cell of headerRows[1].querySelectorAll("td")) {
    const match = cellText(cell).match(/^(\d+)\s*byte/i);
    if (match) packetSizes.push(Number.parseInt(match[1], 10));
  }

  // No sizes in the header: fall back to the defaults for this test type.
  if (packetSizes.length === 0) {
    packetSizes.push(testType === "ipsec" ? 1400 : 1518, 512, 64);
  }

  const tbody = table.querySelector("tbody");
  if (!tbody) return { testType, rows };

  for (const tr of tbody.querySelectorAll("tr")) {
    const cells = tr.querySelectorAll("td");
    if (cells.length < 2) continue;

    const mode = cellText(cells[0]);
    const configuration = cellText(cells[1]);

    // Columns 2.. hold (kpps, Mbps) pairs, one pair per packet size.
    for (let i = 0; i < packetSizes.length; i++) {
      const kppsIdx = 2 + i * 2;
      const mbpsIdx = kppsIdx + 1;
      if (kppsIdx >= cells.length) break;

      rows.push({
        mode,
        configuration,
        packet_size: packetSizes[i],
        throughput_kpps: cellNumber(cells[kppsIdx]),
        throughput_mbps: mbpsIdx < cells.length ? cellNumber(cells[mbpsIdx]) : null,
      });
    }
  }

  return { testType, rows };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Build the ordered list of URL slug candidates for a product.
 *
 * Product page slugs follow no single scheme, so several spellings are produced
 * and the caller probes them in order. Superscript digits (¹, ², ³) are first
 * rewritten as plain digits. Candidates, in order:
 *   1. lowercased name, `+` spelled "plus", other non-alphanumerics → `_`
 *   2. lowercased name, every non-alphanumeric run (including `+`) → `_`
 *   3. code in original casing, `+` spelled "plus", other specials removed (hyphens kept)
 *   4. code in original casing, specials including `+` removed (hyphens kept)
 *   5. lowercased code, treated like (1)
 *   6. lowercased code, treated like (2)
 * Empty results and duplicates are dropped; first occurrence wins.
 *
 * @param name - Product display name.
 * @param code - Product code, or `null` when the device has none.
 * @returns Unique, non-empty slug candidates in probing order.
 */
function generateSlugs(name: string, code: string | null): string[] {
  const plainDigits = (s: string): string =>
    s.replace(/²/g, "2").replace(/³/g, "3").replace(/¹/g, "1");

  const lowerName = plainDigits(name).toLowerCase();
  const candidates: string[] = [
    lowerName.replace(/\+/g, "plus").replace(/[^a-z0-9plus]+/g, "_").replace(/^_|_$/g, ""),
    lowerName.replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, ""),
  ];

  if (code) {
    const rawCode = plainDigits(code);
    const lowerCode = rawCode.toLowerCase();
    candidates.push(
      rawCode.replace(/\+/g, "plus").replace(/[^a-zA-Z0-9plus\-]+/g, "").replace(/^-|-$/g, ""),
      rawCode.replace(/[^a-zA-Z0-9\-]+/g, "").replace(/^-|-$/g, ""),
      lowerCode.replace(/\+/g, "plus").replace(/[^a-z0-9plus]+/g, "_").replace(/^_|_$/g, ""),
      lowerCode.replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, ""),
    );
  }

  // A Set keeps insertion order, so this removes repeats without reordering.
  return [...new Set(candidates.filter((slug) => slug !== ""))];
}
|
|
179
|
+
|
|
180
|
+
/**
 * Fetch and parse a single product page, trying slug candidates in order.
 *
 * The first candidate that answers with a successful (2xx) response is parsed and
 * returned. A non-2xx response or a network error moves on to the next candidate;
 * nothing is logged for intermediate misses.
 *
 * @param slugs - Candidate slugs, most likely first.
 * @returns Parsed page data, or `null` when no candidate could be fetched. In that
 *   case a single warning is printed, tagged with the last failure seen (HTTP
 *   status or network error message).
 */
async function fetchProductPage(slugs: string[]): Promise<ProductPageData | null> {
  // Remembered so the final warning names the real cause rather than always saying 404.
  let lastFailure = "404";

  for (const slug of slugs) {
    try {
      const resp = await fetch(`${PRODUCT_BASE}${slug}`);
      if (resp.ok) {
        return parseProductHtml(await resp.text(), slug);
      }
      lastFailure = String(resp.status);
    } catch (err: unknown) {
      // fetch() rejects only on network-level failures; keep going with the next slug.
      lastFailure = err instanceof Error ? err.message : String(err);
    }
  }

  console.warn(` [${lastFailure}] ${slugs[0] ?? "(no slug candidates)"} (tried ${slugs.length} variants)`);
  return null;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Turn a "Block Diagram" href into an absolute URL.
 * Absolute http(s) URLs pass through, protocol-relative URLs get `https:`,
 * and anything else is treated as a path on cdn.mikrotik.com.
 */
function resolveBlockDiagramHref(href: string): string {
  if (/^https?:\/\//i.test(href)) return href;
  if (href.startsWith("//")) return `https:${href}`;
  // Insert the separator when the path is not root-relative.
  return href.startsWith("/")
    ? `https://cdn.mikrotik.com${href}`
    : `https://cdn.mikrotik.com/${href}`;
}

/**
 * Parse product page HTML into structured data.
 *
 * Collects rows from every `table.performance-table` (IPsec tables into
 * `ipsec_results`, all others into `ethernet_results`) and the target of the
 * first link whose text is exactly "Block Diagram".
 *
 * @param html - Raw HTML of the product page.
 * @param slug - Slug the page was fetched with; copied into the result.
 * @returns The extracted data. The current implementation never returns `null`;
 *   the return type is kept for callers that handle it.
 */
function parseProductHtml(html: string, slug: string): ProductPageData | null {
  const { document } = parseHTML(html);

  const ethernet_results: TestResultRow[] = [];
  const ipsec_results: TestResultRow[] = [];

  for (const table of document.querySelectorAll("table.performance-table")) {
    const { testType, rows } = parsePerformanceTable(table);
    // Anything not recognised as IPsec (including "unknown") is filed under ethernet.
    const bucket = testType === "ipsec" ? ipsec_results : ethernet_results;
    bucket.push(...rows);
  }

  let block_diagram_url: string | null = null;
  for (const a of document.querySelectorAll("a")) {
    if ((a.textContent || "").trim() !== "Block Diagram") continue;

    const href = a.getAttribute("href");
    if (href) block_diagram_url = resolveBlockDiagramHref(href);
    // Only the first matching link is considered, even if it has no href.
    break;
  }

  return { slug, ethernet_results, ipsec_results, block_diagram_url };
}
|
|
236
|
+
|
|
237
|
+
/**
 * Pause for a given time; used to space out request batches.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
241
|
+
|
|
242
|
+
// ── Main ──
|
|
243
|
+
|
|
244
|
+
initDb();

// Load every device; each one is looked up on the product site.
const devices = db.prepare("SELECT id, product_name, product_code FROM devices ORDER BY product_name").all() as Array<{
  id: number;
  product_name: string;
  product_code: string | null;
}>;

if (devices.length === 0) {
  console.error("No devices in database. Run extract-devices.ts first.");
  process.exit(1);
}

console.log(`Found ${devices.length} devices in database`);

// Build device → candidate slugs mapping
const deviceSlugs: Array<{ id: number; name: string; slugs: string[] }> = devices.map((dev) => ({
  id: dev.id,
  name: dev.product_name,
  slugs: generateSlugs(dev.product_name, dev.product_code),
}));

// Prepare statements
const insertTest = db.prepare(`INSERT OR IGNORE INTO device_test_results (
  device_id, test_type, mode, configuration, packet_size,
  throughput_kpps, throughput_mbps
) VALUES (?, ?, ?, ?, ?, ?, ?)`);

const updateDevice = db.prepare(`UPDATE devices
  SET product_url = ?, block_diagram_url = ?
  WHERE id = ?`);

console.log(`Fetching ${deviceSlugs.length} product pages (concurrency=${CONCURRENCY}, delay=${DELAY_MS}ms)...`);

let totalTests = 0;
let devicesWithTests = 0;
let devicesWithDiagrams = 0;
let fetchErrors = 0;

/**
 * Replace all stored test results with the freshly fetched ones and update
 * each fetched device's product/block-diagram URLs, as one transaction.
 * Devices whose page could not be fetched (`data === null`) are only counted.
 */
const insertAll = db.transaction(
  (results: Array<{ deviceId: number; data: ProductPageData | null }>) => {
    // Idempotent: clear existing test results. Done here rather than before the
    // network phase, so an interrupted or failed run does not leave the table
    // empty (assumes db.transaction rolls back on a thrown error, as bun:sqlite
    // does; confirm against ./db.ts).
    db.run("DELETE FROM device_test_results");

    for (const { deviceId, data } of results) {
      if (!data) {
        fetchErrors++;
        continue;
      }

      // Update device with URL and block diagram
      updateDevice.run(
        `https://mikrotik.com/product/${data.slug}`,
        data.block_diagram_url,
        deviceId,
      );

      if (data.block_diagram_url) devicesWithDiagrams++;

      // Tag each row with its test type so both kinds go through one insert loop.
      const taggedRows = [
        ...data.ethernet_results.map((r) => ({ ...r, test_type: "ethernet" as const })),
        ...data.ipsec_results.map((r) => ({ ...r, test_type: "ipsec" as const })),
      ];

      if (taggedRows.length > 0) devicesWithTests++;

      for (const r of taggedRows) {
        insertTest.run(
          deviceId,
          r.test_type,
          r.mode,
          r.configuration,
          r.packet_size,
          r.throughput_kpps,
          r.throughput_mbps,
        );
        totalTests++;
      }
    }
  },
);

// Fetch all products with rate limiting: CONCURRENCY pages at a time,
// pausing DELAY_MS between batches.
const allResults: Array<{ deviceId: number; data: ProductPageData | null }> = [];
let processed = 0;

for (let i = 0; i < deviceSlugs.length; i += CONCURRENCY) {
  const batch = deviceSlugs.slice(i, i + CONCURRENCY);
  const batchResults = await Promise.all(
    batch.map(async (dev) => ({
      deviceId: dev.id,
      data: await fetchProductPage(dev.slugs),
    })),
  );
  allResults.push(...batchResults);
  processed += batch.length;

  const pct = Math.round((processed / deviceSlugs.length) * 100);
  process.stdout.write(`\r ${processed}/${deviceSlugs.length} (${pct}%)`);

  // No pause after the final batch.
  if (i + CONCURRENCY < deviceSlugs.length) {
    await sleep(DELAY_MS);
  }
}
console.log(""); // newline after progress

// Clear old rows and insert all results in one transaction
insertAll(allResults);

console.log(`Test results: ${totalTests} rows for ${devicesWithTests} devices`);
console.log(`Block diagrams: ${devicesWithDiagrams} devices`);
if (fetchErrors > 0) {
  console.warn(`Fetch errors: ${fetchErrors} products`);
}
|