latinfo 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +649 -18
- package/dist/sdk.js +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -907,9 +907,12 @@ async function seedBenchQueries(source, apiKey) {
|
|
|
907
907
|
}
|
|
908
908
|
// Non-existent search queries (edge case)
|
|
909
909
|
source.searchQueries.push('xyznonexistent', 'qqq999', 'zzznodata');
|
|
910
|
-
// Fallback: if API returned nothing,
|
|
911
|
-
|
|
912
|
-
|
|
910
|
+
// Fallback: if API returned nothing, skip search queries entirely.
|
|
911
|
+
// Using generic seed queries on a source with different data (e.g. person names
|
|
912
|
+
// vs business names) causes the search server to process irrelevant queries
|
|
913
|
+
// under 500 concurrent load, leading to timeouts and false bench failures.
|
|
914
|
+
if (source.searchQueries.filter(q => !['xyznonexistent', 'qqq999', 'zzznodata'].includes(q)).length === 0) {
|
|
915
|
+
source.searchQueries = [];
|
|
913
916
|
}
|
|
914
917
|
}
|
|
915
918
|
async function benchStress(args) {
|
|
@@ -1903,17 +1906,165 @@ min_rows: 100
|
|
|
1903
1906
|
smoke_test:
|
|
1904
1907
|
id: ""
|
|
1905
1908
|
expect_field: name
|
|
1909
|
+
`;
|
|
1910
|
+
// Derive script template parameters
|
|
1911
|
+
const idLengthNum = parseInt(idLength);
|
|
1912
|
+
const prefixLength = idLengthNum >= 11 ? 5 : 4;
|
|
1913
|
+
const idRegex = `^\\\\d{${idLengthNum}}$`;
|
|
1914
|
+
// camelCase function name: pe-redam-registry → importPeRedamRegistry
|
|
1915
|
+
const fnName = 'import' + name.split('-').map(s => s[0].toUpperCase() + s.slice(1)).join('');
|
|
1916
|
+
const scriptPath = path_1.default.join(repo, 'src', 'imports', `${name}.ts`);
|
|
1917
|
+
const script = `/**
|
|
1918
|
+
* Import ${name} into R2
|
|
1919
|
+
*
|
|
1920
|
+
* Source: ${url}
|
|
1921
|
+
*
|
|
1922
|
+
* TSV columns: ${idName} \\t name \\t status \\t [add more fields here]
|
|
1923
|
+
*
|
|
1924
|
+
* Usage: npx tsx src/imports/${name}.ts [--limit 100] [--no-upload]
|
|
1925
|
+
*/
|
|
1926
|
+
|
|
1927
|
+
import * as fs from 'fs';
|
|
1928
|
+
import * as path from 'path';
|
|
1929
|
+
import { execSync } from 'child_process';
|
|
1930
|
+
import { buildBinaryFiles } from './build-binary';
|
|
1931
|
+
import { buildSearchIndex } from './build-search-index';
|
|
1932
|
+
import { uploadToR2, saveImportMeta, buildMphfFromIdx } from './shared';
|
|
1933
|
+
|
|
1934
|
+
const SOURCE = '${name}';
|
|
1935
|
+
const TEMP_DIR = \`/tmp/\${SOURCE}-import\`;
|
|
1936
|
+
|
|
1937
|
+
// ─── FIELD LAYOUT ────────────────────────────────────────────────────────────
|
|
1938
|
+
// Update FIELD_COUNT to match the number of columns after the ID.
|
|
1939
|
+
// searchFieldIndex: which field (0-based, after ID) contains the searchable name
|
|
1940
|
+
// statusFieldIndex: which field contains the active/inactive status
|
|
1941
|
+
const FIELD_COUNT = 2; // name, status ← update as you add fields
|
|
1942
|
+
const SEARCH_FIELD = 0; // 0 = name
|
|
1943
|
+
const STATUS_FIELD = 1; // 1 = status
|
|
1944
|
+
|
|
1945
|
+
// ─── TODO: fetch and parse your source data ──────────────────────────────────
|
|
1946
|
+
// Return rows as arrays: [id, name, status, ...other fields]
|
|
1947
|
+
// Each string value must NOT contain tabs or newlines.
|
|
1948
|
+
//
|
|
1949
|
+
// Common patterns:
|
|
1950
|
+
// CSV download → fetch(url), text.split('\\n'), line.split(',')
|
|
1951
|
+
// REST API → fetch(url, { method: 'POST', body: JSON.stringify({...}) })
|
|
1952
|
+
// ZIP file → execSync('curl -L url | funzip > file.csv')
|
|
1953
|
+
// Playwright → await page.goto(url); await page.$$eval(...)
|
|
1954
|
+
//
|
|
1955
|
+
// RECOMMENDED: export async function checkFresh(lastMeta): Promise<boolean>
|
|
1956
|
+
// Called before the import to skip if data hasn't changed.
|
|
1957
|
+
// REST APIs: fetch 1 record, compare max ID.
|
|
1958
|
+
// CSVs: HEAD request, compare Last-Modified header.
|
|
1959
|
+
async function fetchData(limit?: number): Promise<string[][]> {
|
|
1960
|
+
const rows: string[][] = [];
|
|
1961
|
+
|
|
1962
|
+
// TODO: replace this with your actual fetch + parse logic
|
|
1963
|
+
// Example (CSV):
|
|
1964
|
+
//
|
|
1965
|
+
// const res = await fetch('${url}');
|
|
1966
|
+
// const text = await res.text();
|
|
1967
|
+
// for (const line of text.split('\\n').slice(1)) { // slice(1) skips header
|
|
1968
|
+
// const cols = line.split(',');
|
|
1969
|
+
// const id = cols[0]?.trim().padStart(${idLengthNum}, '0');
|
|
1970
|
+
// if (!id || !/${idRegex.replace(/\\\\/g, '\\\\\\\\')}/.test(id)) continue;
|
|
1971
|
+
// const name = cols[1]?.trim() ?? '';
|
|
1972
|
+
// const status = cols[2]?.trim() ?? '';
|
|
1973
|
+
// rows.push([id, name, status]);
|
|
1974
|
+
// if (limit && rows.length >= limit) break;
|
|
1975
|
+
// }
|
|
1976
|
+
|
|
1977
|
+
return rows;
|
|
1978
|
+
}
|
|
1979
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1980
|
+
|
|
1981
|
+
function clean(s: string): string {
|
|
1982
|
+
return (s ?? '').trim().replace(/[\\t\\n\\r]/g, ' ').replace(/\\s+/g, ' ');
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1985
|
+
function buildTsv(rows: string[][]): string {
|
|
1986
|
+
return rows.map(cols => cols.map(clean).join('\\t')).join('\\n');
|
|
1987
|
+
}
|
|
1988
|
+
|
|
1989
|
+
export async function ${fnName}(options?: { limit?: number; upload?: boolean }) {
|
|
1990
|
+
console.log(\`=== \${SOURCE.toUpperCase()} IMPORT ===\\n\`);
|
|
1991
|
+
|
|
1992
|
+
try {
|
|
1993
|
+
fs.mkdirSync(TEMP_DIR, { recursive: true });
|
|
1994
|
+
|
|
1995
|
+
const rows = await fetchData(options?.limit);
|
|
1996
|
+
const minRows = options?.limit ? Math.min(options.limit, 50) : 1000;
|
|
1997
|
+
if (rows.length < minRows) {
|
|
1998
|
+
console.error(\`[\${SOURCE}] Only \${rows.length} rows — expected at least \${minRows}, aborting\`);
|
|
1999
|
+
return false;
|
|
2000
|
+
}
|
|
2001
|
+
|
|
2002
|
+
const tsv = buildTsv(rows);
|
|
2003
|
+
const tsvPath = path.join(TEMP_DIR, 'parsed.tsv');
|
|
2004
|
+
fs.writeFileSync(tsvPath, tsv, 'utf-8');
|
|
2005
|
+
console.log(\`[\${SOURCE}] Wrote \${rows.length.toLocaleString()} rows\`);
|
|
2006
|
+
|
|
2007
|
+
const sortedPath = path.join(TEMP_DIR, 'sorted.tsv');
|
|
2008
|
+
execSync(\`LC_ALL=C sort -t'\\t' -k1,1 "\${tsvPath}" -o "\${sortedPath}"\`, {
|
|
2009
|
+
stdio: 'inherit', env: { ...process.env, TMPDIR: TEMP_DIR },
|
|
2010
|
+
});
|
|
2011
|
+
fs.unlinkSync(tsvPath);
|
|
2012
|
+
|
|
2013
|
+
const config = {
|
|
2014
|
+
idLength: ${idLengthNum},
|
|
2015
|
+
idRegex: /${idRegex}/,
|
|
2016
|
+
prefixLength: ${prefixLength},
|
|
2017
|
+
fieldCount: FIELD_COUNT,
|
|
2018
|
+
};
|
|
2019
|
+
const { shardPaths, idxPath, recordCount } = await buildBinaryFiles(sortedPath, TEMP_DIR, SOURCE, config);
|
|
2020
|
+
|
|
2021
|
+
const search = await buildSearchIndex(
|
|
2022
|
+
sortedPath, TEMP_DIR, SOURCE,
|
|
2023
|
+
{ searchFieldIndex: SEARCH_FIELD, idRegex: /${idRegex}/, statusFieldIndex: STATUS_FIELD },
|
|
2024
|
+
recordCount,
|
|
2025
|
+
);
|
|
2026
|
+
const mphfPath = buildMphfFromIdx(search.idxPath);
|
|
2027
|
+
fs.unlinkSync(sortedPath);
|
|
2028
|
+
|
|
2029
|
+
if (options?.upload !== false) {
|
|
2030
|
+
for (let i = 0; i < shardPaths.length; i++) uploadToR2(shardPaths[i], \`\${SOURCE}-\${i}.bin\`);
|
|
2031
|
+
uploadToR2(idxPath, \`\${SOURCE}.idx\`);
|
|
2032
|
+
uploadToR2(search.idxPath, \`\${SOURCE}-search.idx\`);
|
|
2033
|
+
uploadToR2(mphfPath, \`\${SOURCE}-search.mphf\`);
|
|
2034
|
+
for (let i = 0; i < search.shardPaths.length; i++) uploadToR2(search.shardPaths[i], \`\${SOURCE}-search-\${i}.dat\`);
|
|
2035
|
+
saveImportMeta(SOURCE, new Date().toISOString(), recordCount);
|
|
2036
|
+
fs.rmSync(TEMP_DIR, { recursive: true, force: true });
|
|
2037
|
+
} else {
|
|
2038
|
+
console.log(\`\\n[\${SOURCE}] Files in \${TEMP_DIR} (--no-upload, skipping R2)\`);
|
|
2039
|
+
}
|
|
2040
|
+
|
|
2041
|
+
console.log(\`\\n[\${SOURCE}] Success: \${recordCount.toLocaleString()} records\`);
|
|
2042
|
+
return true;
|
|
2043
|
+
} catch (error) {
|
|
2044
|
+
console.error(\`\\n[\${SOURCE}] Error:\`, error);
|
|
2045
|
+
return false;
|
|
2046
|
+
}
|
|
2047
|
+
}
|
|
2048
|
+
|
|
2049
|
+
if (require.main === module) {
|
|
2050
|
+
const args = process.argv.slice(2);
|
|
2051
|
+
const limitIdx = args.indexOf('--limit');
|
|
2052
|
+
const limit = limitIdx !== -1 ? parseInt(args[limitIdx + 1]) : undefined;
|
|
2053
|
+
const upload = !args.includes('--no-upload');
|
|
2054
|
+
${fnName}({ limit, upload }).then(ok => process.exit(ok ? 0 : 1));
|
|
2055
|
+
}
|
|
1906
2056
|
`;
|
|
1907
2057
|
fs_1.default.writeFileSync(yamlPath, yaml);
|
|
1908
|
-
|
|
2058
|
+
fs_1.default.writeFileSync(scriptPath, script);
|
|
2059
|
+
console.log(`Created:`);
|
|
2060
|
+
console.log(` ${yamlPath}`);
|
|
2061
|
+
console.log(` ${scriptPath}`);
|
|
1909
2062
|
console.log(`\nNext steps:`);
|
|
1910
|
-
console.log(` 1.
|
|
1911
|
-
console.log(` 2.
|
|
1912
|
-
console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
|
|
1913
|
-
console.log(` 4. Test
|
|
1914
|
-
console.log(` 5.
|
|
1915
|
-
console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
|
|
1916
|
-
console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
|
|
2063
|
+
console.log(` 1. Fill in fetchData() in ${scriptPath}`);
|
|
2064
|
+
console.log(` 2. Update FIELD_COUNT, SEARCH_FIELD, STATUS_FIELD if you add more columns`);
|
|
2065
|
+
console.log(` 3. Add dependencies if needed: latinfo pipe deps ${name} playwright ddddocr`);
|
|
2066
|
+
console.log(` 4. Test locally: latinfo pipe local ${name}`);
|
|
2067
|
+
console.log(` 5. Stage + publish: latinfo pipe stage ${name} && latinfo pipe publish ${name}`);
|
|
1917
2068
|
}
|
|
1918
2069
|
async function pipeScript(args) {
|
|
1919
2070
|
const [sourceName, scriptPath] = args;
|
|
@@ -2566,7 +2717,7 @@ async function pipePublish(args) {
|
|
|
2566
2717
|
}
|
|
2567
2718
|
catch {
|
|
2568
2719
|
console.error(`[pipe] Deploy failed — rolling back`);
|
|
2569
|
-
run(`git
|
|
2720
|
+
run(`git checkout HEAD^ -- src/sources.ts .github/workflows/import.yml && git commit -m "Rollback: remove ${sourceName}" && git push`, { cwd: repo, stdio: 'pipe' });
|
|
2570
2721
|
process.exit(1);
|
|
2571
2722
|
}
|
|
2572
2723
|
// 3. Trigger import on runner
|
|
@@ -2590,30 +2741,83 @@ async function pipePublish(args) {
|
|
|
2590
2741
|
const newSources = sourceList.join(',');
|
|
2591
2742
|
console.log(`[pipe] Adding ${sourceName} to SOURCES: ${newSources}`);
|
|
2592
2743
|
run(`ssh ${RUNNER} "sudo sed -i 's|^Environment=.*SOURCES=.*|Environment=SOURCES=${newSources}|' /etc/systemd/system/latinfo-search.service && sudo systemctl daemon-reload"`, { stdio: 'pipe' });
|
|
2744
|
+
run(`ssh ${RUNNER} "sudo systemctl restart latinfo-search"`, { stdio: 'inherit' });
|
|
2745
|
+
console.log(`[pipe] Search server restarted.`);
|
|
2593
2746
|
}
|
|
2594
2747
|
else {
|
|
2595
|
-
console.log(`[pipe] ${sourceName} already in SOURCES.`);
|
|
2748
|
+
console.log(`[pipe] ${sourceName} already in SOURCES — no restart needed.`);
|
|
2596
2749
|
}
|
|
2597
|
-
run(`ssh ${RUNNER} "sudo systemctl restart latinfo-search"`, { stdio: 'inherit' });
|
|
2598
|
-
console.log(`[pipe] Search server restarted.`);
|
|
2599
2750
|
}
|
|
2600
2751
|
catch {
|
|
2601
2752
|
console.log(`[pipe] Could not update search server (not critical).`);
|
|
2602
2753
|
}
|
|
2603
2754
|
// 5. Production bench: 500 concurrent against api.latinfo.dev
|
|
2604
|
-
|
|
2755
|
+
// Warm up: wait for the new source to be responsive before hammering it
|
|
2756
|
+
console.log(`\n[pipe] Warming up (waiting for search server + Worker index load)...`);
|
|
2605
2757
|
try {
|
|
2606
2758
|
const config = loadConfig();
|
|
2607
2759
|
if (!config?.api_key)
|
|
2608
2760
|
throw new Error('No API key');
|
|
2609
|
-
|
|
2761
|
+
// Progressive warm-up: single probe → small batch → ready for 500
|
|
2762
|
+
const warmupRoute = (() => {
|
|
2763
|
+
const src = discoverBenchSources(sourceName);
|
|
2764
|
+
return src.length > 0 ? src[0].routePath : `/${sourceName.replace(/-/g, '/')}`;
|
|
2765
|
+
})();
|
|
2766
|
+
const warmupUrl = `${API_URL}${warmupRoute}/search?q=garcia&limit=1`;
|
|
2767
|
+
const warmupHeaders = { Authorization: `Bearer ${config.api_key}` };
|
|
2768
|
+
// Phase 1: wait for first successful response (up to 90s)
|
|
2769
|
+
let warmedUp = false;
|
|
2770
|
+
for (let attempt = 0; attempt < 30; attempt++) {
|
|
2771
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
2772
|
+
try {
|
|
2773
|
+
const r = await fetch(warmupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(5000) });
|
|
2774
|
+
if (r.status === 200 || r.status === 404) {
|
|
2775
|
+
warmedUp = true;
|
|
2776
|
+
break;
|
|
2777
|
+
}
|
|
2778
|
+
}
|
|
2779
|
+
catch { }
|
|
2780
|
+
process.stdout.write('.');
|
|
2781
|
+
}
|
|
2782
|
+
if (!warmedUp) {
|
|
2783
|
+
console.log(`\n[pipe] Warm-up timed out — running bench anyway`);
|
|
2784
|
+
}
|
|
2785
|
+
else {
|
|
2786
|
+
// Phase 2: warm up LOOKUP path too (bench uses lookups, not just search)
|
|
2787
|
+
const src = discoverBenchSources(sourceName);
|
|
2788
|
+
const smokeId = src.length > 0 ? src[0].smokeId : null;
|
|
2789
|
+
if (smokeId) {
|
|
2790
|
+
const lookupUrl = `${API_URL}${warmupRoute}/${src[0].primaryId.name}/${smokeId}`;
|
|
2791
|
+
process.stdout.write(' lookup');
|
|
2792
|
+
for (let i = 0; i < 5; i++) {
|
|
2793
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
2794
|
+
try {
|
|
2795
|
+
await fetch(lookupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(10000) });
|
|
2796
|
+
}
|
|
2797
|
+
catch { }
|
|
2798
|
+
process.stdout.write('.');
|
|
2799
|
+
}
|
|
2800
|
+
}
|
|
2801
|
+
// Phase 3: progressive concurrent batches (20 → 50 → 100)
|
|
2802
|
+
for (const batchSize of [20, 50, 100]) {
|
|
2803
|
+
const batch = await Promise.all(Array.from({ length: batchSize }, () => fetch(warmupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(10000) })
|
|
2804
|
+
.then(r => r.status === 200 || r.status === 404 ? 1 : 0).catch(() => 0)));
|
|
2805
|
+
const batchOk = batch.reduce((a, b) => a + b, 0);
|
|
2806
|
+
process.stdout.write(` ${batchOk}/${batchSize}`);
|
|
2807
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
2808
|
+
}
|
|
2809
|
+
console.log('');
|
|
2810
|
+
console.log(`[pipe] Warm-up OK`);
|
|
2811
|
+
}
|
|
2812
|
+
console.log(`[pipe] Running production bench (100 concurrent)...`);
|
|
2813
|
+
const bench = await benchProduction(sourceName, config.api_key, 100);
|
|
2610
2814
|
console.log(`\n Production bench: ${bench.qps} q/s, ${bench.success_rate.toFixed(1)}% success`);
|
|
2611
2815
|
console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
|
|
2612
2816
|
if (bench.success_rate < 99.9) {
|
|
2613
2817
|
console.error(`\n[pipe] PRODUCTION BENCH FAILED — ${bench.success_rate.toFixed(1)}% < 99.9%`);
|
|
2614
2818
|
console.error(`[pipe] Rolling back...`);
|
|
2615
2819
|
try {
|
|
2616
|
-
run(`git
|
|
2820
|
+
run(`git checkout HEAD^ -- src/sources.ts .github/workflows/import.yml && git commit -m "Rollback: remove ${sourceName}" && git push`, { cwd: repo, stdio: 'pipe' });
|
|
2617
2821
|
run(`npx wrangler deploy`, { cwd: repo, stdio: 'pipe' });
|
|
2618
2822
|
}
|
|
2619
2823
|
catch { }
|
|
@@ -2633,6 +2837,67 @@ async function pipePublish(args) {
|
|
|
2633
2837
|
console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
|
|
2634
2838
|
console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
|
|
2635
2839
|
}
|
|
2840
|
+
/**
 * Send a free-form feedback / bug report to the API.
 *
 * The CLI arguments are joined with spaces into a single message and POSTed
 * as JSON to `${API_URL}/feedback` together with the CLI version and the
 * OS platform, authenticated with the API key from the stored config.
 *
 * Exits the process with code 1 when no message is given, when no API key
 * is configured, or when the server answers with a non-2xx status.
 *
 * @param {string[]} args Words of the report message.
 * @returns {Promise<void>}
 */
async function report(args) {
    const message = args.join(' ').trim();
    if (!message) {
        console.error('Usage: latinfo report <message>');
        console.error('Example: latinfo report "search returns empty results for banco"');
        process.exit(1);
    }
    const config = loadConfig();
    if (!config?.api_key) {
        console.error('Not logged in. Run: latinfo login');
        process.exit(1);
    }
    const res = await fetch(`${API_URL}/feedback`, {
        method: 'POST',
        headers: { 'Authorization': `Bearer ${config.api_key}`, 'Content-Type': 'application/json' },
        body: JSON.stringify({
            message,
            cli_version: VERSION,
            os: process.platform,
        }),
    });
    if (!res.ok) {
        // An error response is not guaranteed to carry a JSON body (it may be
        // HTML or empty), in which case res.json() rejects. Fall back to the
        // HTTP status instead of crashing or printing "Error: undefined".
        const err = await res.json().catch(() => null);
        const detail = err?.message || err?.error || `HTTP ${res.status}`;
        console.error(`Error: ${detail}`);
        process.exit(1);
    }
    console.log('Report sent. Thank you — we will look into it.');
}
|
|
2868
|
+
/**
 * List feedback reports from the admin API.
 *
 * The admin secret is taken from the ADMIN_SECRET environment variable or,
 * failing that, from the `ADMIN_SECRET=` line of `.dev.vars` in the repo
 * returned by getRepoPath(). Reports with status "open" are listed unless
 * `--resolved` is present in process.argv, in which case "resolved" reports
 * are listed instead.
 *
 * Exits the process with code 1 when no admin secret can be found or when
 * the server answers with a non-2xx status.
 *
 * @returns {Promise<void>}
 */
async function issues() {
    // Extract KEY's value from dotenv-style text. Everything after the first
    // '=' is the value (so secrets containing '=' survive), and one pair of
    // surrounding quotes is removed. Double-quoted values go through
    // JSON.parse first so escape sequences keep working as before.
    const parseDevVar = (content, key) => {
        const line = content.split('\n').find(l => l.startsWith(`${key}=`));
        if (line === undefined)
            return '';
        const raw = line.slice(key.length + 1).trim();
        if (raw.length >= 2 && raw.startsWith('"') && raw.endsWith('"')) {
            try {
                const parsed = JSON.parse(raw);
                if (typeof parsed === 'string')
                    return parsed;
            }
            catch {
                // Not valid JSON — fall through and just strip the quotes.
            }
            return raw.slice(1, -1);
        }
        if (raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'"))
            return raw.slice(1, -1);
        return raw;
    };
    const readSecretFromDevVars = () => {
        try {
            const devVarsPath = path_1.default.join(getRepoPath(), '.dev.vars');
            return parseDevVar(fs_1.default.readFileSync(devVarsPath, 'utf-8'), 'ADMIN_SECRET');
        }
        catch {
            // Missing repo or missing file: treated the same as "no secret configured".
            return '';
        }
    };
    const adminSecret = process.env.ADMIN_SECRET || readSecretFromDevVars();
    if (!adminSecret) {
        console.error('ADMIN_SECRET not found. Set it in .dev.vars or ADMIN_SECRET env var.');
        process.exit(1);
    }
    const status = process.argv.includes('--resolved') ? 'resolved' : 'open';
    const res = await fetch(`${API_URL}/admin/feedback?status=${status}`, {
        headers: { 'Authorization': `Bearer ${adminSecret}` },
    });
    if (!res.ok) {
        console.error(`Error: ${res.status}`);
        process.exit(1);
    }
    const rows = await res.json();
    if (rows.length === 0) {
        console.log(`No ${status} issues.`);
        return;
    }
    for (const r of rows) {
        // Tolerate a missing created_at rather than crashing mid-listing.
        const date = String(r.created_at ?? '').slice(0, 10);
        const meta = [r.cli_version, r.os].filter(Boolean).join(', ');
        console.log(`[#${r.id}] ${date} @${r.github_username}${meta ? ` (${meta})` : ''}`);
        console.log(`  ${r.message}\n`);
    }
}
|
|
2636
2901
|
async function pipeStatus(args) {
|
|
2637
2902
|
const [sourceName] = args;
|
|
2638
2903
|
if (sourceName) {
|
|
@@ -2669,6 +2934,357 @@ async function pipeStatus(args) {
|
|
|
2669
2934
|
}
|
|
2670
2935
|
}
|
|
2671
2936
|
}
|
|
2937
|
+
async function pipeLocal(args) {
|
|
2938
|
+
const [sourceName] = args;
|
|
2939
|
+
if (!sourceName) {
|
|
2940
|
+
console.error('Usage: latinfo pipe local <source-name>');
|
|
2941
|
+
process.exit(1);
|
|
2942
|
+
}
|
|
2943
|
+
const repo = getRepoPath();
|
|
2944
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
2945
|
+
const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
2946
|
+
const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
|
|
2947
|
+
const importScriptMatch = yamlContent.match(/import_script:\s*(.+)/);
|
|
2948
|
+
const customScript = importScriptMatch ? path_1.default.join(repo, importScriptMatch[1].trim()) : null;
|
|
2949
|
+
const defaultScript = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
2950
|
+
const scriptPath = customScript && fs_1.default.existsSync(customScript) ? customScript
|
|
2951
|
+
: fs_1.default.existsSync(defaultScript) ? defaultScript : null;
|
|
2952
|
+
const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
|
|
2953
|
+
const cmd = scriptPath
|
|
2954
|
+
? `npx tsx ${scriptPath} --no-upload`
|
|
2955
|
+
: `npx tsx ${easypipePath} ${yamlPath} --no-upload`;
|
|
2956
|
+
const TEMP_DIR = `/tmp/${sourceName}-import`;
|
|
2957
|
+
const MAX_SHARD_MB = 250;
|
|
2958
|
+
const BENCH_SAMPLES = 500;
|
|
2959
|
+
const IMPORT_RUNS = 1;
|
|
2960
|
+
let passed = true;
|
|
2961
|
+
const errors = [];
|
|
2962
|
+
// ── Run import N times ───────────────────────────────────────────────────
|
|
2963
|
+
for (let run_i = 1; run_i <= IMPORT_RUNS; run_i++) {
|
|
2964
|
+
console.log(`\n[pipe:local] ── Import run ${run_i}/${IMPORT_RUNS} ──`);
|
|
2965
|
+
try {
|
|
2966
|
+
run(cmd, { cwd: repo, stdio: 'inherit' });
|
|
2967
|
+
}
|
|
2968
|
+
catch {
|
|
2969
|
+
errors.push(`Import run ${run_i} crashed`);
|
|
2970
|
+
passed = false;
|
|
2971
|
+
break;
|
|
2972
|
+
}
|
|
2973
|
+
// ── Validate files ─────────────────────────────────────────────────────
|
|
2974
|
+
const expected = [
|
|
2975
|
+
`${sourceName}-0.bin`,
|
|
2976
|
+
`${sourceName}.idx`,
|
|
2977
|
+
`${sourceName}-search.idx`,
|
|
2978
|
+
`${sourceName}-search.mphf`,
|
|
2979
|
+
`${sourceName}-search-0.dat`,
|
|
2980
|
+
];
|
|
2981
|
+
for (const f of expected) {
|
|
2982
|
+
const fp = path_1.default.join(TEMP_DIR, f);
|
|
2983
|
+
if (!fs_1.default.existsSync(fp)) {
|
|
2984
|
+
errors.push(`Missing: ${f}`);
|
|
2985
|
+
passed = false;
|
|
2986
|
+
}
|
|
2987
|
+
}
|
|
2988
|
+
// ── Shard sizes ────────────────────────────────────────────────────────
|
|
2989
|
+
const bins = fs_1.default.existsSync(TEMP_DIR) ? fs_1.default.readdirSync(TEMP_DIR).filter(f => f.endsWith('.bin')) : [];
|
|
2990
|
+
for (const b of bins) {
|
|
2991
|
+
const mb = fs_1.default.statSync(path_1.default.join(TEMP_DIR, b)).size / 1_048_576;
|
|
2992
|
+
if (mb > MAX_SHARD_MB) {
|
|
2993
|
+
errors.push(`Shard ${b} is ${mb.toFixed(0)}MB > ${MAX_SHARD_MB}MB`);
|
|
2994
|
+
passed = false;
|
|
2995
|
+
}
|
|
2996
|
+
else
|
|
2997
|
+
console.log(` ✓ ${b}: ${mb.toFixed(1)} MB`);
|
|
2998
|
+
}
|
|
2999
|
+
// ── V2 search index ────────────────────────────────────────────────────
|
|
3000
|
+
const searchIdx = path_1.default.join(TEMP_DIR, `${sourceName}-search.idx`);
|
|
3001
|
+
if (fs_1.default.existsSync(searchIdx)) {
|
|
3002
|
+
const content = fs_1.default.readFileSync(searchIdx);
|
|
3003
|
+
// V2 magic: first 4 bytes are "LSRY" (V1 = "LSRX")
|
|
3004
|
+
const magic = content.subarray(0, 4).toString('ascii');
|
|
3005
|
+
if (magic !== 'LSRY') {
|
|
3006
|
+
errors.push(`Search index is V1 (magic=${magic}) — must use V2 (statusFieldIndex required)`);
|
|
3007
|
+
passed = false;
|
|
3008
|
+
}
|
|
3009
|
+
else
|
|
3010
|
+
console.log(` ✓ Search index: V2`);
|
|
3011
|
+
}
|
|
3012
|
+
// ── MPHF ───────────────────────────────────────────────────────────────
|
|
3013
|
+
const mphf = path_1.default.join(TEMP_DIR, `${sourceName}-search.mphf`);
|
|
3014
|
+
if (fs_1.default.existsSync(mphf) && fs_1.default.statSync(mphf).size > 0) {
|
|
3015
|
+
console.log(` ✓ MPHF: ${(fs_1.default.statSync(mphf).size / 1024).toFixed(1)} KB`);
|
|
3016
|
+
}
|
|
3017
|
+
else {
|
|
3018
|
+
errors.push('MPHF missing or empty — call buildMphfFromIdx()');
|
|
3019
|
+
passed = false;
|
|
3020
|
+
}
|
|
3021
|
+
if (!passed)
|
|
3022
|
+
break;
|
|
3023
|
+
if (run_i < IMPORT_RUNS)
|
|
3024
|
+
console.log(` ✓ Run ${run_i} OK`);
|
|
3025
|
+
}
|
|
3026
|
+
if (!passed) {
|
|
3027
|
+
console.error(`\n[pipe:local] FAILED:`);
|
|
3028
|
+
for (const e of errors)
|
|
3029
|
+
console.error(` ✗ ${e}`);
|
|
3030
|
+
process.exit(1);
|
|
3031
|
+
}
|
|
3032
|
+
// ── Load binary into memory (shared by bench + smoke test + quality check) ──
|
|
3033
|
+
const idLen = parseInt(yamlContent.match(/length:\s*(\d+)/)?.[1] || '8');
|
|
3034
|
+
const prefixLen = parseInt(yamlContent.match(/prefix_length:\s*(\d+)/)?.[1] || '5');
|
|
3035
|
+
const idxPath = path_1.default.join(TEMP_DIR, `${sourceName}.idx`);
|
|
3036
|
+
const binPath = path_1.default.join(TEMP_DIR, `${sourceName}-0.bin`);
|
|
3037
|
+
let binBuf;
|
|
3038
|
+
let index;
|
|
3039
|
+
try {
|
|
3040
|
+
if (!fs_1.default.existsSync(idxPath) || !fs_1.default.existsSync(binPath))
|
|
3041
|
+
throw new Error('Missing .idx or .bin');
|
|
3042
|
+
binBuf = fs_1.default.readFileSync(binPath);
|
|
3043
|
+
const HEADER_SIZE = 16, ENTRY_SIZE = 16, MAGIC = [0x4c, 0x49, 0x44, 0x58];
|
|
3044
|
+
const idxBuf = fs_1.default.readFileSync(idxPath);
|
|
3045
|
+
for (let i = 0; i < 4; i++)
|
|
3046
|
+
if (idxBuf[i] !== MAGIC[i])
|
|
3047
|
+
throw new Error('Invalid index magic');
|
|
3048
|
+
const entryCount = idxBuf.readUInt32LE(4);
|
|
3049
|
+
index = [];
|
|
3050
|
+
for (let i = 0; i < entryCount; i++) {
|
|
3051
|
+
const off = HEADER_SIZE + i * ENTRY_SIZE;
|
|
3052
|
+
index.push({
|
|
3053
|
+
prefix: idxBuf.readUInt32LE(off),
|
|
3054
|
+
shard: idxBuf.readUInt32LE(off + 4),
|
|
3055
|
+
offset: idxBuf.readUInt32LE(off + 8),
|
|
3056
|
+
length: idxBuf.readUInt32LE(off + 12),
|
|
3057
|
+
});
|
|
3058
|
+
}
|
|
3059
|
+
}
|
|
3060
|
+
catch (e) {
|
|
3061
|
+
errors.push(`Cannot load binary: ${e.message}`);
|
|
3062
|
+
passed = false;
|
|
3063
|
+
index = [];
|
|
3064
|
+
binBuf = Buffer.alloc(0);
|
|
3065
|
+
}
|
|
3066
|
+
// Lookup a record by ID — returns field array (after the ID) or null
|
|
3067
|
+
/**
 * Look up a record by ID in the loaded binary data.
 *
 * Uses the enclosing scope's `index` (entries with prefix/shard/offset/length),
 * `binBuf`, `idLen` and `prefixLen`. The numeric prefix of the ID is
 * binary-searched in `index`; the matching entry's byte range of `binBuf`
 * is then scanned record by record. Each record is laid out as
 * [uint16 LE total length][ID bytes][uint8 field length][field bytes]...
 *
 * NOTE(review): `entry.shard` is not consulted here; the surrounding code
 * loads only the `-0.bin` file into `binBuf` — confirm that offsets of
 * entries in other shards are never looked up through this helper.
 *
 * @param {string} id Full record ID.
 * @returns {string[] | null} Field values following the ID, or null when
 *   the ID is not found.
 */
function lookupRecord(id) {
    const prefix = parseInt(id.substring(0, prefixLen), 10);
    // Binary search for the index entry whose prefix equals the ID's prefix.
    let lo = 0, hi = index.length - 1, entry = null;
    while (lo <= hi) {
        const mid = (lo + hi) >>> 1;
        if (index[mid].prefix === prefix) {
            entry = index[mid];
            break;
        }
        if (index[mid].prefix < prefix)
            lo = mid + 1;
        else
            hi = mid - 1;
    }
    if (!entry)
        return null;
    const chunk = binBuf.subarray(entry.offset, entry.offset + entry.length);
    const headerLen = 2 + idLen; // uint16 record length + ID bytes
    let p = 0;
    // Require a full header before reading: readUInt16LE throws a RangeError
    // when fewer than 2 bytes remain, which would crash callers on a
    // truncated or corrupt chunk.
    while (p + headerLen <= chunk.length) {
        const rlen = chunk.readUInt16LE(p);
        // A record shorter than its own header is corrupt; stop scanning
        // (same minimum as the other record scanners in this file).
        if (rlen < headerLen)
            break;
        const rid = chunk.subarray(p + 2, p + headerLen).toString();
        if (rid === id) {
            // Parse fields: [uint8 len][bytes]... — clamped to the chunk so a
            // truncated final record cannot yield bogus trailing fields.
            const end = Math.min(p + rlen, chunk.length);
            const fields = [];
            let fp = p + headerLen;
            while (fp < end) {
                const flen = chunk[fp++];
                fields.push(chunk.subarray(fp, fp + flen).toString('utf-8'));
                fp += flen;
            }
            return fields;
        }
        // Stop early once the scan has passed the position where the ID
        // would be (relies on records being stored in ascending ID order).
        if (rid > id)
            return null;
        p += rlen;
    }
    return null;
}
|
|
3107
|
+
// ── Smoke test: look up known ID and verify expected field ────────────────
|
|
3108
|
+
console.log(`\n[pipe:local] ── Smoke test ──`);
|
|
3109
|
+
const smokeId = yamlContent.match(/smoke_test:\s*\n\s+id:\s*"?([^"\n]+)"?/)?.[1]?.trim();
|
|
3110
|
+
const smokeField = yamlContent.match(/expect_field:\s*(\w+)/)?.[1]?.trim();
|
|
3111
|
+
if (!smokeId || smokeId === '""' || !smokeField) {
|
|
3112
|
+
errors.push('smoke_test.id and smoke_test.expect_field are required in sources YAML');
|
|
3113
|
+
passed = false;
|
|
3114
|
+
}
|
|
3115
|
+
else {
|
|
3116
|
+
const fieldNames = [...yamlContent.matchAll(/- name:\s*(\w+)/g)].map(m => m[1]);
|
|
3117
|
+
const fieldIdx = fieldNames.indexOf(smokeField);
|
|
3118
|
+
if (fieldIdx === -1) {
|
|
3119
|
+
errors.push(`smoke_test.expect_field "${smokeField}" not found in fields list`);
|
|
3120
|
+
passed = false;
|
|
3121
|
+
}
|
|
3122
|
+
else {
|
|
3123
|
+
const record = lookupRecord(smokeId);
|
|
3124
|
+
if (!record) {
|
|
3125
|
+
errors.push(`Smoke test FAILED: ID "${smokeId}" not found in binary — fetchData() may be returning wrong data`);
|
|
3126
|
+
passed = false;
|
|
3127
|
+
}
|
|
3128
|
+
else {
|
|
3129
|
+
// Field count check: binary record fields must match YAML fields
|
|
3130
|
+
if (record.length !== fieldNames.length) {
|
|
3131
|
+
errors.push(`Field count mismatch: binary has ${record.length} fields but YAML lists ${fieldNames.length} (${fieldNames.join(', ')}). Fix YAML or import script.`);
|
|
3132
|
+
passed = false;
|
|
3133
|
+
}
|
|
3134
|
+
else {
|
|
3135
|
+
console.log(` ✓ field count: ${record.length} (matches YAML)`);
|
|
3136
|
+
}
|
|
3137
|
+
// Field content validation: verify each field value matches its name
|
|
3138
|
+
for (let fi = 0; fi < Math.min(record.length, fieldNames.length); fi++) {
|
|
3139
|
+
const fname = fieldNames[fi];
|
|
3140
|
+
const fval = (record[fi] ?? '').trim();
|
|
3141
|
+
console.log(` ${fname} = "${fval.slice(0, 60)}${fval.length > 60 ? '...' : ''}"`);
|
|
3142
|
+
if (!fval)
|
|
3143
|
+
continue; // empty fields checked in data quality
|
|
3144
|
+
const isDate = /^\d{2}[\/\-]\d{2}[\/\-]\d{4}$/.test(fval) || /^\d{4}[\/\-]\d{2}[\/\-]\d{2}/.test(fval) || /^\d{8}$/.test(fval);
|
|
3145
|
+
const isNumeric = /^\d+$/.test(fval);
|
|
3146
|
+
const hasLetters = /[a-zA-ZÁÉÍÓÚÑáéíóúñ]/.test(fval);
|
|
3147
|
+
if (fname.startsWith('fecha') && !isDate && !fval.includes('/') && !fval.includes('-')) {
|
|
3148
|
+
errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — expected a date`);
|
|
3149
|
+
passed = false;
|
|
3150
|
+
}
|
|
3151
|
+
if (fname.endsWith('_count') && !isNumeric) {
|
|
3152
|
+
errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — expected a number`);
|
|
3153
|
+
passed = false;
|
|
3154
|
+
}
|
|
3155
|
+
if (fname === 'nombre' && !hasLetters) {
|
|
3156
|
+
errors.push(`Field "nombre" = "${fval.slice(0, 40)}" — expected letters (got numbers/dates)`);
|
|
3157
|
+
passed = false;
|
|
3158
|
+
}
|
|
3159
|
+
if ((fname.startsWith('estado') || fname === 'tipo_sancion') && isDate) {
|
|
3160
|
+
errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — looks like a date, not a status/type`);
|
|
3161
|
+
passed = false;
|
|
3162
|
+
}
|
|
3163
|
+
if (fname.includes('entidad') && isDate) {
|
|
3164
|
+
errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — looks like a date, not an entity name`);
|
|
3165
|
+
passed = false;
|
|
3166
|
+
}
|
|
3167
|
+
}
|
|
3168
|
+
const value = record[fieldIdx] ?? '';
|
|
3169
|
+
if (!value.trim()) {
|
|
3170
|
+
errors.push(`Smoke test FAILED: field "${smokeField}" is empty for ID "${smokeId}" — check your field mapping`);
|
|
3171
|
+
passed = false;
|
|
3172
|
+
}
|
|
3173
|
+
else {
|
|
3174
|
+
console.log(` ✓ ${smokeField} = "${value.trim()}"`);
|
|
3175
|
+
}
|
|
3176
|
+
}
|
|
3177
|
+
}
|
|
3178
|
+
}
|
|
3179
|
+
// ── Data quality: check empty name/status rates ───────────────────────────
|
|
3180
|
+
console.log(`\n[pipe:local] ── Data quality ──`);
|
|
3181
|
+
try {
|
|
3182
|
+
const fieldNames = [...yamlContent.matchAll(/- name:\s*(\w+)/g)].map(m => m[1]);
|
|
3183
|
+
let total = 0, emptyName = 0, emptyStatus = 0;
|
|
3184
|
+
let pos2 = 0;
|
|
3185
|
+
while (pos2 < binBuf.length) {
|
|
3186
|
+
const rlen = binBuf.readUInt16LE(pos2);
|
|
3187
|
+
if (rlen < 2 + idLen)
|
|
3188
|
+
break;
|
|
3189
|
+
let fp = pos2 + 2 + idLen;
|
|
3190
|
+
const fields = [];
|
|
3191
|
+
while (fp < pos2 + rlen) {
|
|
3192
|
+
const flen = binBuf[fp++];
|
|
3193
|
+
fields.push(binBuf.subarray(fp, fp + flen).toString('utf-8'));
|
|
3194
|
+
fp += flen;
|
|
3195
|
+
}
|
|
3196
|
+
total++;
|
|
3197
|
+
if (!fields[0]?.trim())
|
|
3198
|
+
emptyName++;
|
|
3199
|
+
if (!fields[1]?.trim())
|
|
3200
|
+
emptyStatus++;
|
|
3201
|
+
pos2 += rlen;
|
|
3202
|
+
}
|
|
3203
|
+
const emptyNamePct = total > 0 ? (emptyName / total) * 100 : 0;
|
|
3204
|
+
const emptyStatusPct = total > 0 ? (emptyStatus / total) * 100 : 0;
|
|
3205
|
+
const nameLabel = fieldNames[0] || 'name';
|
|
3206
|
+
const statusLabel = fieldNames[1] || 'status';
|
|
3207
|
+
console.log(` ${total.toLocaleString()} records scanned`);
|
|
3208
|
+
if (emptyNamePct > 5) {
|
|
3209
|
+
errors.push(`${emptyNamePct.toFixed(1)}% of records have empty "${nameLabel}" — check searchFieldIndex`);
|
|
3210
|
+
passed = false;
|
|
3211
|
+
}
|
|
3212
|
+
else {
|
|
3213
|
+
console.log(` ✓ ${nameLabel}: ${emptyNamePct.toFixed(1)}% empty`);
|
|
3214
|
+
}
|
|
3215
|
+
if (emptyStatusPct > 50) {
|
|
3216
|
+
errors.push(`${emptyStatusPct.toFixed(1)}% of records have empty "${statusLabel}" — check statusFieldIndex`);
|
|
3217
|
+
passed = false;
|
|
3218
|
+
}
|
|
3219
|
+
else {
|
|
3220
|
+
console.log(` ✓ ${statusLabel}: ${emptyStatusPct.toFixed(1)}% empty`);
|
|
3221
|
+
}
|
|
3222
|
+
}
|
|
3223
|
+
catch (e) {
|
|
3224
|
+
errors.push(`Quality check failed: ${e.message}`);
|
|
3225
|
+
passed = false;
|
|
3226
|
+
}
|
|
3227
|
+
// ── Local benchmark: N random lookups from binary ─────────────────────────
|
|
3228
|
+
console.log(`\n[pipe:local] ── Local benchmark: ${BENCH_SAMPLES} random lookups ──`);
|
|
3229
|
+
try {
|
|
3230
|
+
const allIds = [];
|
|
3231
|
+
let pos3 = 0;
|
|
3232
|
+
while (pos3 < binBuf.length && allIds.length < BENCH_SAMPLES * 10) {
|
|
3233
|
+
const rlen = binBuf.readUInt16LE(pos3);
|
|
3234
|
+
if (rlen < 2 + idLen || pos3 + rlen > binBuf.length)
|
|
3235
|
+
break;
|
|
3236
|
+
allIds.push(binBuf.subarray(pos3 + 2, pos3 + 2 + idLen).toString());
|
|
3237
|
+
pos3 += rlen;
|
|
3238
|
+
}
|
|
3239
|
+
if (allIds.length === 0)
|
|
3240
|
+
throw new Error('Could not read any records from .bin');
|
|
3241
|
+
for (let i = allIds.length - 1; i > 0; i--) {
|
|
3242
|
+
const j = Math.floor(Math.random() * (i + 1));
|
|
3243
|
+
[allIds[i], allIds[j]] = [allIds[j], allIds[i]];
|
|
3244
|
+
}
|
|
3245
|
+
const sample = allIds.slice(0, BENCH_SAMPLES);
|
|
3246
|
+
const latencies = [];
|
|
3247
|
+
let found = 0;
|
|
3248
|
+
for (const id of sample) {
|
|
3249
|
+
const t0 = performance.now();
|
|
3250
|
+
if (lookupRecord(id))
|
|
3251
|
+
found++;
|
|
3252
|
+
latencies.push(performance.now() - t0);
|
|
3253
|
+
}
|
|
3254
|
+
latencies.sort((a, b) => a - b);
|
|
3255
|
+
const p50 = latencies[Math.floor(latencies.length * 0.50)];
|
|
3256
|
+
const p95 = latencies[Math.floor(latencies.length * 0.95)];
|
|
3257
|
+
const p99 = latencies[Math.floor(latencies.length * 0.99)];
|
|
3258
|
+
console.log(` ${BENCH_SAMPLES} lookups: p50=${p50.toFixed(2)}ms p95=${p95.toFixed(2)}ms p99=${p99.toFixed(2)}ms`);
|
|
3259
|
+
console.log(` Hit rate: ${found}/${BENCH_SAMPLES} (${((found / BENCH_SAMPLES) * 100).toFixed(1)}%)`);
|
|
3260
|
+
if (found < BENCH_SAMPLES * 0.99) {
|
|
3261
|
+
errors.push(`Low hit rate: ${found}/${BENCH_SAMPLES}`);
|
|
3262
|
+
passed = false;
|
|
3263
|
+
}
|
|
3264
|
+
}
|
|
3265
|
+
catch (e) {
|
|
3266
|
+
errors.push(`Benchmark failed: ${e.message}`);
|
|
3267
|
+
passed = false;
|
|
3268
|
+
}
|
|
3269
|
+
// ── Final summary ─────────────────────────────────────────────────────────
|
|
3270
|
+
if (errors.length > 0) {
|
|
3271
|
+
console.error(`\n[pipe:local] FAILED:`);
|
|
3272
|
+
for (const e of errors)
|
|
3273
|
+
console.error(` ✗ ${e}`);
|
|
3274
|
+
process.exit(1);
|
|
3275
|
+
}
|
|
3276
|
+
// Mark test + validate as passed so pipe stage can proceed
|
|
3277
|
+
const status = loadPipeStatus(sourceName);
|
|
3278
|
+
const now = new Date().toISOString();
|
|
3279
|
+
status.test = { passed: true, timestamp: now };
|
|
3280
|
+
status.validate = { passed: true, timestamp: now };
|
|
3281
|
+
savePipeStatus(status);
|
|
3282
|
+
console.log(`\n[pipe:local] ✓ ALL CHECKS PASSED`);
|
|
3283
|
+
console.log(` shards ≤ ${MAX_SHARD_MB}MB • V2 • MPHF • bench OK`);
|
|
3284
|
+
console.log(` Gates test + validate marked as passed.`);
|
|
3285
|
+
console.log(` Files ready in: ${TEMP_DIR}`);
|
|
3286
|
+
console.log(`\n Next: latinfo pipe stage ${sourceName} (uploads to R2 + Linux Mint bench)`);
|
|
3287
|
+
}
|
|
2672
3288
|
async function pipe(args) {
|
|
2673
3289
|
requireAdmin();
|
|
2674
3290
|
const [subcommand, ...subArgs] = args;
|
|
@@ -2706,6 +3322,9 @@ async function pipe(args) {
|
|
|
2706
3322
|
for (const y of yamls)
|
|
2707
3323
|
console.log(` ${y.replace('.yaml', '')}`);
|
|
2708
3324
|
break;
|
|
3325
|
+
case 'local':
|
|
3326
|
+
await pipeLocal(subArgs);
|
|
3327
|
+
break;
|
|
2709
3328
|
case 'run':
|
|
2710
3329
|
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
2711
3330
|
try {
|
|
@@ -2726,6 +3345,7 @@ COMMANDS
|
|
|
2726
3345
|
create <country> <institution> <dataset> [flags] Create source (YAML template)
|
|
2727
3346
|
script <source> <file.ts> Upload import script
|
|
2728
3347
|
deps <source> <pkg1> [pkg2] ... Add npm dependencies
|
|
3348
|
+
local <source> Full local test: 3x import + file checks + bench
|
|
2729
3349
|
test <source> Gate 1: test 100 records locally
|
|
2730
3350
|
validate <source> Gate 2: full import locally
|
|
2731
3351
|
stage <source> Gate 3: import + 500 bench on Linux Mint
|
|
@@ -2763,6 +3383,11 @@ SCRIPT REQUIREMENTS
|
|
|
2763
3383
|
7. uploadToR2() for each file
|
|
2764
3384
|
8. saveImportMeta()
|
|
2765
3385
|
|
|
3386
|
+
RECOMMENDED: export async function checkFresh(lastMeta): Promise<boolean>
|
|
3387
|
+
Called before the import to skip if data hasn't changed. Saves RAM + API calls.
|
|
3388
|
+
REST APIs: fetch 1 record, compare max ID. CSVs: HEAD request, compare Last-Modified.
|
|
3389
|
+
Without this, the import always runs regardless of whether the source updated.
|
|
3390
|
+
|
|
2766
3391
|
See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.
|
|
2767
3392
|
|
|
2768
3393
|
NAMING
|
|
@@ -3242,6 +3867,12 @@ else {
|
|
|
3242
3867
|
case 'ep':
|
|
3243
3868
|
easypipe(args).catch(e => { console.error(e); process.exit(1); });
|
|
3244
3869
|
break;
|
|
3870
|
+
case 'report':
|
|
3871
|
+
report(args).catch(e => { console.error(e); process.exit(1); });
|
|
3872
|
+
break;
|
|
3873
|
+
case 'issues':
|
|
3874
|
+
issues().catch(e => { console.error(e); process.exit(1); });
|
|
3875
|
+
break;
|
|
3245
3876
|
case 'completion':
|
|
3246
3877
|
completion();
|
|
3247
3878
|
break;
|
package/dist/sdk.js
CHANGED
|
@@ -70,7 +70,7 @@ class Country {
|
|
|
70
70
|
const tokenPostings = [];
|
|
71
71
|
for (const r of resolved) {
|
|
72
72
|
const lists = await Promise.all(r.entries.map(async (entry) => {
|
|
73
|
-
const byteLen = Math.min(entry.count,
|
|
73
|
+
const byteLen = Math.min(entry.count, 2000) * idx.entrySize;
|
|
74
74
|
const cdnUrl = `https://data.latinfo.dev/${this.cfg.baseName}-search-${entry.shard}.dat`;
|
|
75
75
|
const res = await fetch(cdnUrl, {
|
|
76
76
|
headers: { Range: `bytes=${entry.offset}-${entry.offset + byteLen - 1}` },
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "latinfo",
|
|
3
|
-
"version": "0.19.1",
|
|
3
|
+
"version": "0.20.0",
|
|
4
4
|
"description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
|
|
5
5
|
"homepage": "https://latinfo.dev",
|
|
6
6
|
"repository": {
|