latinfo 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +211 -29
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -47,7 +47,7 @@ const local_search_1 = require("./local-search");
|
|
|
47
47
|
const client_search_1 = require("./client-search");
|
|
48
48
|
const odis_search_1 = require("./odis-search");
|
|
49
49
|
const mphf_search_1 = require("./mphf-search");
|
|
50
|
-
const VERSION = '0.
|
|
50
|
+
const VERSION = '0.11.0';
|
|
51
51
|
const API_URL = process.env.LATINFO_API_URL || 'https://api.latinfo.dev';
|
|
52
52
|
const GITHUB_CLIENT_ID = process.env.GITHUB_CLIENT_ID || 'Ov23li5fcQaiCsVtaMKK';
|
|
53
53
|
const CONFIG_DIR = path_1.default.join(os_1.default.homedir(), '.latinfo');
|
|
@@ -1791,33 +1791,43 @@ async function pipeTest(args) {
|
|
|
1791
1791
|
const useEasypipe = !fs_1.default.existsSync(scriptPath);
|
|
1792
1792
|
const cmd = useEasypipe
|
|
1793
1793
|
? `npx tsx ${easypipePath} ${yamlPath} --limit 100 --local`
|
|
1794
|
-
: `npx tsx ${scriptPath} --limit 100`;
|
|
1794
|
+
: `npx tsx ${scriptPath} --limit 100 --local`;
|
|
1795
1795
|
console.log(`[pipe] Gate 1: TEST (100 records)\n`);
|
|
1796
1796
|
console.log(`Running: ${cmd}\n`);
|
|
1797
|
+
let output = '';
|
|
1797
1798
|
try {
|
|
1798
|
-
run(cmd, {
|
|
1799
|
+
output = run(cmd, { encoding: 'utf-8', cwd: repo, stdio: ['inherit', 'pipe', 'inherit'] }) || '';
|
|
1800
|
+
// Print output
|
|
1801
|
+
if (output)
|
|
1802
|
+
process.stdout.write(output);
|
|
1799
1803
|
}
|
|
1800
|
-
catch {
|
|
1804
|
+
catch (e) {
|
|
1805
|
+
if (e.stdout) {
|
|
1806
|
+
output = e.stdout;
|
|
1807
|
+
process.stdout.write(output);
|
|
1808
|
+
}
|
|
1801
1809
|
errors.push('Import script failed');
|
|
1802
1810
|
}
|
|
1803
|
-
// Validate output files
|
|
1804
|
-
const outDir = `/tmp/${sourceName}-import`;
|
|
1811
|
+
// Validate from script output (files are cleaned up by script)
|
|
1805
1812
|
if (errors.length === 0) {
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
if (
|
|
1811
|
-
|
|
1813
|
+
if (!output.includes('Success') && !output.includes('records')) {
|
|
1814
|
+
errors.push('Import did not report success');
|
|
1815
|
+
}
|
|
1816
|
+
// Check for V2 search index (not V1)
|
|
1817
|
+
if (output.includes('V1)') && !output.includes('V2')) {
|
|
1818
|
+
errors.push('Search index is V1 — MUST use V2. Add statusFieldIndex to buildSearchIndex()');
|
|
1812
1819
|
}
|
|
1813
|
-
// Check for V2 search index
|
|
1814
|
-
const searchIdx = fs_1.default.readdirSync('/tmp').filter(f => f.includes(sourceName) && f.includes('search.idx'));
|
|
1815
|
-
if (searchIdx.length === 0)
|
|
1816
|
-
errors.push('No V2 search index generated — use statusFieldIndex in buildSearchIndex');
|
|
1817
1820
|
// Check for MPHF
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
+
if (!output.includes('[mphf]') && !output.includes('MPHF')) {
|
|
1822
|
+
errors.push('No MPHF generated — call buildMphfFromIdx() after buildSearchIndex()');
|
|
1823
|
+
}
|
|
1824
|
+
// Check record count
|
|
1825
|
+
const recordMatch = output.match(/(\d[\d,]*)\s*records/);
|
|
1826
|
+
if (recordMatch) {
|
|
1827
|
+
const count = parseInt(recordMatch[1].replace(/,/g, ''));
|
|
1828
|
+
if (count === 0)
|
|
1829
|
+
errors.push('Zero records imported');
|
|
1830
|
+
}
|
|
1821
1831
|
}
|
|
1822
1832
|
if (errors.length > 0) {
|
|
1823
1833
|
console.error(`\n[pipe] Gate 1 FAILED:`);
|
|
@@ -1847,8 +1857,8 @@ async function pipeValidate(args) {
|
|
|
1847
1857
|
const useEasypipe = !fs_1.default.existsSync(scriptPath);
|
|
1848
1858
|
const cmd = useEasypipe
|
|
1849
1859
|
? `npx tsx ${easypipePath} ${path_1.default.join(repo, 'sources', `${sourceName}.yaml`)} --local`
|
|
1850
|
-
: `npx tsx ${scriptPath}`;
|
|
1851
|
-
console.log(`[pipe] Gate 2: VALIDATE (full import)\n`);
|
|
1860
|
+
: `npx tsx ${scriptPath} --local`;
|
|
1861
|
+
console.log(`[pipe] Gate 2: VALIDATE (full import, local only — no R2 upload)\n`);
|
|
1852
1862
|
console.log(`Running: ${cmd}\n`);
|
|
1853
1863
|
try {
|
|
1854
1864
|
const output = run(cmd, { cwd: repo, stdio: 'inherit', encoding: 'utf-8' });
|
|
@@ -1875,21 +1885,39 @@ async function pipeStage(args) {
|
|
|
1875
1885
|
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1876
1886
|
const RUNNER = 'f3mt0@100.109.82.87';
|
|
1877
1887
|
console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
|
|
1878
|
-
// 1. Copy script + YAML to runner
|
|
1888
|
+
// 1. Copy script + YAML to runner via scp
|
|
1879
1889
|
const repo = getRepoPath();
|
|
1880
|
-
|
|
1890
|
+
const remoteRepo = '~/actions-runner/_work/latinfo-api/latinfo-api';
|
|
1891
|
+
console.log('[pipe] Syncing files to Linux Mint...');
|
|
1881
1892
|
try {
|
|
1882
|
-
run(`ssh ${RUNNER} "
|
|
1893
|
+
run(`ssh ${RUNNER} "echo OK"`, { stdio: 'pipe', timeout: 10_000 });
|
|
1894
|
+
// Copy import script and YAML
|
|
1895
|
+
const scriptFile = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1896
|
+
const yamlFile = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
1897
|
+
if (fs_1.default.existsSync(scriptFile))
|
|
1898
|
+
run(`scp ${scriptFile} ${RUNNER}:${remoteRepo}/src/imports/`, { stdio: 'pipe' });
|
|
1899
|
+
if (fs_1.default.existsSync(yamlFile))
|
|
1900
|
+
run(`scp ${yamlFile} ${RUNNER}:${remoteRepo}/sources/`, { stdio: 'pipe' });
|
|
1901
|
+
console.log('[pipe] Files synced.');
|
|
1883
1902
|
}
|
|
1884
1903
|
catch {
|
|
1885
1904
|
console.error('[pipe] SSH failed. Is Linux Mint running? Check: ssh f3mt0@100.109.82.87');
|
|
1886
1905
|
process.exit(1);
|
|
1887
1906
|
}
|
|
1888
|
-
// 2.
|
|
1889
|
-
|
|
1907
|
+
// 2. Sync env files if missing on runner
|
|
1908
|
+
try {
|
|
1909
|
+
const envLocal = path_1.default.join(repo, '.env');
|
|
1910
|
+
const devVarsLocal = path_1.default.join(repo, '.dev.vars');
|
|
1911
|
+
if (fs_1.default.existsSync(envLocal))
|
|
1912
|
+
run(`scp ${envLocal} ${RUNNER}:${remoteRepo}/.env`, { stdio: 'pipe' });
|
|
1913
|
+
if (fs_1.default.existsSync(devVarsLocal))
|
|
1914
|
+
run(`scp ${devVarsLocal} ${RUNNER}:${remoteRepo}/.dev.vars`, { stdio: 'pipe' });
|
|
1915
|
+
}
|
|
1916
|
+
catch { }
|
|
1917
|
+
// 3. Run import on Linux Mint
|
|
1890
1918
|
console.log(`[pipe] Running import on Linux Mint...`);
|
|
1891
1919
|
try {
|
|
1892
|
-
run(`ssh ${RUNNER} "cd
|
|
1920
|
+
run(`ssh ${RUNNER} "cd ${remoteRepo} && set -a && source .env 2>/dev/null; source .dev.vars 2>/dev/null; set +a && R2_BUCKET_NAME=latinfo-data npx tsx src/imports/${sourceName}.ts"`, {
|
|
1893
1921
|
stdio: 'inherit', timeout: 600_000,
|
|
1894
1922
|
});
|
|
1895
1923
|
}
|
|
@@ -1942,6 +1970,147 @@ p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99
|
|
|
1942
1970
|
savePipeStatus(status);
|
|
1943
1971
|
}
|
|
1944
1972
|
}
|
|
1973
|
+
async function pipeDocs(args) {
|
|
1974
|
+
const [sourceName, docPath] = args;
|
|
1975
|
+
if (!sourceName) {
|
|
1976
|
+
console.error(`Usage: latinfo pipe docs <source-name> [doc-file]
|
|
1977
|
+
|
|
1978
|
+
If no doc-file is provided, generates a template for you to fill in.
|
|
1979
|
+
If doc-file is provided, copies it as the source documentation.
|
|
1980
|
+
|
|
1981
|
+
The documentation MUST include these sections:
|
|
1982
|
+
|
|
1983
|
+
## Source
|
|
1984
|
+
URL, institution, what data it contains, update frequency
|
|
1985
|
+
|
|
1986
|
+
## How it works
|
|
1987
|
+
Download method (fetch, Playwright, API), authentication, CAPTCHA, encoding
|
|
1988
|
+
|
|
1989
|
+
## Fields
|
|
1990
|
+
All fields with types and examples
|
|
1991
|
+
|
|
1992
|
+
## Known issues
|
|
1993
|
+
Encoding problems, rate limits, CAPTCHA changes, session handling
|
|
1994
|
+
|
|
1995
|
+
## Troubleshooting
|
|
1996
|
+
What to do if:
|
|
1997
|
+
- URL changes
|
|
1998
|
+
- CAPTCHA type changes
|
|
1999
|
+
- Encoding changes
|
|
2000
|
+
- API response format changes
|
|
2001
|
+
- Authentication method changes
|
|
2002
|
+
- Rate limits increase
|
|
2003
|
+
- Data format changes (new columns, removed columns)
|
|
2004
|
+
|
|
2005
|
+
## Dependencies
|
|
2006
|
+
Required packages and why (playwright, ddddocr, etc.)
|
|
2007
|
+
|
|
2008
|
+
## Bench results
|
|
2009
|
+
Concurrent users tested, success rate, p50/p95/p99`);
|
|
2010
|
+
process.exit(1);
|
|
2011
|
+
}
|
|
2012
|
+
const status = loadPipeStatus(sourceName);
|
|
2013
|
+
requireGate(status, 'stage', 'docs');
|
|
2014
|
+
const repo = getRepoPath();
|
|
2015
|
+
const docsDir = path_1.default.join(repo, 'docs', 'sources');
|
|
2016
|
+
fs_1.default.mkdirSync(docsDir, { recursive: true });
|
|
2017
|
+
const destPath = path_1.default.join(docsDir, `${sourceName}.md`);
|
|
2018
|
+
if (docPath) {
|
|
2019
|
+
// Copy provided doc
|
|
2020
|
+
const src = path_1.default.resolve(docPath);
|
|
2021
|
+
if (!fs_1.default.existsSync(src)) {
|
|
2022
|
+
console.error(`File not found: ${src}`);
|
|
2023
|
+
process.exit(1);
|
|
2024
|
+
}
|
|
2025
|
+
const content = fs_1.default.readFileSync(src, 'utf-8');
|
|
2026
|
+
// Validate required sections
|
|
2027
|
+
const required = ['## Source', '## How it works', '## Fields', '## Known issues', '## Troubleshooting'];
|
|
2028
|
+
const missing = required.filter(s => !content.includes(s));
|
|
2029
|
+
if (missing.length > 0) {
|
|
2030
|
+
console.error(`[pipe] Documentation missing required sections:`);
|
|
2031
|
+
for (const m of missing)
|
|
2032
|
+
console.error(` ✗ ${m}`);
|
|
2033
|
+
console.error(`\nAdd these sections to your doc and try again.`);
|
|
2034
|
+
process.exit(1);
|
|
2035
|
+
}
|
|
2036
|
+
fs_1.default.copyFileSync(src, destPath);
|
|
2037
|
+
console.log(`[pipe] Documentation saved: ${destPath}`);
|
|
2038
|
+
}
|
|
2039
|
+
else {
|
|
2040
|
+
// Generate template
|
|
2041
|
+
const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
2042
|
+
const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
|
|
2043
|
+
const urlMatch = yamlContent.match(/url:\s*(.+)/);
|
|
2044
|
+
const url = urlMatch ? urlMatch[1].trim() : 'https://example.com';
|
|
2045
|
+
const benchData = status.stage?.bench;
|
|
2046
|
+
const template = `# ${sourceName}
|
|
2047
|
+
|
|
2048
|
+
## Source
|
|
2049
|
+
- **URL**: ${url}
|
|
2050
|
+
- **Institution**: TODO
|
|
2051
|
+
- **Data**: TODO (what records this contains)
|
|
2052
|
+
- **Records**: TODO (approximate count)
|
|
2053
|
+
- **Update frequency**: TODO (daily, weekly, manual)
|
|
2054
|
+
- **Format**: TODO (CSV, JSON API, web scraping)
|
|
2055
|
+
|
|
2056
|
+
## How it works
|
|
2057
|
+
TODO: Describe step by step how the import script works.
|
|
2058
|
+
- How is data downloaded? (direct URL, API with pagination, Playwright crawler)
|
|
2059
|
+
- Is there authentication? (API key, session, CAPTCHA)
|
|
2060
|
+
- What encoding is the source? (UTF-8, ISO-8859-1)
|
|
2061
|
+
- Any special parsing needed? (date formats, amount formats, field concatenation)
|
|
2062
|
+
|
|
2063
|
+
## Fields
|
|
2064
|
+
| Field | Type | Example | Notes |
|
|
2065
|
+
|-------|------|---------|-------|
|
|
2066
|
+
| TODO | string | TODO | TODO |
|
|
2067
|
+
|
|
2068
|
+
## Known issues
|
|
2069
|
+
- TODO: List any encoding problems, edge cases, data quality issues
|
|
2070
|
+
- TODO: Rate limits, CAPTCHA difficulty, session expiration
|
|
2071
|
+
|
|
2072
|
+
## Troubleshooting
|
|
2073
|
+
|
|
2074
|
+
### URL changes
|
|
2075
|
+
TODO: Where to find the new URL, how to update
|
|
2076
|
+
|
|
2077
|
+
### CAPTCHA changes
|
|
2078
|
+
TODO: What CAPTCHA solver is used, alternatives if it breaks
|
|
2079
|
+
|
|
2080
|
+
### Encoding changes
|
|
2081
|
+
TODO: Current encoding, how to detect changes
|
|
2082
|
+
|
|
2083
|
+
### Format changes
|
|
2084
|
+
TODO: How to detect if columns change, new fields added, fields removed
|
|
2085
|
+
|
|
2086
|
+
### Authentication changes
|
|
2087
|
+
TODO: Current auth method, what to check if it stops working
|
|
2088
|
+
|
|
2089
|
+
## Dependencies
|
|
2090
|
+
TODO: List npm packages and why each is needed
|
|
2091
|
+
\`\`\`
|
|
2092
|
+
playwright — browser automation for CAPTCHA/session
|
|
2093
|
+
ddddocr — CAPTCHA OCR solver
|
|
2094
|
+
\`\`\`
|
|
2095
|
+
|
|
2096
|
+
## Bench results
|
|
2097
|
+
${benchData ? `- **Concurrent**: ${benchData.concurrent}
|
|
2098
|
+
- **Success rate**: ${benchData.success_rate.toFixed(1)}%
|
|
2099
|
+
- **p50**: ${benchData.p50}ms
|
|
2100
|
+
- **p95**: ${benchData.p95}ms
|
|
2101
|
+
- **p99**: ${benchData.p99}ms` : 'TODO: Run latinfo pipe stage first'}
|
|
2102
|
+
`;
|
|
2103
|
+
fs_1.default.writeFileSync(destPath, template);
|
|
2104
|
+
console.log(`[pipe] Template generated: ${destPath}`);
|
|
2105
|
+
console.log(`\nFill in the TODO sections, then run:`);
|
|
2106
|
+
console.log(` latinfo pipe docs ${sourceName} ${destPath}`);
|
|
2107
|
+
process.exit(1); // Force them to fill it in
|
|
2108
|
+
}
|
|
2109
|
+
console.log(`\n[pipe] Gate 3.5 PASSED ✓`);
|
|
2110
|
+
console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
|
|
2111
|
+
status.docs = { passed: true, timestamp: new Date().toISOString() };
|
|
2112
|
+
savePipeStatus(status);
|
|
2113
|
+
}
|
|
1945
2114
|
async function pipePublish(args) {
|
|
1946
2115
|
const [sourceName] = args;
|
|
1947
2116
|
if (!sourceName) {
|
|
@@ -1952,6 +2121,11 @@ async function pipePublish(args) {
|
|
|
1952
2121
|
requireGate(status, 'test', 'publish');
|
|
1953
2122
|
requireGate(status, 'validate', 'publish');
|
|
1954
2123
|
requireGate(status, 'stage', 'publish');
|
|
2124
|
+
if (!status.docs?.passed) {
|
|
2125
|
+
console.error(`[pipe] Gate "docs" has not passed. Run: latinfo pipe docs ${sourceName}`);
|
|
2126
|
+
console.error(`[pipe] Documentation is required before publishing.`);
|
|
2127
|
+
process.exit(1);
|
|
2128
|
+
}
|
|
1955
2129
|
const repo = getRepoPath();
|
|
1956
2130
|
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1957
2131
|
const RUNNER = 'f3mt0@100.109.82.87';
|
|
@@ -1963,6 +2137,9 @@ async function pipePublish(args) {
|
|
|
1963
2137
|
if (fs_1.default.existsSync(scriptPath))
|
|
1964
2138
|
files.push(`src/imports/${sourceName}.ts`);
|
|
1965
2139
|
try {
|
|
2140
|
+
const docsFile = `docs/sources/${sourceName}.md`;
|
|
2141
|
+
if (fs_1.default.existsSync(path_1.default.join(repo, docsFile)))
|
|
2142
|
+
files.push(docsFile);
|
|
1966
2143
|
run(`git add ${files.join(' ')} src/sources.ts .github/workflows/import.yml`, { cwd: repo, stdio: 'pipe' });
|
|
1967
2144
|
run(`git commit -m "Add data source: ${sourceName}"`, { cwd: repo, stdio: 'pipe' });
|
|
1968
2145
|
run(`git push`, { cwd: repo, stdio: 'pipe' });
|
|
@@ -2007,7 +2184,7 @@ async function pipeStatus(args) {
|
|
|
2007
2184
|
const [sourceName] = args;
|
|
2008
2185
|
if (sourceName) {
|
|
2009
2186
|
const status = loadPipeStatus(sourceName);
|
|
2010
|
-
const gates = ['test', 'validate', 'stage', 'publish'];
|
|
2187
|
+
const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
|
|
2011
2188
|
console.log(`Source: ${sourceName}\n`);
|
|
2012
2189
|
for (const gate of gates) {
|
|
2013
2190
|
const g = status[gate];
|
|
@@ -2033,7 +2210,7 @@ async function pipeStatus(args) {
|
|
|
2033
2210
|
const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
|
|
2034
2211
|
for (const f of files) {
|
|
2035
2212
|
const s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
|
|
2036
|
-
const gates = ['test', 'validate', 'stage', 'publish'];
|
|
2213
|
+
const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
|
|
2037
2214
|
const icons = gates.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
|
|
2038
2215
|
console.log(` ${s.source} [${icons}]`);
|
|
2039
2216
|
}
|
|
@@ -2061,6 +2238,9 @@ async function pipe(args) {
|
|
|
2061
2238
|
case 'stage':
|
|
2062
2239
|
await pipeStage(subArgs);
|
|
2063
2240
|
break;
|
|
2241
|
+
case 'docs':
|
|
2242
|
+
await pipeDocs(subArgs);
|
|
2243
|
+
break;
|
|
2064
2244
|
case 'publish':
|
|
2065
2245
|
await pipePublish(subArgs);
|
|
2066
2246
|
break;
|
|
@@ -2096,6 +2276,7 @@ COMMANDS
|
|
|
2096
2276
|
test <source> Gate 1: test 100 records locally
|
|
2097
2277
|
validate <source> Gate 2: full import locally
|
|
2098
2278
|
stage <source> Gate 3: import + 500 bench on Linux Mint
|
|
2279
|
+
docs <source> [doc-file] Gate 3.5: write/upload documentation
|
|
2099
2280
|
publish <source> Gate 4: deploy to production
|
|
2100
2281
|
status [source] Show gate status
|
|
2101
2282
|
list List all sources
|
|
@@ -2105,6 +2286,7 @@ GATES (each must pass before the next unlocks)
|
|
|
2105
2286
|
test → 100 records, validates IDs, encoding, V2 search, MPHF
|
|
2106
2287
|
validate → full import, all records, field validation
|
|
2107
2288
|
stage → Linux Mint: import + 500 concurrent bench (99.9% required)
|
|
2289
|
+
docs → documentation with required sections (Source, How it works, Fields, etc.)
|
|
2108
2290
|
publish → production: deploy + smoke test + bench + rollback on failure
|
|
2109
2291
|
|
|
2110
2292
|
WORKFLOW
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "latinfo",
|
|
3
|
-
"version": "0.11.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
|
|
5
5
|
"homepage": "https://latinfo.dev",
|
|
6
6
|
"repository": {
|