latinfo 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +211 -29
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -47,7 +47,7 @@ const local_search_1 = require("./local-search");
47
47
  const client_search_1 = require("./client-search");
48
48
  const odis_search_1 = require("./odis-search");
49
49
  const mphf_search_1 = require("./mphf-search");
50
- const VERSION = '0.10.0';
50
+ const VERSION = '0.11.0';
51
51
  const API_URL = process.env.LATINFO_API_URL || 'https://api.latinfo.dev';
52
52
  const GITHUB_CLIENT_ID = process.env.GITHUB_CLIENT_ID || 'Ov23li5fcQaiCsVtaMKK';
53
53
  const CONFIG_DIR = path_1.default.join(os_1.default.homedir(), '.latinfo');
@@ -1791,33 +1791,43 @@ async function pipeTest(args) {
1791
1791
  const useEasypipe = !fs_1.default.existsSync(scriptPath);
1792
1792
  const cmd = useEasypipe
1793
1793
  ? `npx tsx ${easypipePath} ${yamlPath} --limit 100 --local`
1794
- : `npx tsx ${scriptPath} --limit 100`;
1794
+ : `npx tsx ${scriptPath} --limit 100 --local`;
1795
1795
  console.log(`[pipe] Gate 1: TEST (100 records)\n`);
1796
1796
  console.log(`Running: ${cmd}\n`);
1797
+ let output = '';
1797
1798
  try {
1798
- run(cmd, { stdio: 'inherit', cwd: repo });
1799
+ output = run(cmd, { encoding: 'utf-8', cwd: repo, stdio: ['inherit', 'pipe', 'inherit'] }) || '';
1800
+ // Print output
1801
+ if (output)
1802
+ process.stdout.write(output);
1799
1803
  }
1800
- catch {
1804
+ catch (e) {
1805
+ if (e.stdout) {
1806
+ output = e.stdout;
1807
+ process.stdout.write(output);
1808
+ }
1801
1809
  errors.push('Import script failed');
1802
1810
  }
1803
- // Validate output files exist
1804
- const outDir = `/tmp/${sourceName}-import`;
1811
+ // Validate from script output (files are cleaned up by script)
1805
1812
  if (errors.length === 0) {
1806
- const binFiles = fs_1.default.readdirSync(outDir || '/tmp').filter(f => f.startsWith(sourceName) && f.endsWith('.bin'));
1807
- if (binFiles.length === 0)
1808
- errors.push('No .bin files generated');
1809
- const idxFile = path_1.default.join(outDir, `${sourceName}.idx`);
1810
- if (!fs_1.default.existsSync(idxFile) && !fs_1.default.existsSync(`/tmp/${sourceName}.idx`)) {
1811
- // Check R2 upload happened (for non-local mode)
1813
+ if (!output.includes('Success') && !output.includes('records')) {
1814
+ errors.push('Import did not report success');
1815
+ }
1816
+ // Check for V2 search index (not V1)
1817
+ if (output.includes('V1)') && !output.includes('V2')) {
1818
+ errors.push('Search index is V1 MUST use V2. Add statusFieldIndex to buildSearchIndex()');
1812
1819
  }
1813
- // Check for V2 search index
1814
- const searchIdx = fs_1.default.readdirSync('/tmp').filter(f => f.includes(sourceName) && f.includes('search.idx'));
1815
- if (searchIdx.length === 0)
1816
- errors.push('No V2 search index generated — use statusFieldIndex in buildSearchIndex');
1817
1820
  // Check for MPHF
1818
- const mphf = fs_1.default.readdirSync('/tmp').filter(f => f.includes(sourceName) && f.includes('.mphf'));
1819
- if (mphf.length === 0)
1820
- errors.push('No MPHF generated — call buildMphfFromIdx after buildSearchIndex');
1821
+ if (!output.includes('[mphf]') && !output.includes('MPHF')) {
1822
+ errors.push('No MPHF generated — call buildMphfFromIdx() after buildSearchIndex()');
1823
+ }
1824
+ // Check record count
1825
+ const recordMatch = output.match(/(\d[\d,]*)\s*records/);
1826
+ if (recordMatch) {
1827
+ const count = parseInt(recordMatch[1].replace(/,/g, ''));
1828
+ if (count === 0)
1829
+ errors.push('Zero records imported');
1830
+ }
1821
1831
  }
1822
1832
  if (errors.length > 0) {
1823
1833
  console.error(`\n[pipe] Gate 1 FAILED:`);
@@ -1847,8 +1857,8 @@ async function pipeValidate(args) {
1847
1857
  const useEasypipe = !fs_1.default.existsSync(scriptPath);
1848
1858
  const cmd = useEasypipe
1849
1859
  ? `npx tsx ${easypipePath} ${path_1.default.join(repo, 'sources', `${sourceName}.yaml`)} --local`
1850
- : `npx tsx ${scriptPath}`;
1851
- console.log(`[pipe] Gate 2: VALIDATE (full import)\n`);
1860
+ : `npx tsx ${scriptPath} --local`;
1861
+ console.log(`[pipe] Gate 2: VALIDATE (full import, local only — no R2 upload)\n`);
1852
1862
  console.log(`Running: ${cmd}\n`);
1853
1863
  try {
1854
1864
  const output = run(cmd, { cwd: repo, stdio: 'inherit', encoding: 'utf-8' });
@@ -1875,21 +1885,39 @@ async function pipeStage(args) {
1875
1885
  const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1876
1886
  const RUNNER = 'f3mt0@100.109.82.87';
1877
1887
  console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
1878
- // 1. Copy script + YAML to runner
1888
+ // 1. Copy script + YAML to runner via scp
1879
1889
  const repo = getRepoPath();
1880
- console.log('[pipe] Syncing repo on Linux Mint...');
1890
+ const remoteRepo = '~/actions-runner/_work/latinfo-api/latinfo-api';
1891
+ console.log('[pipe] Syncing files to Linux Mint...');
1881
1892
  try {
1882
- run(`ssh ${RUNNER} "cd ~/actions-runner/_work/latinfo-api/latinfo-api && git pull"`, { stdio: 'inherit' });
1893
+ run(`ssh ${RUNNER} "echo OK"`, { stdio: 'pipe', timeout: 10_000 });
1894
+ // Copy import script and YAML
1895
+ const scriptFile = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1896
+ const yamlFile = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1897
+ if (fs_1.default.existsSync(scriptFile))
1898
+ run(`scp ${scriptFile} ${RUNNER}:${remoteRepo}/src/imports/`, { stdio: 'pipe' });
1899
+ if (fs_1.default.existsSync(yamlFile))
1900
+ run(`scp ${yamlFile} ${RUNNER}:${remoteRepo}/sources/`, { stdio: 'pipe' });
1901
+ console.log('[pipe] Files synced.');
1883
1902
  }
1884
1903
  catch {
1885
1904
  console.error('[pipe] SSH failed. Is Linux Mint running? Check: ssh f3mt0@100.109.82.87');
1886
1905
  process.exit(1);
1887
1906
  }
1888
- // 2. Run import on Linux Mint
1889
- const scriptPath = `src/imports/${sourceName}.ts`;
1907
+ // 2. Sync env files if missing on runner
1908
+ try {
1909
+ const envLocal = path_1.default.join(repo, '.env');
1910
+ const devVarsLocal = path_1.default.join(repo, '.dev.vars');
1911
+ if (fs_1.default.existsSync(envLocal))
1912
+ run(`scp ${envLocal} ${RUNNER}:${remoteRepo}/.env`, { stdio: 'pipe' });
1913
+ if (fs_1.default.existsSync(devVarsLocal))
1914
+ run(`scp ${devVarsLocal} ${RUNNER}:${remoteRepo}/.dev.vars`, { stdio: 'pipe' });
1915
+ }
1916
+ catch { }
1917
+ // 3. Run import on Linux Mint
1890
1918
  console.log(`[pipe] Running import on Linux Mint...`);
1891
1919
  try {
1892
- run(`ssh ${RUNNER} "cd ~/actions-runner/_work/latinfo-api/latinfo-api && npx tsx ${scriptPath}"`, {
1920
+ run(`ssh ${RUNNER} "cd ${remoteRepo} && set -a && source .env 2>/dev/null; source .dev.vars 2>/dev/null; set +a && R2_BUCKET_NAME=latinfo-data npx tsx src/imports/${sourceName}.ts"`, {
1893
1921
  stdio: 'inherit', timeout: 600_000,
1894
1922
  });
1895
1923
  }
@@ -1942,6 +1970,147 @@ p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99
1942
1970
  savePipeStatus(status);
1943
1971
  }
1944
1972
  }
1973
+ async function pipeDocs(args) {
1974
+ const [sourceName, docPath] = args;
1975
+ if (!sourceName) {
1976
+ console.error(`Usage: latinfo pipe docs <source-name> [doc-file]
1977
+
1978
+ If no doc-file is provided, generates a template for you to fill in.
1979
+ If doc-file is provided, copies it as the source documentation.
1980
+
1981
+ The documentation MUST include these sections:
1982
+
1983
+ ## Source
1984
+ URL, institution, what data it contains, update frequency
1985
+
1986
+ ## How it works
1987
+ Download method (fetch, Playwright, API), authentication, CAPTCHA, encoding
1988
+
1989
+ ## Fields
1990
+ All fields with types and examples
1991
+
1992
+ ## Known issues
1993
+ Encoding problems, rate limits, CAPTCHA changes, session handling
1994
+
1995
+ ## Troubleshooting
1996
+ What to do if:
1997
+ - URL changes
1998
+ - CAPTCHA type changes
1999
+ - Encoding changes
2000
+ - API response format changes
2001
+ - Authentication method changes
2002
+ - Rate limits increase
2003
+ - Data format changes (new columns, removed columns)
2004
+
2005
+ ## Dependencies
2006
+ Required packages and why (playwright, ddddocr, etc.)
2007
+
2008
+ ## Bench results
2009
+ Concurrent users tested, success rate, p50/p95/p99`);
2010
+ process.exit(1);
2011
+ }
2012
+ const status = loadPipeStatus(sourceName);
2013
+ requireGate(status, 'stage', 'docs');
2014
+ const repo = getRepoPath();
2015
+ const docsDir = path_1.default.join(repo, 'docs', 'sources');
2016
+ fs_1.default.mkdirSync(docsDir, { recursive: true });
2017
+ const destPath = path_1.default.join(docsDir, `${sourceName}.md`);
2018
+ if (docPath) {
2019
+ // Copy provided doc
2020
+ const src = path_1.default.resolve(docPath);
2021
+ if (!fs_1.default.existsSync(src)) {
2022
+ console.error(`File not found: ${src}`);
2023
+ process.exit(1);
2024
+ }
2025
+ const content = fs_1.default.readFileSync(src, 'utf-8');
2026
+ // Validate required sections
2027
+ const required = ['## Source', '## How it works', '## Fields', '## Known issues', '## Troubleshooting'];
2028
+ const missing = required.filter(s => !content.includes(s));
2029
+ if (missing.length > 0) {
2030
+ console.error(`[pipe] Documentation missing required sections:`);
2031
+ for (const m of missing)
2032
+ console.error(` ✗ ${m}`);
2033
+ console.error(`\nAdd these sections to your doc and try again.`);
2034
+ process.exit(1);
2035
+ }
2036
+ fs_1.default.copyFileSync(src, destPath);
2037
+ console.log(`[pipe] Documentation saved: ${destPath}`);
2038
+ }
2039
+ else {
2040
+ // Generate template
2041
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
2042
+ const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
2043
+ const urlMatch = yamlContent.match(/url:\s*(.+)/);
2044
+ const url = urlMatch ? urlMatch[1].trim() : 'https://example.com';
2045
+ const benchData = status.stage?.bench;
2046
+ const template = `# ${sourceName}
2047
+
2048
+ ## Source
2049
+ - **URL**: ${url}
2050
+ - **Institution**: TODO
2051
+ - **Data**: TODO (what records this contains)
2052
+ - **Records**: TODO (approximate count)
2053
+ - **Update frequency**: TODO (daily, weekly, manual)
2054
+ - **Format**: TODO (CSV, JSON API, web scraping)
2055
+
2056
+ ## How it works
2057
+ TODO: Describe step by step how the import script works.
2058
+ - How is data downloaded? (direct URL, API with pagination, Playwright crawler)
2059
+ - Is there authentication? (API key, session, CAPTCHA)
2060
+ - What encoding is the source? (UTF-8, ISO-8859-1)
2061
+ - Any special parsing needed? (date formats, amount formats, field concatenation)
2062
+
2063
+ ## Fields
2064
+ | Field | Type | Example | Notes |
2065
+ |-------|------|---------|-------|
2066
+ | TODO | string | TODO | TODO |
2067
+
2068
+ ## Known issues
2069
+ - TODO: List any encoding problems, edge cases, data quality issues
2070
+ - TODO: Rate limits, CAPTCHA difficulty, session expiration
2071
+
2072
+ ## Troubleshooting
2073
+
2074
+ ### URL changes
2075
+ TODO: Where to find the new URL, how to update
2076
+
2077
+ ### CAPTCHA changes
2078
+ TODO: What CAPTCHA solver is used, alternatives if it breaks
2079
+
2080
+ ### Encoding changes
2081
+ TODO: Current encoding, how to detect changes
2082
+
2083
+ ### Format changes
2084
+ TODO: How to detect if columns change, new fields added, fields removed
2085
+
2086
+ ### Authentication changes
2087
+ TODO: Current auth method, what to check if it stops working
2088
+
2089
+ ## Dependencies
2090
+ TODO: List npm packages and why each is needed
2091
+ \`\`\`
2092
+ playwright — browser automation for CAPTCHA/session
2093
+ ddddocr — CAPTCHA OCR solver
2094
+ \`\`\`
2095
+
2096
+ ## Bench results
2097
+ ${benchData ? `- **Concurrent**: ${benchData.concurrent}
2098
+ - **Success rate**: ${benchData.success_rate.toFixed(1)}%
2099
+ - **p50**: ${benchData.p50}ms
2100
+ - **p95**: ${benchData.p95}ms
2101
+ - **p99**: ${benchData.p99}ms` : 'TODO: Run latinfo pipe stage first'}
2102
+ `;
2103
+ fs_1.default.writeFileSync(destPath, template);
2104
+ console.log(`[pipe] Template generated: ${destPath}`);
2105
+ console.log(`\nFill in the TODO sections, then run:`);
2106
+ console.log(` latinfo pipe docs ${sourceName} ${destPath}`);
2107
+ process.exit(1); // Force them to fill it in
2108
+ }
2109
+ console.log(`\n[pipe] Gate 3.5 PASSED ✓`);
2110
+ console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
2111
+ status.docs = { passed: true, timestamp: new Date().toISOString() };
2112
+ savePipeStatus(status);
2113
+ }
1945
2114
  async function pipePublish(args) {
1946
2115
  const [sourceName] = args;
1947
2116
  if (!sourceName) {
@@ -1952,6 +2121,11 @@ async function pipePublish(args) {
1952
2121
  requireGate(status, 'test', 'publish');
1953
2122
  requireGate(status, 'validate', 'publish');
1954
2123
  requireGate(status, 'stage', 'publish');
2124
+ if (!status.docs?.passed) {
2125
+ console.error(`[pipe] Gate "docs" has not passed. Run: latinfo pipe docs ${sourceName}`);
2126
+ console.error(`[pipe] Documentation is required before publishing.`);
2127
+ process.exit(1);
2128
+ }
1955
2129
  const repo = getRepoPath();
1956
2130
  const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1957
2131
  const RUNNER = 'f3mt0@100.109.82.87';
@@ -1963,6 +2137,9 @@ async function pipePublish(args) {
1963
2137
  if (fs_1.default.existsSync(scriptPath))
1964
2138
  files.push(`src/imports/${sourceName}.ts`);
1965
2139
  try {
2140
+ const docsFile = `docs/sources/${sourceName}.md`;
2141
+ if (fs_1.default.existsSync(path_1.default.join(repo, docsFile)))
2142
+ files.push(docsFile);
1966
2143
  run(`git add ${files.join(' ')} src/sources.ts .github/workflows/import.yml`, { cwd: repo, stdio: 'pipe' });
1967
2144
  run(`git commit -m "Add data source: ${sourceName}"`, { cwd: repo, stdio: 'pipe' });
1968
2145
  run(`git push`, { cwd: repo, stdio: 'pipe' });
@@ -2007,7 +2184,7 @@ async function pipeStatus(args) {
2007
2184
  const [sourceName] = args;
2008
2185
  if (sourceName) {
2009
2186
  const status = loadPipeStatus(sourceName);
2010
- const gates = ['test', 'validate', 'stage', 'publish'];
2187
+ const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
2011
2188
  console.log(`Source: ${sourceName}\n`);
2012
2189
  for (const gate of gates) {
2013
2190
  const g = status[gate];
@@ -2033,7 +2210,7 @@ async function pipeStatus(args) {
2033
2210
  const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
2034
2211
  for (const f of files) {
2035
2212
  const s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
2036
- const gates = ['test', 'validate', 'stage', 'publish'];
2213
+ const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
2037
2214
  const icons = gates.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
2038
2215
  console.log(` ${s.source} [${icons}]`);
2039
2216
  }
@@ -2061,6 +2238,9 @@ async function pipe(args) {
2061
2238
  case 'stage':
2062
2239
  await pipeStage(subArgs);
2063
2240
  break;
2241
+ case 'docs':
2242
+ await pipeDocs(subArgs);
2243
+ break;
2064
2244
  case 'publish':
2065
2245
  await pipePublish(subArgs);
2066
2246
  break;
@@ -2096,6 +2276,7 @@ COMMANDS
2096
2276
  test <source> Gate 1: test 100 records locally
2097
2277
  validate <source> Gate 2: full import locally
2098
2278
  stage <source> Gate 3: import + 500 bench on Linux Mint
2279
+ docs <source> [doc-file] Gate 3.5: write/upload documentation
2099
2280
  publish <source> Gate 4: deploy to production
2100
2281
  status [source] Show gate status
2101
2282
  list List all sources
@@ -2105,6 +2286,7 @@ GATES (each must pass before the next unlocks)
2105
2286
  test → 100 records, validates IDs, encoding, V2 search, MPHF
2106
2287
  validate → full import, all records, field validation
2107
2288
  stage → Linux Mint: import + 500 concurrent bench (99.9% required)
2289
+ docs → documentation with required sections (Source, How it works, Fields, etc.)
2108
2290
  publish → production: deploy + smoke test + bench + rollback on failure
2109
2291
 
2110
2292
  WORKFLOW
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "latinfo",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
5
5
  "homepage": "https://latinfo.dev",
6
6
  "repository": {