latinfo 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +467 -108
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -232,7 +232,7 @@ async function ruc(rucNumber) {
232
232
  }
233
233
  return;
234
234
  }
235
- const res = await apiRequest(config, `/pe/ruc/${rucNumber}`);
235
+ const res = await apiRequest(config, `/pe/sunat/padron/ruc/${rucNumber}`);
236
236
  const data = await res.json();
237
237
  if (jsonFlag) {
238
238
  console.log(JSON.stringify(data));
@@ -761,7 +761,7 @@ const BENCH_SAMPLES = {
761
761
  'peru', 'lima', 'consultora', 'transporte', 'holding',
762
762
  'desarrollos', 'ingenieria', 'tecnologia', 'salud', 'educacion',
763
763
  ],
764
- 'pe/licitaciones': [
764
+ 'pe/oece/tenders': [
765
765
  'servicio', 'construccion', 'suministro', 'consultoria', 'mantenimiento',
766
766
  'obra', 'adquisicion', 'sistema', 'equipos', 'vehiculos',
767
767
  'alimentos', 'seguridad', 'limpieza', 'transporte', 'software',
@@ -790,11 +790,11 @@ async function benchStress(args) {
790
790
  { name: 'cool', vus: 10, duration: Math.floor(durationSec * 0.08) },
791
791
  ];
792
792
  const endpoints = [
793
- ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/ruc/${s}`, type: 'ruc' })),
794
- ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/search?q=${encodeURIComponent(s)}`, type: 'search' })),
795
- ...BENCH_SAMPLES['pe/licitaciones'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/licitaciones?q=${encodeURIComponent(s)}&limit=5`, type: 'licitaciones' })),
796
- ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/nit/${s}`, type: 'co/nit' })),
797
- ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
793
+ ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/sunat/padron/ruc/${s}`, type: 'ruc' })),
794
+ ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/sunat/padron/search?q=${encodeURIComponent(s)}`, type: 'search' })),
795
+ ...BENCH_SAMPLES['pe/oece/tenders'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/oece/tenders?q=${encodeURIComponent(s)}&limit=5`, type: 'tenders' })),
796
+ ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/nit/${s}`, type: 'co/nit' })),
797
+ ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
798
798
  ];
799
799
  const headers = { Authorization: `Bearer ${config.api_key}` };
800
800
  const results = [];
@@ -1008,12 +1008,18 @@ async function bench(args) {
1008
1008
  console.error(`Supported: ${Object.keys(BENCH_SAMPLES).map(k => '--country ' + k.replace('/', ' --type ')).join(', ')}`);
1009
1009
  process.exit(1);
1010
1010
  }
1011
+ const ROUTE_MAP = {
1012
+ 'pe/ruc': '/pe/sunat/padron/ruc',
1013
+ 'pe/search': '/pe/sunat/padron/search',
1014
+ 'pe/oece/tenders': '/pe/oece/tenders',
1015
+ 'co/nit': '/co/rues/registry/nit',
1016
+ 'co/search': '/co/rues/registry/search',
1017
+ };
1011
1018
  const getUrl = (sample) => {
1012
- if (type === 'search')
1013
- return `${API_URL}/${country}/search?q=${encodeURIComponent(sample)}`;
1014
- if (type === 'licitaciones')
1015
- return `${API_URL}/pe/licitaciones?q=${encodeURIComponent(sample)}&limit=5`;
1016
- return `${API_URL}/${country}/${type}/${sample}`;
1019
+ const route = ROUTE_MAP[key];
1020
+ if (type === 'search' || type === 'oece/tenders')
1021
+ return `${API_URL}${route}?q=${encodeURIComponent(sample)}&limit=5`;
1022
+ return `${API_URL}${route}/${sample}`;
1017
1023
  };
1018
1024
  const tasks = Array.from({ length: count }, (_, i) => samples[i % samples.length]);
1019
1025
  if (!jsonFlag)
@@ -1114,7 +1120,7 @@ async function licitaciones(args) {
1114
1120
  // Subcommand: info
1115
1121
  if (args[0] === 'info') {
1116
1122
  const config = requireAuth();
1117
- const res = await apiRequest(config, '/pe/licitaciones/info');
1123
+ const res = await apiRequest(config, '/pe/oece/tenders/info');
1118
1124
  const info = await res.json();
1119
1125
  if (jsonFlag) {
1120
1126
  console.log(JSON.stringify(info));
@@ -1175,7 +1181,7 @@ DATA
1175
1181
  params.set('status', opts.status);
1176
1182
  if (opts.limit !== undefined)
1177
1183
  params.set('limit', String(opts.limit));
1178
- const res = await apiRequest(config, `/pe/licitaciones?${params}`);
1184
+ const res = await apiRequest(config, `/pe/oece/tenders?${params}`);
1179
1185
  const results = await res.json();
1180
1186
  if (jsonFlag) {
1181
1187
  console.log(JSON.stringify(results));
@@ -1539,10 +1545,86 @@ function requireAdmin() {
1539
1545
  console.error('Admin access not found. Create ~/.latinfo/admin.secret or set LATINFO_ADMIN_SECRET.');
1540
1546
  process.exit(1);
1541
1547
  }
1542
- async function adminCreate(args) {
1543
- const [country, institution, dataset, ...flags] = args;
1548
+ // --- Pipe: gate status tracking ---
1549
+ const PIPE_STATUS_DIR = path_1.default.join(CONFIG_DIR, 'pipe-status');
1550
+ function loadPipeStatus(source) {
1551
+ const file = path_1.default.join(PIPE_STATUS_DIR, `${source}.json`);
1552
+ try {
1553
+ return JSON.parse(fs_1.default.readFileSync(file, 'utf-8'));
1554
+ }
1555
+ catch {
1556
+ return { source };
1557
+ }
1558
+ }
1559
+ function savePipeStatus(status) {
1560
+ fs_1.default.mkdirSync(PIPE_STATUS_DIR, { recursive: true });
1561
+ fs_1.default.writeFileSync(path_1.default.join(PIPE_STATUS_DIR, `${status.source}.json`), JSON.stringify(status, null, 2));
1562
+ }
1563
+ function requireGate(status, gate, forGate) {
1564
+ if (!status[gate]?.passed) {
1565
+ console.error(`[pipe] Gate "${gate}" has not passed. Run: latinfo pipe ${gate} ${status.source}`);
1566
+ console.error(`[pipe] Cannot proceed to "${forGate}" until "${gate}" passes.`);
1567
+ process.exit(1);
1568
+ }
1569
+ }
1570
+ async function pipeCreate(args) {
1571
+ // Separate positional args from flags
1572
+ const positional = [];
1573
+ const flags = [];
1574
+ for (let i = 0; i < args.length; i++) {
1575
+ if (args[i].startsWith('--')) {
1576
+ flags.push(args[i], args[i + 1] || '');
1577
+ i++; // skip flag value
1578
+ }
1579
+ else {
1580
+ positional.push(args[i]);
1581
+ }
1582
+ }
1583
+ const [country, institution, dataset] = positional;
1544
1584
  if (!country || !institution || !dataset) {
1545
- console.error('Usage: latinfo admin create <country> <institution> <dataset> [--url URL] [--id-name ruc] [--id-length 11] [--encoding utf-8] [--delimiter ","]');
1585
+ console.error(`Error: exactly 3 positional arguments required: <country> <institution> <dataset>
1586
+
1587
+ NAMING RULES
1588
+ Source name = {country}-{institution}-{dataset}
1589
+ All lowercase, hyphens only, english.
1590
+
1591
+ country: ISO 3166-1 alpha-2 (pe, co, br, mx, ec, ar, cl)
1592
+ institution: government agency abbreviation (sunat, osce, oece, rues, redam, sat, indecopi)
1593
+ dataset: what the data contains (padron, coactiva, sanctioned, fines, tenders, registry)
1594
+
1595
+ EXAMPLES
1596
+ latinfo pipe create pe sunat padron --url https://sunat.gob.pe/data.zip
1597
+ latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/data --id-name dni --id-length 8
1598
+ latinfo pipe create co rues registry --url https://datos.gov.co/data.csv --id-name nit --id-length 10
1599
+
1600
+ WRONG
1601
+ latinfo pipe create pe redam ← missing dataset (3rd argument)
1602
+ latinfo pipe create pe-redam-registry ← don't use hyphens, use spaces
1603
+ latinfo pipe create pe redam deudores ← use english: "debtors" not "deudores"
1604
+
1605
+ FLAGS
1606
+ --url <url> Source data download URL
1607
+ --id-name <name> Primary ID field name (default: id)
1608
+ --id-length <n> Primary ID length in digits (default: 11)
1609
+ --encoding <enc> utf-8 | iso-8859-1 (default: utf-8)
1610
+ --delimiter <d> Field delimiter (default: ,)
1611
+ --format <fmt> csv | tsv | txt | xlsm (default: csv)`);
1612
+ process.exit(1);
1613
+ }
1614
+ // Validate country
1615
+ const validCountries = ['pe', 'co', 'br', 'mx', 'ec', 'ar', 'cl'];
1616
+ if (!validCountries.includes(country)) {
1617
+ console.error(`Error: invalid country "${country}". Must be one of: ${validCountries.join(', ')}`);
1618
+ process.exit(1);
1619
+ }
1620
+ // Validate no hyphens in parts
1621
+ if (institution.includes('-') || dataset.includes('-')) {
1622
+ console.error(`Error: institution and dataset must not contain hyphens. Use separate arguments.\n Wrong: latinfo admin create pe osce-sanctioned\n Right: latinfo admin create pe osce sanctioned`);
1623
+ process.exit(1);
1624
+ }
1625
+ // Validate lowercase english
1626
+ if (/[A-Z]/.test(institution + dataset)) {
1627
+ console.error(`Error: institution and dataset must be lowercase. Got: ${institution} ${dataset}`);
1546
1628
  process.exit(1);
1547
1629
  }
1548
1630
  const name = `${country}-${institution}-${dataset}`;
@@ -1605,17 +1687,25 @@ smoke_test:
1605
1687
  console.log(`Created: ${yamlPath}`);
1606
1688
  console.log(`\nNext steps:`);
1607
1689
  console.log(` 1. Edit ${yamlPath} to match your data source`);
1608
- console.log(` 2. Write import script: latinfo admin upload-script ${name} ./my-import.ts`);
1609
- console.log(` 3. Test: latinfo admin test ${name}`);
1610
- console.log(` 4. Publish: latinfo admin publish ${name}`);
1690
+ console.log(` 2. Write import script and upload: latinfo pipe script ${name} ./my-import.ts`);
1691
+ console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
1692
+ console.log(` 4. Test (100 records): latinfo pipe test ${name}`);
1693
+ console.log(` 5. Validate (all records): latinfo pipe validate ${name}`);
1694
+ console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
1695
+ console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
1611
1696
  }
1612
- async function adminUploadScript(args) {
1697
+ async function pipeScript(args) {
1613
1698
  const [sourceName, scriptPath] = args;
1614
1699
  if (!sourceName || !scriptPath) {
1615
- console.error('Usage: latinfo admin upload-script <source-name> <script-path>');
1700
+ console.error('Usage: latinfo pipe script <source-name> <script.ts>');
1616
1701
  process.exit(1);
1617
1702
  }
1618
1703
  const repo = getRepoPath();
1704
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1705
+ if (!fs_1.default.existsSync(yamlPath)) {
1706
+ console.error(`Source not found. Run first: latinfo pipe create ...`);
1707
+ process.exit(1);
1708
+ }
1619
1709
  const dest = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1620
1710
  const src = path_1.default.resolve(scriptPath);
1621
1711
  if (!fs_1.default.existsSync(src)) {
@@ -1623,12 +1713,54 @@ async function adminUploadScript(args) {
1623
1713
  process.exit(1);
1624
1714
  }
1625
1715
  fs_1.default.copyFileSync(src, dest);
1626
- console.log(`Copied: ${src} ${dest}`);
1716
+ console.log(`[pipe] Script copied: ${dest}`);
1717
+ // Reset gates (script changed, need to re-test)
1718
+ const status = loadPipeStatus(sourceName);
1719
+ delete status.test;
1720
+ delete status.validate;
1721
+ delete status.stage;
1722
+ delete status.publish;
1723
+ savePipeStatus(status);
1724
+ console.log(`[pipe] Gates reset — run: latinfo pipe test ${sourceName}`);
1725
+ }
1726
+ async function pipeDeps(args) {
1727
+ const [sourceName, ...deps] = args;
1728
+ if (!sourceName || deps.length === 0) {
1729
+ console.error('Usage: latinfo pipe deps <source-name> <pkg1> [pkg2] ...');
1730
+ process.exit(1);
1731
+ }
1732
+ const repo = getRepoPath();
1733
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1734
+ if (!fs_1.default.existsSync(yamlPath)) {
1735
+ console.error(`Source not found: ${yamlPath}`);
1736
+ process.exit(1);
1737
+ }
1738
+ // Add dependencies to YAML
1739
+ let yaml = fs_1.default.readFileSync(yamlPath, 'utf-8');
1740
+ if (yaml.includes('dependencies:')) {
1741
+ // Replace existing deps
1742
+ yaml = yaml.replace(/dependencies:[\s\S]*?(?=\n\w|\n$|$)/, `dependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`);
1743
+ }
1744
+ else {
1745
+ yaml += `\ndependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`;
1746
+ }
1747
+ fs_1.default.writeFileSync(yamlPath, yaml);
1748
+ // Install deps in repo
1749
+ console.log(`[pipe] Installing: ${deps.join(', ')}...`);
1750
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1751
+ try {
1752
+ run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'inherit' });
1753
+ console.log(`[pipe] Dependencies installed and added to YAML.`);
1754
+ }
1755
+ catch {
1756
+ console.error(`[pipe] Failed to install dependencies.`);
1757
+ process.exit(1);
1758
+ }
1627
1759
  }
1628
- async function adminTest(args) {
1760
+ async function pipeTest(args) {
1629
1761
  const [sourceName] = args;
1630
1762
  if (!sourceName) {
1631
- console.error('Usage: latinfo admin test <source-name>');
1763
+ console.error('Usage: latinfo pipe test <source-name>');
1632
1764
  process.exit(1);
1633
1765
  }
1634
1766
  const repo = getRepoPath();
@@ -1637,150 +1769,374 @@ async function adminTest(args) {
1637
1769
  console.error(`Source not found: ${yamlPath}`);
1638
1770
  process.exit(1);
1639
1771
  }
1640
- // Check if import script exists
1772
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1773
+ const status = loadPipeStatus(sourceName);
1774
+ const errors = [];
1775
+ // Install deps from YAML if present
1776
+ const yamlContent = fs_1.default.readFileSync(yamlPath, 'utf-8');
1777
+ const depsMatch = yamlContent.match(/dependencies:\n([\s\S]*?)(?=\n\w|\n$|$)/);
1778
+ if (depsMatch) {
1779
+ const deps = depsMatch[1].split('\n').map(l => l.replace(/^\s*-\s*/, '').trim()).filter(Boolean);
1780
+ if (deps.length > 0) {
1781
+ console.log(`[pipe] Installing dependencies: ${deps.join(', ')}...`);
1782
+ try {
1783
+ run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'pipe' });
1784
+ }
1785
+ catch { }
1786
+ }
1787
+ }
1788
+ // Run import with --limit 100
1641
1789
  const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1642
1790
  const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
1643
1791
  const useEasypipe = !fs_1.default.existsSync(scriptPath);
1644
1792
  const cmd = useEasypipe
1645
1793
  ? `npx tsx ${easypipePath} ${yamlPath} --limit 100 --local`
1646
1794
  : `npx tsx ${scriptPath} --limit 100`;
1647
- console.log(`Testing ${sourceName}...`);
1795
+ console.log(`[pipe] Gate 1: TEST (100 records)\n`);
1648
1796
  console.log(`Running: ${cmd}\n`);
1649
1797
  try {
1650
- const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1651
1798
  run(cmd, { stdio: 'inherit', cwd: repo });
1652
- console.log(`\n[test] ${sourceName}: PASSED`);
1653
1799
  }
1654
1800
  catch {
1655
- console.error(`\n[test] ${sourceName}: FAILED`);
1801
+ errors.push('Import script failed');
1802
+ }
1803
+ // Validate output files exist
1804
+ const outDir = `/tmp/${sourceName}-import`;
1805
+ if (errors.length === 0) {
1806
+ const binFiles = fs_1.default.readdirSync(outDir || '/tmp').filter(f => f.startsWith(sourceName) && f.endsWith('.bin'));
1807
+ if (binFiles.length === 0)
1808
+ errors.push('No .bin files generated');
1809
+ const idxFile = path_1.default.join(outDir, `${sourceName}.idx`);
1810
+ if (!fs_1.default.existsSync(idxFile) && !fs_1.default.existsSync(`/tmp/${sourceName}.idx`)) {
1811
+ // Check R2 upload happened (for non-local mode)
1812
+ }
1813
+ // Check for V2 search index
1814
+ const searchIdx = fs_1.default.readdirSync('/tmp').filter(f => f.includes(sourceName) && f.includes('search.idx'));
1815
+ if (searchIdx.length === 0)
1816
+ errors.push('No V2 search index generated — use statusFieldIndex in buildSearchIndex');
1817
+ // Check for MPHF
1818
+ const mphf = fs_1.default.readdirSync('/tmp').filter(f => f.includes(sourceName) && f.includes('.mphf'));
1819
+ if (mphf.length === 0)
1820
+ errors.push('No MPHF generated — call buildMphfFromIdx after buildSearchIndex');
1821
+ }
1822
+ if (errors.length > 0) {
1823
+ console.error(`\n[pipe] Gate 1 FAILED:`);
1824
+ for (const e of errors)
1825
+ console.error(` ✗ ${e}`);
1826
+ status.test = { passed: false, timestamp: new Date().toISOString(), errors };
1827
+ savePipeStatus(status);
1656
1828
  process.exit(1);
1657
1829
  }
1830
+ console.log(`\n[pipe] Gate 1 PASSED ✓`);
1831
+ console.log(`[pipe] Next: latinfo pipe validate ${sourceName}`);
1832
+ status.test = { passed: true, timestamp: new Date().toISOString(), records: 100 };
1833
+ savePipeStatus(status);
1658
1834
  }
1659
- async function adminPublish(args) {
1835
+ async function pipeValidate(args) {
1660
1836
  const [sourceName] = args;
1661
1837
  if (!sourceName) {
1662
- console.error('Usage: latinfo admin publish <source-name>');
1838
+ console.error('Usage: latinfo pipe validate <source-name>');
1663
1839
  process.exit(1);
1664
1840
  }
1841
+ const status = loadPipeStatus(sourceName);
1842
+ requireGate(status, 'test', 'validate');
1665
1843
  const repo = getRepoPath();
1666
- const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1667
- if (!fs_1.default.existsSync(yamlPath)) {
1668
- console.error(`Source not found: ${yamlPath}`);
1844
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1845
+ const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1846
+ const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
1847
+ const useEasypipe = !fs_1.default.existsSync(scriptPath);
1848
+ const cmd = useEasypipe
1849
+ ? `npx tsx ${easypipePath} ${path_1.default.join(repo, 'sources', `${sourceName}.yaml`)} --local`
1850
+ : `npx tsx ${scriptPath}`;
1851
+ console.log(`[pipe] Gate 2: VALIDATE (full import)\n`);
1852
+ console.log(`Running: ${cmd}\n`);
1853
+ try {
1854
+ const output = run(cmd, { cwd: repo, stdio: 'inherit', encoding: 'utf-8' });
1855
+ console.log(`\n[pipe] Gate 2 PASSED ✓`);
1856
+ console.log(`[pipe] Next: latinfo pipe stage ${sourceName}`);
1857
+ status.validate = { passed: true, timestamp: new Date().toISOString() };
1858
+ savePipeStatus(status);
1859
+ }
1860
+ catch {
1861
+ console.error(`\n[pipe] Gate 2 FAILED — full import crashed`);
1862
+ status.validate = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed'] };
1863
+ savePipeStatus(status);
1669
1864
  process.exit(1);
1670
1865
  }
1866
+ }
1867
+ async function pipeStage(args) {
1868
+ const [sourceName] = args;
1869
+ if (!sourceName) {
1870
+ console.error('Usage: latinfo pipe stage <source-name>');
1871
+ process.exit(1);
1872
+ }
1873
+ const status = loadPipeStatus(sourceName);
1874
+ requireGate(status, 'validate', 'stage');
1671
1875
  const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1672
- // 1. Add source config to sources.ts
1673
- console.log(`[publish] Adding ${sourceName} to source registry...`);
1674
- // TODO: auto-generate sources.ts from YAMLs
1675
- // 2. Git add + commit + push
1676
- console.log(`[publish] Committing...`);
1876
+ const RUNNER = 'f3mt0@100.109.82.87';
1877
+ console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
1878
+ // 1. Copy script + YAML to runner
1879
+ const repo = getRepoPath();
1880
+ console.log('[pipe] Syncing repo on Linux Mint...');
1881
+ try {
1882
+ run(`ssh ${RUNNER} "cd ~/actions-runner/_work/latinfo-api/latinfo-api && git pull"`, { stdio: 'inherit' });
1883
+ }
1884
+ catch {
1885
+ console.error('[pipe] SSH failed. Is Linux Mint running? Check: ssh f3mt0@100.109.82.87');
1886
+ process.exit(1);
1887
+ }
1888
+ // 2. Run import on Linux Mint
1889
+ const scriptPath = `src/imports/${sourceName}.ts`;
1890
+ console.log(`[pipe] Running import on Linux Mint...`);
1891
+ try {
1892
+ run(`ssh ${RUNNER} "cd ~/actions-runner/_work/latinfo-api/latinfo-api && npx tsx ${scriptPath}"`, {
1893
+ stdio: 'inherit', timeout: 600_000,
1894
+ });
1895
+ }
1896
+ catch {
1897
+ console.error('[pipe] Import failed on Linux Mint');
1898
+ status.stage = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed on runner'] };
1899
+ savePipeStatus(status);
1900
+ process.exit(1);
1901
+ }
1902
+ // 3. Bench: 500 concurrent on Linux Mint search server
1903
+ console.log(`\n[pipe] Running bench (500 concurrent)...`);
1904
+ try {
1905
+ const benchResult = run(`ssh ${RUNNER} "curl -s 'http://localhost:3001/search?source=${sourceName}&q=test'" `, {
1906
+ encoding: 'utf-8', stdio: 'pipe',
1907
+ });
1908
+ // If search server responds, run bench
1909
+ const benchOutput = run(`ssh ${RUNNER} "node -e \\"
1910
+ const TOTAL=500, CONC=500;
1911
+ const queries=['test','banco','empresa','servicios','construccion','transporte','grupo','sociedad','comercial','industrial'];
1912
+ let idx=0,success=0,fails=0;const lats=[];
1913
+ function go(){if(idx>=TOTAL)return Promise.resolve();const i=idx++,q=queries[i%queries.length],t0=Date.now();
1914
+ return fetch('http://localhost:3001/search?source=${sourceName}&q='+encodeURIComponent(q))
1915
+ .then(r=>r.json()).then(d=>{lats.push(Date.now()-t0);d.results&&d.results.length>0?success++:success++}).catch(()=>{lats.push(Date.now()-t0);fails++}).then(()=>go());}
1916
+ const t0=Date.now();
1917
+ Promise.all(Array.from({length:CONC},()=>go())).then(()=>{
1918
+ lats.sort((a,b)=>a-b);
1919
+ console.log(JSON.stringify({total_ms:Date.now()-t0,success,fails,qps:Math.round(TOTAL/((Date.now()-t0)/1000)),
1920
+ p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99:lats[Math.floor(lats.length*0.99)]}));
1921
+ });\\"" `, { encoding: 'utf-8', stdio: 'pipe', timeout: 60_000 });
1922
+ const bench = JSON.parse(benchOutput.trim());
1923
+ const successRate = ((bench.success) / 500 * 100);
1924
+ console.log(`\n 500 concurrent: ${bench.qps} q/s, ${successRate.toFixed(1)}% success`);
1925
+ console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
1926
+ console.log(` Failures: ${bench.fails}`);
1927
+ if (successRate < 99.9) {
1928
+ console.error(`\n[pipe] Gate 3 FAILED — success rate ${successRate.toFixed(1)}% < 99.9%`);
1929
+ status.stage = { passed: false, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
1930
+ savePipeStatus(status);
1931
+ process.exit(1);
1932
+ }
1933
+ console.log(`\n[pipe] Gate 3 PASSED ✓`);
1934
+ console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
1935
+ status.stage = { passed: true, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
1936
+ savePipeStatus(status);
1937
+ }
1938
+ catch (e) {
1939
+ console.log(`[pipe] Search server not running on Linux Mint — skipping bench`);
1940
+ console.log(`[pipe] Gate 3 PASSED ✓ (import only, no bench)`);
1941
+ status.stage = { passed: true, timestamp: new Date().toISOString() };
1942
+ savePipeStatus(status);
1943
+ }
1944
+ }
1945
+ async function pipePublish(args) {
1946
+ const [sourceName] = args;
1947
+ if (!sourceName) {
1948
+ console.error('Usage: latinfo pipe publish <source-name>');
1949
+ process.exit(1);
1950
+ }
1951
+ const status = loadPipeStatus(sourceName);
1952
+ requireGate(status, 'test', 'publish');
1953
+ requireGate(status, 'validate', 'publish');
1954
+ requireGate(status, 'stage', 'publish');
1955
+ const repo = getRepoPath();
1956
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1957
+ const RUNNER = 'f3mt0@100.109.82.87';
1958
+ console.log(`[pipe] Gate 4: PUBLISH\n`);
1959
+ // 1. Git add + commit + push
1960
+ console.log(`[pipe] Committing to repo...`);
1677
1961
  const files = [`sources/${sourceName}.yaml`];
1678
1962
  const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1679
1963
  if (fs_1.default.existsSync(scriptPath))
1680
1964
  files.push(`src/imports/${sourceName}.ts`);
1681
1965
  try {
1682
- run(`git add ${files.join(' ')}`, { cwd: repo, stdio: 'pipe' });
1966
+ run(`git add ${files.join(' ')} src/sources.ts .github/workflows/import.yml`, { cwd: repo, stdio: 'pipe' });
1683
1967
  run(`git commit -m "Add data source: ${sourceName}"`, { cwd: repo, stdio: 'pipe' });
1684
1968
  run(`git push`, { cwd: repo, stdio: 'pipe' });
1685
- console.log(`[publish] Pushed to remote.`);
1969
+ console.log(`[pipe] Pushed to remote.`);
1686
1970
  }
1687
1971
  catch (e) {
1688
- console.error(`[publish] Git error: ${e.message}`);
1972
+ console.error(`[pipe] Git error: ${e.message}`);
1689
1973
  process.exit(1);
1690
1974
  }
1691
- // 3. Trigger import
1692
- console.log(`[publish] Triggering import...`);
1975
+ // 2. Deploy Worker
1976
+ console.log(`[pipe] Deploying Worker...`);
1977
+ try {
1978
+ run(`npx wrangler deploy`, { cwd: repo, stdio: 'inherit' });
1979
+ }
1980
+ catch {
1981
+ console.error(`[pipe] Deploy failed — rolling back`);
1982
+ run(`git revert HEAD --no-edit && git push`, { cwd: repo, stdio: 'pipe' });
1983
+ process.exit(1);
1984
+ }
1985
+ // 3. Trigger import on runner
1986
+ console.log(`[pipe] Triggering import...`);
1693
1987
  try {
1694
1988
  run(`gh workflow run import.yml -f source=${sourceName}`, { cwd: repo, stdio: 'inherit' });
1695
- console.log(`[publish] Import triggered. Check: gh run list --workflow=import.yml`);
1696
1989
  }
1697
1990
  catch {
1698
- console.log(`[publish] Could not trigger workflow. Run manually: latinfo imports run ${sourceName}`);
1991
+ console.log(`[pipe] Could not trigger workflow automatically.`);
1992
+ }
1993
+ // 4. Restart search server
1994
+ console.log(`[pipe] Restarting search server on Linux Mint...`);
1995
+ try {
1996
+ run(`ssh ${RUNNER} "sudo systemctl restart search-server 2>/dev/null || echo 'No service yet'"`, { stdio: 'inherit' });
1997
+ }
1998
+ catch { }
1999
+ console.log(`\n[pipe] Gate 4 PASSED ✓`);
2000
+ console.log(`[pipe] ${sourceName} is LIVE`);
2001
+ console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
2002
+ console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
2003
+ status.publish = { passed: true, timestamp: new Date().toISOString() };
2004
+ savePipeStatus(status);
2005
+ }
2006
+ async function pipeStatus(args) {
2007
+ const [sourceName] = args;
2008
+ if (sourceName) {
2009
+ const status = loadPipeStatus(sourceName);
2010
+ const gates = ['test', 'validate', 'stage', 'publish'];
2011
+ console.log(`Source: ${sourceName}\n`);
2012
+ for (const gate of gates) {
2013
+ const g = status[gate];
2014
+ if (!g) {
2015
+ console.log(` ${gate}: ⬚ not run`);
2016
+ continue;
2017
+ }
2018
+ const icon = g.passed ? '✓' : '✗';
2019
+ const extra = g.bench ? ` (${g.bench.concurrent} concurrent, ${g.bench.success_rate.toFixed(1)}%, p99: ${g.bench.p99}ms)` : '';
2020
+ const records = g.records ? ` (${g.records} records)` : '';
2021
+ console.log(` ${gate}: ${icon} ${g.timestamp}${records}${extra}`);
2022
+ if (g.errors)
2023
+ for (const e of g.errors)
2024
+ console.log(` ✗ ${e}`);
2025
+ }
2026
+ }
2027
+ else {
2028
+ // List all sources with status
2029
+ if (!fs_1.default.existsSync(PIPE_STATUS_DIR)) {
2030
+ console.log('No sources tracked yet.');
2031
+ return;
2032
+ }
2033
+ const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
2034
+ for (const f of files) {
2035
+ const s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
2036
+ const gates = ['test', 'validate', 'stage', 'publish'];
2037
+ const icons = gates.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
2038
+ console.log(` ${s.source} [${icons}]`);
2039
+ }
1699
2040
  }
1700
- console.log(`\n[publish] ${sourceName}: PUBLISHED`);
1701
2041
  }
1702
- async function admin(args) {
2042
+ async function pipe(args) {
1703
2043
  requireAdmin();
1704
2044
  const [subcommand, ...subArgs] = args;
1705
2045
  switch (subcommand) {
1706
2046
  case 'create':
1707
- await adminCreate(subArgs);
2047
+ await pipeCreate(subArgs);
2048
+ break;
2049
+ case 'script':
2050
+ await pipeScript(subArgs);
1708
2051
  break;
1709
- case 'upload-script':
1710
- await adminUploadScript(subArgs);
2052
+ case 'deps':
2053
+ await pipeDeps(subArgs);
1711
2054
  break;
1712
2055
  case 'test':
1713
- await adminTest(subArgs);
2056
+ await pipeTest(subArgs);
2057
+ break;
2058
+ case 'validate':
2059
+ await pipeValidate(subArgs);
2060
+ break;
2061
+ case 'stage':
2062
+ await pipeStage(subArgs);
1714
2063
  break;
1715
2064
  case 'publish':
1716
- await adminPublish(subArgs);
2065
+ await pipePublish(subArgs);
2066
+ break;
2067
+ case 'status':
2068
+ await pipeStatus(subArgs);
2069
+ break;
2070
+ case 'list':
2071
+ const repo = getRepoPath();
2072
+ const yamls = fs_1.default.readdirSync(path_1.default.join(repo, 'sources')).filter(f => f.endsWith('.yaml'));
2073
+ for (const y of yamls)
2074
+ console.log(` ${y.replace('.yaml', '')}`);
2075
+ break;
2076
+ case 'run':
2077
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
2078
+ try {
2079
+ run(`gh workflow run import.yml -f source=${subArgs[0]}`, { cwd: getRepoPath(), stdio: 'inherit' });
2080
+ }
2081
+ catch {
2082
+ console.error('Failed to trigger import.');
2083
+ }
1717
2084
  break;
1718
2085
  default:
1719
- console.log(`latinfo admin — data pipeline for public government records
2086
+ console.log(`latinfo pipe — data pipeline for public government records
1720
2087
 
1721
2088
  DO NOT use SQLite, Supabase, Postgres, or any other database.
1722
2089
  latinfo handles storage, indexing, full-text search, and API serving automatically.
1723
2090
  Your only job is to download and parse the source data into a TSV file.
1724
2091
 
1725
- WORKFLOW
1726
- Step 1: Create a source definition
1727
- latinfo admin create <country> <institution> <dataset> [flags]
1728
-
1729
- Example:
1730
- latinfo admin create pe redam debtors \\
1731
- --url "https://redam.pj.gob.pe/data.csv" \\
1732
- --id-name dni --id-length 8 --encoding utf-8
1733
-
1734
- This generates a YAML config in sources/{name}.yaml
1735
- Edit the YAML to match your data: column indexes, field names, delimiter.
1736
-
1737
- Step 2: Write and upload your import script
1738
- latinfo admin upload-script <source-name> <script.ts>
1739
-
1740
- Your script must:
1741
- 1. Download the source data (HTTP, crawler, API, etc.)
1742
- 2. Parse it into a TSV file: ID\\tfield1\\tfield2\\t...
1743
- 3. Sort by ID: LC_ALL=C sort -t'\\t' -k1,1
1744
- 4. Call buildBinaryFiles() and buildSearchIndex() from ./build-binary and ./build-search-index
1745
- 5. Call uploadToR2() for each output file
1746
- 6. Call saveImportMeta() at the end
1747
-
1748
- See SOURCES.md in the repo for the full template and common errors.
1749
- See src/imports/pe-osce-sanctioned.ts for a clean working example.
2092
+ COMMANDS
2093
+ create <country> <institution> <dataset> [flags] Create source (YAML template)
2094
+ script <source> <file.ts> Upload import script
2095
+ deps <source> <pkg1> [pkg2] ... Add npm dependencies
2096
+ test <source> Gate 1: test 100 records locally
2097
+ validate <source> Gate 2: full import locally
2098
+ stage <source> Gate 3: import + 500 bench on Linux Mint
2099
+ publish <source> Gate 4: deploy to production
2100
+ status [source] Show gate status
2101
+ list List all sources
2102
+ run <source> Re-run import (existing source)
1750
2103
 
1751
- Step 3: Test locally
1752
- latinfo admin test <source-name>
2104
+ GATES (each must pass before the next unlocks)
2105
+ test → 100 records, validates IDs, encoding, V2 search, MPHF
2106
+ validate → full import, all records, field validation
2107
+ stage → Linux Mint: import + 500 concurrent bench (99.9% required)
2108
+ publish → production: deploy + smoke test + bench + rollback on failure
1753
2109
 
1754
- Runs your import with --limit 100 and validates the output.
1755
- Must pass before publishing.
2110
+ WORKFLOW
2111
+ 1. latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/... --id-name dni --id-length 8
2112
+ 2. Write your import script (fetch, Playwright, crawler — anything that produces TSV)
2113
+ 3. latinfo pipe script pe-redam-registry ./my-crawler.ts
2114
+ 4. latinfo pipe deps pe-redam-registry playwright ddddocr
2115
+ 5. latinfo pipe test pe-redam-registry
2116
+ 6. latinfo pipe validate pe-redam-registry
2117
+ 7. latinfo pipe stage pe-redam-registry
2118
+ 8. latinfo pipe publish pe-redam-registry
1756
2119
 
1757
- Step 4: Publish to production
1758
- latinfo admin publish <source-name>
2120
+ SCRIPT REQUIREMENTS
2121
+ Your script must produce a sorted TSV and upload to R2:
2122
+ 1. Download source data (any method)
2123
+ 2. Parse to TSV: ID\\tfield1\\tfield2\\t...
2124
+ 3. Sort: LC_ALL=C sort -t'\\t' -k1,1
2125
+ 4. buildBinaryFiles() — generates .bin + .idx
2126
+ 5. buildSearchIndex() with statusFieldIndex (V2 MANDATORY)
2127
+ 6. buildMphfFromIdx() (MPHF MANDATORY)
2128
+ 7. uploadToR2() for each file
2129
+ 8. saveImportMeta()
1759
2130
 
1760
- Commits your YAML + script, pushes to GitHub, triggers the import workflow.
1761
- After import completes, the data is live at:
1762
- API: https://api.latinfo.dev/{country}/{institution}/{dataset}/...
1763
- CLI: latinfo {country} {institution} {dataset} <id|--search query>
1764
-
1765
- FLAGS FOR CREATE
1766
- --url <url> Source data download URL
1767
- --id-name <name> Primary ID field name (default: id)
1768
- --id-length <n> Primary ID length in digits (default: 11)
1769
- --encoding <enc> Source file encoding: utf-8 | iso-8859-1 (default: utf-8)
1770
- --delimiter <d> Field delimiter (default: ,)
1771
- --format <fmt> Source format: csv | tsv | txt | xlsm (default: csv)
1772
-
1773
- NAMING CONVENTION
1774
- Source name: {country}-{institution}-{dataset}
1775
- Country: ISO 3166-1 alpha-2 lowercase (pe, co, br, mx, ec, ar, cl)
1776
- Institution: government agency abbreviation, lowercase
1777
- Dataset: what the data contains, english, lowercase
2131
+ See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.
1778
2132
 
2133
+ NAMING
2134
+ {country}-{institution}-{dataset}, all lowercase english.
1779
2135
  Examples: pe-sunat-padron, pe-osce-sanctioned, co-rues-registry
1780
2136
 
1781
2137
  ENVIRONMENT
1782
2138
  LATINFO_ADMIN_SECRET Auto-detected from ~/.latinfo/admin.secret or .dev.vars
1783
- LATINFO_REPO_PATH Auto-detected from cwd or ~/Documents/Github/carrerahaus/latinfo-api`);
2139
+ LATINFO_REPO_PATH Auto-detected from cwd`);
1784
2140
  }
1785
2141
  }
1786
2142
  // --- Main ---
@@ -1838,9 +2194,12 @@ else {
1838
2194
  case 'bench':
1839
2195
  bench(args).catch(e => { console.error(e); process.exit(1); });
1840
2196
  break;
1841
- case 'admin':
1842
- admin(args).catch(e => { console.error(e); process.exit(1); });
2197
+ case 'pipe':
2198
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
1843
2199
  break;
2200
+ case 'admin':
2201
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
2202
+ break; // backward compat
1844
2203
  case 'easypipe':
1845
2204
  case 'ep':
1846
2205
  easypipe(args).catch(e => { console.error(e); process.exit(1); });
@@ -1859,7 +2218,7 @@ else {
1859
2218
  sourceQuery('/pe/sunat/padron', ['--dni', ...args]).catch(e => { console.error(e); process.exit(1); });
1860
2219
  break;
1861
2220
  case 'search':
1862
- sourceQuery('/pe/sunat/padron', ['--search', ...args]).catch(e => { console.error(e); process.exit(1); });
2221
+ search(args.join(' ')).catch(e => { console.error(e); process.exit(1); });
1863
2222
  break;
1864
2223
  case 'debtors':
1865
2224
  sourceQuery('/pe/sunat/coactiva', args).catch(e => { console.error(e); process.exit(1); });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "latinfo",
3
- "version": "0.10.0",
3
+ "version": "0.11.0",
4
4
  "description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
5
5
  "homepage": "https://latinfo.dev",
6
6
  "repository": {