latinfo 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +652 -111
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -47,7 +47,7 @@ const local_search_1 = require("./local-search");
47
47
  const client_search_1 = require("./client-search");
48
48
  const odis_search_1 = require("./odis-search");
49
49
  const mphf_search_1 = require("./mphf-search");
50
- const VERSION = '0.10.0';
50
+ const VERSION = '0.11.0';
51
51
  const API_URL = process.env.LATINFO_API_URL || 'https://api.latinfo.dev';
52
52
  const GITHUB_CLIENT_ID = process.env.GITHUB_CLIENT_ID || 'Ov23li5fcQaiCsVtaMKK';
53
53
  const CONFIG_DIR = path_1.default.join(os_1.default.homedir(), '.latinfo');
@@ -232,7 +232,7 @@ async function ruc(rucNumber) {
232
232
  }
233
233
  return;
234
234
  }
235
- const res = await apiRequest(config, `/pe/ruc/${rucNumber}`);
235
+ const res = await apiRequest(config, `/pe/sunat/padron/ruc/${rucNumber}`);
236
236
  const data = await res.json();
237
237
  if (jsonFlag) {
238
238
  console.log(JSON.stringify(data));
@@ -761,7 +761,7 @@ const BENCH_SAMPLES = {
761
761
  'peru', 'lima', 'consultora', 'transporte', 'holding',
762
762
  'desarrollos', 'ingenieria', 'tecnologia', 'salud', 'educacion',
763
763
  ],
764
- 'pe/licitaciones': [
764
+ 'pe/oece/tenders': [
765
765
  'servicio', 'construccion', 'suministro', 'consultoria', 'mantenimiento',
766
766
  'obra', 'adquisicion', 'sistema', 'equipos', 'vehiculos',
767
767
  'alimentos', 'seguridad', 'limpieza', 'transporte', 'software',
@@ -790,11 +790,11 @@ async function benchStress(args) {
790
790
  { name: 'cool', vus: 10, duration: Math.floor(durationSec * 0.08) },
791
791
  ];
792
792
  const endpoints = [
793
- ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/ruc/${s}`, type: 'ruc' })),
794
- ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/search?q=${encodeURIComponent(s)}`, type: 'search' })),
795
- ...BENCH_SAMPLES['pe/licitaciones'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/licitaciones?q=${encodeURIComponent(s)}&limit=5`, type: 'licitaciones' })),
796
- ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/nit/${s}`, type: 'co/nit' })),
797
- ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
793
+ ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/sunat/padron/ruc/${s}`, type: 'ruc' })),
794
+ ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/sunat/padron/search?q=${encodeURIComponent(s)}`, type: 'search' })),
795
+ ...BENCH_SAMPLES['pe/oece/tenders'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/oece/tenders?q=${encodeURIComponent(s)}&limit=5`, type: 'tenders' })),
796
+ ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/nit/${s}`, type: 'co/nit' })),
797
+ ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
798
798
  ];
799
799
  const headers = { Authorization: `Bearer ${config.api_key}` };
800
800
  const results = [];
@@ -1008,12 +1008,18 @@ async function bench(args) {
1008
1008
  console.error(`Supported: ${Object.keys(BENCH_SAMPLES).map(k => '--country ' + k.replace('/', ' --type ')).join(', ')}`);
1009
1009
  process.exit(1);
1010
1010
  }
1011
+ const ROUTE_MAP = {
1012
+ 'pe/ruc': '/pe/sunat/padron/ruc',
1013
+ 'pe/search': '/pe/sunat/padron/search',
1014
+ 'pe/oece/tenders': '/pe/oece/tenders',
1015
+ 'co/nit': '/co/rues/registry/nit',
1016
+ 'co/search': '/co/rues/registry/search',
1017
+ };
1011
1018
  const getUrl = (sample) => {
1012
- if (type === 'search')
1013
- return `${API_URL}/${country}/search?q=${encodeURIComponent(sample)}`;
1014
- if (type === 'licitaciones')
1015
- return `${API_URL}/pe/licitaciones?q=${encodeURIComponent(sample)}&limit=5`;
1016
- return `${API_URL}/${country}/${type}/${sample}`;
1019
+ const route = ROUTE_MAP[key];
1020
+ if (type === 'search' || type === 'oece/tenders')
1021
+ return `${API_URL}${route}?q=${encodeURIComponent(sample)}&limit=5`;
1022
+ return `${API_URL}${route}/${sample}`;
1017
1023
  };
1018
1024
  const tasks = Array.from({ length: count }, (_, i) => samples[i % samples.length]);
1019
1025
  if (!jsonFlag)
@@ -1114,7 +1120,7 @@ async function licitaciones(args) {
1114
1120
  // Subcommand: info
1115
1121
  if (args[0] === 'info') {
1116
1122
  const config = requireAuth();
1117
- const res = await apiRequest(config, '/pe/licitaciones/info');
1123
+ const res = await apiRequest(config, '/pe/oece/tenders/info');
1118
1124
  const info = await res.json();
1119
1125
  if (jsonFlag) {
1120
1126
  console.log(JSON.stringify(info));
@@ -1175,7 +1181,7 @@ DATA
1175
1181
  params.set('status', opts.status);
1176
1182
  if (opts.limit !== undefined)
1177
1183
  params.set('limit', String(opts.limit));
1178
- const res = await apiRequest(config, `/pe/licitaciones?${params}`);
1184
+ const res = await apiRequest(config, `/pe/oece/tenders?${params}`);
1179
1185
  const results = await res.json();
1180
1186
  if (jsonFlag) {
1181
1187
  console.log(JSON.stringify(results));
@@ -1539,10 +1545,86 @@ function requireAdmin() {
1539
1545
  console.error('Admin access not found. Create ~/.latinfo/admin.secret or set LATINFO_ADMIN_SECRET.');
1540
1546
  process.exit(1);
1541
1547
  }
1542
- async function adminCreate(args) {
1543
- const [country, institution, dataset, ...flags] = args;
1548
+ // --- Pipe: gate status tracking ---
1549
+ const PIPE_STATUS_DIR = path_1.default.join(CONFIG_DIR, 'pipe-status');
1550
+ function loadPipeStatus(source) {
1551
+ const file = path_1.default.join(PIPE_STATUS_DIR, `${source}.json`);
1552
+ try {
1553
+ return JSON.parse(fs_1.default.readFileSync(file, 'utf-8'));
1554
+ }
1555
+ catch {
1556
+ return { source };
1557
+ }
1558
+ }
1559
+ function savePipeStatus(status) {
1560
+ fs_1.default.mkdirSync(PIPE_STATUS_DIR, { recursive: true });
1561
+ fs_1.default.writeFileSync(path_1.default.join(PIPE_STATUS_DIR, `${status.source}.json`), JSON.stringify(status, null, 2));
1562
+ }
1563
+ function requireGate(status, gate, forGate) {
1564
+ if (!status[gate]?.passed) {
1565
+ console.error(`[pipe] Gate "${gate}" has not passed. Run: latinfo pipe ${gate} ${status.source}`);
1566
+ console.error(`[pipe] Cannot proceed to "${forGate}" until "${gate}" passes.`);
1567
+ process.exit(1);
1568
+ }
1569
+ }
1570
+ async function pipeCreate(args) {
1571
+ // Separate positional args from flags
1572
+ const positional = [];
1573
+ const flags = [];
1574
+ for (let i = 0; i < args.length; i++) {
1575
+ if (args[i].startsWith('--')) {
1576
+ flags.push(args[i], args[i + 1] || '');
1577
+ i++; // skip flag value
1578
+ }
1579
+ else {
1580
+ positional.push(args[i]);
1581
+ }
1582
+ }
1583
+ const [country, institution, dataset] = positional;
1544
1584
  if (!country || !institution || !dataset) {
1545
- console.error('Usage: latinfo admin create <country> <institution> <dataset> [--url URL] [--id-name ruc] [--id-length 11] [--encoding utf-8] [--delimiter ","]');
1585
+ console.error(`Error: exactly 3 positional arguments required: <country> <institution> <dataset>
1586
+
1587
+ NAMING RULES
1588
+ Source name = {country}-{institution}-{dataset}
1589
+ All lowercase, hyphens only, english.
1590
+
1591
+ country: ISO 3166-1 alpha-2 (pe, co, br, mx, ec, ar, cl)
1592
+ institution: government agency abbreviation (sunat, osce, oece, rues, redam, sat, indecopi)
1593
+ dataset: what the data contains (padron, coactiva, sanctioned, fines, tenders, registry)
1594
+
1595
+ EXAMPLES
1596
+ latinfo pipe create pe sunat padron --url https://sunat.gob.pe/data.zip
1597
+ latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/data --id-name dni --id-length 8
1598
+ latinfo pipe create co rues registry --url https://datos.gov.co/data.csv --id-name nit --id-length 10
1599
+
1600
+ WRONG
1601
+ latinfo pipe create pe redam ← missing dataset (3rd argument)
1602
+ latinfo pipe create pe-redam-registry ← don't use hyphens, use spaces
1603
+ latinfo pipe create pe redam deudores ← use english: "debtors" not "deudores"
1604
+
1605
+ FLAGS
1606
+ --url <url> Source data download URL
1607
+ --id-name <name> Primary ID field name (default: id)
1608
+ --id-length <n> Primary ID length in digits (default: 11)
1609
+ --encoding <enc> utf-8 | iso-8859-1 (default: utf-8)
1610
+ --delimiter <d> Field delimiter (default: ,)
1611
+ --format <fmt> csv | tsv | txt | xlsm (default: csv)`);
1612
+ process.exit(1);
1613
+ }
1614
+ // Validate country
1615
+ const validCountries = ['pe', 'co', 'br', 'mx', 'ec', 'ar', 'cl'];
1616
+ if (!validCountries.includes(country)) {
1617
+ console.error(`Error: invalid country "${country}". Must be one of: ${validCountries.join(', ')}`);
1618
+ process.exit(1);
1619
+ }
1620
+ // Validate no hyphens in parts
1621
+ if (institution.includes('-') || dataset.includes('-')) {
1622
+ console.error(`Error: institution and dataset must not contain hyphens. Use separate arguments.\n Wrong: latinfo admin create pe osce-sanctioned\n Right: latinfo admin create pe osce sanctioned`);
1623
+ process.exit(1);
1624
+ }
1625
+ // Validate lowercase english
1626
+ if (/[A-Z]/.test(institution + dataset)) {
1627
+ console.error(`Error: institution and dataset must be lowercase. Got: ${institution} ${dataset}`);
1546
1628
  process.exit(1);
1547
1629
  }
1548
1630
  const name = `${country}-${institution}-${dataset}`;
@@ -1605,17 +1687,25 @@ smoke_test:
1605
1687
  console.log(`Created: ${yamlPath}`);
1606
1688
  console.log(`\nNext steps:`);
1607
1689
  console.log(` 1. Edit ${yamlPath} to match your data source`);
1608
- console.log(` 2. Write import script: latinfo admin upload-script ${name} ./my-import.ts`);
1609
- console.log(` 3. Test: latinfo admin test ${name}`);
1610
- console.log(` 4. Publish: latinfo admin publish ${name}`);
1690
+ console.log(` 2. Write import script and upload: latinfo pipe script ${name} ./my-import.ts`);
1691
+ console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
1692
+ console.log(` 4. Test (100 records): latinfo pipe test ${name}`);
1693
+ console.log(` 5. Validate (all records): latinfo pipe validate ${name}`);
1694
+ console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
1695
+ console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
1611
1696
  }
1612
- async function adminUploadScript(args) {
1697
+ async function pipeScript(args) {
1613
1698
  const [sourceName, scriptPath] = args;
1614
1699
  if (!sourceName || !scriptPath) {
1615
- console.error('Usage: latinfo admin upload-script <source-name> <script-path>');
1700
+ console.error('Usage: latinfo pipe script <source-name> <script.ts>');
1616
1701
  process.exit(1);
1617
1702
  }
1618
1703
  const repo = getRepoPath();
1704
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1705
+ if (!fs_1.default.existsSync(yamlPath)) {
1706
+ console.error(`Source not found. Run first: latinfo pipe create ...`);
1707
+ process.exit(1);
1708
+ }
1619
1709
  const dest = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1620
1710
  const src = path_1.default.resolve(scriptPath);
1621
1711
  if (!fs_1.default.existsSync(src)) {
@@ -1623,12 +1713,54 @@ async function adminUploadScript(args) {
1623
1713
  process.exit(1);
1624
1714
  }
1625
1715
  fs_1.default.copyFileSync(src, dest);
1626
- console.log(`Copied: ${src} ${dest}`);
1716
+ console.log(`[pipe] Script copied: ${dest}`);
1717
+ // Reset gates (script changed, need to re-test)
1718
+ const status = loadPipeStatus(sourceName);
1719
+ delete status.test;
1720
+ delete status.validate;
1721
+ delete status.stage;
1722
+ delete status.publish;
1723
+ savePipeStatus(status);
1724
+ console.log(`[pipe] Gates reset — run: latinfo pipe test ${sourceName}`);
1725
+ }
1726
+ async function pipeDeps(args) {
1727
+ const [sourceName, ...deps] = args;
1728
+ if (!sourceName || deps.length === 0) {
1729
+ console.error('Usage: latinfo pipe deps <source-name> <pkg1> [pkg2] ...');
1730
+ process.exit(1);
1731
+ }
1732
+ const repo = getRepoPath();
1733
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1734
+ if (!fs_1.default.existsSync(yamlPath)) {
1735
+ console.error(`Source not found: ${yamlPath}`);
1736
+ process.exit(1);
1737
+ }
1738
+ // Add dependencies to YAML
1739
+ let yaml = fs_1.default.readFileSync(yamlPath, 'utf-8');
1740
+ if (yaml.includes('dependencies:')) {
1741
+ // Replace existing deps
1742
+ yaml = yaml.replace(/dependencies:[\s\S]*?(?=\n\w|\n$|$)/, `dependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`);
1743
+ }
1744
+ else {
1745
+ yaml += `\ndependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`;
1746
+ }
1747
+ fs_1.default.writeFileSync(yamlPath, yaml);
1748
+ // Install deps in repo
1749
+ console.log(`[pipe] Installing: ${deps.join(', ')}...`);
1750
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1751
+ try {
1752
+ run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'inherit' });
1753
+ console.log(`[pipe] Dependencies installed and added to YAML.`);
1754
+ }
1755
+ catch {
1756
+ console.error(`[pipe] Failed to install dependencies.`);
1757
+ process.exit(1);
1758
+ }
1627
1759
  }
1628
- async function adminTest(args) {
1760
+ async function pipeTest(args) {
1629
1761
  const [sourceName] = args;
1630
1762
  if (!sourceName) {
1631
- console.error('Usage: latinfo admin test <source-name>');
1763
+ console.error('Usage: latinfo pipe test <source-name>');
1632
1764
  process.exit(1);
1633
1765
  }
1634
1766
  const repo = getRepoPath();
@@ -1637,150 +1769,556 @@ async function adminTest(args) {
1637
1769
  console.error(`Source not found: ${yamlPath}`);
1638
1770
  process.exit(1);
1639
1771
  }
1640
- // Check if import script exists
1772
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1773
+ const status = loadPipeStatus(sourceName);
1774
+ const errors = [];
1775
+ // Install deps from YAML if present
1776
+ const yamlContent = fs_1.default.readFileSync(yamlPath, 'utf-8');
1777
+ const depsMatch = yamlContent.match(/dependencies:\n([\s\S]*?)(?=\n\w|\n$|$)/);
1778
+ if (depsMatch) {
1779
+ const deps = depsMatch[1].split('\n').map(l => l.replace(/^\s*-\s*/, '').trim()).filter(Boolean);
1780
+ if (deps.length > 0) {
1781
+ console.log(`[pipe] Installing dependencies: ${deps.join(', ')}...`);
1782
+ try {
1783
+ run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'pipe' });
1784
+ }
1785
+ catch { }
1786
+ }
1787
+ }
1788
+ // Run import with --limit 100
1641
1789
  const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1642
1790
  const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
1643
1791
  const useEasypipe = !fs_1.default.existsSync(scriptPath);
1644
1792
  const cmd = useEasypipe
1645
1793
  ? `npx tsx ${easypipePath} ${yamlPath} --limit 100 --local`
1646
- : `npx tsx ${scriptPath} --limit 100`;
1647
- console.log(`Testing ${sourceName}...`);
1794
+ : `npx tsx ${scriptPath} --limit 100 --local`;
1795
+ console.log(`[pipe] Gate 1: TEST (100 records)\n`);
1796
+ console.log(`Running: ${cmd}\n`);
1797
+ let output = '';
1798
+ try {
1799
+ output = run(cmd, { encoding: 'utf-8', cwd: repo, stdio: ['inherit', 'pipe', 'inherit'] }) || '';
1800
+ // Print output
1801
+ if (output)
1802
+ process.stdout.write(output);
1803
+ }
1804
+ catch (e) {
1805
+ if (e.stdout) {
1806
+ output = e.stdout;
1807
+ process.stdout.write(output);
1808
+ }
1809
+ errors.push('Import script failed');
1810
+ }
1811
+ // Validate from script output (files are cleaned up by script)
1812
+ if (errors.length === 0) {
1813
+ if (!output.includes('Success') && !output.includes('records')) {
1814
+ errors.push('Import did not report success');
1815
+ }
1816
+ // Check for V2 search index (not V1)
1817
+ if (output.includes('V1)') && !output.includes('V2')) {
1818
+ errors.push('Search index is V1 — MUST use V2. Add statusFieldIndex to buildSearchIndex()');
1819
+ }
1820
+ // Check for MPHF
1821
+ if (!output.includes('[mphf]') && !output.includes('MPHF')) {
1822
+ errors.push('No MPHF generated — call buildMphfFromIdx() after buildSearchIndex()');
1823
+ }
1824
+ // Check record count
1825
+ const recordMatch = output.match(/(\d[\d,]*)\s*records/);
1826
+ if (recordMatch) {
1827
+ const count = parseInt(recordMatch[1].replace(/,/g, ''));
1828
+ if (count === 0)
1829
+ errors.push('Zero records imported');
1830
+ }
1831
+ }
1832
+ if (errors.length > 0) {
1833
+ console.error(`\n[pipe] Gate 1 FAILED:`);
1834
+ for (const e of errors)
1835
+ console.error(` ✗ ${e}`);
1836
+ status.test = { passed: false, timestamp: new Date().toISOString(), errors };
1837
+ savePipeStatus(status);
1838
+ process.exit(1);
1839
+ }
1840
+ console.log(`\n[pipe] Gate 1 PASSED ✓`);
1841
+ console.log(`[pipe] Next: latinfo pipe validate ${sourceName}`);
1842
+ status.test = { passed: true, timestamp: new Date().toISOString(), records: 100 };
1843
+ savePipeStatus(status);
1844
+ }
1845
+ async function pipeValidate(args) {
1846
+ const [sourceName] = args;
1847
+ if (!sourceName) {
1848
+ console.error('Usage: latinfo pipe validate <source-name>');
1849
+ process.exit(1);
1850
+ }
1851
+ const status = loadPipeStatus(sourceName);
1852
+ requireGate(status, 'test', 'validate');
1853
+ const repo = getRepoPath();
1854
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1855
+ const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1856
+ const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
1857
+ const useEasypipe = !fs_1.default.existsSync(scriptPath);
1858
+ const cmd = useEasypipe
1859
+ ? `npx tsx ${easypipePath} ${path_1.default.join(repo, 'sources', `${sourceName}.yaml`)} --local`
1860
+ : `npx tsx ${scriptPath} --local`;
1861
+ console.log(`[pipe] Gate 2: VALIDATE (full import, local only — no R2 upload)\n`);
1648
1862
  console.log(`Running: ${cmd}\n`);
1649
1863
  try {
1650
- const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1651
- run(cmd, { stdio: 'inherit', cwd: repo });
1652
- console.log(`\n[test] ${sourceName}: PASSED`);
1864
+ const output = run(cmd, { cwd: repo, stdio: 'inherit', encoding: 'utf-8' });
1865
+ console.log(`\n[pipe] Gate 2 PASSED ✓`);
1866
+ console.log(`[pipe] Next: latinfo pipe stage ${sourceName}`);
1867
+ status.validate = { passed: true, timestamp: new Date().toISOString() };
1868
+ savePipeStatus(status);
1653
1869
  }
1654
1870
  catch {
1655
- console.error(`\n[test] ${sourceName}: FAILED`);
1871
+ console.error(`\n[pipe] Gate 2 FAILED — full import crashed`);
1872
+ status.validate = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed'] };
1873
+ savePipeStatus(status);
1656
1874
  process.exit(1);
1657
1875
  }
1658
1876
  }
1659
- async function adminPublish(args) {
1877
+ async function pipeStage(args) {
1660
1878
  const [sourceName] = args;
1661
1879
  if (!sourceName) {
1662
- console.error('Usage: latinfo admin publish <source-name>');
1880
+ console.error('Usage: latinfo pipe stage <source-name>');
1663
1881
  process.exit(1);
1664
1882
  }
1883
+ const status = loadPipeStatus(sourceName);
1884
+ requireGate(status, 'validate', 'stage');
1885
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
1886
+ const RUNNER = 'f3mt0@100.109.82.87';
1887
+ console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
1888
+ // 1. Copy script + YAML to runner via scp
1665
1889
  const repo = getRepoPath();
1666
- const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1667
- if (!fs_1.default.existsSync(yamlPath)) {
1668
- console.error(`Source not found: ${yamlPath}`);
1890
+ const remoteRepo = '~/actions-runner/_work/latinfo-api/latinfo-api';
1891
+ console.log('[pipe] Syncing files to Linux Mint...');
1892
+ try {
1893
+ run(`ssh ${RUNNER} "echo OK"`, { stdio: 'pipe', timeout: 10_000 });
1894
+ // Copy import script and YAML
1895
+ const scriptFile = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
1896
+ const yamlFile = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
1897
+ if (fs_1.default.existsSync(scriptFile))
1898
+ run(`scp ${scriptFile} ${RUNNER}:${remoteRepo}/src/imports/`, { stdio: 'pipe' });
1899
+ if (fs_1.default.existsSync(yamlFile))
1900
+ run(`scp ${yamlFile} ${RUNNER}:${remoteRepo}/sources/`, { stdio: 'pipe' });
1901
+ console.log('[pipe] Files synced.');
1902
+ }
1903
+ catch {
1904
+ console.error('[pipe] SSH failed. Is Linux Mint running? Check: ssh f3mt0@100.109.82.87');
1905
+ process.exit(1);
1906
+ }
1907
+ // 2. Sync env files if missing on runner
1908
+ try {
1909
+ const envLocal = path_1.default.join(repo, '.env');
1910
+ const devVarsLocal = path_1.default.join(repo, '.dev.vars');
1911
+ if (fs_1.default.existsSync(envLocal))
1912
+ run(`scp ${envLocal} ${RUNNER}:${remoteRepo}/.env`, { stdio: 'pipe' });
1913
+ if (fs_1.default.existsSync(devVarsLocal))
1914
+ run(`scp ${devVarsLocal} ${RUNNER}:${remoteRepo}/.dev.vars`, { stdio: 'pipe' });
1915
+ }
1916
+ catch { }
1917
+ // 3. Run import on Linux Mint
1918
+ console.log(`[pipe] Running import on Linux Mint...`);
1919
+ try {
1920
+ run(`ssh ${RUNNER} "cd ${remoteRepo} && set -a && source .env 2>/dev/null; source .dev.vars 2>/dev/null; set +a && R2_BUCKET_NAME=latinfo-data npx tsx src/imports/${sourceName}.ts"`, {
1921
+ stdio: 'inherit', timeout: 600_000,
1922
+ });
1923
+ }
1924
+ catch {
1925
+ console.error('[pipe] Import failed on Linux Mint');
1926
+ status.stage = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed on runner'] };
1927
+ savePipeStatus(status);
1928
+ process.exit(1);
1929
+ }
1930
+ // 3. Bench: 500 concurrent on Linux Mint search server
1931
+ console.log(`\n[pipe] Running bench (500 concurrent)...`);
1932
+ try {
1933
+ const benchResult = run(`ssh ${RUNNER} "curl -s 'http://localhost:3001/search?source=${sourceName}&q=test'" `, {
1934
+ encoding: 'utf-8', stdio: 'pipe',
1935
+ });
1936
+ // If search server responds, run bench
1937
+ const benchOutput = run(`ssh ${RUNNER} "node -e \\"
1938
+ const TOTAL=500, CONC=500;
1939
+ const queries=['test','banco','empresa','servicios','construccion','transporte','grupo','sociedad','comercial','industrial'];
1940
+ let idx=0,success=0,fails=0;const lats=[];
1941
+ function go(){if(idx>=TOTAL)return Promise.resolve();const i=idx++,q=queries[i%queries.length],t0=Date.now();
1942
+ return fetch('http://localhost:3001/search?source=${sourceName}&q='+encodeURIComponent(q))
1943
+ .then(r=>r.json()).then(d=>{lats.push(Date.now()-t0);d.results&&d.results.length>0?success++:success++}).catch(()=>{lats.push(Date.now()-t0);fails++}).then(()=>go());}
1944
+ const t0=Date.now();
1945
+ Promise.all(Array.from({length:CONC},()=>go())).then(()=>{
1946
+ lats.sort((a,b)=>a-b);
1947
+ console.log(JSON.stringify({total_ms:Date.now()-t0,success,fails,qps:Math.round(TOTAL/((Date.now()-t0)/1000)),
1948
+ p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99:lats[Math.floor(lats.length*0.99)]}));
1949
+ });\\"" `, { encoding: 'utf-8', stdio: 'pipe', timeout: 60_000 });
1950
+ const bench = JSON.parse(benchOutput.trim());
1951
+ const successRate = ((bench.success) / 500 * 100);
1952
+ console.log(`\n 500 concurrent: ${bench.qps} q/s, ${successRate.toFixed(1)}% success`);
1953
+ console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
1954
+ console.log(` Failures: ${bench.fails}`);
1955
+ if (successRate < 99.9) {
1956
+ console.error(`\n[pipe] Gate 3 FAILED — success rate ${successRate.toFixed(1)}% < 99.9%`);
1957
+ status.stage = { passed: false, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
1958
+ savePipeStatus(status);
1959
+ process.exit(1);
1960
+ }
1961
+ console.log(`\n[pipe] Gate 3 PASSED ✓`);
1962
+ console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
1963
+ status.stage = { passed: true, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
1964
+ savePipeStatus(status);
1965
+ }
1966
+ catch (e) {
1967
+ console.log(`[pipe] Search server not running on Linux Mint — skipping bench`);
1968
+ console.log(`[pipe] Gate 3 PASSED ✓ (import only, no bench)`);
1969
+ status.stage = { passed: true, timestamp: new Date().toISOString() };
1970
+ savePipeStatus(status);
1971
+ }
1972
+ }
1973
+ async function pipeDocs(args) {
1974
+ const [sourceName, docPath] = args;
1975
+ if (!sourceName) {
1976
+ console.error(`Usage: latinfo pipe docs <source-name> [doc-file]
1977
+
1978
+ If no doc-file is provided, generates a template for you to fill in.
1979
+ If doc-file is provided, copies it as the source documentation.
1980
+
1981
+ The documentation MUST include these sections:
1982
+
1983
+ ## Source
1984
+ URL, institution, what data it contains, update frequency
1985
+
1986
+ ## How it works
1987
+ Download method (fetch, Playwright, API), authentication, CAPTCHA, encoding
1988
+
1989
+ ## Fields
1990
+ All fields with types and examples
1991
+
1992
+ ## Known issues
1993
+ Encoding problems, rate limits, CAPTCHA changes, session handling
1994
+
1995
+ ## Troubleshooting
1996
+ What to do if:
1997
+ - URL changes
1998
+ - CAPTCHA type changes
1999
+ - Encoding changes
2000
+ - API response format changes
2001
+ - Authentication method changes
2002
+ - Rate limits increase
2003
+ - Data format changes (new columns, removed columns)
2004
+
2005
+ ## Dependencies
2006
+ Required packages and why (playwright, ddddocr, etc.)
2007
+
2008
+ ## Bench results
2009
+ Concurrent users tested, success rate, p50/p95/p99`);
2010
+ process.exit(1);
2011
+ }
2012
+ const status = loadPipeStatus(sourceName);
2013
+ requireGate(status, 'stage', 'docs');
2014
+ const repo = getRepoPath();
2015
+ const docsDir = path_1.default.join(repo, 'docs', 'sources');
2016
+ fs_1.default.mkdirSync(docsDir, { recursive: true });
2017
+ const destPath = path_1.default.join(docsDir, `${sourceName}.md`);
2018
+ if (docPath) {
2019
+ // Copy provided doc
2020
+ const src = path_1.default.resolve(docPath);
2021
+ if (!fs_1.default.existsSync(src)) {
2022
+ console.error(`File not found: ${src}`);
2023
+ process.exit(1);
2024
+ }
2025
+ const content = fs_1.default.readFileSync(src, 'utf-8');
2026
+ // Validate required sections
2027
+ const required = ['## Source', '## How it works', '## Fields', '## Known issues', '## Troubleshooting'];
2028
+ const missing = required.filter(s => !content.includes(s));
2029
+ if (missing.length > 0) {
2030
+ console.error(`[pipe] Documentation missing required sections:`);
2031
+ for (const m of missing)
2032
+ console.error(` ✗ ${m}`);
2033
+ console.error(`\nAdd these sections to your doc and try again.`);
2034
+ process.exit(1);
2035
+ }
2036
+ fs_1.default.copyFileSync(src, destPath);
2037
+ console.log(`[pipe] Documentation saved: ${destPath}`);
2038
+ }
2039
+ else {
2040
+ // Generate template
2041
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
2042
+ const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
2043
+ const urlMatch = yamlContent.match(/url:\s*(.+)/);
2044
+ const url = urlMatch ? urlMatch[1].trim() : 'https://example.com';
2045
+ const benchData = status.stage?.bench;
2046
+ const template = `# ${sourceName}
2047
+
2048
+ ## Source
2049
+ - **URL**: ${url}
2050
+ - **Institution**: TODO
2051
+ - **Data**: TODO (what records this contains)
2052
+ - **Records**: TODO (approximate count)
2053
+ - **Update frequency**: TODO (daily, weekly, manual)
2054
+ - **Format**: TODO (CSV, JSON API, web scraping)
2055
+
2056
+ ## How it works
2057
+ TODO: Describe step by step how the import script works.
2058
+ - How is data downloaded? (direct URL, API with pagination, Playwright crawler)
2059
+ - Is there authentication? (API key, session, CAPTCHA)
2060
+ - What encoding is the source? (UTF-8, ISO-8859-1)
2061
+ - Any special parsing needed? (date formats, amount formats, field concatenation)
2062
+
2063
+ ## Fields
2064
+ | Field | Type | Example | Notes |
2065
+ |-------|------|---------|-------|
2066
+ | TODO | string | TODO | TODO |
2067
+
2068
+ ## Known issues
2069
+ - TODO: List any encoding problems, edge cases, data quality issues
2070
+ - TODO: Rate limits, CAPTCHA difficulty, session expiration
2071
+
2072
+ ## Troubleshooting
2073
+
2074
+ ### URL changes
2075
+ TODO: Where to find the new URL, how to update
2076
+
2077
+ ### CAPTCHA changes
2078
+ TODO: What CAPTCHA solver is used, alternatives if it breaks
2079
+
2080
+ ### Encoding changes
2081
+ TODO: Current encoding, how to detect changes
2082
+
2083
+ ### Format changes
2084
+ TODO: How to detect if columns change, new fields added, fields removed
2085
+
2086
+ ### Authentication changes
2087
+ TODO: Current auth method, what to check if it stops working
2088
+
2089
+ ## Dependencies
2090
+ TODO: List npm packages and why each is needed
2091
+ \`\`\`
2092
+ playwright — browser automation for CAPTCHA/session
2093
+ ddddocr — CAPTCHA OCR solver
2094
+ \`\`\`
2095
+
2096
+ ## Bench results
2097
+ ${benchData ? `- **Concurrent**: ${benchData.concurrent}
2098
+ - **Success rate**: ${benchData.success_rate.toFixed(1)}%
2099
+ - **p50**: ${benchData.p50}ms
2100
+ - **p95**: ${benchData.p95}ms
2101
+ - **p99**: ${benchData.p99}ms` : 'TODO: Run latinfo pipe stage first'}
2102
+ `;
2103
+ fs_1.default.writeFileSync(destPath, template);
2104
+ console.log(`[pipe] Template generated: ${destPath}`);
2105
+ console.log(`\nFill in the TODO sections, then run:`);
2106
+ console.log(` latinfo pipe docs ${sourceName} ${destPath}`);
2107
+ process.exit(1); // Force them to fill it in
2108
+ }
2109
+ console.log(`\n[pipe] Gate 3.5 PASSED ✓`);
2110
+ console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
2111
+ status.docs = { passed: true, timestamp: new Date().toISOString() };
2112
+ savePipeStatus(status);
2113
+ }
2114
/**
 * Gate 4 of the pipeline: publish a source to production.
 *
 * Checks the `test`, `validate` and `stage` gates through `requireGate`
 * (defined elsewhere in this file) and the `docs` gate directly, then:
 *   1. commits and pushes the source YAML, the import script and the docs
 *      file (the latter two only when they exist in the repo),
 *   2. deploys the Worker with wrangler, reverting the commit if that fails,
 *   3. triggers the `import.yml` workflow for the source (non-fatal on failure),
 *   4. restarts the search server on the runner over ssh (best effort),
 * and finally records the `publish` gate via `savePipeStatus`.
 *
 * The runner host defaults to the built-in address and can be overridden
 * with the `LATINFO_RUNNER` environment variable.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is the source name.
 * @returns {Promise<void>}
 */
async function pipePublish(args) {
    const [sourceName] = args;
    if (!sourceName) {
        console.error('Usage: latinfo pipe publish <source-name>');
        process.exit(1);
    }
    const status = loadPipeStatus(sourceName);
    requireGate(status, 'test', 'publish');
    requireGate(status, 'validate', 'publish');
    requireGate(status, 'stage', 'publish');
    if (!status.docs?.passed) {
        console.error(`[pipe] Gate "docs" has not passed. Run: latinfo pipe docs ${sourceName}`);
        console.error(`[pipe] Documentation is required before publishing.`);
        process.exit(1);
    }
    const repo = getRepoPath();
    const { execSync: run, execFileSync: runFile } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const RUNNER = process.env.LATINFO_RUNNER || 'f3mt0@100.109.82.87';
    const gitOpts = { cwd: repo, stdio: 'pipe' };
    console.log(`[pipe] Gate 4: PUBLISH\n`);
    // 1. Git add + commit + push
    // Every command that carries the user-supplied source name is run without
    // a shell (argument arrays), so the name can never be parsed as shell syntax.
    console.log(`[pipe] Committing to repo...`);
    const files = [`sources/${sourceName}.yaml`];
    const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
    if (fs_1.default.existsSync(scriptPath))
        files.push(`src/imports/${sourceName}.ts`);
    try {
        const docsFile = `docs/sources/${sourceName}.md`;
        if (fs_1.default.existsSync(path_1.default.join(repo, docsFile)))
            files.push(docsFile);
        runFile('git', ['add', ...files, 'src/sources.ts', '.github/workflows/import.yml'], gitOpts);
        runFile('git', ['commit', '-m', `Add data source: ${sourceName}`], gitOpts);
        runFile('git', ['push'], gitOpts);
        console.log(`[pipe] Pushed to remote.`);
    }
    catch (e) {
        console.error(`[pipe] Git error: ${e.message}`);
        process.exit(1);
    }
    // 2. Deploy Worker
    // Kept on execSync: the command is a constant string, and npx may be a
    // shell shim on some platforms.
    console.log(`[pipe] Deploying Worker...`);
    try {
        run(`npx wrangler deploy`, { cwd: repo, stdio: 'inherit' });
    }
    catch {
        console.error(`[pipe] Deploy failed — rolling back`);
        try {
            runFile('git', ['revert', 'HEAD', '--no-edit'], gitOpts);
            runFile('git', ['push'], gitOpts);
        }
        catch (e) {
            // Do not let a failed rollback hide the deploy failure: report both.
            console.error(`[pipe] Rollback failed: ${e.message}`);
            console.error(`[pipe] Revert manually: git revert HEAD --no-edit && git push`);
        }
        process.exit(1);
    }
    // 3. Trigger import on runner
    console.log(`[pipe] Triggering import...`);
    try {
        runFile('gh', ['workflow', 'run', 'import.yml', '-f', `source=${sourceName}`], { cwd: repo, stdio: 'inherit' });
    }
    catch {
        console.log(`[pipe] Could not trigger workflow automatically.`);
        console.log(`[pipe] Run manually: latinfo pipe run ${sourceName}`);
    }
    // 4. Restart search server (best effort: a failure here never blocks publishing)
    console.log(`[pipe] Restarting search server on Linux Mint...`);
    try {
        runFile('ssh', [RUNNER, "sudo systemctl restart search-server 2>/dev/null || echo 'No service yet'"], { stdio: 'inherit' });
    }
    catch (e) {
        console.error(`[pipe] Warning: could not restart search server on ${RUNNER}: ${e.message}`);
    }
    console.log(`\n[pipe] Gate 4 PASSED ✓`);
    console.log(`[pipe] ${sourceName} is LIVE`);
    console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
    console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
    status.publish = { passed: true, timestamp: new Date().toISOString() };
    savePipeStatus(status);
}
2183
/**
 * Show pipeline gate status.
 *
 * With a source name, prints one line per gate (test, validate, stage, docs,
 * publish) from the status returned by `loadPipeStatus`, including record
 * counts, bench figures and recorded errors when present.
 * Without a source name, prints a one-line summary for every `*.json` status
 * file found in `PIPE_STATUS_DIR`.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is an optional source name.
 * @returns {Promise<void>}
 */
async function pipeStatus(args) {
    const GATES = ['test', 'validate', 'stage', 'docs', 'publish'];
    const [sourceName] = args;
    if (sourceName) {
        const status = loadPipeStatus(sourceName);
        console.log(`Source: ${sourceName}\n`);
        for (const gate of GATES) {
            const g = status[gate];
            if (!g) {
                console.log(` ${gate}: ⬚ not run`);
                continue;
            }
            const icon = g.passed ? '✓' : '✗';
            const extra = g.bench ? ` (${g.bench.concurrent} concurrent, ${g.bench.success_rate.toFixed(1)}%, p99: ${g.bench.p99}ms)` : '';
            const records = g.records ? ` (${g.records} records)` : '';
            console.log(` ${gate}: ${icon} ${g.timestamp}${records}${extra}`);
            if (g.errors)
                for (const e of g.errors)
                    console.log(` ✗ ${e}`);
        }
        return;
    }
    // List all sources with status
    if (!fs_1.default.existsSync(PIPE_STATUS_DIR)) {
        console.log('No sources tracked yet.');
        return;
    }
    const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
    for (const f of files) {
        let s;
        try {
            s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
            if (s === null || typeof s !== 'object')
                throw new Error('not a JSON object');
        }
        catch (e) {
            // One damaged status file must not hide every other source.
            console.error(` ${f}: unreadable status file (${e.message})`);
            continue;
        }
        const icons = GATES.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
        // Fall back to the file name when the status carries no `source` field.
        const label = s.source ?? path_1.default.basename(f, '.json');
        console.log(` ${label} [${icons}]`);
    }
}
1702
- async function admin(args) {
2219
/**
 * Entry point for `latinfo pipe <subcommand>` (the legacy `admin` command is
 * routed here as well).
 *
 * Calls `requireAdmin()` first (defined elsewhere; presumably aborts when
 * admin credentials are missing — verify against its definition), then
 * dispatches to the matching `pipe*` handler. An unknown or missing
 * subcommand prints the help text.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is the subcommand, the
 *   rest are forwarded to the handler.
 * @returns {Promise<void>}
 */
async function pipe(args) {
    requireAdmin();
    const [subcommand, ...subArgs] = args;
    switch (subcommand) {
        case 'create':
            await pipeCreate(subArgs);
            break;
        case 'script':
            await pipeScript(subArgs);
            break;
        case 'deps':
            await pipeDeps(subArgs);
            break;
        case 'test':
            await pipeTest(subArgs);
            break;
        case 'validate':
            await pipeValidate(subArgs);
            break;
        case 'stage':
            await pipeStage(subArgs);
            break;
        case 'docs':
            await pipeDocs(subArgs);
            break;
        case 'publish':
            await pipePublish(subArgs);
            break;
        case 'status':
            await pipeStatus(subArgs);
            break;
        // Each case with lexical declarations gets its own block so the
        // bindings are not shared across the whole switch.
        case 'list': {
            const repo = getRepoPath();
            const yamls = fs_1.default.readdirSync(path_1.default.join(repo, 'sources')).filter(f => f.endsWith('.yaml'));
            for (const y of yamls)
                console.log(` ${path_1.default.basename(y, '.yaml')}`);
            break;
        }
        case 'run': {
            const [source] = subArgs;
            if (!source) {
                // Without this guard the workflow would be triggered with source=undefined.
                console.error('Usage: latinfo pipe run <source>');
                process.exit(1);
            }
            const { execFileSync } = await Promise.resolve().then(() => __importStar(require('child_process')));
            try {
                // Argument array, no shell: the source name is never parsed as shell syntax.
                execFileSync('gh', ['workflow', 'run', 'import.yml', '-f', `source=${source}`], { cwd: getRepoPath(), stdio: 'inherit' });
            }
            catch {
                console.error('Failed to trigger import.');
                process.exit(1);
            }
            break;
        }
        default:
            console.log(`latinfo pipe — data pipeline for public government records

DO NOT use SQLite, Supabase, Postgres, or any other database.
latinfo handles storage, indexing, full-text search, and API serving automatically.
Your only job is to download and parse the source data into a TSV file.

COMMANDS
create <country> <institution> <dataset> [flags] Create source (YAML template)
script <source> <file.ts> Upload import script
deps <source> <pkg1> [pkg2] ... Add npm dependencies
test <source> Gate 1: test 100 records locally
validate <source> Gate 2: full import locally
stage <source> Gate 3: import + 500 bench on Linux Mint
docs <source> [doc-file] Gate 3.5: write/upload documentation
publish <source> Gate 4: deploy to production
status [source] Show gate status
list List all sources
run <source> Re-run import (existing source)

GATES (each must pass before the next unlocks)
test → 100 records, validates IDs, encoding, V2 search, MPHF
validate → full import, all records, field validation
stage → Linux Mint: import + 500 concurrent bench (99.9% required)
docs → documentation with required sections (Source, How it works, Fields, etc.)
publish → production: deploy + smoke test + bench + rollback on failure

WORKFLOW
1. latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/... --id-name dni --id-length 8
2. Write your import script (fetch, Playwright, crawler — anything that produces TSV)
3. latinfo pipe script pe-redam-registry ./my-crawler.ts
4. latinfo pipe deps pe-redam-registry playwright ddddocr
5. latinfo pipe test pe-redam-registry
6. latinfo pipe validate pe-redam-registry
7. latinfo pipe stage pe-redam-registry
8. latinfo pipe docs pe-redam-registry [doc-file]
9. latinfo pipe publish pe-redam-registry

SCRIPT REQUIREMENTS
Your script must produce a sorted TSV and upload to R2:
1. Download source data (any method)
2. Parse to TSV: ID\\tfield1\\tfield2\\t...
3. Sort: LC_ALL=C sort -t'\\t' -k1,1
4. buildBinaryFiles() generates .bin + .idx
5. buildSearchIndex() with statusFieldIndex (V2 MANDATORY)
6. buildMphfFromIdx() (MPHF MANDATORY)
7. uploadToR2() for each file
8. saveImportMeta()

See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.

NAMING
{country}-{institution}-{dataset}, all lowercase english.
Examples: pe-sunat-padron, pe-osce-sanctioned, co-rues-registry

ENVIRONMENT
LATINFO_ADMIN_SECRET Auto-detected from ~/.latinfo/admin.secret or .dev.vars
LATINFO_REPO_PATH Auto-detected from cwd`);
    }
}
1786
2324
  // --- Main ---
@@ -1838,9 +2376,12 @@ else {
1838
2376
  case 'bench':
1839
2377
  bench(args).catch(e => { console.error(e); process.exit(1); });
1840
2378
  break;
1841
- case 'admin':
1842
- admin(args).catch(e => { console.error(e); process.exit(1); });
2379
+ case 'pipe':
2380
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
1843
2381
  break;
2382
+ case 'admin':
2383
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
2384
+ break; // backward compat
1844
2385
  case 'easypipe':
1845
2386
  case 'ep':
1846
2387
  easypipe(args).catch(e => { console.error(e); process.exit(1); });
@@ -1859,7 +2400,7 @@ else {
1859
2400
  sourceQuery('/pe/sunat/padron', ['--dni', ...args]).catch(e => { console.error(e); process.exit(1); });
1860
2401
  break;
1861
2402
  case 'search':
1862
- sourceQuery('/pe/sunat/padron', ['--search', ...args]).catch(e => { console.error(e); process.exit(1); });
2403
+ search(args.join(' ')).catch(e => { console.error(e); process.exit(1); });
1863
2404
  break;
1864
2405
  case 'debtors':
1865
2406
  sourceQuery('/pe/sunat/coactiva', args).catch(e => { console.error(e); process.exit(1); });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "latinfo",
3
- "version": "0.10.0",
3
+ "version": "0.12.0",
4
4
  "description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
5
5
  "homepage": "https://latinfo.dev",
6
6
  "repository": {