latinfo 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +894 -138
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -47,7 +47,7 @@ const local_search_1 = require("./local-search");
47
47
  const client_search_1 = require("./client-search");
48
48
  const odis_search_1 = require("./odis-search");
49
49
  const mphf_search_1 = require("./mphf-search");
50
- const VERSION = '0.8.1';
50
+ const VERSION = '0.10.0';
51
51
  const API_URL = process.env.LATINFO_API_URL || 'https://api.latinfo.dev';
52
52
  const GITHUB_CLIENT_ID = process.env.GITHUB_CLIENT_ID || 'Ov23li5fcQaiCsVtaMKK';
53
53
  const CONFIG_DIR = path_1.default.join(os_1.default.homedir(), '.latinfo');
@@ -232,7 +232,7 @@ async function ruc(rucNumber) {
232
232
  }
233
233
  return;
234
234
  }
235
- const res = await apiRequest(config, `/pe/ruc/${rucNumber}`);
235
+ const res = await apiRequest(config, `/pe/sunat/padron/ruc/${rucNumber}`);
236
236
  const data = await res.json();
237
237
  if (jsonFlag) {
238
238
  console.log(JSON.stringify(data));
@@ -339,7 +339,9 @@ async function search(query) {
339
339
  const odisMode = allArgs.includes('--odis');
340
340
  const mphfMode = allArgs.includes('--mphf');
341
341
  // MPHF: fully autonomous client-side search (zero server for search)
342
- if (mphfMode || (!clientMode && !odisMode && (0, mphf_search_1.hasMphfData)(country))) {
342
+ // Skip auto-detection in demo mode (no config) so demo data path is used
343
+ const config = loadConfig();
344
+ if (mphfMode || (config && !clientMode && !odisMode && (0, mphf_search_1.hasMphfData)(country))) {
343
345
  const stripped = query.split(' ').filter((w, i, a) => !['--mphf', '--client', '--odis', '--json'].includes(w) &&
344
346
  !(i > 0 && ['--country'].includes(a[i - 1])) &&
345
347
  !['--country'].includes(w)).join(' ');
@@ -416,7 +418,8 @@ async function search(query) {
416
418
  return;
417
419
  }
418
420
  // Local search: if data exists in ~/.latinfo/data/, search offline
419
- if ((0, local_search_1.hasLocalData)(country)) {
421
+ // Skip in demo mode (no config) so demo data path is used
422
+ if (config && (0, local_search_1.hasLocalData)(country)) {
420
423
  const results = (0, local_search_1.localSearch)(country, query);
421
424
  if (jsonFlag) {
422
425
  console.log(JSON.stringify(results));
@@ -436,7 +439,6 @@ async function search(query) {
436
439
  return;
437
440
  }
438
441
  // Fallback: API search
439
- const config = loadConfig();
440
442
  if (!config) {
441
443
  const results = (0, demo_data_1.searchDemo)(query);
442
444
  if (jsonFlag) {
@@ -759,7 +761,7 @@ const BENCH_SAMPLES = {
759
761
  'peru', 'lima', 'consultora', 'transporte', 'holding',
760
762
  'desarrollos', 'ingenieria', 'tecnologia', 'salud', 'educacion',
761
763
  ],
762
- 'pe/licitaciones': [
764
+ 'pe/oece/tenders': [
763
765
  'servicio', 'construccion', 'suministro', 'consultoria', 'mantenimiento',
764
766
  'obra', 'adquisicion', 'sistema', 'equipos', 'vehiculos',
765
767
  'alimentos', 'seguridad', 'limpieza', 'transporte', 'software',
@@ -788,11 +790,11 @@ async function benchStress(args) {
788
790
  { name: 'cool', vus: 10, duration: Math.floor(durationSec * 0.08) },
789
791
  ];
790
792
  const endpoints = [
791
- ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/ruc/${s}`, type: 'ruc' })),
792
- ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/search?q=${encodeURIComponent(s)}`, type: 'search' })),
793
- ...BENCH_SAMPLES['pe/licitaciones'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/licitaciones?q=${encodeURIComponent(s)}&limit=5`, type: 'licitaciones' })),
794
- ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/nit/${s}`, type: 'co/nit' })),
795
- ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
793
+ ...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/sunat/padron/ruc/${s}`, type: 'ruc' })),
794
+ ...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/sunat/padron/search?q=${encodeURIComponent(s)}`, type: 'search' })),
795
+ ...BENCH_SAMPLES['pe/oece/tenders'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/oece/tenders?q=${encodeURIComponent(s)}&limit=5`, type: 'tenders' })),
796
+ ...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/nit/${s}`, type: 'co/nit' })),
797
+ ...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
796
798
  ];
797
799
  const headers = { Authorization: `Bearer ${config.api_key}` };
798
800
  const results = [];
@@ -1006,12 +1008,18 @@ async function bench(args) {
1006
1008
  console.error(`Supported: ${Object.keys(BENCH_SAMPLES).map(k => '--country ' + k.replace('/', ' --type ')).join(', ')}`);
1007
1009
  process.exit(1);
1008
1010
  }
1011
+ const ROUTE_MAP = {
1012
+ 'pe/ruc': '/pe/sunat/padron/ruc',
1013
+ 'pe/search': '/pe/sunat/padron/search',
1014
+ 'pe/oece/tenders': '/pe/oece/tenders',
1015
+ 'co/nit': '/co/rues/registry/nit',
1016
+ 'co/search': '/co/rues/registry/search',
1017
+ };
1009
1018
  const getUrl = (sample) => {
1010
- if (type === 'search')
1011
- return `${API_URL}/${country}/search?q=${encodeURIComponent(sample)}`;
1012
- if (type === 'licitaciones')
1013
- return `${API_URL}/pe/licitaciones?q=${encodeURIComponent(sample)}&limit=5`;
1014
- return `${API_URL}/${country}/${type}/${sample}`;
1019
+ const route = ROUTE_MAP[key];
1020
+ if (type === 'search' || type === 'oece/tenders')
1021
+ return `${API_URL}${route}?q=${encodeURIComponent(sample)}&limit=5`;
1022
+ return `${API_URL}${route}/${sample}`;
1015
1023
  };
1016
1024
  const tasks = Array.from({ length: count }, (_, i) => samples[i % samples.length]);
1017
1025
  if (!jsonFlag)
@@ -1112,7 +1120,7 @@ async function licitaciones(args) {
1112
1120
  // Subcommand: info
1113
1121
  if (args[0] === 'info') {
1114
1122
  const config = requireAuth();
1115
- const res = await apiRequest(config, '/pe/licitaciones/info');
1123
+ const res = await apiRequest(config, '/pe/oece/tenders/info');
1116
1124
  const info = await res.json();
1117
1125
  if (jsonFlag) {
1118
1126
  console.log(JSON.stringify(info));
@@ -1173,7 +1181,7 @@ DATA
1173
1181
  params.set('status', opts.status);
1174
1182
  if (opts.limit !== undefined)
1175
1183
  params.set('limit', String(opts.limit));
1176
- const res = await apiRequest(config, `/pe/licitaciones?${params}`);
1184
+ const res = await apiRequest(config, `/pe/oece/tenders?${params}`);
1177
1185
  const results = await res.json();
1178
1186
  if (jsonFlag) {
1179
1187
  console.log(JSON.stringify(results));
@@ -1221,108 +1229,65 @@ function help() {
1221
1229
  console.log(`latinfo v${VERSION} — Tax registry API for Latin America
1222
1230
 
1223
1231
  USAGE
1224
- latinfo <command> [args] [--json]
1232
+ latinfo <country> <institution> <dataset> <id|--search query|--dni id> [--json]
1233
+ latinfo <admin-command> [args]
1225
1234
 
1226
1235
  QUICK START
1227
1236
  npm install -g latinfo
1228
- latinfo login # GitHub OAuth, 30 seconds
1229
- latinfo ruc 20100047218 # Banco de Crédito del Perú
1230
- latinfo search "banco de credito" # search by company name
1231
- latinfo ruc 20100047218 --json # JSON output
1232
-
1233
- Works instantly with ${Object.keys(demo_data_1.DEMO_DATA).length} embedded records (no login needed).
1234
- Run 'latinfo login' for 18M+ records and DNI lookup.
1235
-
1236
- COMMANDS
1237
- login
1238
- GitHub OAuth. Opens browser, stores API key in ~/.latinfo/config.json.
1239
-
1240
- login --token <github_pat>
1241
- Login with a GitHub Personal Access Token. No browser needed.
1242
- Create a PAT at github.com/settings/tokens (scope: read:user).
1243
-
1244
- logout
1245
- Remove stored credentials.
1246
-
1247
- whoami
1248
- Show authenticated GitHub username.
1249
- --json: { username, api_key }
1237
+ latinfo login
1238
+ latinfo pe sunat padron 20100047218
1239
+ latinfo pe sunat padron --search "banco de credito"
1240
+ latinfo pe sunat padron --dni 09346247
1250
1241
 
1251
- ruc <ruc>
1252
- Lookup by RUC (11 digits).
1253
- --json fields: ruc, razon_social, estado, condicion, ubigeo,
1254
- tipo_via, nombre_via, numero, interior, lote, codigo_zona,
1255
- tipo_zona, departamento, manzana, kilometro
1242
+ DATA SOURCES
1243
+ Peru SUNAT
1244
+ latinfo pe sunat padron <ruc> Padrón RUC (18M records)
1245
+ latinfo pe sunat padron --dni <dni> Lookup by DNI
1246
+ latinfo pe sunat padron --search <query> Search by name
1247
+ latinfo pe sunat coactiva <ruc> Tax debt (cobranza coactiva)
1248
+ latinfo pe sunat coactiva --search <query>
1256
1249
 
1257
- dni <dni>
1258
- Lookup by DNI (8 digits). Converts to RUC automatically.
1259
- --json fields: same as ruc
1250
+ Peru — OSCE
1251
+ latinfo pe osce sanctioned <ruc> Sanctioned providers
1252
+ latinfo pe osce sanctioned --search <query>
1253
+ latinfo pe osce fines <ruc> Provider fines
1254
+ latinfo pe osce fines --search <query>
1260
1255
 
1261
- search <query>
1262
- Search by company name (razón social). Returns ranked results
1263
- with prefix autocomplete and abbreviation handling (S.A.C., E.I.R.L.).
1264
- --json fields: array of ruc objects
1256
+ Peru — OECE
1257
+ latinfo pe oece tenders <query> [flags] Government procurement
1258
+ Flags: --category, --min-amount, --max-amount, --buyer, --status, --limit
1265
1259
 
1266
- costs <users> [avg_req/user/month] [pro_%]
1267
- Simulate Cloudflare cost vs revenue.
1268
- Defaults: 1000 req/user, 1% Pro.
1269
- --json fields: users, pro_users, requests, cf_tier, cf_cost,
1270
- revenue, margin, safe
1260
+ Colombia RUES
1261
+ latinfo co rues registry <nit> Business registry (3.3M records)
1262
+ latinfo co rues registry --search <query>
1271
1263
 
1272
- costs --live
1273
- Real-time cost report from production (admin only).
1274
- Requires LATINFO_ADMIN_SECRET env var.
1275
-
1276
- debtors <ruc|name> [--source <source>]
1277
- Search Peru debtors across SUNAT Coactiva, OSCE Inhabilitados, OSCE Multas.
1278
- --source: filter by sunat-coactiva, osce-inhabilitado, osce-multa
1279
- --json fields: ruc, name, source, detail, amount, date_start, date_end, resolution
1280
-
1281
- licitaciones <query> [flags]
1282
- Search Peru government procurement (OECE/SEACE).
1283
- Flags: --category, --min-amount, --max-amount, --buyer, --method, --status, --limit
1284
- Run 'latinfo licitaciones help' for details.
1285
-
1286
- easypipe <command>
1287
- Generic import pipeline driven by YAML configs.
1288
- Commands: list, build <source>, sync, add <url>
1289
- Run 'latinfo easypipe help' for details.
1290
-
1291
- bench [--country pe|co] [--type ruc|nit|search|licitaciones] [--count N] [--concurrency N]
1292
- Stress test the API.
1293
-
1294
- completion [bash|zsh]
1295
- Output shell completion script.
1296
- bash: eval "$(latinfo completion bash)"
1297
- zsh: eval "$(latinfo completion zsh)"
1298
-
1299
- help Show this help text.
1264
+ ADMIN
1265
+ login [--token <github_pat>] GitHub OAuth or PAT login
1266
+ logout Remove credentials
1267
+ whoami Show authenticated user
1268
+ imports Show import status
1269
+ imports run <source> Trigger import
1270
+ imports report [days] Import diagnostics
1271
+ costs <users> [avg_req] [pro_%] Cost simulation
1272
+ costs --live Production cost report
1273
+ bench [flags] Stress test API
1274
+ easypipe <command> Generic import pipeline
1275
+ completion [bash|zsh] Shell completions
1276
+ help This help text
1300
1277
 
1301
1278
  FLAGS
1302
- --json Output raw JSON. Errors → stderr as { error, message }.
1303
- --live Use production data (costs command).
1304
- --version Print version and exit.
1305
-
1306
- COUNTRIES
1307
- pe Peru (SUNAT padrón) — 18M+ records, updated daily. Active.
1308
- br, mx, co, ar, cl — in development.
1279
+ --json Raw JSON output
1280
+ --search Search by name instead of ID lookup
1281
+ --dni Lookup by DNI (Peru only)
1282
+ --version Print version
1309
1283
 
1310
1284
  PRICING
1311
- Free 100,000 requests/day — no credit card
1312
- Pro 10M requests/month — $1/month
1313
-
1314
- LINKS
1315
- latinfo.dev/docs API reference
1316
- latinfo.dev/changelog Changelog
1317
- carrera.instatus.com Status page
1285
+ Free 100,000 requests/day
1286
+ Pro 10M requests/month — $1/month
1318
1287
 
1319
1288
  CONFIG
1320
- ~/.latinfo/config.json API key (written by 'latinfo login')
1321
- LATINFO_API_URL Override API base URL
1322
-
1323
- EXIT CODES
1324
- 0 Success
1325
- 1 Error`);
1289
+ ~/.latinfo/config.json API key
1290
+ LATINFO_API_URL Override API URL`);
1326
1291
  }
1327
1292
  function printLogo() {
1328
1293
  if (!process.stdout.isTTY)
@@ -1379,15 +1344,31 @@ function completion() {
1379
1344
  if (shell === 'zsh') {
1380
1345
  console.log(`#compdef latinfo
1381
1346
  _latinfo() {
1382
- local -a commands=(login logout whoami plan costs ruc dni search debtors licitaciones lic help)
1383
- local -a lic_flags=(--category --min-amount --max-amount --buyer --method --status --limit --json)
1384
- local -a global_flags=(--json --live --version --token)
1347
+ local -a countries=(pe co)
1348
+ local -a admin=(login logout whoami imports plan costs bench easypipe completion help)
1349
+ local -a pe_inst=(sunat osce oece)
1350
+ local -a co_inst=(rues)
1351
+ local -a sunat_ds=(padron coactiva)
1352
+ local -a osce_ds=(sanctioned fines)
1353
+ local -a oece_ds=(tenders)
1354
+ local -a rues_ds=(registry)
1355
+ local -a flags=(--json --search --dni --version)
1385
1356
  if (( CURRENT == 2 )); then
1386
- _describe 'command' commands
1387
- elif [[ "\${words[2]}" == "licitaciones" || "\${words[2]}" == "lic" ]]; then
1388
- _describe 'flag' lic_flags
1357
+ _describe 'command' countries -- admin
1358
+ elif (( CURRENT == 3 )); then
1359
+ case "\${words[2]}" in
1360
+ pe) _describe 'institution' pe_inst;;
1361
+ co) _describe 'institution' co_inst;;
1362
+ esac
1363
+ elif (( CURRENT == 4 )); then
1364
+ case "\${words[3]}" in
1365
+ sunat) _describe 'dataset' sunat_ds;;
1366
+ osce) _describe 'dataset' osce_ds;;
1367
+ oece) _describe 'dataset' oece_ds;;
1368
+ rues) _describe 'dataset' rues_ds;;
1369
+ esac
1389
1370
  else
1390
- _describe 'flag' global_flags
1371
+ _describe 'flag' flags
1391
1372
  fi
1392
1373
  }
1393
1374
  compdef _latinfo latinfo`);
@@ -1395,26 +1376,794 @@ compdef _latinfo latinfo`);
1395
1376
  else {
1396
1377
  console.log(`_latinfo_completions() {
1397
1378
  local cur="\${COMP_WORDS[COMP_CWORD]}"
1398
- local prev="\${COMP_WORDS[1]}"
1399
- if [[ \${COMP_CWORD} -eq 1 ]]; then
1400
- COMPREPLY=( $(compgen -W "login logout whoami plan costs ruc dni search debtors licitaciones lic help" -- "$cur") )
1401
- elif [[ "$prev" == "licitaciones" || "$prev" == "lic" ]]; then
1402
- COMPREPLY=( $(compgen -W "--category --min-amount --max-amount --buyer --method --status --limit --json info help" -- "$cur") )
1403
- elif [[ "$prev" == "--category" || "$prev" == "-c" ]]; then
1404
- COMPREPLY=( $(compgen -W "goods services works" -- "$cur") )
1405
- elif [[ "$prev" == "--status" || "$prev" == "-s" ]]; then
1406
- COMPREPLY=( $(compgen -W "CONVOCADO CONTRATADO DESIERTO NULO CONSENTIDO" -- "$cur") )
1379
+ local lvl=\${COMP_CWORD}
1380
+ local w1="\${COMP_WORDS[1]}" w2="\${COMP_WORDS[2]}" w3="\${COMP_WORDS[3]}"
1381
+ if [[ \$lvl -eq 1 ]]; then
1382
+ COMPREPLY=( $(compgen -W "pe co login logout whoami imports plan costs bench easypipe completion help" -- "$cur") )
1383
+ elif [[ \$lvl -eq 2 ]]; then
1384
+ case "$w1" in
1385
+ pe) COMPREPLY=( $(compgen -W "sunat osce oece" -- "$cur") );;
1386
+ co) COMPREPLY=( $(compgen -W "rues" -- "$cur") );;
1387
+ esac
1388
+ elif [[ \$lvl -eq 3 ]]; then
1389
+ case "$w2" in
1390
+ sunat) COMPREPLY=( $(compgen -W "padron coactiva" -- "$cur") );;
1391
+ osce) COMPREPLY=( $(compgen -W "sanctioned fines" -- "$cur") );;
1392
+ oece) COMPREPLY=( $(compgen -W "tenders" -- "$cur") );;
1393
+ rues) COMPREPLY=( $(compgen -W "registry" -- "$cur") );;
1394
+ esac
1395
+ else
1396
+ COMPREPLY=( $(compgen -W "--json --search --dni" -- "$cur") )
1407
1397
  fi
1408
1398
  }
1409
1399
  complete -F _latinfo_completions latinfo`);
1410
1400
  }
1411
1401
  }
1402
+ // --- Generic source query ---
1403
/**
 * Run a query against one dataset route (for example `/pe/sunat/padron`).
 *
 * The mode is chosen from `datasetArgs`:
 *   - `--search <words...>`  GET `${routePath}/search?q=<words>` and list the results.
 *   - `--dni <8 digits>`     GET `${routePath}/dni/<dni>`.
 *   - otherwise              the first non-flag argument is the primary ID and
 *                            the request is `${routePath}/<idName>/<id>`.
 *
 * When no config is stored, an 11-digit ID on `/pe/sunat/padron` is answered
 * from the embedded demo data; every other direct lookup asks the user to log in.
 *
 * @param {string} routePath - API route prefix of the dataset.
 * @param {string[]} datasetArgs - CLI arguments that follow the dataset name.
 * @returns {Promise<void>} Results are written to stdout, diagnostics to stderr.
 */
async function sourceQuery(routePath, datasetArgs) {
    // Only non-flag words carry user data. Using the same list for every mode
    // keeps flags such as `--json` out of the search query text.
    const positional = datasetArgs.filter((arg) => !arg.startsWith('--'));
    // Print the populated fields of one record, skipping empty and '-' values.
    const printRecord = (record) => {
        for (const [k, v] of Object.entries(record)) {
            if (v && v !== '-')
                console.log(` ${k}: ${v}`);
        }
    };
    // Report an invalid invocation and stop.
    // NOTE(review): jsonError presumably prints and exits on its own — confirm.
    const fail = (code, message) => {
        if (jsonFlag)
            jsonError(code, message);
        console.error(message);
        process.exit(1);
    };
    if (datasetArgs.includes('--search')) {
        const query = positional.join(' ');
        if (!query) {
            fail('invalid_input', 'Search query is required.');
        }
        const config = requireAuth();
        const res = await apiRequest(config, `${routePath}/search?q=${encodeURIComponent(query)}`);
        const data = await res.json();
        if (jsonFlag) {
            console.log(JSON.stringify(data));
            return;
        }
        // Anything that is not an array is treated as "no results".
        const results = Array.isArray(data) ? data : [];
        if (results.length === 0) {
            console.log('No results found.');
            return;
        }
        for (const r of results) {
            // Show the first three field values of each hit.
            const fields = Object.values(r);
            console.log(` ${fields.slice(0, 3).join(' ')}`);
        }
        console.log(`\n${results.length} result(s)`);
        return;
    }
    if (datasetArgs.includes('--dni')) {
        const dniVal = positional[0];
        if (!dniVal || !/^\d{8}$/.test(dniVal)) {
            fail('invalid_input', 'Invalid DNI. Must be 8 digits.');
        }
        const config = requireAuth();
        const res = await apiRequest(config, `${routePath}/dni/${dniVal}`);
        const data = await res.json();
        if (jsonFlag) {
            console.log(JSON.stringify(data));
            return;
        }
        printRecord(data);
        return;
    }
    // Direct ID lookup
    const id = positional[0];
    if (!id) {
        fail('invalid_input', 'ID or --search <query> required.');
    }
    const config = loadConfig();
    // Demo mode for pe/sunat/padron
    if (!config && routePath === '/pe/sunat/padron' && /^\d{11}$/.test(id)) {
        const demo = demo_data_1.DEMO_DATA[id];
        if (demo) {
            if (jsonFlag) {
                console.log(JSON.stringify({ ...demo, _demo: true }));
            }
            else {
                printRecord(demo);
            }
            process.stderr.write(`Demo data (${Object.keys(demo_data_1.DEMO_DATA).length} records). Run 'latinfo login' for full access.\n`);
            return;
        }
        fail('not_found', "Not in demo data. Run 'latinfo login' for full access.");
    }
    if (!config) {
        fail('auth_required', "Lookup requires login. Run 'latinfo login'");
    }
    // Name of the path segment that precedes the ID, per dataset route.
    const idNames = {
        '/pe/sunat/padron': 'ruc', '/pe/sunat/coactiva': 'ruc',
        '/pe/osce/sanctioned': 'ruc', '/pe/osce/fines': 'ruc',
        '/co/rues/registry': 'nit',
    };
    const idName = idNames[routePath] || 'id';
    // The ID is free-form user input here, so encode it before placing it in the path.
    const res = await apiRequest(config, `${routePath}/${idName}/${encodeURIComponent(id)}`);
    const data = await res.json();
    if (jsonFlag) {
        console.log(JSON.stringify(data));
        return;
    }
    printRecord(data);
}
1508
+ // --- Admin: source management ---
1509
/**
 * Locate the latinfo-api repository on disk.
 *
 * LATINFO_REPO_PATH wins when set (it is returned without further checks).
 * Otherwise the current directory and two conventional locations under the
 * home directory are probed; a directory qualifies when it contains both
 * `sources` and `src/imports`. If nothing qualifies the process exits with 1.
 *
 * @returns {string} Path of the repository root.
 */
function getRepoPath() {
    const override = process.env.LATINFO_REPO_PATH;
    if (override)
        return override;
    // A repo checkout is recognised by these two entries.
    const looksLikeRepo = (dir) => ['sources', 'src/imports']
        .every((entry) => fs_1.default.existsSync(path_1.default.join(dir, entry)));
    const detected = [
        process.cwd(),
        path_1.default.join(os_1.default.homedir(), 'Documents/Github/carrerahaus/latinfo-api'),
        path_1.default.join(os_1.default.homedir(), 'latinfo-api'),
    ].find(looksLikeRepo);
    if (detected !== undefined)
        return detected;
    console.error('Cannot find latinfo-api repo. Set LATINFO_REPO_PATH or run from repo dir.');
    process.exit(1);
}
1526
/**
 * Resolve the admin secret, or exit with status 1 when none is available.
 *
 * Lookup order:
 *   1. LATINFO_ADMIN_SECRET environment variable
 *   2. `admin.secret` inside CONFIG_DIR
 *   3. the `ADMIN_SECRET=` entry of `.dev.vars` in the repository
 *
 * Empty values are treated as "not configured" so that a blank file or entry
 * does not shadow a later source.
 *
 * @returns {string} The admin secret.
 */
function requireAdmin() {
    // 1. Env var
    const fromEnv = process.env.LATINFO_ADMIN_SECRET;
    if (fromEnv)
        return fromEnv;
    // 2. ~/.latinfo/admin.secret
    const secretFile = path_1.default.join(CONFIG_DIR, 'admin.secret');
    if (fs_1.default.existsSync(secretFile)) {
        const fromFile = fs_1.default.readFileSync(secretFile, 'utf-8').trim();
        if (fromFile)
            return fromFile;
    }
    // 3. .dev.vars in repo
    try {
        const devVars = path_1.default.join(getRepoPath(), '.dev.vars');
        if (fs_1.default.existsSync(devVars)) {
            // Anchor to the start of a line so commented-out entries and longer
            // keys that merely end in ADMIN_SECRET are not picked up.
            const match = fs_1.default.readFileSync(devVars, 'utf-8')
                .match(/^[ \t]*ADMIN_SECRET[ \t]*=[ \t]*(.+)$/m);
            if (match) {
                // Drop one pair of matching surrounding quotes (dotenv-style value).
                const value = match[1].trim().replace(/^(['"])(.*)\1$/, '$2');
                if (value)
                    return value;
            }
        }
    }
    catch {
        // Best effort: an unreadable .dev.vars just means this source is unavailable.
    }
    console.error('Admin access not found. Create ~/.latinfo/admin.secret or set LATINFO_ADMIN_SECRET.');
    process.exit(1);
}
1548
+ // --- Pipe: gate status tracking ---
1549
+ const PIPE_STATUS_DIR = path_1.default.join(CONFIG_DIR, 'pipe-status');
1550
/**
 * Read the stored gate status of a source from `<PIPE_STATUS_DIR>/<source>.json`.
 *
 * A missing, unreadable or malformed file yields a fresh `{ source }` record.
 * The same fallback applies when the file holds valid JSON that is not a plain
 * object (null, array, number, ...), because callers index into the result.
 *
 * @param {string} source - Source name, used as the file stem.
 * @returns {object} The stored status object, or `{ source }`.
 */
function loadPipeStatus(source) {
    const file = path_1.default.join(PIPE_STATUS_DIR, `${source}.json`);
    try {
        const parsed = JSON.parse(fs_1.default.readFileSync(file, 'utf-8'));
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed))
            return parsed;
    }
    catch {
        // No usable status on disk: fall through to a fresh record.
    }
    return { source };
}
1559
/**
 * Persist a gate status record as pretty-printed JSON.
 *
 * The file is `<PIPE_STATUS_DIR>/<status.source>.json`; the directory is
 * created first when it does not exist yet.
 *
 * @param {object} status - Status record; `status.source` names the file.
 * @returns {void}
 */
function savePipeStatus(status) {
    fs_1.default.mkdirSync(PIPE_STATUS_DIR, { recursive: true });
    const target = path_1.default.join(PIPE_STATUS_DIR, `${status.source}.json`);
    const serialized = JSON.stringify(status, null, 2);
    fs_1.default.writeFileSync(target, serialized);
}
1563
/**
 * Abort unless a prerequisite gate has passed.
 *
 * When `status[gate].passed` is falsy (or the gate entry is absent), two
 * diagnostic lines are printed to stderr and the process exits with 1.
 *
 * @param {object} status - Gate status record of the source.
 * @param {string} gate - Gate that must already have passed.
 * @param {string} forGate - Gate the caller is about to run (used in the message).
 * @returns {void}
 */
function requireGate(status, gate, forGate) {
    const entry = status[gate];
    const passed = entry == null ? undefined : entry.passed;
    if (passed)
        return;
    console.error(`[pipe] Gate "${gate}" has not passed. Run: latinfo pipe ${gate} ${status.source}`);
    console.error(`[pipe] Cannot proceed to "${forGate}" until "${gate}" passes.`);
    process.exit(1);
}
1570
/**
 * Scaffold a new source definition at `<repo>/sources/<country>-<institution>-<dataset>.yaml`.
 *
 * Expects three positional arguments (country, institution, dataset) plus
 * optional `--flag value` pairs: --url, --id-name, --id-length, --encoding,
 * --delimiter, --format. Invalid input prints an explanation and exits with 1.
 * Nothing is written when the source file already exists.
 *
 * @param {string[]} args - CLI arguments following `pipe create`.
 * @returns {Promise<void>}
 */
async function pipeCreate(args) {
    // Separate positional args from flags. Every `--flag` consumes the next
    // token as its value; the first occurrence of a flag wins.
    const positional = [];
    const flagValues = new Map();
    for (let i = 0; i < args.length; i++) {
        if (args[i].startsWith('--')) {
            if (!flagValues.has(args[i]))
                flagValues.set(args[i], args[i + 1] || '');
            i++; // skip flag value
        }
        else {
            positional.push(args[i]);
        }
    }
    const [country, institution, dataset] = positional;
    if (!country || !institution || !dataset) {
        console.error(`Error: exactly 3 positional arguments required: <country> <institution> <dataset>

NAMING RULES
Source name = {country}-{institution}-{dataset}
All lowercase, hyphens only, english.

country: ISO 3166-1 alpha-2 (pe, co, br, mx, ec, ar, cl)
institution: government agency abbreviation (sunat, osce, oece, rues, redam, sat, indecopi)
dataset: what the data contains (padron, coactiva, sanctioned, fines, tenders, registry)

EXAMPLES
latinfo pipe create pe sunat padron --url https://sunat.gob.pe/data.zip
latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/data --id-name dni --id-length 8
latinfo pipe create co rues registry --url https://datos.gov.co/data.csv --id-name nit --id-length 10

WRONG
latinfo pipe create pe redam ← missing dataset (3rd argument)
latinfo pipe create pe-redam-registry ← don't use hyphens, use spaces
latinfo pipe create pe redam deudores ← use english: "debtors" not "deudores"

FLAGS
--url <url> Source data download URL
--id-name <name> Primary ID field name (default: id)
--id-length <n> Primary ID length in digits (default: 11)
--encoding <enc> utf-8 | iso-8859-1 (default: utf-8)
--delimiter <d> Field delimiter (default: ,)
--format <fmt> csv | tsv | txt | xlsm (default: csv)`);
        process.exit(1);
    }
    // Validate country
    const validCountries = ['pe', 'co', 'br', 'mx', 'ec', 'ar', 'cl'];
    if (!validCountries.includes(country)) {
        console.error(`Error: invalid country "${country}". Must be one of: ${validCountries.join(', ')}`);
        process.exit(1);
    }
    // Validate no hyphens in parts (the hyphen is the separator in the source name).
    if (institution.includes('-') || dataset.includes('-')) {
        console.error(`Error: institution and dataset must not contain hyphens. Use separate arguments.\n Wrong: latinfo pipe create pe osce-sanctioned\n Right: latinfo pipe create pe osce sanctioned`);
        process.exit(1);
    }
    // Validate lowercase
    if (/[A-Z]/.test(institution + dataset)) {
        console.error(`Error: institution and dataset must be lowercase. Got: ${institution} ${dataset}`);
        process.exit(1);
    }
    const name = `${country}-${institution}-${dataset}`;
    const repo = getRepoPath();
    const yamlPath = path_1.default.join(repo, 'sources', `${name}.yaml`);
    if (fs_1.default.existsSync(yamlPath)) {
        console.error(`Source ${name} already exists: ${yamlPath}`);
        process.exit(1);
    }
    // An empty value (flag given last, without a value) falls back to the default.
    const getFlag = (flag) => flagValues.get(flag);
    const url = getFlag('--url') || 'https://example.com/data.csv';
    const idName = getFlag('--id-name') || 'id';
    const idLength = getFlag('--id-length') || '11';
    const encoding = getFlag('--encoding') || 'utf-8';
    const delimiter = getFlag('--delimiter') || ',';
    const format = getFlag('--format') || 'csv';
    // The length is spliced into a regex quantifier below, so it must be a number.
    if (!/^[1-9]\d*$/.test(idLength)) {
        console.error(`Error: --id-length must be a positive integer. Got: ${idLength}`);
        process.exit(1);
    }
    // Built line by line so the YAML nesting is explicit and independent of
    // this function's own indentation.
    // NOTE(review): nesting of primary_id / fields / smoke_test children is
    // reconstructed from the key grouping — confirm against an existing source file.
    const yaml = [
        `name: ${name}`,
        `country: ${country}`,
        `institution: ${institution}`,
        `dataset: ${dataset}`,
        `source: ${institution}-${dataset}`,
        '',
        `url: ${url}`,
        `format: ${format}`,
        `encoding: ${encoding}`,
        // JSON string syntax is valid YAML double-quoted syntax and escapes
        // quotes, backslashes and tabs in the delimiter.
        `delimiter: ${JSON.stringify(delimiter)}`,
        'skip_header: true',
        '',
        'primary_id:',
        `  name: ${idName}`,
        '  column: 0',
        `  length: ${idLength}`,
        `  regex: "^\\\\d{${idLength}}$"`,
        '  prefix_length: 5',
        '',
        'alternate_ids: []',
        '',
        'fields:',
        '  - name: name',
        '    column: 1',
        '    search: true',
        '  - name: status',
        '    column: 2',
        '',
        'import: custom',
        `import_script: src/imports/${name}.ts`,
        '',
        'schedule: manual',
        'change_detection: none',
        'min_rows: 100',
        '',
        'smoke_test:',
        '  id: ""',
        '  expect_field: name',
        '',
    ].join('\n');
    fs_1.default.writeFileSync(yamlPath, yaml);
    console.log(`Created: ${yamlPath}`);
    console.log(`\nNext steps:`);
    console.log(` 1. Edit ${yamlPath} to match your data source`);
    console.log(` 2. Write import script and upload: latinfo pipe script ${name} ./my-import.ts`);
    console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
    console.log(` 4. Test (100 records): latinfo pipe test ${name}`);
    console.log(` 5. Validate (all records): latinfo pipe validate ${name}`);
    console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
    console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
}
1697
/**
 * Install an import script for an existing source and reset its gates.
 *
 * Copies `<script.ts>` to `<repo>/src/imports/<source-name>.ts`, then clears
 * the test / validate / stage / publish entries of the stored status so the
 * pipeline has to be re-run against the new script.
 *
 * @param {string[]} args - `[sourceName, scriptPath]`.
 * @returns {Promise<void>}
 */
async function pipeScript(args) {
    const [sourceName, scriptPath] = args;
    if (!sourceName || !scriptPath) {
        console.error('Usage: latinfo pipe script <source-name> <script.ts>');
        process.exit(1);
    }
    const repo = getRepoPath();
    const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
    if (!fs_1.default.existsSync(yamlPath)) {
        console.error(`Source not found. Run first: latinfo pipe create ...`);
        process.exit(1);
    }
    const dest = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
    const src = path_1.default.resolve(scriptPath);
    if (!fs_1.default.existsSync(src)) {
        console.error(`Script not found: ${src}`);
        process.exit(1);
    }
    // The imports directory may be missing (e.g. repo path supplied via
    // LATINFO_REPO_PATH); create it so the copy does not fail with ENOENT.
    fs_1.default.mkdirSync(path_1.default.dirname(dest), { recursive: true });
    fs_1.default.copyFileSync(src, dest);
    console.log(`[pipe] Script copied: ${dest}`);
    // Reset gates (script changed, need to re-test)
    const status = loadPipeStatus(sourceName);
    delete status.test;
    delete status.validate;
    delete status.stage;
    delete status.publish;
    savePipeStatus(status);
    console.log(`[pipe] Gates reset — run: latinfo pipe test ${sourceName}`);
}
1726
/**
 * Record npm dependencies for a source and install them in the repository.
 *
 * The `dependencies:` list of `<repo>/sources/<source-name>.yaml` is replaced
 * (or appended when absent) with the given packages, then `npm install` is
 * run in the repository. Exits with 1 on bad usage, an unknown source, an
 * unsafe package spec, or a failed install.
 *
 * @param {string[]} args - `[sourceName, pkg1, pkg2, ...]`.
 * @returns {Promise<void>}
 */
async function pipeDeps(args) {
    const [sourceName, ...deps] = args;
    if (!sourceName || deps.length === 0) {
        console.error('Usage: latinfo pipe deps <source-name> <pkg1> [pkg2] ...');
        process.exit(1);
    }
    // The specs end up on a shell command line, so only accept characters that
    // occur in package names, versions, scopes and git/URL specs, and refuse a
    // leading '-' that npm would read as an option.
    const safeSpec = /^[@\w][\w@/.:^~+=#-]*$/;
    const rejected = deps.filter((dep) => !safeSpec.test(dep));
    if (rejected.length > 0) {
        console.error(`[pipe] Invalid package name(s): ${rejected.join(', ')}`);
        process.exit(1);
    }
    const repo = getRepoPath();
    const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
    if (!fs_1.default.existsSync(yamlPath)) {
        console.error(`Source not found: ${yamlPath}`);
        process.exit(1);
    }
    // Add dependencies to YAML
    const depsBlock = `dependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`;
    let yaml = fs_1.default.readFileSync(yamlPath, 'utf-8');
    if (yaml.includes('dependencies:')) {
        // Replace existing deps: the match runs up to the next top-level key or
        // the end of the file. A replacer function is used so that '$' in the
        // new text is never interpreted as a replacement pattern.
        yaml = yaml.replace(/dependencies:[\s\S]*?(?=\n\w|\n$|$)/, () => depsBlock);
    }
    else {
        yaml += `\n${depsBlock}`;
    }
    fs_1.default.writeFileSync(yamlPath, yaml);
    // Install deps in repo
    console.log(`[pipe] Installing: ${deps.join(', ')}...`);
    const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
    try {
        run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'inherit' });
        console.log(`[pipe] Dependencies installed and added to YAML.`);
    }
    catch {
        console.error(`[pipe] Failed to install dependencies.`);
        process.exit(1);
    }
}
1760
/**
 * Gate 1 of the pipe workflow: run the import for one source limited to
 * 100 records and verify that the expected artifacts were produced.
 *
 * Steps: install any npm dependencies listed in the source YAML, run the
 * source's own import script (or easypipe when no script exists), then look
 * for the generated .bin files, the V2 search index and the MPHF. The outcome
 * is recorded under `status.test`; the process exits with code 1 on failure.
 *
 * @param {string[]} args - CLI arguments; args[0] is the source name.
 * @returns {Promise<void>}
 */
async function pipeTest(args) {
    const [sourceName] = args;
    if (!sourceName) {
        console.error('Usage: latinfo pipe test <source-name>');
        process.exit(1);
    }
    // The name is interpolated into file paths and shell commands below, so
    // reject anything that could escape either of them.
    if (!/^[A-Za-z0-9][A-Za-z0-9_-]*$/.test(sourceName)) {
        console.error(`Invalid source name: ${sourceName}`);
        process.exit(1);
    }
    const repo = getRepoPath();
    const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
    if (!fs_1.default.existsSync(yamlPath)) {
        console.error(`Source not found: ${yamlPath}`);
        process.exit(1);
    }
    const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const status = loadPipeStatus(sourceName);
    const errors = [];
    // A missing directory is treated as empty instead of throwing ENOENT.
    const listDir = (dir) => (fs_1.default.existsSync(dir) ? fs_1.default.readdirSync(dir) : []);
    // Install deps from YAML if present
    const yamlContent = fs_1.default.readFileSync(yamlPath, 'utf-8');
    const depsMatch = yamlContent.match(/dependencies:\n([\s\S]*?)(?=\n\w|\n$|$)/);
    const deps = depsMatch
        ? depsMatch[1].split('\n').map(l => l.replace(/^\s*-\s*/, '').trim()).filter(Boolean)
        : [];
    if (deps.length > 0) {
        console.log(`[pipe] Installing dependencies: ${deps.join(', ')}...`);
        try {
            run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'pipe' });
        }
        catch (e) {
            // Best effort: the packages may already be installed, so keep going,
            // but do not hide the failure from the user.
            console.error(`[pipe] Warning: dependency install failed: ${e?.message ?? e}`);
        }
    }
    // Run import with --limit 100
    const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
    const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
    const useEasypipe = !fs_1.default.existsSync(scriptPath);
    // Paths are quoted so a repo location containing spaces does not split the command.
    const cmd = useEasypipe
        ? `npx tsx "${easypipePath}" "${yamlPath}" --limit 100 --local`
        : `npx tsx "${scriptPath}" --limit 100`;
    console.log(`[pipe] Gate 1: TEST (100 records)\n`);
    console.log(`Running: ${cmd}\n`);
    try {
        run(cmd, { stdio: 'inherit', cwd: repo });
    }
    catch {
        errors.push('Import script failed');
    }
    // Validate output files exist
    const outDir = `/tmp/${sourceName}-import`;
    if (errors.length === 0) {
        // Prefer the per-source output directory; fall back to /tmp when it was not created.
        const binDir = fs_1.default.existsSync(outDir) ? outDir : '/tmp';
        const binFiles = listDir(binDir).filter(f => f.startsWith(sourceName) && f.endsWith('.bin'));
        if (binFiles.length === 0)
            errors.push('No .bin files generated');
        // The .idx file is not validated here: the previous check had an empty body.
        const tmpFiles = listDir('/tmp');
        // Check for V2 search index
        const searchIdx = tmpFiles.filter(f => f.includes(sourceName) && f.includes('search.idx'));
        if (searchIdx.length === 0)
            errors.push('No V2 search index generated — use statusFieldIndex in buildSearchIndex');
        // Check for MPHF
        const mphf = tmpFiles.filter(f => f.includes(sourceName) && f.includes('.mphf'));
        if (mphf.length === 0)
            errors.push('No MPHF generated — call buildMphfFromIdx after buildSearchIndex');
    }
    if (errors.length > 0) {
        console.error(`\n[pipe] Gate 1 FAILED:`);
        for (const e of errors)
            console.error(` ✗ ${e}`);
        status.test = { passed: false, timestamp: new Date().toISOString(), errors };
        savePipeStatus(status);
        process.exit(1);
    }
    console.log(`\n[pipe] Gate 1 PASSED ✓`);
    console.log(`[pipe] Next: latinfo pipe validate ${sourceName}`);
    status.test = { passed: true, timestamp: new Date().toISOString(), records: 100 };
    savePipeStatus(status);
}
1835
/**
 * Gate 2 of the pipe workflow: run the full (unlimited) import locally.
 *
 * Requires the `test` gate to have passed. Runs the source's own import
 * script, or easypipe with the source YAML when no script exists, and records
 * the outcome under `status.validate`. Exits with code 1 when the import
 * command fails.
 *
 * @param {string[]} args - CLI arguments; args[0] is the source name.
 * @returns {Promise<void>}
 */
async function pipeValidate(args) {
    const [sourceName] = args;
    if (!sourceName) {
        console.error('Usage: latinfo pipe validate <source-name>');
        process.exit(1);
    }
    // The name ends up inside a shell command, so keep it to a safe character set.
    if (!/^[A-Za-z0-9][A-Za-z0-9_-]*$/.test(sourceName)) {
        console.error(`Invalid source name: ${sourceName}`);
        process.exit(1);
    }
    const status = loadPipeStatus(sourceName);
    requireGate(status, 'test', 'validate');
    const repo = getRepoPath();
    const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
    const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
    const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
    const useEasypipe = !fs_1.default.existsSync(scriptPath);
    // Paths are quoted so a repo location containing spaces does not split the command.
    const cmd = useEasypipe
        ? `npx tsx "${easypipePath}" "${yamlPath}" --local`
        : `npx tsx "${scriptPath}"`;
    console.log(`[pipe] Gate 2: VALIDATE (full import)\n`);
    console.log(`Running: ${cmd}\n`);
    // Only the import itself is guarded, so a failure while saving the status
    // is not misreported as an import crash.
    let importFailed = false;
    try {
        run(cmd, { cwd: repo, stdio: 'inherit' });
    }
    catch {
        importFailed = true;
    }
    if (importFailed) {
        console.error(`\n[pipe] Gate 2 FAILED — full import crashed`);
        status.validate = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed'] };
        savePipeStatus(status);
        process.exit(1);
        return;
    }
    console.log(`\n[pipe] Gate 2 PASSED ✓`);
    console.log(`[pipe] Next: latinfo pipe stage ${sourceName}`);
    status.validate = { passed: true, timestamp: new Date().toISOString() };
    savePipeStatus(status);
}
1867
/**
 * Gate 3 of the pipe workflow: run the import on the staging runner over SSH
 * and, when its search server answers, benchmark it with 500 concurrent
 * requests.
 *
 * Requires the `validate` gate to have passed. The gate fails when the remote
 * import fails, when the bench cannot be completed or parsed, or when the
 * bench success rate is below 99.9%. When the search server does not answer
 * the initial probe the bench is skipped and the gate passes on the import
 * alone. The outcome is recorded under `status.stage`.
 *
 * NOTE(review): the remote import always runs src/imports/<source>.ts; unlike
 * pipeTest/pipeValidate there is no easypipe fallback — confirm that is intended.
 *
 * @param {string[]} args - CLI arguments; args[0] is the source name.
 * @returns {Promise<void>}
 */
async function pipeStage(args) {
    const [sourceName] = args;
    if (!sourceName) {
        console.error('Usage: latinfo pipe stage <source-name>');
        process.exit(1);
    }
    // The name is embedded in remote shell commands and in the inline bench
    // script, so restrict it to characters that cannot break out of either.
    if (!/^[A-Za-z0-9][A-Za-z0-9_-]*$/.test(sourceName)) {
        console.error(`Invalid source name: ${sourceName}`);
        process.exit(1);
    }
    const status = loadPipeStatus(sourceName);
    requireGate(status, 'validate', 'stage');
    const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const RUNNER = 'f3mt0@100.109.82.87';
    const REMOTE_REPO = '~/actions-runner/_work/latinfo-api/latinfo-api';
    const BENCH_TOTAL = 500;
    const MIN_SUCCESS_RATE = 99.9;
    const searchUrl = `http://localhost:3001/search?source=${sourceName}`;
    // Records a failed stage gate and terminates the process.
    const failStage = (message, details) => {
        console.error(message);
        status.stage = { passed: false, timestamp: new Date().toISOString(), ...details };
        savePipeStatus(status);
        process.exit(1);
    };
    console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
    // NOTE(review): the returned path is not used in this function; the call is
    // kept in case getRepoPath validates the environment — confirm.
    getRepoPath();
    // 1. Bring the runner's checkout up to date
    console.log('[pipe] Syncing repo on Linux Mint...');
    try {
        run(`ssh ${RUNNER} "cd ${REMOTE_REPO} && git pull"`, { stdio: 'inherit' });
    }
    catch {
        console.error(`[pipe] SSH failed. Is Linux Mint running? Check: ssh ${RUNNER}`);
        process.exit(1);
    }
    // 2. Run import on Linux Mint
    const scriptPath = `src/imports/${sourceName}.ts`;
    console.log(`[pipe] Running import on Linux Mint...`);
    try {
        run(`ssh ${RUNNER} "cd ${REMOTE_REPO} && npx tsx ${scriptPath}"`, {
            stdio: 'inherit', timeout: 600_000,
        });
    }
    catch {
        failStage('[pipe] Import failed on Linux Mint', { errors: ['Import failed on runner'] });
        return;
    }
    // 3. Bench: 500 concurrent on Linux Mint search server
    console.log(`\n[pipe] Running bench (${BENCH_TOTAL} concurrent)...`);
    // Probe first: only an unreachable server may skip the bench. Failures of
    // the bench itself must not be mistaken for "server not running".
    let serverUp = true;
    try {
        run(`ssh ${RUNNER} "curl -s '${searchUrl}&q=test'"`, { encoding: 'utf-8', stdio: 'pipe' });
    }
    catch {
        serverUp = false;
    }
    if (!serverUp) {
        console.log(`[pipe] Search server not running on Linux Mint — skipping bench`);
        console.log(`[pipe] Gate 3 PASSED ✓ (import only, no bench)`);
        status.stage = { passed: true, timestamp: new Date().toISOString() };
        savePipeStatus(status);
        return;
    }
    // Inline script executed with `node -e` on the runner. It is wrapped in
    // escaped double quotes, so it must not contain double quotes, dollar signs,
    // backticks or backslashes. Any response that parses as JSON counts as a
    // success, including one with an empty result list.
    const benchScript = `
const TOTAL=${BENCH_TOTAL}, CONC=${BENCH_TOTAL};
const queries=['test','banco','empresa','servicios','construccion','transporte','grupo','sociedad','comercial','industrial'];
let idx=0,success=0,fails=0;const lats=[];
function go(){if(idx>=TOTAL)return Promise.resolve();const i=idx++,q=queries[i%queries.length],t0=Date.now();
return fetch('${searchUrl}&q='+encodeURIComponent(q))
.then(r=>r.json()).then(d=>{lats.push(Date.now()-t0);d.results&&d.results.length>0?success++:success++}).catch(()=>{lats.push(Date.now()-t0);fails++}).then(()=>go());}
const t0=Date.now();
Promise.all(Array.from({length:CONC},()=>go())).then(()=>{
lats.sort((a,b)=>a-b);
console.log(JSON.stringify({total_ms:Date.now()-t0,success,fails,qps:Math.round(TOTAL/((Date.now()-t0)/1000)),
p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99:lats[Math.floor(lats.length*0.99)]}));
});`;
    let bench;
    try {
        const benchOutput = run(`ssh ${RUNNER} "node -e \\"${benchScript}\\""`, {
            encoding: 'utf-8', stdio: 'pipe', timeout: 60_000,
        });
        bench = JSON.parse(benchOutput.trim());
    }
    catch (e) {
        failStage(`\n[pipe] Gate 3 FAILED — bench did not complete: ${e?.message ?? e}`, { errors: ['Bench failed on runner'] });
        return;
    }
    const successRate = Number(bench?.success) / BENCH_TOTAL * 100;
    // A missing or non-numeric success count must fail the gate: NaN would
    // otherwise slip past the threshold comparison below.
    if (!Number.isFinite(successRate)) {
        failStage(`\n[pipe] Gate 3 FAILED — bench returned no success count`, { errors: ['Bench output missing success count'] });
        return;
    }
    const benchSummary = { concurrent: BENCH_TOTAL, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 };
    console.log(`\n ${BENCH_TOTAL} concurrent: ${bench.qps} q/s, ${successRate.toFixed(1)}% success`);
    console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
    console.log(` Failures: ${bench.fails}`);
    if (successRate < MIN_SUCCESS_RATE) {
        failStage(`\n[pipe] Gate 3 FAILED — success rate ${successRate.toFixed(1)}% < ${MIN_SUCCESS_RATE}%`, { bench: benchSummary });
        return;
    }
    console.log(`\n[pipe] Gate 3 PASSED ✓`);
    console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
    status.stage = { passed: true, timestamp: new Date().toISOString(), bench: benchSummary };
    savePipeStatus(status);
}
1945
/**
 * Gate 4 of the pipe workflow: publish a source to production.
 *
 * Requires the `test`, `validate` and `stage` gates to have passed. Commits
 * and pushes the source files, deploys the Worker with wrangler (reverting
 * the commit when the deploy fails), triggers the import workflow via `gh`,
 * and asks the runner to restart its search server. The outcome is recorded
 * under `status.publish`; git and deploy failures exit with code 1.
 *
 * @param {string[]} args - CLI arguments; args[0] is the source name.
 * @returns {Promise<void>}
 */
async function pipePublish(args) {
    const [sourceName] = args;
    if (!sourceName) {
        console.error('Usage: latinfo pipe publish <source-name>');
        process.exit(1);
    }
    // The name is embedded in git, gh and ssh command lines, so restrict it
    // to characters that cannot alter those commands.
    if (!/^[A-Za-z0-9][A-Za-z0-9_-]*$/.test(sourceName)) {
        console.error(`Invalid source name: ${sourceName}`);
        process.exit(1);
    }
    const status = loadPipeStatus(sourceName);
    requireGate(status, 'test', 'publish');
    requireGate(status, 'validate', 'publish');
    requireGate(status, 'stage', 'publish');
    const repo = getRepoPath();
    const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const RUNNER = 'f3mt0@100.109.82.87';
    // Records a failed publish gate (like the other gates do) and terminates.
    const failPublish = (reason) => {
        status.publish = { passed: false, timestamp: new Date().toISOString(), errors: [reason] };
        savePipeStatus(status);
        process.exit(1);
    };
    console.log(`[pipe] Gate 4: PUBLISH\n`);
    // 1. Git add + commit + push
    console.log(`[pipe] Committing to repo...`);
    const files = [`sources/${sourceName}.yaml`];
    const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
    if (fs_1.default.existsSync(scriptPath))
        files.push(`src/imports/${sourceName}.ts`);
    try {
        run(`git add ${files.join(' ')} src/sources.ts .github/workflows/import.yml`, { cwd: repo, stdio: 'pipe' });
        run(`git commit -m "Add data source: ${sourceName}"`, { cwd: repo, stdio: 'pipe' });
        run(`git push`, { cwd: repo, stdio: 'pipe' });
        console.log(`[pipe] Pushed to remote.`);
    }
    catch (e) {
        console.error(`[pipe] Git error: ${e?.message ?? e}`);
        failPublish('Git commit/push failed');
        return;
    }
    // 2. Deploy Worker
    console.log(`[pipe] Deploying Worker...`);
    let deployed = true;
    try {
        run(`npx wrangler deploy`, { cwd: repo, stdio: 'inherit' });
    }
    catch {
        deployed = false;
    }
    if (!deployed) {
        console.error(`[pipe] Deploy failed — rolling back`);
        try {
            run(`git revert HEAD --no-edit && git push`, { cwd: repo, stdio: 'pipe' });
        }
        catch (e) {
            // Surface a failed rollback explicitly: the pushed commit is still on the remote.
            console.error(`[pipe] Rollback failed — revert the commit manually: ${e?.message ?? e}`);
        }
        failPublish('Worker deploy failed');
        return;
    }
    // 3. Trigger import on runner
    console.log(`[pipe] Triggering import...`);
    try {
        run(`gh workflow run import.yml -f source=${sourceName}`, { cwd: repo, stdio: 'inherit' });
    }
    catch {
        console.log(`[pipe] Could not trigger workflow automatically.`);
    }
    // 4. Restart search server
    console.log(`[pipe] Restarting search server on Linux Mint...`);
    try {
        run(`ssh ${RUNNER} "sudo systemctl restart search-server 2>/dev/null || echo 'No service yet'"`, { stdio: 'inherit' });
    }
    catch (e) {
        // Best effort: the source is already deployed, so only warn.
        console.error(`[pipe] Warning: could not restart search server: ${e?.message ?? e}`);
    }
    console.log(`\n[pipe] Gate 4 PASSED ✓`);
    console.log(`[pipe] ${sourceName} is LIVE`);
    console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
    console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
    status.publish = { passed: true, timestamp: new Date().toISOString() };
    savePipeStatus(status);
}
2006
/**
 * Print gate status.
 *
 * With a source name, prints one line per gate (test, validate, stage,
 * publish) including timestamp, record count, bench summary and recorded
 * errors. Without a name, prints one summary line per status file found in
 * PIPE_STATUS_DIR.
 *
 * @param {string[]} args - CLI arguments; args[0] is an optional source name.
 * @returns {Promise<void>}
 */
async function pipeStatus(args) {
    const [sourceName] = args;
    const GATES = ['test', 'validate', 'stage', 'publish'];
    if (sourceName) {
        const status = loadPipeStatus(sourceName);
        console.log(`Source: ${sourceName}\n`);
        for (const gate of GATES) {
            const g = status[gate];
            if (!g) {
                console.log(` ${gate}: ⬚ not run`);
                continue;
            }
            const icon = g.passed ? '✓' : '✗';
            let extra = '';
            if (g.bench) {
                // A rate that was not a finite number is stored as null by JSON; do not crash on it.
                const rate = typeof g.bench.success_rate === 'number' ? g.bench.success_rate.toFixed(1) : '?';
                extra = ` (${g.bench.concurrent} concurrent, ${rate}%, p99: ${g.bench.p99}ms)`;
            }
            const records = g.records ? ` (${g.records} records)` : '';
            console.log(` ${gate}: ${icon} ${g.timestamp}${records}${extra}`);
            if (g.errors)
                for (const e of g.errors)
                    console.log(` ✗ ${e}`);
        }
        return;
    }
    // List all sources with status
    if (!fs_1.default.existsSync(PIPE_STATUS_DIR)) {
        console.log('No sources tracked yet.');
        return;
    }
    const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
    if (files.length === 0) {
        console.log('No sources tracked yet.');
        return;
    }
    for (const f of files) {
        let s;
        try {
            s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
            if (s === null || typeof s !== 'object')
                throw new Error('not an object');
        }
        catch {
            // One unreadable file must not abort the whole listing.
            console.log(` ${f.slice(0, -'.json'.length)} [unreadable status file]`);
            continue;
        }
        const icons = GATES.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
        console.log(` ${s.source ?? f.slice(0, -'.json'.length)} [${icons}]`);
    }
}
2042
/**
 * Entry point for `latinfo pipe <subcommand> [...args]`.
 *
 * Calls requireAdmin() first, then dispatches to the matching pipe* handler.
 * `list` prints the YAML sources found in the repo, `run` triggers the import
 * workflow for an existing source, and any other subcommand prints the help
 * text.
 *
 * @param {string[]} args - CLI arguments after `pipe`; args[0] is the subcommand.
 * @returns {Promise<void>}
 */
async function pipe(args) {
    requireAdmin();
    const [subcommand, ...subArgs] = args;
    switch (subcommand) {
        case 'create':
            await pipeCreate(subArgs);
            break;
        case 'script':
            await pipeScript(subArgs);
            break;
        case 'deps':
            await pipeDeps(subArgs);
            break;
        case 'test':
            await pipeTest(subArgs);
            break;
        case 'validate':
            await pipeValidate(subArgs);
            break;
        case 'stage':
            await pipeStage(subArgs);
            break;
        case 'publish':
            await pipePublish(subArgs);
            break;
        case 'status':
            await pipeStatus(subArgs);
            break;
        case 'list': {
            const repo = getRepoPath();
            const suffix = '.yaml';
            const yamls = fs_1.default.readdirSync(path_1.default.join(repo, 'sources')).filter(f => f.endsWith(suffix));
            // Strip only the trailing extension, not the first ".yaml" occurrence.
            for (const y of yamls)
                console.log(` ${y.slice(0, -suffix.length)}`);
            break;
        }
        case 'run': {
            const [source] = subArgs;
            if (!source) {
                console.error('Usage: latinfo pipe run <source-name>');
                process.exit(1);
            }
            // The name is placed on a shell command line, so keep it to a safe character set.
            if (!/^[A-Za-z0-9][A-Za-z0-9_-]*$/.test(source)) {
                console.error(`Invalid source name: ${source}`);
                process.exit(1);
            }
            const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
            try {
                run(`gh workflow run import.yml -f source=${source}`, { cwd: getRepoPath(), stdio: 'inherit' });
            }
            catch {
                console.error('Failed to trigger import.');
                // Let callers and scripts see the failure.
                process.exitCode = 1;
            }
            break;
        }
        default:
            console.log(`latinfo pipe — data pipeline for public government records

DO NOT use SQLite, Supabase, Postgres, or any other database.
latinfo handles storage, indexing, full-text search, and API serving automatically.
Your only job is to download and parse the source data into a TSV file.

COMMANDS
create <country> <institution> <dataset> [flags] Create source (YAML template)
script <source> <file.ts> Upload import script
deps <source> <pkg1> [pkg2] ... Add npm dependencies
test <source> Gate 1: test 100 records locally
validate <source> Gate 2: full import locally
stage <source> Gate 3: import + 500 bench on Linux Mint
publish <source> Gate 4: deploy to production
status [source] Show gate status
list List all sources
run <source> Re-run import (existing source)

GATES (each must pass before the next unlocks)
test → 100 records, validates IDs, encoding, V2 search, MPHF
validate → full import, all records, field validation
stage → Linux Mint: import + 500 concurrent bench (99.9% required)
publish → production: deploy + smoke test + bench + rollback on failure

WORKFLOW
1. latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/... --id-name dni --id-length 8
2. Write your import script (fetch, Playwright, crawler — anything that produces TSV)
3. latinfo pipe script pe-redam-registry ./my-crawler.ts
4. latinfo pipe deps pe-redam-registry playwright ddddocr
5. latinfo pipe test pe-redam-registry
6. latinfo pipe validate pe-redam-registry
7. latinfo pipe stage pe-redam-registry
8. latinfo pipe publish pe-redam-registry

SCRIPT REQUIREMENTS
Your script must produce a sorted TSV and upload to R2:
1. Download source data (any method)
2. Parse to TSV: ID\\tfield1\\tfield2\\t...
3. Sort: LC_ALL=C sort -t'\\t' -k1,1
4. buildBinaryFiles() — generates .bin + .idx
5. buildSearchIndex() with statusFieldIndex (V2 MANDATORY)
6. buildMphfFromIdx() (MPHF MANDATORY)
7. uploadToR2() for each file
8. saveImportMeta()

See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.

NAMING
{country}-{institution}-{dataset}, all lowercase english.
Examples: pe-sunat-padron, pe-osce-sanctioned, co-rues-registry

ENVIRONMENT
LATINFO_ADMIN_SECRET Auto-detected from ~/.latinfo/admin.secret or .dev.vars
LATINFO_REPO_PATH Auto-detected from cwd`);
    }
}
1412
2142
  // --- Main ---
1413
2143
  const [command, ...args] = rawArgs;
2144
+ const COUNTRIES = ['pe', 'co', 'br', 'mx', 'ar', 'cl', 'ec'];
1414
2145
  if (rawArgs.includes('--version') || rawArgs.includes('-v')) {
1415
2146
  version();
1416
2147
  }
2148
+ else if (COUNTRIES.includes(command)) {
2149
+ // New structure: latinfo <country> <institution> <dataset> [args]
2150
+ const [institution, dataset, ...datasetArgs] = args;
2151
+ if (!institution || !dataset) {
2152
+ console.error(`Usage: latinfo ${command} <institution> <dataset> <id|--search query|--dni id>`);
2153
+ console.error(`Example: latinfo ${command} sunat padron 20100047218`);
2154
+ process.exit(1);
2155
+ }
2156
+ // Special case: pe oece tenders → licitaciones (custom routes)
2157
+ if (command === 'pe' && institution === 'oece' && dataset === 'tenders') {
2158
+ licitaciones(datasetArgs).catch(e => { console.error(e); process.exit(1); });
2159
+ }
2160
+ else {
2161
+ const routePath = `/${command}/${institution}/${dataset}`;
2162
+ sourceQuery(routePath, datasetArgs).catch(e => { console.error(e); process.exit(1); });
2163
+ }
2164
+ }
1417
2165
  else {
2166
+ // Admin commands (flat)
1418
2167
  switch (command) {
1419
2168
  case 'login':
1420
2169
  login(tokenFlag).catch(e => { console.error(e); process.exit(1); });
@@ -1442,25 +2191,15 @@ else {
1442
2191
  case 'costs':
1443
2192
  (liveFlag ? costsLive() : Promise.resolve(costsSimulate(args[0], args[1], args[2]))).catch(e => { console.error(e); process.exit(1); });
1444
2193
  break;
1445
- case 'ruc':
1446
- ruc(args[0]).catch(e => { console.error(e); process.exit(1); });
1447
- break;
1448
- case 'dni':
1449
- dni(args[0]).catch(e => { console.error(e); process.exit(1); });
1450
- break;
1451
- case 'search':
1452
- search(args.join(' ')).catch(e => { console.error(e); process.exit(1); });
1453
- break;
1454
- case 'debtors':
1455
- debtors(args).catch(e => { console.error(e); process.exit(1); });
1456
- break;
1457
- case 'licitaciones':
1458
- case 'lic':
1459
- licitaciones(args).catch(e => { console.error(e); process.exit(1); });
1460
- break;
1461
2194
  case 'bench':
1462
2195
  bench(args).catch(e => { console.error(e); process.exit(1); });
1463
2196
  break;
2197
+ case 'pipe':
2198
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
2199
+ break;
2200
+ case 'admin':
2201
+ pipe(args).catch(e => { console.error(e); process.exit(1); });
2202
+ break; // backward compat
1464
2203
  case 'easypipe':
1465
2204
  case 'ep':
1466
2205
  easypipe(args).catch(e => { console.error(e); process.exit(1); });
@@ -1471,6 +2210,23 @@ else {
1471
2210
  case 'help':
1472
2211
  help();
1473
2212
  break;
2213
+ // Backward compat: old flat commands redirect
2214
+ case 'ruc':
2215
+ sourceQuery('/pe/sunat/padron', args).catch(e => { console.error(e); process.exit(1); });
2216
+ break;
2217
+ case 'dni':
2218
+ sourceQuery('/pe/sunat/padron', ['--dni', ...args]).catch(e => { console.error(e); process.exit(1); });
2219
+ break;
2220
+ case 'search':
2221
+ search(args.join(' ')).catch(e => { console.error(e); process.exit(1); });
2222
+ break;
2223
+ case 'debtors':
2224
+ sourceQuery('/pe/sunat/coactiva', args).catch(e => { console.error(e); process.exit(1); });
2225
+ break;
2226
+ case 'licitaciones':
2227
+ case 'lic':
2228
+ licitaciones(args).catch(e => { console.error(e); process.exit(1); });
2229
+ break;
1474
2230
  default:
1475
2231
  printLogo();
1476
2232
  help();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "latinfo",
3
- "version": "0.9.0",
3
+ "version": "0.11.0",
4
4
  "description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
5
5
  "homepage": "https://latinfo.dev",
6
6
  "repository": {