latinfo 0.19.2 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -63,6 +63,10 @@ function jsonError(error, message) {
63
63
  process.exit(1);
64
64
  }
65
65
  function loadConfig() {
66
+ // Env var takes priority — zero friction for agents
67
+ if (process.env.LATINFO_API_KEY) {
68
+ return { api_key: process.env.LATINFO_API_KEY, github_username: '', is_team: true, team_role: 'member' };
69
+ }
66
70
  try {
67
71
  return JSON.parse(fs_1.default.readFileSync(CONFIG_FILE, 'utf-8'));
68
72
  }
@@ -169,8 +173,19 @@ async function login(token) {
169
173
  process.exit(1);
170
174
  }
171
175
  const authData = await authRes.json();
172
- saveConfig({ api_key: authData.api_key, github_username: authData.github_username });
173
- console.log(`Logged in as ${authData.github_username}`);
176
+ const config = { api_key: authData.api_key, github_username: authData.github_username };
177
+ // Check team membership
178
+ try {
179
+ const teamRes = await fetch(`${API_URL}/team/me`, { headers: { Authorization: `Bearer ${config.api_key}` } });
180
+ if (teamRes.ok) {
181
+ const team = await teamRes.json();
182
+ config.is_team = true;
183
+ config.team_role = team.role;
184
+ }
185
+ }
186
+ catch { }
187
+ saveConfig(config);
188
+ console.log(`Logged in as ${authData.github_username}${config.is_team ? ` (team: ${config.team_role})` : ''}`);
174
189
  return;
175
190
  }
176
191
  // OAuth login: opens browser
@@ -194,8 +209,19 @@ async function login(token) {
194
209
  process.exit(1);
195
210
  }
196
211
  const authData = await authRes.json();
197
- saveConfig({ api_key: authData.api_key, github_username: authData.github_username });
198
- console.log(`Logged in as ${authData.github_username}`);
212
+ const config = { api_key: authData.api_key, github_username: authData.github_username };
213
+ // Check team membership
214
+ try {
215
+ const teamRes = await fetch(`${API_URL}/team/me`, { headers: { Authorization: `Bearer ${config.api_key}` } });
216
+ if (teamRes.ok) {
217
+ const team = await teamRes.json();
218
+ config.is_team = true;
219
+ config.team_role = team.role;
220
+ }
221
+ }
222
+ catch { }
223
+ saveConfig(config);
224
+ console.log(`Logged in as ${authData.github_username}${config.is_team ? ` (team: ${config.team_role})` : ''}`);
199
225
  }
200
226
  async function ruc(rucNumber) {
201
227
  if (!rucNumber || !/^\d{11}$/.test(rucNumber)) {
@@ -477,10 +503,17 @@ async function search(query) {
477
503
  function whoami() {
478
504
  const config = requireAuth();
479
505
  if (jsonFlag) {
480
- console.log(JSON.stringify({ username: config.github_username, api_key: config.api_key }));
506
+ console.log(JSON.stringify({ username: config.github_username, api_key: config.api_key, is_team: config.is_team, team_role: config.team_role }));
481
507
  return;
482
508
  }
483
- console.log(config.github_username);
509
+ if (config.is_team) {
510
+ const badges = { admin: '★', member: '●' };
511
+ const badge = badges[config.team_role || 'member'] || '●';
512
+ console.log(`${badge} ${config.github_username} [TEAM ${(config.team_role || 'member').toUpperCase()}]`);
513
+ }
514
+ else {
515
+ console.log(config.github_username);
516
+ }
484
517
  }
485
518
  async function plan() {
486
519
  const config = requireAuth();
@@ -553,7 +586,7 @@ async function adminRequest(path) {
553
586
  return res;
554
587
  }
555
588
  async function importsRun(source) {
556
- const valid = ['pe-sunat-padron', 'pe-oece-licitaciones', 'co-rues', 'all'];
589
+ const valid = ['pe-sunat-padron', 'pe-oece-tenders', 'co-rues', 'all'];
557
590
  if (!valid.includes(source)) {
558
591
  console.error(`Unknown source. Valid: ${valid.join(', ')}`);
559
592
  process.exit(1);
@@ -907,9 +940,12 @@ async function seedBenchQueries(source, apiKey) {
907
940
  }
908
941
  // Non-existent search queries (edge case)
909
942
  source.searchQueries.push('xyznonexistent', 'qqq999', 'zzznodata');
910
- // Fallback: if API returned nothing, use seed queries
911
- if (source.searchQueries.length < 5) {
912
- source.searchQueries.push(...SEED_QUERIES);
943
+ // Fallback: if API returned nothing, skip search queries entirely.
944
+ // Using generic seed queries on a source with different data (e.g. person names
945
+ // vs business names) causes the search server to process irrelevant queries
946
+ // under 500 concurrent load, leading to timeouts and false bench failures.
947
+ if (source.searchQueries.filter(q => !['xyznonexistent', 'qqq999', 'zzznodata'].includes(q)).length === 0) {
948
+ source.searchQueries = [];
913
949
  }
914
950
  }
915
951
  async function benchStress(args) {
@@ -1447,11 +1483,12 @@ function logout() {
1447
1483
  console.log('Logged out.');
1448
1484
  }
1449
1485
  function help() {
1486
+ const config = loadConfig();
1487
+ const isTeam = config?.is_team;
1450
1488
  console.log(`latinfo v${VERSION} — Tax registry API for Latin America
1451
1489
 
1452
1490
  USAGE
1453
1491
  latinfo <country> <institution> <dataset> <id|--search query|--dni id> [--json]
1454
- latinfo <admin-command> [args]
1455
1492
 
1456
1493
  QUICK START
1457
1494
  npm install -g latinfo
@@ -1474,25 +1511,26 @@ DATA SOURCES
1474
1511
  latinfo pe osce fines <ruc> Provider fines
1475
1512
  latinfo pe osce fines --search <query>
1476
1513
 
1514
+ Peru — SERVIR
1515
+ latinfo pe servir sanctions <dni> Public sector sanctions
1516
+ latinfo pe servir sanctions --search <query>
1517
+
1518
+ Peru — REDAM
1519
+ latinfo pe redam registry <dni> Food debt debtors
1520
+ latinfo pe redam registry --search <query>
1521
+
1477
1522
  Peru — OECE
1478
1523
  latinfo pe oece tenders <query> [flags] Government procurement
1479
1524
  Flags: --category, --min-amount, --max-amount, --buyer, --status, --limit
1480
1525
 
1481
1526
  Colombia — RUES
1482
- latinfo co rues registry <nit> Business registry (3.3M records)
1527
+ latinfo co rues registry <nit> Business registry (9M+ records)
1483
1528
  latinfo co rues registry --search <query>
1484
1529
 
1485
- ADMIN
1530
+ COMMANDS
1486
1531
  login [--token <github_pat>] GitHub OAuth or PAT login
1487
1532
  logout Remove credentials
1488
1533
  whoami Show authenticated user
1489
- imports Show import status
1490
- imports run <source> Trigger import
1491
- imports report [days] Import diagnostics
1492
- costs <users> [avg_req] [pro_%] Cost simulation
1493
- costs --live Production cost report
1494
- bench [flags] Stress test API
1495
- easypipe <command> Generic import pipeline
1496
1534
  completion [bash|zsh] Shell completions
1497
1535
  help This help text
1498
1536
 
@@ -1502,13 +1540,34 @@ FLAGS
1502
1540
  --dni Lookup by DNI (Peru only)
1503
1541
  --version Print version
1504
1542
 
1505
- PRICING
1506
- Free 100,000 requests/day
1507
- Pro 10M requests/month $1/month
1508
-
1509
- CONFIG
1510
- ~/.latinfo/config.json API key
1511
- LATINFO_API_URL Override API URL`);
1543
+ Free and unlimited. No credit card needed.`);
1544
+ if (isTeam) {
1545
+ const isAdmin = config?.team_role === 'admin';
1546
+ console.log(`
1547
+ TEAM
1548
+ tasks My tasks
1549
+ tasks complete <id> Mark done
1550
+ tasks rank Team ranking
1551
+ pipe local <source> Import data locally
1552
+ pipe publish <source> Publish to production
1553
+ docs <topic> Internal documentation`);
1554
+ if (isAdmin) {
1555
+ console.log(`
1556
+ ADMIN
1557
+ team add <username> [--admin] Add team member
1558
+ team remove <username> Remove member
1559
+ team list List all members
1560
+ tasks assign <user> "<title>" [--points N] Assign task
1561
+ tasks approve <id> Approve + award points
1562
+ tasks reject <id> "<reason>" Reject back
1563
+ tasks delete <id> Delete task
1564
+ tasks list --all All tasks
1565
+ imports Show import status
1566
+ imports run <source> Trigger import
1567
+ bench [flags] Stress test API
1568
+ costs --live Production cost report`);
1569
+ }
1570
+ }
1512
1571
  }
1513
1572
  function printLogo() {
1514
1573
  if (!process.stdout.isTTY)
@@ -1903,17 +1962,165 @@ min_rows: 100
1903
1962
  smoke_test:
1904
1963
  id: ""
1905
1964
  expect_field: name
1965
+ `;
1966
+ // Derive script template parameters
1967
+ const idLengthNum = parseInt(idLength);
1968
+ const prefixLength = idLengthNum >= 11 ? 5 : 4;
1969
+ const idRegex = `^\\\\d{${idLengthNum}}$`;
1970
+ // camelCase function name: pe-redam-registry → importPeRedamRegistry
1971
+ const fnName = 'import' + name.split('-').map(s => s[0].toUpperCase() + s.slice(1)).join('');
1972
+ const scriptPath = path_1.default.join(repo, 'src', 'imports', `${name}.ts`);
1973
+ const script = `/**
1974
+ * Import ${name} into R2
1975
+ *
1976
+ * Source: ${url}
1977
+ *
1978
+ * TSV columns: ${idName} \\t name \\t status \\t [add more fields here]
1979
+ *
1980
+ * Usage: npx tsx src/imports/${name}.ts [--limit 100] [--no-upload]
1981
+ */
1982
+
1983
+ import * as fs from 'fs';
1984
+ import * as path from 'path';
1985
+ import { execSync } from 'child_process';
1986
+ import { buildBinaryFiles } from './build-binary';
1987
+ import { buildSearchIndex } from './build-search-index';
1988
+ import { uploadToR2, saveImportMeta, buildMphfFromIdx } from './shared';
1989
+
1990
+ const SOURCE = '${name}';
1991
+ const TEMP_DIR = \`/tmp/\${SOURCE}-import\`;
1992
+
1993
+ // ─── FIELD LAYOUT ────────────────────────────────────────────────────────────
1994
+ // Update FIELD_COUNT to match the number of columns after the ID.
1995
+ // searchFieldIndex: which field (0-based, after ID) contains the searchable name
1996
+ // statusFieldIndex: which field contains the active/inactive status
1997
+ const FIELD_COUNT = 2; // name, status ← update as you add fields
1998
+ const SEARCH_FIELD = 0; // 0 = name
1999
+ const STATUS_FIELD = 1; // 1 = status
2000
+
2001
+ // ─── TODO: fetch and parse your source data ──────────────────────────────────
2002
+ // Return rows as arrays: [id, name, status, ...other fields]
2003
+ // Each string value must NOT contain tabs or newlines.
2004
+ //
2005
+ // Common patterns:
2006
+ // CSV download → fetch(url), text.split('\\n'), line.split(',')
2007
+ // REST API → fetch(url, { method: 'POST', body: JSON.stringify({...}) })
2008
+ // ZIP file → execSync('curl -L url | funzip > file.csv')
2009
+ // Playwright → await page.goto(url); await page.$$eval(...)
2010
+ //
2011
+ // RECOMMENDED: export async function checkFresh(lastMeta): Promise<boolean>
2012
+ // Called before the import to skip if data hasn't changed.
2013
+ // REST APIs: fetch 1 record, compare max ID.
2014
+ // CSVs: HEAD request, compare Last-Modified header.
2015
+ async function fetchData(limit?: number): Promise<string[][]> {
2016
+ const rows: string[][] = [];
2017
+
2018
+ // TODO: replace this with your actual fetch + parse logic
2019
+ // Example (CSV):
2020
+ //
2021
+ // const res = await fetch('${url}');
2022
+ // const text = await res.text();
2023
+ // for (const line of text.split('\\n').slice(1)) { // slice(1) skips header
2024
+ // const cols = line.split(',');
2025
+ // const id = cols[0]?.trim().padStart(${idLengthNum}, '0');
2026
+ // if (!id || !/${idRegex.replace(/\\\\/g, '\\\\\\\\')}/.test(id)) continue;
2027
+ // const name = cols[1]?.trim() ?? '';
2028
+ // const status = cols[2]?.trim() ?? '';
2029
+ // rows.push([id, name, status]);
2030
+ // if (limit && rows.length >= limit) break;
2031
+ // }
2032
+
2033
+ return rows;
2034
+ }
2035
+ // ─────────────────────────────────────────────────────────────────────────────
2036
+
2037
+ function clean(s: string): string {
2038
+ return (s ?? '').trim().replace(/[\\t\\n\\r]/g, ' ').replace(/\\s+/g, ' ');
2039
+ }
2040
+
2041
+ function buildTsv(rows: string[][]): string {
2042
+ return rows.map(cols => cols.map(clean).join('\\t')).join('\\n');
2043
+ }
2044
+
2045
+ export async function ${fnName}(options?: { limit?: number; upload?: boolean }) {
2046
+ console.log(\`=== \${SOURCE.toUpperCase()} IMPORT ===\\n\`);
2047
+
2048
+ try {
2049
+ fs.mkdirSync(TEMP_DIR, { recursive: true });
2050
+
2051
+ const rows = await fetchData(options?.limit);
2052
+ const minRows = options?.limit ? Math.min(options.limit, 50) : 1000;
2053
+ if (rows.length < minRows) {
2054
+ console.error(\`[\${SOURCE}] Only \${rows.length} rows — expected at least \${minRows}, aborting\`);
2055
+ return false;
2056
+ }
2057
+
2058
+ const tsv = buildTsv(rows);
2059
+ const tsvPath = path.join(TEMP_DIR, 'parsed.tsv');
2060
+ fs.writeFileSync(tsvPath, tsv, 'utf-8');
2061
+ console.log(\`[\${SOURCE}] Wrote \${rows.length.toLocaleString()} rows\`);
2062
+
2063
+ const sortedPath = path.join(TEMP_DIR, 'sorted.tsv');
2064
+ execSync(\`LC_ALL=C sort -t'\\t' -k1,1 "\${tsvPath}" -o "\${sortedPath}"\`, {
2065
+ stdio: 'inherit', env: { ...process.env, TMPDIR: TEMP_DIR },
2066
+ });
2067
+ fs.unlinkSync(tsvPath);
2068
+
2069
+ const config = {
2070
+ idLength: ${idLengthNum},
2071
+ idRegex: /${idRegex}/,
2072
+ prefixLength: ${prefixLength},
2073
+ fieldCount: FIELD_COUNT,
2074
+ };
2075
+ const { shardPaths, idxPath, recordCount } = await buildBinaryFiles(sortedPath, TEMP_DIR, SOURCE, config);
2076
+
2077
+ const search = await buildSearchIndex(
2078
+ sortedPath, TEMP_DIR, SOURCE,
2079
+ { searchFieldIndex: SEARCH_FIELD, idRegex: /${idRegex}/, statusFieldIndex: STATUS_FIELD },
2080
+ recordCount,
2081
+ );
2082
+ const mphfPath = buildMphfFromIdx(search.idxPath);
2083
+ fs.unlinkSync(sortedPath);
2084
+
2085
+ if (options?.upload !== false) {
2086
+ for (let i = 0; i < shardPaths.length; i++) uploadToR2(shardPaths[i], \`\${SOURCE}-\${i}.bin\`);
2087
+ uploadToR2(idxPath, \`\${SOURCE}.idx\`);
2088
+ uploadToR2(search.idxPath, \`\${SOURCE}-search.idx\`);
2089
+ uploadToR2(mphfPath, \`\${SOURCE}-search.mphf\`);
2090
+ for (let i = 0; i < search.shardPaths.length; i++) uploadToR2(search.shardPaths[i], \`\${SOURCE}-search-\${i}.dat\`);
2091
+ saveImportMeta(SOURCE, new Date().toISOString(), recordCount);
2092
+ fs.rmSync(TEMP_DIR, { recursive: true, force: true });
2093
+ } else {
2094
+ console.log(\`\\n[\${SOURCE}] Files in \${TEMP_DIR} (--no-upload, skipping R2)\`);
2095
+ }
2096
+
2097
+ console.log(\`\\n[\${SOURCE}] Success: \${recordCount.toLocaleString()} records\`);
2098
+ return true;
2099
+ } catch (error) {
2100
+ console.error(\`\\n[\${SOURCE}] Error:\`, error);
2101
+ return false;
2102
+ }
2103
+ }
2104
+
2105
+ if (require.main === module) {
2106
+ const args = process.argv.slice(2);
2107
+ const limitIdx = args.indexOf('--limit');
2108
+ const limit = limitIdx !== -1 ? parseInt(args[limitIdx + 1]) : undefined;
2109
+ const upload = !args.includes('--no-upload');
2110
+ ${fnName}({ limit, upload }).then(ok => process.exit(ok ? 0 : 1));
2111
+ }
1906
2112
  `;
1907
2113
  fs_1.default.writeFileSync(yamlPath, yaml);
1908
- console.log(`Created: ${yamlPath}`);
2114
+ fs_1.default.writeFileSync(scriptPath, script);
2115
+ console.log(`Created:`);
2116
+ console.log(` ${yamlPath}`);
2117
+ console.log(` ${scriptPath}`);
1909
2118
  console.log(`\nNext steps:`);
1910
- console.log(` 1. Edit ${yamlPath} to match your data source`);
1911
- console.log(` 2. Write import script and upload: latinfo pipe script ${name} ./my-import.ts`);
1912
- console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
1913
- console.log(` 4. Test (100 records): latinfo pipe test ${name}`);
1914
- console.log(` 5. Validate (all records): latinfo pipe validate ${name}`);
1915
- console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
1916
- console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
2119
+ console.log(` 1. Fill in fetchData() in ${scriptPath}`);
2120
+ console.log(` 2. Update FIELD_COUNT, SEARCH_FIELD, STATUS_FIELD if you add more columns`);
2121
+ console.log(` 3. Add dependencies if needed: latinfo pipe deps ${name} playwright ddddocr`);
2122
+ console.log(` 4. Test locally: latinfo pipe local ${name}`);
2123
+ console.log(` 5. Stage + publish: latinfo pipe stage ${name} && latinfo pipe publish ${name}`);
1917
2124
  }
1918
2125
  async function pipeScript(args) {
1919
2126
  const [sourceName, scriptPath] = args;
@@ -2559,14 +2766,47 @@ async function pipePublish(args) {
2559
2766
  console.error(`[pipe] Git error: ${e.message}`);
2560
2767
  process.exit(1);
2561
2768
  }
2562
- // 2. Deploy Worker
2563
- console.log(`[pipe] Deploying Worker...`);
2769
+ // 2. Deploy Worker via GitHub Actions (no local Cloudflare token needed)
2770
+ console.log(`[pipe] Waiting for deploy workflow (GitHub Actions)...`);
2564
2771
  try {
2565
- run(`npx wrangler deploy`, { cwd: repo, stdio: 'inherit' });
2772
+ const { execSync: exec } = await Promise.resolve().then(() => __importStar(require('child_process')));
2773
+ const maxWait = 300; // 5 min max
2774
+ const interval = 10;
2775
+ let elapsed = 0;
2776
+ let deployed = false;
2777
+ // Give GitHub a moment to register the push event
2778
+ exec('sleep 5');
2779
+ while (elapsed < maxWait) {
2780
+ const result = exec(`gh run list --workflow=deploy.yml --branch=main --limit=1 --json status,conclusion,headSha`, { cwd: repo, encoding: 'utf-8', stdio: 'pipe' }).toString().trim();
2781
+ const ghRuns = JSON.parse(result);
2782
+ if (ghRuns.length > 0) {
2783
+ const latest = ghRuns[0];
2784
+ if (latest.status === 'completed') {
2785
+ if (latest.conclusion === 'success') {
2786
+ console.log(`[pipe] Deploy successful.`);
2787
+ deployed = true;
2788
+ break;
2789
+ }
2790
+ else {
2791
+ console.error(`[pipe] Deploy failed (${latest.conclusion}) — rolling back`);
2792
+ exec(`git checkout HEAD^ -- src/sources.ts .github/workflows/import.yml && git commit -m "Rollback: remove ${sourceName}" && git push`, { cwd: repo, stdio: 'pipe' });
2793
+ process.exit(1);
2794
+ }
2795
+ }
2796
+ if (elapsed % 30 === 0 && elapsed > 0) {
2797
+ console.log(`[pipe] Still deploying... (${elapsed}s)`);
2798
+ }
2799
+ }
2800
+ exec(`sleep ${interval}`);
2801
+ elapsed += interval;
2802
+ }
2803
+ if (!deployed) {
2804
+ console.error(`[pipe] Deploy timed out after ${maxWait}s`);
2805
+ process.exit(1);
2806
+ }
2566
2807
  }
2567
- catch {
2568
- console.error(`[pipe] Deploy failed — rolling back`);
2569
- run(`git revert HEAD --no-edit && git push`, { cwd: repo, stdio: 'pipe' });
2808
+ catch (e) {
2809
+ console.error(`[pipe] Deploy error: ${e.message}`);
2570
2810
  process.exit(1);
2571
2811
  }
2572
2812
  // 3. Trigger import on runner
@@ -2590,30 +2830,83 @@ async function pipePublish(args) {
2590
2830
  const newSources = sourceList.join(',');
2591
2831
  console.log(`[pipe] Adding ${sourceName} to SOURCES: ${newSources}`);
2592
2832
  run(`ssh ${RUNNER} "sudo sed -i 's|^Environment=.*SOURCES=.*|Environment=SOURCES=${newSources}|' /etc/systemd/system/latinfo-search.service && sudo systemctl daemon-reload"`, { stdio: 'pipe' });
2833
+ run(`ssh ${RUNNER} "sudo systemctl restart latinfo-search"`, { stdio: 'inherit' });
2834
+ console.log(`[pipe] Search server restarted.`);
2593
2835
  }
2594
2836
  else {
2595
- console.log(`[pipe] ${sourceName} already in SOURCES.`);
2837
+ console.log(`[pipe] ${sourceName} already in SOURCES — no restart needed.`);
2596
2838
  }
2597
- run(`ssh ${RUNNER} "sudo systemctl restart latinfo-search"`, { stdio: 'inherit' });
2598
- console.log(`[pipe] Search server restarted.`);
2599
2839
  }
2600
2840
  catch {
2601
2841
  console.log(`[pipe] Could not update search server (not critical).`);
2602
2842
  }
2603
2843
  // 5. Production bench: 500 concurrent against api.latinfo.dev
2604
- console.log(`\n[pipe] Running production bench (500 concurrent)...`);
2844
+ // Warm up: wait for the new source to be responsive before hammering it
2845
+ console.log(`\n[pipe] Warming up (waiting for search server + Worker index load)...`);
2605
2846
  try {
2606
2847
  const config = loadConfig();
2607
2848
  if (!config?.api_key)
2608
2849
  throw new Error('No API key');
2609
- const bench = await benchProduction(sourceName, config.api_key, 500);
2850
+ // Progressive warm-up: single probe small batch → ready for 500
2851
+ const warmupRoute = (() => {
2852
+ const src = discoverBenchSources(sourceName);
2853
+ return src.length > 0 ? src[0].routePath : `/${sourceName.replace(/-/g, '/')}`;
2854
+ })();
2855
+ const warmupUrl = `${API_URL}${warmupRoute}/search?q=garcia&limit=1`;
2856
+ const warmupHeaders = { Authorization: `Bearer ${config.api_key}` };
2857
+ // Phase 1: wait for first successful response (up to 90s)
2858
+ let warmedUp = false;
2859
+ for (let attempt = 0; attempt < 30; attempt++) {
2860
+ await new Promise(r => setTimeout(r, 3000));
2861
+ try {
2862
+ const r = await fetch(warmupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(5000) });
2863
+ if (r.status === 200 || r.status === 404) {
2864
+ warmedUp = true;
2865
+ break;
2866
+ }
2867
+ }
2868
+ catch { }
2869
+ process.stdout.write('.');
2870
+ }
2871
+ if (!warmedUp) {
2872
+ console.log(`\n[pipe] Warm-up timed out — running bench anyway`);
2873
+ }
2874
+ else {
2875
+ // Phase 2: warm up LOOKUP path too (bench uses lookups, not just search)
2876
+ const src = discoverBenchSources(sourceName);
2877
+ const smokeId = src.length > 0 ? src[0].smokeId : null;
2878
+ if (smokeId) {
2879
+ const lookupUrl = `${API_URL}${warmupRoute}/${src[0].primaryId.name}/${smokeId}`;
2880
+ process.stdout.write(' lookup');
2881
+ for (let i = 0; i < 5; i++) {
2882
+ await new Promise(r => setTimeout(r, 2000));
2883
+ try {
2884
+ await fetch(lookupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(10000) });
2885
+ }
2886
+ catch { }
2887
+ process.stdout.write('.');
2888
+ }
2889
+ }
2890
+ // Phase 3: progressive concurrent batches (20 → 50 → 100)
2891
+ for (const batchSize of [20, 50, 100]) {
2892
+ const batch = await Promise.all(Array.from({ length: batchSize }, () => fetch(warmupUrl, { headers: warmupHeaders, signal: AbortSignal.timeout(10000) })
2893
+ .then(r => r.status === 200 || r.status === 404 ? 1 : 0).catch(() => 0)));
2894
+ const batchOk = batch.reduce((a, b) => a + b, 0);
2895
+ process.stdout.write(` ${batchOk}/${batchSize}`);
2896
+ await new Promise(r => setTimeout(r, 2000));
2897
+ }
2898
+ console.log('');
2899
+ console.log(`[pipe] Warm-up OK`);
2900
+ }
2901
+ console.log(`[pipe] Running production bench (100 concurrent)...`);
2902
+ const bench = await benchProduction(sourceName, config.api_key, 100);
2610
2903
  console.log(`\n Production bench: ${bench.qps} q/s, ${bench.success_rate.toFixed(1)}% success`);
2611
2904
  console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
2612
2905
  if (bench.success_rate < 99.9) {
2613
2906
  console.error(`\n[pipe] PRODUCTION BENCH FAILED — ${bench.success_rate.toFixed(1)}% < 99.9%`);
2614
2907
  console.error(`[pipe] Rolling back...`);
2615
2908
  try {
2616
- run(`git revert HEAD --no-edit && git push`, { cwd: repo, stdio: 'pipe' });
2909
+ run(`git checkout HEAD^ -- src/sources.ts .github/workflows/import.yml && git commit -m "Rollback: remove ${sourceName}" && git push`, { cwd: repo, stdio: 'pipe' });
2617
2910
  run(`npx wrangler deploy`, { cwd: repo, stdio: 'pipe' });
2618
2911
  }
2619
2912
  catch { }
@@ -2633,6 +2926,67 @@ async function pipePublish(args) {
2633
2926
  console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
2634
2927
  console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
2635
2928
  }
2929
/**
 * Send a free-form feedback / bug report message to the API.
 *
 * The CLI arguments are joined with single spaces to form the message, which
 * is POSTed to `${API_URL}/feedback` together with the CLI version and the
 * current platform, authenticated with the configured API key.
 *
 * Exits the process with code 1 when the message is empty, when no API key is
 * configured, or when the server answers with a non-2xx status.
 *
 * @param {string[]} args - Words of the message, as given on the command line.
 * @returns {Promise<void>}
 */
async function report(args) {
    const message = args.join(' ').trim();
    if (!message) {
        console.error('Usage: latinfo report <message>');
        console.error('Example: latinfo report "search returns empty results for banco"');
        process.exit(1);
    }
    const config = loadConfig();
    if (!config?.api_key) {
        console.error('Not logged in. Run: latinfo login');
        process.exit(1);
    }
    const res = await fetch(`${API_URL}/feedback`, {
        method: 'POST',
        headers: { 'Authorization': `Bearer ${config.api_key}`, 'Content-Type': 'application/json' },
        body: JSON.stringify({
            message,
            cli_version: VERSION,
            os: process.platform,
        }),
    });
    if (!res.ok) {
        // Start from the HTTP status so there is always something meaningful
        // to print, even when the body carries no usable detail.
        let detail = `HTTP ${res.status}`;
        try {
            const err = await res.json();
            detail = err?.message || err?.error || detail;
        }
        catch {
            // The error body was not JSON (e.g. an HTML gateway page);
            // keep the status-code fallback instead of crashing.
        }
        console.error(`Error: ${detail}`);
        process.exit(1);
    }
    console.log('Report sent. Thank you — we will look into it.');
}
2957
/**
 * Extract the ADMIN_SECRET value from the text of a `.dev.vars` file.
 *
 * Looks for the first line starting with `ADMIN_SECRET=` and returns
 * everything after that first `=`, trimmed. The value is sliced rather than
 * split on `=` so secrets that themselves contain `=` are preserved. A value
 * wrapped in double quotes is decoded as a JSON string (falling back to plain
 * quote stripping); a value wrapped in single quotes has the quotes removed.
 *
 * @param {string} devVarsText - Full contents of the `.dev.vars` file.
 * @returns {string} The secret, or '' when no matching line exists.
 */
function parseAdminSecret(devVarsText) {
    const prefix = 'ADMIN_SECRET=';
    const line = devVarsText.split('\n').find((l) => l.startsWith(prefix));
    if (!line) {
        return '';
    }
    const raw = line.slice(prefix.length).trim();
    if (raw.length >= 2 && raw.startsWith('"') && raw.endsWith('"')) {
        try {
            return String(JSON.parse(raw));
        }
        catch {
            return raw.slice(1, -1);
        }
    }
    if (raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'")) {
        return raw.slice(1, -1);
    }
    return raw;
}
/**
 * List feedback reports from the admin API.
 *
 * The admin secret is taken from the ADMIN_SECRET environment variable, or,
 * when that is unset, from the `.dev.vars` file in the repository returned by
 * `getRepoPath()`. Passing `--resolved` on the command line lists resolved
 * reports; otherwise open reports are listed.
 *
 * Exits the process with code 1 when no secret can be found or when the
 * server answers with a non-2xx status.
 *
 * @returns {Promise<void>}
 */
async function issues() {
    let adminSecret = process.env.ADMIN_SECRET;
    if (!adminSecret) {
        try {
            const devVarsPath = path_1.default.join(getRepoPath(), '.dev.vars');
            adminSecret = parseAdminSecret(fs_1.default.readFileSync(devVarsPath, 'utf-8'));
        }
        catch {
            // Best effort: a missing repo or unreadable file simply means
            // "no secret", which is reported just below.
            adminSecret = '';
        }
    }
    if (!adminSecret) {
        console.error('ADMIN_SECRET not found. Set it in .dev.vars or ADMIN_SECRET env var.');
        process.exit(1);
    }
    const status = process.argv.includes('--resolved') ? 'resolved' : 'open';
    const res = await fetch(`${API_URL}/admin/feedback?status=${status}`, {
        headers: { 'Authorization': `Bearer ${adminSecret}` },
    });
    if (!res.ok) {
        console.error(`Error: ${res.status}`);
        process.exit(1);
    }
    const rows = await res.json();
    if (rows.length === 0) {
        console.log(`No ${status} issues.`);
        return;
    }
    for (const r of rows) {
        // Keep only the YYYY-MM-DD part; tolerate a missing timestamp.
        const date = String(r.created_at ?? '').slice(0, 10);
        const meta = [r.cli_version, r.os].filter(Boolean).join(', ');
        console.log(`[#${r.id}] ${date} @${r.github_username}${meta ? ` (${meta})` : ''}`);
        console.log(` ${r.message}\n`);
    }
}
2636
2990
  async function pipeStatus(args) {
2637
2991
  const [sourceName] = args;
2638
2992
  if (sourceName) {
@@ -2669,6 +3023,357 @@ async function pipeStatus(args) {
2669
3023
  }
2670
3024
  }
2671
3025
  }
3026
+ async function pipeLocal(args) {
3027
+ const [sourceName] = args;
3028
+ if (!sourceName) {
3029
+ console.error('Usage: latinfo pipe local <source-name>');
3030
+ process.exit(1);
3031
+ }
3032
+ const repo = getRepoPath();
3033
+ const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
3034
+ const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
3035
+ const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
3036
+ const importScriptMatch = yamlContent.match(/import_script:\s*(.+)/);
3037
+ const customScript = importScriptMatch ? path_1.default.join(repo, importScriptMatch[1].trim()) : null;
3038
+ const defaultScript = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
3039
+ const scriptPath = customScript && fs_1.default.existsSync(customScript) ? customScript
3040
+ : fs_1.default.existsSync(defaultScript) ? defaultScript : null;
3041
+ const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
3042
+ const cmd = scriptPath
3043
+ ? `npx tsx ${scriptPath} --no-upload`
3044
+ : `npx tsx ${easypipePath} ${yamlPath} --no-upload`;
3045
+ const TEMP_DIR = `/tmp/${sourceName}-import`;
3046
+ const MAX_SHARD_MB = 250;
3047
+ const BENCH_SAMPLES = 500;
3048
+ const IMPORT_RUNS = 1;
3049
+ let passed = true;
3050
+ const errors = [];
3051
+ // ── Run import N times ───────────────────────────────────────────────────
3052
+ for (let run_i = 1; run_i <= IMPORT_RUNS; run_i++) {
3053
+ console.log(`\n[pipe:local] ── Import run ${run_i}/${IMPORT_RUNS} ──`);
3054
+ try {
3055
+ run(cmd, { cwd: repo, stdio: 'inherit' });
3056
+ }
3057
+ catch {
3058
+ errors.push(`Import run ${run_i} crashed`);
3059
+ passed = false;
3060
+ break;
3061
+ }
3062
+ // ── Validate files ─────────────────────────────────────────────────────
3063
+ const expected = [
3064
+ `${sourceName}-0.bin`,
3065
+ `${sourceName}.idx`,
3066
+ `${sourceName}-search.idx`,
3067
+ `${sourceName}-search.mphf`,
3068
+ `${sourceName}-search-0.dat`,
3069
+ ];
3070
+ for (const f of expected) {
3071
+ const fp = path_1.default.join(TEMP_DIR, f);
3072
+ if (!fs_1.default.existsSync(fp)) {
3073
+ errors.push(`Missing: ${f}`);
3074
+ passed = false;
3075
+ }
3076
+ }
3077
+ // ── Shard sizes ────────────────────────────────────────────────────────
3078
+ const bins = fs_1.default.existsSync(TEMP_DIR) ? fs_1.default.readdirSync(TEMP_DIR).filter(f => f.endsWith('.bin')) : [];
3079
+ for (const b of bins) {
3080
+ const mb = fs_1.default.statSync(path_1.default.join(TEMP_DIR, b)).size / 1_048_576;
3081
+ if (mb > MAX_SHARD_MB) {
3082
+ errors.push(`Shard ${b} is ${mb.toFixed(0)}MB > ${MAX_SHARD_MB}MB`);
3083
+ passed = false;
3084
+ }
3085
+ else
3086
+ console.log(` ✓ ${b}: ${mb.toFixed(1)} MB`);
3087
+ }
3088
+ // ── V2 search index ────────────────────────────────────────────────────
3089
+ const searchIdx = path_1.default.join(TEMP_DIR, `${sourceName}-search.idx`);
3090
+ if (fs_1.default.existsSync(searchIdx)) {
3091
+ const content = fs_1.default.readFileSync(searchIdx);
3092
+ // V2 magic: first 4 bytes are "LSRY" (V1 = "LSRX")
3093
+ const magic = content.subarray(0, 4).toString('ascii');
3094
+ if (magic !== 'LSRY') {
3095
+ errors.push(`Search index is V1 (magic=${magic}) — must use V2 (statusFieldIndex required)`);
3096
+ passed = false;
3097
+ }
3098
+ else
3099
+ console.log(` ✓ Search index: V2`);
3100
+ }
3101
+ // ── MPHF ───────────────────────────────────────────────────────────────
3102
+ const mphf = path_1.default.join(TEMP_DIR, `${sourceName}-search.mphf`);
3103
+ if (fs_1.default.existsSync(mphf) && fs_1.default.statSync(mphf).size > 0) {
3104
+ console.log(` ✓ MPHF: ${(fs_1.default.statSync(mphf).size / 1024).toFixed(1)} KB`);
3105
+ }
3106
+ else {
3107
+ errors.push('MPHF missing or empty — call buildMphfFromIdx()');
3108
+ passed = false;
3109
+ }
3110
+ if (!passed)
3111
+ break;
3112
+ if (run_i < IMPORT_RUNS)
3113
+ console.log(` ✓ Run ${run_i} OK`);
3114
+ }
3115
+ if (!passed) {
3116
+ console.error(`\n[pipe:local] FAILED:`);
3117
+ for (const e of errors)
3118
+ console.error(` ✗ ${e}`);
3119
+ process.exit(1);
3120
+ }
3121
+ // ── Load binary into memory (shared by bench + smoke test + quality check) ──
3122
+ const idLen = parseInt(yamlContent.match(/length:\s*(\d+)/)?.[1] || '8');
3123
+ const prefixLen = parseInt(yamlContent.match(/prefix_length:\s*(\d+)/)?.[1] || '5');
3124
+ const idxPath = path_1.default.join(TEMP_DIR, `${sourceName}.idx`);
3125
+ const binPath = path_1.default.join(TEMP_DIR, `${sourceName}-0.bin`);
3126
+ let binBuf;
3127
+ let index;
3128
+ try {
3129
+ if (!fs_1.default.existsSync(idxPath) || !fs_1.default.existsSync(binPath))
3130
+ throw new Error('Missing .idx or .bin');
3131
+ binBuf = fs_1.default.readFileSync(binPath);
3132
+ const HEADER_SIZE = 16, ENTRY_SIZE = 16, MAGIC = [0x4c, 0x49, 0x44, 0x58];
3133
+ const idxBuf = fs_1.default.readFileSync(idxPath);
3134
+ for (let i = 0; i < 4; i++)
3135
+ if (idxBuf[i] !== MAGIC[i])
3136
+ throw new Error('Invalid index magic');
3137
+ const entryCount = idxBuf.readUInt32LE(4);
3138
+ index = [];
3139
+ for (let i = 0; i < entryCount; i++) {
3140
+ const off = HEADER_SIZE + i * ENTRY_SIZE;
3141
+ index.push({
3142
+ prefix: idxBuf.readUInt32LE(off),
3143
+ shard: idxBuf.readUInt32LE(off + 4),
3144
+ offset: idxBuf.readUInt32LE(off + 8),
3145
+ length: idxBuf.readUInt32LE(off + 12),
3146
+ });
3147
+ }
3148
+ }
3149
+ catch (e) {
3150
+ errors.push(`Cannot load binary: ${e.message}`);
3151
+ passed = false;
3152
+ index = [];
3153
+ binBuf = Buffer.alloc(0);
3154
+ }
3155
+ // Lookup a record by ID — returns field array (after the ID) or null
3156
+ function lookupRecord(id) {
3157
+ const prefix = parseInt(id.substring(0, prefixLen));
3158
+ let lo = 0, hi = index.length - 1, entry = null;
3159
+ while (lo <= hi) {
3160
+ const mid = (lo + hi) >>> 1;
3161
+ if (index[mid].prefix === prefix) {
3162
+ entry = index[mid];
3163
+ break;
3164
+ }
3165
+ if (index[mid].prefix < prefix)
3166
+ lo = mid + 1;
3167
+ else
3168
+ hi = mid - 1;
3169
+ }
3170
+ if (!entry)
3171
+ return null;
3172
+ const chunk = binBuf.subarray(entry.offset, entry.offset + entry.length);
3173
+ let p = 0;
3174
+ while (p < chunk.length) {
3175
+ const rlen = chunk.readUInt16LE(p);
3176
+ if (rlen < 2)
3177
+ break;
3178
+ const rid = chunk.subarray(p + 2, p + 2 + idLen).toString();
3179
+ if (rid === id) {
3180
+ // Parse fields: [uint8 len][bytes]...
3181
+ const fields = [];
3182
+ let fp = p + 2 + idLen;
3183
+ while (fp < p + rlen) {
3184
+ const flen = chunk[fp++];
3185
+ fields.push(chunk.subarray(fp, fp + flen).toString('utf-8'));
3186
+ fp += flen;
3187
+ }
3188
+ return fields;
3189
+ }
3190
+ if (rid > id)
3191
+ return null;
3192
+ p += rlen;
3193
+ }
3194
+ return null;
3195
+ }
3196
+ // ── Smoke test: look up known ID and verify expected field ────────────────
3197
+ console.log(`\n[pipe:local] ── Smoke test ──`);
3198
+ const smokeId = yamlContent.match(/smoke_test:\s*\n\s+id:\s*"?([^"\n]+)"?/)?.[1]?.trim();
3199
+ const smokeField = yamlContent.match(/expect_field:\s*(\w+)/)?.[1]?.trim();
3200
+ if (!smokeId || smokeId === '""' || !smokeField) {
3201
+ errors.push('smoke_test.id and smoke_test.expect_field are required in sources YAML');
3202
+ passed = false;
3203
+ }
3204
+ else {
3205
+ const fieldNames = [...yamlContent.matchAll(/- name:\s*(\w+)/g)].map(m => m[1]);
3206
+ const fieldIdx = fieldNames.indexOf(smokeField);
3207
+ if (fieldIdx === -1) {
3208
+ errors.push(`smoke_test.expect_field "${smokeField}" not found in fields list`);
3209
+ passed = false;
3210
+ }
3211
+ else {
3212
+ const record = lookupRecord(smokeId);
3213
+ if (!record) {
3214
+ errors.push(`Smoke test FAILED: ID "${smokeId}" not found in binary — fetchData() may be returning wrong data`);
3215
+ passed = false;
3216
+ }
3217
+ else {
3218
+ // Field count check: binary record fields must match YAML fields
3219
+ if (record.length !== fieldNames.length) {
3220
+ errors.push(`Field count mismatch: binary has ${record.length} fields but YAML lists ${fieldNames.length} (${fieldNames.join(', ')}). Fix YAML or import script.`);
3221
+ passed = false;
3222
+ }
3223
+ else {
3224
+ console.log(` ✓ field count: ${record.length} (matches YAML)`);
3225
+ }
3226
+ // Field content validation: verify each field value matches its name
3227
+ for (let fi = 0; fi < Math.min(record.length, fieldNames.length); fi++) {
3228
+ const fname = fieldNames[fi];
3229
+ const fval = (record[fi] ?? '').trim();
3230
+ console.log(` ${fname} = "${fval.slice(0, 60)}${fval.length > 60 ? '...' : ''}"`);
3231
+ if (!fval)
3232
+ continue; // empty fields checked in data quality
3233
+ const isDate = /^\d{2}[\/\-]\d{2}[\/\-]\d{4}$/.test(fval) || /^\d{4}[\/\-]\d{2}[\/\-]\d{2}/.test(fval) || /^\d{8}$/.test(fval);
3234
+ const isNumeric = /^\d+$/.test(fval);
3235
+ const hasLetters = /[a-zA-ZÁÉÍÓÚÑáéíóúñ]/.test(fval);
3236
+ if (fname.startsWith('fecha') && !isDate && !fval.includes('/') && !fval.includes('-')) {
3237
+ errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — expected a date`);
3238
+ passed = false;
3239
+ }
3240
+ if (fname.endsWith('_count') && !isNumeric) {
3241
+ errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — expected a number`);
3242
+ passed = false;
3243
+ }
3244
+ if (fname === 'nombre' && !hasLetters) {
3245
+ errors.push(`Field "nombre" = "${fval.slice(0, 40)}" — expected letters (got numbers/dates)`);
3246
+ passed = false;
3247
+ }
3248
+ if ((fname.startsWith('estado') || fname === 'tipo_sancion') && isDate) {
3249
+ errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — looks like a date, not a status/type`);
3250
+ passed = false;
3251
+ }
3252
+ if (fname.includes('entidad') && isDate) {
3253
+ errors.push(`Field "${fname}" = "${fval.slice(0, 40)}" — looks like a date, not an entity name`);
3254
+ passed = false;
3255
+ }
3256
+ }
3257
+ const value = record[fieldIdx] ?? '';
3258
+ if (!value.trim()) {
3259
+ errors.push(`Smoke test FAILED: field "${smokeField}" is empty for ID "${smokeId}" — check your field mapping`);
3260
+ passed = false;
3261
+ }
3262
+ else {
3263
+ console.log(` ✓ ${smokeField} = "${value.trim()}"`);
3264
+ }
3265
+ }
3266
+ }
3267
+ }
3268
+ // ── Data quality: check empty name/status rates ───────────────────────────
3269
+ console.log(`\n[pipe:local] ── Data quality ──`);
3270
+ try {
3271
+ const fieldNames = [...yamlContent.matchAll(/- name:\s*(\w+)/g)].map(m => m[1]);
3272
+ let total = 0, emptyName = 0, emptyStatus = 0;
3273
+ let pos2 = 0;
3274
+ while (pos2 < binBuf.length) {
3275
+ const rlen = binBuf.readUInt16LE(pos2);
3276
+ if (rlen < 2 + idLen)
3277
+ break;
3278
+ let fp = pos2 + 2 + idLen;
3279
+ const fields = [];
3280
+ while (fp < pos2 + rlen) {
3281
+ const flen = binBuf[fp++];
3282
+ fields.push(binBuf.subarray(fp, fp + flen).toString('utf-8'));
3283
+ fp += flen;
3284
+ }
3285
+ total++;
3286
+ if (!fields[0]?.trim())
3287
+ emptyName++;
3288
+ if (!fields[1]?.trim())
3289
+ emptyStatus++;
3290
+ pos2 += rlen;
3291
+ }
3292
+ const emptyNamePct = total > 0 ? (emptyName / total) * 100 : 0;
3293
+ const emptyStatusPct = total > 0 ? (emptyStatus / total) * 100 : 0;
3294
+ const nameLabel = fieldNames[0] || 'name';
3295
+ const statusLabel = fieldNames[1] || 'status';
3296
+ console.log(` ${total.toLocaleString()} records scanned`);
3297
+ if (emptyNamePct > 5) {
3298
+ errors.push(`${emptyNamePct.toFixed(1)}% of records have empty "${nameLabel}" — check searchFieldIndex`);
3299
+ passed = false;
3300
+ }
3301
+ else {
3302
+ console.log(` ✓ ${nameLabel}: ${emptyNamePct.toFixed(1)}% empty`);
3303
+ }
3304
+ if (emptyStatusPct > 50) {
3305
+ errors.push(`${emptyStatusPct.toFixed(1)}% of records have empty "${statusLabel}" — check statusFieldIndex`);
3306
+ passed = false;
3307
+ }
3308
+ else {
3309
+ console.log(` ✓ ${statusLabel}: ${emptyStatusPct.toFixed(1)}% empty`);
3310
+ }
3311
+ }
3312
+ catch (e) {
3313
+ errors.push(`Quality check failed: ${e.message}`);
3314
+ passed = false;
3315
+ }
3316
+ // ── Local benchmark: N random lookups from binary ─────────────────────────
3317
+ console.log(`\n[pipe:local] ── Local benchmark: ${BENCH_SAMPLES} random lookups ──`);
3318
+ try {
3319
+ const allIds = [];
3320
+ let pos3 = 0;
3321
+ while (pos3 < binBuf.length && allIds.length < BENCH_SAMPLES * 10) {
3322
+ const rlen = binBuf.readUInt16LE(pos3);
3323
+ if (rlen < 2 + idLen || pos3 + rlen > binBuf.length)
3324
+ break;
3325
+ allIds.push(binBuf.subarray(pos3 + 2, pos3 + 2 + idLen).toString());
3326
+ pos3 += rlen;
3327
+ }
3328
+ if (allIds.length === 0)
3329
+ throw new Error('Could not read any records from .bin');
3330
+ for (let i = allIds.length - 1; i > 0; i--) {
3331
+ const j = Math.floor(Math.random() * (i + 1));
3332
+ [allIds[i], allIds[j]] = [allIds[j], allIds[i]];
3333
+ }
3334
+ const sample = allIds.slice(0, BENCH_SAMPLES);
3335
+ const latencies = [];
3336
+ let found = 0;
3337
+ for (const id of sample) {
3338
+ const t0 = performance.now();
3339
+ if (lookupRecord(id))
3340
+ found++;
3341
+ latencies.push(performance.now() - t0);
3342
+ }
3343
+ latencies.sort((a, b) => a - b);
3344
+ const p50 = latencies[Math.floor(latencies.length * 0.50)];
3345
+ const p95 = latencies[Math.floor(latencies.length * 0.95)];
3346
+ const p99 = latencies[Math.floor(latencies.length * 0.99)];
3347
+ console.log(` ${BENCH_SAMPLES} lookups: p50=${p50.toFixed(2)}ms p95=${p95.toFixed(2)}ms p99=${p99.toFixed(2)}ms`);
3348
+ console.log(` Hit rate: ${found}/${BENCH_SAMPLES} (${((found / BENCH_SAMPLES) * 100).toFixed(1)}%)`);
3349
+ if (found < BENCH_SAMPLES * 0.99) {
3350
+ errors.push(`Low hit rate: ${found}/${BENCH_SAMPLES}`);
3351
+ passed = false;
3352
+ }
3353
+ }
3354
+ catch (e) {
3355
+ errors.push(`Benchmark failed: ${e.message}`);
3356
+ passed = false;
3357
+ }
3358
+ // ── Final summary ─────────────────────────────────────────────────────────
3359
+ if (errors.length > 0) {
3360
+ console.error(`\n[pipe:local] FAILED:`);
3361
+ for (const e of errors)
3362
+ console.error(` ✗ ${e}`);
3363
+ process.exit(1);
3364
+ }
3365
+ // Mark test + validate as passed so pipe stage can proceed
3366
+ const status = loadPipeStatus(sourceName);
3367
+ const now = new Date().toISOString();
3368
+ status.test = { passed: true, timestamp: now };
3369
+ status.validate = { passed: true, timestamp: now };
3370
+ savePipeStatus(status);
3371
+ console.log(`\n[pipe:local] ✓ ALL CHECKS PASSED`);
3372
+ console.log(` shards ≤ ${MAX_SHARD_MB}MB • V2 • MPHF • bench OK`);
3373
+ console.log(` Gates test + validate marked as passed.`);
3374
+ console.log(` Files ready in: ${TEMP_DIR}`);
3375
+ console.log(`\n Next: latinfo pipe stage ${sourceName} (uploads to R2 + Linux Mint bench)`);
3376
+ }
2672
3377
  async function pipe(args) {
2673
3378
  requireAdmin();
2674
3379
  const [subcommand, ...subArgs] = args;
@@ -2706,6 +3411,9 @@ async function pipe(args) {
2706
3411
  for (const y of yamls)
2707
3412
  console.log(` ${y.replace('.yaml', '')}`);
2708
3413
  break;
3414
+ case 'local':
3415
+ await pipeLocal(subArgs);
3416
+ break;
2709
3417
  case 'run':
2710
3418
  const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
2711
3419
  try {
@@ -2726,6 +3434,7 @@ COMMANDS
2726
3434
  create <country> <institution> <dataset> [flags] Create source (YAML template)
2727
3435
  script <source> <file.ts> Upload import script
2728
3436
  deps <source> <pkg1> [pkg2] ... Add npm dependencies
3437
+ local <source> Full local test: 3x import + file checks + bench
2729
3438
  test <source> Gate 1: test 100 records locally
2730
3439
  validate <source> Gate 2: full import locally
2731
3440
  stage <source> Gate 3: import + 500 bench on Linux Mint
@@ -2763,6 +3472,11 @@ SCRIPT REQUIREMENTS
2763
3472
  7. uploadToR2() for each file
2764
3473
  8. saveImportMeta()
2765
3474
 
3475
+ RECOMMENDED: export async function checkFresh(lastMeta): Promise<boolean>
3476
+ Called before the import to skip if data hasn't changed. Saves RAM + API calls.
3477
+ REST APIs: fetch 1 record, compare max ID. CSVs: HEAD request, compare Last-Modified.
3478
+ Without this, the import always runs regardless of whether the source updated.
3479
+
2766
3480
  See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.
2767
3481
 
2768
3482
  NAMING
@@ -2783,6 +3497,7 @@ const DOCS = {
2783
3497
  index: `latinfo docs — complete documentation
2784
3498
 
2785
3499
  TOPICS
3500
+ latinfo docs team Team system, tasks, ranking, and how the AI PM works
2786
3501
  latinfo docs pipe How to create a data pipeline (full guide)
2787
3502
  latinfo docs fields searchFieldIndex, statusFieldIndex explained
2788
3503
  latinfo docs v2 V2 search index + MPHF (mandatory)
@@ -2791,6 +3506,75 @@ TOPICS
2791
3506
  latinfo docs troubleshooting Common errors and fixes
2792
3507
  latinfo docs architecture How latinfo works internally
2793
3508
  latinfo docs api API endpoints and response format`,
3509
+ team: `TEAM SYSTEM
3510
+
3511
+ The AI agent is the project manager (PM). It assigns tasks, tracks progress,
3512
+ and coordinates the team through the CLI. No human PM needed.
3513
+
3514
+ GETTING STARTED (new member)
3515
+
3516
+ 1. Install: npm i -g latinfo
3517
+ 2. Login: latinfo login (authenticates with your GitHub account)
3518
+ 3. See tasks: latinfo tasks (shows your assigned tasks)
3519
+ 4. Complete: latinfo tasks complete <id>
3520
+ 5. Ranking: latinfo tasks rank
3521
+
3522
+ ADMIN COMMANDS (requires admin access)
3523
+
3524
+ latinfo team add <username> Add team member
3525
+ latinfo team add <username> --admin Add as admin
3526
+ latinfo team remove <username> Remove member
3527
+ latinfo team list List all members + points
3528
+
3529
+ latinfo tasks assign <user> "<title>" [--points N] Assign task (default 10 pts)
3530
+ latinfo tasks approve <id> Approve completed task (awards points)
3531
+ latinfo tasks reject <id> "<reason>" Reject back to member
3532
+ latinfo tasks delete <id> Delete task
3533
+ latinfo tasks list --all See all tasks from all members
3534
+
3535
+ MEMBER COMMANDS (requires latinfo login)
3536
+
3537
+ latinfo tasks My tasks
3538
+ latinfo tasks complete <id> Mark task as done (waits for approval)
3539
+ latinfo tasks rank Team ranking
3540
+
3541
+ TASK FLOW
3542
+
3543
+ Admin assigns task → Member sees it (pending)
3544
+ Member completes → Status changes to "completed"
3545
+ Admin approves → Points awarded, status "approved"
3546
+ Admin rejects → Back to "pending" with reason
3547
+
3548
+ RANKING LEVELS
3549
+
3550
+ Bronze 0-49 pts
3551
+ Silver 50-149 pts
3552
+ Gold 150-299 pts
3553
+ Diamond 300+ pts
3554
+
3555
+ Points come from approved tasks. The ranking updates automatically.
3556
+ Admins cannot manually change points — only approve/reject tasks.
3557
+
3558
+ HOW THE AI PM WORKS
3559
+
3560
+ The AI agent (Claude, GPT, etc.) can run all these commands autonomously:
3561
+
3562
+ latinfo tasks list --all → See everything
3563
+ latinfo tasks assign ... → Assign work
3564
+ latinfo tasks approve ... → Approve completed work
3565
+ latinfo tasks rank → Report progress
3566
+
3567
+ The AI reads "latinfo docs team" to learn how to operate,
3568
+ then manages the team without human intervention.
3569
+
3570
+ FIRST TIME SETUP (admin)
3571
+
3572
+ 1. You (admin) run: latinfo team add <their-github-username>
3573
+ 2. They run: latinfo login
3574
+ 3. You assign: latinfo tasks assign <username> "Find Mexico SAT data sources"
3575
+ 4. They work and: latinfo tasks complete <id>
3576
+ 5. You approve: latinfo tasks approve <id>
3577
+ 6. Everyone sees: latinfo tasks rank`,
2794
3578
  pipe: `HOW TO CREATE A DATA PIPELINE
2795
3579
 
2796
3580
  latinfo pipe handles storage, indexing, search, and API serving automatically.
@@ -3174,6 +3958,281 @@ function docs(args) {
3174
3958
  }
3175
3959
  console.log(content);
3176
3960
  }
3961
/**
 * Guard for team-only commands: exits the process with code 1 unless the
 * loaded config is flagged as a team member (`is_team`).
 */
function requireTeam() {
    const isTeamMember = Boolean(loadConfig()?.is_team);
    if (isTeamMember) {
        return;
    }
    console.error('This command is only available to team members.');
    process.exit(1);
}
3968
/**
 * Guard for admin-only team commands: exits the process with code 1 unless the
 * loaded config is a team member (`is_team`) whose `team_role` is 'admin'.
 */
function requireTeamAdmin() {
    // Print the reason, then terminate with a failure exit code.
    const deny = (reason) => {
        console.error(reason);
        process.exit(1);
    };
    const config = loadConfig();
    const isTeamMember = Boolean(config?.is_team);
    if (!isTeamMember)
        deny('This command is only available to team members.');
    if (config.team_role !== 'admin')
        deny('This command requires admin access.');
}
3979
+ // --- Team & Tasks ---
3980
+ async function teamCmd(args) {
3981
+ const sub = args[0];
3982
+ const adminSecret = requireAdmin();
3983
+ const headers = { Authorization: `Bearer ${adminSecret}`, 'Content-Type': 'application/json' };
3984
+ switch (sub) {
3985
+ case 'add': {
3986
+ const username = args[1];
3987
+ const isAdmin = args.includes('--admin');
3988
+ if (!username) {
3989
+ console.error('Usage: latinfo team add <github-username> [--admin]');
3990
+ process.exit(1);
3991
+ }
3992
+ const res = await fetch(`${API_URL}/team/members`, {
3993
+ method: 'POST', headers,
3994
+ body: JSON.stringify({ github_username: username, role: isAdmin ? 'admin' : 'member' }),
3995
+ });
3996
+ const data = await res.json();
3997
+ if (!res.ok) {
3998
+ console.error(data.message || data.error);
3999
+ process.exit(1);
4000
+ }
4001
+ console.log(`Added ${username} to team (${data.role}).`);
4002
+ if (data.api_key) {
4003
+ console.log(`\n API key: ${data.api_key}\n`);
4004
+ console.log(` Share this with the member. They set:`);
4005
+ console.log(` export LATINFO_API_KEY=${data.api_key}`);
4006
+ }
4007
+ try {
4008
+ const { execSync: exec } = await Promise.resolve().then(() => __importStar(require('child_process')));
4009
+ exec(`gh api repos/carrerahaus/latinfo-api/collaborators/${username} -X PUT -f permission=push`, { stdio: 'pipe' });
4010
+ console.log(`Added ${username} as GitHub collaborator.`);
4011
+ }
4012
+ catch {
4013
+ console.log('Note: Could not add as GitHub collaborator (gh CLI required).');
4014
+ }
4015
+ break;
4016
+ }
4017
+ case 'remove': {
4018
+ const username = args[1];
4019
+ if (!username) {
4020
+ console.error('Usage: latinfo team remove <github-username>');
4021
+ process.exit(1);
4022
+ }
4023
+ const res = await fetch(`${API_URL}/team/members/${username}`, { method: 'DELETE', headers });
4024
+ const data = await res.json();
4025
+ if (!res.ok) {
4026
+ console.error(data.message || data.error);
4027
+ process.exit(1);
4028
+ }
4029
+ console.log(`Removed ${username} from team.`);
4030
+ try {
4031
+ const { execSync: exec } = await Promise.resolve().then(() => __importStar(require('child_process')));
4032
+ exec(`gh api repos/carrerahaus/latinfo-api/collaborators/${username} -X DELETE`, { stdio: 'pipe' });
4033
+ console.log(`Removed ${username} from GitHub.`);
4034
+ }
4035
+ catch { }
4036
+ break;
4037
+ }
4038
+ case 'list': {
4039
+ const res = await fetch(`${API_URL}/team/members`, { headers });
4040
+ const data = await res.json();
4041
+ if (!res.ok) {
4042
+ console.error('Failed to list members');
4043
+ process.exit(1);
4044
+ }
4045
+ if (data.length === 0) {
4046
+ console.log('No team members.');
4047
+ return;
4048
+ }
4049
+ console.log('\n TEAM\n');
4050
+ for (const m of data) {
4051
+ const level = m.points >= 300 ? 'Diamond' : m.points >= 150 ? 'Gold' : m.points >= 50 ? 'Silver' : 'Bronze';
4052
+ console.log(` ${m.github_username.padEnd(22)} ${m.role.padEnd(8)} ${String(m.points).padStart(4)} pts ${level}`);
4053
+ }
4054
+ console.log();
4055
+ break;
4056
+ }
4057
+ default:
4058
+ console.log(`Usage:
4059
+ latinfo team add <username> [--admin] Add team member
4060
+ latinfo team remove <username> Remove member
4061
+ latinfo team list List all members`);
4062
+ }
4063
+ }
4064
/**
 * `latinfo tasks [subcommand]` — team task management.
 *
 * Admin subcommands (authenticated with the secret from requireAdmin()):
 *   assign <user> "<title>" [--points N]   Create a task (10 points by default).
 *   approve <id> | reject <id> "<reason>" | delete <id>
 *
 * Member subcommands (authenticated with the API key from the loaded config):
 *   complete <id>    Mark a task as done.
 *   rank             Print the team ranking.
 *   list [--all]     List tasks; `--all` additionally sends the admin secret.
 *   (anything else)  List the caller's own tasks.
 *
 * Usage errors and failed API calls exit the process with code 1.
 *
 * @param {string[]} args - CLI arguments after `tasks`.
 * @returns {Promise<void>}
 */
async function tasksCmd(args) {
    const sub = args[0];
    // Task IDs come from the command line; encode them before placing them in a URL path.
    const taskUrl = (taskId) => `${API_URL}/team/tasks/${encodeURIComponent(taskId)}`;
    // Prints the API's error message and terminates.
    const failWith = (data) => {
        console.error(data.message || data.error);
        process.exit(1);
    };
    // Admin commands use ADMIN_SECRET
    if (['assign', 'approve', 'reject', 'delete'].includes(sub)) {
        const adminSecret = requireAdmin();
        const headers = { Authorization: `Bearer ${adminSecret}`, 'Content-Type': 'application/json' };
        switch (sub) {
            case 'assign': {
                const username = args[1];
                const title = args[2];
                const pointsIdx = args.indexOf('--points');
                const points = pointsIdx !== -1 ? Number.parseInt(args[pointsIdx + 1], 10) : 10;
                if (!username || !title) {
                    console.error('Usage: latinfo tasks assign <username> "<title>" [--points N]');
                    process.exit(1);
                }
                // A missing or non-numeric --points value would otherwise be serialized as null.
                if (Number.isNaN(points)) {
                    console.error('Invalid --points value. Usage: latinfo tasks assign <username> "<title>" [--points N]');
                    process.exit(1);
                }
                const res = await fetch(`${API_URL}/team/tasks`, {
                    method: 'POST', headers,
                    body: JSON.stringify({ assignee_username: username, title, points }),
                });
                const data = await res.json();
                if (!res.ok)
                    failWith(data);
                console.log(`Task #${data.id} assigned to ${username}: "${title}" (${data.points} pts)`);
                break;
            }
            case 'approve': {
                const taskId = args[1];
                if (!taskId) {
                    console.error('Usage: latinfo tasks approve <task-id>');
                    process.exit(1);
                }
                const res = await fetch(taskUrl(taskId), {
                    method: 'PATCH', headers,
                    body: JSON.stringify({ action: 'approve' }),
                });
                const data = await res.json();
                if (!res.ok)
                    failWith(data);
                console.log(`Task #${taskId} approved. ${data.points_awarded} points awarded.`);
                break;
            }
            case 'reject': {
                const taskId = args[1];
                const reason = args[2] || '';
                if (!taskId) {
                    console.error('Usage: latinfo tasks reject <task-id> "<reason>"');
                    process.exit(1);
                }
                const res = await fetch(taskUrl(taskId), {
                    method: 'PATCH', headers,
                    body: JSON.stringify({ action: 'reject', reason }),
                });
                const data = await res.json();
                if (!res.ok)
                    failWith(data);
                console.log(`Task #${taskId} rejected.`);
                break;
            }
            case 'delete': {
                const taskId = args[1];
                if (!taskId) {
                    console.error('Usage: latinfo tasks delete <task-id>');
                    process.exit(1);
                }
                const res = await fetch(taskUrl(taskId), { method: 'DELETE', headers });
                const data = await res.json();
                if (!res.ok)
                    failWith(data);
                console.log(`Task #${taskId} deleted.`);
                break;
            }
        }
        return;
    }
    // Member commands use API key
    const config = loadConfig();
    if (!config?.api_key) {
        console.error('Not logged in. Run: latinfo login');
        process.exit(1);
    }
    const headers = { Authorization: `Bearer ${config.api_key}`, 'Content-Type': 'application/json' };
    // Prints one line per task; a pending task that carries a reject reason shows it.
    const printTasks = (tasks, showAssignee) => {
        const icons = { pending: '○', completed: '◉', approved: '★' };
        for (const t of tasks) {
            const assignee = showAssignee ? ` @${t.assignee_username}` : '';
            const rejection = t.status === 'pending' && t.reject_reason ? ` (rejected: ${t.reject_reason})` : '';
            console.log(` [#${t.id}] ${icons[t.status] || t.status} ${t.title}${assignee} (${t.points} pts)${rejection}`);
        }
    };
    switch (sub) {
        case 'complete': {
            const taskId = args[1];
            if (!taskId) {
                console.error('Usage: latinfo tasks complete <task-id>');
                process.exit(1);
            }
            const res = await fetch(taskUrl(taskId), {
                method: 'PATCH', headers,
                body: JSON.stringify({ action: 'complete' }),
            });
            const data = await res.json();
            if (!res.ok)
                failWith(data);
            console.log(`Task #${taskId} marked as completed. Waiting for approval.`);
            break;
        }
        case 'rank': {
            const res = await fetch(`${API_URL}/team/rank`, { headers });
            const ranking = await res.json();
            if (!res.ok) {
                console.error('Failed to get ranking');
                process.exit(1);
            }
            if (ranking.length === 0) {
                console.log('No team members yet.');
                return;
            }
            const badges = { Bronze: '●', Silver: '◆', Gold: '★', Diamond: '◈' };
            console.log('\n RANKING\n');
            for (const r of ranking) {
                console.log(` #${r.rank} ${r.username.padEnd(22)} ${String(r.points).padStart(4)} pts ${badges[r.level] || ''} ${r.level}`);
            }
            console.log();
            break;
        }
        case 'list': {
            const allFlag = args.includes('--all');
            const url = allFlag ? `${API_URL}/team/tasks?all=true` : `${API_URL}/team/tasks`;
            // Listing everyone's tasks additionally sends the admin secret.
            const reqHeaders = allFlag
                ? { ...headers, 'X-Admin-Secret': requireAdmin() }
                : headers;
            const res = await fetch(url, { headers: reqHeaders });
            const tasksList = await res.json();
            if (!res.ok) {
                console.error('Failed to list tasks');
                process.exit(1);
            }
            if (tasksList.length === 0) {
                console.log('No tasks.');
                return;
            }
            printTasks(tasksList, allFlag);
            break;
        }
        default: {
            // No subcommand = show my tasks
            const res = await fetch(`${API_URL}/team/tasks`, { headers });
            const tasksList = await res.json();
            if (!res.ok) {
                console.error('Failed to list tasks');
                process.exit(1);
            }
            if (tasksList.length === 0) {
                console.log('No tasks assigned to you.');
                return;
            }
            printTasks(tasksList, false);
        }
    }
}
3177
4236
  // --- Main ---
3178
4237
  const [command, ...args] = rawArgs;
3179
4238
  const COUNTRIES = ['pe', 'co', 'br', 'mx', 'ar', 'cl', 'ec'];
@@ -3212,7 +4271,15 @@ else {
3212
4271
  case 'users':
3213
4272
  users().catch(e => { console.error(e); process.exit(1); });
3214
4273
  break;
4274
+ case 'plan':
4275
+ plan().catch(e => { console.error(e); process.exit(1); });
4276
+ break;
4277
+ case 'completion':
4278
+ completion();
4279
+ break;
4280
+ // Team-only commands
3215
4281
  case 'imports':
4282
+ requireTeam();
3216
4283
  if (args[0] === 'run')
3217
4284
  importsRun(args[1] || 'all').catch(e => { console.error(e); process.exit(1); });
3218
4285
  else if (args[0] === 'report')
@@ -3220,32 +4287,49 @@ else {
3220
4287
  else
3221
4288
  imports().catch(e => { console.error(e); process.exit(1); });
3222
4289
  break;
3223
- case 'plan':
3224
- plan().catch(e => { console.error(e); process.exit(1); });
3225
- break;
3226
4290
  case 'costs':
4291
+ requireTeamAdmin();
3227
4292
  (liveFlag ? costsLive() : Promise.resolve(costsSimulate(args[0], args[1], args[2]))).catch(e => { console.error(e); process.exit(1); });
3228
4293
  break;
3229
4294
  case 'bench':
4295
+ requireTeamAdmin();
3230
4296
  bench(args).catch(e => { console.error(e); process.exit(1); });
3231
4297
  break;
3232
4298
  case 'search-server':
4299
+ requireTeamAdmin();
3233
4300
  searchServerStatus().catch(e => { console.error(e); process.exit(1); });
3234
4301
  break;
4302
+ case 'team':
4303
+ requireTeamAdmin();
4304
+ teamCmd(args).catch(e => { console.error(e); process.exit(1); });
4305
+ break;
4306
+ case 'tasks':
4307
+ requireTeam();
4308
+ tasksCmd(args).catch(e => { console.error(e); process.exit(1); });
4309
+ break;
3235
4310
  case 'pipe':
4311
+ requireTeam();
3236
4312
  pipe(args).catch(e => { console.error(e); process.exit(1); });
3237
4313
  break;
3238
4314
  case 'admin':
4315
+ requireTeamAdmin();
3239
4316
  pipe(args).catch(e => { console.error(e); process.exit(1); });
3240
- break; // backward compat
4317
+ break;
3241
4318
  case 'easypipe':
3242
4319
  case 'ep':
4320
+ requireTeam();
3243
4321
  easypipe(args).catch(e => { console.error(e); process.exit(1); });
3244
4322
  break;
3245
- case 'completion':
3246
- completion();
4323
+ case 'report':
4324
+ requireTeamAdmin();
4325
+ report(args).catch(e => { console.error(e); process.exit(1); });
4326
+ break;
4327
+ case 'issues':
4328
+ requireTeamAdmin();
4329
+ issues().catch(e => { console.error(e); process.exit(1); });
3247
4330
  break;
3248
4331
  case 'docs':
4332
+ requireTeam();
3249
4333
  docs(args);
3250
4334
  break;
3251
4335
  case 'help':