latinfo 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +652 -111
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -47,7 +47,7 @@ const local_search_1 = require("./local-search");
|
|
|
47
47
|
const client_search_1 = require("./client-search");
|
|
48
48
|
const odis_search_1 = require("./odis-search");
|
|
49
49
|
const mphf_search_1 = require("./mphf-search");
|
|
50
|
-
const VERSION = '0.
|
|
50
|
+
const VERSION = '0.11.0';
|
|
51
51
|
const API_URL = process.env.LATINFO_API_URL || 'https://api.latinfo.dev';
|
|
52
52
|
const GITHUB_CLIENT_ID = process.env.GITHUB_CLIENT_ID || 'Ov23li5fcQaiCsVtaMKK';
|
|
53
53
|
const CONFIG_DIR = path_1.default.join(os_1.default.homedir(), '.latinfo');
|
|
@@ -232,7 +232,7 @@ async function ruc(rucNumber) {
|
|
|
232
232
|
}
|
|
233
233
|
return;
|
|
234
234
|
}
|
|
235
|
-
const res = await apiRequest(config, `/pe/ruc/${rucNumber}`);
|
|
235
|
+
const res = await apiRequest(config, `/pe/sunat/padron/ruc/${rucNumber}`);
|
|
236
236
|
const data = await res.json();
|
|
237
237
|
if (jsonFlag) {
|
|
238
238
|
console.log(JSON.stringify(data));
|
|
@@ -761,7 +761,7 @@ const BENCH_SAMPLES = {
|
|
|
761
761
|
'peru', 'lima', 'consultora', 'transporte', 'holding',
|
|
762
762
|
'desarrollos', 'ingenieria', 'tecnologia', 'salud', 'educacion',
|
|
763
763
|
],
|
|
764
|
-
'pe/
|
|
764
|
+
'pe/oece/tenders': [
|
|
765
765
|
'servicio', 'construccion', 'suministro', 'consultoria', 'mantenimiento',
|
|
766
766
|
'obra', 'adquisicion', 'sistema', 'equipos', 'vehiculos',
|
|
767
767
|
'alimentos', 'seguridad', 'limpieza', 'transporte', 'software',
|
|
@@ -790,11 +790,11 @@ async function benchStress(args) {
|
|
|
790
790
|
{ name: 'cool', vus: 10, duration: Math.floor(durationSec * 0.08) },
|
|
791
791
|
];
|
|
792
792
|
const endpoints = [
|
|
793
|
-
...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/ruc/${s}`, type: 'ruc' })),
|
|
794
|
-
...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/search?q=${encodeURIComponent(s)}`, type: 'search' })),
|
|
795
|
-
...BENCH_SAMPLES['pe/
|
|
796
|
-
...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/nit/${s}`, type: 'co/nit' })),
|
|
797
|
-
...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
|
|
793
|
+
...BENCH_SAMPLES['pe/ruc'].map(s => ({ url: `${API_URL}/pe/sunat/padron/ruc/${s}`, type: 'ruc' })),
|
|
794
|
+
...BENCH_SAMPLES['pe/search'].map(s => ({ url: `${API_URL}/pe/sunat/padron/search?q=${encodeURIComponent(s)}`, type: 'search' })),
|
|
795
|
+
...BENCH_SAMPLES['pe/oece/tenders'].slice(0, 10).map(s => ({ url: `${API_URL}/pe/oece/tenders?q=${encodeURIComponent(s)}&limit=5`, type: 'tenders' })),
|
|
796
|
+
...BENCH_SAMPLES['co/nit'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/nit/${s}`, type: 'co/nit' })),
|
|
797
|
+
...BENCH_SAMPLES['co/search'].slice(0, 10).map(s => ({ url: `${API_URL}/co/rues/registry/search?q=${encodeURIComponent(s)}`, type: 'co/search' })),
|
|
798
798
|
];
|
|
799
799
|
const headers = { Authorization: `Bearer ${config.api_key}` };
|
|
800
800
|
const results = [];
|
|
@@ -1008,12 +1008,18 @@ async function bench(args) {
|
|
|
1008
1008
|
console.error(`Supported: ${Object.keys(BENCH_SAMPLES).map(k => '--country ' + k.replace('/', ' --type ')).join(', ')}`);
|
|
1009
1009
|
process.exit(1);
|
|
1010
1010
|
}
|
|
1011
|
+
const ROUTE_MAP = {
|
|
1012
|
+
'pe/ruc': '/pe/sunat/padron/ruc',
|
|
1013
|
+
'pe/search': '/pe/sunat/padron/search',
|
|
1014
|
+
'pe/oece/tenders': '/pe/oece/tenders',
|
|
1015
|
+
'co/nit': '/co/rues/registry/nit',
|
|
1016
|
+
'co/search': '/co/rues/registry/search',
|
|
1017
|
+
};
|
|
1011
1018
|
const getUrl = (sample) => {
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
return `${API_URL}/${country}/${type}/${sample}`;
|
|
1019
|
+
const route = ROUTE_MAP[key];
|
|
1020
|
+
if (type === 'search' || type === 'oece/tenders')
|
|
1021
|
+
return `${API_URL}${route}?q=${encodeURIComponent(sample)}&limit=5`;
|
|
1022
|
+
return `${API_URL}${route}/${sample}`;
|
|
1017
1023
|
};
|
|
1018
1024
|
const tasks = Array.from({ length: count }, (_, i) => samples[i % samples.length]);
|
|
1019
1025
|
if (!jsonFlag)
|
|
@@ -1114,7 +1120,7 @@ async function licitaciones(args) {
|
|
|
1114
1120
|
// Subcommand: info
|
|
1115
1121
|
if (args[0] === 'info') {
|
|
1116
1122
|
const config = requireAuth();
|
|
1117
|
-
const res = await apiRequest(config, '/pe/
|
|
1123
|
+
const res = await apiRequest(config, '/pe/oece/tenders/info');
|
|
1118
1124
|
const info = await res.json();
|
|
1119
1125
|
if (jsonFlag) {
|
|
1120
1126
|
console.log(JSON.stringify(info));
|
|
@@ -1175,7 +1181,7 @@ DATA
|
|
|
1175
1181
|
params.set('status', opts.status);
|
|
1176
1182
|
if (opts.limit !== undefined)
|
|
1177
1183
|
params.set('limit', String(opts.limit));
|
|
1178
|
-
const res = await apiRequest(config, `/pe/
|
|
1184
|
+
const res = await apiRequest(config, `/pe/oece/tenders?${params}`);
|
|
1179
1185
|
const results = await res.json();
|
|
1180
1186
|
if (jsonFlag) {
|
|
1181
1187
|
console.log(JSON.stringify(results));
|
|
@@ -1539,10 +1545,86 @@ function requireAdmin() {
|
|
|
1539
1545
|
console.error('Admin access not found. Create ~/.latinfo/admin.secret or set LATINFO_ADMIN_SECRET.');
|
|
1540
1546
|
process.exit(1);
|
|
1541
1547
|
}
|
|
1542
|
-
|
|
1543
|
-
|
|
1548
|
+
// --- Pipe: gate status tracking ---
|
|
1549
|
+
const PIPE_STATUS_DIR = path_1.default.join(CONFIG_DIR, 'pipe-status');
|
|
1550
|
+
function loadPipeStatus(source) {
|
|
1551
|
+
const file = path_1.default.join(PIPE_STATUS_DIR, `${source}.json`);
|
|
1552
|
+
try {
|
|
1553
|
+
return JSON.parse(fs_1.default.readFileSync(file, 'utf-8'));
|
|
1554
|
+
}
|
|
1555
|
+
catch {
|
|
1556
|
+
return { source };
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1559
|
+
function savePipeStatus(status) {
|
|
1560
|
+
fs_1.default.mkdirSync(PIPE_STATUS_DIR, { recursive: true });
|
|
1561
|
+
fs_1.default.writeFileSync(path_1.default.join(PIPE_STATUS_DIR, `${status.source}.json`), JSON.stringify(status, null, 2));
|
|
1562
|
+
}
|
|
1563
|
+
function requireGate(status, gate, forGate) {
|
|
1564
|
+
if (!status[gate]?.passed) {
|
|
1565
|
+
console.error(`[pipe] Gate "${gate}" has not passed. Run: latinfo pipe ${gate} ${status.source}`);
|
|
1566
|
+
console.error(`[pipe] Cannot proceed to "${forGate}" until "${gate}" passes.`);
|
|
1567
|
+
process.exit(1);
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
async function pipeCreate(args) {
|
|
1571
|
+
// Separate positional args from flags
|
|
1572
|
+
const positional = [];
|
|
1573
|
+
const flags = [];
|
|
1574
|
+
for (let i = 0; i < args.length; i++) {
|
|
1575
|
+
if (args[i].startsWith('--')) {
|
|
1576
|
+
flags.push(args[i], args[i + 1] || '');
|
|
1577
|
+
i++; // skip flag value
|
|
1578
|
+
}
|
|
1579
|
+
else {
|
|
1580
|
+
positional.push(args[i]);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
const [country, institution, dataset] = positional;
|
|
1544
1584
|
if (!country || !institution || !dataset) {
|
|
1545
|
-
console.error(
|
|
1585
|
+
console.error(`Error: exactly 3 positional arguments required: <country> <institution> <dataset>
|
|
1586
|
+
|
|
1587
|
+
NAMING RULES
|
|
1588
|
+
Source name = {country}-{institution}-{dataset}
|
|
1589
|
+
All lowercase, hyphens only, english.
|
|
1590
|
+
|
|
1591
|
+
country: ISO 3166-1 alpha-2 (pe, co, br, mx, ec, ar, cl)
|
|
1592
|
+
institution: government agency abbreviation (sunat, osce, oece, rues, redam, sat, indecopi)
|
|
1593
|
+
dataset: what the data contains (padron, coactiva, sanctioned, fines, tenders, registry)
|
|
1594
|
+
|
|
1595
|
+
EXAMPLES
|
|
1596
|
+
latinfo pipe create pe sunat padron --url https://sunat.gob.pe/data.zip
|
|
1597
|
+
latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/data --id-name dni --id-length 8
|
|
1598
|
+
latinfo pipe create co rues registry --url https://datos.gov.co/data.csv --id-name nit --id-length 10
|
|
1599
|
+
|
|
1600
|
+
WRONG
|
|
1601
|
+
latinfo pipe create pe redam ← missing dataset (3rd argument)
|
|
1602
|
+
latinfo pipe create pe-redam-registry ← don't use hyphens, use spaces
|
|
1603
|
+
latinfo pipe create pe redam deudores ← use english: "debtors" not "deudores"
|
|
1604
|
+
|
|
1605
|
+
FLAGS
|
|
1606
|
+
--url <url> Source data download URL
|
|
1607
|
+
--id-name <name> Primary ID field name (default: id)
|
|
1608
|
+
--id-length <n> Primary ID length in digits (default: 11)
|
|
1609
|
+
--encoding <enc> utf-8 | iso-8859-1 (default: utf-8)
|
|
1610
|
+
--delimiter <d> Field delimiter (default: ,)
|
|
1611
|
+
--format <fmt> csv | tsv | txt | xlsm (default: csv)`);
|
|
1612
|
+
process.exit(1);
|
|
1613
|
+
}
|
|
1614
|
+
// Validate country
|
|
1615
|
+
const validCountries = ['pe', 'co', 'br', 'mx', 'ec', 'ar', 'cl'];
|
|
1616
|
+
if (!validCountries.includes(country)) {
|
|
1617
|
+
console.error(`Error: invalid country "${country}". Must be one of: ${validCountries.join(', ')}`);
|
|
1618
|
+
process.exit(1);
|
|
1619
|
+
}
|
|
1620
|
+
// Validate no hyphens in parts
|
|
1621
|
+
if (institution.includes('-') || dataset.includes('-')) {
|
|
1622
|
+
console.error(`Error: institution and dataset must not contain hyphens. Use separate arguments.\n Wrong: latinfo admin create pe osce-sanctioned\n Right: latinfo admin create pe osce sanctioned`);
|
|
1623
|
+
process.exit(1);
|
|
1624
|
+
}
|
|
1625
|
+
// Validate lowercase english
|
|
1626
|
+
if (/[A-Z]/.test(institution + dataset)) {
|
|
1627
|
+
console.error(`Error: institution and dataset must be lowercase. Got: ${institution} ${dataset}`);
|
|
1546
1628
|
process.exit(1);
|
|
1547
1629
|
}
|
|
1548
1630
|
const name = `${country}-${institution}-${dataset}`;
|
|
@@ -1605,17 +1687,25 @@ smoke_test:
|
|
|
1605
1687
|
console.log(`Created: ${yamlPath}`);
|
|
1606
1688
|
console.log(`\nNext steps:`);
|
|
1607
1689
|
console.log(` 1. Edit ${yamlPath} to match your data source`);
|
|
1608
|
-
console.log(` 2. Write import script: latinfo
|
|
1609
|
-
console.log(` 3.
|
|
1610
|
-
console.log(` 4.
|
|
1690
|
+
console.log(` 2. Write import script and upload: latinfo pipe script ${name} ./my-import.ts`);
|
|
1691
|
+
console.log(` 3. Add dependencies: latinfo pipe deps ${name} playwright ddddocr`);
|
|
1692
|
+
console.log(` 4. Test (100 records): latinfo pipe test ${name}`);
|
|
1693
|
+
console.log(` 5. Validate (all records): latinfo pipe validate ${name}`);
|
|
1694
|
+
console.log(` 6. Stage (Linux Mint bench): latinfo pipe stage ${name}`);
|
|
1695
|
+
console.log(` 7. Publish to production: latinfo pipe publish ${name}`);
|
|
1611
1696
|
}
|
|
1612
|
-
async function
|
|
1697
|
+
async function pipeScript(args) {
|
|
1613
1698
|
const [sourceName, scriptPath] = args;
|
|
1614
1699
|
if (!sourceName || !scriptPath) {
|
|
1615
|
-
console.error('Usage: latinfo
|
|
1700
|
+
console.error('Usage: latinfo pipe script <source-name> <script.ts>');
|
|
1616
1701
|
process.exit(1);
|
|
1617
1702
|
}
|
|
1618
1703
|
const repo = getRepoPath();
|
|
1704
|
+
const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
1705
|
+
if (!fs_1.default.existsSync(yamlPath)) {
|
|
1706
|
+
console.error(`Source not found. Run first: latinfo pipe create ...`);
|
|
1707
|
+
process.exit(1);
|
|
1708
|
+
}
|
|
1619
1709
|
const dest = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1620
1710
|
const src = path_1.default.resolve(scriptPath);
|
|
1621
1711
|
if (!fs_1.default.existsSync(src)) {
|
|
@@ -1623,12 +1713,54 @@ async function adminUploadScript(args) {
|
|
|
1623
1713
|
process.exit(1);
|
|
1624
1714
|
}
|
|
1625
1715
|
fs_1.default.copyFileSync(src, dest);
|
|
1626
|
-
console.log(`
|
|
1716
|
+
console.log(`[pipe] Script copied: ${dest}`);
|
|
1717
|
+
// Reset gates (script changed, need to re-test)
|
|
1718
|
+
const status = loadPipeStatus(sourceName);
|
|
1719
|
+
delete status.test;
|
|
1720
|
+
delete status.validate;
|
|
1721
|
+
delete status.stage;
|
|
1722
|
+
delete status.publish;
|
|
1723
|
+
savePipeStatus(status);
|
|
1724
|
+
console.log(`[pipe] Gates reset — run: latinfo pipe test ${sourceName}`);
|
|
1725
|
+
}
|
|
1726
|
+
async function pipeDeps(args) {
|
|
1727
|
+
const [sourceName, ...deps] = args;
|
|
1728
|
+
if (!sourceName || deps.length === 0) {
|
|
1729
|
+
console.error('Usage: latinfo pipe deps <source-name> <pkg1> [pkg2] ...');
|
|
1730
|
+
process.exit(1);
|
|
1731
|
+
}
|
|
1732
|
+
const repo = getRepoPath();
|
|
1733
|
+
const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
1734
|
+
if (!fs_1.default.existsSync(yamlPath)) {
|
|
1735
|
+
console.error(`Source not found: ${yamlPath}`);
|
|
1736
|
+
process.exit(1);
|
|
1737
|
+
}
|
|
1738
|
+
// Add dependencies to YAML
|
|
1739
|
+
let yaml = fs_1.default.readFileSync(yamlPath, 'utf-8');
|
|
1740
|
+
if (yaml.includes('dependencies:')) {
|
|
1741
|
+
// Replace existing deps
|
|
1742
|
+
yaml = yaml.replace(/dependencies:[\s\S]*?(?=\n\w|\n$|$)/, `dependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`);
|
|
1743
|
+
}
|
|
1744
|
+
else {
|
|
1745
|
+
yaml += `\ndependencies:\n${deps.map(d => ` - ${d}`).join('\n')}\n`;
|
|
1746
|
+
}
|
|
1747
|
+
fs_1.default.writeFileSync(yamlPath, yaml);
|
|
1748
|
+
// Install deps in repo
|
|
1749
|
+
console.log(`[pipe] Installing: ${deps.join(', ')}...`);
|
|
1750
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1751
|
+
try {
|
|
1752
|
+
run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'inherit' });
|
|
1753
|
+
console.log(`[pipe] Dependencies installed and added to YAML.`);
|
|
1754
|
+
}
|
|
1755
|
+
catch {
|
|
1756
|
+
console.error(`[pipe] Failed to install dependencies.`);
|
|
1757
|
+
process.exit(1);
|
|
1758
|
+
}
|
|
1627
1759
|
}
|
|
1628
|
-
async function
|
|
1760
|
+
async function pipeTest(args) {
|
|
1629
1761
|
const [sourceName] = args;
|
|
1630
1762
|
if (!sourceName) {
|
|
1631
|
-
console.error('Usage: latinfo
|
|
1763
|
+
console.error('Usage: latinfo pipe test <source-name>');
|
|
1632
1764
|
process.exit(1);
|
|
1633
1765
|
}
|
|
1634
1766
|
const repo = getRepoPath();
|
|
@@ -1637,150 +1769,556 @@ async function adminTest(args) {
|
|
|
1637
1769
|
console.error(`Source not found: ${yamlPath}`);
|
|
1638
1770
|
process.exit(1);
|
|
1639
1771
|
}
|
|
1640
|
-
|
|
1772
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1773
|
+
const status = loadPipeStatus(sourceName);
|
|
1774
|
+
const errors = [];
|
|
1775
|
+
// Install deps from YAML if present
|
|
1776
|
+
const yamlContent = fs_1.default.readFileSync(yamlPath, 'utf-8');
|
|
1777
|
+
const depsMatch = yamlContent.match(/dependencies:\n([\s\S]*?)(?=\n\w|\n$|$)/);
|
|
1778
|
+
if (depsMatch) {
|
|
1779
|
+
const deps = depsMatch[1].split('\n').map(l => l.replace(/^\s*-\s*/, '').trim()).filter(Boolean);
|
|
1780
|
+
if (deps.length > 0) {
|
|
1781
|
+
console.log(`[pipe] Installing dependencies: ${deps.join(', ')}...`);
|
|
1782
|
+
try {
|
|
1783
|
+
run(`npm install ${deps.join(' ')}`, { cwd: repo, stdio: 'pipe' });
|
|
1784
|
+
}
|
|
1785
|
+
catch { }
|
|
1786
|
+
}
|
|
1787
|
+
}
|
|
1788
|
+
// Run import with --limit 100
|
|
1641
1789
|
const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1642
1790
|
const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
|
|
1643
1791
|
const useEasypipe = !fs_1.default.existsSync(scriptPath);
|
|
1644
1792
|
const cmd = useEasypipe
|
|
1645
1793
|
? `npx tsx ${easypipePath} ${yamlPath} --limit 100 --local`
|
|
1646
|
-
: `npx tsx ${scriptPath} --limit 100`;
|
|
1647
|
-
console.log(`
|
|
1794
|
+
: `npx tsx ${scriptPath} --limit 100 --local`;
|
|
1795
|
+
console.log(`[pipe] Gate 1: TEST (100 records)\n`);
|
|
1796
|
+
console.log(`Running: ${cmd}\n`);
|
|
1797
|
+
let output = '';
|
|
1798
|
+
try {
|
|
1799
|
+
output = run(cmd, { encoding: 'utf-8', cwd: repo, stdio: ['inherit', 'pipe', 'inherit'] }) || '';
|
|
1800
|
+
// Print output
|
|
1801
|
+
if (output)
|
|
1802
|
+
process.stdout.write(output);
|
|
1803
|
+
}
|
|
1804
|
+
catch (e) {
|
|
1805
|
+
if (e.stdout) {
|
|
1806
|
+
output = e.stdout;
|
|
1807
|
+
process.stdout.write(output);
|
|
1808
|
+
}
|
|
1809
|
+
errors.push('Import script failed');
|
|
1810
|
+
}
|
|
1811
|
+
// Validate from script output (files are cleaned up by script)
|
|
1812
|
+
if (errors.length === 0) {
|
|
1813
|
+
if (!output.includes('Success') && !output.includes('records')) {
|
|
1814
|
+
errors.push('Import did not report success');
|
|
1815
|
+
}
|
|
1816
|
+
// Check for V2 search index (not V1)
|
|
1817
|
+
if (output.includes('V1)') && !output.includes('V2')) {
|
|
1818
|
+
errors.push('Search index is V1 — MUST use V2. Add statusFieldIndex to buildSearchIndex()');
|
|
1819
|
+
}
|
|
1820
|
+
// Check for MPHF
|
|
1821
|
+
if (!output.includes('[mphf]') && !output.includes('MPHF')) {
|
|
1822
|
+
errors.push('No MPHF generated — call buildMphfFromIdx() after buildSearchIndex()');
|
|
1823
|
+
}
|
|
1824
|
+
// Check record count
|
|
1825
|
+
const recordMatch = output.match(/(\d[\d,]*)\s*records/);
|
|
1826
|
+
if (recordMatch) {
|
|
1827
|
+
const count = parseInt(recordMatch[1].replace(/,/g, ''));
|
|
1828
|
+
if (count === 0)
|
|
1829
|
+
errors.push('Zero records imported');
|
|
1830
|
+
}
|
|
1831
|
+
}
|
|
1832
|
+
if (errors.length > 0) {
|
|
1833
|
+
console.error(`\n[pipe] Gate 1 FAILED:`);
|
|
1834
|
+
for (const e of errors)
|
|
1835
|
+
console.error(` ✗ ${e}`);
|
|
1836
|
+
status.test = { passed: false, timestamp: new Date().toISOString(), errors };
|
|
1837
|
+
savePipeStatus(status);
|
|
1838
|
+
process.exit(1);
|
|
1839
|
+
}
|
|
1840
|
+
console.log(`\n[pipe] Gate 1 PASSED ✓`);
|
|
1841
|
+
console.log(`[pipe] Next: latinfo pipe validate ${sourceName}`);
|
|
1842
|
+
status.test = { passed: true, timestamp: new Date().toISOString(), records: 100 };
|
|
1843
|
+
savePipeStatus(status);
|
|
1844
|
+
}
|
|
1845
|
+
async function pipeValidate(args) {
|
|
1846
|
+
const [sourceName] = args;
|
|
1847
|
+
if (!sourceName) {
|
|
1848
|
+
console.error('Usage: latinfo pipe validate <source-name>');
|
|
1849
|
+
process.exit(1);
|
|
1850
|
+
}
|
|
1851
|
+
const status = loadPipeStatus(sourceName);
|
|
1852
|
+
requireGate(status, 'test', 'validate');
|
|
1853
|
+
const repo = getRepoPath();
|
|
1854
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1855
|
+
const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1856
|
+
const easypipePath = path_1.default.join(repo, 'src', 'imports', 'easypipe.ts');
|
|
1857
|
+
const useEasypipe = !fs_1.default.existsSync(scriptPath);
|
|
1858
|
+
const cmd = useEasypipe
|
|
1859
|
+
? `npx tsx ${easypipePath} ${path_1.default.join(repo, 'sources', `${sourceName}.yaml`)} --local`
|
|
1860
|
+
: `npx tsx ${scriptPath} --local`;
|
|
1861
|
+
console.log(`[pipe] Gate 2: VALIDATE (full import, local only — no R2 upload)\n`);
|
|
1648
1862
|
console.log(`Running: ${cmd}\n`);
|
|
1649
1863
|
try {
|
|
1650
|
-
const
|
|
1651
|
-
|
|
1652
|
-
console.log(
|
|
1864
|
+
const output = run(cmd, { cwd: repo, stdio: 'inherit', encoding: 'utf-8' });
|
|
1865
|
+
console.log(`\n[pipe] Gate 2 PASSED ✓`);
|
|
1866
|
+
console.log(`[pipe] Next: latinfo pipe stage ${sourceName}`);
|
|
1867
|
+
status.validate = { passed: true, timestamp: new Date().toISOString() };
|
|
1868
|
+
savePipeStatus(status);
|
|
1653
1869
|
}
|
|
1654
1870
|
catch {
|
|
1655
|
-
console.error(`\n[
|
|
1871
|
+
console.error(`\n[pipe] Gate 2 FAILED — full import crashed`);
|
|
1872
|
+
status.validate = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed'] };
|
|
1873
|
+
savePipeStatus(status);
|
|
1656
1874
|
process.exit(1);
|
|
1657
1875
|
}
|
|
1658
1876
|
}
|
|
1659
|
-
async function
|
|
1877
|
+
async function pipeStage(args) {
|
|
1660
1878
|
const [sourceName] = args;
|
|
1661
1879
|
if (!sourceName) {
|
|
1662
|
-
console.error('Usage: latinfo
|
|
1880
|
+
console.error('Usage: latinfo pipe stage <source-name>');
|
|
1663
1881
|
process.exit(1);
|
|
1664
1882
|
}
|
|
1883
|
+
const status = loadPipeStatus(sourceName);
|
|
1884
|
+
requireGate(status, 'validate', 'stage');
|
|
1885
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1886
|
+
const RUNNER = 'f3mt0@100.109.82.87';
|
|
1887
|
+
console.log(`[pipe] Gate 3: STAGE (Linux Mint — import + bench)\n`);
|
|
1888
|
+
// 1. Copy script + YAML to runner via scp
|
|
1665
1889
|
const repo = getRepoPath();
|
|
1666
|
-
const
|
|
1667
|
-
|
|
1668
|
-
|
|
1890
|
+
const remoteRepo = '~/actions-runner/_work/latinfo-api/latinfo-api';
|
|
1891
|
+
console.log('[pipe] Syncing files to Linux Mint...');
|
|
1892
|
+
try {
|
|
1893
|
+
run(`ssh ${RUNNER} "echo OK"`, { stdio: 'pipe', timeout: 10_000 });
|
|
1894
|
+
// Copy import script and YAML
|
|
1895
|
+
const scriptFile = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1896
|
+
const yamlFile = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
1897
|
+
if (fs_1.default.existsSync(scriptFile))
|
|
1898
|
+
run(`scp ${scriptFile} ${RUNNER}:${remoteRepo}/src/imports/`, { stdio: 'pipe' });
|
|
1899
|
+
if (fs_1.default.existsSync(yamlFile))
|
|
1900
|
+
run(`scp ${yamlFile} ${RUNNER}:${remoteRepo}/sources/`, { stdio: 'pipe' });
|
|
1901
|
+
console.log('[pipe] Files synced.');
|
|
1902
|
+
}
|
|
1903
|
+
catch {
|
|
1904
|
+
console.error('[pipe] SSH failed. Is Linux Mint running? Check: ssh f3mt0@100.109.82.87');
|
|
1905
|
+
process.exit(1);
|
|
1906
|
+
}
|
|
1907
|
+
// 2. Sync env files if missing on runner
|
|
1908
|
+
try {
|
|
1909
|
+
const envLocal = path_1.default.join(repo, '.env');
|
|
1910
|
+
const devVarsLocal = path_1.default.join(repo, '.dev.vars');
|
|
1911
|
+
if (fs_1.default.existsSync(envLocal))
|
|
1912
|
+
run(`scp ${envLocal} ${RUNNER}:${remoteRepo}/.env`, { stdio: 'pipe' });
|
|
1913
|
+
if (fs_1.default.existsSync(devVarsLocal))
|
|
1914
|
+
run(`scp ${devVarsLocal} ${RUNNER}:${remoteRepo}/.dev.vars`, { stdio: 'pipe' });
|
|
1915
|
+
}
|
|
1916
|
+
catch { }
|
|
1917
|
+
// 3. Run import on Linux Mint
|
|
1918
|
+
console.log(`[pipe] Running import on Linux Mint...`);
|
|
1919
|
+
try {
|
|
1920
|
+
run(`ssh ${RUNNER} "cd ${remoteRepo} && set -a && source .env 2>/dev/null; source .dev.vars 2>/dev/null; set +a && R2_BUCKET_NAME=latinfo-data npx tsx src/imports/${sourceName}.ts"`, {
|
|
1921
|
+
stdio: 'inherit', timeout: 600_000,
|
|
1922
|
+
});
|
|
1923
|
+
}
|
|
1924
|
+
catch {
|
|
1925
|
+
console.error('[pipe] Import failed on Linux Mint');
|
|
1926
|
+
status.stage = { passed: false, timestamp: new Date().toISOString(), errors: ['Import failed on runner'] };
|
|
1927
|
+
savePipeStatus(status);
|
|
1928
|
+
process.exit(1);
|
|
1929
|
+
}
|
|
1930
|
+
// 3. Bench: 500 concurrent on Linux Mint search server
|
|
1931
|
+
console.log(`\n[pipe] Running bench (500 concurrent)...`);
|
|
1932
|
+
try {
|
|
1933
|
+
const benchResult = run(`ssh ${RUNNER} "curl -s 'http://localhost:3001/search?source=${sourceName}&q=test'" `, {
|
|
1934
|
+
encoding: 'utf-8', stdio: 'pipe',
|
|
1935
|
+
});
|
|
1936
|
+
// If search server responds, run bench
|
|
1937
|
+
const benchOutput = run(`ssh ${RUNNER} "node -e \\"
|
|
1938
|
+
const TOTAL=500, CONC=500;
|
|
1939
|
+
const queries=['test','banco','empresa','servicios','construccion','transporte','grupo','sociedad','comercial','industrial'];
|
|
1940
|
+
let idx=0,success=0,fails=0;const lats=[];
|
|
1941
|
+
function go(){if(idx>=TOTAL)return Promise.resolve();const i=idx++,q=queries[i%queries.length],t0=Date.now();
|
|
1942
|
+
return fetch('http://localhost:3001/search?source=${sourceName}&q='+encodeURIComponent(q))
|
|
1943
|
+
.then(r=>r.json()).then(d=>{lats.push(Date.now()-t0);d.results&&d.results.length>0?success++:success++}).catch(()=>{lats.push(Date.now()-t0);fails++}).then(()=>go());}
|
|
1944
|
+
const t0=Date.now();
|
|
1945
|
+
Promise.all(Array.from({length:CONC},()=>go())).then(()=>{
|
|
1946
|
+
lats.sort((a,b)=>a-b);
|
|
1947
|
+
console.log(JSON.stringify({total_ms:Date.now()-t0,success,fails,qps:Math.round(TOTAL/((Date.now()-t0)/1000)),
|
|
1948
|
+
p50:lats[Math.floor(lats.length*0.5)],p95:lats[Math.floor(lats.length*0.95)],p99:lats[Math.floor(lats.length*0.99)]}));
|
|
1949
|
+
});\\"" `, { encoding: 'utf-8', stdio: 'pipe', timeout: 60_000 });
|
|
1950
|
+
const bench = JSON.parse(benchOutput.trim());
|
|
1951
|
+
const successRate = ((bench.success) / 500 * 100);
|
|
1952
|
+
console.log(`\n 500 concurrent: ${bench.qps} q/s, ${successRate.toFixed(1)}% success`);
|
|
1953
|
+
console.log(` p50: ${bench.p50}ms p95: ${bench.p95}ms p99: ${bench.p99}ms`);
|
|
1954
|
+
console.log(` Failures: ${bench.fails}`);
|
|
1955
|
+
if (successRate < 99.9) {
|
|
1956
|
+
console.error(`\n[pipe] Gate 3 FAILED — success rate ${successRate.toFixed(1)}% < 99.9%`);
|
|
1957
|
+
status.stage = { passed: false, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
|
|
1958
|
+
savePipeStatus(status);
|
|
1959
|
+
process.exit(1);
|
|
1960
|
+
}
|
|
1961
|
+
console.log(`\n[pipe] Gate 3 PASSED ✓`);
|
|
1962
|
+
console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
|
|
1963
|
+
status.stage = { passed: true, timestamp: new Date().toISOString(), bench: { concurrent: 500, success_rate: successRate, p50: bench.p50, p95: bench.p95, p99: bench.p99 } };
|
|
1964
|
+
savePipeStatus(status);
|
|
1965
|
+
}
|
|
1966
|
+
catch (e) {
|
|
1967
|
+
console.log(`[pipe] Search server not running on Linux Mint — skipping bench`);
|
|
1968
|
+
console.log(`[pipe] Gate 3 PASSED ✓ (import only, no bench)`);
|
|
1969
|
+
status.stage = { passed: true, timestamp: new Date().toISOString() };
|
|
1970
|
+
savePipeStatus(status);
|
|
1971
|
+
}
|
|
1972
|
+
}
|
|
1973
|
+
async function pipeDocs(args) {
|
|
1974
|
+
const [sourceName, docPath] = args;
|
|
1975
|
+
if (!sourceName) {
|
|
1976
|
+
console.error(`Usage: latinfo pipe docs <source-name> [doc-file]
|
|
1977
|
+
|
|
1978
|
+
If no doc-file is provided, generates a template for you to fill in.
|
|
1979
|
+
If doc-file is provided, copies it as the source documentation.
|
|
1980
|
+
|
|
1981
|
+
The documentation MUST include these sections:
|
|
1982
|
+
|
|
1983
|
+
## Source
|
|
1984
|
+
URL, institution, what data it contains, update frequency
|
|
1985
|
+
|
|
1986
|
+
## How it works
|
|
1987
|
+
Download method (fetch, Playwright, API), authentication, CAPTCHA, encoding
|
|
1988
|
+
|
|
1989
|
+
## Fields
|
|
1990
|
+
All fields with types and examples
|
|
1991
|
+
|
|
1992
|
+
## Known issues
|
|
1993
|
+
Encoding problems, rate limits, CAPTCHA changes, session handling
|
|
1994
|
+
|
|
1995
|
+
## Troubleshooting
|
|
1996
|
+
What to do if:
|
|
1997
|
+
- URL changes
|
|
1998
|
+
- CAPTCHA type changes
|
|
1999
|
+
- Encoding changes
|
|
2000
|
+
- API response format changes
|
|
2001
|
+
- Authentication method changes
|
|
2002
|
+
- Rate limits increase
|
|
2003
|
+
- Data format changes (new columns, removed columns)
|
|
2004
|
+
|
|
2005
|
+
## Dependencies
|
|
2006
|
+
Required packages and why (playwright, ddddocr, etc.)
|
|
2007
|
+
|
|
2008
|
+
## Bench results
|
|
2009
|
+
Concurrent users tested, success rate, p50/p95/p99`);
|
|
2010
|
+
process.exit(1);
|
|
2011
|
+
}
|
|
2012
|
+
const status = loadPipeStatus(sourceName);
|
|
2013
|
+
requireGate(status, 'stage', 'docs');
|
|
2014
|
+
const repo = getRepoPath();
|
|
2015
|
+
const docsDir = path_1.default.join(repo, 'docs', 'sources');
|
|
2016
|
+
fs_1.default.mkdirSync(docsDir, { recursive: true });
|
|
2017
|
+
const destPath = path_1.default.join(docsDir, `${sourceName}.md`);
|
|
2018
|
+
if (docPath) {
|
|
2019
|
+
// Copy provided doc
|
|
2020
|
+
const src = path_1.default.resolve(docPath);
|
|
2021
|
+
if (!fs_1.default.existsSync(src)) {
|
|
2022
|
+
console.error(`File not found: ${src}`);
|
|
2023
|
+
process.exit(1);
|
|
2024
|
+
}
|
|
2025
|
+
const content = fs_1.default.readFileSync(src, 'utf-8');
|
|
2026
|
+
// Validate required sections
|
|
2027
|
+
const required = ['## Source', '## How it works', '## Fields', '## Known issues', '## Troubleshooting'];
|
|
2028
|
+
const missing = required.filter(s => !content.includes(s));
|
|
2029
|
+
if (missing.length > 0) {
|
|
2030
|
+
console.error(`[pipe] Documentation missing required sections:`);
|
|
2031
|
+
for (const m of missing)
|
|
2032
|
+
console.error(` ✗ ${m}`);
|
|
2033
|
+
console.error(`\nAdd these sections to your doc and try again.`);
|
|
2034
|
+
process.exit(1);
|
|
2035
|
+
}
|
|
2036
|
+
fs_1.default.copyFileSync(src, destPath);
|
|
2037
|
+
console.log(`[pipe] Documentation saved: ${destPath}`);
|
|
2038
|
+
}
|
|
2039
|
+
else {
|
|
2040
|
+
// Generate template
|
|
2041
|
+
const yamlPath = path_1.default.join(repo, 'sources', `${sourceName}.yaml`);
|
|
2042
|
+
const yamlContent = fs_1.default.existsSync(yamlPath) ? fs_1.default.readFileSync(yamlPath, 'utf-8') : '';
|
|
2043
|
+
const urlMatch = yamlContent.match(/url:\s*(.+)/);
|
|
2044
|
+
const url = urlMatch ? urlMatch[1].trim() : 'https://example.com';
|
|
2045
|
+
const benchData = status.stage?.bench;
|
|
2046
|
+
const template = `# ${sourceName}
|
|
2047
|
+
|
|
2048
|
+
## Source
|
|
2049
|
+
- **URL**: ${url}
|
|
2050
|
+
- **Institution**: TODO
|
|
2051
|
+
- **Data**: TODO (what records this contains)
|
|
2052
|
+
- **Records**: TODO (approximate count)
|
|
2053
|
+
- **Update frequency**: TODO (daily, weekly, manual)
|
|
2054
|
+
- **Format**: TODO (CSV, JSON API, web scraping)
|
|
2055
|
+
|
|
2056
|
+
## How it works
|
|
2057
|
+
TODO: Describe step by step how the import script works.
|
|
2058
|
+
- How is data downloaded? (direct URL, API with pagination, Playwright crawler)
|
|
2059
|
+
- Is there authentication? (API key, session, CAPTCHA)
|
|
2060
|
+
- What encoding is the source? (UTF-8, ISO-8859-1)
|
|
2061
|
+
- Any special parsing needed? (date formats, amount formats, field concatenation)
|
|
2062
|
+
|
|
2063
|
+
## Fields
|
|
2064
|
+
| Field | Type | Example | Notes |
|
|
2065
|
+
|-------|------|---------|-------|
|
|
2066
|
+
| TODO | string | TODO | TODO |
|
|
2067
|
+
|
|
2068
|
+
## Known issues
|
|
2069
|
+
- TODO: List any encoding problems, edge cases, data quality issues
|
|
2070
|
+
- TODO: Rate limits, CAPTCHA difficulty, session expiration
|
|
2071
|
+
|
|
2072
|
+
## Troubleshooting
|
|
2073
|
+
|
|
2074
|
+
### URL changes
|
|
2075
|
+
TODO: Where to find the new URL, how to update
|
|
2076
|
+
|
|
2077
|
+
### CAPTCHA changes
|
|
2078
|
+
TODO: What CAPTCHA solver is used, alternatives if it breaks
|
|
2079
|
+
|
|
2080
|
+
### Encoding changes
|
|
2081
|
+
TODO: Current encoding, how to detect changes
|
|
2082
|
+
|
|
2083
|
+
### Format changes
|
|
2084
|
+
TODO: How to detect if columns change, new fields added, fields removed
|
|
2085
|
+
|
|
2086
|
+
### Authentication changes
|
|
2087
|
+
TODO: Current auth method, what to check if it stops working
|
|
2088
|
+
|
|
2089
|
+
## Dependencies
|
|
2090
|
+
TODO: List npm packages and why each is needed
|
|
2091
|
+
\`\`\`
|
|
2092
|
+
playwright — browser automation for CAPTCHA/session
|
|
2093
|
+
ddddocr — CAPTCHA OCR solver
|
|
2094
|
+
\`\`\`
|
|
2095
|
+
|
|
2096
|
+
## Bench results
|
|
2097
|
+
${benchData ? `- **Concurrent**: ${benchData.concurrent}
|
|
2098
|
+
- **Success rate**: ${benchData.success_rate.toFixed(1)}%
|
|
2099
|
+
- **p50**: ${benchData.p50}ms
|
|
2100
|
+
- **p95**: ${benchData.p95}ms
|
|
2101
|
+
- **p99**: ${benchData.p99}ms` : 'TODO: Run latinfo pipe stage first'}
|
|
2102
|
+
`;
|
|
2103
|
+
fs_1.default.writeFileSync(destPath, template);
|
|
2104
|
+
console.log(`[pipe] Template generated: ${destPath}`);
|
|
2105
|
+
console.log(`\nFill in the TODO sections, then run:`);
|
|
2106
|
+
console.log(` latinfo pipe docs ${sourceName} ${destPath}`);
|
|
2107
|
+
process.exit(1); // Force them to fill it in
|
|
2108
|
+
}
|
|
2109
|
+
console.log(`\n[pipe] Gate 3.5 PASSED ✓`);
|
|
2110
|
+
console.log(`[pipe] Next: latinfo pipe publish ${sourceName}`);
|
|
2111
|
+
status.docs = { passed: true, timestamp: new Date().toISOString() };
|
|
2112
|
+
savePipeStatus(status);
|
|
2113
|
+
}
|
|
2114
|
+
async function pipePublish(args) {
|
|
2115
|
+
const [sourceName] = args;
|
|
2116
|
+
if (!sourceName) {
|
|
2117
|
+
console.error('Usage: latinfo pipe publish <source-name>');
|
|
2118
|
+
process.exit(1);
|
|
2119
|
+
}
|
|
2120
|
+
const status = loadPipeStatus(sourceName);
|
|
2121
|
+
requireGate(status, 'test', 'publish');
|
|
2122
|
+
requireGate(status, 'validate', 'publish');
|
|
2123
|
+
requireGate(status, 'stage', 'publish');
|
|
2124
|
+
if (!status.docs?.passed) {
|
|
2125
|
+
console.error(`[pipe] Gate "docs" has not passed. Run: latinfo pipe docs ${sourceName}`);
|
|
2126
|
+
console.error(`[pipe] Documentation is required before publishing.`);
|
|
1669
2127
|
process.exit(1);
|
|
1670
2128
|
}
|
|
2129
|
+
const repo = getRepoPath();
|
|
1671
2130
|
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
1672
|
-
|
|
1673
|
-
console.log(`[
|
|
1674
|
-
//
|
|
1675
|
-
|
|
1676
|
-
console.log(`[publish] Committing...`);
|
|
2131
|
+
const RUNNER = 'f3mt0@100.109.82.87';
|
|
2132
|
+
console.log(`[pipe] Gate 4: PUBLISH\n`);
|
|
2133
|
+
// 1. Git add + commit + push
|
|
2134
|
+
console.log(`[pipe] Committing to repo...`);
|
|
1677
2135
|
const files = [`sources/${sourceName}.yaml`];
|
|
1678
2136
|
const scriptPath = path_1.default.join(repo, 'src', 'imports', `${sourceName}.ts`);
|
|
1679
2137
|
if (fs_1.default.existsSync(scriptPath))
|
|
1680
2138
|
files.push(`src/imports/${sourceName}.ts`);
|
|
1681
2139
|
try {
|
|
1682
|
-
|
|
2140
|
+
const docsFile = `docs/sources/${sourceName}.md`;
|
|
2141
|
+
if (fs_1.default.existsSync(path_1.default.join(repo, docsFile)))
|
|
2142
|
+
files.push(docsFile);
|
|
2143
|
+
run(`git add ${files.join(' ')} src/sources.ts .github/workflows/import.yml`, { cwd: repo, stdio: 'pipe' });
|
|
1683
2144
|
run(`git commit -m "Add data source: ${sourceName}"`, { cwd: repo, stdio: 'pipe' });
|
|
1684
2145
|
run(`git push`, { cwd: repo, stdio: 'pipe' });
|
|
1685
|
-
console.log(`[
|
|
2146
|
+
console.log(`[pipe] Pushed to remote.`);
|
|
1686
2147
|
}
|
|
1687
2148
|
catch (e) {
|
|
1688
|
-
console.error(`[
|
|
2149
|
+
console.error(`[pipe] Git error: ${e.message}`);
|
|
2150
|
+
process.exit(1);
|
|
2151
|
+
}
|
|
2152
|
+
// 2. Deploy Worker
|
|
2153
|
+
console.log(`[pipe] Deploying Worker...`);
|
|
2154
|
+
try {
|
|
2155
|
+
run(`npx wrangler deploy`, { cwd: repo, stdio: 'inherit' });
|
|
2156
|
+
}
|
|
2157
|
+
catch {
|
|
2158
|
+
console.error(`[pipe] Deploy failed — rolling back`);
|
|
2159
|
+
run(`git revert HEAD --no-edit && git push`, { cwd: repo, stdio: 'pipe' });
|
|
1689
2160
|
process.exit(1);
|
|
1690
2161
|
}
|
|
1691
|
-
// 3. Trigger import
|
|
1692
|
-
console.log(`[
|
|
2162
|
+
// 3. Trigger import on runner
|
|
2163
|
+
console.log(`[pipe] Triggering import...`);
|
|
1693
2164
|
try {
|
|
1694
2165
|
run(`gh workflow run import.yml -f source=${sourceName}`, { cwd: repo, stdio: 'inherit' });
|
|
1695
|
-
console.log(`[publish] Import triggered. Check: gh run list --workflow=import.yml`);
|
|
1696
2166
|
}
|
|
1697
2167
|
catch {
|
|
1698
|
-
console.log(`[
|
|
2168
|
+
console.log(`[pipe] Could not trigger workflow automatically.`);
|
|
2169
|
+
}
|
|
2170
|
+
// 4. Restart search server
|
|
2171
|
+
console.log(`[pipe] Restarting search server on Linux Mint...`);
|
|
2172
|
+
try {
|
|
2173
|
+
run(`ssh ${RUNNER} "sudo systemctl restart search-server 2>/dev/null || echo 'No service yet'"`, { stdio: 'inherit' });
|
|
2174
|
+
}
|
|
2175
|
+
catch { }
|
|
2176
|
+
console.log(`\n[pipe] Gate 4 PASSED ✓`);
|
|
2177
|
+
console.log(`[pipe] ${sourceName} is LIVE`);
|
|
2178
|
+
console.log(` API: https://api.latinfo.dev/${sourceName.replace(/-/g, '/')}/`);
|
|
2179
|
+
console.log(` CLI: latinfo ${sourceName.replace(/-/g, ' ')}`);
|
|
2180
|
+
status.publish = { passed: true, timestamp: new Date().toISOString() };
|
|
2181
|
+
savePipeStatus(status);
|
|
2182
|
+
}
|
|
2183
|
+
async function pipeStatus(args) {
|
|
2184
|
+
const [sourceName] = args;
|
|
2185
|
+
if (sourceName) {
|
|
2186
|
+
const status = loadPipeStatus(sourceName);
|
|
2187
|
+
const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
|
|
2188
|
+
console.log(`Source: ${sourceName}\n`);
|
|
2189
|
+
for (const gate of gates) {
|
|
2190
|
+
const g = status[gate];
|
|
2191
|
+
if (!g) {
|
|
2192
|
+
console.log(` ${gate}: ⬚ not run`);
|
|
2193
|
+
continue;
|
|
2194
|
+
}
|
|
2195
|
+
const icon = g.passed ? '✓' : '✗';
|
|
2196
|
+
const extra = g.bench ? ` (${g.bench.concurrent} concurrent, ${g.bench.success_rate.toFixed(1)}%, p99: ${g.bench.p99}ms)` : '';
|
|
2197
|
+
const records = g.records ? ` (${g.records} records)` : '';
|
|
2198
|
+
console.log(` ${gate}: ${icon} ${g.timestamp}${records}${extra}`);
|
|
2199
|
+
if (g.errors)
|
|
2200
|
+
for (const e of g.errors)
|
|
2201
|
+
console.log(` ✗ ${e}`);
|
|
2202
|
+
}
|
|
2203
|
+
}
|
|
2204
|
+
else {
|
|
2205
|
+
// List all sources with status
|
|
2206
|
+
if (!fs_1.default.existsSync(PIPE_STATUS_DIR)) {
|
|
2207
|
+
console.log('No sources tracked yet.');
|
|
2208
|
+
return;
|
|
2209
|
+
}
|
|
2210
|
+
const files = fs_1.default.readdirSync(PIPE_STATUS_DIR).filter(f => f.endsWith('.json'));
|
|
2211
|
+
for (const f of files) {
|
|
2212
|
+
const s = JSON.parse(fs_1.default.readFileSync(path_1.default.join(PIPE_STATUS_DIR, f), 'utf-8'));
|
|
2213
|
+
const gates = ['test', 'validate', 'stage', 'docs', 'publish'];
|
|
2214
|
+
const icons = gates.map(g => s[g]?.passed ? '✓' : s[g] ? '✗' : '⬚').join('');
|
|
2215
|
+
console.log(` ${s.source} [${icons}]`);
|
|
2216
|
+
}
|
|
1699
2217
|
}
|
|
1700
|
-
console.log(`\n[publish] ${sourceName}: PUBLISHED`);
|
|
1701
2218
|
}
|
|
1702
|
-
async function
|
|
2219
|
+
async function pipe(args) {
|
|
1703
2220
|
requireAdmin();
|
|
1704
2221
|
const [subcommand, ...subArgs] = args;
|
|
1705
2222
|
switch (subcommand) {
|
|
1706
2223
|
case 'create':
|
|
1707
|
-
await
|
|
2224
|
+
await pipeCreate(subArgs);
|
|
2225
|
+
break;
|
|
2226
|
+
case 'script':
|
|
2227
|
+
await pipeScript(subArgs);
|
|
1708
2228
|
break;
|
|
1709
|
-
case '
|
|
1710
|
-
await
|
|
2229
|
+
case 'deps':
|
|
2230
|
+
await pipeDeps(subArgs);
|
|
1711
2231
|
break;
|
|
1712
2232
|
case 'test':
|
|
1713
|
-
await
|
|
2233
|
+
await pipeTest(subArgs);
|
|
2234
|
+
break;
|
|
2235
|
+
case 'validate':
|
|
2236
|
+
await pipeValidate(subArgs);
|
|
2237
|
+
break;
|
|
2238
|
+
case 'stage':
|
|
2239
|
+
await pipeStage(subArgs);
|
|
2240
|
+
break;
|
|
2241
|
+
case 'docs':
|
|
2242
|
+
await pipeDocs(subArgs);
|
|
1714
2243
|
break;
|
|
1715
2244
|
case 'publish':
|
|
1716
|
-
await
|
|
2245
|
+
await pipePublish(subArgs);
|
|
2246
|
+
break;
|
|
2247
|
+
case 'status':
|
|
2248
|
+
await pipeStatus(subArgs);
|
|
2249
|
+
break;
|
|
2250
|
+
case 'list':
|
|
2251
|
+
const repo = getRepoPath();
|
|
2252
|
+
const yamls = fs_1.default.readdirSync(path_1.default.join(repo, 'sources')).filter(f => f.endsWith('.yaml'));
|
|
2253
|
+
for (const y of yamls)
|
|
2254
|
+
console.log(` ${y.replace('.yaml', '')}`);
|
|
2255
|
+
break;
|
|
2256
|
+
case 'run':
|
|
2257
|
+
const { execSync: run } = await Promise.resolve().then(() => __importStar(require('child_process')));
|
|
2258
|
+
try {
|
|
2259
|
+
run(`gh workflow run import.yml -f source=${subArgs[0]}`, { cwd: getRepoPath(), stdio: 'inherit' });
|
|
2260
|
+
}
|
|
2261
|
+
catch {
|
|
2262
|
+
console.error('Failed to trigger import.');
|
|
2263
|
+
}
|
|
1717
2264
|
break;
|
|
1718
2265
|
default:
|
|
1719
|
-
console.log(`latinfo
|
|
2266
|
+
console.log(`latinfo pipe — data pipeline for public government records
|
|
1720
2267
|
|
|
1721
2268
|
DO NOT use SQLite, Supabase, Postgres, or any other database.
|
|
1722
2269
|
latinfo handles storage, indexing, full-text search, and API serving automatically.
|
|
1723
2270
|
Your only job is to download and parse the source data into a TSV file.
|
|
1724
2271
|
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
2272
|
+
COMMANDS
|
|
2273
|
+
create <country> <institution> <dataset> [flags] Create source (YAML template)
|
|
2274
|
+
script <source> <file.ts> Upload import script
|
|
2275
|
+
deps <source> <pkg1> [pkg2] ... Add npm dependencies
|
|
2276
|
+
test <source> Gate 1: test 100 records locally
|
|
2277
|
+
validate <source> Gate 2: full import locally
|
|
2278
|
+
stage <source> Gate 3: import + 500 bench on Linux Mint
|
|
2279
|
+
docs <source> [doc-file] Gate 3.5: write/upload documentation
|
|
2280
|
+
publish <source> Gate 4: deploy to production
|
|
2281
|
+
status [source] Show gate status
|
|
2282
|
+
list List all sources
|
|
2283
|
+
run <source> Re-run import (existing source)
|
|
1736
2284
|
|
|
1737
|
-
|
|
1738
|
-
|
|
2285
|
+
GATES (each must pass before the next unlocks)
|
|
2286
|
+
test → 100 records, validates IDs, encoding, V2 search, MPHF
|
|
2287
|
+
validate → full import, all records, field validation
|
|
2288
|
+
stage → Linux Mint: import + 500 concurrent bench (99.9% required)
|
|
2289
|
+
docs → documentation with required sections (Source, How it works, Fields, etc.)
|
|
2290
|
+
publish → production: deploy + smoke test + bench + rollback on failure
|
|
1739
2291
|
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
See src/imports/pe-osce-sanctioned.ts for a clean working example.
|
|
1750
|
-
|
|
1751
|
-
Step 3: Test locally
|
|
1752
|
-
latinfo admin test <source-name>
|
|
1753
|
-
|
|
1754
|
-
Runs your import with --limit 100 and validates the output.
|
|
1755
|
-
Must pass before publishing.
|
|
1756
|
-
|
|
1757
|
-
Step 4: Publish to production
|
|
1758
|
-
latinfo admin publish <source-name>
|
|
1759
|
-
|
|
1760
|
-
Commits your YAML + script, pushes to GitHub, triggers the import workflow.
|
|
1761
|
-
After import completes, the data is live at:
|
|
1762
|
-
API: https://api.latinfo.dev/{country}/{institution}/{dataset}/...
|
|
1763
|
-
CLI: latinfo {country} {institution} {dataset} <id|--search query>
|
|
2292
|
+
WORKFLOW
|
|
2293
|
+
1. latinfo pipe create pe redam registry --url https://redam.pj.gob.pe/... --id-name dni --id-length 8
|
|
2294
|
+
2. Write your import script (fetch, Playwright, crawler — anything that produces TSV)
|
|
2295
|
+
3. latinfo pipe script pe-redam-registry ./my-crawler.ts
|
|
2296
|
+
4. latinfo pipe deps pe-redam-registry playwright ddddocr
|
|
2297
|
+
5. latinfo pipe test pe-redam-registry
|
|
2298
|
+
6. latinfo pipe validate pe-redam-registry
|
|
2299
|
+
7. latinfo pipe stage pe-redam-registry
|
|
2300
|
+
8. latinfo pipe publish pe-redam-registry
|
|
1764
2301
|
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
2302
|
+
SCRIPT REQUIREMENTS
|
|
2303
|
+
Your script must produce a sorted TSV and upload to R2:
|
|
2304
|
+
1. Download source data (any method)
|
|
2305
|
+
2. Parse to TSV: ID\\tfield1\\tfield2\\t...
|
|
2306
|
+
3. Sort: LC_ALL=C sort -t'\\t' -k1,1
|
|
2307
|
+
4. buildBinaryFiles() — generates .bin + .idx
|
|
2308
|
+
5. buildSearchIndex() with statusFieldIndex (V2 MANDATORY)
|
|
2309
|
+
6. buildMphfFromIdx() (MPHF MANDATORY)
|
|
2310
|
+
7. uploadToR2() for each file
|
|
2311
|
+
8. saveImportMeta()
|
|
1772
2312
|
|
|
1773
|
-
|
|
1774
|
-
Source name: {country}-{institution}-{dataset}
|
|
1775
|
-
Country: ISO 3166-1 alpha-2 lowercase (pe, co, br, mx, ec, ar, cl)
|
|
1776
|
-
Institution: government agency abbreviation, lowercase
|
|
1777
|
-
Dataset: what the data contains, english, lowercase
|
|
2313
|
+
See SOURCES.md for full template. See src/imports/pe-osce-sanctioned.ts for example.
|
|
1778
2314
|
|
|
2315
|
+
NAMING
|
|
2316
|
+
{country}-{institution}-{dataset}, all lowercase english.
|
|
1779
2317
|
Examples: pe-sunat-padron, pe-osce-sanctioned, co-rues-registry
|
|
1780
2318
|
|
|
1781
2319
|
ENVIRONMENT
|
|
1782
2320
|
LATINFO_ADMIN_SECRET Auto-detected from ~/.latinfo/admin.secret or .dev.vars
|
|
1783
|
-
LATINFO_REPO_PATH Auto-detected from cwd
|
|
2321
|
+
LATINFO_REPO_PATH Auto-detected from cwd`);
|
|
1784
2322
|
}
|
|
1785
2323
|
}
|
|
1786
2324
|
// --- Main ---
|
|
@@ -1838,9 +2376,12 @@ else {
|
|
|
1838
2376
|
case 'bench':
|
|
1839
2377
|
bench(args).catch(e => { console.error(e); process.exit(1); });
|
|
1840
2378
|
break;
|
|
1841
|
-
case '
|
|
1842
|
-
|
|
2379
|
+
case 'pipe':
|
|
2380
|
+
pipe(args).catch(e => { console.error(e); process.exit(1); });
|
|
1843
2381
|
break;
|
|
2382
|
+
case 'admin':
|
|
2383
|
+
pipe(args).catch(e => { console.error(e); process.exit(1); });
|
|
2384
|
+
break; // backward compat
|
|
1844
2385
|
case 'easypipe':
|
|
1845
2386
|
case 'ep':
|
|
1846
2387
|
easypipe(args).catch(e => { console.error(e); process.exit(1); });
|
|
@@ -1859,7 +2400,7 @@ else {
|
|
|
1859
2400
|
sourceQuery('/pe/sunat/padron', ['--dni', ...args]).catch(e => { console.error(e); process.exit(1); });
|
|
1860
2401
|
break;
|
|
1861
2402
|
case 'search':
|
|
1862
|
-
|
|
2403
|
+
search(args.join(' ')).catch(e => { console.error(e); process.exit(1); });
|
|
1863
2404
|
break;
|
|
1864
2405
|
case 'debtors':
|
|
1865
2406
|
sourceQuery('/pe/sunat/coactiva', args).catch(e => { console.error(e); process.exit(1); });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "latinfo",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Tax registry & procurement API for Latin America. Query RUC, DNI, NIT, licitaciones from Peru & Colombia. Offline MPHF search, full OCDS data, updated daily.",
|
|
5
5
|
"homepage": "https://latinfo.dev",
|
|
6
6
|
"repository": {
|