latinfo 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Client-side search: server sends minimal posting list bytes,
3
+ * client does intersection + scoring, then fetches only the winning records.
4
+ *
5
+ * Flow:
6
+ * 1. GET /posting?q=... → compact IDs (8 bytes each, ~10-80KB total)
7
+ * 2. Client intersects + ranks by IDF locally
8
+ * 3. GET /records?ids=... → display data for top-20 (~5-10KB)
9
+ * Total: 2 requests, ~20-90KB transferred
10
+ */
11
/** Options accepted by `clientSearch`. */
interface ClientSearchOpts {
    /** Base URL of the API; `/{country}/posting` and `/{country}/records` are appended to it. */
    apiUrl: string;
    /** Credential sent as `Authorization: Bearer <apiKey>` on both requests. */
    apiKey: string;
    /** Path segment placed between `apiUrl` and the endpoint name. */
    country: string;
    /** Search text; URL-encoded into the `q` parameter of the posting request. */
    query: string;
    /** When true, results and timing diagnostics are printed as a single JSON object instead of text. */
    json: boolean;
    /** Artificial delay in milliseconds added after each response; 0 adds none. */
    simLatency: number;
    /** Simulated throughput in KB/s used to add a size-proportional delay; 0 disables it. */
    simBandwidth: number;
}
20
/**
 * Runs a two-request search: fetches posting lists, intersects them locally,
 * then fetches the records for the surviving IDs and prints them to stdout.
 *
 * @param opts - Endpoint, credentials, query and output/simulation settings.
 * @returns A promise that resolves once the output has been printed.
 */
export declare function clientSearch(opts: ClientSearchOpts): Promise<void>;
export {};
@@ -0,0 +1,141 @@
1
+ "use strict";
2
+ /**
3
+ * Client-side search: server sends minimal posting list bytes,
4
+ * client does intersection + scoring, then fetches only the winning records.
5
+ *
6
+ * Flow:
7
+ * 1. GET /posting?q=... → compact IDs (8 bytes each, ~10-80KB total)
8
+ * 2. Client intersects + ranks by IDF locally
9
+ * 3. GET /records?ids=... → display data for top-20 (~5-10KB)
10
+ * Total: 2 requests, ~20-90KB transferred
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.clientSearch = clientSearch;
14
+ // --- Posting-list decoding and intersection (arrays of BigInt IDs) ---
15
+ function decodeIds(b64) {
16
+ const binary = atob(b64);
17
+ const buf = new Uint8Array(binary.length);
18
+ for (let i = 0; i < binary.length; i++)
19
+ buf[i] = binary.charCodeAt(i);
20
+ const view = new DataView(buf.buffer);
21
+ const ids = [];
22
+ for (let i = 0; i < buf.length; i += 8)
23
+ ids.push(view.getBigUint64(i, true));
24
+ return ids;
25
+ }
26
/**
 * Intersects several ID lists and returns at most `limit` common IDs.
 *
 * Uses a forward-only two-pointer merge, so every list must be sorted in
 * ascending order for the result to be correct. Lists are processed from
 * shortest to longest; the caller's `lists` array is not reordered.
 *
 * @param lists Array of ascending-sorted ID arrays.
 * @param limit Maximum number of IDs to return.
 * @returns The first `limit` IDs (in ascending order) present in every list.
 */
function intersect(lists, limit) {
    if (lists.length === 0)
        return [];
    if (lists.length === 1)
        return lists[0].slice(0, limit);
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array owned by the caller.
    const ordered = [...lists].sort((a, b) => a.length - b.length);
    const lastIndex = ordered.length - 1;
    let current = ordered[0];
    for (let li = 1; li <= lastIndex; li++) {
        const other = ordered[li];
        // Only the final pass may stop early. Truncating an intermediate
        // result would discard IDs that later lists still match.
        const cap = li === lastIndex ? limit : Infinity;
        const next = [];
        let oi = 0;
        for (let ci = 0; ci < current.length && next.length < cap; ci++) {
            while (oi < other.length && other[oi] < current[ci])
                oi++;
            if (oi < other.length && other[oi] === current[ci])
                next.push(current[ci]);
        }
        current = next;
        if (current.length === 0)
            break;
    }
    return current.slice(0, limit);
}
49
+ // --- IDF scoring (no record data needed) ---
50
/**
 * Sums the inverse-document-frequency weight of a set of terms.
 *
 * Each positive document frequency contributes `ln(totalDocs / df)`;
 * entries that are not greater than zero contribute nothing.
 *
 * @param dfs Iterable of per-term document frequencies.
 * @param totalDocs Number of documents in the collection.
 * @returns The accumulated score (0 when no entry is positive).
 */
function scoreByIdf(dfs, totalDocs) {
    return [...dfs]
        .filter((docFreq) => docFreq > 0)
        .reduce((score, docFreq) => score + Math.log(totalDocs / docFreq), 0);
}
58
/**
 * Waits long enough to imitate a slower network for a response of `bytes` bytes.
 *
 * The wait is `simLatency` milliseconds plus, when `simBandwidth` is positive,
 * the time needed to move `bytes` at `simBandwidth` KB/s. Nothing is awaited
 * when the resulting wait is not greater than zero.
 *
 * @param bytes Size of the response body in bytes.
 * @param simLatency Fixed delay in milliseconds.
 * @param simBandwidth Simulated throughput in KB/s; values <= 0 add no transfer time.
 */
async function simulateDelay(bytes, simLatency, simBandwidth) {
    const waitMs = simBandwidth > 0
        ? simLatency + (bytes / 1024) / simBandwidth * 1000
        : simLatency;
    if (waitMs > 0) {
        await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
}
65
/**
 * Reports a failed API response on stderr and terminates the process with exit code 1.
 *
 * The body is expected to be a JSON object carrying `message` or `error`; when it
 * cannot be parsed, or carries neither field, the HTTP status line is printed instead.
 */
function exitWithApiError(res, body) {
    let message;
    try {
        const err = JSON.parse(body);
        message = err.message || err.error;
    }
    catch {
        // Non-JSON error body (e.g. a proxy error page): fall back to the status line.
    }
    console.error(message || `HTTP ${res.status} ${res.statusText}`.trim());
    process.exit(1);
}
/**
 * Runs the two-request client-side search and prints the outcome to stdout.
 *
 * 1. GETs `{apiUrl}/{country}/posting` for the query and decodes each term's
 *    base64 `data` field into an ID list.
 * 2. Intersects the lists locally, keeping at most 20 candidate IDs.
 * 3. GETs `{apiUrl}/{country}/records` for those IDs (zero-padded to 10 digits).
 *
 * Output is either a JSON object with `results` and timing `diagnostics`
 * (`opts.json`), or one text line per record followed by a timing summary.
 * A non-OK response from either request is reported on stderr and ends the
 * process with exit code 1.
 *
 * NOTE(review): candidates are taken in intersection order; `scoreByIdf` is not
 * applied here even though the file header mentions IDF ranking.
 *
 * @param opts Endpoint, credentials, query and output/simulation settings.
 * @returns A promise that resolves once the output has been printed.
 */
async function clientSearch(opts) {
    const { apiUrl, apiKey, country, query, json, simLatency, simBandwidth } = opts;
    const headers = { Authorization: `Bearer ${apiKey}` };
    // --- Request 1: posting lists (compact IDs) ---
    const t0 = performance.now();
    const postingRes = await fetch(`${apiUrl}/${country}/posting?q=${encodeURIComponent(query)}&limit=50000`, { headers });
    const postingBody = await postingRes.text();
    const postingBytes = new TextEncoder().encode(postingBody).length;
    if (!postingRes.ok) {
        exitWithApiError(postingRes, postingBody);
        return;
    }
    await simulateDelay(postingBytes, simLatency, simBandwidth);
    const tPosting = performance.now();
    // --- Client computation: decode + intersect ---
    const tCompute0 = performance.now();
    const data = JSON.parse(postingBody);
    const tokenPostings = [];
    let totalEntries = 0;
    for (const t of data.terms) {
        const ids = decodeIds(t.data);
        tokenPostings.push(ids);
        totalEntries += ids.length;
    }
    const candidateIds = intersect(tokenPostings, 20);
    const tCompute1 = performance.now();
    if (candidateIds.length === 0) {
        if (json)
            console.log(JSON.stringify({ results: [], diagnostics: {} }));
        else
            console.log('No results found.');
        return;
    }
    // --- Request 2: fetch records for winning IDs ---
    const idStrings = candidateIds.map(id => id.toString().padStart(10, '0'));
    const tRecords0 = performance.now();
    const recordsRes = await fetch(`${apiUrl}/${country}/records?ids=${idStrings.join(',')}`, { headers });
    const recordsBody = await recordsRes.text();
    const recordsBytes = new TextEncoder().encode(recordsBody).length;
    // Same failure handling as request 1: an error body must not be parsed
    // and iterated as if it were the record list.
    if (!recordsRes.ok) {
        exitWithApiError(recordsRes, recordsBody);
        return;
    }
    await simulateDelay(recordsBytes, simLatency, simBandwidth);
    const tRecords1 = performance.now();
    const records = JSON.parse(recordsBody);
    const tTotal = performance.now();
    // --- Display ---
    if (json) {
        console.log(JSON.stringify({
            results: records,
            diagnostics: {
                postingMs: Math.round(tPosting - t0),
                postingBytes,
                computeMs: Math.round(tCompute1 - tCompute0),
                recordsMs: Math.round(tRecords1 - tRecords0),
                recordsBytes,
                totalMs: Math.round(tTotal - t0),
                totalBytes: postingBytes + recordsBytes,
                terms: data.terms.length,
                entries: totalEntries,
                candidates: candidateIds.length,
            },
        }));
        return;
    }
    for (const r of records) {
        // Records are printed positionally: the first three property values of each object.
        const vals = Object.values(r);
        console.log(`  ${vals[0]}  ${vals[1]}  [${vals[2] || ''}]`);
    }
    console.log(`\n${records.length} result(s)`);
    console.log();
    console.log(`  Request 1 (posting lists):  ${Math.round(tPosting - t0)}ms  ${(postingBytes / 1024).toFixed(1)} KB  (${totalEntries} IDs)`);
    console.log(`  Client compute:             ${Math.round(tCompute1 - tCompute0)}ms  (decode + intersect)`);
    console.log(`  Request 2 (records):        ${Math.round(tRecords1 - tRecords0)}ms  ${(recordsBytes / 1024).toFixed(1)} KB  (${records.length} records)`);
    console.log(`  Total:                      ${Math.round(tTotal - t0)}ms  ${((postingBytes + recordsBytes) / 1024).toFixed(1)} KB transferred`);
    if (simLatency > 0 || simBandwidth > 0) {
        console.log(`  Simulation: +${simLatency}ms latency, ${simBandwidth || '∞'} KB/s`);
    }
}