prepia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +312 -0
- package/bin/prepia.mjs +119 -0
- package/package.json +53 -0
- package/skill/SKILL.md +148 -0
- package/skill/config.json +29 -0
- package/src/analytics/dashboard.mjs +84 -0
- package/src/analytics/tracker.mjs +131 -0
- package/src/api/middleware.mjs +219 -0
- package/src/api/routes.mjs +142 -0
- package/src/api/server.mjs +150 -0
- package/src/cache/disk-store.mjs +199 -0
- package/src/cache/manager.mjs +142 -0
- package/src/cache/memory-store.mjs +205 -0
- package/src/chain/dag.mjs +209 -0
- package/src/chain/executor.mjs +103 -0
- package/src/chain/scheduler.mjs +89 -0
- package/src/client/adapters.mjs +483 -0
- package/src/client/connector.mjs +391 -0
- package/src/client/index.mjs +483 -0
- package/src/client/websocket.mjs +353 -0
- package/src/core/context-packager.mjs +169 -0
- package/src/core/engine.mjs +338 -0
- package/src/core/event-bus.mjs +84 -0
- package/src/core/prepimshot.mjs +120 -0
- package/src/core/task-decomposer.mjs +158 -0
- package/src/edge/lite.mjs +90 -0
- package/src/guard/checker.mjs +123 -0
- package/src/guard/fact-checker.mjs +105 -0
- package/src/guard/hallucination.mjs +108 -0
- package/src/index.mjs +67 -0
- package/src/models/local-model.mjs +171 -0
- package/src/models/provider.mjs +192 -0
- package/src/models/router.mjs +156 -0
- package/src/morph/optimizer.mjs +142 -0
- package/src/network/p2p.mjs +146 -0
- package/src/persona/detector.mjs +118 -0
- package/src/plugins/loader.mjs +120 -0
- package/src/plugins/registry.mjs +164 -0
- package/src/plugins/sandbox.mjs +79 -0
- package/src/rate/limiter.mjs +145 -0
- package/src/rate/shield.mjs +150 -0
- package/src/script/executor.mjs +164 -0
- package/src/script/parser.mjs +134 -0
- package/src/security/privacy.mjs +108 -0
- package/src/security/sanitizer.mjs +133 -0
- package/src/shadow/daemon.mjs +128 -0
- package/src/stream/handler.mjs +204 -0
- package/src/tools/calculator.mjs +312 -0
- package/src/tools/file-ops.mjs +138 -0
- package/src/tools/http-client.mjs +127 -0
- package/src/tools/orchestrator.mjs +205 -0
- package/src/tools/web-scraper.mjs +159 -0
- package/src/tools/web-search.mjs +129 -0
- package/src/vault/knowledge-base.mjs +207 -0
- package/src/vault/pattern-learner.mjs +192 -0
- package/workflows/analyze.json +32 -0
- package/workflows/automate.json +32 -0
- package/workflows/research.json +37 -0
- package/workflows/summarize.json +32 -0
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prepia WebSocket Server
|
|
3
|
+
* Real-time streaming and bidirectional communication
|
|
4
|
+
* Built on top of Node.js built-in modules
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { EventEmitter } from 'events';
|
|
8
|
+
import crypto from 'crypto';
|
|
9
|
+
|
|
10
|
+
/**
 * Simple WebSocket frame parser/builder (RFC 6455).
 * No external dependencies needed.
 *
 * Only single frames are handled here; the `fin` bit is reported to the
 * caller but fragmented messages are not reassembled by this class.
 */
class WebSocketFrame {
  /**
   * Parse one WebSocket frame from the start of `buffer`.
   *
   * The input buffer is never modified: a masked payload is unmasked into a
   * freshly allocated buffer, while an unmasked payload is returned as a view
   * onto `buffer`.
   *
   * @param {Buffer} buffer - Bytes received so far (may hold a partial frame
   *   or several frames back to back).
   * @returns {{ fin: boolean, opcode: number, payload: Buffer, bytesRead: number }|null}
   *   The decoded frame and the number of bytes it occupied, or `null` when
   *   `buffer` does not yet contain a complete frame.
   */
  static parse(buffer) {
    if (buffer.length < 2) return null;

    const firstByte = buffer[0];
    const secondByte = buffer[1];
    const fin = (firstByte & 0x80) !== 0;
    const opcode = firstByte & 0x0f;
    const masked = (secondByte & 0x80) !== 0;
    let payloadLength = secondByte & 0x7f;
    let offset = 2;

    // 126 and 127 are escape values announcing a 16-bit / 64-bit length field.
    if (payloadLength === 126) {
      if (buffer.length < 4) return null;
      payloadLength = buffer.readUInt16BE(2);
      offset = 4;
    } else if (payloadLength === 127) {
      if (buffer.length < 10) return null;
      // NOTE(review): lengths above Number.MAX_SAFE_INTEGER lose precision
      // here and no upper bound is enforced on the announced length.
      payloadLength = Number(buffer.readBigUInt64BE(2));
      offset = 10;
    }

    const maskLength = masked ? 4 : 0;
    const payloadStart = offset + maskLength;
    const totalLength = payloadStart + payloadLength;
    if (buffer.length < totalLength) return null;

    let payload = buffer.subarray(payloadStart, totalLength);
    if (masked) {
      const maskKey = buffer.subarray(offset, offset + 4);
      // Unmask into a copy so the caller's buffer is left untouched.
      const unmasked = Buffer.allocUnsafe(payload.length);
      for (let i = 0; i < payload.length; i++) {
        unmasked[i] = payload[i] ^ maskKey[i % 4];
      }
      payload = unmasked;
    }

    return { fin, opcode, payload, bytesRead: totalLength };
  }

  /**
   * Build a single, final (FIN=1) WebSocket frame.
   *
   * Frames are unmasked by default: RFC 6455 section 5.1 states that a server
   * MUST NOT mask frames it sends and that clients must close the connection
   * when they receive a masked frame. Pass `mask = true` only when building
   * client-to-server frames.
   *
   * @param {Buffer|string} data - Payload; strings are encoded as UTF-8.
   * @param {number} [opcode=1] - 1=text, 2=binary, 8=close, 9=ping, 10=pong
   * @param {boolean} [mask=false] - Mask the payload with a random 4-byte key.
   * @returns {Buffer} The encoded frame.
   */
  static build(data, opcode = 1, mask = false) {
    const payload = Buffer.isBuffer(data) ? data : Buffer.from(data);
    const maskBit = mask ? 0x80 : 0x00;
    let header;

    if (payload.length < 126) {
      header = Buffer.alloc(2);
      header[1] = maskBit | payload.length;
    } else if (payload.length < 65536) {
      header = Buffer.alloc(4);
      header[1] = maskBit | 126;
      header.writeUInt16BE(payload.length, 2);
    } else {
      header = Buffer.alloc(10);
      header[1] = maskBit | 127;
      header.writeBigUInt64BE(BigInt(payload.length), 2);
    }
    header[0] = 0x80 | opcode;

    if (!mask) {
      return Buffer.concat([header, payload]);
    }

    const maskKey = crypto.randomBytes(4);
    const maskedPayload = Buffer.alloc(payload.length);
    for (let i = 0; i < payload.length; i++) {
      maskedPayload[i] = payload[i] ^ maskKey[i % 4];
    }

    return Buffer.concat([header, maskKey, maskedPayload]);
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Individual WebSocket connection wrapping an already-upgraded socket.
 *
 * Events emitted:
 *  - 'message' (string|Buffer): payload of a text (string) or binary (Buffer) frame
 *  - 'close': emitted exactly once when the connection is torn down
 *  - 'error' (Error): socket error, forwarded only when a listener is attached
 *
 * Frames with other opcodes (including continuation frames) are ignored, so
 * fragmented messages are not reassembled.
 */
export class WebSocketConnection extends EventEmitter {
  /**
   * @param {import('net').Socket} socket
   * @param {string} id
   */
  constructor(socket, id) {
    super();
    this.socket = socket;
    this.id = id;
    this.alive = true;
    this.buffer = Buffer.alloc(0);
    this.metadata = {};
    // Set once teardown has run; guards against duplicate 'close' events.
    this._closed = false;

    this.socket.on('data', (chunk) => this._onData(chunk));
    this.socket.on('close', () => this._teardown());
    this.socket.on('end', () => {
      // Client disconnected — close our side too
      if (this._closed) return;
      this.socket.end();
      this._teardown();
    });
    this.socket.on('error', (err) => {
      this._teardown();
      // Emitting 'error' with no listener makes EventEmitter throw, which
      // would turn an ordinary socket reset into an uncaught exception.
      if (this.listenerCount('error') > 0) {
        this.emit('error', err);
      }
    });

    // Ping/pong keepalive
    this._pingInterval = setInterval(() => {
      if (this.alive) this.ping();
    }, 30000);
  }

  /**
   * Mark the connection dead, stop the keepalive timer and emit 'close'.
   * Safe to call from several code paths; only the first call has any effect.
   */
  _teardown() {
    if (this._closed) return;
    this._closed = true;
    this.alive = false;
    clearInterval(this._pingInterval);
    this.emit('close');
  }

  /**
   * Append incoming bytes to the receive buffer and dispatch every complete
   * frame found in it.
   * @param {Buffer} chunk
   */
  _onData(chunk) {
    this.buffer = Buffer.concat([this.buffer, chunk]);

    while (this.buffer.length > 0) {
      const frame = WebSocketFrame.parse(this.buffer);
      if (!frame) break; // wait for more data

      this.buffer = this.buffer.subarray(frame.bytesRead);

      switch (frame.opcode) {
        case 0x01: // Text frame
          this.emit('message', frame.payload.toString());
          break;
        case 0x02: // Binary frame
          this.emit('message', frame.payload);
          break;
        case 0x08: // Close
          this.close();
          // Nothing after a Close frame is processed.
          return;
        case 0x09: // Ping
          // RFC 6455 5.5.3: a pong carries the same payload as the ping.
          this.pong(frame.payload);
          break;
        case 0x0a: // Pong
          // A late pong must not revive a connection that was already closed.
          if (!this._closed) this.alive = true;
          break;
        default:
          break;
      }
    }
  }

  /**
   * Send data to the client.
   * Objects other than Buffers are serialized with JSON.stringify; strings are
   * sent as text frames and Buffers as binary frames. No-op when not alive.
   * @param {string|Buffer|Object} data
   */
  send(data) {
    if (!this.alive) return;
    const payload = typeof data === 'object' && !Buffer.isBuffer(data) ? JSON.stringify(data) : data;
    const frame = WebSocketFrame.build(payload, typeof payload === 'string' ? 0x01 : 0x02);
    try {
      this.socket.write(frame);
    } catch {
      // A failed write means the socket is unusable; stop sending on it.
      this.alive = false;
    }
  }

  /**
   * Send a ping
   */
  ping() {
    if (!this.alive) return;
    try {
      const frame = WebSocketFrame.build(Buffer.alloc(0), 0x09);
      this.socket.write(frame);
    } catch {
      this.alive = false;
    }
  }

  /**
   * Send a pong
   * @param {Buffer} [payload] - Application data of the ping being answered.
   */
  pong(payload = Buffer.alloc(0)) {
    if (!this.alive) return;
    try {
      const frame = WebSocketFrame.build(payload, 0x0a);
      this.socket.write(frame);
    } catch {
      this.alive = false;
    }
  }

  /**
   * Close the connection: send a Close frame (best effort), end the socket and
   * emit 'close'. Calling it again after teardown is a no-op.
   * @param {number} [code=1000]
   * @param {string} [reason]
   */
  close(code = 1000, reason = '') {
    if (this._closed) return;
    this.alive = false;
    try {
      const payload = Buffer.alloc(2 + Buffer.byteLength(reason));
      payload.writeUInt16BE(code, 0);
      Buffer.from(reason).copy(payload, 2);
      const frame = WebSocketFrame.build(payload, 0x08);
      this.socket.write(frame);
    } catch { /* best effort: the socket is being shut down regardless */ }
    this.socket.end();
    this._teardown();
  }
}
|
|
229
|
+
|
|
230
|
+
/**
|
|
231
|
+
* WebSocket Server that upgrades HTTP connections
|
|
232
|
+
*/
|
|
233
|
+
export class WebSocketServer extends EventEmitter {
|
|
234
|
+
constructor() {
|
|
235
|
+
super();
|
|
236
|
+
/** @type {Map<string, WebSocketConnection>} */
|
|
237
|
+
this.clients = new Map();
|
|
238
|
+
this._clientCounter = 0;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Handle HTTP upgrade request
|
|
243
|
+
* @param {import('http').IncomingMessage} req
|
|
244
|
+
* @param {import('net').Socket} socket
|
|
245
|
+
* @param {Buffer} head
|
|
246
|
+
*/
|
|
247
|
+
handleUpgrade(req, socket, head) {
|
|
248
|
+
const key = req.headers['sec-websocket-key'];
|
|
249
|
+
if (!key) {
|
|
250
|
+
socket.destroy();
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
const acceptKey = crypto
|
|
255
|
+
.createHash('sha1')
|
|
256
|
+
.update(key + '258EAFA5-E914-47DA-95CA-5AB5DC525AA5')
|
|
257
|
+
.digest('base64');
|
|
258
|
+
|
|
259
|
+
const response = [
|
|
260
|
+
'HTTP/1.1 101 Switching Protocols',
|
|
261
|
+
'Upgrade: websocket',
|
|
262
|
+
'Connection: Upgrade',
|
|
263
|
+
`Sec-WebSocket-Accept: ${acceptKey}`,
|
|
264
|
+
'',
|
|
265
|
+
'',
|
|
266
|
+
].join('\r\n');
|
|
267
|
+
|
|
268
|
+
socket.write(response);
|
|
269
|
+
|
|
270
|
+
const id = `ws-${++this._clientCounter}-${Date.now()}`;
|
|
271
|
+
const conn = new WebSocketConnection(socket, id);
|
|
272
|
+
this.clients.set(id, conn);
|
|
273
|
+
|
|
274
|
+
conn.on('close', () => {
|
|
275
|
+
this.clients.delete(id);
|
|
276
|
+
this.emit('disconnect', conn);
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
conn.on('message', (msg) => {
|
|
280
|
+
try {
|
|
281
|
+
const parsed = JSON.parse(msg.toString());
|
|
282
|
+
this.emit('message', conn, parsed);
|
|
283
|
+
if (parsed.event) {
|
|
284
|
+
this.emit(parsed.event, conn, parsed.data, parsed);
|
|
285
|
+
}
|
|
286
|
+
} catch {
|
|
287
|
+
this.emit('raw', conn, msg);
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
this.emit('connection', conn);
|
|
292
|
+
|
|
293
|
+
// Process any buffered data
|
|
294
|
+
if (head && head.length > 0) {
|
|
295
|
+
socket.emit('data', head);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* Broadcast to all connected clients
|
|
301
|
+
* @param {string} event
|
|
302
|
+
* @param {*} data
|
|
303
|
+
* @param {string} [excludeId] - Exclude a specific client
|
|
304
|
+
*/
|
|
305
|
+
broadcast(event, data, excludeId) {
|
|
306
|
+
const msg = { event, data, timestamp: Date.now() };
|
|
307
|
+
for (const [id, conn] of this.clients) {
|
|
308
|
+
if (id !== excludeId && conn.alive) {
|
|
309
|
+
conn.send(msg);
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Send to a specific client
|
|
316
|
+
* @param {string} clientId
|
|
317
|
+
* @param {string} event
|
|
318
|
+
* @param {*} data
|
|
319
|
+
*/
|
|
320
|
+
sendTo(clientId, event, data) {
|
|
321
|
+
const conn = this.clients.get(clientId);
|
|
322
|
+
if (conn && conn.alive) {
|
|
323
|
+
conn.send({ event, data, timestamp: Date.now() });
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
/**
|
|
328
|
+
* Get connected client count
|
|
329
|
+
* @returns {number}
|
|
330
|
+
*/
|
|
331
|
+
get clientCount() {
|
|
332
|
+
return this.clients.size;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* Close all connections
|
|
337
|
+
*/
|
|
338
|
+
closeAll() {
|
|
339
|
+
for (const conn of this.clients.values()) {
|
|
340
|
+
try {
|
|
341
|
+
// Remove error listeners to prevent uncaught exceptions
|
|
342
|
+
conn.socket.removeAllListeners('error');
|
|
343
|
+
conn.socket.on('error', () => {});
|
|
344
|
+
conn.alive = false;
|
|
345
|
+
clearInterval(conn._pingInterval);
|
|
346
|
+
conn.socket.end();
|
|
347
|
+
} catch { /* ignore cleanup errors */ }
|
|
348
|
+
}
|
|
349
|
+
this.clients.clear();
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
export default WebSocketServer;
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Context packager - compresses and packages context for LLM.
|
|
3
|
+
* Deduplication, summarization, relevance scoring, and token estimation.
|
|
4
|
+
* @module core/context-packager
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Estimate token count (rough: 1 token ≈ 4 chars).
 * Falsy input counts as 0 tokens. Non-string input is converted with
 * `String()` first so the result is always a finite number.
 * @param {string} text
 * @returns {number}
 */
export function estimateTokens(text) {
  if (!text) return 0;
  const str = typeof text === 'string' ? text : String(text);
  return Math.ceil(str.length / 4);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Package gathered context into minimal, focused form for LLM consumption.
 *
 * Steps: collect text items, drop duplicates, optionally rank by relevance to
 * `query`, then keep items in order until the token budget is reached. The
 * first item that does not fit is truncated when more than 50 tokens of
 * budget remain; everything after it is dropped.
 *
 * @param {Object} data - Gathered data from tools/sub-tasks
 * @param {Object} [options]
 * @param {number} [options.maxTokens=4000] - Max token budget
 * @param {string} [options.query] - Original query for relevance scoring
 * @returns {{ context: string, tokens: number, items: number, compressed: boolean, totalAvailable: number }}
 *   `context` joins the kept items with blank lines (prefixed by `[source]`
 *   when known); `tokens` is the estimate for that string; `items` is the
 *   number of kept items; `compressed` is true when any item was dropped or
 *   truncated; `totalAvailable` is the item count after deduplication.
 */
export function packageContext(data, options = {}) {
  // `options` may be passed explicitly as null; treat that like "no options".
  const { maxTokens = 4000, query = '' } = options ?? {};

  if (!data) {
    return { context: '', tokens: 0, items: 0, compressed: false, totalAvailable: 0 };
  }

  let items = deduplicate(collectItems(data));

  // Score relevance if query provided
  if (query) {
    items = items
      .map((item) => ({ ...item, relevance: scoreRelevance(item.text, query) }))
      .sort((a, b) => b.relevance - a.relevance);
  }

  // Fit within token budget
  const result = [];
  let totalTokens = 0;
  let truncatedAny = false;

  for (const item of items) {
    const itemTokens = estimateTokens(item.text);
    if (totalTokens + itemTokens > maxTokens) {
      // Try to fit a truncated version (4 chars per token, as in the estimate).
      const remaining = maxTokens - totalTokens;
      if (remaining > 50) {
        const truncated = item.text.substring(0, remaining * 4);
        result.push({ ...item, text: truncated, truncated: true });
        totalTokens += estimateTokens(truncated);
        truncatedAny = true;
      }
      break;
    }
    result.push(item);
    totalTokens += itemTokens;
  }

  // Build final context string
  const context = result
    .map((item) => (item.source ? `[${item.source}] ${item.text}` : item.text))
    .join('\n\n');

  return {
    context,
    tokens: estimateTokens(context),
    items: result.length,
    // A truncated item keeps the counts equal, so track truncation explicitly.
    compressed: truncatedAny || result.length < items.length,
    totalAvailable: items.length,
  };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Collect all text items from data.
 *
 * Recognized shapes:
 *  - string: one item (blank strings are skipped)
 *  - array: items of every element, collected recursively
 *  - object with a `results` array: one item per result object, built from
 *    `title` plus `snippet` (or `text`), with `source`/`url` as the source
 *  - object with a non-empty string `text`: one item with `url`/`source`
 *  - any other object: one `key: value` item per string property longer
 *    than 10 characters (nested values are not visited)
 *
 * Anything else yields no items.
 *
 * @param {*} data
 * @returns {Object[]} Items of the form `{ text: string, source: string }`
 */
function collectItems(data) {
  if (typeof data === 'string') {
    return data.trim() === '' ? [] : [{ text: data, source: '' }];
  }

  if (Array.isArray(data)) {
    return data.flatMap((entry) => collectItems(entry));
  }

  if (typeof data !== 'object' || data === null) {
    return [];
  }

  // Handle search results
  if (Array.isArray(data.results)) {
    const items = [];
    for (const result of data.results) {
      // Skip holes, nulls and primitives instead of throwing on them.
      if (result === null || typeof result !== 'object') continue;
      const text = `${result.title || ''} ${result.snippet || result.text || ''}`.trim();
      if (text === '') continue;
      items.push({ text, source: result.source || result.url || '' });
    }
    return items;
  }

  // Handle scraped content. Only string text qualifies: later stages call
  // string methods on `item.text`.
  if (typeof data.text === 'string' && data.text !== '') {
    return [{ text: data.text, source: data.url || data.source || '' }];
  }

  // Generic object
  return Object.entries(data)
    .filter(([, value]) => typeof value === 'string' && value.length > 10)
    .map(([key, value]) => ({ text: `${key}: ${value}`, source: '' }));
}
|
|
130
|
+
|
|
131
|
+
/**
 * Deduplicate text items.
 *
 * Two items are duplicates when their text is equal after lower-casing and
 * collapsing whitespace. The whole normalized text is compared, so items that
 * merely share a long common prefix are both kept. Items whose normalized
 * text is shorter than 20 characters are never removed. The first occurrence
 * wins and input order is preserved.
 *
 * @param {Object[]} items - Items with a `text` property
 * @returns {Object[]} A new array without the duplicate items
 */
function deduplicate(items) {
  const seen = new Set();
  return items.filter((item) => {
    // Normalize for comparison; tolerate missing or non-string text.
    const normalized = String(item?.text ?? '')
      .toLowerCase()
      .replace(/\s+/g, ' ')
      .trim();
    if (normalized.length < 20) return true; // Don't dedup very short items
    if (seen.has(normalized)) return false;
    seen.add(normalized);
    return true;
  });
}
|
|
148
|
+
|
|
149
|
+
/**
 * Score relevance of text to a query.
 *
 * The query is split into words on anything that is not a letter or digit,
 * so punctuation such as "python?" does not prevent a match. Words of three
 * characters or fewer are ignored and each distinct word counts once. The
 * score is the fraction of those words that occur (case-insensitively, as
 * substrings) in `text`.
 *
 * @param {string} text
 * @param {string} query
 * @returns {number} 0-1; 0.5 when the query has no usable words
 */
function scoreRelevance(text, query) {
  const queryWords = [
    ...new Set(
      String(query ?? '')
        .toLowerCase()
        .split(/[^\p{L}\p{N}]+/u)
        .filter((word) => word.length > 3),
    ),
  ];

  // Nothing to match on: stay neutral rather than penalizing the item.
  if (queryWords.length === 0) return 0.5;

  const textLower = String(text ?? '').toLowerCase();
  const matches = queryWords.filter((word) => textLower.includes(word)).length;

  return matches / queryWords.length;
}
|
|
168
|
+
|
|
169
|
+
export default { packageContext, estimateTokens };
|