pplx-zero 2.2.2 → 2.3.1
This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +9 -1
- package/package.json +14 -4
- package/src/api.ts +22 -1
- package/src/env.ts +6 -6
- package/src/history.ts +30 -24
- package/src/index.ts +63 -1
- package/src/rag.ts +269 -0
package/README.md
CHANGED
@@ -32,6 +32,7 @@ Query [Perplexity AI](https://perplexity.ai) directly from your terminal. Respon
 - **📝 Export** — Save research to markdown
 - **🎨 Pretty** — Rendered markdown by default
 - **🕐 History** — Browse and search past queries
+- **🧠 Local RAG** — Index your own notes with `--ingest`

 ## Install

@@ -78,6 +79,11 @@ pplx --raw "explain monads"

 # history
 pplx --history
+
+# local knowledge base
+pplx --ingest notes.md # index a file
+pplx --ingest ./docs/ # index a directory
+pplx -l "my notes on rust" # search local knowledge
 ```

 ## Models

@@ -98,13 +104,15 @@ pplx --history
 | `-i` | Attach image |
 | `-o` | Output to file |
 | `-c` | Continue conversation |
+| `-l` | Search local knowledge |
+| `--ingest` | Index files to local knowledge |
 | `--raw` | Raw output (no markdown) |
 | `--history` | View history |
 | `--json` | JSON output |

 ## Philosophy

-
+Minimal. 1 dependency. No frameworks.

 ---

package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "pplx-zero",
-  "version": "2.
-  "description": "
+  "version": "2.3.1",
+  "description": "Fast, minimal Perplexity AI CLI. Stream answers, analyze PDFs/images, continue conversations. Zero bloat.",
   "author": "kenzo",
   "license": "MIT",
   "type": "module",

@@ -26,7 +26,8 @@
   "files": [
     "src",
     "bin",
-    "!src/**/*.test.ts"
+    "!src/**/*.test.ts",
+    "!**/AGENTS.md"
   ],
   "repository": {
     "type": "git",

@@ -42,6 +43,15 @@
     "search",
     "cli",
     "terminal",
-    "bun"
+    "bun",
+    "llm",
+    "chatgpt",
+    "api",
+    "streaming",
+    "pdf",
+    "image",
+    "multimodal",
+    "research",
+    "assistant"
   ]
 }

package/src/api.ts
CHANGED
@@ -90,6 +90,8 @@ export async function search(
   let buffer = '';
   let citations: SearchResult[] = [];
   let usage = { prompt_tokens: 0, completion_tokens: 0 };
+  let parseErrors = 0;
+  let unexpectedLines = 0;

   while (true) {
     const { done, value } = await reader.read();

@@ -100,7 +102,18 @@ export async function search(
     buffer = lines.pop() || '';

     for (const line of lines) {
-      if (!line.
+      if (!line.trim()) continue;
+
+      if (!line.startsWith('data: ')) {
+        if (!line.startsWith(':')) {
+          unexpectedLines++;
+          if (unexpectedLines <= 3) {
+            console.error(`[pplx] Unexpected SSE line: ${line.slice(0, 100)}${line.length > 100 ? '...' : ''}`);
+          }
+        }
+        continue;
+      }
+
       const data = line.slice(6).trim();
       if (data === '[DONE]') continue;

@@ -118,10 +131,18 @@ export async function search(
           usage = parsed.usage;
         }
       } catch {
+        parseErrors++;
+        if (parseErrors <= 3) {
+          console.error(`[pplx] Failed to parse JSON: ${data.slice(0, 100)}${data.length > 100 ? '...' : ''}`);
+        }
         continue;
       }
     }
   }

+  if (parseErrors > 0 || unexpectedLines > 0) {
+    console.error(`[pplx] Stream completed with ${parseErrors} parse errors, ${unexpectedLines} unexpected lines`);
+  }
+
   callbacks.onDone(citations, usage);
 }

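For context, the patched stream loop distinguishes three kinds of SSE lines: blank keep-alives, comment lines starting with `:`, and `data: ` payloads; anything else is counted as unexpected, and JSON parse failures are counted separately, with at most three of each reported. A minimal standalone sketch of the same classification (the `SseEvents` type and `parseSseLines` helper are illustrative names, not part of the package):

```ts
// Sketch: classify raw SSE lines the way the patched search() loop does.
// SseEvents and parseSseLines are illustrative names only.
interface SseEvents {
  data: unknown[];
  parseErrors: number;
  unexpectedLines: number;
}

function parseSseLines(lines: string[]): SseEvents {
  const out: SseEvents = { data: [], parseErrors: 0, unexpectedLines: 0 };
  for (const line of lines) {
    if (!line.trim()) continue;                        // blank keep-alive
    if (!line.startsWith('data: ')) {
      if (!line.startsWith(':')) out.unexpectedLines++; // ':' lines are SSE comments
      continue;
    }
    const payload = line.slice(6).trim();
    if (payload === '[DONE]') continue;                // end-of-stream sentinel
    try {
      out.data.push(JSON.parse(payload));
    } catch {
      out.parseErrors++;                               // malformed JSON is counted, not fatal
    }
  }
  return out;
}
```
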
package/src/env.ts
CHANGED
@@ -17,9 +17,9 @@ let _env: z.infer<typeof envSchema> | null = null;
 export function getEnv() {
   if (_env) return _env;

-  const
-
-  if (!
+  const apiKey = process.env.PERPLEXITY_API_KEY || process.env.PERPLEXITY_AI_API_KEY;
+
+  if (!apiKey) {
     console.error(`
 ${c.red}✗ Missing API Key${c.reset}


@@ -27,11 +27,11 @@ Set your Perplexity API key:

 ${c.cyan}export PERPLEXITY_API_KEY="pplx-..."${c.reset}

-${c.dim}Get one at: https://perplexity.ai/settings/api${c.reset}
+${c.dim}Get one at: https://www.perplexity.ai/settings/api${c.reset}
 `);
-    process.exit(
+    process.exit(1);
   }

-  _env = { PERPLEXITY_API_KEY:
+  _env = { PERPLEXITY_API_KEY: apiKey };
   return _env;
 }

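The updated getEnv() accepts either PERPLEXITY_API_KEY or PERPLEXITY_AI_API_KEY and memoizes the result. A rough sketch of that fallback-and-cache pattern, with the error output simplified (`readApiKey` is a made-up name, not the package's export):

```ts
// Sketch: env-var fallback with memoization, mirroring the patched getEnv().
// readApiKey is an illustrative name.
let cached: { PERPLEXITY_API_KEY: string } | null = null;

function readApiKey(): { PERPLEXITY_API_KEY: string } {
  if (cached) return cached;
  const apiKey = process.env.PERPLEXITY_API_KEY || process.env.PERPLEXITY_AI_API_KEY;
  if (!apiKey) {
    console.error('Missing API key: set PERPLEXITY_API_KEY (see https://www.perplexity.ai/settings/api)');
    process.exit(1);
  }
  cached = { PERPLEXITY_API_KEY: apiKey };
  return cached;
}
```
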
package/src/history.ts
CHANGED
@@ -1,3 +1,6 @@
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+import { mkdir, appendFile } from 'node:fs/promises';
 import type { Model } from './api';

 export interface HistoryEntry {

@@ -8,33 +11,42 @@ export interface HistoryEntry {
   citations?: string[];
 }

-const HISTORY_DIR =
-const HISTORY_PATH =
-const
+const HISTORY_DIR = join(homedir(), '.pplx');
+const HISTORY_PATH = join(HISTORY_DIR, 'history.jsonl');
+const MAX_ENTRIES_TO_KEEP = 500;
+const ROTATION_SIZE_THRESHOLD = 512 * 1024; // 512KB
 const MAX_ANSWER_LENGTH = 2000;

+let appendCount = 0;
+
 async function ensureDir(): Promise<void> {
   const dir = Bun.file(HISTORY_DIR);
   if (!(await dir.exists())) {
-    await
+    await mkdir(HISTORY_DIR, { recursive: true });
+  }
+}
+
+export async function maybeRotateHistory(): Promise<void> {
+  const file = Bun.file(HISTORY_PATH);
+  if (!(await file.exists()) || file.size < ROTATION_SIZE_THRESHOLD) return;
+
+  const text = await file.text();
+  const lines = text.trim().split('\n').filter(l => l.length > 0);
+
+  if (lines.length > MAX_ENTRIES_TO_KEEP) {
+    const keep = lines.slice(-MAX_ENTRIES_TO_KEEP).join('\n') + '\n';
+    await Bun.write(HISTORY_PATH, keep);
   }
 }

 export async function appendHistory(entry: Omit<HistoryEntry, 'ts'>): Promise<void> {
   await ensureDir();
-
-
-
-
-  if (exists) {
-    const text = await file.text();
-    const lines = text.trim().split('\n').filter(l => l.length > 0);
-    if (lines.length >= MAX_ENTRIES) {
-      const keep = lines.slice(-MAX_ENTRIES + 1).join('\n') + '\n';
-      await Bun.write(HISTORY_PATH, keep);
-    }
+
+  if (appendCount % 10 === 0) {
+    await maybeRotateHistory();
   }
-
+  appendCount++;
+
   const record: HistoryEntry = {
     ts: Date.now(),
     q: entry.q,

@@ -42,15 +54,9 @@ export async function appendHistory(entry: Omit<HistoryEntry, 'ts'>): Promise<vo
     a: entry.a.slice(0, MAX_ANSWER_LENGTH),
     ...(entry.citations?.length ? { citations: entry.citations } : {}),
   };
-
+
   const line = JSON.stringify(record) + '\n';
-
-  if (exists) {
-    const current = await Bun.file(HISTORY_PATH).text();
-    await Bun.write(HISTORY_PATH, current + line);
-  } else {
-    await Bun.write(HISTORY_PATH, line);
-  }
+  await appendFile(HISTORY_PATH, line);
 }

 export async function readHistory(limit = 20): Promise<HistoryEntry[]> {

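The history file now grows by plain appendFile calls instead of read-and-rewrite, and rotation is size-triggered: only when history.jsonl passes 512 KB are the newest 500 entries kept, and the check itself runs on every tenth append. A compact sketch of that policy, generalized to any JSONL log (helper names here are illustrative):

```ts
// Sketch: size-triggered JSONL rotation, same policy as maybeRotateHistory().
// rotateJsonl/appendJsonl are illustrative helpers, not pplx-zero exports.
import { appendFile } from 'node:fs/promises';

const SIZE_THRESHOLD = 512 * 1024; // rotate once the file passes 512 KB
const KEEP_LAST = 500;             // entries retained after rotation

async function rotateJsonl(path: string): Promise<void> {
  const file = Bun.file(path);
  if (!(await file.exists()) || file.size < SIZE_THRESHOLD) return;
  const lines = (await file.text()).trim().split('\n').filter(l => l.length > 0);
  if (lines.length > KEEP_LAST) {
    await Bun.write(path, lines.slice(-KEEP_LAST).join('\n') + '\n');
  }
}

async function appendJsonl(path: string, record: object): Promise<void> {
  await appendFile(path, JSON.stringify(record) + '\n');
}
```
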
package/src/index.ts
CHANGED
@@ -6,6 +6,7 @@ import { getEnv } from './env';
 import { fmt, write, writeLn } from './output';
 import { appendHistory, readHistory, getLastEntry } from './history';
 import { renderMarkdown, createMarkdownState } from './markdown';
+import { search as ragSearch, searchForRag, ingestDirectory, ingestFile, ingestPath, getDocCount, getKnowledgeDir } from './rag';

 getEnv();


@@ -20,6 +21,8 @@ const { values, positionals } = parseArgs({
     history: { type: 'boolean', default: false },
     'no-history': { type: 'boolean', default: false },
     continue: { type: 'boolean', short: 'c', default: false },
+    local: { type: 'boolean', short: 'l', default: false },
+    ingest: { type: 'boolean', default: false },
     output: { type: 'string', short: 'o' },
     raw: { type: 'boolean', default: false },
   },

@@ -39,6 +42,8 @@ Options:
   -i, --image <path>   Attach an image (PNG, JPG, etc.)
   -o, --output <path>  Save output to file (.md, .txt)
   -c, --continue       Continue from last query (add context)
+  -l, --local          RAG: search local docs, inject as context
+  --ingest [path]      Index file/dir/glob (default: ~/.pplx/knowledge/)
   --history            Show query history
   --no-history         Don't save this query to history
   --raw                Raw output (no markdown rendering)

@@ -51,6 +56,8 @@ Examples:
   pplx -f report.pdf "summarize this document"
   pplx -c "tell me more about that"
   pplx --history | grep "bun"
+  pplx --ingest
+  pplx -l "my notes on rust"
 `);
   process.exit(0);
 }

@@ -67,7 +74,58 @@ if (values.history) {
   process.exit(0);
 }

-if (
+if (values.ingest) {
+  const target = positionals[0];
+
+  try {
+    if (target) {
+      const stats = await ingestPath(target);
+      if (stats.added + stats.updated + stats.skipped === 0) {
+        console.log(`No files found matching: ${target}`);
+      } else {
+        console.log(`Done! Added: ${stats.added}, Updated: ${stats.updated}, Skipped: ${stats.skipped}`);
+      }
+    } else {
+      console.log(`Indexing files from ${getKnowledgeDir()}...`);
+      const stats = await ingestDirectory();
+      console.log(`Done! Added: ${stats.added}, Updated: ${stats.updated}, Skipped: ${stats.skipped}`);
+    }
+
+    console.log(`Total documents: ${getDocCount()}`);
+  } catch (err) {
+    console.error(fmt.error(err instanceof Error ? err.message : 'Ingest failed'));
+    process.exit(2);
+  }
+  process.exit(0);
+}
+
+let ragContext = '';
+
+if (values.local) {
+  const query = positionals.join(' ');
+  if (!query) {
+    console.error(fmt.error('No query provided for local search.'));
+    process.exit(2);
+  }
+
+  const results = searchForRag(query);
+
+  if (results.length === 0) {
+    console.log('No local documents match. Proceeding with Perplexity only...\n');
+  } else {
+    if (!values.json) {
+      console.log(`${fmt.model('local')} Found ${results.length} relevant doc(s), using as context...\n`);
+    }
+
+    ragContext = 'Context from user\'s knowledge base:\n---\n';
+    for (const r of results) {
+      ragContext += `[${r.title}]:\n${r.content}\n\n`;
+    }
+    ragContext += '---\n\nQuestion: ';
+  }
+}
+
+if (positionals.length === 0 && !values.continue && !values.local) {
   console.error(fmt.error('No query provided. Use -h for help.'));
   process.exit(2);
 }

@@ -81,6 +139,10 @@ const model = values.model as Model;

 let query = positionals.join(' ');

+if (ragContext) {
+  query = ragContext + query;
+}
+
 if (values.continue) {
   const last = await getLastEntry();
   if (last) {

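When `-l` is set, index.ts prepends the retrieved local documents to the outgoing query rather than sending them as separate messages. Roughly, the prompt assembly looks like this (a sketch using the RagContext shape from rag.ts; `buildPrompt` is an illustrative name, the real code builds the string inline):

```ts
// Sketch: how the RAG context is folded into the user query before the API call.
// buildPrompt is illustrative only.
interface RagContext {
  title: string;
  content: string;
}

function buildPrompt(results: RagContext[], question: string): string {
  if (results.length === 0) return question; // fall back to a plain query
  let ctx = "Context from user's knowledge base:\n---\n";
  for (const r of results) {
    ctx += `[${r.title}]:\n${r.content}\n\n`;
  }
  return ctx + '---\n\nQuestion: ' + question;
}
```
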
package/src/rag.ts
ADDED
@@ -0,0 +1,269 @@
+import { Database } from 'bun:sqlite';
+import { Glob } from 'bun';
+import { homedir } from 'node:os';
+import { join, basename, resolve, extname } from 'node:path';
+import { mkdir, stat, copyFile } from 'node:fs/promises';
+
+const PPLX_DIR = join(homedir(), '.pplx');
+const KNOWLEDGE_DIR = join(PPLX_DIR, 'knowledge');
+const DB_PATH = join(PPLX_DIR, 'rag.db');
+
+let db: Database | null = null;
+
+function getDb(): Database {
+  if (db) return db;
+
+  db = new Database(DB_PATH);
+
+  db.exec(`
+    CREATE VIRTUAL TABLE IF NOT EXISTS docs_fts USING fts5(
+      path,
+      title,
+      content,
+      tokenize = 'porter unicode61'
+    );
+  `);
+
+  db.exec(`
+    CREATE TABLE IF NOT EXISTS docs (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      path TEXT UNIQUE NOT NULL,
+      title TEXT,
+      ingested_at INTEGER DEFAULT (unixepoch())
+    );
+  `);
+
+  return db;
+}
+
+export interface SearchResult {
+  path: string;
+  title: string;
+  snippet: string;
+  score: number;
+}
+
+export interface IngestStats {
+  added: number;
+  updated: number;
+  skipped: number;
+}
+
+export async function ensureKnowledgeDir(): Promise<void> {
+  await mkdir(KNOWLEDGE_DIR, { recursive: true });
+}
+
+export async function ingestFile(filePath: string): Promise<'added' | 'updated' | 'skipped'> {
+  const db = getDb();
+  const content = await Bun.file(filePath).text();
+
+  if (!content.trim()) return 'skipped';
+
+  const title = filePath.split('/').pop()?.replace(/\.(md|txt)$/, '') || filePath;
+
+  const existing = db.query('SELECT id FROM docs WHERE path = ?').get(filePath) as { id: number } | null;
+
+  if (existing) {
+    db.run('DELETE FROM docs_fts WHERE rowid = ?', [existing.id]);
+    db.run('UPDATE docs SET title = ?, ingested_at = unixepoch() WHERE id = ?', [title, existing.id]);
+    db.run('INSERT INTO docs_fts (rowid, path, title, content) VALUES (?, ?, ?, ?)',
+      [existing.id, filePath, title, content]);
+    return 'updated';
+  }
+
+  const result = db.run('INSERT INTO docs (path, title) VALUES (?, ?)', [filePath, title]);
+  const docId = result.lastInsertRowid;
+  db.run('INSERT INTO docs_fts (rowid, path, title, content) VALUES (?, ?, ?, ?)',
+    [docId, filePath, title, content]);
+
+  return 'added';
+}
+
+export async function ingestDirectory(dir?: string): Promise<IngestStats> {
+  await ensureKnowledgeDir();
+
+  const targetDir = dir || KNOWLEDGE_DIR;
+  const glob = new Glob('**/*.{md,txt}');
+  const files: string[] = [];
+
+  for await (const file of glob.scan({ cwd: targetDir, absolute: true })) {
+    files.push(file);
+  }
+
+  const stats: IngestStats = { added: 0, updated: 0, skipped: 0 };
+
+  for (const file of files) {
+    const result = await ingestFile(file);
+    stats[result]++;
+  }
+
+  return stats;
+}
+
+export function search(query: string, limit = 5): SearchResult[] {
+  const db = getDb();
+
+  const ftsQuery = query
+    .trim()
+    .split(/\s+/)
+    .map(word => `${word}*`)
+    .join(' OR ');
+
+  const stmt = db.prepare(`
+    SELECT
+      path,
+      title,
+      snippet(docs_fts, 2, '>', '<', '...', 40) as snippet,
+      bm25(docs_fts) as score
+    FROM docs_fts
+    WHERE docs_fts MATCH ?
+    ORDER BY score
+    LIMIT ?
+  `);
+
+  try {
+    return stmt.all(ftsQuery, limit) as SearchResult[];
+  } catch {
+    return [];
+  }
+}
+
+export interface RagContext {
+  title: string;
+  content: string;
+}
+
+function truncateUtf8Safe(str: string, maxLen: number): string {
+  if (str.length <= maxLen) return str;
+  let truncated = str.slice(0, maxLen);
+  const lastChar = truncated.charCodeAt(truncated.length - 1);
+  if (lastChar >= 0xD800 && lastChar <= 0xDBFF) {
+    truncated = truncated.slice(0, -1);
+  }
+  return truncated;
+}
+
+export function searchForRag(query: string, limit = 3, maxChars = 4000): RagContext[] {
+  const db = getDb();
+
+  const ftsQuery = query
+    .trim()
+    .split(/\s+/)
+    .map(word => `${word}*`)
+    .join(' OR ');
+
+  const stmt = db.prepare(`
+    SELECT title, content
+    FROM docs_fts
+    WHERE docs_fts MATCH ?
+    ORDER BY bm25(docs_fts)
+    LIMIT ?
+  `);
+
+  try {
+    const results = stmt.all(ftsQuery, limit) as { title: string; content: string }[];
+    let totalChars = 0;
+    const truncated: RagContext[] = [];
+
+    for (const r of results) {
+      const remaining = maxChars - totalChars;
+      if (remaining <= 0) break;
+
+      const content = truncateUtf8Safe(r.content, remaining);
+      truncated.push({
+        title: r.title,
+        content,
+      });
+      totalChars += content.length;
+    }
+
+    return truncated;
+  } catch {
+    return [];
+  }
+}
+
+export function getDocCount(): number {
+  const db = getDb();
+  const result = db.query('SELECT COUNT(*) as count FROM docs').get() as { count: number };
+  return result.count;
+}
+
+export function getKnowledgeDir(): string {
+  return KNOWLEDGE_DIR;
+}
+
+const SUPPORTED_EXTS = new Set(['.md', '.txt']);
+
+async function copyToKnowledge(filePath: string): Promise<string> {
+  await ensureKnowledgeDir();
+  const ext = extname(filePath);
+  const name = basename(filePath, ext);
+  let dest = join(KNOWLEDGE_DIR, basename(filePath));
+  let counter = 1;
+  while (await Bun.file(dest).exists()) {
+    dest = join(KNOWLEDGE_DIR, `${name}_${counter}${ext}`);
+    counter++;
+  }
+  await copyFile(filePath, dest);
+  return dest;
+}
+
+export async function ingestPath(target: string): Promise<IngestStats> {
+  await ensureKnowledgeDir();
+  const stats: IngestStats = { added: 0, updated: 0, skipped: 0 };
+  const resolved = resolve(target);
+
+  const isGlob = target.includes('*');
+
+  if (isGlob) {
+    const glob = new Glob(target);
+    for await (const file of glob.scan({ cwd: process.cwd(), absolute: true })) {
+      const ext = extname(file).toLowerCase();
+      if (!SUPPORTED_EXTS.has(ext)) {
+        console.log(`Skipping unsupported: ${file} (only .md, .txt)`);
+        stats.skipped++;
+        continue;
+      }
+      const dest = await copyToKnowledge(file);
+      const result = await ingestFile(dest);
+      stats[result]++;
+      console.log(`${result}: ${basename(file)}`);
+    }
+    return stats;
+  }
+
+  let info;
+  try {
+    info = await stat(resolved);
+  } catch {
+    throw new Error(`Path not found: ${target}`);
+  }
+
+  if (info.isDirectory()) {
+    console.log(`Indexing directory: ${resolved}`);
+    const glob = new Glob('**/*.{md,txt}');
+    for await (const file of glob.scan({ cwd: resolved, absolute: true })) {
+      const dest = await copyToKnowledge(file);
+      const result = await ingestFile(dest);
+      stats[result]++;
+      console.log(`${result}: ${basename(file)}`);
+    }
+    return stats;
+  }
+
+  const ext = extname(resolved).toLowerCase();
+  if (ext === '.pdf') {
+    throw new Error('PDF indexing not supported (requires text extraction library). Use -f to send PDFs to Perplexity API instead.');
+  }
+  if (!SUPPORTED_EXTS.has(ext)) {
+    throw new Error(`Unsupported file type: ${ext}. Supported: .md, .txt`);
+  }
+
+  const dest = await copyToKnowledge(resolved);
+  const result = await ingestFile(dest);
+  stats[result]++;
+  console.log(`${result}: ${basename(resolved)} → ~/.pplx/knowledge/`);
+
+  return stats;
+}

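rag.ts backs the local knowledge base with SQLite FTS5 via bun:sqlite: documents are stored in a docs_fts virtual table, query words are prefix-expanded (`word*`) and OR-joined, and results are ranked with bm25(). A minimal usage sketch of that query pattern against an in-memory database (table and column names copied from the diff; the sample document and query are made up):

```ts
// Sketch: the FTS5 query pattern used by search()/searchForRag(), run in-memory.
import { Database } from 'bun:sqlite';

const db = new Database(':memory:');
db.exec(`
  CREATE VIRTUAL TABLE docs_fts USING fts5(
    path, title, content,
    tokenize = 'porter unicode61'
  );
`);

// Made-up sample document for the demo.
db.run('INSERT INTO docs_fts (rowid, path, title, content) VALUES (?, ?, ?, ?)',
  [1, '/notes/rust.md', 'rust', 'Notes on Rust ownership and borrowing.']);

// Prefix-expand each query word and OR-join, as the package does.
const ftsQuery = 'rust ownership'.trim().split(/\s+/).map(w => `${w}*`).join(' OR ');

const rows = db.prepare(`
  SELECT title, snippet(docs_fts, 2, '>', '<', '...', 40) AS snippet, bm25(docs_fts) AS score
  FROM docs_fts
  WHERE docs_fts MATCH ?
  ORDER BY score
  LIMIT ?
`).all(ftsQuery, 3);

console.log(rows); // lower bm25 scores sort first, i.e. most relevant docs lead
```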