@a13xu/lucid 1.13.0 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -21
- package/build/compression/semantic.d.ts +31 -0
- package/build/compression/semantic.js +196 -0
- package/build/config.d.ts +15 -0
- package/build/http/routes.d.ts +3 -0
- package/build/http/routes.js +56 -0
- package/build/http/server.d.ts +7 -0
- package/build/http/server.js +11 -0
- package/build/index.js +176 -0
- package/build/lucid-sync.d.ts +15 -0
- package/build/lucid-sync.js +72 -0
- package/build/retrieval/context.js +6 -0
- package/build/retrieval/qdrant.d.ts +1 -1
- package/build/retrieval/qdrant.js +11 -2
- package/build/tools/compress.d.ts +15 -0
- package/build/tools/compress.js +18 -0
- package/build/tools/init.js +16 -1
- package/build/tools/model-advisor.d.ts +9 -0
- package/build/tools/model-advisor.js +30 -0
- package/build/tools/smart-context.d.ts +16 -0
- package/build/tools/smart-context.js +54 -0
- package/build/tools/sync.js +8 -0
- package/package.json +64 -59
- package/skills/lucid-audit/SKILL.md +43 -23
- package/skills/lucid-context/SKILL.md +54 -20
- package/skills/lucid-plan/SKILL.md +25 -33
- package/skills/lucid-security/SKILL.md +22 -40
- package/skills/lucid-start/SKILL.md +70 -0
- package/skills/lucid-webdev/SKILL.md +31 -109
package/README.md
CHANGED
|
@@ -4,11 +4,11 @@
|
|
|
4
4
|
[](https://www.npmjs.com/package/@a13xu/lucid)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
|
-
> **MCP server for Claude Code** — persistent memory, smart code indexing, and code quality validation. Works out of the box with zero configuration.
|
|
7
|
+
> **MCP server for Claude Code** — persistent memory, smart code indexing, model selection, and code quality validation. Works out of the box with zero configuration.
|
|
8
8
|
|
|
9
9
|
Token-efficient memory, code indexing, and validation for Claude Code agents — backed by **SQLite + FTS5**.
|
|
10
10
|
|
|
11
|
-
Stores a persistent knowledge graph (entities, relations, observations), indexes source files as compressed binary with change detection, retrieves minimal relevant context via TF-IDF or Qdrant, and validates code for LLM drift patterns. Supports TypeScript, JavaScript, Python, **Vue, Nuxt**.
|
|
11
|
+
Stores a persistent knowledge graph (entities, relations, observations), indexes source files as compressed binary with change detection, retrieves minimal relevant context via TF-IDF or Qdrant, and validates code for LLM drift patterns. Supports TypeScript, JavaScript, Python, **Vue, Nuxt**. Optional **LLMLingua-2 semantic compression** reduces context tokens by 30–70% while preserving meaning.
|
|
12
12
|
|
|
13
13
|
## Install
|
|
14
14
|
|
|
@@ -45,15 +45,16 @@ Default DB path: `~/.claude/memory.db`
|
|
|
45
45
|
## Quick start
|
|
46
46
|
|
|
47
47
|
```
|
|
48
|
-
1. "Index this project" → init_project()
|
|
49
|
-
2. Write code
|
|
50
|
-
3. "What's relevant?" →
|
|
51
|
-
4. "What
|
|
52
|
-
5. "
|
|
53
|
-
6. "
|
|
48
|
+
1. "Index this project" → init_project() → scans CLAUDE.md, package.json, src/**
|
|
49
|
+
2. Write code → sync_file(path) → compressed + hashed + diff stored
|
|
50
|
+
3. "What's relevant?" → smart_context("auth flow") → recall + code in one call, adaptive budget
|
|
51
|
+
4. "What model?" → suggest_model("refactor auth") → haiku (lookup) or sonnet (reasoning)
|
|
52
|
+
5. "What changed?" → get_recent(hours=2) → line diffs of recent edits
|
|
53
|
+
6. "Where is X used?" → grep_code("X") → matching lines only, ~30 tokens
|
|
54
|
+
7. "What do we know?" → recall("query") → knowledge graph search
|
|
54
55
|
```
|
|
55
56
|
|
|
56
|
-
## Tools (
|
|
57
|
+
## Tools (37)
|
|
57
58
|
|
|
58
59
|
### Memory
|
|
59
60
|
| Tool | Description |
|
|
@@ -76,8 +77,11 @@ Default DB path: `~/.claude/memory.db`
|
|
|
76
77
|
### Token optimization
|
|
77
78
|
| Tool | Description |
|
|
78
79
|
|---|---|
|
|
79
|
-
| `
|
|
80
|
+
| `smart_context` | **Recommended entry point.** Combines `recall()` (knowledge graph) + `get_context()` (code files) in one call. Adaptive token budget: `simple`=2000, `moderate`=6000, `complex`=12000. Logs an experience for `reward()`/`penalize()` feedback. |
|
|
81
|
+
| `suggest_model` | Classify task complexity → recommend Claude model. Returns `{ model, model_id, reasoning, context_budget }`. Simple lookups → Haiku; reasoning/code → Sonnet. Call at the start of any workflow. |
|
|
82
|
+
| `get_context` | **Classic code context.** Ranks indexed files by TF-IDF (or Qdrant), applies recency boost, returns skeletons for large files. Respects `maxContextTokens` budget. |
|
|
80
83
|
| `get_recent` | Return files modified in the last N hours with line-level diffs. |
|
|
84
|
+
| `compress_text` | Compress any text using LLMLingua-2 semantic compression. Returns compressed text + stats (ratio, tokens saved). Model downloads ~700MB on first use. |
|
|
81
85
|
|
|
82
86
|
### Logic Guardian
|
|
83
87
|
| Tool | Description |
|
|
@@ -86,11 +90,19 @@ Default DB path: `~/.claude/memory.db`
|
|
|
86
90
|
| `check_drift` | Analyze a code snippet inline without saving to disk. |
|
|
87
91
|
| `get_checklist` | Return the full 5-pass validation protocol (Logic Trace, Contract Verification, Stupid Mistakes, Integration Sanity, Explain It). |
|
|
88
92
|
|
|
93
|
+
### Plans
|
|
94
|
+
| Tool | Description |
|
|
95
|
+
|---|---|
|
|
96
|
+
| `plan_create` | Create a development plan with title, description, and tasks. Returns plan ID. |
|
|
97
|
+
| `plan_list` | List all plans with status summary (total/done/in-progress tasks). |
|
|
98
|
+
| `plan_get` | Get full plan details including all tasks and their status. |
|
|
99
|
+
| `plan_update_task` | Update a task's status (`todo` → `in_progress` → `done`) and optionally add notes. |
|
|
100
|
+
|
|
89
101
|
### Reward system
|
|
90
102
|
| Tool | Description |
|
|
91
103
|
|---|---|
|
|
92
|
-
| `reward` | Signal that the last `get_context()` result was helpful (+1). Rewarded files rank higher in future similar queries. |
|
|
93
|
-
| `penalize` | Signal that the last
|
|
104
|
+
| `reward` | Signal that the last `smart_context()`/`get_context()` result was helpful (+1). Rewarded files rank higher in future similar queries. |
|
|
105
|
+
| `penalize` | Signal that the last result was unhelpful (-1). Penalized files rank lower. Accepts optional `note` to log what was missing. |
|
|
94
106
|
| `show_rewards` | Show top rewarded experiences and most rewarded files. Rewards decay exponentially (half-life ~14 days). |
|
|
95
107
|
|
|
96
108
|
### Code Quality Guard
|
|
@@ -115,7 +127,31 @@ Default DB path: `~/.claude/memory.db`
|
|
|
115
127
|
|
|
116
128
|
## Token optimization in depth
|
|
117
129
|
|
|
118
|
-
### How `
|
|
130
|
+
### How `smart_context` works (recommended)
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
query: "auth middleware"
|
|
134
|
+
↓
|
|
135
|
+
1. recall(query) — knowledge graph search (entities, relations)
|
|
136
|
+
↓
|
|
137
|
+
2. TF-IDF score all indexed files against query
|
|
138
|
+
(or Qdrant top-k if QDRANT_URL is set)
|
|
139
|
+
↓
|
|
140
|
+
3. Boost recently-modified files (+0.3 score)
|
|
141
|
+
Boost rewarded files (+0.25 score, decayed)
|
|
142
|
+
↓
|
|
143
|
+
4. For each file within token budget:
|
|
144
|
+
file < maxTokensPerFile → return full source
|
|
145
|
+
file > maxTokensPerFile → return skeleton only
|
|
146
|
+
(imports + signatures + TODOs)
|
|
147
|
+
+ relevant fragments around query terms
|
|
148
|
+
↓
|
|
149
|
+
5. Optional: LLMLingua-2 compression (if enabled in config)
|
|
150
|
+
↓
|
|
151
|
+
output: merged knowledge + code — budget: 2k/6k/12k by task_type
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### How `get_context` works (classic)
|
|
119
155
|
|
|
120
156
|
```
|
|
121
157
|
query: "auth middleware"
|
|
@@ -183,6 +219,32 @@ Or in `.mcp.json`:
|
|
|
183
219
|
|
|
184
220
|
Falls back to TF-IDF automatically if Qdrant is unreachable.
|
|
185
221
|
|
|
222
|
+
### Semantic compression (optional)
|
|
223
|
+
|
|
224
|
+
LLMLingua-2 (`microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank`) identifies and drops semantically unimportant tokens before returning context to Claude — and before generating Qdrant embeddings.
|
|
225
|
+
|
|
226
|
+
Enable in `lucid.config.json`:
|
|
227
|
+
|
|
228
|
+
```json
|
|
229
|
+
{
|
|
230
|
+
"semanticCompression": {
|
|
231
|
+
"enabled": true,
|
|
232
|
+
"ratio": 0.5,
|
|
233
|
+
"minLength": 300,
|
|
234
|
+
"applyToEmbeddings": true
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
| Key | Default | Description |
|
|
240
|
+
|---|---|---|
|
|
241
|
+
| `enabled` | `false` | Opt-in — model downloads ~700MB on first use |
|
|
242
|
+
| `ratio` | `0.5` | Fraction of tokens to keep (0.3 = keep 30%) |
|
|
243
|
+
| `minLength` | `300` | Skip compression for texts shorter than this |
|
|
244
|
+
| `applyToEmbeddings` | `true` | Also compress chunk text before Qdrant embedding |
|
|
245
|
+
|
|
246
|
+
Model is cached in `~/.lucid/models/` after first download. Falls back to uncompressed text on any error — safe to enable unconditionally.
|
|
247
|
+
|
|
186
248
|
### Configuration (`lucid.config.json`)
|
|
187
249
|
|
|
188
250
|
Create in your project root to customize behavior:
|
|
@@ -191,9 +253,13 @@ Create in your project root to customize behavior:
|
|
|
191
253
|
{
|
|
192
254
|
"whitelistDirs": ["src", "backend", "api"],
|
|
193
255
|
"blacklistDirs": ["migrations", "fixtures"],
|
|
194
|
-
"maxTokensPerFile":
|
|
195
|
-
"maxContextTokens":
|
|
196
|
-
"recentWindowHours":
|
|
256
|
+
"maxTokensPerFile": 600,
|
|
257
|
+
"maxContextTokens": 8000,
|
|
258
|
+
"recentWindowHours": 48,
|
|
259
|
+
"semanticCompression": {
|
|
260
|
+
"enabled": false,
|
|
261
|
+
"ratio": 0.5
|
|
262
|
+
}
|
|
197
263
|
}
|
|
198
264
|
```
|
|
199
265
|
|
|
@@ -201,9 +267,9 @@ Create in your project root to customize behavior:
|
|
|
201
267
|
|---|---|---|
|
|
202
268
|
| `whitelistDirs` | — | Only index/return files from these dirs |
|
|
203
269
|
| `blacklistDirs` | — | Extra dirs to skip (merged with built-in skips) |
|
|
204
|
-
| `maxTokensPerFile` | `
|
|
205
|
-
| `maxContextTokens` | `
|
|
206
|
-
| `recentWindowHours` | `
|
|
270
|
+
| `maxTokensPerFile` | `600` | Files above this get skeleton treatment |
|
|
271
|
+
| `maxContextTokens` | `8000` | Total token budget for `get_context` |
|
|
272
|
+
| `recentWindowHours` | `48` | "Recently touched" threshold |
|
|
207
273
|
|
|
208
274
|
## Why no vectors by default?
|
|
209
275
|
|
|
@@ -237,6 +303,63 @@ TF-IDF is fast, deterministic, and requires zero external services. Qdrant is av
|
|
|
237
303
|
## Relation types
|
|
238
304
|
`uses` · `depends_on` · `created_by` · `part_of` · `replaced_by` · `conflicts_with` · `tested_by`
|
|
239
305
|
|
|
306
|
+
## HTTP daemon & auto-sync
|
|
307
|
+
|
|
308
|
+
Lucid can run as a background HTTP daemon (port 7821) for auto-syncing files without Claude's cooperation.
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
# Start daemon (watches for sync requests; add --http to also serve the REST API)
|
|
312
|
+
lucid watch
|
|
313
|
+
|
|
314
|
+
# With HTTP server
|
|
315
|
+
lucid watch --http
|
|
316
|
+
|
|
317
|
+
# Check status
|
|
318
|
+
lucid status
|
|
319
|
+
|
|
320
|
+
# Stop
|
|
321
|
+
lucid stop
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### REST API (when `--http` is active)
|
|
325
|
+
|
|
326
|
+
| Endpoint | Description |
|
|
327
|
+
|---|---|
|
|
328
|
+
| `POST /sync` `{ path }` | Sync a single file |
|
|
329
|
+
| `POST /sync-project` `{ directory? }` | Sync entire project |
|
|
330
|
+
| `GET /context?q=<query>` | Get context via HTTP |
|
|
331
|
+
| `POST /validate` `{ path }` | Validate file for drift |
|
|
332
|
+
| `GET /health` | Daemon health check |
|
|
333
|
+
|
|
334
|
+
### Auto-sync hook (`lucid-sync`)
|
|
335
|
+
|
|
336
|
+
`init_project` installs a Claude Code `PostToolUse` hook that calls `lucid-sync` after every file write/edit. The sync binary:
|
|
337
|
+
|
|
338
|
+
1. Tries HTTP sync (500ms timeout, if daemon running)
|
|
339
|
+
2. Falls back to direct SQLite sync (no daemon needed)
|
|
340
|
+
|
|
341
|
+
This keeps the knowledge graph current automatically — without relying on Claude remembering to call `sync_file`.
|
|
342
|
+
|
|
343
|
+
## Skills enforcement
|
|
344
|
+
|
|
345
|
+
Lucid ships **enforcement skills** that install globally into `~/.claude/skills/` and activate in every project:
|
|
346
|
+
|
|
347
|
+
| Skill | Purpose |
|
|
348
|
+
|---|---|
|
|
349
|
+
| `lucid-start` | Session start — `get_recent` + `smart_context` before any coding |
|
|
350
|
+
| `lucid-context` | Pre-task context loading — `suggest_model` + `smart_context` |
|
|
351
|
+
| `lucid-audit` | Pre-done gate — validate + check drift before marking complete |
|
|
352
|
+
| `lucid-plan` | Planning workflow |
|
|
353
|
+
| `lucid-sync` | Post-edit sync reminder |
|
|
354
|
+
| `lucid-webdev` | Web dev workflow with context |
|
|
355
|
+
|
|
356
|
+
All skills use `<HARD-GATE>` blocks that prevent proceeding until required tools are called.
|
|
357
|
+
|
|
358
|
+
Install globally:
|
|
359
|
+
```bash
|
|
360
|
+
init_project() # installs skills to ~/.claude/skills/ automatically
|
|
361
|
+
```
|
|
362
|
+
|
|
240
363
|
## Debugging
|
|
241
364
|
|
|
242
365
|
```bash
|
|
@@ -244,7 +367,7 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"capabilities":{},
|
|
|
244
367
|
| npx @a13xu/lucid
|
|
245
368
|
```
|
|
246
369
|
|
|
247
|
-
In Claude Code: run `/mcp` — you should see `lucid` with
|
|
370
|
+
In Claude Code: run `/mcp` — you should see `lucid` with 37 tools.
|
|
248
371
|
|
|
249
372
|
## Contributing
|
|
250
373
|
|
|
@@ -260,8 +383,11 @@ Bug reports and pull requests are welcome on [GitHub](https://github.com/a13xu/l
|
|
|
260
383
|
- **Runtime:** Node.js 18+, TypeScript, ES modules
|
|
261
384
|
- **MCP SDK:** `@modelcontextprotocol/sdk`
|
|
262
385
|
- **Database:** `better-sqlite3` (synchronous, WAL mode)
|
|
263
|
-
- **Compression:** Node.js built-in `zlib` (deflate level 9)
|
|
386
|
+
- **Compression:** Node.js built-in `zlib` (deflate level 9) + LLMLingua-2 semantic compression (optional)
|
|
264
387
|
- **Hashing:** SHA-256 via `crypto` (change detection)
|
|
265
388
|
- **Ranking:** TF-IDF (built-in) or Qdrant (optional, via REST)
|
|
389
|
+
- **Semantic compression:** `@huggingface/transformers` (ONNX Runtime, q8 quantization)
|
|
390
|
+
- **HTTP daemon:** Express 5 on port 7821 (optional)
|
|
391
|
+
- **File watcher:** `chokidar`
|
|
266
392
|
- **Validation:** `zod`
|
|
267
393
|
- **Transport:** stdio
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic compression using LLMLingua-2
|
|
3
|
+
* Model: microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank
|
|
4
|
+
*
|
|
5
|
+
* Reduces text by identifying and dropping semantically unimportant tokens.
|
|
6
|
+
* Uses @huggingface/transformers (ONNX Runtime) for local inference.
|
|
7
|
+
*
|
|
8
|
+
* Pipeline is loaded lazily on first use and cached in memory.
|
|
9
|
+
* Model files are cached in ~/.lucid/models/ after first download (~700MB).
|
|
10
|
+
*
|
|
11
|
+
* tryCompressTextSemantic() falls back to the original text on any error; compressTextSemantic() rejects if the model cannot be loaded.
|
|
12
|
+
*/
|
|
13
|
+
/** Outcome of a single compressTextSemantic() call. */
export interface SemanticCompressionResult {
|
|
14
|
+
/** Text after compression, or the unchanged input when compression was skipped. */
compressed: string;
|
|
15
|
+
/** Length of the input text in characters (`text.length`). */
originalLength: number;
|
|
16
|
+
/** Length of `compressed` in characters. */
compressedLength: number;
|
|
17
|
+
/** Fraction of tokens kept (1.0 = no compression) */
|
|
18
|
+
ratio: number;
|
|
19
|
+
/** "llmlingua2" when the model ran; "fallback" when the input was returned unchanged (e.g. shorter than minLength). */
method: "llmlingua2" | "fallback";
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Compress text using LLMLingua-2 token importance scoring.
|
|
23
|
+
*
|
|
24
|
+
* @param text Input text to compress
|
|
25
|
+
* @param targetRatio Target compression ratio (0.3 = keep 30%, 0.5 = keep 50%)
|
|
26
|
+
* @param minLength Skip compression for texts shorter than this (chars)
|
|
27
|
+
*/
|
|
28
|
+
export declare function compressTextSemantic(text: string, targetRatio?: number, minLength?: number): Promise<SemanticCompressionResult>;
|
|
29
|
+
/** Safe wrapper around compressTextSemantic(): resolves to the compressed string, or to the original `text` if compression throws. */
export declare function tryCompressTextSemantic(text: string, targetRatio?: number, minLength?: number): Promise<string>;
|
|
30
|
+
/** Resolves to true when the model pipeline loads and false when loading fails. Calling it triggers the model load itself. */
export declare function isSemanticCompressionAvailable(): Promise<boolean>;
|
|
31
|
+
/** Starts loading the model pipeline without waiting for it; load failures are ignored here. */
export declare function warmUpSemanticCompression(): void;
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic compression using LLMLingua-2
|
|
3
|
+
* Model: microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank
|
|
4
|
+
*
|
|
5
|
+
* Reduces text by identifying and dropping semantically unimportant tokens.
|
|
6
|
+
* Uses @huggingface/transformers (ONNX Runtime) for local inference.
|
|
7
|
+
*
|
|
8
|
+
* Pipeline is loaded lazily on first use and cached in memory.
|
|
9
|
+
* Model files are cached in ~/.lucid/models/ after first download (~700MB).
|
|
10
|
+
*
|
|
11
|
+
* tryCompressTextSemantic() falls back to the original text on any error; compressTextSemantic() rejects if the model cannot be loaded.
|
|
12
|
+
*/
|
|
13
|
+
import { join } from "path";
|
|
14
|
+
import { homedir } from "os";
|
|
15
|
+
import { mkdirSync } from "fs";
|
|
16
|
+
const MODEL_ID = "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank";
|
|
17
|
+
const MODELS_DIR = join(homedir(), ".lucid", "models");
|
|
18
|
+
/** Loaded token-classification pipeline, set once the model is ready. */
let _pipeline = null;
/** Error from a failed load; cached so later calls fail fast instead of retrying. */
let _loadError = null;
/** In-flight load shared by concurrent callers; null while no load is running. */
let _loadPromise = null;

/**
 * Return the LLMLingua-2 pipeline, loading it on first use.
 *
 * Concurrent callers share a single in-flight load instead of polling a flag
 * on a timer, so they resume as soon as the load settles and no interval
 * keeps the event loop busy while the model downloads.
 *
 * @returns {Promise<Function>} the token-classification pipeline
 * @throws the cached load error if loading failed (now or on an earlier call)
 */
async function getPipeline() {
    if (_loadError)
        throw _loadError;
    if (_pipeline)
        return _pipeline;
    if (!_loadPromise) {
        _loadPromise = loadPipeline().finally(() => {
            _loadPromise = null;
        });
    }
    return _loadPromise;
}

/** Perform the actual model load and record the outcome in module state. */
async function loadPipeline() {
    try {
        mkdirSync(MODELS_DIR, { recursive: true });
        // Dynamic import keeps startup fast when compression is not used
        const { pipeline, env } = await import("@huggingface/transformers");
        env.cacheDir = MODELS_DIR;
        env.allowRemoteModels = true;
        process.stderr.write(`[Lucid] Loading LLMLingua-2 model (first run: downloads ~700MB to ${MODELS_DIR})…\n`);
        _pipeline = (await pipeline("token-classification", MODEL_ID, {
            dtype: "q8", // 8-bit quantization — smaller, faster, minimal quality loss
            device: "cpu",
        }));
        process.stderr.write("[Lucid] LLMLingua-2 model ready.\n");
        return _pipeline;
    }
    catch (e) {
        _loadError = e instanceof Error ? e : new Error(String(e));
        throw _loadError;
    }
}
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
// Core compression
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
/**
 * Compress text using LLMLingua-2 token importance scoring.
 *
 * Each token returned by the classifier gets a keep-probability: its score
 * when labelled "keep" (LABEL_1), one minus its score otherwise. Tokens are
 * ranked by that probability and the top `targetRatio` share is kept, so the
 * requested ratio actually controls how much text survives. Newlines and
 * basic punctuation are always preserved.
 *
 * The input is returned unchanged (method "fallback") when it is shorter than
 * `minLength`, when the classifier yields no tokens, or when no token carries
 * character offsets (nothing could be mapped back onto the text).
 *
 * @param text        Input text to compress
 * @param targetRatio Fraction of tokens to keep (0.3 = keep 30%, 0.5 = keep 50%); clamped to [0, 1]
 * @param minLength   Skip compression for texts shorter than this (chars)
 * @returns {Promise<{compressed: string, originalLength: number, compressedLength: number, ratio: number, method: string}>}
 * @throws rejects when the model pipeline cannot be loaded
 */
export async function compressTextSemantic(text, targetRatio = 0.5, minLength = 300) {
    const unchanged = () => ({
        compressed: text,
        originalLength: text.length,
        compressedLength: text.length,
        ratio: 1.0,
        method: "fallback",
    });
    if (text.length < minLength) {
        return unchanged();
    }
    const pipe = await getPipeline();
    // Run token classification — each token gets entity "LABEL_0" (drop) / "LABEL_1" (keep)
    const tokens = await pipe(text, {
        // Disable aggregation to get per-sub-token results with offsets
        aggregation_strategy: "none",
    });
    if (!tokens || tokens.length === 0) {
        return unchanged();
    }
    // Only tokens with usable character offsets can be projected onto the text.
    const located = tokens.filter((t) => Number.isInteger(t.start) && Number.isInteger(t.end) && t.end > t.start);
    if (located.length === 0) {
        return unchanged();
    }
    const keepFraction = Number.isFinite(targetRatio)
        ? Math.min(1, Math.max(0, targetRatio))
        : 0.5;
    const keepCount = Math.round(located.length * keepFraction);
    const isKeepLabel = (entity) => entity === "LABEL_1" || entity === "1";
    // Rank by probability of "keep"; the sort is stable, so ties keep text order.
    const ranked = located
        .map((token) => ({
            token,
            keepProbability: isKeepLabel(token.entity) ? token.score : 1 - token.score,
        }))
        .sort((a, b) => b.keepProbability - a.keepProbability);
    // Mark characters to keep based on token offsets
    const keepChars = new Uint8Array(text.length);
    for (const { token } of ranked.slice(0, keepCount)) {
        keepChars.fill(1, token.start, token.end);
    }
    // Always keep structural markers (newlines, sentence boundaries)
    const FORCE_KEEP = new Set(["\n", ".", "!", "?", ","]);
    for (let i = 0; i < text.length; i++) {
        if (FORCE_KEEP.has(text[i]))
            keepChars[i] = 1;
    }
    // Reconstruct compressed text from character mask
    let compressed = "";
    let prevKept = false;
    for (let i = 0; i < text.length; i++) {
        if (keepChars[i]) {
            // Preserve a single space when skipping tokens in mid-sentence
            if (!prevKept && compressed.length > 0 && text[i] !== " " && !FORCE_KEEP.has(text[i - 1] ?? "")) {
                compressed += " ";
            }
            compressed += text[i];
            prevKept = true;
        }
        else {
            prevKept = false;
        }
    }
    // Clean up artefacts from compression
    compressed = compressed
        .replace(/ +/g, " ") // multiple spaces → single
        .replace(/\n{3,}/g, "\n\n") // more than 2 newlines → 2
        .replace(/ ([.,!?])/g, "$1") // space before punctuation → no space
        .trim();
    return {
        compressed,
        originalLength: text.length,
        compressedLength: compressed.length,
        // Report what was actually kept, not how many tokens the model labelled "keep".
        ratio: keepCount / tokens.length,
        method: "llmlingua2",
    };
}
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
// Safe wrapper — always returns a string, never throws
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
/**
 * Best-effort compression that never rejects.
 *
 * @param text        Input text to compress
 * @param targetRatio Forwarded to compressTextSemantic (default 0.5)
 * @param minLength   Forwarded to compressTextSemantic (default 300)
 * @returns {Promise<string>} the compressed text, or `text` itself when compression fails
 */
export async function tryCompressTextSemantic(text, targetRatio = 0.5, minLength = 300) {
    let output = text;
    try {
        const outcome = await compressTextSemantic(text, targetRatio, minLength);
        output = outcome.compressed;
    }
    catch {
        // Any failure leaves the caller with the original text.
        output = text;
    }
    return output;
}
|
|
179
|
+
// ---------------------------------------------------------------------------
|
|
180
|
+
// Availability check — call before bulk compression to fail fast
|
|
181
|
+
// ---------------------------------------------------------------------------
|
|
182
|
+
/**
 * Report whether the compression model can be loaded.
 * Calling this triggers the load itself via getPipeline().
 *
 * @returns {Promise<boolean>} true when the pipeline loads, false when loading fails
 */
export async function isSemanticCompressionAvailable() {
    let available;
    try {
        await getPipeline();
        available = true;
    }
    catch {
        available = false;
    }
    return available;
}
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
// Warm-up (optional — call at startup to pre-load model)
|
|
193
|
+
// ---------------------------------------------------------------------------
|
|
194
|
+
/**
 * Kick off the model load without waiting for it.
 * A failed load is deliberately ignored here.
 */
export function warmUpSemanticCompression() {
    const ignoreFailure = () => { };
    getPipeline().catch(ignoreFailure);
}
|
package/build/config.d.ts
CHANGED
|
@@ -12,6 +12,21 @@ export interface LucidConfig {
|
|
|
12
12
|
recentWindowHours?: number;
|
|
13
13
|
/** Security guard configuration */
|
|
14
14
|
security?: SecurityConfig;
|
|
15
|
+
/**
|
|
16
|
+
* Semantic compression via LLMLingua-2 (microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank).
|
|
17
|
+
* When enabled, file content is compressed before being returned to Claude and before Qdrant embedding.
|
|
18
|
+
* Model is downloaded on first use (~700MB) and cached in ~/.lucid/models/.
|
|
19
|
+
*/
|
|
20
|
+
semanticCompression?: {
|
|
21
|
+
/** Enable semantic compression (default: false — opt-in) */
|
|
22
|
+
enabled?: boolean;
|
|
23
|
+
/** Target compression ratio: 0.3 = keep 30%, 0.5 = keep 50% (default: 0.5) */
|
|
24
|
+
ratio?: number;
|
|
25
|
+
/** Skip compression for texts shorter than this in chars (default: 300) */
|
|
26
|
+
minLength?: number;
|
|
27
|
+
/** Also compress text before Qdrant embedding generation (default: true when enabled) */
|
|
28
|
+
applyToEmbeddings?: boolean;
|
|
29
|
+
};
|
|
15
30
|
/** Optional Qdrant vector search (falls back to TF-IDF if not configured) */
|
|
16
31
|
qdrant?: {
|
|
17
32
|
url: string;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { Router } from "express";
|
|
2
|
+
import { handleSyncFile, handleSyncProject } from "../tools/sync.js";
|
|
3
|
+
import { handleGetContext } from "../tools/context.js";
|
|
4
|
+
import { handleValidateFile } from "../tools/guardian.js";
|
|
5
|
+
import { getCurrentVersion } from "../tools/updater.js";
|
|
6
|
+
/** Token budget used by GET /context when `maxTokens` is absent or not a positive number. */
const DEFAULT_CONTEXT_TOKENS = 4000;

/**
 * Parse the `maxTokens` query value.
 * Falls back to the default for missing, non-numeric, zero or negative input
 * so the context handler never receives NaN.
 */
function parseMaxTokens(raw) {
    const parsed = Number(Array.isArray(raw) ? raw[0] : raw);
    return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : DEFAULT_CONTEXT_TOKENS;
}

/**
 * Run `produce` and send its result in the shared `{ ok, result }` envelope;
 * any thrown error or rejection becomes a 500 `{ ok: false, error }` response.
 */
async function respond(res, produce) {
    try {
        res.json({ ok: true, result: await produce() });
    }
    catch (e) {
        res.status(500).json({ ok: false, error: String(e) });
    }
}

/**
 * Build the REST router for the Lucid HTTP daemon.
 *
 * @param stmts prepared statements handle passed through to the tool handlers
 * @returns an Express router exposing /sync, /sync-project, /context, /validate and /health
 */
export function createRoutes(stmts) {
    const router = Router();
    // Express 5 leaves req.body undefined when no JSON body was sent, so hand
    // the handlers an empty object instead (matters for optional payloads).
    // POST /sync — sync a single file
    router.post("/sync", (req, res) => respond(res, () => handleSyncFile(stmts, req.body ?? {})));
    // POST /sync-project — sync entire project directory
    router.post("/sync-project", (req, res) => respond(res, () => handleSyncProject(stmts, req.body ?? {})));
    // GET /context?q=...&maxTokens=4000 — retrieve relevant context
    router.get("/context", (req, res) => respond(res, () => handleGetContext(stmts, {
        query: String(req.query["q"] ?? ""),
        maxTokens: parseMaxTokens(req.query["maxTokens"]),
    })));
    // POST /validate — validate a file for drift/quality issues
    router.post("/validate", (req, res) => respond(res, () => handleValidateFile(req.body ?? {})));
    // GET /health — liveness check
    router.get("/health", (_req, res) => {
        res.json({ ok: true, version: getCurrentVersion() });
    });
    return router;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import express from "express";
|
|
2
|
+
import { createRoutes } from "./routes.js";
|
|
3
|
+
/**
 * Start the optional Lucid HTTP daemon.
 *
 * @param stmts   prepared statements handle passed to the route handlers
 * @param options `{ port = 7821, host = "127.0.0.1" }` — defaults bind to loopback only
 * @returns the server object returned by `app.listen`
 */
export function startHttpServer(stmts, options = {}) {
    const { port = 7821, host = "127.0.0.1" } = options;
    const app = express();
    app.use(express.json());
    app.use("/", createRoutes(stmts));
    // Express 5 calls this callback with an error when the server fails to
    // start (e.g. EADDRINUSE); report that instead of claiming to be listening.
    return app.listen(port, host, (error) => {
        if (error) {
            process.stderr.write(`[Lucid] HTTP server failed to start on ${host}:${port}: ${error.message ?? error}\n`);
            return;
        }
        process.stderr.write(`[Lucid] HTTP server listening on ${host}:${port}\n`);
    });
}
|