@a13xu/lucid 1.1.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +221 -99
  3. package/build/config.d.ts +37 -0
  4. package/build/config.js +45 -0
  5. package/build/database.d.ts +54 -0
  6. package/build/database.js +175 -62
  7. package/build/guardian/checklist.js +66 -66
  8. package/build/guardian/coding-analyzer.d.ts +11 -0
  9. package/build/guardian/coding-analyzer.js +393 -0
  10. package/build/guardian/coding-rules.d.ts +1 -0
  11. package/build/guardian/coding-rules.js +97 -0
  12. package/build/index.js +241 -2
  13. package/build/indexer/ast.d.ts +9 -0
  14. package/build/indexer/ast.js +158 -0
  15. package/build/indexer/file.d.ts +15 -0
  16. package/build/indexer/file.js +100 -0
  17. package/build/indexer/project.d.ts +8 -0
  18. package/build/indexer/project.js +320 -0
  19. package/build/memory/experience.d.ts +11 -0
  20. package/build/memory/experience.js +85 -0
  21. package/build/retrieval/context.d.ts +29 -0
  22. package/build/retrieval/context.js +219 -0
  23. package/build/retrieval/qdrant.d.ts +16 -0
  24. package/build/retrieval/qdrant.js +135 -0
  25. package/build/retrieval/tfidf.d.ts +14 -0
  26. package/build/retrieval/tfidf.js +64 -0
  27. package/build/security/alerts.d.ts +44 -0
  28. package/build/security/alerts.js +228 -0
  29. package/build/security/env.d.ts +24 -0
  30. package/build/security/env.js +85 -0
  31. package/build/security/guard.d.ts +35 -0
  32. package/build/security/guard.js +133 -0
  33. package/build/security/ratelimit.d.ts +34 -0
  34. package/build/security/ratelimit.js +105 -0
  35. package/build/security/smtp.d.ts +26 -0
  36. package/build/security/smtp.js +125 -0
  37. package/build/security/ssrf.d.ts +18 -0
  38. package/build/security/ssrf.js +109 -0
  39. package/build/security/waf.d.ts +33 -0
  40. package/build/security/waf.js +174 -0
  41. package/build/store/content.d.ts +3 -0
  42. package/build/store/content.js +11 -0
  43. package/build/tools/coding-guard.d.ts +24 -0
  44. package/build/tools/coding-guard.js +82 -0
  45. package/build/tools/context.d.ts +39 -0
  46. package/build/tools/context.js +105 -0
  47. package/build/tools/grep.d.ts +17 -0
  48. package/build/tools/grep.js +65 -0
  49. package/build/tools/init.d.ts +51 -0
  50. package/build/tools/init.js +212 -0
  51. package/build/tools/remember.d.ts +4 -4
  52. package/build/tools/reward.d.ts +29 -0
  53. package/build/tools/reward.js +154 -0
  54. package/build/tools/sync.d.ts +18 -0
  55. package/build/tools/sync.js +76 -0
  56. package/package.json +55 -48
package/build/index.js CHANGED
@@ -4,6 +4,9 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
4
4
  import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
5
5
  import { z } from "zod";
6
6
  import { initDatabase, prepareStatements } from "./database.js";
7
+ import { guardRequest, guardOutput, configureGuard } from "./security/guard.js";
8
+ import { allowHost } from "./security/ssrf.js";
9
+ import { loadConfig } from "./config.js";
7
10
  import { remember, RememberSchema } from "./tools/remember.js";
8
11
  import { relate, RelateSchema } from "./tools/relate.js";
9
12
  import { recall, RecallSchema } from "./tools/recall.js";
@@ -11,15 +14,45 @@ import { recallAll } from "./tools/recall-all.js";
11
14
  import { forget, ForgetSchema } from "./tools/forget.js";
12
15
  import { memoryStats } from "./tools/stats.js";
13
16
  import { handleValidateFile, ValidateFileSchema, handleCheckDrift, CheckDriftSchema, handleGetChecklist, } from "./tools/guardian.js";
17
+ import { handleGrepCode, GrepCodeSchema } from "./tools/grep.js";
18
+ import { handleInitProject, InitProjectSchema } from "./tools/init.js";
19
+ import { handleSyncFile, SyncFileSchema, handleSyncProject, SyncProjectSchema, } from "./tools/sync.js";
20
+ import { handleGetContext, GetContextSchema, handleGetRecent, GetRecentSchema, } from "./tools/context.js";
21
+ import { handleReward, RewardSchema, handlePenalize, PenalizeSchema, handleShowRewards, ShowRewardsSchema, } from "./tools/reward.js";
22
+ import { handleGetCodingRules, handleCheckCodeQuality, CheckCodeQualitySchema, } from "./tools/coding-guard.js";
14
23
  // ---------------------------------------------------------------------------
15
24
  // Init DB
16
25
  // ---------------------------------------------------------------------------
17
26
  const db = initDatabase();
18
27
  const stmts = prepareStatements(db);
19
28
  // ---------------------------------------------------------------------------
29
+ // Security guard — initialize from config + env
30
+ // ---------------------------------------------------------------------------
31
+ const _appCfg = loadConfig();
32
+ configureGuard(_appCfg.security ?? {});
33
+ // Register Qdrant host in SSRF allowlist if configured
34
+ const _qdrantUrl = process.env["QDRANT_URL"] ?? _appCfg.qdrant?.url;
35
+ if (_qdrantUrl) {
36
+ try {
37
+ allowHost(_qdrantUrl);
38
+ }
39
+ catch { /* ignore invalid URL */ }
40
+ }
41
+ const _embeddingUrl = process.env["EMBEDDING_URL"] ?? _appCfg.qdrant?.embeddingUrl;
42
+ if (_embeddingUrl) {
43
+ try {
44
+ allowHost(_embeddingUrl);
45
+ }
46
+ catch { /* ignore */ }
47
+ }
48
+ else {
49
+ // Default embedding endpoint
50
+ allowHost("https://api.openai.com");
51
+ }
52
+ // ---------------------------------------------------------------------------
20
53
  // MCP Server
21
54
  // ---------------------------------------------------------------------------
22
- const server = new Server({ name: "lucid", version: "1.1.0" }, { capabilities: { tools: {} } });
55
+ const server = new Server({ name: "lucid", version: "1.9.0" }, { capabilities: { tools: {} } });
23
56
  // ---------------------------------------------------------------------------
24
57
  // Tool definitions
25
58
  // ---------------------------------------------------------------------------
@@ -90,6 +123,140 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
90
123
  description: "Get memory usage statistics.",
91
124
  inputSchema: { type: "object", properties: {} },
92
125
  },
126
+ // ── Init / Indexing ──────────────────────────────────────────────────────
127
+ {
128
+ name: "init_project",
129
+ description: "Scan and index a project directory into the knowledge graph. " +
130
+ "Reads CLAUDE.md (directives, conventions), package.json / pyproject.toml (dependencies, scripts), " +
131
+ "README.md (description), .mcp.json (MCP servers), logic-guardian.yaml (drift patterns), " +
132
+ "and source files (exported functions/classes). " +
133
+ "Call this once when starting work on a project to bootstrap memory with project context.",
134
+ inputSchema: {
135
+ type: "object",
136
+ properties: {
137
+ directory: {
138
+ type: "string",
139
+ description: "Absolute path to the project root. Defaults to current working directory.",
140
+ },
141
+ },
142
+ },
143
+ },
144
+ {
145
+ name: "sync_file",
146
+ description: "Index or re-index a single source file after it was written or modified. " +
147
+ "Extracts exports, description, and open TODOs, then updates the knowledge graph. " +
148
+ "IMPORTANT: call this automatically after every Write or Edit tool call.",
149
+ inputSchema: {
150
+ type: "object",
151
+ properties: {
152
+ path: { type: "string", description: "Absolute or relative path to the modified file." },
153
+ },
154
+ required: ["path"],
155
+ },
156
+ },
157
+ {
158
+ name: "sync_project",
159
+ description: "Re-index the entire project directory incrementally. " +
160
+ "Use this when multiple files have changed (e.g. after a refactor or git pull).",
161
+ inputSchema: {
162
+ type: "object",
163
+ properties: {
164
+ directory: {
165
+ type: "string",
166
+ description: "Project root directory. Defaults to current working directory.",
167
+ },
168
+ },
169
+ },
170
+ },
171
+ {
172
+ name: "grep_code",
173
+ description: "Search indexed source files using a regex pattern. " +
174
+ "Decompresses stored binary content and returns only matching lines with context. " +
175
+ "Token-efficient: returns ~20-50 tokens instead of full file contents. " +
176
+ "Useful for finding function calls, variable usages, import patterns.",
177
+ inputSchema: {
178
+ type: "object",
179
+ properties: {
180
+ pattern: { type: "string", description: "Regex pattern to search for." },
181
+ language: { type: "string", enum: ["python", "javascript", "typescript", "generic"], description: "Filter by language." },
182
+ context: { type: "number", description: "Lines of context before/after each match (0-10, default 2)." },
183
+ },
184
+ required: ["pattern"],
185
+ },
186
+ },
187
+ // ── Context & Token Optimization ─────────────────────────────────────────
188
+ {
189
+ name: "get_context",
190
+ description: "Retrieve the minimal relevant context for a task or query. " +
191
+ "Uses TF-IDF scoring (or Qdrant vector search if configured) to rank files by relevance, " +
192
+ "applies recency boost for recently modified files, and returns skeletons (signatures only) " +
193
+ "for large files to stay within the token budget. " +
194
+ "Configure limits in lucid.config.json. Set QDRANT_URL env var for vector search.",
195
+ inputSchema: {
196
+ type: "object",
197
+ properties: {
198
+ query: { type: "string", description: "What you are working on or searching for" },
199
+ maxTokens: { type: "number", description: "Total token budget (default 4000)" },
200
+ dirs: { type: "array", items: { type: "string" }, description: "Whitelist directories (e.g. [\"src\", \"backend\"])" },
201
+ recentOnly: { type: "boolean", description: "Only files modified within recentWindowHours" },
202
+ recentHours: { type: "number", description: "Override recent window (hours)" },
203
+ skeletonOnly: { type: "boolean", description: "Always show skeleton (signatures only)" },
204
+ topK: { type: "number", description: "Max files to consider (default 10)" },
205
+ },
206
+ required: ["query"],
207
+ },
208
+ },
209
+ {
210
+ name: "get_recent",
211
+ description: "Return files modified recently with line-level diffs. " +
212
+ "Shows what changed in each file since the previous sync. " +
213
+ "Useful for catching up after a git pull or resuming a session.",
214
+ inputSchema: {
215
+ type: "object",
216
+ properties: {
217
+ hours: { type: "number", description: "Look back N hours (default 24)" },
218
+ withDiffs: { type: "boolean", description: "Include line diffs (default true)" },
219
+ },
220
+ },
221
+ },
222
+ // ── Reward System ────────────────────────────────────────────────────────
223
+ {
224
+ name: "reward",
225
+ description: "Signal that the last get_context() result was helpful (+1 reward). " +
226
+ "The files returned in that context will be ranked higher in future similar queries. " +
227
+ "Call this after a get_context() result led to a correct fix or useful code.",
228
+ inputSchema: {
229
+ type: "object",
230
+ properties: {
231
+ note: { type: "string", description: "Optional note about what worked (stored for future reference)" },
232
+ },
233
+ },
234
+ },
235
+ {
236
+ name: "penalize",
237
+ description: "Signal that the last get_context() result was unhelpful (-1 reward). " +
238
+ "The files returned in that context will be ranked lower in future similar queries. " +
239
+ "Call this after a get_context() result missed important files or was irrelevant.",
240
+ inputSchema: {
241
+ type: "object",
242
+ properties: {
243
+ note: { type: "string", description: "Optional note about what was missing or wrong" },
244
+ },
245
+ },
246
+ },
247
+ {
248
+ name: "show_rewards",
249
+ description: "Show the top rewarded experiences and most rewarded files. " +
250
+ "Rewards decay exponentially (half-life ~14 days). " +
251
+ "Use this to understand which context queries and files have been most valuable.",
252
+ inputSchema: {
253
+ type: "object",
254
+ properties: {
255
+ query: { type: "string", description: "Filter experiences by query text (optional)" },
256
+ topK: { type: "number", description: "Number of top results to show (default 10)" },
257
+ },
258
+ },
259
+ },
93
260
  // ── Logic Guardian ───────────────────────────────────────────────────────
94
261
  {
95
262
  name: "validate_file",
@@ -127,6 +294,34 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
127
294
  "Call this before marking any implementation task as done.",
128
295
  inputSchema: { type: "object", properties: {} },
129
296
  },
297
+ // ── Coding Guard ─────────────────────────────────────────────────────────
298
+ {
299
+ name: "coding_rules",
300
+ description: "Get the 25 Golden Rules coding checklist. Covers clarity, naming, single responsibility, " +
301
+ "error handling, frontend component size/reuse/props, singleton rules, library selection, " +
302
+ "and architecture separation. Review before marking any task done.",
303
+ inputSchema: { type: "object", properties: {} },
304
+ },
305
+ {
306
+ name: "check_code_quality",
307
+ description: "Analyze a file or code snippet against the 25 Golden Rules. " +
308
+ "Detects: file/function size violations, vague naming, deep nesting, dead code, and — " +
309
+ "for React/Vue component files — inline styles, prop explosion, fetch-in-component, " +
310
+ "direct DOM access, mixed styling systems. " +
311
+ "Complements validate_file (which checks logic correctness).",
312
+ inputSchema: {
313
+ type: "object",
314
+ properties: {
315
+ path: { type: "string", description: "Absolute or relative path to the file to analyze." },
316
+ code: { type: "string", description: "Code snippet to analyze inline." },
317
+ language: {
318
+ type: "string",
319
+ enum: ["python", "javascript", "typescript", "vue", "generic"],
320
+ description: "Language hint. Auto-detected from file extension if path is provided.",
321
+ },
322
+ },
323
+ },
324
+ },
130
325
  ],
131
326
  }));
132
327
  // ---------------------------------------------------------------------------
@@ -134,6 +329,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
134
329
  // ---------------------------------------------------------------------------
135
330
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
136
331
  const { name, arguments: args } = request.params;
332
+ // Security: rate limit + WAF check before any execution
333
+ const guard = guardRequest(name, args);
334
+ if (guard.blocked) {
335
+ return { content: [{ type: "text", text: guard.reason ?? "Request blocked by security guard" }], isError: true };
336
+ }
137
337
  try {
138
338
  let text;
139
339
  switch (name) {
@@ -156,6 +356,37 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
156
356
  case "memory_stats":
157
357
  text = memoryStats(db, stmts);
158
358
  break;
359
+ // Init & Sync
360
+ case "init_project":
361
+ text = await handleInitProject(stmts, InitProjectSchema.parse(args));
362
+ break;
363
+ case "sync_file":
364
+ text = handleSyncFile(stmts, SyncFileSchema.parse(args));
365
+ break;
366
+ case "sync_project":
367
+ text = handleSyncProject(stmts, SyncProjectSchema.parse(args));
368
+ break;
369
+ // Grep
370
+ case "grep_code":
371
+ text = handleGrepCode(stmts, GrepCodeSchema.parse(args));
372
+ break;
373
+ // Context & Token Optimization
374
+ case "get_context":
375
+ text = await handleGetContext(stmts, GetContextSchema.parse(args));
376
+ break;
377
+ case "get_recent":
378
+ text = handleGetRecent(stmts, GetRecentSchema.parse(args));
379
+ break;
380
+ // Reward System
381
+ case "reward":
382
+ text = handleReward(stmts, RewardSchema.parse(args));
383
+ break;
384
+ case "penalize":
385
+ text = handlePenalize(stmts, PenalizeSchema.parse(args));
386
+ break;
387
+ case "show_rewards":
388
+ text = handleShowRewards(stmts, ShowRewardsSchema.parse(args));
389
+ break;
159
390
  // Logic Guardian
160
391
  case "validate_file":
161
392
  text = handleValidateFile(ValidateFileSchema.parse(args));
@@ -166,10 +397,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
166
397
  case "get_checklist":
167
398
  text = handleGetChecklist();
168
399
  break;
400
+ // Coding Guard
401
+ case "coding_rules":
402
+ text = handleGetCodingRules();
403
+ break;
404
+ case "check_code_quality":
405
+ text = handleCheckCodeQuality(CheckCodeQualitySchema.parse(args));
406
+ break;
169
407
  default:
170
408
  return { content: [{ type: "text", text: `Unknown tool: ${name}` }], isError: true };
171
409
  }
172
- return { content: [{ type: "text", text }] };
410
+ // Security: scan output for sensitive data leakage
411
+ return { content: [{ type: "text", text: guardOutput(name, text) }] };
173
412
  }
174
413
  catch (err) {
175
414
  const message = err instanceof z.ZodError
@@ -0,0 +1,9 @@
1
/** Structural outline of a source file, as produced by `extractSkeleton`. */
export interface Skeleton {
    /** Import statements found in the file (always empty for the generic extractor). */
    imports: string[];
    /** Signatures of exported / public declarations (always empty for the generic extractor). */
    exports: string[];
    /** Comment lines that contain a TODO, FIXME or HACK marker. */
    todos: string[];
    /**
     * Truncated description text: the first JSDoc block or docstring for
     * TS/JS/Python, or the head of the file for every other language.
     */
    summary: string;
}
/** Builds a skeleton for `source`, choosing the extractor from `language`. */
export declare function extractSkeleton(source: string, language: string): Skeleton;
/** Render skeleton as compact text for context assembly. */
export declare function renderSkeleton(sk: Skeleton, filepath: string): string;
@@ -0,0 +1,158 @@
1
+ // Structural skeleton extraction — regex-based AST-like parsing
2
+ // Returns only signatures, imports, and TODO comments (no function bodies)
3
+ // Used by get_context when a file exceeds the per-file token budget
4
+ // ---------------------------------------------------------------------------
5
+ // TypeScript / JavaScript
6
+ // ---------------------------------------------------------------------------
7
/**
 * Builds a structural skeleton of a TypeScript / JavaScript source string.
 *
 * Collects import statements, the signatures of `export` declarations
 * (without bodies), and `//` comments carrying a TODO / FIXME / HACK marker.
 * The first JSDoc block that starts at the beginning of a line is used as
 * the summary (truncated to 150 characters).
 *
 * @param source Full text of the file.
 * @returns `{ imports, exports, todos, summary }`.
 */
function skeletonTS(source) {
    const lines = source.split("\n");
    const imports = [];
    const exports = [];
    const todos = [];
    let summary = "";
    const jsdoc = source.match(/^\/\*\*([\s\S]*?)\*\//m);
    if (jsdoc) {
        summary = jsdoc[1].replace(/\s*\*\s*/g, " ").trim().slice(0, 150);
    }
    // An import statement is finished once it has a ';' or its module
    // specifier (`from "x"` / `import "x"`). Checking the specifier keeps
    // semicolon-less code from swallowing the lines that follow the import.
    const importIsComplete = (text) => text.includes(";") || /(?:\bfrom|^\s*import)\s*["'][^"']*["']/.test(text);
    // Upper bound on how far a single signature is followed, so unbalanced
    // parentheses (e.g. inside a string default) cannot consume the whole file.
    const MAX_SIGNATURE_LINES = 30;
    // Reads a function/class/interface signature starting at line `start`.
    // Scans to the first '{' or ';' that is NOT inside parentheses, so object
    // types in a parameter list do not cut the signature short.
    const readSignature = (start) => {
        let depth = 0;
        let collected = "";
        const stop = Math.min(lines.length, start + MAX_SIGNATURE_LINES);
        for (let j = start; j < stop; j++) {
            const current = lines[j];
            for (let k = 0; k < current.length; k++) {
                const ch = current[k];
                if (ch === "(") {
                    depth++;
                }
                else if (ch === ")") {
                    depth = Math.max(0, depth - 1);
                }
                else if (depth === 0 && (ch === "{" || ch === ";")) {
                    const head = (collected + current.slice(0, k)).replace(/\s+/g, " ").trim();
                    // Only declarations that really open a body get the body placeholder.
                    return ch === "{" ? head + " { … }" : head + ";";
                }
            }
            collected += current + "\n";
        }
        // No terminator found within the limit: fall back to the first line.
        return lines[start].trim().slice(0, 120);
    };
    let i = 0;
    while (i < lines.length) {
        const line = lines[i];
        const trimmed = line.trim();
        // Imports (possibly spanning several lines)
        if (/^import\s/.test(trimmed)) {
            let full = line;
            while (!importIsComplete(full) && i + 1 < lines.length) {
                i++;
                full += " " + lines[i].trim();
            }
            imports.push(full.replace(/\s+/g, " ").trim());
            i++;
            continue;
        }
        // Exported declarations
        if (/^export\s/.test(trimmed)) {
            let sig;
            if (/^export\s+(async\s+)?function|^export\s+(abstract\s+)?class|^export\s+interface/.test(trimmed)) {
                sig = readSignature(i);
            }
            else if (/^export\s+type\s/.test(trimmed)) {
                // Type alias: keep only the first line, hiding any object body.
                sig = trimmed.split("{")[0].trim() + (trimmed.includes("{") ? " { … }" : "");
            }
            else {
                // const / enum / default / re-exports: keep the (truncated) line.
                sig = trimmed.slice(0, 120);
            }
            exports.push(sig);
            i++;
            continue;
        }
        // TODO-style markers in line comments
        if (/\/\/\s*(TODO|FIXME|HACK)/i.test(trimmed)) {
            todos.push(trimmed.slice(0, 100));
        }
        i++;
    }
    return { imports, exports, todos, summary };
}
71
+ // ---------------------------------------------------------------------------
72
+ // Python
73
+ // ---------------------------------------------------------------------------
74
/**
 * Builds a structural skeleton of a Python source string.
 *
 * Collects `import` / `from` lines, the signatures of public top-level
 * `def` / `async def` / `class` declarations, and `#` comments carrying a
 * TODO / FIXME / HACK marker. The first triple-quoted string that starts at
 * the beginning of a line is used as the summary (truncated to 150 chars).
 *
 * Only unindented declarations whose name does not start with "_" are
 * listed, matching the "public, top level" rule used by the file indexer.
 *
 * @param source Full text of the file.
 * @returns `{ imports, exports, todos, summary }`.
 */
function skeletonPython(source) {
    const lines = source.split("\n");
    const imports = [];
    const exports = [];
    const todos = [];
    let summary = "";
    const docMatch = source.match(/^['"]{3}([\s\S]*?)['"]{3}/m);
    if (docMatch)
        summary = docMatch[1].trim().slice(0, 150);
    // Upper bound on how many lines one signature may span, so unbalanced
    // brackets cannot make a single declaration consume the rest of the file.
    const MAX_SIGNATURE_LINES = 30;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        if (trimmed.startsWith("import ") || trimmed.startsWith("from ")) {
            imports.push(trimmed.slice(0, 100));
            continue;
        }
        // Matched against the raw line so indented (nested) definitions are skipped.
        const decl = /^(?:async\s+def|def|class)\s+(\w+)/.exec(line);
        if (decl) {
            if (decl[1].startsWith("_"))
                continue; // private by convention
            // The signature ends at the first ':' outside any bracket, so type
            // annotations and default values inside the parameter list survive.
            let depth = 0;
            let sig = "";
            let terminated = false;
            const stop = Math.min(lines.length, i + MAX_SIGNATURE_LINES);
            for (let j = i; j < stop && !terminated; j++) {
                const part = j === i ? line : " " + lines[j].trim();
                for (const ch of part) {
                    if (depth === 0 && ch === ":") {
                        terminated = true;
                        break;
                    }
                    if ("([{".includes(ch))
                        depth++;
                    else if (")]}".includes(ch))
                        depth = Math.max(0, depth - 1);
                    sig += ch;
                }
            }
            sig = sig.replace(/\s+/g, " ").trim() + ":";
            exports.push(sig.slice(0, 120));
            continue;
        }
        if (/^\s*#\s*(TODO|FIXME|HACK)/i.test(line)) {
            todos.push(trimmed.slice(0, 100));
        }
    }
    return { imports, exports, todos, summary };
}
110
+ // ---------------------------------------------------------------------------
111
+ // Generic (markdown, yaml, json, etc.)
112
+ // ---------------------------------------------------------------------------
113
/**
 * Skeleton for files without a dedicated extractor (markdown, yaml, json, …).
 *
 * The summary is the first 30 lines of the file, capped at 300 characters;
 * TODO / FIXME / HACK markers behind `//` or `#` are collected from every
 * line. Imports and exports are always empty.
 *
 * @param source Full text of the file.
 * @returns `{ imports, exports, todos, summary }`.
 */
function skeletonGeneric(source) {
    const everyLine = source.split("\n");
    const markerPattern = /(?:\/\/|#)\s*(TODO|FIXME|HACK)/i;
    const todos = everyLine
        .filter((text) => markerPattern.test(text))
        .map((text) => text.trim().slice(0, 100));
    const summary = everyLine.slice(0, 30).join("\n").slice(0, 300);
    return { imports: [], exports: [], todos, summary };
}
128
+ // ---------------------------------------------------------------------------
129
+ // Public
130
+ // ---------------------------------------------------------------------------
131
/**
 * Produces the skeleton of `source` using the extractor that matches
 * `language`: the TS/JS extractor for "typescript" and "javascript", the
 * Python extractor for "python", and the generic extractor for anything else.
 */
export function extractSkeleton(source, language) {
    if (language === "typescript" || language === "javascript") {
        return skeletonTS(source);
    }
    if (language === "python") {
        return skeletonPython(source);
    }
    return skeletonGeneric(source);
}
142
/**
 * Render skeleton as compact text for context assembly.
 *
 * Sections are separated by a blank line, in this order: a header naming the
 * file, the summary, at most the first 8 imports, the exports, and the TODOs.
 * Empty sections are left out entirely.
 */
export function renderSkeleton(sk, filepath) {
    const header = `// ${filepath} [skeleton]`;
    const summarySection = sk.summary ? [`// ${sk.summary}`] : [];
    const importSection = sk.imports.length > 0 ? [sk.imports.slice(0, 8).join("\n")] : [];
    const exportSection = sk.exports.length > 0 ? ["// — exports —", sk.exports.join("\n")] : [];
    const todoSection = sk.todos.length > 0 ? ["// — TODOs —", sk.todos.join("\n")] : [];
    return [header, ...summarySection, ...importSection, ...exportSection, ...todoSection].join("\n\n");
}
@@ -0,0 +1,15 @@
1
+ import type { Statements } from "../database.js";
2
/** Structural index of a single source file, as produced by `indexFile`. */
export interface FileIndex {
    /** File path as given to `indexFile`, with backslashes replaced by "/". */
    module: string;
    /** Names of exported (TS/JS) or public top-level (Python) symbols. */
    exports: string[];
    /** Leading doc comment or module docstring, truncated; "" when none was found. */
    description: string;
    /** TODO / FIXME / HACK notes, each formatted as "MARKER: text". */
    todos: string[];
    /** "typescript", "javascript", "python" or "generic", chosen from the file extension. */
    language: string;
}
/** Reads and indexes `filepath`; returns `null` when the file cannot be read. */
export declare function indexFile(filepath: string): FileIndex | null;
/** Outcome of `upsertFileIndex`. */
export interface UpsertResult {
    /** Observation strings written for the file's entity; empty when nothing was stored. */
    observations: string[];
    /** `false` when the stored content hash already matched and the write was skipped. */
    stored: boolean;
    /** UTF-8 byte size of the source minus the byte size of the compressed blob. */
    savedBytes: number;
}
/**
 * Stores the compressed content of `source` and the structural observations
 * from `index`, unless a file with the same path and content hash is already stored.
 */
export declare function upsertFileIndex(index: FileIndex, source: string, stmts: Statements): UpsertResult;
@@ -0,0 +1,100 @@
1
+ import { readFileSync } from "fs";
2
+ import { extname } from "path";
3
+ import { compress, sha256 } from "../store/content.js";
4
/**
 * Extracts index data from a TypeScript / JavaScript source string.
 *
 * - `exports`: unique names of exported functions (including generators),
 *   classes (including abstract), `const` / `let` / `var` bindings, types,
 *   interfaces and enums (including `const enum`), in order of appearance.
 * - `description`: the first JSDoc block that starts a line, or else the
 *   first `//` comment that starts a line, truncated to 200 characters.
 * - `todos`: `// TODO|FIXME|HACK` notes formatted as "MARKER: text".
 *
 * @param source Full text of the file.
 * @returns `{ exports, description, todos }`.
 */
function extractTS(source) {
    const todos = [];
    // `const\s+enum` is listed before `const` so that `export const enum Mode`
    // yields "Mode" rather than the keyword "enum".
    const exportPattern = /export\s+(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(?:function\s*\*\s*|(?:function|class|const\s+enum|const|let|var|type|interface|enum)\s+)(\w+)/g;
    // A Set drops the duplicates produced by overload signatures while keeping order.
    const names = new Set();
    for (const m of source.matchAll(exportPattern)) {
        names.add(m[1]);
    }
    const exports = [...names];
    // First JSDoc / line comment as description
    const docMatch = source.match(/^\/\*\*([\s\S]*?)\*\//m) ?? source.match(/^\/\/(.*)/m);
    const description = docMatch
        ? docMatch[1].replace(/\s*\*\s*/g, " ").trim().slice(0, 200)
        : "";
    // TODOs. Only spaces/tabs are allowed around the marker: `\s` would also
    // match the newline, making a bare `// TODO` capture the next line of code.
    for (const m of source.matchAll(/\/\/[ \t]*(TODO|FIXME|HACK)[: \t]+(.+)/gi)) {
        todos.push(`${m[1]}: ${m[2].trim()}`);
    }
    return { exports, description, todos };
}
22
/**
 * Extracts index data from a Python source string.
 *
 * - `exports`: names of unindented `def` / `async def` / `class`
 *   declarations that do not start with "_".
 * - `description`: the first triple-quoted string that starts a line,
 *   truncated to 200 characters.
 * - `todos`: `# TODO|FIXME|HACK` notes formatted as "MARKER: text".
 *
 * @param source Full text of the file.
 * @returns `{ exports, description, todos }`.
 */
function extractPython(source) {
    const exports = [];
    const todos = [];
    // Public functions and classes
    for (const m of source.matchAll(/^(?:def|class|async def)\s+(\w+)/gm)) {
        if (!m[1].startsWith("_"))
            exports.push(m[1]);
    }
    // Module docstring
    const docMatch = source.match(/^["']{3}([\s\S]*?)["']{3}/m);
    const description = docMatch ? docMatch[1].trim().slice(0, 200) : "";
    // TODOs. Only spaces/tabs are allowed around the marker: `\s` would also
    // match the newline, making a bare `# TODO` capture the next line of code.
    for (const m of source.matchAll(/#[ \t]*(TODO|FIXME|HACK)[: \t]+(.+)/gi)) {
        todos.push(`${m[1]}: ${m[2].trim()}`);
    }
    return { exports, description, todos };
}
39
/**
 * Extracts index data from a file with no dedicated extractor.
 *
 * Only TODO / FIXME / HACK notes behind `//` or `#` are collected (formatted
 * as "MARKER: text"); `exports` is always empty and `description` is "".
 *
 * @param source Full text of the file.
 * @returns `{ exports, description, todos }`.
 */
function extractGeneric(source) {
    const todos = [];
    // Only spaces/tabs are allowed around the marker: `\s` would also match the
    // newline, making a bare `# TODO` capture the following line as its text.
    for (const m of source.matchAll(/(?:\/\/|#)[ \t]*(TODO|FIXME|HACK)[: \t]+(.+)/gi)) {
        todos.push(`${m[1]}: ${m[2].trim()}`);
    }
    return { exports: [], description: "", todos };
}
46
/**
 * Reads `filepath` as UTF-8 and builds its structural index.
 *
 * The extractor and the reported language are chosen from the lower-cased
 * file extension: `.ts` / `.tsx` → "typescript", `.js` / `.jsx` →
 * "javascript", `.py` → "python", anything else → "generic".
 *
 * @returns The index, with `module` set to the path using "/" separators,
 *          or `null` when the file cannot be read.
 */
export function indexFile(filepath) {
    let source;
    try {
        source = readFileSync(filepath, { encoding: "utf-8" });
    }
    catch {
        return null;
    }
    const normalizedPath = filepath.replace(/\\/g, "/");
    switch (extname(filepath).toLowerCase()) {
        case ".ts":
        case ".tsx":
            return { module: normalizedPath, language: "typescript", ...extractTS(source) };
        case ".js":
        case ".jsx":
            return { module: normalizedPath, language: "javascript", ...extractTS(source) };
        case ".py":
            return { module: normalizedPath, language: "python", ...extractPython(source) };
        default:
            return { module: normalizedPath, language: "generic", ...extractGeneric(source) };
    }
}
72
/**
 * Persists a file's compressed content and its structural observations.
 *
 * When a file with the same path is already stored with an identical content
 * hash, nothing is written and `{ observations: [], stored: false,
 * savedBytes: 0 }` is returned. Otherwise the compressed source is upserted
 * and the entity named after the file path is updated, or inserted with type
 * "pattern" when it does not exist yet.
 *
 * @param index  Structural index of the file.
 * @param source Full text of the file.
 * @param stmts  Prepared database statements.
 * @returns The observations written, whether anything was stored, and the
 *          bytes saved by compression.
 */
export function upsertFileIndex(index, source, stmts) {
    const contentHash = sha256(source);
    // Change detection: skip all work when the stored hash is identical.
    const previous = stmts.getFileByPath.get(index.module);
    if (previous?.content_hash === contentHash) {
        return { observations: [], stored: false, savedBytes: 0 };
    }
    // Compress and store the binary content.
    const blob = compress(source);
    const rawBytes = Buffer.byteLength(source, "utf-8");
    stmts.upsertFile.run(index.module, blob, contentHash, rawBytes, blob.byteLength, index.language);
    // Compact structural index kept on the entity (used for recall).
    const observations = [
        index.description ? `description: ${index.description}` : null,
        index.exports.length > 0 ? `exports: ${index.exports.join(", ")}` : null,
        index.todos.length > 0 ? `TODOs: ${index.todos.join(" | ")}` : null,
        `language: ${index.language}`,
    ].filter((entry) => entry !== null);
    const serialized = JSON.stringify(observations);
    const entityRow = stmts.getEntityByName.get(index.module);
    if (entityRow) {
        stmts.updateEntity.run(serialized, entityRow.id);
    }
    else {
        stmts.insertEntity.run(index.module, "pattern", serialized);
    }
    return { observations, stored: true, savedBytes: rawBytes - blob.byteLength };
}
@@ -0,0 +1,8 @@
1
+ import type { Statements } from "../database.js";
2
/**
 * One entry of the report returned by `indexProject`.
 *
 * NOTE(review): the implementation is not visible from this declaration; the
 * field descriptions below are assumptions drawn from the names — confirm
 * against project.js.
 */
export interface IndexResult {
    /** Presumably the name of the entity that was written. */
    entity: string;
    /** Presumably the entity type. */
    type: string;
    /** A count (number), presumably of observations recorded for the entity. */
    observations: number;
    /** Presumably the file or input the entity was derived from. */
    source: string;
}
/** Indexes the project rooted at `directory` using the prepared statements in `stmts`. */
export declare function indexProject(directory: string, stmts: Statements): IndexResult[];