@toolbaux/guardian 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,209 @@
1
+ /**
2
+ * Function-level intelligence extraction and persistence.
3
+ *
4
+ * Produces `function-intelligence.json` in the guardian machine output dir.
5
+ * Contains:
6
+ * - Full FunctionRecord list (all languages)
7
+ * - Call graph: name → { calls, called_by }
8
+ * - Literal index: token → [{ file, function, line }] ← drives `guardian search --types functions`
9
+ *
10
+ * This is a second-pass scan that runs after the main extraction.
11
+ * It re-uses the adapter pipeline on the same files; results are not fed
12
+ * back into the architecture snapshot (additive, non-breaking).
13
+ *
14
+ * Language-specific domain concepts (e.g. Lean4 `sorry`, Python re.* patterns)
15
+ * are surfaced entirely by each adapter — this module has zero language knowledge.
16
+ * Adapters encode domain specifics into `stringLiterals`, making them searchable
17
+ * through the generic literal_index.
18
+ */
19
+ import fs from "node:fs/promises";
20
+ import path from "node:path";
21
+ import { getAdapterForFile, runAdapter } from "../adapters/index.js";
22
+ // ── Token helpers ─────────────────────────────────────────────────────────
23
/**
 * Break `text` into lowercase search tokens.
 *
 * The text is lowercased and split on every run of characters outside
 * `a-z`, `0-9` and `_`; pieces shorter than three characters are dropped.
 */
function tokenize(text) {
    const pieces = text.toLowerCase().split(/[^a-z0-9_]+/);
    const tokens = [];
    for (const piece of pieces) {
        if (piece.length >= 3) {
            tokens.push(piece);
        }
    }
    return tokens;
}
30
+ // ── Core build ────────────────────────────────────────────────────────────
31
/**
 * Build the call graph and literal index from a flat list of FunctionRecord
 * entries (from all files / all adapters).
 *
 * Call graph: every function name and every callee name gets an entry
 * `{ calls, called_by }`. Records that share a name (for example `init`
 * defined in two files) are merged: `calls` is the de-duplicated union of
 * their callees rather than the callees of whichever record came last.
 *
 * Literal index: each string literal / regex pattern is indexed under its
 * first 100 characters (when at least 3 long) and under every token returned
 * by `tokenize`. Keys are lowercased and trimmed; a given function
 * (file + name) appears at most once per key.
 *
 * Records missing `calls`, `stringLiterals`, `regexPatterns` or `lines` are
 * tolerated (treated as empty) so one sparse adapter result cannot abort the
 * whole build.
 *
 * @param {Array<object>} allFunctions FunctionRecord list.
 * @returns {{version: string, generated_at: string, total_functions: number,
 *   functions: Array<object>, call_graph: object, literal_index: object}}
 */
export function buildFunctionIntelligence(allFunctions) {
    // ── Call graph ──
    // Accumulate into Sets so de-duplication is O(1) per edge instead of a
    // linear `includes` scan; insertion order is preserved for the output.
    const nodes = new Map();
    const nodeFor = (name) => {
        let node = nodes.get(name);
        if (!node) {
            node = { calls: new Set(), calledBy: new Set() };
            nodes.set(name, node);
        }
        return node;
    };
    for (const fn of allFunctions) {
        const node = nodeFor(fn.name);
        for (const callee of fn.calls ?? []) {
            node.calls.add(callee);
        }
    }
    // Invert: for each callee name, record who calls it
    for (const fn of allFunctions) {
        for (const callee of fn.calls ?? []) {
            nodeFor(callee).calledBy.add(fn.name);
        }
    }
    // Object.create(null) keeps names such as "constructor" or "toString"
    // from colliding with inherited Object.prototype members.
    const callGraph = Object.create(null);
    for (const [name, node] of nodes) {
        callGraph[name] = { calls: [...node.calls], called_by: [...node.calledBy] };
    }

    // ── Literal index ──
    const literalIndex = Object.create(null);
    // key → Set of "file\0function" ids already recorded under that key
    const seenByKey = new Map();
    const addHit = (token, fn) => {
        const key = token.toLowerCase().trim();
        if (!key) {
            return;
        }
        let seen = seenByKey.get(key);
        if (!seen) {
            seen = new Set();
            seenByKey.set(key, seen);
            literalIndex[key] = [];
        }
        // One hit per function per token — no duplicates
        const id = `${fn.file}\u0000${fn.name}`;
        if (seen.has(id)) {
            return;
        }
        seen.add(id);
        literalIndex[key].push({ file: fn.file, function: fn.name, line: fn.lines?.[0] });
    };
    const indexText = (text, fn) => {
        // Skip anything that is not a string rather than aborting the whole index.
        if (typeof text !== "string") {
            return;
        }
        const full = text.slice(0, 100);
        if (full.length >= 3) {
            addHit(full, fn);
        }
        for (const tok of tokenize(text)) {
            addHit(tok, fn);
        }
    };
    for (const fn of allFunctions) {
        for (const lit of fn.stringLiterals ?? []) {
            indexText(lit, fn);
        }
        for (const pat of fn.regexPatterns ?? []) {
            indexText(pat, fn);
        }
    }

    return {
        version: "0.1",
        generated_at: new Date().toISOString(),
        total_functions: allFunctions.length,
        functions: allFunctions,
        call_graph: callGraph,
        literal_index: literalIndex,
    };
}
96
+ // ── File scanning ─────────────────────────────────────────────────────────
97
// Directory names that are never descended into while scanning for source
// files, grouped by the ecosystem that produces them.
const DEFAULT_IGNORE_DIRS = new Set(
    Object.values({
        versionControl: [".git"],
        // Lean4 / Lake package manager (contains Mathlib — thousands of .lean files)
        lean: [".lake"],
        javascript: ["node_modules", "dist", ".next", ".nuxt", "coverage"],
        python: [".venv", "venv", "__pycache__", ".mypy_cache", ".pytest_cache"],
        // Maven + Gradle build output and generated sources
        jvm: ["target", ".gradle", "generated", "generated-sources"],
        go: ["vendor"],
        dotnet: ["obj", "bin"],
        generic: ["build", ".specs", ".cache"],
    }).flat(),
);
129
/**
 * Recursively collect every file under `dir` for which `getAdapterForFile`
 * returns an adapter.
 *
 * A directory is skipped when its name is in `DEFAULT_IGNORE_DIRS` or
 * `config.ignore.directories`, or when its full path contains one of the
 * `config.ignore.paths` substrings. Unreadable directories are skipped
 * silently (best-effort scan).
 *
 * @param {string} dir Directory to scan.
 * @param {object} config Guardian config; only `ignore.directories` and
 *   `ignore.paths` are read.
 * @param {string[]} [results] Accumulator; matches are appended to it.
 * @returns {Promise<string[]>} The `results` array.
 */
async function listSourceFiles(dir, config, results = []) {
    // Compare with forward slashes so patterns like "src/legacy" also match
    // on platforms whose separator is "\". A no-op where the separator is "/".
    const toPosix = (p) => (path.sep === "/" ? p : p.split(path.sep).join("/"));

    // Built once per scan rather than once per visited directory.
    const ignoreDirs = new Set([
        ...DEFAULT_IGNORE_DIRS,
        ...(config?.ignore?.directories ?? []),
    ]);
    // Every string includes "", so an empty pattern would exclude the whole
    // tree; drop empty / non-string entries up front.
    const ignorePaths = (config?.ignore?.paths ?? [])
        .filter((p) => typeof p === "string" && p.length > 0)
        .map(toPosix);

    async function walk(current) {
        let entries;
        try {
            // encoding: "utf8" ensures entry.name is always string, not Buffer
            entries = await fs.readdir(current, { withFileTypes: true, encoding: "utf8" });
        }
        catch {
            // Unreadable or missing directory: skip it, keep scanning the rest.
            return;
        }
        for (const entry of entries) {
            const fullPath = path.join(current, entry.name);
            if (entry.isDirectory()) {
                if (ignoreDirs.has(entry.name)) {
                    continue;
                }
                const comparable = toPosix(fullPath);
                if (ignorePaths.some((p) => comparable.includes(p))) {
                    continue;
                }
                await walk(fullPath);
            }
            else if (entry.isFile() && getAdapterForFile(entry.name)) {
                results.push(fullPath);
            }
        }
    }

    await walk(dir);
    return results;
}
159
/**
 * Scan one or more project roots, run adapters on every source file, and
 * return the aggregated FunctionIntelligence index.
 *
 * Files are read in bounded batches: opening every file of a large
 * repository at once can exhaust file descriptors, and because read errors
 * are deliberately skipped, those files would vanish from the index without
 * any signal. Function records are appended in file-listing order, so the
 * result does not depend on which read happens to finish first. A file
 * reachable from more than one root is processed once.
 *
 * Files that cannot be read or that the adapter fails to parse are skipped.
 *
 * @param {string[]} roots Directories to scan.
 * @param {object} config Guardian config, forwarded to `listSourceFiles`.
 * @returns {Promise<object>} Result of `buildFunctionIntelligence`.
 */
export async function buildFunctionIntelligenceFromRoots(roots, config) {
    const MAX_CONCURRENT_READS = 32;
    const allFunctions = [];
    const seenFiles = new Set();

    // Returns the function records for one file, or [] when it is skipped.
    async function extractFile(filePath) {
        const adapter = getAdapterForFile(path.basename(filePath));
        if (!adapter) {
            return [];
        }
        let source;
        try {
            source = await fs.readFile(filePath, "utf8");
        }
        catch {
            return [];
        }
        try {
            return runAdapter(adapter, filePath, source).functions ?? [];
        }
        catch {
            // Skip files that fail to parse (malformed source, encoding issues)
            return [];
        }
    }

    for (const root of roots) {
        const listed = await listSourceFiles(root, config);
        // Overlapping roots would otherwise index the same file twice.
        const files = listed.filter((filePath) => {
            const key = path.resolve(filePath);
            if (seenFiles.has(key)) {
                return false;
            }
            seenFiles.add(key);
            return true;
        });
        for (let start = 0; start < files.length; start += MAX_CONCURRENT_READS) {
            const batch = files.slice(start, start + MAX_CONCURRENT_READS);
            const perFile = await Promise.all(batch.map(extractFile));
            for (const fileFunctions of perFile) {
                for (const fn of fileFunctions) {
                    allFunctions.push(fn);
                }
            }
        }
    }
    return buildFunctionIntelligence(allFunctions);
}
189
+ // ── Write ─────────────────────────────────────────────────────────────────
190
/**
 * Write `intel` as pretty-printed JSON to
 * `<outputDir>/function-intelligence.json`, creating `outputDir` (and any
 * missing parents) first. Logs the destination and resolves to it.
 */
export async function writeFunctionIntelligence(outputDir, intel) {
    const destination = path.join(outputDir, "function-intelligence.json");
    await fs.mkdir(outputDir, { recursive: true });
    const serialized = JSON.stringify(intel, null, 2);
    await fs.writeFile(destination, serialized, "utf8");
    console.log(`Wrote ${destination}`);
    return destination;
}
198
+ // ── Load ──────────────────────────────────────────────────────────────────
199
/**
 * Read and parse `<machineDir>/function-intelligence.json`.
 * Resolves to the parsed value, or to null when the file cannot be read or
 * does not contain valid JSON.
 */
export async function loadFunctionIntelligence(machineDir) {
    const source = path.join(machineDir, "function-intelligence.json");
    let parsed = null;
    try {
        parsed = JSON.parse(await fs.readFile(source, "utf8"));
    }
    catch {
        // Missing, unreadable, or malformed file: fall through with null.
    }
    return parsed;
}
@@ -13,6 +13,7 @@ import { validateArchitectureSnapshot, validateUxSnapshot } from "../schema/inde
13
13
  import { getOutputLayout } from "../output-layout.js";
14
14
  import { logResolvedProjectPaths, resolveProjectPaths } from "../project-discovery.js";
15
15
  import { analyzeDepth } from "./analyzers/depth.js";
16
+ import { buildFunctionIntelligenceFromRoots, writeFunctionIntelligence, } from "./function-intel.js";
16
17
  export async function buildSnapshots(options) {
17
18
  const startedAt = Date.now();
18
19
  const includeFileGraph = options.includeFileGraph ?? false;
@@ -187,6 +188,17 @@ export async function extractProject(options) {
187
188
  await fs.writeFile(siPath, JSON.stringify(siReports, null, 2), "utf8");
188
189
  console.log(`Wrote ${siPath}`);
189
190
  }
191
+ // Generate Function Intelligence — call graph, literal index across all languages.
192
+ // Runs as an additive second pass; never modifies the architecture snapshot.
193
+ try {
194
+ const allRoots = (architecture.project.roots ?? [projectRoot]).map((r) => path.isAbsolute(r) ? r : path.join(projectRoot, r));
195
+ const funcIntel = await buildFunctionIntelligenceFromRoots(allRoots, config);
196
+ await writeFunctionIntelligence(layout.machineDir, funcIntel);
197
+ }
198
+ catch (err) {
199
+ // Non-fatal — function intel is additive; don't block the main extract
200
+ console.warn(`Function intelligence skipped: ${err.message}`);
201
+ }
190
202
  return result;
191
203
  }
192
204
  async function generateStructuralIntelligenceReports(architecture) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@toolbaux/guardian",
3
- "version": "0.1.17",
3
+ "version": "0.1.19",
4
4
  "type": "module",
5
5
  "description": "Architectural intelligence for codebases. Verify that AI-generated code matches your architectural intent.",
6
6
  "keywords": [