@graphpilot-oss/graphpilot 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +15 -0
- package/.github/CODEOWNERS +22 -0
- package/.github/FUNDING.yml +1 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +33 -0
- package/.github/ISSUE_TEMPLATE/config.yml +5 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +19 -0
- package/.github/dependabot.yml +15 -0
- package/.github/workflows/ci.yml +62 -0
- package/.github/workflows/release.yml +50 -0
- package/.prettierignore +19 -0
- package/.prettierrc.json +20 -0
- package/CHANGELOG.md +138 -0
- package/CODE_OF_CONDUCT.md +83 -0
- package/CONTRIBUTING.md +111 -0
- package/LICENSE +201 -0
- package/README.md +132 -0
- package/SECURITY.md +44 -0
- package/assets/logo.png +0 -0
- package/assets/logo.svg +1 -0
- package/bench/README.md +544 -0
- package/bench/results/agent-tier-2026-05-22.md +28 -0
- package/bench/results/agent-tier-summary.md +44 -0
- package/bench/results/baseline-tier-2026-05-22.md +23 -0
- package/bench/results/baseline.json +810 -0
- package/bench/results/baseline.md +28 -0
- package/bench/run-agent-tier-automated.ts +234 -0
- package/bench/run-agent-tier.md +125 -0
- package/bench/run-baseline-tier.ts +200 -0
- package/bench/run.ts +210 -0
- package/bench/runner-baseline.ts +177 -0
- package/bench/runner-graphpilot.ts +131 -0
- package/bench/score-agent-tier.ts +191 -0
- package/bench/score.ts +59 -0
- package/bench/tasks.ts +236 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +162 -0
- package/dist/cli.js.map +1 -0
- package/dist/edges.d.ts +57 -0
- package/dist/edges.js +170 -0
- package/dist/edges.js.map +1 -0
- package/dist/git.d.ts +95 -0
- package/dist/git.js +247 -0
- package/dist/git.js.map +1 -0
- package/dist/graph-schema.d.ts +36 -0
- package/dist/graph-schema.js +208 -0
- package/dist/graph-schema.js.map +1 -0
- package/dist/impact.d.ts +99 -0
- package/dist/impact.js +123 -0
- package/dist/impact.js.map +1 -0
- package/dist/indexer.d.ts +28 -0
- package/dist/indexer.js +111 -0
- package/dist/indexer.js.map +1 -0
- package/dist/interactions.d.ts +46 -0
- package/dist/interactions.js +0 -0
- package/dist/interactions.js.map +1 -0
- package/dist/mcp.d.ts +3 -0
- package/dist/mcp.js +567 -0
- package/dist/mcp.js.map +1 -0
- package/dist/parser.d.ts +24 -0
- package/dist/parser.js +128 -0
- package/dist/parser.js.map +1 -0
- package/dist/provenance.d.ts +74 -0
- package/dist/provenance.js +95 -0
- package/dist/provenance.js.map +1 -0
- package/dist/query.d.ts +68 -0
- package/dist/query.js +127 -0
- package/dist/query.js.map +1 -0
- package/dist/redact.d.ts +30 -0
- package/dist/redact.js +117 -0
- package/dist/redact.js.map +1 -0
- package/dist/storage.d.ts +42 -0
- package/dist/storage.js +85 -0
- package/dist/storage.js.map +1 -0
- package/dist/symbols.d.ts +20 -0
- package/dist/symbols.js +140 -0
- package/dist/symbols.js.map +1 -0
- package/dist/validation.d.ts +9 -0
- package/dist/validation.js +65 -0
- package/dist/validation.js.map +1 -0
- package/dist/validators.d.ts +55 -0
- package/dist/validators.js +205 -0
- package/dist/validators.js.map +1 -0
- package/dist/watcher.d.ts +86 -0
- package/dist/watcher.js +310 -0
- package/dist/watcher.js.map +1 -0
- package/docs/architecture.md +311 -0
- package/docs/limitations.md +156 -0
- package/docs/mcp-setup.md +231 -0
- package/docs/quickstart.md +202 -0
- package/eslint.config.js +148 -0
- package/lefthook.yml +81 -0
- package/package.json +56 -0
- package/pnpm-workspace.yaml +6 -0
- package/scripts/smoke-stdio.mjs +97 -0
- package/src/cli.ts +171 -0
- package/src/edges.ts +202 -0
- package/src/git.ts +255 -0
- package/src/graph-schema.ts +229 -0
- package/src/impact.ts +218 -0
- package/src/indexer.ts +152 -0
- package/src/interactions.ts +0 -0
- package/src/mcp.ts +652 -0
- package/src/parser.ts +138 -0
- package/src/provenance.ts +115 -0
- package/src/query.ts +148 -0
- package/src/redact.ts +122 -0
- package/src/storage.ts +115 -0
- package/src/symbols.ts +173 -0
- package/src/validation.ts +69 -0
- package/src/validators.ts +253 -0
- package/src/watcher.ts +383 -0
- package/tests/edges.test.ts +175 -0
- package/tests/fixtures/sample.ts +32 -0
- package/tests/git.test.ts +303 -0
- package/tests/graph-schema.test.ts +321 -0
- package/tests/impact.test.ts +454 -0
- package/tests/interactions.test.ts +180 -0
- package/tests/lint-policy.test.ts +106 -0
- package/tests/mcp-stdio.test.ts +171 -0
- package/tests/mcp.test.ts +335 -0
- package/tests/parser.test.ts +31 -0
- package/tests/provenance.test.ts +132 -0
- package/tests/query.test.ts +160 -0
- package/tests/redact.test.ts +167 -0
- package/tests/security.test.ts +144 -0
- package/tests/symbols.test.ts +78 -0
- package/tests/validators.test.ts +193 -0
- package/tests/watcher.test.ts +250 -0
- package/tsconfig.json +18 -0
package/dist/parser.js
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import Parser from 'tree-sitter';
|
|
2
|
+
// @ts-ignore — tree-sitter-typescript ships JS, has no types
|
|
3
|
+
import TS from 'tree-sitter-typescript';
|
|
4
|
+
import { readFileSync, statSync } from 'node:fs';
|
|
5
|
+
import { extname } from 'node:path';
|
|
6
|
+
import { MAX_FILE_BYTES } from './validation.js';
|
|
7
|
+
const PARSER_CACHE = new Map();
|
|
8
|
+
/**
 * Return a tree-sitter parser configured for `lang`, creating it on first use
 * and re-using the cached instance afterwards.
 *
 * Grammar selection:
 *  - 'typescript' and 'javascript' use the `typescript` grammar.
 *  - 'tsx' and 'jsx' use the `tsx` grammar, because JSX elements are only
 *    understood by that dialect (the plain `typescript` grammar reads `<X>`
 *    as a type assertion and produces ERROR nodes for JSX).
 *
 * Any other value leaves the parser without a grammar (no `default` branch);
 * callers are expected to pass a value produced by `detectLang`.
 *
 * @param {string} lang - one of 'typescript' | 'tsx' | 'javascript' | 'jsx'
 * @returns {Parser} the cached parser for `lang`
 */
function getParser(lang) {
    if (PARSER_CACHE.has(lang))
        return PARSER_CACHE.get(lang);
    const p = new Parser();
    // Cast around the peer-dep type-version skew between tree-sitter and
    // tree-sitter-typescript. Runtime is fine; only the .d.ts files disagree.
    const langs = TS;
    switch (lang) {
        case 'typescript':
        case 'javascript':
            // NOTE(review): `.js` files that contain JSX would also need the tsx
            // grammar — confirm whether that matters for the indexed repos.
            p.setLanguage(langs.typescript);
            break;
        case 'tsx':
        case 'jsx':
            p.setLanguage(langs.tsx);
            break;
    }
    PARSER_CACHE.set(lang, p);
    return p;
}
|
|
30
|
+
/**
 * Map a file path to the language id used by the parser, based purely on the
 * (case-insensitive) file extension.
 *
 * @param {string} path - file path or file name
 * @returns {'typescript' | 'tsx' | 'javascript' | 'jsx' | null} the language
 *   id, or null when the extension is not one we index
 */
export function detectLang(path) {
    const ext = extname(path).toLowerCase();
    if (ext === '.ts')
        return 'typescript';
    if (ext === '.tsx')
        return 'tsx';
    if (ext === '.js' || ext === '.mjs' || ext === '.cjs')
        return 'javascript';
    if (ext === '.jsx')
        return 'jsx';
    return null;
}
|
|
46
|
+
/**
 * Read a source file from disk and parse it.
 *
 * Returns null (meaning "skip this file") when:
 *  - the extension is not a supported language,
 *  - the path cannot be stat'ed or read (missing, permission denied, removed
 *    between the stat and the read, ...),
 *  - the path is not a regular file (e.g. a directory named `foo.ts`),
 *  - the file is larger than MAX_FILE_BYTES.
 *
 * @param {string} path - path of the file to parse
 * @returns {{path: string, lang: string, tree: object, source: string} | null}
 *   the result of `parseSource`, or null when the file is skipped
 */
export function parseFile(path) {
    const lang = detectLang(path);
    if (!lang)
        return null;
    // Defence against T1 (resource exhaustion): skip oversized files rather than
    // OOM the process.
    let source;
    try {
        const info = statSync(path);
        if (!info.isFile() || info.size > MAX_FILE_BYTES)
            return null;
        // The read sits inside the same guard as the stat so that every
        // filesystem failure is reported the same way: as a skipped file.
        source = readFileSync(path, 'utf8');
    }
    catch {
        return null;
    }
    return parseSource(path, source, lang);
}
|
|
64
|
+
/**
 * Parse already-loaded source text with the grammar for `lang`.
 *
 * @param {string} path - path the source came from (stored, not read)
 * @param {string} source - full source text
 * @param {string} lang - language id as returned by `detectLang`
 * @returns {{path: string, lang: string, tree: object, source: string}}
 */
export function parseSource(path, source, lang) {
    const parser = getParser(lang);
    return { path, lang, tree: parser.parse(source), source };
}
|
|
68
|
+
/**
 * Yield `node` and every descendant, depth-first in pre-order (a parent is
 * yielded before its children, children in their original order).
 *
 * Uses an explicit stack instead of recursion so that a very deep tree
 * cannot overflow the JS call stack. Defence against T1.
 *
 * @param {{childCount: number, child: (i: number) => object | null}} node
 *   root of the subtree to traverse
 * @yields {object} each visited node
 */
export function* walk(node) {
    const pending = [node];
    while (pending.length !== 0) {
        const current = pending.pop();
        yield current;
        // Children go on the stack last-to-first so the first child is popped
        // next, which keeps the traversal in source order.
        let index = current.childCount;
        while (index-- > 0) {
            const kid = current.child(index);
            if (kid)
                pending.push(kid);
        }
    }
}
|
|
89
|
+
/**
 * Collect the name of every function-like definition in a parsed file, in
 * traversal (source) order.
 *
 * A node contributes a name when `functionNameOf` returns a non-empty string
 * for it; all other nodes are ignored.
 *
 * @param {{tree: {rootNode: object}}} parsed - result of `parseSource`
 * @returns {string[]} the collected names
 */
export function listFunctions(parsed) {
    const found = [];
    for (const node of walk(parsed.tree.rootNode)) {
        const label = functionNameOf(node);
        if (!label)
            continue;
        found.push(label);
    }
    return found;
}
|
|
103
|
+
/**
 * Return the name a syntax node defines if it is function-like, else null.
 *
 * Recognised shapes:
 *  - declarations / methods / signatures: the node's own `name` field;
 *  - `variable_declarator` whose `value` is an arrow function, a function
 *    expression (`function_expression`, or `function` as some grammar versions
 *    call it) or a generator function expression (`generator_function`):
 *    the declarator's `name` field.
 *
 * @param {{type: string, childForFieldName: (field: string) => ({type: string, text: string} | null)}} node
 * @returns {string | null} the defined name, or null when the node is not a
 *   named function definition
 */
function functionNameOf(node) {
    switch (node.type) {
        case 'function_declaration':
        case 'generator_function_declaration':
        case 'method_definition':
        case 'function_signature':
            return node.childForFieldName('name')?.text ?? null;
        case 'variable_declarator': {
            // const foo = () => ... | const foo = function() {} | const foo = function*() {}
            const valueType = node.childForFieldName('value')?.type;
            const isFunctionValue = valueType === 'arrow_function' ||
                valueType === 'function_expression' ||
                valueType === 'function' ||
                // Generator declarations are handled above; keep the
                // expression form consistent with them.
                valueType === 'generator_function';
            if (!isFunctionValue)
                return null;
            return node.childForFieldName('name')?.text ?? null;
        }
        default:
            return null;
    }
}
|
|
128
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,6DAA6D;AAC7D,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AASjD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;AAE/C,SAAS,SAAS,CAAC,IAAwB;IACzC,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;IAC3D,MAAM,CAAC,GAAG,IAAI,MAAM,EAAE,CAAC;IACvB,qEAAqE;IACrE,0EAA0E;IAC1E,MAAM,KAAK,GAAG,EAA2D,CAAC;IAC1E,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,YAAY;YACf,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAChC,MAAM;QACR,KAAK,KAAK;YACR,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACzB,MAAM;QACR,KAAK,YAAY,CAAC;QAClB,KAAK,KAAK;YACR,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YAChC,MAAM;IACV,CAAC;IACD,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC1B,OAAO,CAAC,CAAC;AACX,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,QAAQ,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;QACpC,KAAK,KAAK;YACR,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,KAAK,CAAC;QACf,KAAK,KAAK,CAAC;QACX,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM;YACT,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,KAAK,CAAC;QACf;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,6EAA6E;IAC7E,4DAA4D;IAC5D,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACH,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,IAAI,GAAG,cAAc;QAAE,OAAO,IAAI,CAAC;IACvC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC1C,OAAO,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,MAAc,EAAE,IAAwB;IAChF,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC3C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;AACtC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,SAAS,CAAC,CAAC,IAAI,CAAC,IAAuB;IAC3C,MAAM,KAAK,GAAwB,CAAC,IAAI,CAAC,CAAC;IAC1C,
OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,EAAG,CAAC;QACzB,MAAM,GAAG,CAAC;QACV,oEAAoE;QACpE,2EAA2E;QAC3E,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,KAAK;gBAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAkB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,IAAuB;IAC7C,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,KAAK,sBAAsB,CAAC;QAC5B,KAAK,gCAAgC,CAAC;QACtC,KAAK,mBAAmB,CAAC;QACzB,KAAK,oBAAoB,CAAC,CAAC,CAAC;YAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;YAChD,OAAO,QAAQ,EAAE,IAAI,IAAI,IAAI,CAAC;QAChC,CAAC;QACD,KAAK,qBAAqB,CAAC,CAAC,CAAC;YAC3B,oDAAoD;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;YAClD,IACE,SAAS;gBACT,CAAC,SAAS,CAAC,IAAI,KAAK,gBAAgB;oBAClC,SAAS,CAAC,IAAI,KAAK,qBAAqB;oBACxC,SAAS,CAAC,IAAI,KAAK,UAAU,CAAC,EAChC,CAAC;gBACD,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;gBAChD,OAAO,QAAQ,EAAE,IAAI,IAAI,IAAI,CAAC;YAChC,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QACD;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provenance / evidence anchors for tool responses.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: agents hallucinate. The most common Cursor / Claude
|
|
5
|
+
* Code failure pattern is "the model called a function that doesn't
|
|
6
|
+
* exist" — and the user has no quick way to verify a tool's claim
|
|
7
|
+
* before acting on it.
|
|
8
|
+
*
|
|
9
|
+
* Solution: every result we return carries an evidence anchor — a
|
|
10
|
+
* structured `{file, line, sha, excerpt}` reference that the agent
|
|
11
|
+
* can include verbatim in its reply. The user (or the agent itself)
|
|
12
|
+
* can then jump to the file/line and confirm the cited code matches
|
|
13
|
+
* the claim. If we ever fabricate, the anchor exposes us instantly.
|
|
14
|
+
*
|
|
15
|
+
* Format design:
|
|
16
|
+
* - Path is relative to the indexed root (portable).
|
|
17
|
+
* - Line is 1-indexed (matches editor convention).
|
|
18
|
+
* - SHA is optional — null when the indexed repo isn't a git repo.
|
|
19
|
+
*     When present, it's the 7-char short SHA captured at index time.
|
|
20
|
+
* - Excerpt is optional and short (~200 chars) — for symbol records
|
|
21
|
+
*     it's the signature line. Edges carry no excerpt yet (planned: the call expression).
|
|
22
|
+
*
|
|
23
|
+
* Per the differentiation research, this is the SINGLE feature no
|
|
24
|
+
* competitor in the 15+ landscape has shipped. See
|
|
25
|
+
* .notes/differentiation-research-2026-05-21.md §1.3.
|
|
26
|
+
*/
|
|
27
|
+
import type { SymbolRecord } from './symbols.js';
|
|
28
|
+
import type { CallEdge } from './edges.js';
|
|
29
|
+
/** A single evidence anchor pointing at a specific location in the repo. */
|
|
30
|
+
export interface Provenance {
|
|
31
|
+
/** Relative path from the indexed repo root, e.g. "src/auth.ts". */
|
|
32
|
+
file: string;
|
|
33
|
+
/** 1-indexed line number. */
|
|
34
|
+
line: number;
|
|
35
|
+
/** Optional 1-indexed column. */
|
|
36
|
+
column?: number;
|
|
37
|
+
/** End line, when the entity spans multiple lines. */
|
|
38
|
+
endLine?: number;
|
|
39
|
+
/** Short git SHA (7 chars) at index time. Null if not a git repo. */
|
|
40
|
+
sha?: string | null;
|
|
41
|
+
/** Short text excerpt — symbol signature, call expression, etc. */
|
|
42
|
+
excerpt?: string;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Make provenance for a symbol. Excerpt is the symbol's stored
|
|
46
|
+
* signature (already secret-redacted by T3).
|
|
47
|
+
*/
|
|
48
|
+
export declare function symbolProvenance(s: SymbolRecord, sha: string | null): Provenance;
|
|
49
|
+
/**
|
|
50
|
+
* Make provenance for a call edge. Points at the CALL SITE (where the
|
|
51
|
+
* call happens), not the callee's definition. The callee's own
|
|
52
|
+
* provenance is available via `symbolProvenance(idx.findById(edge.toId))`
|
|
53
|
+
* when toId is non-null.
|
|
54
|
+
*/
|
|
55
|
+
export declare function edgeProvenance(e: CallEdge, sha: string | null): Provenance;
|
|
56
|
+
/**
|
|
57
|
+
* Format a Provenance as a single-line human/agent-readable string.
|
|
58
|
+
* Used inline in tool text responses.
|
|
59
|
+
*
|
|
60
|
+
* Examples:
|
|
61
|
+
* src/auth.ts:42 (no sha — not a git repo)
|
|
62
|
+
* src/auth.ts:42 @ ab12cd3 (with sha)
|
|
63
|
+
* src/auth.ts:42:5 @ ab12cd3 (with column)
|
|
64
|
+
*/
|
|
65
|
+
export declare function formatProvenance(p: Provenance): string;
|
|
66
|
+
/**
|
|
67
|
+
* Format a Provenance as a verifiable evidence tag the agent can
|
|
68
|
+
* include in its reply. The agent's user (or a downstream tool) can
|
|
69
|
+
* paste this directly into a search.
|
|
70
|
+
*
|
|
71
|
+
* Example:
|
|
72
|
+
* [evidence: src/auth.ts:42 @ ab12cd3]
|
|
73
|
+
*/
|
|
74
|
+
export declare function formatEvidenceTag(p: Provenance): string;
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provenance / evidence anchors for tool responses.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: agents hallucinate. The most common Cursor / Claude
|
|
5
|
+
* Code failure pattern is "the model called a function that doesn't
|
|
6
|
+
* exist" — and the user has no quick way to verify a tool's claim
|
|
7
|
+
* before acting on it.
|
|
8
|
+
*
|
|
9
|
+
* Solution: every result we return carries an evidence anchor — a
|
|
10
|
+
* structured `{file, line, sha, excerpt}` reference that the agent
|
|
11
|
+
* can include verbatim in its reply. The user (or the agent itself)
|
|
12
|
+
* can then jump to the file/line and confirm the cited code matches
|
|
13
|
+
* the claim. If we ever fabricate, the anchor exposes us instantly.
|
|
14
|
+
*
|
|
15
|
+
* Format design:
|
|
16
|
+
* - Path is relative to the indexed root (portable).
|
|
17
|
+
* - Line is 1-indexed (matches editor convention).
|
|
18
|
+
* - SHA is optional — null when the indexed repo isn't a git repo.
|
|
19
|
+
*     When present, it's the 7-char short SHA captured at index time.
|
|
20
|
+
* - Excerpt is optional and short (~200 chars) — for symbol records
|
|
21
|
+
*     it's the signature line. Edges carry no excerpt yet (planned: the call expression).
|
|
22
|
+
*
|
|
23
|
+
* Per the differentiation research, this is the SINGLE feature no
|
|
24
|
+
* competitor in the 15+ landscape has shipped. See
|
|
25
|
+
* .notes/differentiation-research-2026-05-21.md §1.3.
|
|
26
|
+
*/
|
|
27
|
+
/** Maximum excerpt length, in UTF-16 code units, including the ellipsis. */
const MAX_EXCERPT_LEN = 200;
/**
 * Trim an excerpt and cap its length.
 *
 * @param {string | undefined | null} s - raw excerpt text
 * @returns {string | undefined} undefined for missing / blank input; the
 *   trimmed text when it fits; otherwise the text cut short and terminated
 *   with '…' so the result is at most MAX_EXCERPT_LEN code units long
 */
function clipExcerpt(s) {
    if (!s)
        return undefined;
    const trimmed = s.trim();
    if (trimmed.length === 0)
        return undefined;
    if (trimmed.length <= MAX_EXCERPT_LEN)
        return trimmed;
    let end = MAX_EXCERPT_LEN - 1;
    // Never cut between the two halves of a surrogate pair: if the last kept
    // unit is a high surrogate, drop it too rather than emit a lone surrogate.
    const lastKept = trimmed.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        end -= 1;
    return trimmed.slice(0, end) + '…';
}
|
|
36
|
+
/**
 * Build the evidence anchor for a symbol record.
 *
 * Location fields are copied from the record; the excerpt is the record's
 * `signature`, trimmed and length-capped by `clipExcerpt`.
 *
 * @param {{file: string, line: number, column?: number, endLine?: number, signature?: string}} s
 * @param {string | null} sha - short git SHA, or null when unavailable
 * @returns {{file: string, line: number, column: number | undefined,
 *   endLine: number | undefined, sha: string | null, excerpt: string | undefined}}
 */
export function symbolProvenance(s, sha) {
    const { file, line, column, endLine, signature } = s;
    return {
        file,
        line,
        column,
        endLine,
        sha: sha ?? null,
        excerpt: clipExcerpt(signature),
    };
}
|
|
50
|
+
/**
 * Build the evidence anchor for a call edge.
 *
 * The anchor points at the call site recorded on the edge (its `file`,
 * `line` and `column`), not at the callee's definition. No excerpt is set:
 * the text of the call line is not stored on the edge.
 *
 * @param {{file: string, line: number, column?: number}} e - the call edge
 * @param {string | null} sha - short git SHA, or null when unavailable
 * @returns {{file: string, line: number, column: number | undefined, sha: string | null}}
 */
export function edgeProvenance(e, sha) {
    const anchor = { file: e.file, line: e.line, column: e.column };
    anchor.sha = sha ?? null;
    return anchor;
}
|
|
67
|
+
/**
 * Render a Provenance as one line: `file:line`, then `:column` when a column
 * is defined, then ` @ sha` when a SHA is present.
 *
 * Examples:
 *   src/auth.ts:42                      (no sha)
 *   src/auth.ts:42 @ ab12cd3            (with sha)
 *   src/auth.ts:42:5 @ ab12cd3          (with column)
 *
 * @param {{file: string, line: number, column?: number, sha?: string | null}} p
 * @returns {string} the formatted anchor
 */
export function formatProvenance(p) {
    const parts = [`${p.file}:${p.line}`];
    if (p.column !== undefined)
        parts.push(`:${p.column}`);
    if (p.sha)
        parts.push(` @ ${p.sha}`);
    return parts.join('');
}
|
|
84
|
+
/**
 * Wrap the single-line form of a Provenance in an evidence tag that can be
 * quoted verbatim in a reply.
 *
 * Example:
 *   [evidence: src/auth.ts:42 @ ab12cd3]
 *
 * @param {{file: string, line: number, column?: number, sha?: string | null}} p
 * @returns {string} `[evidence: <formatProvenance(p)>]`
 */
export function formatEvidenceTag(p) {
    const anchor = formatProvenance(p);
    return '[evidence: ' + anchor + ']';
}
|
|
95
|
+
//# sourceMappingURL=provenance.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provenance.js","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAqBH,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B,SAAS,WAAW,CAAC,CAAqB;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IACzB,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAC3C,OAAO,OAAO,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,eAAe,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC;AAClG,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,GAAkB;IAClE,OAAO;QACL,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,GAAG,EAAE,GAAG,IAAI,IAAI;QAChB,OAAO,EAAE,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC;KAClC,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,CAAW,EAAE,GAAkB;IAC5D,OAAO;QACL,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,GAAG,EAAE,GAAG,IAAI,IAAI;QAChB,4DAA4D;QAC5D,iEAAiE;QACjE,oCAAoC;KACrC,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAa;IAC5C,IAAI,GAAG,GAAG,CAAC,CAAC,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;IAChC,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS;QAAE,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC;IAClD,IAAI,CAAC,CAAC,GAAG;QAAE,GAAG,IAAI,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC;IAChC,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,iBAAiB,CAAC,CAAa;IAC7C,OAAO,cAAc,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC"}
|
package/dist/query.d.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { Graph } from './storage.js';
|
|
2
|
+
import type { SymbolRecord } from './symbols.js';
|
|
3
|
+
import type { CallEdge } from './edges.js';
|
|
4
|
+
export interface RecallOptions {
|
|
5
|
+
/** Max results. Default 10. Capped at 100. */
|
|
6
|
+
limit?: number;
|
|
7
|
+
/**
|
|
8
|
+
* If true, match if the query is a substring of the symbol name.
|
|
9
|
+
* If false (default), require exact case-insensitive match.
|
|
10
|
+
*/
|
|
11
|
+
substring?: boolean;
|
|
12
|
+
}
|
|
13
|
+
export interface EdgeQueryOptions {
|
|
14
|
+
/** Max edges to return. Default 50. Capped at 500. */
|
|
15
|
+
limit?: number;
|
|
16
|
+
/**
|
|
17
|
+
* If true, include edges where the callee is unresolved (toId === null).
|
|
18
|
+
* Default true — agents usually want to see those too.
|
|
19
|
+
*/
|
|
20
|
+
includeUnresolved?: boolean;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Pre-computed lookup tables over a Graph. Build once after loading; every
|
|
24
|
+
* query is then O(1) or O(k) (k = result count), except substring name search, which scans every distinct name.
|
|
25
|
+
*
|
|
26
|
+
* The whole index is built in one pass on construction. For graphpilot's own
|
|
27
|
+
* code (50 symbols, 155 edges) build time is <1ms. For a 50k-symbol repo
|
|
28
|
+
* we'd expect ~20ms — still negligible compared to a Claude Code round trip.
|
|
29
|
+
*/
|
|
30
|
+
export declare class GraphIndex {
|
|
31
|
+
readonly graph: Graph;
|
|
32
|
+
private readonly byNameLower;
|
|
33
|
+
private readonly byId;
|
|
34
|
+
/** Edges keyed by callee id — answers "who calls X?". */
|
|
35
|
+
private readonly callersOf;
|
|
36
|
+
/** Edges keyed by caller id — answers "what does X call?". */
|
|
37
|
+
private readonly calleesOf;
|
|
38
|
+
constructor(graph: Graph);
|
|
39
|
+
/**
|
|
40
|
+
* Find symbols by name. Default behaviour is exact case-insensitive match;
|
|
41
|
+
* pass `substring: true` to enable substring search.
|
|
42
|
+
*
|
|
43
|
+
* Returns ranked roughly by "best first" — exact case match before
|
|
44
|
+
* case-folded matches.
|
|
45
|
+
*/
|
|
46
|
+
findByName(query: string, opts?: RecallOptions): SymbolRecord[];
|
|
47
|
+
/** Look up a symbol by its id. Returns null if not found. */
|
|
48
|
+
findById(id: string): SymbolRecord | null;
|
|
49
|
+
/**
|
|
50
|
+
* Resolve a name (or id) to a unique symbol. Used by tools that take a
|
|
51
|
+
* "symbol" argument — accepts either a bare name or a full id.
|
|
52
|
+
*
|
|
53
|
+
* Ambiguity policy: if more than one symbol matches the name, returns the
|
|
54
|
+
* first one (same heuristic as the resolver). Caller can disambiguate by
|
|
55
|
+
* passing the full id.
|
|
56
|
+
*/
|
|
57
|
+
resolveSymbol(nameOrId: string): SymbolRecord | null;
|
|
58
|
+
/** Edges where this symbol is the target — "who calls X?". */
|
|
59
|
+
callers(symbolId: string, opts?: EdgeQueryOptions): CallEdge[];
|
|
60
|
+
/** Edges where this symbol is the source — "what does X call?". */
|
|
61
|
+
callees(symbolId: string, opts?: EdgeQueryOptions): CallEdge[];
|
|
62
|
+
/** Convenience: how many symbols / edges are indexed. */
|
|
63
|
+
get stats(): {
|
|
64
|
+
symbols: number;
|
|
65
|
+
edges: number;
|
|
66
|
+
resolvedEdges: number;
|
|
67
|
+
};
|
|
68
|
+
}
|
package/dist/query.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/** Upper bound on symbols returned by a single name lookup. */
const HARD_RESULT_CAP = 100;
/** Upper bound on edges returned by a single callers/callees query. */
const HARD_EDGE_CAP = 500;
/**
 * Turn a caller-supplied limit into a safe non-negative integer.
 *
 * - missing (null/undefined) or not-a-number values use `fallback`;
 * - fractional values are rounded down;
 * - negative values become 0 (a negative `end` passed to `slice` would
 *   otherwise return almost everything and defeat the cap);
 * - anything above `cap` becomes `cap`.
 *
 * @param {unknown} requested - the `limit` option as given by the caller
 * @param {number} fallback - default when no usable limit was given
 * @param {number} cap - hard maximum
 * @returns {number} integer in the range [0, cap]
 */
function clampLimit(requested, fallback, cap) {
    const value = Number(requested ?? fallback);
    if (Number.isNaN(value))
        return Math.min(fallback, cap);
    return Math.min(Math.max(Math.floor(value), 0), cap);
}
/**
 * Pre-computed lookup tables over a Graph. Built in a single pass over the
 * graph's symbols and edges at construction time.
 *
 * Lookups by id, by exact (case-insensitive) name, and callers/callees are
 * map lookups plus a slice of the result. Substring name search is the
 * exception: it scans every distinct lower-cased name.
 */
export class GraphIndex {
    graph;
    /** Lower-cased symbol name -> symbols with that name, in graph order. */
    byNameLower = new Map();
    /** Symbol id -> symbol. */
    byId = new Map();
    /** Edges keyed by callee id — answers "who calls X?". */
    callersOf = new Map();
    /** Edges keyed by caller id — answers "what does X call?". */
    calleesOf = new Map();
    /**
     * @param {{symbols: Array<{id: string, name: string}>,
     *   edges: Array<{fromId: string, toId: string | null}>}} graph
     */
    constructor(graph) {
        this.graph = graph;
        for (const s of graph.symbols) {
            this.byId.set(s.id, s);
            const key = s.name.toLowerCase();
            const list = this.byNameLower.get(key);
            if (list)
                list.push(s);
            else
                this.byNameLower.set(key, [s]);
        }
        for (const e of graph.edges) {
            // callers index — only resolved edges have a toId
            if (e.toId) {
                const list = this.callersOf.get(e.toId);
                if (list)
                    list.push(e);
                else
                    this.callersOf.set(e.toId, [e]);
            }
            // callees index — every edge has a fromId
            const list = this.calleesOf.get(e.fromId);
            if (list)
                list.push(e);
            else
                this.calleesOf.set(e.fromId, [e]);
        }
    }
    /**
     * Find symbols by name. Default behaviour is exact case-insensitive match;
     * pass `substring: true` to match any name containing the query
     * (case-insensitively).
     *
     * For exact lookups, symbols whose name matches the query's case exactly
     * come first, followed by the case-folded matches. Substring results are
     * returned in index order, unranked.
     *
     * @param {string} query - name (or name fragment) to look for
     * @param {{limit?: number, substring?: boolean}} [opts] - `limit` defaults
     *   to 10 and is clamped to [0, HARD_RESULT_CAP]
     * @returns {object[]} at most `limit` matching symbols; empty for an empty
     *   query or a limit of 0
     */
    findByName(query, opts = {}) {
        const limit = clampLimit(opts.limit, 10, HARD_RESULT_CAP);
        if (!query || limit === 0)
            return [];
        if (opts.substring) {
            const q = query.toLowerCase();
            const results = [];
            for (const [name, syms] of this.byNameLower) {
                if (!name.includes(q))
                    continue;
                for (const s of syms) {
                    results.push(s);
                    // limit is a positive integer here, so this always fires
                    // once exactly `limit` results have been collected.
                    if (results.length >= limit)
                        return results;
                }
            }
            return results;
        }
        // Exact case-insensitive — fast path through the map.
        const candidates = this.byNameLower.get(query.toLowerCase()) ?? [];
        if (candidates.length === 0)
            return [];
        // Prefer exact case match first, then the rest.
        const exact = candidates.filter((s) => s.name === query);
        const rest = candidates.filter((s) => s.name !== query);
        return [...exact, ...rest].slice(0, limit);
    }
    /**
     * Look up a symbol by its id.
     *
     * @param {string} id
     * @returns {object | null} the symbol, or null if not found
     */
    findById(id) {
        return this.byId.get(id) ?? null;
    }
    /**
     * Resolve a name (or id) to a single symbol.
     *
     * A string containing both '#' and '@' is treated as a full id and looked
     * up by id only. Anything else is treated as a bare name; when several
     * symbols share the name, the first result of `findByName` wins. Callers
     * can disambiguate by passing the full id.
     *
     * @param {string} nameOrId
     * @returns {object | null} the resolved symbol, or null if nothing matches
     */
    resolveSymbol(nameOrId) {
        if (nameOrId.includes('#') && nameOrId.includes('@')) {
            return this.findById(nameOrId);
        }
        const matches = this.findByName(nameOrId);
        return matches[0] ?? null;
    }
    /**
     * Edges where this symbol is the target — "who calls X?".
     *
     * @param {string} symbolId
     * @param {{limit?: number}} [opts] - `limit` defaults to 50 and is clamped
     *   to [0, HARD_EDGE_CAP]
     * @returns {object[]} a fresh array of at most `limit` edges
     */
    callers(symbolId, opts = {}) {
        const limit = clampLimit(opts.limit, 50, HARD_EDGE_CAP);
        return (this.callersOf.get(symbolId) ?? []).slice(0, limit);
    }
    /**
     * Edges where this symbol is the source — "what does X call?".
     *
     * @param {string} symbolId
     * @param {{limit?: number, includeUnresolved?: boolean}} [opts] - `limit`
     *   defaults to 50 and is clamped to [0, HARD_EDGE_CAP]; pass
     *   `includeUnresolved: false` to drop edges whose `toId` is null
     * @returns {object[]} a fresh array of at most `limit` edges
     */
    callees(symbolId, opts = {}) {
        const limit = clampLimit(opts.limit, 50, HARD_EDGE_CAP);
        const all = this.calleesOf.get(symbolId) ?? [];
        if (opts.includeUnresolved === false) {
            return all.filter((e) => e.toId !== null).slice(0, limit);
        }
        return all.slice(0, limit);
    }
    /**
     * Convenience: how many symbols / edges are indexed. Recomputed from the
     * graph on every access; `resolvedEdges` counts edges with a truthy `toId`.
     *
     * @returns {{symbols: number, edges: number, resolvedEdges: number}}
     */
    get stats() {
        let resolved = 0;
        for (const e of this.graph.edges)
            if (e.toId)
                resolved++;
        return {
            symbols: this.graph.symbols.length,
            edges: this.graph.edges.length,
            resolvedEdges: resolved,
        };
    }
}
|
|
127
|
+
//# sourceMappingURL=query.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query.js","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAwBA,MAAM,eAAe,GAAG,GAAG,CAAC;AAC5B,MAAM,aAAa,GAAG,GAAG,CAAC;AAE1B;;;;;;;GAOG;AACH,MAAM,OAAO,UAAU;IAQO;IAPX,WAAW,GAAgC,IAAI,GAAG,EAAE,CAAC;IACrD,IAAI,GAA8B,IAAI,GAAG,EAAE,CAAC;IAC7D,yDAAyD;IACxC,SAAS,GAA4B,IAAI,GAAG,EAAE,CAAC;IAChE,8DAA8D;IAC7C,SAAS,GAA4B,IAAI,GAAG,EAAE,CAAC;IAEhE,YAA4B,KAAY;QAAZ,UAAK,GAAL,KAAK,CAAO;QACtC,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YACvB,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACjC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,IAAI;gBAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;gBAClB,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YAC5B,kDAAkD;YAClD,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBACX,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACxC,IAAI,IAAI;oBAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;oBAClB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;YACD,0CAA0C;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC1C,IAAI,IAAI;gBAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;gBAClB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,UAAU,CAAC,KAAa,EAAE,OAAsB,EAAE;QAChD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,eAAe,CAAC,CAAC;QAC1D,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QAEtB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAmB,EAAE,CAAC;YACnC,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;oBAAE,SAAS;gBAChC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;oBACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAChB,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;wBAAE,OAAO,OAAO,CAAC;gBAC9C,CAAC;YACH,CAAC;YACD,OAAO,OAAO,CAAC;QACjB,CAAC;QAED
,sDAAsD;QACtD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QACnE,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEvC,gDAAgD;QAChD,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC;QACzD,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,KAAK,EAAE,GAAG,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC7C,CAAC;IAED,6DAA6D;IAC7D,QAAQ,CAAC,EAAU;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACnC,CAAC;IAED;;;;;;;OAOG;IACH,aAAa,CAAC,QAAgB;QAC5B,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACrD,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC5B,CAAC;IAED,8DAA8D;IAC9D,OAAO,CAAC,QAAgB,EAAE,OAAyB,EAAE;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,aAAa,CAAC,CAAC;QACxD,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC9D,CAAC;IAED,mEAAmE;IACnE,OAAO,CAAC,QAAgB,EAAE,OAAyB,EAAE;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,aAAa,CAAC,CAAC;QACxD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,iBAAiB,KAAK,KAAK,EAAE,CAAC;YACrC,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QAC5D,CAAC;QACD,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED,yDAAyD;IACzD,IAAI,KAAK;QACP,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK;YAAE,IAAI,CAAC,CAAC,IAAI;gBAAE,QAAQ,EAAE,CAAC;QACzD,OAAO;YACL,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM;YAClC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM;YAC9B,aAAa,EAAE,QAAQ;SACxB,CAAC;IACJ,CAAC;CACF"}
|
package/dist/redact.d.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Secret-pattern redaction for symbol signatures before they're stored.
|
|
3
|
+
*
|
|
4
|
+
* Code occasionally contains literal secrets — `const API_KEY = "sk-..."`,
|
|
5
|
+
* embedded JWTs, AWS access keys, GitHub tokens. When we extract a symbol's
|
|
6
|
+
* signature line into graph.json, those literals come with it. We redact
|
|
7
|
+
* them here so:
|
|
8
|
+
* 1. The on-disk graph.json doesn't carry plaintext secrets
|
|
9
|
+
* 2. The MCP tool output sent to the agent doesn't expose them either
|
|
10
|
+
*
|
|
11
|
+
* This is NOT a full secret scanner — it covers well-known fixed-prefix
|
|
12
|
+
* formats. Determined leakage (custom-format secrets) will still escape,
|
|
13
|
+
* but the common ones are covered.
|
|
14
|
+
*
|
|
15
|
+
* Pattern coverage and replacement tokens are intentionally short so a
|
|
16
|
+
* signature stays readable after redaction.
|
|
17
|
+
*/
|
|
18
|
+
/**
|
|
19
|
+
* Run every secret pattern over the input string. Returns the redacted text.
|
|
20
|
+
 * Intended for short inputs such as a single signature line; worst-case matching time is not guaranteed to be linear.
|
|
21
|
+
*
|
|
22
|
+
* Order matters — more specific patterns run first so a JWT doesn't get
|
|
23
|
+
* eaten by the generic long-token catch-all.
|
|
24
|
+
*/
|
|
25
|
+
export declare function redactSecrets(text: string): string;
|
|
26
|
+
/**
|
|
27
|
+
* Test helper / introspection: which patterns matched in this input?
|
|
28
|
+
* Useful for diagnostics; not on a hot path.
|
|
29
|
+
*/
|
|
30
|
+
export declare function detectSecrets(text: string): string[];
|
package/dist/redact.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Secret-pattern redaction for symbol signatures before they're stored.
|
|
3
|
+
*
|
|
4
|
+
* Code occasionally contains literal secrets — `const API_KEY = "sk-..."`,
|
|
5
|
+
* embedded JWTs, AWS access keys, GitHub tokens. When we extract a symbol's
|
|
6
|
+
* signature line into graph.json, those literals come with it. We redact
|
|
7
|
+
* them here so:
|
|
8
|
+
* 1. The on-disk graph.json doesn't carry plaintext secrets
|
|
9
|
+
* 2. The MCP tool output sent to the agent doesn't expose them either
|
|
10
|
+
*
|
|
11
|
+
* This is NOT a full secret scanner — it covers well-known fixed-prefix
|
|
12
|
+
* formats. Determined leakage (custom-format secrets) will still escape,
|
|
13
|
+
* but the common ones are covered.
|
|
14
|
+
*
|
|
15
|
+
* Pattern coverage and replacement tokens are intentionally short so a
|
|
16
|
+
* signature stays readable after redaction.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Ordered list of redaction rules. Each rule has:
 *   - pattern:     a global RegExp matching one secret format
 *   - replacement: text substituted for every match (`$1` back-references allowed)
 *   - label:       short identifier for the rule
 *
 * Rules are applied in array order, so the format-specific rules must stay
 * ahead of the generic quoted long-token rule at the end of the list.
 */
const SECRET_PATTERNS = [
    // PEM private-key headers. Only the BEGIN line is rewritten; the key-type
    // words between BEGIN and PRIVATE KEY are replaced by the redaction marker.
    {
        pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/g,
        replacement: '-----BEGIN ***REDACTED*** PRIVATE KEY-----',
        label: 'pem-private-key',
    },
    // JSON Web Tokens: `eyJ` followed by three base64url segments (8+ chars
    // each) separated by dots.
    {
        pattern: /eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}/g,
        replacement: '***JWT-REDACTED***',
        label: 'jwt',
    },
    // OpenAI / Anthropic style API keys: `sk-` then 20+ characters drawn from
    // letters, digits, `_` and `-`. The hyphen in the class is what lets
    // `sk-ant-...`, `sk-proj-...` etc. match as a single token.
    {
        pattern: /\bsk-[A-Za-z0-9_-]{20,}\b/g,
        replacement: 'sk-***REDACTED***',
        label: 'sk-token',
    },
    // GitHub classic personal access tokens.
    {
        pattern: /\bghp_[A-Za-z0-9]{30,}\b/g,
        replacement: 'ghp_***REDACTED***',
        label: 'github-pat',
    },
    // GitHub fine-grained personal access tokens. These contain underscores,
    // so the `gh?_` rules (alphanumerics only) would not cover them.
    {
        pattern: /\bgithub_pat_[A-Za-z0-9_]{30,}\b/g,
        replacement: 'github_pat_***REDACTED***',
        label: 'github-fine-grained-pat',
    },
    // GitHub server-to-server (installation) tokens.
    {
        pattern: /\bghs_[A-Za-z0-9]{30,}\b/g,
        replacement: 'ghs_***REDACTED***',
        label: 'github-server',
    },
    // GitHub OAuth access tokens.
    {
        pattern: /\bgho_[A-Za-z0-9]{30,}\b/g,
        replacement: 'gho_***REDACTED***',
        label: 'github-oauth',
    },
    // GitHub App user-to-server tokens.
    {
        pattern: /\bghu_[A-Za-z0-9]{30,}\b/g,
        replacement: 'ghu_***REDACTED***',
        label: 'github-user',
    },
    // GitHub refresh tokens.
    {
        pattern: /\bghr_[A-Za-z0-9]{30,}\b/g,
        replacement: 'ghr_***REDACTED***',
        label: 'github-refresh',
    },
    // AWS long-term access key IDs.
    {
        pattern: /\bAKIA[0-9A-Z]{16}\b/g,
        replacement: 'AKIA***REDACTED***',
        label: 'aws-akia',
    },
    // AWS temporary (STS) access key IDs.
    {
        pattern: /\bASIA[0-9A-Z]{16}\b/g,
        replacement: 'ASIA***REDACTED***',
        label: 'aws-asia',
    },
    // Slack tokens (`xoxa-`, `xoxb-`, `xoxp-`, `xoxr-`, `xoxs-`).
    {
        pattern: /\bxox[abprs]-[A-Za-z0-9-]{10,}\b/g,
        replacement: 'xox*-***REDACTED***',
        label: 'slack',
    },
    // Stripe live secret keys.
    {
        pattern: /\bsk_live_[A-Za-z0-9]{20,}\b/g,
        replacement: 'sk_live_***REDACTED***',
        label: 'stripe-live',
    },
    // Generic long token inside a string literal.
    // Heuristic: 40+ chars of alphanumeric / `_` / `+` / `/` / `=` / `-`,
    // immediately surrounded by matching quotes. Keeps false positives down
    // because random function names rarely live inside quotes. The quote
    // character is captured and re-emitted so the literal stays well-formed.
    {
        pattern: /(["'])([A-Za-z0-9_+/=-]{40,})\1/g,
        replacement: '$1***REDACTED-LONG-TOKEN***$1',
        label: 'long-token-in-string',
    },
];
|
|
85
|
+
/**
 * Scrub known secret formats out of a piece of text.
 *
 * Every rule in SECRET_PATTERNS is applied once, in list order, each one
 * operating on the output of the previous rule. Order matters: the
 * format-specific rules run before the generic quoted long-token rule, so a
 * quoted provider token keeps its precise replacement instead of being
 * rewritten by the catch-all.
 *
 * NOTE(review): matching uses ordinary backtracking regexes; the JWT rule can
 * rescan long dot-free runs, so run time is not guaranteed to be linear.
 *
 * @param {string} text - Text to scrub.
 * @returns {string} The text with every match replaced. Falsy input (for
 *   example the empty string) is returned unchanged.
 */
export function redactSecrets(text) {
    if (!text) {
        return text;
    }
    return SECRET_PATTERNS.reduce(
        (scrubbed, rule) => scrubbed.replace(rule.pattern, rule.replacement),
        text,
    );
}
|
|
101
|
+
/**
 * Diagnostic helper: list the rules whose pattern occurs in the text.
 * Meant for tests and introspection rather than hot paths.
 *
 * @param {string} text - Text to inspect.
 * @returns {string[]} Labels of the matching rules, in SECRET_PATTERNS order.
 *   Falsy input yields an empty array.
 */
export function detectSecrets(text) {
    if (!text) {
        return [];
    }
    return SECRET_PATTERNS
        // Probe with a throwaway copy of each regex so the shared /g
        // instance's lastIndex is never read or advanced.
        .filter(({ pattern }) => new RegExp(pattern.source, pattern.flags).test(text))
        .map(({ label }) => label);
}
|
|
117
|
+
//# sourceMappingURL=redact.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"redact.js","sourceRoot":"","sources":["../src/redact.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAQH,MAAM,eAAe,GAA6B;IAChD,2EAA2E;IAC3E,2CAA2C;IAC3C;QACE,OAAO,EAAE,qCAAqC;QAC9C,WAAW,EAAE,4CAA4C;QACzD,KAAK,EAAE,iBAAiB;KACzB;IACD,+DAA+D;IAC/D;QACE,OAAO,EAAE,6DAA6D;QACtE,WAAW,EAAE,oBAAoB;QACjC,KAAK,EAAE,KAAK;KACb;IACD,mEAAmE;IACnE,+CAA+C;IAC/C;QACE,OAAO,EAAE,4BAA4B;QACrC,WAAW,EAAE,mBAAmB;QAChC,KAAK,EAAE,UAAU;KAClB;IACD,iCAAiC;IACjC,EAAE,OAAO,EAAE,2BAA2B,EAAE,WAAW,EAAE,oBAAoB,EAAE,KAAK,EAAE,YAAY,EAAE;IAChG,wBAAwB;IACxB;QACE,OAAO,EAAE,2BAA2B;QACpC,WAAW,EAAE,oBAAoB;QACjC,KAAK,EAAE,eAAe;KACvB;IACD,8BAA8B;IAC9B;QACE,OAAO,EAAE,2BAA2B;QACpC,WAAW,EAAE,oBAAoB;QACjC,KAAK,EAAE,cAAc;KACtB;IACD,yBAAyB;IACzB;QACE,OAAO,EAAE,2BAA2B;QACpC,WAAW,EAAE,oBAAoB;QACjC,KAAK,EAAE,gBAAgB;KACxB;IACD,sBAAsB;IACtB,EAAE,OAAO,EAAE,uBAAuB,EAAE,WAAW,EAAE,oBAAoB,EAAE,KAAK,EAAE,UAAU,EAAE;IAC1F,mFAAmF;IACnF,EAAE,OAAO,EAAE,uBAAuB,EAAE,WAAW,EAAE,oBAAoB,EAAE,KAAK,EAAE,UAAU,EAAE;IAC1F,gBAAgB;IAChB;QACE,OAAO,EAAE,mCAAmC;QAC5C,WAAW,EAAE,qBAAqB;QAClC,KAAK,EAAE,OAAO;KACf;IACD,2BAA2B;IAC3B;QACE,OAAO,EAAE,+BAA+B;QACxC,WAAW,EAAE,wBAAwB;QACrC,KAAK,EAAE,aAAa;KACrB;IACD,2DAA2D;IAC3D,uEAAuE;IACvE,wEAAwE;IACxE,2DAA2D;IAC3D;QACE,OAAO,EAAE,kCAAkC;QAC3C,WAAW,EAAE,+BAA+B;QAC5C,KAAK,EAAE,sBAAsB;KAC9B;CACF,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,eAAe,EAAE,CAAC;QACvD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,KAAK,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,eAAe,EAAE,CAAC;QACjD,sEAAsE;QACtE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;QACxD,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
|