@sdsrs/code-graph 0.70.0 → 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -48,7 +48,15 @@ const { runGrepAnswer, runShowAnswer, sanitizeSearchPath } = require('./cg-answe
|
|
|
48
48
|
// v0.48: also match bare `KEY=VALUE grep` prefixes (no `env` verb) — the shape
|
|
49
49
|
// the deny message itself teaches (`CODE_GRAPH_NO_BLOCK_GREP=1 grep …`). With
|
|
50
50
|
// the old `env`-only form those commands failed gate 1 and were invisible.
|
|
51
|
-
|
|
51
|
+
// v0.71: `git grep` shares the verb set — its head is `git`, so it leaked past
|
|
52
|
+
// the matcher until folded in here. cg grep is a SUPERSET (covers tracked AND
|
|
53
|
+
// gitignored files), so routing `git grep` to it is sound. GREP_VERB is the
|
|
54
|
+
// single source of truth for every parse site that recognizes the search verb.
|
|
55
|
+
const GREP_VERB = 'git\\s+grep|grep|rg|ag';
|
|
56
|
+
const GREP_HEAD = new RegExp(`^\\s*(?:env\\s+)?(?:[A-Za-z_][A-Za-z0-9_]*=\\S*\\s+)*(${GREP_VERB})\\b`);
|
|
57
|
+
// Verb + prefix strip (kept in sync with GREP_HEAD via GREP_VERB; non-capturing).
|
|
58
|
+
// Shared by extractPatterns and countNamedPaths so the verb is removed identically.
|
|
59
|
+
const VERB_STRIP = new RegExp(`^\\s*(?:env\\s+)?(?:[A-Za-z_][A-Za-z0-9_]*=\\S*\\s+)*(?:${GREP_VERB})\\s+`);
|
|
52
60
|
// Source-tree prefix list. Expanded v0.27+ Phase C: original `src/tests/lib/...`
|
|
53
61
|
// missed real-world backend conventions where the prefix list term is preceded
|
|
54
62
|
// by something else (`backend/app/...` — `app/` doesn't match because `/` isn't
|
|
@@ -63,7 +71,7 @@ const SRC_PREFIXES =
|
|
|
63
71
|
const SRC_PATH = new RegExp(`(?:^|\\s|["'])(${SRC_PREFIXES})/`);
|
|
64
72
|
// Anchored variant for whole-token matching in extractSearchPath.
|
|
65
73
|
const SRC_PATH_TOKEN = new RegExp(`^(?:\\./)?(${SRC_PREFIXES})/`);
|
|
66
|
-
const PIPE_INTO_GREP =
|
|
74
|
+
const PIPE_INTO_GREP = new RegExp(`\\|\\s*(?:${GREP_VERB})\\b`);
|
|
67
75
|
const CG_INVOKED = /\bcode-graph-mcp\b/;
|
|
68
76
|
// File argument(s) that end in a config/lockfile/data extension. If, after removing
|
|
69
77
|
// ALL of them, no source-tree path remains, the grep is searching config/data not code.
|
|
@@ -80,12 +88,41 @@ const CONFIG_TARGET_ONLY = new RegExp(`(?:^|\\s)[^\\s|<>]*\\.(?:${NON_SOURCE_EXT
|
|
|
80
88
|
// data-file tokens both match; global so every one is peeled before the SRC_PATH re-check.
|
|
81
89
|
const CONFIG_TARGET_STRIP = new RegExp(`(?:^|\\s)[^\\s|<>]*\\.(?:${NON_SOURCE_EXTS})(?=\\s|$)`, 'gi');
|
|
82
90
|
|
|
91
|
+
// v0.71 — `git grep --cached`/`--staged` searches the STAGED index, and a treeish
|
|
92
|
+
// ref (`git grep "X" HEAD~3 -- src/`, `git grep "X" main -- src/`) searches another
|
|
93
|
+
// commit/branch — a scope the working-tree inline answer (`code-graph-mcp grep`)
|
|
94
|
+
// CANNOT honor. Folding them would substitute current-tree hits for a different
|
|
95
|
+
// revision with no signal. These are NOT the working-tree source searches this hook
|
|
96
|
+
// folds, so it stays out entirely (no hint, no deny) and the real git grep runs.
|
|
97
|
+
// (`--no-index` is working-tree scope → cg covers it → NOT excluded; plain grep/rg/ag
|
|
98
|
+
// have no revision concept.) A bare treeish without `--` (`git grep X main src/`) is
|
|
99
|
+
// genuinely ambiguous with a pathspec → left as the residual minority.
|
|
100
|
+
const GIT_GREP_HEAD = /^\s*(?:env\s+)?(?:[A-Za-z_][A-Za-z0-9_]*=\S*\s+)*git\s+grep\b/;
const GIT_GREP_STAGED = /(?:^|\s)--(?:cached|staged)(?:\s|$)/;
// git grep options whose value is a SEPARATE argv word (`-A 5`, `-e PATTERN`,
// `--max-depth 1`). The value must be skipped, otherwise it is miscounted as the
// search pattern and the real pattern is then mistaken for a treeish.
const GIT_GREP_SHORT_VALUE_FLAGS = 'ABCmef';
const GIT_GREP_LONG_VALUE_FLAGS = new Set([
  '--after-context',
  '--before-context',
  '--context',
  '--max-count',
  '--max-depth',
  '--threads',
]);

/**
 * Split the text that follows `git grep` into argv words the way a shell would.
 *
 * Single and double quotes group whitespace into one word (the quotes are dropped),
 * a backslash keeps the following character inside the current word, and scanning
 * stops at the first unquoted `;`, `|`, `&` or newline so that a compound tail
 * (`… ; git diff --cached`) is never read as an argument of the grep itself.
 *
 * @param {string} rest - command text after the `git grep` head.
 * @returns {string[]} argv words belonging to the `git grep` invocation.
 */
function splitGitGrepArgv(rest) {
  const argv = [];
  let word = '';
  let inWord = false;
  let quote = null;
  for (let i = 0; i < rest.length; i++) {
    const ch = rest[i];
    if (quote !== null) {
      if (ch === quote) {
        quote = null;
      } else if (ch === '\\' && quote === '"' && i + 1 < rest.length) {
        // Escaped character inside double quotes (`\"`): keep it in the word.
        word += ch + rest[++i];
      } else {
        word += ch;
      }
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      inWord = true; // `""` is still a (empty) word
      continue;
    }
    if (ch === '\\' && i + 1 < rest.length) {
      word += ch + rest[++i];
      inWord = true;
      continue;
    }
    // Checked before the whitespace test because `\n` is also whitespace.
    if (ch === ';' || ch === '|' || ch === '&' || ch === '\n') break;
    if (/\s/.test(ch)) {
      if (inWord) {
        argv.push(word);
        word = '';
        inWord = false;
      }
      continue;
    }
    word += ch;
    inWord = true;
  }
  if (inWord) argv.push(word);
  return argv;
}

/**
 * Report whether `cmd` is a `git grep` aimed at a scope other than the working tree:
 * the staged index (`--cached` / `--staged`) or a treeish given before the `--`
 * pathspec separator (`git grep "X" HEAD~3 -- src/`).
 *
 * The command is tokenized quote-aware, so a quoted pattern containing spaces,
 * `--cached` or ` -- ` is one word and cannot trigger the exclusion, and values of
 * value-taking options are not counted as positionals. A bare treeish without `--`
 * stays undetected because it is indistinguishable from a pathspec.
 *
 * @param {*} cmd - raw shell command; any non-string yields `false`.
 * @returns {boolean} `true` when the search is staged- or revision-scoped.
 */
function isRevisionScopedGitGrep(cmd) {
  if (typeof cmd !== 'string') return false;
  const head = GIT_GREP_HEAD.exec(cmd);
  if (head === null) return false;

  // GIT_GREP_STAGED is written for whole command lines; applied to a
  // whitespace-free argv word it can only match the exact flag.
  const isStagedFlag = (word) => !/\s/.test(word) && GIT_GREP_STAGED.test(word);

  const argv = splitGitGrepArgv(cmd.slice(head[0].length));
  let patternSeen = false;
  let refSeen = false;
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    // Everything after `--` is a pathspec; only a ref seen BEFORE it counts.
    if (arg === '--') return refSeen;
    if (isStagedFlag(arg)) return true;
    if (arg.startsWith('--')) {
      if (GIT_GREP_LONG_VALUE_FLAGS.has(arg)) i++; // `--flag value` (not `--flag=value`)
      continue;
    }
    if (arg.startsWith('-')) {
      // Short cluster (`-n`, `-nA`, `-A5`, `-efoo`): the first value-taking letter
      // ends the cluster; its value is the rest of the word or, if none, the next word.
      for (let j = 1; j < arg.length; j++) {
        const letter = arg[j];
        if (!GIT_GREP_SHORT_VALUE_FLAGS.includes(letter)) continue;
        // `-e PATTERN` / `-f FILE` supply the pattern, so the first positional
        // that follows is already a treeish or a path.
        if (letter === 'e' || letter === 'f') patternSeen = true;
        if (j === arg.length - 1) i++;
        break;
      }
      continue;
    }
    if (!patternSeen) {
      patternSeen = true; // the search pattern
      continue;
    }
    refSeen = true; // a positional after the pattern; a treeish if `--` follows
  }
  return false; // no `--` separator: ambiguous with a pathspec, not excluded
}
|
|
118
|
+
|
|
83
119
|
function shouldHint(cmd) {
|
|
84
120
|
if (!cmd || typeof cmd !== 'string') return false;
|
|
85
121
|
if (cmd.length > 1000) return false; // sanity — oversize commands are noise
|
|
86
122
|
if (CG_INVOKED.test(cmd)) return false; // already using cg
|
|
87
123
|
if (PIPE_INTO_GREP.test(cmd)) return false; // `cargo test | grep FAILED` is output filter
|
|
88
124
|
if (!GREP_HEAD.test(cmd)) return false; // not a search command
|
|
125
|
+
if (isRevisionScopedGitGrep(cmd)) return false; // v0.71 — git grep --cached/treeish: scope cg can't honor
|
|
89
126
|
if (!SRC_PATH.test(cmd)) return false; // not against indexed source tree
|
|
90
127
|
// If a config file appears AND no source path remains after stripping it, skip.
|
|
91
128
|
if (CONFIG_TARGET_ONLY.test(cmd)) {
|
|
@@ -128,7 +165,7 @@ const MARKER_ONLY =
|
|
|
128
165
|
function extractPatterns(cmd) {
|
|
129
166
|
if (!cmd || typeof cmd !== 'string') return [];
|
|
130
167
|
// Strip leading verb + env/assignment prefix (kept in sync with GREP_HEAD)
|
|
131
|
-
const stripped = cmd.replace(
|
|
168
|
+
const stripped = cmd.replace(VERB_STRIP, '');
|
|
132
169
|
// Collect every quoted argument — first one is the pattern in standard grep
|
|
133
170
|
// usage; subsequent ones (e.g. `-e "second"`) are also patterns or filter
|
|
134
171
|
// expressions and worth screening too.
|
|
@@ -322,7 +359,7 @@ function countNamedPaths(cmd, patterns) {
|
|
|
322
359
|
// Only the grep's OWN path args count. Stop at the first top-level command separator so a
|
|
323
360
|
// path in a compound tail (`grep X src/a.py | sed … src/b.py`) is NOT mistaken for a second
|
|
324
361
|
// grep target — that would wrongly downgrade a complete single-file grep to a hint.
|
|
325
|
-
let seg = cmd.replace(
|
|
362
|
+
let seg = cmd.replace(VERB_STRIP, '');
|
|
326
363
|
let quote = null;
|
|
327
364
|
for (let i = 0; i < seg.length; i++) {
|
|
328
365
|
const c = seg[i];
|
|
@@ -450,7 +487,8 @@ function buildShowDenyReason(answer, unansweredTail) {
|
|
|
450
487
|
function translateBreToRg(cmd, pattern) {
|
|
451
488
|
if (typeof pattern !== 'string' || !pattern) return pattern;
|
|
452
489
|
const verb = (cmd.match(GREP_HEAD) || [])[1];
|
|
453
|
-
|
|
490
|
+
// git grep speaks BRE like plain grep; rg/ag are already extended-regex.
|
|
491
|
+
if (!verb || !/grep$/.test(verb)) return pattern;
|
|
454
492
|
if (/(?:^|\s)-[a-zA-Z]*[EP][a-zA-Z]*(?:\s|=|\d|$)|--(?:extended-regexp|perl-regexp)\b/.test(cmd)) {
|
|
455
493
|
return pattern;
|
|
456
494
|
}
|
|
@@ -645,6 +683,7 @@ module.exports = {
|
|
|
645
683
|
extractUnansweredTail, // v0.50 — compound-tail honesty in answered denies
|
|
646
684
|
extractPatterns, // v0.32.1 — exposed for tests
|
|
647
685
|
countNamedPaths, // v0.70 — multi-path deny→hint downgrade
|
|
686
|
+
isRevisionScopedGitGrep, // v0.71 — git grep --cached/treeish exclusion
|
|
648
687
|
extractSearchPath, // v0.47.0 — deny-with-answer
|
|
649
688
|
normalizeCommandPaths, // v0.47.1 — abs-path matcher fix
|
|
650
689
|
resolveProjectRoot, // v0.48 — subdir-cwd dark fix
|
|
@@ -63,6 +63,71 @@ test('shouldHint: env-prefixed grep on src/', () => {
|
|
|
63
63
|
assert.equal(shouldHint('env LANG=C grep -rn "Foo" src/'), true);
|
|
64
64
|
});
|
|
65
65
|
|
|
66
|
+
// ── git grep coverage (v0.71): `git grep` is raw BRE search on the tracked
|
|
67
|
+
// source tree — same foldable intent as `grep`, but its command HEAD is
|
|
68
|
+
// `git`, so it leaked past GREP_HEAD until v0.71. cg grep is a superset
|
|
69
|
+
// (tracked AND gitignored), so folding `git grep` into it is sound. The verb
|
|
70
|
+
// set is shared across GREP_HEAD / VERB_STRIP / PIPE_INTO_GREP — these lock
|
|
71
|
+
// each parse site that touches the verb.
|
|
72
|
+
|
|
73
|
+
test('git grep: shouldHint fires on `git grep` against src/', () => {
|
|
74
|
+
assert.equal(shouldHint('git grep -n "fts5_search" src/storage/'), true);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
test('git grep: shouldHint fires with the `--` pathspec separator', () => {
|
|
78
|
+
assert.equal(shouldHint('git grep "FooBar" -- src/lib.rs'), true);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
test('git grep: identifier search is a deny (block tier, same as grep)', () => {
|
|
82
|
+
assert.equal(shouldBlock('git grep "FooBar" src/'), true);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
test('git grep: context flag + decl anchor → show mode', () => {
|
|
86
|
+
assert.deepEqual(
|
|
87
|
+
classifyBlock('git grep "fn handle_message" -A 5 src/'),
|
|
88
|
+
{ mode: 'show', symbols: ['handle_message'] });
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test('git grep: multi-file named search downgrades to hint (v0.70 parity)', () => {
|
|
92
|
+
// inline answer scopes to ONE path; ≥2 named files → hint so the full grep runs.
|
|
93
|
+
assert.equal(classifyBlock('git grep "FooBar" src/a.rs src/b.rs'), null);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test('git grep: BRE alternation is translated to rust-regex dialect', () => {
|
|
97
|
+
// git grep speaks BRE like plain grep → an escaped \| must unescape for cg grep.
|
|
98
|
+
assert.equal(translateBreToRg('git grep "a\\|b" src/', 'a\\|b'), 'a|b');
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
test('git grep: `| git grep` is an output-filter pipe (no fire)', () => {
|
|
102
|
+
assert.equal(shouldHint('grep -rn "Foo" src/ | git grep "Bar"'), false);
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
test('git grep: rebaseRelativePaths rebases the real subdir path, not the `grep` word', () => {
|
|
106
|
+
// shell sits in backend/; `app` is subdir-relative → rebased. `grep` is the
|
|
107
|
+
// git subcommand and is existence-gated so it never masquerades as a path.
|
|
108
|
+
const exists = (p) => p.endsWith('/root/backend/app');
|
|
109
|
+
const out = rebaseRelativePaths('git grep "Foo" app', 'backend', '/root', exists);
|
|
110
|
+
assert.match(out, /git grep "Foo" backend\/app/);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
// v0.71 — git grep at a scope the working-tree cg answer can't honor (staged
|
|
114
|
+
// index / another revision) must NOT deny: folding it would substitute
|
|
115
|
+
// current-tree hits for a different revision. The hook stays out entirely.
|
|
116
|
+
test('git grep: --cached (staged index) is not denied — cg cannot honor that scope', () => {
|
|
117
|
+
assert.equal(shouldHint('git grep --cached "FooBar" src/'), false);
|
|
118
|
+
assert.equal(shouldBlock('git grep --cached "FooBar" src/'), false);
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
test('git grep: a treeish ref before `--` (another revision) is not denied', () => {
|
|
122
|
+
assert.equal(shouldHint('git grep "FooBar" HEAD~3 -- src/'), false);
|
|
123
|
+
assert.equal(shouldBlock('git grep "cascade_failure" main -- src/'), false);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test('git grep: a bare `-- path` (no ref, working-tree scope) STILL denies', () => {
|
|
127
|
+
// guard: the revision-scope exclusion must not over-catch a plain pathspec sep.
|
|
128
|
+
assert.equal(shouldBlock('git grep "FooBar" -- src/lib.rs'), true);
|
|
129
|
+
});
|
|
130
|
+
|
|
66
131
|
// ── Should NOT fire: pipe-grep (output filter, not search) ──────────
|
|
67
132
|
|
|
68
133
|
test('shouldHint: pipe-grep on cargo test output', () => {
|
|
@@ -985,6 +1050,22 @@ test('e2e: denied grep with stub hits → deny JSON embeds the answer + records
|
|
|
985
1050
|
}
|
|
986
1051
|
});
|
|
987
1052
|
|
|
1053
|
+
test('e2e: `git grep` identifier on src/ → deny with the embedded answer', () => {
|
|
1054
|
+
const uniq = `GitHit${Date.now()}`;
|
|
1055
|
+
const fixture = e2eFixture(
|
|
1056
|
+
`process.stdout.write('src/foo.rs:9 fn ' + process.argv[3] + '()\\n');`);
|
|
1057
|
+
const cmd = `git grep -n "${uniq}" src/`;
|
|
1058
|
+
try {
|
|
1059
|
+
const res = runHook(cmd, fixture);
|
|
1060
|
+
assert.equal(res.status, 0);
|
|
1061
|
+
const out = JSON.parse(res.stdout);
|
|
1062
|
+
assert.equal(out.hookSpecificOutput.permissionDecision, 'deny');
|
|
1063
|
+
assert.match(out.hookSpecificOutput.permissionDecisionReason, new RegExp(uniq));
|
|
1064
|
+
} finally {
|
|
1065
|
+
cleanupFixture(fixture, cmd);
|
|
1066
|
+
}
|
|
1067
|
+
});
|
|
1068
|
+
|
|
988
1069
|
test('e2e: denied grep records the denied pattern (fingerprint for verbatim re-grep detection)', () => {
|
|
989
1070
|
// The Rust funnel (aggregate_recommendations_jsonl) scores a follow-up search
|
|
990
1071
|
// carrying the SAME pattern as the armed answered deny as fall-through, not a
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sdsrs/code-graph",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.71.0",
|
|
4
4
|
"description": "MCP server that indexes codebases into an AST knowledge graph with semantic search, call graph traversal, and HTTP route tracing",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -35,10 +35,10 @@
|
|
|
35
35
|
"node": ">=16"
|
|
36
36
|
},
|
|
37
37
|
"optionalDependencies": {
|
|
38
|
-
"@sdsrs/code-graph-linux-x64": "0.
|
|
39
|
-
"@sdsrs/code-graph-linux-arm64": "0.
|
|
40
|
-
"@sdsrs/code-graph-darwin-x64": "0.
|
|
41
|
-
"@sdsrs/code-graph-darwin-arm64": "0.
|
|
42
|
-
"@sdsrs/code-graph-win32-x64": "0.
|
|
38
|
+
"@sdsrs/code-graph-linux-x64": "0.71.0",
|
|
39
|
+
"@sdsrs/code-graph-linux-arm64": "0.71.0",
|
|
40
|
+
"@sdsrs/code-graph-darwin-x64": "0.71.0",
|
|
41
|
+
"@sdsrs/code-graph-darwin-arm64": "0.71.0",
|
|
42
|
+
"@sdsrs/code-graph-win32-x64": "0.71.0"
|
|
43
43
|
}
|
|
44
44
|
}
|