agent-security-scanner-mcp 3.7.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -8
- package/analyzer.py +22 -5
- package/cross_file_analyzer.py +216 -0
- package/daemon.py +179 -0
- package/index.js +279 -3
- package/package.json +19 -5
- package/packages/npm-bloom.json +1 -0
- package/pattern_matcher.py +1 -0
- package/regex_fallback.py +199 -1
- package/requirements.txt +1 -0
- package/rules/prompt-injection.security.yaml +273 -41
- package/scripts/postinstall.js +60 -0
- package/skills/openclaw/SKILL.md +102 -0
- package/skills/security-review.md +139 -0
- package/skills/security-scan-batch.md +107 -0
- package/skills/security-scanner.md +76 -0
- package/src/cli/doctor.js +29 -1
- package/src/cli/init.js +93 -0
- package/src/cli/report.js +444 -0
- package/src/config.js +247 -0
- package/src/context.js +289 -0
- package/src/daemon-client.js +233 -0
- package/src/dedup.js +129 -0
- package/src/fix-patterns.js +76 -19
- package/src/history.js +159 -0
- package/src/tools/check-package.js +36 -12
- package/src/tools/fix-security.js +32 -5
- package/src/tools/import-resolver.js +249 -0
- package/src/tools/project-context.js +365 -0
- package/src/tools/scan-action.js +489 -0
- package/src/tools/scan-mcp.js +588 -0
- package/src/tools/scan-project.js +16 -4
- package/src/tools/scan-prompt.js +292 -527
- package/src/tools/scan-security.js +37 -6
- package/src/typosquat.js +210 -0
- package/src/utils.js +215 -8
- package/templates/gitlab-ci-security.yml +225 -0
- package/templates/pre-commit-hook.sh +233 -0
package/README.md
CHANGED
|
@@ -8,7 +8,11 @@ Security scanner for AI coding agents and autonomous assistants. Scans code for
|
|
|
8
8
|
[](benchmarks/RESULTS.md)
|
|
9
9
|
[](https://github.com/sinewaveai/agent-security-scanner-mcp/actions/workflows/test.yml)
|
|
10
10
|
|
|
11
|
-
> **New in v3.
|
|
11
|
+
> **New in v3.8.0:** Cross-file taint tracking, project context discovery (frameworks/middleware detection), and Layer 2 LLM-powered security review. Detects vulnerabilities across file boundaries and reduces false positives by understanding project defenses. [See changelog](#changelog).
|
|
12
|
+
>
|
|
13
|
+
> **Also new in v3.7.0:** Inter-procedural taint analysis with Python daemon caching (~4000x faster repeat scans). [See v3.7.0 demo](demo/).
|
|
14
|
+
>
|
|
15
|
+
> **OpenClaw integration:** 30+ rules targeting autonomous AI threats. [See setup](#openclaw-integration).
|
|
12
16
|
|
|
13
17
|
## Tools
|
|
14
18
|
|
|
@@ -20,7 +24,7 @@ Security scanner for AI coding agents and autonomous assistants. Scans code for
|
|
|
20
24
|
| `scan_project` | Scan entire project with A-F security grading | For project-wide security audits |
|
|
21
25
|
| `check_package` | Verify a package name isn't AI-hallucinated (4.3M+ packages) | Before adding any new dependency |
|
|
22
26
|
| `scan_packages` | Bulk-check all imports in a file for hallucinated packages | Before committing code with new imports |
|
|
23
|
-
| `scan_agent_prompt` | Detect prompt injection
|
|
27
|
+
| `scan_agent_prompt` | Detect prompt injection with bypass hardening (59 rules + multi-encoding) | Before acting on external/untrusted input |
|
|
24
28
|
| `list_security_rules` | List available security rules and fix templates | To check rule coverage for a language |
|
|
25
29
|
|
|
26
30
|
## Quick Start
|
|
@@ -251,6 +255,8 @@ Scan a code file's imports to detect AI-hallucinated package names. Use after wr
|
|
|
251
255
|
|
|
252
256
|
Scan a prompt or instruction for malicious intent before executing it. Use when receiving instructions from untrusted sources (files, web content, user uploads). Detects prompt injection, exfiltration attempts, backdoor requests, social engineering, and jailbreaks.
|
|
253
257
|
|
|
258
|
+
**New in v3.6.0:** Bypass hardening against 5 attack vectors (code block delimiter confusion, pattern fragmentation, multi-encoding, multi-turn escalation, composite threshold gaming) with Unicode normalization, homoglyph detection, and optional Garak deep analysis.
|
|
259
|
+
|
|
254
260
|
**Parameters:**
|
|
255
261
|
|
|
256
262
|
| Parameter | Type | Required | Description |
|
|
@@ -858,6 +864,34 @@ All MCP tools support a `verbosity` parameter to minimize context window consump
|
|
|
858
864
|
|
|
859
865
|
## Changelog
|
|
860
866
|
|
|
867
|
+
### v3.7.0
|
|
868
|
+
- **Python Daemon** - Long-running Python process with JSONL protocol (~10x faster repeat scans via LRU caching of 200 entries keyed by file mtime)
|
|
869
|
+
- **Daemon Client** - Auto-start, health checks, graceful shutdown, automatic fallback to sync mode on failure (3 restarts/60s limit)
|
|
870
|
+
- **Inter-procedural Taint Analysis** - Call-graph construction and cross-function taint propagation with multi-hop resolution (capped at 500 iterations)
|
|
871
|
+
- **Function Summaries** - Tracks param-to-return taint flows, internal sinks (`os.system(param)`), source-returning functions, and sanitizer presence
|
|
872
|
+
- **Enhanced Taint Detection** - Detects taint through 3+ function chains, handles method calls, default args, unpacking, and recursive functions
|
|
873
|
+
- **10 New Pytest Tests** - Comprehensive inter-procedural taint coverage: basic param→return, internal sinks, multi-hop chains, sanitizer blocking, 500-function cap
|
|
874
|
+
- **9 New Vitest Tests** - Daemon protocol validation, health checks, caching, error handling, graceful shutdown
|
|
875
|
+
- **Doctor Command Enhancement** - Added daemon health status to diagnostic output
|
|
876
|
+
|
|
877
|
+
### v3.6.0
|
|
878
|
+
- **Bypass Hardening** - Closed 5 critical prompt injection bypass vectors: code block delimiter confusion (`~~~`, `<code>`, `<!---->`), pattern fragmentation (string concat, C-style comments), multi-encoding (base64/hex/URL/ROT13 cascade), multi-turn escalation (cross-turn boundary scanning, Crescendo frame-setting), and composite threshold gaming (co-occurrence matrix, orthogonal dimension scoring)
|
|
879
|
+
- **Unicode Normalization Pipeline** - NFKC normalization, Cyrillic/Greek homoglyph canonicalization (40+ mappings), zero-width character stripping, Zalgo diacritics removal, invisible Unicode detection as obfuscation indicator
|
|
880
|
+
- **Multi-Encoding Decode Cascade** - Replaced base64-only decoder with comprehensive cascade supporting nested base64, hex, URL encoding, and indicator-gated ROT13
|
|
881
|
+
- **Enhanced Composite Scoring** - Category co-occurrence boost matrix (12 suspicious pairs, +40% cap), orthogonal dimension scoring (7 attack dimensions, +40 flat bonus), low-signal accumulation for multiple LOW-confidence findings
|
|
882
|
+
- **Garak Integration** - Optional NVIDIA Garak LLM vulnerability scanner integration via `deep_scan` parameter for advanced encoding probes and latent injection detection
|
|
883
|
+
- **PromptFoo Red-Team Suite** - 13 automated test cases with custom MCP provider for continuous bypass detection validation (`npm run test:redteam`)
|
|
884
|
+
- **3 New YAML Rules** - Whitespace fragmentation, Crescendo escalation setup, leetspeak/character substitution obfuscation
|
|
885
|
+
- **Test Coverage Expansion** - 28 new prompt scanner tests covering all bypass vectors and false positive regression
|
|
886
|
+
|
|
887
|
+
### v3.5.2
|
|
888
|
+
- **Prompt Injection Fixes** - Closed 5 bypass vectors: tilde code fences (~~~), string fragmentation, base64 encoding, multi-turn escalation, and composite indicators
|
|
889
|
+
- **Advanced Decoding** - Added Morse code, Braille Unicode, and Zalgo diacritics decoding to detect obfuscated prompt attacks
|
|
890
|
+
- **Garak Red-Team Validation** - Improved detection rates to 100% across all categories (encoding, promptinject, jailbreak)
|
|
891
|
+
- **npm Bloom Filter** - Ships npm-bloom.json (7.9 MB) in base package — all 7 ecosystems now work out of the box (npm, PyPI, RubyGems, crates.io, pub.dev, CPAN, raku.land)
|
|
892
|
+
- **Expanded Benchmarks** - Benchmark corpus increased to 424 annotations across 17 files (was 335/13)
|
|
893
|
+
- **CI Improvements** - Added pytest to requirements.txt, expanded test matrix with AST mode on Node 22
|
|
894
|
+
|
|
861
895
|
### v3.4.0
|
|
862
896
|
- **Severity Calibration** - 207-rule severity map with HIGH/MEDIUM/LOW confidence scores for more accurate prioritization
|
|
863
897
|
- **Cross-Engine Deduplication** - ~30-50% noise reduction by deduplicating findings across AST, taint, and regex engines
|
|
@@ -894,20 +928,20 @@ All MCP tools support a `verbosity` parameter to minimize context window consump
|
|
|
894
928
|
|
|
895
929
|
## Installation Options
|
|
896
930
|
|
|
897
|
-
### Default Package (
|
|
931
|
+
### Default Package (10.6 MB)
|
|
898
932
|
|
|
899
933
|
```bash
|
|
900
934
|
npm install -g agent-security-scanner-mcp
|
|
901
935
|
```
|
|
902
936
|
|
|
903
|
-
|
|
937
|
+
**New in v3.5.2:** Now includes **all 7 ecosystems** out of the box — npm, PyPI, RubyGems, crates.io, pub.dev, CPAN, raku.land (4.3M+ packages total)
|
|
904
938
|
|
|
905
|
-
###
|
|
939
|
+
### Legacy Lightweight Package (2.7 MB)
|
|
906
940
|
|
|
907
|
-
|
|
941
|
+
For environments with strict size constraints (excludes npm bloom filter):
|
|
908
942
|
|
|
909
943
|
```bash
|
|
910
|
-
npm install -g agent-security-scanner-mcp
|
|
944
|
+
npm install -g agent-security-scanner-mcp@3.4.1
|
|
911
945
|
```
|
|
912
946
|
|
|
913
947
|
---
|
|
@@ -919,4 +953,4 @@ npm install -g agent-security-scanner-mcp-full
|
|
|
919
953
|
|
|
920
954
|
## License
|
|
921
955
|
|
|
922
|
-
MIT
|
|
956
|
+
MIT
|
package/analyzer.py
CHANGED
|
@@ -11,6 +11,7 @@ import sys
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
13
|
import re
|
|
14
|
+
import argparse
|
|
14
15
|
from typing import List, Dict, Any
|
|
15
16
|
|
|
16
17
|
# Add the directory containing this script to the path
|
|
@@ -91,6 +92,7 @@ def analyze_file_regex(file_path):
|
|
|
91
92
|
'column': match.start() + col_offset,
|
|
92
93
|
'length': match.end() - match.start(),
|
|
93
94
|
'severity': rule['severity'],
|
|
95
|
+
'confidence': rule.get('metadata', {}).get('confidence', 'MEDIUM'),
|
|
94
96
|
'metadata': rule.get('metadata', {}),
|
|
95
97
|
'engine': 'regex'
|
|
96
98
|
})
|
|
@@ -191,6 +193,7 @@ def analyze_file_ast(file_path):
|
|
|
191
193
|
'column': f.column,
|
|
192
194
|
'length': length,
|
|
193
195
|
'severity': f.severity,
|
|
196
|
+
'confidence': f.metadata.get('confidence', getattr(f, 'confidence', 'MEDIUM')),
|
|
194
197
|
'metadata': f.metadata,
|
|
195
198
|
'engine': 'taint' if is_taint else 'ast',
|
|
196
199
|
})
|
|
@@ -229,16 +232,30 @@ def analyze_file(file_path):
|
|
|
229
232
|
|
|
230
233
|
|
|
231
234
|
def main():
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
+
parser = argparse.ArgumentParser(description='Security Analyzer - AST-based with regex fallback')
|
|
236
|
+
parser.add_argument('file_path', help='Path to the file to analyze')
|
|
237
|
+
parser.add_argument('--engine', choices=['auto', 'ast', 'regex'], default='auto',
|
|
238
|
+
help='Analysis engine: auto (default), ast (tree-sitter only), regex (regex only)')
|
|
239
|
+
args = parser.parse_args()
|
|
235
240
|
|
|
236
|
-
file_path =
|
|
241
|
+
file_path = args.file_path
|
|
237
242
|
if not os.path.exists(file_path):
|
|
238
243
|
print(json.dumps({'error': f'File not found: {file_path}'}))
|
|
239
244
|
sys.exit(1)
|
|
240
245
|
|
|
241
|
-
|
|
246
|
+
engine = args.engine
|
|
247
|
+
|
|
248
|
+
if engine == 'regex':
|
|
249
|
+
results = analyze_file_regex(file_path)
|
|
250
|
+
elif engine == 'ast':
|
|
251
|
+
if not HAS_AST_ENGINE:
|
|
252
|
+
print(json.dumps({'error': 'AST engine requested but tree-sitter is not available. Install dependencies: python3 -m pip install -r requirements.txt'}))
|
|
253
|
+
sys.exit(1)
|
|
254
|
+
results = analyze_file_ast(file_path)
|
|
255
|
+
else:
|
|
256
|
+
# auto: use AST if available, otherwise regex
|
|
257
|
+
results = analyze_file(file_path)
|
|
258
|
+
|
|
242
259
|
print(json.dumps(results))
|
|
243
260
|
|
|
244
261
|
|
|
package/cross_file_analyzer.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Cross-file taint analysis for security scanning.
|
|
3
|
+
|
|
4
|
+
Builds an import graph across local files, runs per-file analysis,
|
|
5
|
+
and propagates taint warnings when a file imports from another file
|
|
6
|
+
that has ERROR-severity findings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Import the per-file analyzer
|
|
15
|
+
from analyzer import analyze_file
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_js_imports(source):
    """Extract module specifiers from JavaScript/TypeScript source text.

    Recognised forms, scanned in this order (all matches of one form are
    returned before the next form is scanned):

    1. ``require('...')``
    2. ``... from '...'`` (static import / re-export)
    3. ``import '...'`` (side-effect import)
    4. ``import('...')`` (dynamic import with a string-literal specifier)

    Args:
        source: Full text of a JS/TS file.

    Returns:
        List of specifier strings exactly as written in the source.
        Matching is purely regex-based, so text inside comments or string
        literals that looks like an import is reported as well.
    """
    patterns = (
        r'''require\s*\(\s*['"]([^'"]+)['"]\s*\)''',
        r'''from\s+['"]([^'"]+)['"]''',
        r'''import\s+['"]([^'"]+)['"]''',
        # Dynamic import: the side-effect pattern above requires whitespace
        # after `import`, so `import('./x')` needs its own pattern.
        r'''import\s*\(\s*['"]([^'"]+)['"]\s*\)''',
    )
    imports = []
    for pattern in patterns:
        imports.extend(m.group(1) for m in re.finditer(pattern, source))
    return imports
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_py_imports(source):
    """Extract imported module names from Python source text.

    Only statements that start at column 0 are considered.

    * ``import a.b, c as d`` yields the top-level name of every listed
      module (``'a'``, ``'c'``).
    * ``from a.b import x`` yields the top-level package (``'a'``).
    * ``from .mod import x`` / ``from ..pkg.mod import x`` yield the module
      with its leading dots preserved (``'.mod'``, ``'..pkg.mod'``) so the
      relative level is not lost.
    * ``from . import a, b`` yields one dotted entry per imported name
      (``'.a'``, ``'.b'``); only names on the same line are read.

    Args:
        source: Full text of a Python file.

    Returns:
        List of module strings; all ``import`` statements first, then all
        ``from ... import`` statements, each group in source order.
    """
    imports = []

    # import a, b.c as d
    for m in re.finditer(r'^import[ \t]+(.+)$', source, re.MULTILINE):
        # Drop trailing comments and anything after a statement separator.
        clause = m.group(1).split('#', 1)[0].split(';', 1)[0]
        for part in clause.split(','):
            tokens = part.split()
            if tokens:
                imports.append(tokens[0].split('.')[0])

    # from module import ...
    for m in re.finditer(r'^from[ \t]+(\S+)[ \t]+import\b[ \t]*(.*)$',
                         source, re.MULTILINE):
        module = m.group(1)
        if not module.startswith('.'):
            imports.append(module.split('.')[0])
            continue
        if module.strip('.'):
            # Relative import naming a module: keep dots + dotted path.
            # (Splitting on '.' here would yield '' and lose the import.)
            imports.append(module)
            continue
        # Bare-dots form (`from . import a, b`): each imported name may be a
        # sibling module, so report it with the dots prefixed.
        names = m.group(2).split('#', 1)[0].split(';', 1)[0]
        names = names.replace('(', ' ').replace(')', ' ')
        for part in names.split(','):
            tokens = part.split()
            if tokens and tokens[0] != '*':
                imports.append(module + tokens[0])

    return imports
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def detect_language(file_path):
    """Detect the source language from a file's extension.

    The extension comparison is case-insensitive.

    Args:
        file_path: Path (or bare file name) to classify.

    Returns:
        ``'python'``, ``'javascript'`` or ``'typescript'``; ``'unknown'``
        for any other extension or when there is no extension.
    """
    ext = os.path.splitext(file_path)[1].lower()
    lang_map = {
        '.py': 'python',
        '.js': 'javascript', '.jsx': 'javascript',
        # ES-module / CommonJS specific extensions are still JavaScript.
        '.mjs': 'javascript', '.cjs': 'javascript',
        '.ts': 'typescript', '.tsx': 'typescript',
        '.mts': 'typescript', '.cts': 'typescript',
    }
    return lang_map.get(ext, 'unknown')
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def resolve_local_import(module, base_dir, lang):
    """Resolve a relative/local import to an existing file path.

    JavaScript/TypeScript: only specifiers starting with ``.`` are resolved.
    The specifier is tried as-is, then with ``.js``/``.ts``/``.tsx``/``.jsx``
    appended, then as a directory containing ``index.js``/``index.ts``.

    Python:
      * Relative modules (leading dots) are resolved against ``base_dir``,
        moving up one directory for every dot beyond the first, and tried
        as ``<path>.py`` then ``<path>/__init__.py``.
      * Other modules are tried as a sibling of the importing file:
        ``<base_dir>/<module>.py`` then ``<base_dir>/<module>/__init__.py``.

    Args:
        module: Import specifier as written (JS) or module name (Python).
        base_dir: Directory of the importing file.
        lang: ``'javascript'``, ``'typescript'`` or ``'python'``.

    Returns:
        Path of the first candidate that is an existing file, or ``None``
        when nothing matches or the language is not handled.
    """
    if lang in ('javascript', 'typescript'):
        # Only resolve relative imports; bare specifiers are packages.
        if not module.startswith('.'):
            return None
        candidates = [module]
        candidates += [module + ext for ext in ('.js', '.ts', '.tsx', '.jsx')]
        candidates += [os.path.join(module, name)
                       for name in ('index.js', 'index.ts')]
        for candidate in candidates:
            full = os.path.normpath(os.path.join(base_dir, candidate))
            if os.path.isfile(full):
                return full
        return None

    if lang == 'python':
        if module.startswith('.'):
            rel = module.lstrip('.')
            level = len(module) - len(rel)
            # One dot means "this package"; each extra dot is one directory
            # up. Dropping the dots without honouring the level would look
            # in the wrong directory for `..mod`.
            package_dir = base_dir
            if level > 1:
                package_dir = os.path.normpath(
                    os.path.join(base_dir, *([os.pardir] * (level - 1))))
            rel_path = rel.replace('.', os.sep)
            if rel_path:
                candidates = [
                    os.path.join(package_dir, rel_path + '.py'),
                    os.path.join(package_dir, rel_path, '__init__.py'),
                ]
            else:
                # Bare dots refer to the package itself.
                candidates = [os.path.join(package_dir, '__init__.py')]
        else:
            # A non-relative name is local only if it sits next to the
            # importing file, as a module or as a package directory.
            rel_path = module.replace('.', os.sep)
            candidates = [
                os.path.join(base_dir, rel_path + '.py'),
                os.path.join(base_dir, rel_path, '__init__.py'),
            ]
        for candidate in candidates:
            if os.path.isfile(candidate):
                return candidate

    return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def extract_exports(source, lang):
    """Extract exported function/class/variable names via regex.

    JavaScript/TypeScript:
      * ``export [async] function|class|const|let|var NAME`` yields ``NAME``
        (generator ``function*`` included).
      * every ``export default`` yields ``'default'``.
      * every ``module.exports =`` assignment yields ``'default'``.

    Python: every top-level (column 0) ``def``, ``async def`` or ``class``
    yields its name.

    Args:
        source: Full text of the file.
        lang: ``'javascript'``, ``'typescript'`` or ``'python'``.

    Returns:
        List of names; empty for any other ``lang``. JS/TS results are
        grouped by form in the order listed above; Python results are in
        source order.
    """
    exports = []
    if lang in ('javascript', 'typescript'):
        named = (r'export\s+(?:async\s+)?'
                 r'(?:function|class|const|let|var)\b\s*\*?\s*(\w+)')
        exports.extend(m.group(1) for m in re.finditer(named, source))
        # `export default ...` is exported under the name "default"
        # regardless of any local identifier that follows.
        exports.extend('default' for _ in re.finditer(r'export\s+default\b', source))
        exports.extend('default' for _ in re.finditer(r'module\.exports\s*=', source))
    elif lang == 'python':
        top_level = r'^(?:async\s+)?(?:def|class)\s+(\w+)'
        exports.extend(m.group(1)
                       for m in re.finditer(top_level, source, re.MULTILINE))
    return exports
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build_import_graph(file_paths):
    """Build the local import graph for a set of files.

    For every file whose language is recognised, the imports are extracted
    and resolved; an edge is recorded only when the import resolves to
    another file that is itself in ``file_paths``.

    Args:
        file_paths: Iterable of file paths to include in the graph.

    Returns:
        Dict mapping each readable, recognised file's absolute path to a
        list of edges ``{'module': <specifier>, 'resolved_path': <abs path>}``.
        Each imported file appears at most once per importer. Files with an
        unknown language or that cannot be read are omitted.
    """
    graph = {}
    file_set = {os.path.abspath(f) for f in file_paths}

    for file_path in file_paths:
        abs_path = os.path.abspath(file_path)
        lang = detect_language(file_path)
        if lang in ('javascript', 'typescript'):
            extract = extract_js_imports
        elif lang == 'python':
            extract = extract_py_imports
        else:
            continue

        try:
            # Context manager so the handle is closed even if read() fails.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                source = handle.read()
        except (OSError, IOError):
            continue

        base_dir = os.path.dirname(abs_path)
        edges = []
        seen = set()
        for mod in extract(source):
            resolved = resolve_local_import(mod, base_dir, lang)
            if not resolved:
                continue
            resolved_abs = os.path.abspath(resolved)
            # Skip self-imports, files outside the scanned set, and targets
            # already linked (the same file imported twice would otherwise
            # yield duplicate edges and duplicate downstream warnings).
            if (resolved_abs == abs_path
                    or resolved_abs not in file_set
                    or resolved_abs in seen):
                continue
            seen.add(resolved_abs)
            edges.append({
                'module': mod,
                'resolved_path': resolved_abs,
            })

        graph[abs_path] = edges

    return graph
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def cross_file_analyze(file_paths):
    """Run cross-file taint analysis.

    1. Analyze each file independently with ``analyze_file``.
    2. Build the import graph.
    3. For each file importing from another file that has error-severity
       findings, add a ``cross-file-taint-warning`` finding.

    Args:
        file_paths: List of file paths to analyze together.

    Returns:
        List of finding dicts: all per-file findings (each tagged with a
        ``'file'`` key holding the path as given) followed by the
        cross-file warnings (whose ``'file'`` is the importer's absolute
        path, as produced by the import graph).
    """
    file_findings = {}
    all_findings = []

    for file_path in file_paths:
        try:
            results = analyze_file(file_path)
            if not isinstance(results, list):
                # Non-list results (e.g. an error object) carry no findings.
                continue
            for finding in results:
                finding['file'] = file_path
            file_findings[os.path.abspath(file_path)] = results
            all_findings.extend(results)
        except Exception as exc:
            # Best-effort: one bad file must not abort the whole scan, but
            # leave a trace on stderr (stdout is reserved for JSON output).
            print(f'cross_file_analyze: skipping {file_path}: {exc}',
                  file=sys.stderr)
            continue

    graph = build_import_graph(file_paths)

    cross_file_warnings = []
    for file_path, edges in graph.items():
        for edge in edges:
            imported_path = edge['resolved_path']
            imported_findings = file_findings.get(imported_path, [])

            # Case-insensitive match so both 'error' and 'ERROR' count.
            # NOTE(review): the exact severity casing emitted by the
            # per-file analyzer is not visible here -- confirm.
            error_findings = [
                f for f in imported_findings
                if str(f.get('severity', '')).lower() == 'error'
            ]
            if not error_findings:
                continue

            # Sorted so the message is deterministic across runs.
            rule_ids = sorted({f.get('ruleId', 'unknown') for f in error_findings})
            cross_file_warnings.append({
                'ruleId': 'cross-file-taint-warning',
                'severity': 'warning',
                'message': f"Imports from '{os.path.basename(imported_path)}' which has {len(error_findings)} critical finding(s): {', '.join(rule_ids)}",
                'file': file_path,
                'line': 0,
                'metadata': {
                    'imported_file': imported_path,
                    'imported_findings_count': len(error_findings),
                },
            })

    # Combine: per-file findings + cross-file warnings
    return all_findings + cross_file_warnings
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def main():
    """Command-line entry point.

    Takes file paths as positional arguments, keeps those that exist as
    regular files, runs the cross-file analysis on them and prints the
    result as a single JSON document on stdout. Prints a JSON error object
    and exits with status 1 when no arguments are given or none of them is
    an existing file.
    """
    requested = sys.argv[1:]
    if not requested:
        print(json.dumps({'error': 'Usage: cross_file_analyzer.py file1 file2 ...'}))
        sys.exit(1)

    # Silently drop anything that is not an existing regular file.
    existing = [path for path in requested if os.path.isfile(path)]
    if not existing:
        print(json.dumps({'error': 'No valid files provided'}))
        sys.exit(1)

    print(json.dumps(cross_file_analyze(existing)))


if __name__ == '__main__':
    main()
|
package/daemon.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""JSONL daemon wrapping analyzer.py for persistent process reuse.
|
|
3
|
+
|
|
4
|
+
Protocol: One JSON object per line over stdin/stdout. stderr for debug logs only.
|
|
5
|
+
Startup: sends {"id":"__ready__","success":true,"result":{"status":"ready"}}
|
|
6
|
+
Actions: analyze, cross_file_analyze, health, shutdown
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
# CRITICAL: Redirect stdout to stderr BEFORE any imports can print to stdout.
|
|
13
|
+
# This prevents any imported library from corrupting the JSONL protocol channel.
|
|
14
|
+
_protocol_stdout = sys.stdout
|
|
15
|
+
sys.stdout = sys.stderr
|
|
16
|
+
|
|
17
|
+
# Now safe to import everything
|
|
18
|
+
import json
|
|
19
|
+
import time
|
|
20
|
+
from collections import OrderedDict
|
|
21
|
+
|
|
22
|
+
# Add script directory to path so analyzer imports work
|
|
23
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
24
|
+
|
|
25
|
+
from analyzer import analyze_file, analyze_file_ast, analyze_file_regex
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from cross_file_analyzer import cross_file_analyze
|
|
29
|
+
HAS_CROSS_FILE = True
|
|
30
|
+
except ImportError:
|
|
31
|
+
HAS_CROSS_FILE = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class LRUCache:
    """Simple LRU cache of per-file results, capped at max_size entries.

    Each file path holds at most one entry, stamped with the file's
    modification time at ``put`` time. A lookup only hits when the file's
    current mtime equals the stored one; a mismatching entry is discarded
    on lookup and replaced on the next ``put``, so outdated results never
    occupy slots alongside the current one.
    """

    def __init__(self, max_size=200):
        # file_path -> (mtime, result), ordered oldest-use first.
        self._cache = OrderedDict()
        self._max_size = max_size

    def get(self, file_path):
        """Return the cached result for ``file_path`` or ``None``.

        ``None`` is returned when the file cannot be stat'ed, when nothing
        is cached for it, or when the file's mtime differs from the one
        recorded with the cached result.
        """
        try:
            mtime = os.path.getmtime(file_path)
        except OSError:
            return None
        entry = self._cache.get(file_path)
        if entry is None:
            return None
        cached_mtime, result = entry
        if cached_mtime != mtime:
            # File changed since it was cached: drop the stale result.
            del self._cache[file_path]
            return None
        self._cache.move_to_end(file_path)
        return result

    def put(self, file_path, result):
        """Store ``result`` for the current version of ``file_path``.

        Does nothing when the file cannot be stat'ed. Evicts the least
        recently used entries while the cache exceeds ``max_size``.
        """
        try:
            mtime = os.path.getmtime(file_path)
        except OSError:
            return
        self._cache[file_path] = (mtime, result)
        self._cache.move_to_end(file_path)
        while len(self._cache) > self._max_size:
            self._cache.popitem(last=False)

    @property
    def size(self):
        """Number of entries currently cached."""
        return len(self._cache)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
_cache = LRUCache(max_size=200)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def send_response(obj):
    """Write one JSON line to the protocol channel (original stdout).

    If ``obj`` cannot be serialized, an error response carrying the same
    ``id`` is written instead, so the request still receives a reply and
    the daemon loop is not terminated by the serialization error.

    Args:
        obj: Response object, normally a dict with ``id`` and ``success``.
    """
    try:
        line = json.dumps(obj, separators=(',', ':'))
    except (TypeError, ValueError) as exc:
        req_id = obj.get('id') if isinstance(obj, dict) else None
        fallback = {
            'id': req_id,
            'success': False,
            'error': f'Response not JSON-serializable: {exc}',
        }
        # default=str guarantees the fallback itself can always be encoded.
        line = json.dumps(fallback, separators=(',', ':'), default=str)
    _protocol_stdout.write(line + '\n')
    # Flush per message: the peer reads line by line and would otherwise
    # wait on buffered output.
    _protocol_stdout.flush()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def handle_analyze(req):
    """Handle an ``analyze`` request for a single file.

    Args:
        req: Request dict. Reads ``file_path`` (required) and ``engine``
            (``'auto'`` by default; ``'regex'`` and ``'ast'`` select a
            specific engine, any other value is treated like ``'auto'``
            but bypasses the cache).

    Returns:
        ``{'success': True, 'result': ..., 'cached': bool, 'cache_size': int}``
        on success, or ``{'success': False, 'error': str}`` when the path is
        missing/invalid, the analyzer raises, or the analyzer returns an
        error object.
    """
    file_path = req.get('file_path')
    engine = req.get('engine', 'auto')

    # Require a real string path: os.path.exists() interprets integers as
    # file descriptors and raises TypeError for other types, either of
    # which would let a malformed request through or crash the caller.
    if not isinstance(file_path, str) or not file_path or not os.path.exists(file_path):
        return {'success': False, 'error': f'File not found: {file_path}'}

    # Check cache (only for engine=auto)
    if engine == 'auto':
        cached = _cache.get(file_path)
        if cached is not None:
            return {'success': True, 'result': cached, 'cached': True, 'cache_size': _cache.size}

    try:
        if engine == 'regex':
            result = analyze_file_regex(file_path)
        elif engine == 'ast':
            result = analyze_file_ast(file_path)
        else:
            result = analyze_file(file_path)
    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # instead of letting it propagate.
        return {'success': False, 'error': str(e)}

    if isinstance(result, dict) and 'error' in result:
        return {'success': False, 'error': result['error']}

    # Cache result (only for engine=auto)
    if engine == 'auto':
        _cache.put(file_path, result)

    return {'success': True, 'result': result, 'cached': False, 'cache_size': _cache.size}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def handle_cross_file_analyze(req):
    """Handle a ``cross_file_analyze`` request.

    Args:
        req: Request dict; ``file_paths`` must be a non-empty list (or
            tuple) of path strings.

    Returns:
        ``{'success': True, 'result': <findings>}`` on success, or
        ``{'success': False, 'error': str}`` when ``file_paths`` is missing
        or malformed, the cross-file analyzer could not be imported, or the
        analysis raises.
    """
    file_paths = req.get('file_paths', [])
    if not file_paths:
        return {'success': False, 'error': 'No file_paths provided'}
    # A bare string would otherwise be iterated character by character.
    if (not isinstance(file_paths, (list, tuple))
            or not all(isinstance(p, str) for p in file_paths)):
        return {'success': False, 'error': 'file_paths must be a list of strings'}
    if not HAS_CROSS_FILE:
        return {'success': False, 'error': 'cross_file_analyzer not available'}

    try:
        result = cross_file_analyze(file_paths)
    except Exception as e:
        # Top-level request boundary: report instead of propagating.
        return {'success': False, 'error': str(e)}
    return {'success': True, 'result': result}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def handle_health():
    """Build the response for a ``health`` request.

    Returns:
        ``{'success': True, 'result': {...}}`` where the result holds the
        status string, current cache size, process id, and uptime in
        seconds since ``main()`` recorded the start time (``0.0`` if the
        start time has not been recorded yet).
    """
    # _start_time is only assigned inside main(); look it up defensively so
    # calling this handler before main() runs does not raise NameError.
    started = globals().get('_start_time')
    uptime = time.time() - started if started is not None else 0.0
    return {
        'success': True,
        'result': {
            'status': 'healthy',
            'cache_size': _cache.size,
            'pid': os.getpid(),
            'uptime': uptime,
        }
    }
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def main():
    """Run the JSONL request loop until shutdown or end of stdin.

    Sends a ``__ready__`` message first, then reads one JSON request per
    stdin line and writes exactly one JSON response per non-blank request
    line. Malformed lines, non-object requests, unknown actions and
    handler failures produce an error response and the loop continues;
    the ``shutdown`` action is acknowledged and ends the loop.
    """
    global _start_time
    _start_time = time.time()

    # Signal readiness
    send_response({
        'id': '__ready__',
        'success': True,
        'result': {'status': 'ready'}
    })

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError as e:
            send_response({'id': None, 'success': False, 'error': f'Invalid JSON: {e}'})
            continue

        # Valid JSON that is not an object (e.g. `[1]` or `"x"`) has no
        # .get(); reject it instead of crashing the daemon.
        if not isinstance(req, dict):
            send_response({'id': None, 'success': False,
                           'error': 'Invalid request: expected a JSON object'})
            continue

        req_id = req.get('id')
        action = req.get('action')

        if action == 'shutdown':
            send_response({'id': req_id, 'success': True, 'result': {'status': 'shutdown'}})
            break

        try:
            if action == 'health':
                resp = handle_health()
            elif action == 'analyze':
                resp = handle_analyze(req)
            elif action == 'cross_file_analyze':
                resp = handle_cross_file_analyze(req)
            else:
                resp = {'success': False, 'error': f'Unknown action: {action}'}
        except Exception as e:
            # Last line of defence: a failing handler must not take the
            # long-running process down with it.
            resp = {'success': False, 'error': str(e)}

        resp['id'] = req_id
        send_response(resp)


if __name__ == '__main__':
    main()
|