sift-triage 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. sift_triage-1.0.0/PKG-INFO +388 -0
  2. sift_triage-1.0.0/README.md +349 -0
  3. sift_triage-1.0.0/pyproject.toml +69 -0
  4. sift_triage-1.0.0/setup.cfg +4 -0
  5. sift_triage-1.0.0/sift/__init__.py +3 -0
  6. sift_triage-1.0.0/sift/banner.py +49 -0
  7. sift_triage-1.0.0/sift/cache.py +389 -0
  8. sift_triage-1.0.0/sift/config.py +147 -0
  9. sift_triage-1.0.0/sift/doctor.py +374 -0
  10. sift_triage-1.0.0/sift/enrichers/__init__.py +1 -0
  11. sift_triage-1.0.0/sift/enrichers/barb_bridge.py +95 -0
  12. sift_triage-1.0.0/sift/enrichers/local_heuristics.py +114 -0
  13. sift_triage-1.0.0/sift/enrichers/protocol.py +21 -0
  14. sift_triage-1.0.0/sift/enrichers/runner.py +95 -0
  15. sift_triage-1.0.0/sift/enrichers/vex_bridge.py +103 -0
  16. sift_triage-1.0.0/sift/filtering.py +594 -0
  17. sift_triage-1.0.0/sift/main.py +656 -0
  18. sift_triage-1.0.0/sift/metrics.py +170 -0
  19. sift_triage-1.0.0/sift/models.py +227 -0
  20. sift_triage-1.0.0/sift/normalizers/__init__.py +1 -0
  21. sift_triage-1.0.0/sift/normalizers/csv_normalizer.py +77 -0
  22. sift_triage-1.0.0/sift/normalizers/generic.py +126 -0
  23. sift_triage-1.0.0/sift/normalizers/protocol.py +28 -0
  24. sift_triage-1.0.0/sift/normalizers/splunk.py +103 -0
  25. sift_triage-1.0.0/sift/output/__init__.py +1 -0
  26. sift_triage-1.0.0/sift/output/export.py +232 -0
  27. sift_triage-1.0.0/sift/output/formatter.py +375 -0
  28. sift_triage-1.0.0/sift/output/stix.py +351 -0
  29. sift_triage-1.0.0/sift/pipeline/__init__.py +9 -0
  30. sift_triage-1.0.0/sift/pipeline/attck.py +30 -0
  31. sift_triage-1.0.0/sift/pipeline/chunker.py +165 -0
  32. sift_triage-1.0.0/sift/pipeline/clusterer.py +465 -0
  33. sift_triage-1.0.0/sift/pipeline/dedup.py +206 -0
  34. sift_triage-1.0.0/sift/pipeline/ioc_extractor.py +360 -0
  35. sift_triage-1.0.0/sift/pipeline/prioritizer.py +131 -0
  36. sift_triage-1.0.0/sift/summarizers/__init__.py +1 -0
  37. sift_triage-1.0.0/sift/summarizers/anthropic.py +150 -0
  38. sift_triage-1.0.0/sift/summarizers/injection_detector.py +306 -0
  39. sift_triage-1.0.0/sift/summarizers/mock.py +60 -0
  40. sift_triage-1.0.0/sift/summarizers/ollama.py +170 -0
  41. sift_triage-1.0.0/sift/summarizers/openai.py +148 -0
  42. sift_triage-1.0.0/sift/summarizers/prompt.py +354 -0
  43. sift_triage-1.0.0/sift/summarizers/protocol.py +33 -0
  44. sift_triage-1.0.0/sift/summarizers/template.py +220 -0
  45. sift_triage-1.0.0/sift/summarizers/validation.py +238 -0
  46. sift_triage-1.0.0/sift/version_check.py +58 -0
  47. sift_triage-1.0.0/sift_triage.egg-info/PKG-INFO +388 -0
  48. sift_triage-1.0.0/sift_triage.egg-info/SOURCES.txt +77 -0
  49. sift_triage-1.0.0/sift_triage.egg-info/dependency_links.txt +1 -0
  50. sift_triage-1.0.0/sift_triage.egg-info/entry_points.txt +2 -0
  51. sift_triage-1.0.0/sift_triage.egg-info/requires.txt +22 -0
  52. sift_triage-1.0.0/sift_triage.egg-info/top_level.txt +1 -0
  53. sift_triage-1.0.0/tests/test_attck_validation.py +116 -0
  54. sift_triage-1.0.0/tests/test_cache.py +309 -0
  55. sift_triage-1.0.0/tests/test_chunking.py +154 -0
  56. sift_triage-1.0.0/tests/test_clusterer.py +258 -0
  57. sift_triage-1.0.0/tests/test_clusterer_performance.py +313 -0
  58. sift_triage-1.0.0/tests/test_dedup.py +360 -0
  59. sift_triage-1.0.0/tests/test_e2e_validation.py +521 -0
  60. sift_triage-1.0.0/tests/test_edge_cases.py +478 -0
  61. sift_triage-1.0.0/tests/test_enrich_local.py +139 -0
  62. sift_triage-1.0.0/tests/test_enrichers.py +445 -0
  63. sift_triage-1.0.0/tests/test_export.py +180 -0
  64. sift_triage-1.0.0/tests/test_filtering.py +395 -0
  65. sift_triage-1.0.0/tests/test_injection_detector.py +434 -0
  66. sift_triage-1.0.0/tests/test_ioc_extractor.py +353 -0
  67. sift_triage-1.0.0/tests/test_metrics.py +231 -0
  68. sift_triage-1.0.0/tests/test_mock_summarizer.py +274 -0
  69. sift_triage-1.0.0/tests/test_models.py +232 -0
  70. sift_triage-1.0.0/tests/test_normalizers.py +437 -0
  71. sift_triage-1.0.0/tests/test_pipeline.py +251 -0
  72. sift_triage-1.0.0/tests/test_pipeline_edge_cases.py +404 -0
  73. sift_triage-1.0.0/tests/test_pipeline_integration.py +245 -0
  74. sift_triage-1.0.0/tests/test_prioritizer.py +221 -0
  75. sift_triage-1.0.0/tests/test_prompt_providers.py +315 -0
  76. sift_triage-1.0.0/tests/test_redaction.py +200 -0
  77. sift_triage-1.0.0/tests/test_stix_export.py +570 -0
  78. sift_triage-1.0.0/tests/test_summarizers.py +189 -0
  79. sift_triage-1.0.0/tests/test_validation.py +461 -0
@@ -0,0 +1,388 @@
1
+ Metadata-Version: 2.4
2
+ Name: sift-triage
3
+ Version: 1.0.0
4
+ Summary: AI-powered alert triage summarizer for SOC teams
5
+ Author-email: Christian Huhn <duathron@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/duathron/sift
8
+ Project-URL: Repository, https://github.com/duathron/sift
9
+ Project-URL: Bug Tracker, https://github.com/duathron/sift/issues
10
+ Keywords: siem,alert-triage,soc,dfir,mitre-attack,ai,cli
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Information Technology
14
+ Classifier: Topic :: Security
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: typer>=0.12.0
22
+ Requires-Dist: rich>=13.7.0
23
+ Requires-Dist: pydantic>=2.7.0
24
+ Requires-Dist: pyyaml>=6.0.1
25
+ Requires-Dist: python-dotenv>=1.0.0
26
+ Provides-Extra: llm
27
+ Requires-Dist: anthropic>=0.30.0; extra == "llm"
28
+ Requires-Dist: openai>=1.30.0; extra == "llm"
29
+ Provides-Extra: enrich
30
+ Requires-Dist: barb-phish>=1.0.0; extra == "enrich"
31
+ Provides-Extra: all
32
+ Requires-Dist: anthropic>=0.30.0; extra == "all"
33
+ Requires-Dist: openai>=1.30.0; extra == "all"
34
+ Requires-Dist: barb-phish>=1.0.0; extra == "all"
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
37
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
38
+ Requires-Dist: mypy>=1.10.0; extra == "dev"
39
+
40
+ # sift
41
+
42
+ ```
43
+  ____ ___ _____ _____
44
+ / ___|_ _|  ___|_   _|
45
+ \___ \| || |_    | |
46
+  ___) | ||  _|   | |
47
+ |____/___|_|     |_|
48
+ ```
49
+
50
+ **AI-Powered Alert Triage Summarizer for SOC Teams**
51
+
52
+ `sift` ingests raw security alerts, deduplicates and clusters related events, scores them by priority, and delivers a structured triage summary — with optional AI-generated analysis. Part of the barb → vex → sift SOC workflow trilogy.
53
+
54
+ ---
55
+
56
+ ## Features
57
+
58
+ - Ingest alerts from generic JSON, Splunk exports, or CSV
59
+ - Deduplicate noisy alert streams before analysis
60
+ - Extract IOCs (IPs, domains, hashes, URLs) from alert fields automatically
61
+ - Cluster related alerts by IOC overlap, category + time window, or IP-pair correlation
62
+ - Score clusters across five priority tiers: NOISE / LOW / MEDIUM / HIGH / CRITICAL
63
+ - AI summarization via Anthropic Claude, OpenAI, Ollama (local), or template-based with no LLM required
64
+ - Rich terminal output with priority-colored cluster table
65
+ - Export to JSON, CSV, or STIX 2.1 for downstream tooling
66
+ - Filter clusters using a boolean DSL (`--filter 'priority >= HIGH AND ...'`)
67
+ - Enrich IOCs via barb (phishing URL analysis) and vex (VirusTotal reputation) with `--enrich`
68
+ - Cache triage results by input fingerprint with `--cache` (opt-in, 1h TTL)
69
+ - Validate LLM output schema and detect prompt injection attacks
70
+ - `sift metrics <file>` command for cluster and IOC distribution statistics
71
+ - `sift doctor` diagnostics to verify configuration, LLM connectivity, and dependencies
72
+ - PyPI version check on startup
73
+
74
+ ---
75
+
76
+ ## Installation
77
+
78
+ ```bash
79
+ pip install sift-triage
80
+ ```
81
+
82
+ **Optional extras:**
83
+
84
+ ```bash
85
+ # LLM summarization (Anthropic + OpenAI)
86
+ pip install "sift-triage[llm]"
87
+
88
+ # IOC enrichment via barb/vex (note: this extra lists only barb-phish; vex-ioc is a separate package)
89
+ pip install "sift-triage[enrich]"
90
+
91
+ # Everything
92
+ pip install "sift-triage[llm,enrich]"
93
+ ```
94
+
95
+ ### Kali Linux / Debian
96
+
97
+ ```bash
98
+ # Recommended: use pipx for isolated CLI tool installation
99
+ sudo apt install pipx # or: pip install pipx
100
+ pipx install sift-triage
101
+
102
+ # With LLM support
103
+ pipx install "sift-triage[llm]"
104
+
105
+ # With barb + vex enrichment
106
+ pipx install "sift-triage[enrich]"
107
+ ```
108
+
109
+ > **Note:** Python 3.11+ required. Kali Linux 2024+ includes Python 3.12 by default.
110
+ > On older systems: `sudo apt install python3.12 python3.12-venv`
111
+
112
+ ---
113
+
114
+ ## Quick Start
115
+
116
+ **Triage a JSON alert file:**
117
+ ```bash
118
+ sift triage alerts.json
119
+ ```
120
+
121
+ **Triage with AI summarization (Anthropic Claude):**
122
+ ```bash
123
+ sift triage alerts.json --summarize --provider anthropic
124
+ ```
125
+
126
+ **Pipe from Splunk or another tool:**
127
+ ```bash
128
+ cat splunk_export.json | sift triage -
129
+ ```
130
+
131
+ **Export triage report to JSON:**
132
+ ```bash
133
+ sift triage alerts.json -f json -o report.json
134
+ ```
135
+
136
+ **Export triage report as STIX 2.1 bundle:**
137
+ ```bash
138
+ sift triage alerts.json -f stix -o bundle.json
139
+ ```
140
+
141
+ **Filter to HIGH and CRITICAL clusters only:**
142
+ ```bash
143
+ sift triage alerts.json --filter 'priority >= HIGH'
144
+ ```
145
+
146
+ **Enable result caching (skip reprocessing on repeated runs):**
147
+ ```bash
148
+ sift triage alerts.json --cache
149
+ ```
150
+
151
+ **Show metrics for an alert file:**
152
+ ```bash
153
+ sift metrics alerts.json
154
+ ```
155
+
156
+ **Run diagnostics:**
157
+ ```bash
158
+ sift doctor
159
+ ```
160
+
161
+ **Enrich IOCs via barb (phishing URLs) + vex (VirusTotal):**
162
+ ```bash
163
+ sift triage alerts.json --enrich --summarize
164
+ ```
165
+
166
+ **Enrich only via barb (no VirusTotal API key needed):**
167
+ ```bash
168
+ sift triage alerts.json --enrich --enrich-mode barb
169
+ ```
170
+
171
+ ---
172
+
173
+ ## Workflow
174
+
175
+ `sift` is the third stage of a SOC analyst trilogy. Use `barb` to score and flag suspicious URLs in incoming data, pass flagged IOCs to `vex` for VirusTotal enrichment, then feed the enriched alert data into `sift` for cluster-level triage and summarization. Each tool is useful standalone; together they cover URL analysis → IOC reputation → alert prioritization in a single scriptable pipeline. The `--enrich` flag automates barb and vex calls directly from within `sift triage`.
176
+
177
+ ---
178
+
179
+ ## Input Formats
180
+
181
+ | Format | Description | Notes |
182
+ |---|---|---|
183
+ | Generic JSON | Array of alert objects or NDJSON | Any field schema; sift normalizes automatically |
184
+ | Splunk export | JSON export from Splunk Search | Handles `results` wrapper and Splunk field names |
185
+ | CSV | Comma-separated alert rows | First row treated as header; all fields extracted |
186
+
187
+ Pass `-` as the filename to read from stdin:
188
+ ```bash
189
+ splunk-cli export | sift triage -
190
+ ```
191
+
192
+ ---
193
+
194
+ ## LLM Providers
195
+
196
+ | Provider | Extra | Environment Variable | Notes |
197
+ |---|---|---|---|
198
+ | `template` | *(none)* | — | Default; no LLM required |
199
+ | `mock` | *(none)* | — | Deterministic mock output for testing and CI |
200
+ | `anthropic` | `[llm]` | `ANTHROPIC_API_KEY` | Claude via Anthropic API |
201
+ | `openai` | `[llm]` | `OPENAI_API_KEY` | GPT via OpenAI API |
202
+ | `ollama` | *(none)* | `SIFT_OLLAMA_URL` (optional) | Local inference; defaults to `http://localhost:11434` |
203
+
204
+ Set the default provider in `~/.sift/config.yaml` or via the `SIFT_PROVIDER` environment variable.
205
+
206
+ ---
207
+
208
+ ## Enrichment (barb + vex)
209
+
210
+ The `--enrich` flag enriches extracted IOCs using the sister tools:
211
+
212
+ | Tool | PyPI | What it does | Required |
213
+ |------|------|-------------|----------|
214
+ | barb | `barb-phish` | Heuristic phishing URL analysis | No (local) |
215
+ | vex | `vex-ioc` | VirusTotal IOC reputation lookup | API key via `VT_API_KEY` |
216
+
217
+ ```bash
218
+ # Install enrichment extras
219
+ pip install "sift-triage[enrich]"
220
+
221
+ # Run with enrichment
222
+ sift triage alerts.json --enrich
223
+
224
+ # Barb only (no API key needed)
225
+ sift triage alerts.json --enrich --enrich-mode barb
226
+
227
+ # Skip consent prompt
228
+ sift triage alerts.json --enrich --yes
229
+ ```
230
+
231
+ sift limits enrichment to 20 IOCs per run to avoid API rate limits.
232
+
233
+ ---
234
+
235
+ ## Output Formats
236
+
237
+ | Flag | Output |
238
+ |---|---|
239
+ | `rich` (default) | Color-coded cluster table in the terminal |
240
+ | `console` | Plain-text output, safe for logging |
241
+ | `json` | Structured JSON with all cluster and IOC data |
242
+ | `csv` | Flat CSV suitable for SIEM import or spreadsheets |
243
+ | `stix` | STIX 2.1 bundle JSON for threat intelligence platforms |
244
+
245
+ Use `-f` / `--format` to select output format, and `-o` / `--output` to write to a file.
246
+
247
+ ---
248
+
249
+ ## Advanced Usage
250
+
251
+ ### Alert Filtering
252
+
253
+ Use `--filter` to apply a boolean DSL to the cluster list after triage. Only matching clusters are included in the output.
254
+
255
+ ```bash
256
+ # Only HIGH and CRITICAL clusters
257
+ sift triage alerts.json --filter 'priority >= HIGH'
258
+
259
+ # Malware or phishing clusters with more than 3 IOCs
260
+ sift triage alerts.json --filter 'category IN (malware, phishing) AND ioc_count > 3'
261
+
262
+ # Exclude low-signal categories
263
+ sift triage alerts.json --filter 'NOT category IN (false_positive)'
264
+
265
+ # Combine priority and alert count conditions
266
+ sift triage alerts.json --filter 'priority >= MEDIUM AND alert_count >= 5'
267
+ ```
268
+
269
+ Supported fields: `priority`, `category`, `ioc_count`, `alert_count`.
270
+ Supported operators: `>=`, `<=`, `>`, `<`, `=`, `IN (...)`, `NOT`, `AND`, `OR`.
271
+
272
+ ### Result Caching
273
+
274
+ Use `--cache` to cache triage results by SHA-256 fingerprint of the input. Repeated runs over the same input return instantly from the cache (1-hour TTL, stored in `~/.sift/cache/`).
275
+
276
+ ```bash
277
+ # First run: processes and caches the result
278
+ sift triage alerts.json --cache
279
+
280
+ # Subsequent runs with the same file: returns from cache
281
+ sift triage alerts.json --cache
282
+
283
+ # Combine with other flags; cache stores the full triage output
284
+ sift triage alerts.json --cache --summarize --provider anthropic
285
+ ```
286
+
287
+ ### STIX 2.1 Export Pipeline
288
+
289
+ Export triage results as a STIX 2.1 threat intelligence bundle for ingestion into SIEM or TIP platforms.
290
+
291
+ ```bash
292
+ # Export to STIX bundle file
293
+ sift triage alerts.json -f stix -o bundle.json
294
+
295
+ # Combined enrichment and STIX export
296
+ sift triage alerts.json --enrich -f stix -o enriched_bundle.json
297
+
298
+ # Pipe STIX output to another tool
299
+ sift triage alerts.json -f stix | jq '.objects | length'
300
+ ```
301
+
302
+ ### Max Clusters
303
+
304
+ Limit the number of clusters returned by the pipeline using `max_clusters` in `~/.sift/config.yaml`. When the cluster count exceeds the limit, only the highest-priority clusters are retained. This is useful for large alert volumes where downstream tooling has per-report limits.
305
+
306
+ ```yaml
307
+ clustering:
308
+   max_clusters: 50
309
+ ```
310
+
311
+ ---
312
+
313
+ ## Metrics
314
+
315
+ The `sift metrics` command runs the full normalization, dedup, and clustering pipeline over an alert file and displays summary statistics without generating a triage report.
316
+
317
+ ```bash
318
+ sift metrics alerts.json
319
+ ```
320
+
321
+ Output includes:
322
+ - Total cluster count and alert count
323
+ - Average cluster size
324
+ - Top alert categories by frequency
325
+ - IOC type distribution (IPs, domains, hashes, URLs)
326
+ - AI summary success rate (if summaries were previously generated)
327
+
328
+ ```bash
329
+ # Skip deduplication for raw counts
330
+ sift metrics alerts.json --no-dedup
331
+
332
+ # Use a custom config file
333
+ sift metrics alerts.json --config /path/to/config.yaml
334
+ ```
335
+
336
+ ---
337
+
338
+ ## Validation and Security
339
+
340
+ sift validates all LLM outputs against a strict JSON schema. The `--validate-only` flag runs only the parse and validate steps, then exits:
341
+
342
+ ```bash
343
+ # Validate parsed structure without rendering output
344
+ sift triage alerts.json --validate-only
345
+ ```
346
+
347
+ A built-in prompt injection detector scans LLM inputs for five pattern categories: instruction overrides, output manipulation, JSON escapes, encoded payloads, and shell injection. Suspicious content is flagged and summarization falls back to the template provider automatically.
348
+
349
+ ---
350
+
351
+ ## Exit Codes
352
+
353
+ | Code | Meaning |
354
+ |---|---|
355
+ | `0` | Triage complete — no HIGH or CRITICAL clusters found |
356
+ | `1` | Triage complete — one or more HIGH or CRITICAL clusters found |
357
+ | `2` | Error — invalid input, configuration failure, or LLM error |
358
+
359
+ Exit code `1` is designed for use in CI pipelines and automated response playbooks.
360
+
361
+ ---
362
+
363
+ ## Configuration
364
+
365
+ ```bash
366
+ sift config --show # display current configuration
367
+ sift doctor # verify config, LLM connectivity, and dependencies
368
+ ```
369
+
370
+ Configuration is resolved in priority order: CLI flags > environment variables > `~/.sift/config.yaml` > defaults.
371
+
372
+ ---
373
+
374
+ ## Part of the SOC Trilogy
375
+
376
+ | Tool | Role | PyPI |
377
+ |---|---|---|
378
+ | [barb](https://github.com/duathron/barb) | Heuristic phishing URL analyzer | `barb-phish` |
379
+ | [vex](https://github.com/duathron/vex) | VirusTotal IOC enrichment | `vex-ioc` |
380
+ | **sift** | Alert triage summarizer | `sift-triage` |
381
+
382
+ ---
383
+
384
+ ## License
385
+
386
+ MIT — see [LICENSE](LICENSE) for details.
387
+
388
+ Author: Christian Huhn