canari-forensics 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. canari_forensics-0.1.0/LICENSE +21 -0
  2. canari_forensics-0.1.0/PKG-INFO +152 -0
  3. canari_forensics-0.1.0/README.md +109 -0
  4. canari_forensics-0.1.0/canari_forensics/__init__.py +19 -0
  5. canari_forensics-0.1.0/canari_forensics/__main__.py +4 -0
  6. canari_forensics-0.1.0/canari_forensics/attest.py +38 -0
  7. canari_forensics-0.1.0/canari_forensics/audit.py +82 -0
  8. canari_forensics-0.1.0/canari_forensics/cli.py +458 -0
  9. canari_forensics-0.1.0/canari_forensics/config.py +58 -0
  10. canari_forensics-0.1.0/canari_forensics/doctor.py +50 -0
  11. canari_forensics-0.1.0/canari_forensics/errors.py +23 -0
  12. canari_forensics-0.1.0/canari_forensics/export.py +37 -0
  13. canari_forensics-0.1.0/canari_forensics/models.py +18 -0
  14. canari_forensics-0.1.0/canari_forensics/parsers/__init__.py +4 -0
  15. canari_forensics-0.1.0/canari_forensics/parsers/databricks.py +154 -0
  16. canari_forensics-0.1.0/canari_forensics/parsers/otel.py +299 -0
  17. canari_forensics-0.1.0/canari_forensics/patterns.py +73 -0
  18. canari_forensics-0.1.0/canari_forensics/pdf.py +62 -0
  19. canari_forensics-0.1.0/canari_forensics/receiver.py +82 -0
  20. canari_forensics-0.1.0/canari_forensics/reporting.py +154 -0
  21. canari_forensics-0.1.0/canari_forensics/status.py +33 -0
  22. canari_forensics-0.1.0/canari_forensics/storage.py +72 -0
  23. canari_forensics-0.1.0/canari_forensics/summary.py +40 -0
  24. canari_forensics-0.1.0/canari_forensics/version.py +1 -0
  25. canari_forensics-0.1.0/canari_forensics.egg-info/PKG-INFO +152 -0
  26. canari_forensics-0.1.0/canari_forensics.egg-info/SOURCES.txt +47 -0
  27. canari_forensics-0.1.0/canari_forensics.egg-info/dependency_links.txt +1 -0
  28. canari_forensics-0.1.0/canari_forensics.egg-info/entry_points.txt +2 -0
  29. canari_forensics-0.1.0/canari_forensics.egg-info/top_level.txt +1 -0
  30. canari_forensics-0.1.0/pyproject.toml +42 -0
  31. canari_forensics-0.1.0/setup.cfg +4 -0
  32. canari_forensics-0.1.0/tests/test_cli_scan.py +95 -0
  33. canari_forensics-0.1.0/tests/test_databricks_parser.py +91 -0
  34. canari_forensics-0.1.0/tests/test_otel_parser.py +102 -0
  35. canari_forensics-0.1.0/tests/test_phase10_versioning.py +70 -0
  36. canari_forensics-0.1.0/tests/test_phase11_status.py +40 -0
  37. canari_forensics-0.1.0/tests/test_phase12_export_csv.py +77 -0
  38. canari_forensics-0.1.0/tests/test_phase13_attestation.py +85 -0
  39. canari_forensics-0.1.0/tests/test_phase14_custom_patterns.py +67 -0
  40. canari_forensics-0.1.0/tests/test_phase15_doctor.py +28 -0
  41. canari_forensics-0.1.0/tests/test_phase16_summarize.py +73 -0
  42. canari_forensics-0.1.0/tests/test_phase2_sources_and_receiver.py +76 -0
  43. canari_forensics-0.1.0/tests/test_phase3_reporting.py +69 -0
  44. canari_forensics-0.1.0/tests/test_phase4_audit_workflow.py +60 -0
  45. canari_forensics-0.1.0/tests/test_phase5_config_run.py +69 -0
  46. canari_forensics-0.1.0/tests/test_phase6_demo_script.py +25 -0
  47. canari_forensics-0.1.0/tests/test_phase7_packaging.py +19 -0
  48. canari_forensics-0.1.0/tests/test_phase8_cli_validation.py +64 -0
  49. canari_forensics-0.1.0/tests/test_phase9_error_codes.py +43 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Canari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.1
2
+ Name: canari-forensics
3
+ Version: 0.1.0
4
+ Summary: Scan historical LLM traces for credential leaks and prompt-injection evidence
5
+ Author: Canari
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Canari
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/cholmess/canari-forensics
29
+ Project-URL: Repository, https://github.com/cholmess/canari-forensics
30
+ Project-URL: Issues, https://github.com/cholmess/canari-forensics/issues
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Operating System :: OS Independent
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3 :: Only
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Requires-Python: >=3.10
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+
44
+ # canari-forensics
45
+
46
+ [![PyPI package](https://img.shields.io/badge/pypi-package%20pending-lightgrey)](docs/PYPI_RELEASE.md)
47
+ [![CI](https://github.com/cholmess/canari-forensics/actions/workflows/ci.yml/badge.svg)](https://github.com/cholmess/canari-forensics/actions/workflows/ci.yml)
48
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
49
+
50
+ Scan your LLM logs for breaches that already happened.
51
+
52
+ LLM applications can leak internal context through prompt injection
53
+ attacks. Your firewall never flags it because the exfiltration looks
54
+ exactly like a legitimate API response. Most teams find out weeks
55
+ later - if ever.
56
+
57
+ Canari Forensics scans your existing LLM conversation logs and tells
58
+ you definitively whether you have had any successful prompt injection
59
+ or credential leakage before you were monitoring. Exact pattern
60
+ matching, no classifiers, no false positives. Runs locally in under
61
+ a minute. No data leaves your environment.
62
+
63
+ ## Install
64
+
65
+ ```bash
66
+ pip install canari-forensics
67
+ ```
68
+
69
+ If your environment blocks package installs, you can run directly with `python3 -m canari_forensics ...`.
70
+ After install, run `canari ...` directly.
71
+
72
+ ## Quick start
73
+
74
+ ```bash
75
+ # 1) Scan OTEL JSON exports (generic/datadog/honeycomb via --provider)
76
+ canari forensics scan \
77
+ --source otel \
78
+ --provider generic \
79
+ --logs ./otel-traces \
80
+ --file-pattern '*.json' \
81
+ --out ./forensics-scan.json
82
+
83
+ # 2) Generate enterprise audit outputs
84
+ canari forensics report \
85
+ --scan-report ./forensics-scan.json \
86
+ --client "Acme Corp" \
87
+ --application "AI Gateway" \
88
+ --out-pdf ./audit-report.pdf \
89
+ --out-evidence ./canari-evidence.json \
90
+ --bp-dir ./tests/attacks
91
+ ```
92
+
93
+ ## Staged audit workflow
94
+
95
+ ```bash
96
+ # initialize audit workspace
97
+ canari forensics audit init \
98
+ --name "Q1 2026 AI Gateway Audit" \
99
+ --source otel \
100
+ --provider generic \
101
+ --logs ./otel-traces \
102
+ --client "Acme Corp" \
103
+ --application "AI Gateway"
104
+
105
+ # run scan and report using stored metadata
106
+ canari forensics audit scan --audit-id q1-2026-ai-gateway-audit
107
+ canari forensics audit report --audit-id q1-2026-ai-gateway-audit
108
+ ```
109
+
110
+ ## One-command audit from config
111
+
112
+ ```bash
113
+ cp .canari.yml.example .canari.yml
114
+ canari forensics audit run --config .canari.yml
115
+ ```
116
+
117
+ ## Custom pattern packs
118
+
119
+ ```bash
120
+ canari forensics report \
121
+ --scan-report ./forensics-scan.json \
122
+ --client "Acme Corp" \
123
+ --application "AI Gateway" \
124
+ --out-pdf ./audit-report.pdf \
125
+ --out-evidence ./canari-evidence.json \
126
+ --bp-dir ./tests/attacks \
127
+ --patterns-file ./custom_patterns.json
128
+ ```
129
+
130
+ The JSON file should contain either `{"patterns": [...]}` or a top-level array,
131
+ where each pattern has: `pattern_id`, `name`, `severity`, `confidence`, `kind`, `regex`.
132
+
133
+ ## Local demo checkpoint
134
+
135
+ ```bash
136
+ ./scripts/demo_local_audit.sh
137
+ ```
138
+
139
+ ## Real-time OTLP receiver
140
+
141
+ ```bash
142
+ canari forensics receive \
143
+ --host 0.0.0.0 \
144
+ --port 4318 \
145
+ --db ./canari-forensics.db
146
+ ```
147
+
148
+ Outputs:
149
+ - Scan JSON with normalized conversation turns
150
+ - Evidence JSON with findings and metadata
151
+ - PDF audit report for executive review
152
+ - `.bp.json` snapshots for BreakPoint CI workflows
@@ -0,0 +1,109 @@
1
+ # canari-forensics
2
+
3
+ [![PyPI package](https://img.shields.io/badge/pypi-package%20pending-lightgrey)](docs/PYPI_RELEASE.md)
4
+ [![CI](https://github.com/cholmess/canari-forensics/actions/workflows/ci.yml/badge.svg)](https://github.com/cholmess/canari-forensics/actions/workflows/ci.yml)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
6
+
7
+ Scan your LLM logs for breaches that already happened.
8
+
9
+ LLM applications can leak internal context through prompt injection
10
+ attacks. Your firewall never flags it because the exfiltration looks
11
+ exactly like a legitimate API response. Most teams find out weeks
12
+ later - if ever.
13
+
14
+ Canari Forensics scans your existing LLM conversation logs and tells
15
+ you definitively whether you have had any successful prompt injection
16
+ or credential leakage before you were monitoring. Exact pattern
17
+ matching, no classifiers, no false positives. Runs locally in under
18
+ a minute. No data leaves your environment.
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install canari-forensics
24
+ ```
25
+
26
+ If your environment blocks package installs, you can run directly with `python3 -m canari_forensics ...`.
27
+ After install, run `canari ...` directly.
28
+
29
+ ## Quick start
30
+
31
+ ```bash
32
+ # 1) Scan OTEL JSON exports (generic/datadog/honeycomb via --provider)
33
+ canari forensics scan \
34
+ --source otel \
35
+ --provider generic \
36
+ --logs ./otel-traces \
37
+ --file-pattern '*.json' \
38
+ --out ./forensics-scan.json
39
+
40
+ # 2) Generate enterprise audit outputs
41
+ canari forensics report \
42
+ --scan-report ./forensics-scan.json \
43
+ --client "Acme Corp" \
44
+ --application "AI Gateway" \
45
+ --out-pdf ./audit-report.pdf \
46
+ --out-evidence ./canari-evidence.json \
47
+ --bp-dir ./tests/attacks
48
+ ```
49
+
50
+ ## Staged audit workflow
51
+
52
+ ```bash
53
+ # initialize audit workspace
54
+ canari forensics audit init \
55
+ --name "Q1 2026 AI Gateway Audit" \
56
+ --source otel \
57
+ --provider generic \
58
+ --logs ./otel-traces \
59
+ --client "Acme Corp" \
60
+ --application "AI Gateway"
61
+
62
+ # run scan and report using stored metadata
63
+ canari forensics audit scan --audit-id q1-2026-ai-gateway-audit
64
+ canari forensics audit report --audit-id q1-2026-ai-gateway-audit
65
+ ```
66
+
67
+ ## One-command audit from config
68
+
69
+ ```bash
70
+ cp .canari.yml.example .canari.yml
71
+ canari forensics audit run --config .canari.yml
72
+ ```
73
+
74
+ ## Custom pattern packs
75
+
76
+ ```bash
77
+ canari forensics report \
78
+ --scan-report ./forensics-scan.json \
79
+ --client "Acme Corp" \
80
+ --application "AI Gateway" \
81
+ --out-pdf ./audit-report.pdf \
82
+ --out-evidence ./canari-evidence.json \
83
+ --bp-dir ./tests/attacks \
84
+ --patterns-file ./custom_patterns.json
85
+ ```
86
+
87
+ The JSON file should contain either `{"patterns": [...]}` or a top-level array,
88
+ where each pattern has: `pattern_id`, `name`, `severity`, `confidence`, `kind`, `regex`.
89
+
90
+ ## Local demo checkpoint
91
+
92
+ ```bash
93
+ ./scripts/demo_local_audit.sh
94
+ ```
95
+
96
+ ## Real-time OTLP receiver
97
+
98
+ ```bash
99
+ canari forensics receive \
100
+ --host 0.0.0.0 \
101
+ --port 4318 \
102
+ --db ./canari-forensics.db
103
+ ```
104
+
105
+ Outputs:
106
+ - Scan JSON with normalized conversation turns
107
+ - Evidence JSON with findings and metadata
108
+ - PDF audit report for executive review
109
+ - `.bp.json` snapshots for BreakPoint CI workflows
@@ -0,0 +1,19 @@
1
+ """Canari Forensics package."""
2
+
3
+ from .models import ConversationTurn
4
+ from .parsers import DatabricksAIGatewayParser, OTELParser
5
+ from .receiver import OTLPReceiver
6
+ from .reporting import Finding, detect_findings
7
+ from .storage import SQLiteTurnStore
8
+ from .version import __version__
9
+
10
+ __all__ = [
11
+ "ConversationTurn",
12
+ "OTELParser",
13
+ "DatabricksAIGatewayParser",
14
+ "OTLPReceiver",
15
+ "SQLiteTurnStore",
16
+ "Finding",
17
+ "detect_findings",
18
+ "__version__",
19
+ ]
@@ -0,0 +1,4 @@
1
+ from canari_forensics.cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ def file_sha256(path: str | Path) -> str:
10
+ p = Path(path)
11
+ h = hashlib.sha256()
12
+ with p.open("rb") as f:
13
+ for chunk in iter(lambda: f.read(65536), b""):
14
+ h.update(chunk)
15
+ return h.hexdigest()
16
+
17
+
18
def create_attestation(evidence_path: str | Path, out_path: str | Path) -> dict[str, Any]:
    """Write a JSON attestation tying an evidence file to its SHA-256 digest.

    Args:
        evidence_path: File to attest; it is hashed with ``file_sha256``.
        out_path: Destination of the attestation JSON. Missing parent
            directories are created.

    Returns:
        The payload that was written: the artifact path (as
        ``str(Path(evidence_path))``), its hex digest and the fixed ``type``
        marker.
    """
    artifact = Path(evidence_path)
    attestation = {
        "artifact": str(artifact),
        "sha256": file_sha256(artifact),
        "type": "canari-evidence-attestation",
    }

    destination = Path(out_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(attestation, indent=2), encoding="utf-8")
    return attestation
31
+
32
+
33
def verify_attestation(attestation_path: str | Path) -> bool:
    """Check that an attested artifact still matches its recorded digest.

    Loads the attestation JSON at *attestation_path*, re-hashes the file named
    by its ``artifact`` entry with ``file_sha256`` and compares the result with
    the stored ``sha256`` value.

    Returns:
        ``True`` when the two digests are equal, ``False`` otherwise.

    Raises:
        KeyError: If the attestation object lacks ``artifact`` or ``sha256``.
    """
    record = json.loads(Path(attestation_path).read_text(encoding="utf-8"))
    # The artifact path is used exactly as stored, so a relative path is
    # resolved against the current working directory.
    artifact, expected = record["artifact"], record["sha256"]
    return file_sha256(artifact) == expected
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from dataclasses import dataclass
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
+ def _slug(value: str) -> str:
12
+ text = re.sub(r"[^a-zA-Z0-9]+", "-", value.strip().lower()).strip("-")
13
+ return text or "audit"
14
+
15
+
16
@dataclass(frozen=True)
class AuditPaths:
    """Immutable bundle of the filesystem locations that make up one audit."""

    # Directory that contains every file of the audit.
    root: Path
    # JSON file describing the audit.
    metadata: Path
    # Scan report JSON.
    scan_report: Path
    # Evidence JSON.
    evidence: Path
    # PDF audit report.
    pdf: Path
    # Directory for BreakPoint snapshot files.
    bp_dir: Path
24
+
25
+
26
class AuditManager:
    """Create audit workspaces and read back their stored metadata.

    Each audit lives in its own directory ``<base_dir>/<audit_id>`` holding an
    ``audit.json`` metadata file next to the fixed artifact locations returned
    as an ``AuditPaths`` value.
    """

    def __init__(self, base_dir: str | Path = ".canari/audits") -> None:
        """Store *base_dir* (as a ``Path``) as the parent of all audit directories."""
        self.base_dir = Path(base_dir)

    def init_audit(
        self,
        name: str,
        source: str,
        provider: str,
        logs: str | None,
        experiment_id: str | None,
        tracking_uri: str,
        client: str,
        application: str,
        patterns_file: str | None = None,
    ) -> AuditPaths:
        """Create the workspace directory for an audit and write its metadata.

        The audit id is the slug of *name*. All arguments are stored verbatim
        in ``audit.json`` together with a UTC creation timestamp and the
        artifact paths as strings. The directory may already exist; an
        existing ``audit.json`` in it is overwritten.

        Returns:
            The paths belonging to the audit workspace.
        """
        audit_id = _slug(name)
        paths = self._paths(self.base_dir / audit_id)
        paths.root.mkdir(parents=True, exist_ok=True)

        payload: dict[str, Any] = {
            "audit_id": audit_id,
            "name": name,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "source": source,
            "provider": provider,
            "logs": logs,
            "experiment_id": experiment_id,
            "tracking_uri": tracking_uri,
            "client": client,
            "application": application,
            "patterns_file": patterns_file,
        }
        # Artifact locations follow the settings, serialised as plain strings.
        for key in ("scan_report", "evidence", "pdf", "bp_dir"):
            payload[key] = str(getattr(paths, key))

        paths.metadata.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        return paths

    def load_metadata(self, audit_id: str) -> dict[str, Any]:
        """Return the parsed ``audit.json`` of the audit named *audit_id*.

        Raises:
            FileNotFoundError: If no metadata file exists for *audit_id*.
        """
        metadata_file = self._paths(self.base_dir / audit_id).metadata
        if metadata_file.exists():
            return json.loads(metadata_file.read_text(encoding="utf-8"))
        raise FileNotFoundError(f"Audit not found: {audit_id}")

    def _paths(self, root: Path) -> AuditPaths:
        """Derive the fixed file layout of an audit rooted at *root*."""
        return AuditPaths(
            root=root,
            metadata=root.joinpath("audit.json"),
            scan_report=root.joinpath("scan-report.json"),
            evidence=root.joinpath("evidence.json"),
            pdf=root.joinpath("audit-report.pdf"),
            bp_dir=root.joinpath("bp-snapshots"),
        )