sigmaforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigmaforge/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,21 @@
1
+ from concurrent.futures import ProcessPoolExecutor
2
+
3
+ from sigmaforge.ingest.chunker import chunk_lines
4
+ from sigmaforge.records import MatchRecord
5
+
6
+
7
def aggregate(shard_results: list[list[MatchRecord]]) -> set[MatchRecord]:
    """Merge the per-shard match lists into one deduplicated set.

    Args:
        shard_results: One list of (hashable) match records per shard.

    Returns:
        The union of all shards; empty when there are no shards.
    """
    # set union = order-independent, dedup across shard boundaries
    return set().union(*shard_results)
12
+
13
+
14
def backtest(items, shard_size, workers, shard_fn) -> set[MatchRecord]:
    """Shard ``items``, run ``shard_fn`` on every shard and union the results.

    Args:
        items: Sequence to partition (passed to ``chunk_lines``).
        shard_size: Number of items per shard.
        workers: ``1`` runs the shards in-process; any other value is handed
            to ``ProcessPoolExecutor(max_workers=...)``.
        shard_fn: Callable applied to each shard.

    Returns:
        The aggregated set produced by ``aggregate``.
    """
    shards = list(chunk_lines(items, shard_size))
    if workers != 1:
        with ProcessPoolExecutor(max_workers=workers) as pool:
            per_shard = list(pool.map(shard_fn, shards))
        return aggregate(per_shard)
    return aggregate([shard_fn(shard) for shard in shards])
sigmaforge/banner.py ADDED
@@ -0,0 +1,15 @@
1
+ """sigmaforge banner — uses the Shipwright design system."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ from shipwright_kit.design.banner import make_banner
8
+
9
+ from sigmaforge import __version__
10
+
11
+
12
def show_banner(*, quiet: bool = False) -> None:
    """Print the sigmaforge banner to stderr.

    Nothing is printed when ``quiet`` is true or stderr is not a TTY.
    """
    if not quiet and sys.stderr.isatty():
        banner = make_banner("sigmaforge", __version__, "Honest Sigma-rule backtest harness")
        print(banner, file=sys.stderr)
sigmaforge/config.py ADDED
@@ -0,0 +1,34 @@
1
+ """sigmaforge configuration: ~/.sigmaforge/config.yaml + env > defaults."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ import yaml
9
+ from pydantic import BaseModel
10
+ from shipwright_kit.config import app_dir, load_config
11
+
12
+ _APP_DIR = app_dir("sigmaforge")
13
+
14
+
15
class OutputConfig(BaseModel):
    """Output-related settings of the application config."""

    default_format: str = "rich"  # format name used when none is specified
17
+
18
+
19
class AppConfig(BaseModel):
    """Top-level sigmaforge configuration model (validated in ``load``)."""

    output: OutputConfig = OutputConfig()  # defaults apply when the section is absent
21
+
22
+
23
def _load_yaml(path: Path) -> dict:
    """Parse the YAML file at ``path`` with ``yaml.safe_load``.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's locale encoding. An empty document yields ``{}``.
    """
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
26
+
27
+
28
def load(config_path: Optional[Path] = None) -> AppConfig:
    """Resolve config: explicit > ~/.sigmaforge/config.yaml > ./config.yaml > defaults.

    The candidate paths are handed to ``load_config`` in that order; files are
    read with ``_load_yaml`` and validated with ``AppConfig.model_validate``.
    """
    candidates = [config_path, _APP_DIR / "config.yaml", Path("config.yaml")]
    return load_config(candidates, loader=_load_yaml, validator=AppConfig.model_validate)
File without changes
@@ -0,0 +1,12 @@
1
def compare_loaded_intersection(z_hits, c_hits, z_loaded, c_loaded) -> dict:
    """A6 cross-engine integrity check between Zircolite and Chainsaw.

    Hits are compared ONLY for rules that BOTH engines loaded. A rule loaded by
    just one engine is a load artifact and is reported separately, never as a
    detection disagreement.

    Args:
        z_hits / c_hits: dict[rule -> set[event_id]]; a missing rule counts as
            an empty hit set.
        z_loaded / c_loaded: sets of rules each engine loaded.

    Returns:
        dict with keys ``compared_rules``, ``agree``, ``disagree`` and
        ``load_artifact_only``.
    """
    shared = z_loaded & c_loaded
    agreeing = set()
    for rule in shared:
        if z_hits.get(rule, set()) == c_hits.get(rule, set()):
            agreeing.add(rule)
    result = {}
    result["compared_rules"] = shared
    result["agree"] = agreeing
    result["disagree"] = shared - agreeing
    # symmetric difference = load artifact
    result["load_artifact_only"] = z_loaded ^ c_loaded
    return result
sigmaforge/detect.py ADDED
@@ -0,0 +1,42 @@
1
+ """Example parse boundary — classify an untrusted input string.
2
+
3
+ Framework input-contract rule: external/garbage input never raises;
4
+ unrecognized input returns "unknown". Replace with your real logic.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+
11
# All patterns are applied with ``fullmatch`` and therefore carry no anchors.
# ASCII digits only: ``\d`` would also accept non-ASCII decimal digits
# (e.g. fullwidth "１"), which ``int()`` happily converts.
_IPV4 = re.compile(r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}")
_HASH = re.compile(r"[A-Fa-f0-9]{32,64}")
# One DNS label: letters, digits and hyphens, never starting or ending with "-".
_LABEL = r"(?!-)[A-Za-z0-9-]{1,63}(?<!-)"
# Every label may contain digits/hyphens (e.g. "www.my-site1.com"); only the
# final label (the TLD) is restricted to letters. Total length <= 253.
_DOMAIN = re.compile(
    r"(?=.{1,253}\Z)" + _LABEL + r"(?:\." + _LABEL + r")*\.[A-Za-z]{2,63}"
)
# File extensions that look like TLDs but are not valid domains for our purposes.
_FILE_EXTS = re.compile(
    r"\.(dll|exe|so|dylib|sys|bin|bat|cmd|sh|ps1"
    r"|py|js|ts|rb|go|rs|c|cpp|h|java|class|jar|war"
    r"|zip|tar|gz|bz2|xz|7z|rar"
    r"|pdf|doc|docx|xls|xlsx|ppt|pptx"
    r"|png|jpg|jpeg|gif|svg|ico|mp3|mp4|avi|mov|mkv"
    r"|log|txt|csv|json|xml|yaml|yml|toml|ini|cfg|conf|env)$",
    re.IGNORECASE,
)


def classify(value: object) -> str:
    """Return a coarse type for value. Never raises; unknown -> "unknown".

    Args:
        value: Untrusted input; anything that is not a ``str`` is "unknown".

    Returns:
        ``"ipv4"`` for a dotted quad whose octets are all 0-255, ``"hash"`` for
        32-64 hex characters, ``"domain"`` for a hostname with an alphabetic
        TLD that does not look like a file name, otherwise ``"unknown"``.
    """
    if not isinstance(value, str):
        return "unknown"
    v = value.strip()
    if not v:
        return "unknown"
    if _IPV4.fullmatch(v):
        # Shape matched; a dotted quad with an out-of-range octet is not
        # re-interpreted as anything else.
        if all(int(octet) <= 255 for octet in v.split(".")):
            return "ipv4"
        return "unknown"
    if _HASH.fullmatch(v):
        return "hash"
    if _DOMAIN.fullmatch(v) and not _FILE_EXTS.search(v):
        return "domain"
    return "unknown"
File without changes
@@ -0,0 +1,22 @@
1
+ from typing import Iterator, Sequence, TypeVar
2
+
3
+ T = TypeVar("T")
4
+
5
+
6
def chunk_lines(items: Sequence[T], shard_size: int) -> Iterator[list[T]]:
    """Partition items into chunks of shard_size.

    ``shard_size`` is validated when the function is called, not on first
    iteration, so a bad size fails fast even if the result is never consumed.

    Args:
        items: Sequence to partition
        shard_size: Size of each chunk (must be >= 1)

    Returns:
        A lazy iterator of lists, each of size shard_size (except possibly
        the last chunk)

    Raises:
        ValueError: If shard_size < 1
    """
    if shard_size < 1:
        raise ValueError("shard_size must be >= 1")
    return (
        list(items[start : start + shard_size])
        for start in range(0, len(items), shard_size)
    )
@@ -0,0 +1,16 @@
1
def _is_stateful(rule: dict) -> bool:
    """Return True when a rule needs state across events.

    A rule counts as stateful when it has a top-level ``correlation`` key or
    when its ``detection.condition`` text contains an aggregation/temporal
    token. A missing, ``None`` or non-mapping ``detection`` has no condition
    and is treated as stateless instead of raising.
    """
    if "correlation" in rule:
        return True
    detection = rule.get("detection")
    if not isinstance(detection, dict):
        return False
    cond = str(detection.get("condition", ""))
    return any(tok in cond for tok in ("count(", "sum(", "avg(", "| near", "temporal"))
6
+
7
+
8
def select_by_level(rules: list[dict], levels: tuple[str, ...]) -> list[dict]:
    """Keep the rules whose lower-cased ``level`` is one of ``levels``.

    A rule without a ``level`` key is compared as the empty string. Input
    order is preserved.
    """
    selected = []
    for rule in rules:
        level = str(rule.get("level", "")).lower()
        if level in levels:
            selected.append(rule)
    return selected
10
+
11
+
12
def partition_rules(rules: list[dict], levels: tuple[str, ...] = ("high", "critical")) -> tuple[list[dict], list[dict]]:
    """Split the in-scope rules into (loaded, excluded).

    Rules are first filtered by ``levels``; stateless ones go to ``loaded``,
    stateful ones (per ``_is_stateful``) to ``excluded``. Order is preserved
    within each list.
    """
    loaded: list[dict] = []
    excluded: list[dict] = []
    for rule in select_by_level(rules, levels):
        bucket = excluded if _is_stateful(rule) else loaded
        bucket.append(rule)
    return loaded, excluded
@@ -0,0 +1,92 @@
1
+ import hashlib
2
+ import json
3
+ import subprocess
4
+ import tempfile
5
+
6
+ from sigmaforge.records import MatchRecord
7
+
8
+ ZIRCOLITE = ["uv", "run", "python", "Zircolite/zircolite.py"] # vendored 3.7.6
9
+
10
+
11
def _stable_event_id(row: dict) -> str:
    """Derive an event key from the whole flattened row (fix C).

    The row is serialised as key-sorted JSON (non-JSON values via ``str``) and
    SHA-1 hashed, so the key does not depend on key order.

    Author's rationale: EventRecordID is a PER-EVTX-FILE counter, so keying on
    it alone would collapse record 42 of fileA with record 42 of fileB in a
    multi-file attack run and silently deflate recall. The whole row carries
    fields (Computer/UtcTime/Image/CommandLine/...) that differ across files.
    Two genuinely identical rows still hash to the same key, which is the
    intended dedup, not a collision bug. NOTE(review): the original note also
    relies on Zircolite's per-run ``row_id`` (reset per chunk under
    ``--parallel``); confirm against the vendored Zircolite version.
    """
    serialized = json.dumps(row, sort_keys=True, default=str).encode()
    digest = hashlib.sha1(serialized)
    return digest.hexdigest()
23
+
24
+
25
def parse_detections(
    detections: list[dict],
    corpus_label: str | None = None,
    file_technique_map: dict[str, str] | None = None,
    event_technique_out: dict[str, str] | None = None,
) -> list[MatchRecord]:
    """Parse Zircolite detections into MatchRecords.

    Each detection's ``title`` becomes the ``rule_id``; every entry of its
    ``matches`` list yields one record (duplicates are NOT removed here).

    Event identity: ``sigmaforge_eid`` when the row carries it, otherwise the
    whole-row hash from ``_stable_event_id`` (NOT bare EventRecordID).
    Label: row ``sigmaforge_label``, else ``corpus_label``, else ``"benign"``.

    FIX B: when both ``file_technique_map`` (source basename -> ATT&CK
    technique) and ``event_technique_out`` are supplied, ``event_technique_out``
    is filled in place with ``event_id -> technique``, looked up through the
    row's ``OriginalLogfile``. It is keyed on the same event id the record
    carries, so a fire and its technique join correctly downstream.
    """
    harvest = file_technique_map is not None and event_technique_out is not None
    records: list[MatchRecord] = []
    for detection in detections:
        for match in detection.get("matches", []):
            event_id = str(match.get("sigmaforge_eid") or _stable_event_id(match))
            label = match.get("sigmaforge_label") or corpus_label or "benign"
            records.append(
                MatchRecord(rule_id=detection["title"], event_id=event_id, event_label=label)
            )
            if not harvest:
                continue
            source_file = match.get("OriginalLogfile")
            if not source_file:
                continue
            technique = file_technique_map.get(source_file)
            if technique:
                event_technique_out[event_id] = technique
    return records
53
+
54
+
55
def run_shard(
    events_path: str,
    ruleset_glob: str,
    mapping_path: str | None = None,
    json_input: bool = True,
    xml_input: bool = False,
    corpus_label: str | None = None,
    file_technique_map: dict[str, str] | None = None,
    event_technique_out: dict[str, str] | None = None,
    cwd: str | None = None,
) -> list[MatchRecord]:
    """Run Zircolite over a shard and parse detections.

    FIX B: pass ``file_technique_map`` + ``event_technique_out`` to also harvest the
    per-event ground-truth technique (see ``parse_detections``).

    FIX B3: ``xml_input=True`` passes Zircolite ``--xml-input`` (EVTX converted to
    XML); ``json_input=True`` passes ``--json-input``. The two are mutually exclusive.

    Args:
        events_path: Value for ``--events``.
        ruleset_glob: Value for ``--ruleset``.
        mapping_path: Optional value for ``--config``.
        corpus_label: Fallback label for rows without ``sigmaforge_label``.
        cwd: Working directory for the Zircolite subprocess (the command uses
            the relative path ``Zircolite/zircolite.py``). When omitted, the
            ``SIGMAFORGE_ZIRCOLITE_CWD`` environment variable is used, and
            otherwise the current working directory is inherited. This
            replaces a developer-specific absolute path that existed only on
            the original author's machine.

    Returns:
        The parsed ``MatchRecord`` list.

    Raises:
        ValueError: If both ``json_input`` and ``xml_input`` are true.
        subprocess.CalledProcessError: If Zircolite exits non-zero.
    """
    import contextlib
    import os

    if json_input and xml_input:
        raise ValueError("json_input and xml_input are mutually exclusive")
    workdir = cwd or os.environ.get("SIGMAFORGE_ZIRCOLITE_CWD") or None
    # mkstemp returns an absolute path, so it stays valid whatever `workdir` is.
    fd, out = tempfile.mkstemp(suffix=".json")
    os.close(fd)  # Zircolite reopens the path itself; do not leak the descriptor
    try:
        cmd = [*ZIRCOLITE, "--events", events_path, "--ruleset", ruleset_glob, "--outfile", out]
        if json_input:
            cmd += ["--json-input"]
        if xml_input:
            cmd += ["--xml-input"]
        if mapping_path:
            cmd += ["--config", mapping_path]
        subprocess.run(cmd, check=True, cwd=workdir)
        with open(out, encoding="utf-8") as fh:
            detections = json.load(fh)
    finally:
        # Always remove the scratch file, even when Zircolite or parsing fails.
        with contextlib.suppress(OSError):
            os.unlink(out)
    return parse_detections(
        detections,
        corpus_label=corpus_label,
        file_technique_map=file_technique_map,
        event_technique_out=event_technique_out,
    )
sigmaforge/main.py ADDED
@@ -0,0 +1,73 @@
1
+ """sigmaforge — CLI entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rich.console import Console
6
+ from shipwright_kit.cli import build_typer
7
+
8
+ from sigmaforge import __version__
9
+ from sigmaforge.detect import classify as classify_input
10
+
11
+ app = build_typer("sigmaforge", "Honest Sigma-rule backtest harness", version=__version__)
12
+ console = Console()
13
+
14
+
15
@app.command()
def classify(value: str) -> None:
    """Classify an input string (example parse boundary)."""
    # Delegates to sigmaforge.detect.classify and prints the coarse type.
    kind = classify_input(value)
    console.print(kind)
19
+
20
+
21
@app.command()
def backtest(
    rules: str,
    attack: str,
    benign: str,
    out: str,
    mapping: str = "data/mappings/comiset.yaml",
    workers: int = 4,
    min_events: int = 1000,
    attack_events: int = 0,
) -> None:
    """Backtest Sigma rules: recall on the native-EVTX attack corpus, precision@COMISET on
    the benign corpus. Writes the FP-tuning report to OUT. (Live end-to-end run; meaningful
    precision numbers require the COMISET benign sample.)"""
    # Heavy imports stay local so the other CLI commands do not pay for them.
    import json
    from pathlib import Path

    import yaml

    from sigmaforge.ingest.ruleload import partition_rules
    from sigmaforge.ingest.zircolite_runner import run_shard
    from sigmaforge.orchestrate import run_backtest

    # Only mapping documents with a title are treated as rules. Files are
    # decoded as UTF-8 explicitly instead of relying on the locale encoding.
    rule_docs = [
        doc
        for p in Path(rules).rglob("*.yml")
        for doc in [yaml.safe_load(p.read_text(encoding="utf-8"))]
        if isinstance(doc, dict) and doc.get("title")
    ]
    loaded, _excluded = partition_rules(rule_docs)
    # JSONL: one event per line; blank lines (e.g. a trailing newline pair)
    # are skipped rather than crashing json.loads.
    benign_events = [
        json.loads(line)
        for line in Path(benign).read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    attack_fires = set(run_shard(attack, rules, json_input=False, corpus_label="malicious"))
    benign_fires = set(run_shard(benign, rules, mapping_path=mapping, json_input=True))
    # Positive control: did any malicious-labelled event fire in the benign corpus?
    pc_fired = any(f.event_label == "malicious" for f in benign_fires)
    _rows, _funnel, md = run_backtest(
        loaded,
        attack_fires,
        benign_fires,
        benign_events,
        n_attack_events=attack_events,  # attack-corpus event count = recall denominator (provide via --attack-events)
        positive_control_fired=pc_fired,
        min_events=min_events,
    )
    # The report contains non-ASCII punctuation, so the encoding is pinned.
    Path(out).write_text(md, encoding="utf-8")
    console.print(f"report written: {out}")
66
+
67
+
68
def main() -> None:
    """Entry point: invoke the CLI application object ``app``."""
    app()
70
+
71
+
72
+ if __name__ == "__main__":
73
+ main()
@@ -0,0 +1,147 @@
1
+ """Pure backtest orchestration (testable without live Zircolite).
2
+
3
+ Two-source scoring (EVTX-recall coherence):
4
+ - RECALL from the native-EVTX attack corpus (all-malicious). FIX B: PER-TECHNIQUE
5
+ recall — each rule is measured against only the attack events of its own ATT&CK
6
+ technique(s) (denom = events of that technique, not the whole corpus). A rule
7
+ with no technique tag, or whose technique has zero attack events, is
8
+ "unmeasured". When the per-technique inputs are not supplied the recall falls
9
+ back to the legacy POOLED denominator `tp_recall / n_attack_events`.
10
+ - PRECISION/FP from the COMISET benign corpus via the label-aware, deduping `score_rule`
11
+ (A3: a malicious-labelled hit in the benign corpus is a TP, not an FP; MAJOR-5: dedupe
12
+ per (rule_id, event_id)).
13
+ Precision flows ONLY through `emit_precision` (A2/A12) — no ungated raw precision is emitted.
14
+ """
15
+
16
+ from sigmaforge.records import MatchRecord
17
+ from sigmaforge.report.render import render_report
18
+ from sigmaforge.runmanifest import run_hash
19
+ from sigmaforge.score.acceptance import assert_one_source
20
+ from sigmaforge.score.adapter import score_rule
21
+ from sigmaforge.score.coverage import (
22
+ benign_events_evaluated_for_rule,
23
+ events_evaluated_for_rule,
24
+ selection_fields,
25
+ )
26
+ from sigmaforge.score.recall import UNMEASURED, per_technique_recall, rule_techniques
27
+ from sigmaforge.score.scorer import emit_precision
28
+
29
+
30
def run_backtest(
    loaded_rules: list[dict],
    attack_fires: set[MatchRecord],  # from EVTX attack corpus (recall)
    benign_fires: set[MatchRecord],  # from COMISET benign corpus (precision)
    benign_events: list[dict],  # COMISET events, carry sigmaforge_label (for labels + coverage)
    n_attack_events: int,  # total attack-corpus events (legacy pooled recall denominator)
    positive_control_fired: bool,
    min_events: int,
    source: str = "COMISET",
    # FIX B (per-technique recall). BOTH inputs below must be supplied together to enable it;
    # if either is None, recall falls back to the legacy pooled denominator.
    event_technique: dict[str, str] | None = None,  # event_id -> ATT&CK (sub-)technique
    technique_event_counts: dict[str, int] | None = None,  # technique -> total attack PC events
) -> tuple[list[dict], dict, str]:
    """Score every loaded rule and render the report.

    Per rule (keyed by its ``title``): precision-side counts come from
    ``score_rule`` over that rule's benign-corpus fires; recall comes from the
    distinct malicious-labelled attack fires, either per-technique (when both
    ``event_technique`` and ``technique_event_counts`` are given) or pooled
    over ``n_attack_events`` (0.0 when that is 0). Precision values are taken
    exclusively from ``emit_precision``.

    Returns:
        ``(rows, funnel, markdown)``: the per-rule row dicts, the funnel
        counters and the report text from ``render_report``.

    Raises:
        KeyError: If a rule has no ``title``.
        AssertionError: If the acceptance gate (``assert_one_source``) finds a
            fired rule outside the loaded titles on either corpus.
    """
    per_technique = event_technique is not None and technique_event_counts is not None
    titles = {r["title"] for r in loaded_rules}
    # benign-corpus label split (A3): malicious-labelled benign-corpus events are TP, not FP
    n_ben_mal = sum(1 for e in benign_events if e.get("sigmaforge_label") == "malicious")
    n_ben_ben = len(benign_events) - n_ben_mal

    scores = []
    recall_by_rule: dict[str, float | str] = {}
    recall_meta_by_rule: dict[str, dict] = {}
    benign_cov_by_rule: dict[str, int] = {}
    for rule in loaded_rules:
        rid = rule["title"]
        fields = selection_fields(rule)
        cov = events_evaluated_for_rule(benign_events, fields)
        # BLOCKER-2: how many BENIGN-labelled events could have produced an FP at all
        benign_cov_by_rule[rid] = benign_events_evaluated_for_rule(benign_events, fields)
        # precision side: label-aware + dedupe via score_rule on the benign corpus
        b = [f for f in benign_fires if f.rule_id == rid]
        s = score_rule(rid, b, n_malicious=n_ben_mal, n_benign=n_ben_ben, events_evaluated=cov)
        scores.append(s)
        # recall side: unique malicious hits on the all-malicious attack corpus
        fired_eids = {f.event_id for f in attack_fires if f.rule_id == rid and f.event_label == "malicious"}
        if per_technique:
            # FIX B: measure the rule against only the events of its own technique(s)
            techs = rule_techniques(rule)
            recall, numer, denom, measured = per_technique_recall(
                rid, techs, fired_eids, event_technique, technique_event_counts
            )
            recall_by_rule[rid] = recall
            recall_meta_by_rule[rid] = {
                "techniques": sorted(techs),
                "measured_techniques": measured,
                "recall_numer": numer,
                "recall_denom": denom,
                "recall_measurable": recall != UNMEASURED,
            }
        else:
            # legacy POOLED recall (fallback when per-technique inputs absent)
            recall_by_rule[rid] = (len(fired_eids) / n_attack_events) if n_attack_events else 0.0
            recall_meta_by_rule[rid] = {"recall_measurable": None}

    precisions = emit_precision(scores, positive_control_fired, min_events)  # the ONLY precision path
    rows = []
    for s in scores:
        # BLOCKER-2 flag: a measured precision with zero benign exemplars carries NO FP signal
        # (fp=0 is true by construction — no benign-labelled event matched the selection).
        no_benign_exemplars = benign_cov_by_rule[s.rule_id] == 0
        meta = recall_meta_by_rule[s.rule_id]
        rows.append(
            {
                "rule": s.rule_id,
                "recall": recall_by_rule[s.rule_id],
                f"precision@{source}": precisions[s.rule_id],
                "tp": s.tp,
                "fp": s.fp,
                "events_evaluated": s.events_evaluated,
                "benign_events_evaluated": benign_cov_by_rule[s.rule_id],
                "no_benign_exemplars": no_benign_exemplars,
                # FIX B per-technique recall provenance (present even in pooled mode, with
                # recall_measurable=None so the report can tell the two modes apart)
                "recall_techniques": meta.get("techniques", []),
                "recall_measured_techniques": meta.get("measured_techniques", []),
                "recall_numer": meta.get("recall_numer"),
                "recall_denom": meta.get("recall_denom"),
                "recall_measurable": meta.get("recall_measurable"),
            }
        )
    # NOTE: the first three funnel stages are all the loaded-rule count here;
    # "fires"/"survives_fp" are derived from the benign-corpus scores only.
    funnel = {
        "candidate": len(loaded_rules),
        "loaded": len(loaded_rules),
        "stateless": len(loaded_rules),
        "fires": len({s.rule_id for s in scores if s.tp or s.fp}),
        "survives_fp": len([s for s in scores if s.fp == 0 and s.tp]),
    }
    # FIX H acceptance gate (reconcile-not-relabel): with a ONE-source ruleset
    # (engine compiled from exactly the loaded set), every engine fire must be a
    # loaded rule and engine fires must equal scored fires on BOTH corpora. The
    # old code merely asserted scores ⊆ titles (always true, since scores are
    # built from loaded_rules) and silently dropped engine fires outside `titles`
    # (the 767->2 benign-side gap). This now raises on any such discrepancy.
    assert all(s.rule_id in titles for s in scores)
    assert_one_source(titles, attack_fires, benign_fires)
    # A11: worker-invariant reproducibility stamp over the aggregated fire set
    rh = run_hash(attack_fires | benign_fires)
    if per_technique:
        measurable = sum(1 for r in rows if r["recall_measurable"])
        recall_note = (
            "recall is **per-technique, sub-technique-granular** — each rule is measured against "
            "only the attack events of its own ATT&CK (sub-)technique(s), NOT pooled over the whole "
            "corpus and NOT diluted by sibling sub-techniques. A rule tagged `T1059.001` is scored "
            "against `T1059.001` events ONLY (its `T1059.003` siblings are excluded); a rule with a "
            "bare parent tag `T1059` covers all `T1059.*` children. Rules with no technique tag, or "
            "whose tags match zero attack events in this corpus, are `unmeasured` (not 0). "
            f"Recall-measurable rules: {measurable}/{len(rows)}."
        )
    else:
        recall_note = (
            "recall is **pooled** (tp / whole-corpus). Per-technique recall (FIX B) is not enabled for this run."
        )
    return (
        rows,
        funnel,
        render_report(rows, funnel, source=source, min_events=min_events, run_hash=rh, recall_note=recall_note),
    )
sigmaforge/records.py ADDED
@@ -0,0 +1,18 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass(frozen=True)
class MatchRecord:
    """One rule firing on one event.

    Frozen (immutable) and therefore hashable, so records can be stored in
    sets and deduplicated by value.
    """

    rule_id: str  # identifier of the rule that fired
    event_id: str  # identifier of the matched event
    event_label: str  # "benign" | "malicious"
9
+
10
+
11
@dataclass
class RuleScore:
    """Confusion-matrix counts for a single rule; all counters default to 0."""

    rule_id: str
    tp: int = 0  # true positives
    fp: int = 0  # false positives
    tn: int = 0  # true negatives
    fn: int = 0  # false negatives
    events_evaluated: int = 0  # coverage count used by the precision floor
File without changes
@@ -0,0 +1,87 @@
1
def render_report(
    rows: list[dict],
    funnel: dict,
    source: str = "COMISET",
    min_events: int = 1000,
    fp_tuning_threshold: int = 5,
    run_hash: str | None = None,
    corpus_note: str | None = None,
    recall_note: str | None = None,
) -> str:
    """A10/A8: the deliverable. A human-readable FP-tuning report.

    Leads with the corpus-scope + noisy-label caveat; precision labelled precision@SOURCE.
    `corpus_note` MUST disclose the benign corpus composition when it is blended (A8 honesty).
    `recall_note` (FIX B) discloses how recall is measured (per-technique vs pooled).

    Args:
        rows: Per-rule dicts; read keys are ``rule``, ``recall``,
            ``precision@{source}``, ``tp``, ``fp``, ``events_evaluated``,
            ``benign_events_evaluated`` and ``no_benign_exemplars``.
        funnel: Stage name -> count; only the five known stages are printed.
        source: Corpus name used in the title and the precision column.
        min_events: Precision floor quoted in the caveat text.
        fp_tuning_threshold: Minimum integer ``fp`` for a rule to be listed as
            an FP-tuning candidate.
        run_hash: Optional reproducibility stamp printed under the title.

    Returns:
        The Markdown report as one newline-joined string (no trailing newline).
    """
    lines = [
        f"# Sigmaforge backtest report ({source})",
        "",
        *([f"_run hash (worker-invariant): `{run_hash}`_", ""] if run_hash else []),
        f"> Precision is **precision@{source}**, measured on the benign corpus described below "
        f"— not a general/cross-environment false-positive rate. "
        f"Labels are NOISY ground truth (rule-pattern attributions, e.g. OneDrive.exe tagged "
        f"as an ATT&CK technique), so a measured FP may be a mislabel. Recall is measured on "
        f"the labeled native-EVTX attack corpora over PROCESS-CREATION events only (the loaded "
        f"ruleset is 100% process_creation). Precision floor: {min_events} evaluated events.",
        *([f"> **Benign corpus composition:** {corpus_note}"] if corpus_note else []),
        *([f"> **Recall method (FIX B):** {recall_note}"] if recall_note else []),
        "",
        "> **Precision tautology caveat (BLOCKER-2):** a rule showing precision = 1.0 with fp = 0 "
        "is only trustworthy if benign-labelled events actually matched its selection. Rules whose "
        "benign-corpus coverage held **zero benign exemplars** are flagged `no-benign-exemplars` "
        "below: their fp = 0 is true *by construction*, so precision = 1.0 carries **no "
        "false-positive signal** — it is not evidence of FP-resistance.",
        "",
        "## Funnel",
    ]
    # Fixed stage order; stages absent from `funnel` are silently omitted.
    for stage in ("candidate", "loaded", "stateless", "fires", "survives_fp"):
        if stage in funnel:
            lines.append(f"- {stage}: {funnel[stage]}")
    lines += [
        "",
        "## Per-rule",
        "",
        "| rule | recall | precision@"
        + source
        + " | tp | fp | events_evaluated | benign_events_evaluated | precision_signal |",
        "|---|---|---|---|---|---|---|---|",
    ]
    for r in rows:
        prec = r.get(f"precision@{source}", "unmeasured")
        # precision_signal: does the precision number carry any FP information?
        if prec == "unmeasured":
            signal = "n/a (unmeasured)"
        elif r.get("no_benign_exemplars"):
            signal = "NONE (no-benign-exemplars)"
        else:
            signal = "real"
        lines.append(
            f"| {r.get('rule')} | {r.get('recall')} | {prec} "
            f"| {r.get('tp')} | {r.get('fp')} | {r.get('events_evaluated')} "
            f"| {r.get('benign_events_evaluated', 'n/a')} | {signal} |"
        )
    # FP-tuning section: surface over-broad rules (the analyst-judgment deliverable)
    noisy = [r for r in rows if isinstance(r.get("fp"), int) and r["fp"] >= fp_tuning_threshold]
    lines += ["", "## FP-tuning candidates (over-broad on real traffic)"]
    if noisy:
        # noisiest rule first
        for r in sorted(noisy, key=lambda x: -x["fp"]):
            lines.append(
                f"- **{r.get('rule')}** catches the attack but fires {r['fp']}x on benign "
                f"activity — candidate for tightening."
            )
    else:
        lines.append("- none above threshold")

    # BLOCKER-2: rules whose measured precision is tautological (no benign exemplars).
    # Only float precisions qualify, so "unmeasured" rows are never listed here.
    tautology = [r for r in rows if r.get("no_benign_exemplars") and isinstance(r.get(f"precision@{source}"), float)]
    lines += ["", "## Precision tautologies (no benign exemplars — precision carries no FP signal)"]
    if tautology:
        for r in sorted(tautology, key=lambda x: str(x.get("rule"))):
            lines.append(
                f"- **{r.get('rule')}** reports precision@{source} = {r.get(f'precision@{source}')} "
                f"with fp = {r.get('fp')}, but **0 benign-labelled events matched its selection** — "
                f"fp = 0 is true by construction. No FP-resistance is demonstrated; precision is "
                f"effectively unmeasured for FP purposes."
            )
    else:
        lines.append("- none (every measured rule had at least one benign exemplar)")
    return "\n".join(lines)
@@ -0,0 +1,23 @@
1
+ import hashlib
2
+ import json
3
+
4
+
5
def build_manifest(**kw) -> dict:
    """Pin everything that determines a metric, for reproducibility (A4).

    The ``level`` entry is always present and normalised to a list (empty when
    not supplied). A single level passed as a plain string is kept as one
    element rather than being split into its characters.

    Returns:
        A new dict with the entries ordered by key.
    """
    level = kw.get("level", ())
    if isinstance(level, str):
        level = (level,)
    kw["level"] = list(level)
    return dict(sorted(kw.items()))
9
+
10
+
11
def run_hash(aggregated_matches, workers=None) -> str:
    """A4/A11: a stable hash of the aggregated (rule_id, event_id) set.

    `workers` is intentionally NOT hashed — the metric must be worker-count invariant.

    The pairs are deduplicated before hashing, so the digest really is a
    function of the *set* of pairs: a list with repeated fires, or records
    that differ only in their label, hash the same as the corresponding set.

    Args:
        aggregated_matches: Iterable of ``(rule_id, event_id, ...)`` tuples or
            objects exposing ``rule_id`` / ``event_id``.
        workers: Ignored; accepted for call-site symmetry.

    Returns:
        Hex SHA-256 of the JSON-encoded, sorted ``"rule_id|event_id"`` strings.
    """
    payload = sorted({f"{r[0]}|{r[1]}" for r in _as_pairs(aggregated_matches)})
    return hashlib.sha256(json.dumps(payload).encode()).hexdigest()


def _as_pairs(matches):
    """Yield each match as a tuple; tuples pass through unchanged."""
    for m in matches:
        if isinstance(m, tuple):
            yield m
        else:  # MatchRecord
            yield (m.rule_id, m.event_id)
File without changes
@@ -0,0 +1,96 @@
1
+ """FIX H acceptance gate: reconcile-not-relabel.
2
+
3
+ The gap-review failure was a TWO-SOURCE join by title: the engine fired
4
+ Zircolite's bundled 2680-rule snapshot, the scorer kept only the ~609 loaded
5
+ SigmaHQ titles, so engine fires whose title was outside the 609 set were
6
+ SILENTLY dropped (benign_fires=767 -> funnel fires=2). FIX H compiles ONE
7
+ ruleset from exactly the loaded set, so the engine can only fire loaded rules.
8
+
9
+ This module asserts that invariant after a run, on EACH corpus:
10
+
11
+ 1. NO TITLE-DROP: every engine-fired rule_id is in the loaded title set.
12
+ (If a fire's title is not loaded, the engine ran a DIFFERENT ruleset than
13
+ the one scored — a real two-source regression, not something to suppress.)
14
+ 2. ENGINE == SCORED: the number of distinct (rule_id, event_id) fires the
15
+ engine produced equals the number the scorer actually counted. A gap here
16
+ means fires were dropped between firing and scoring.
17
+
18
+ A failure is a real discrepancy to SURFACE (raise), never relabel.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from dataclasses import dataclass
24
+
25
+ from sigmaforge.records import MatchRecord
26
+
27
+
28
@dataclass(frozen=True)
class GateResult:
    """Outcome of the FIX H acceptance gate for one corpus."""

    corpus: str
    engine_fires: int  # distinct (rule_id, event_id) from the engine
    scored_fires: int  # distinct (rule_id, event_id) the scorer counted
    dropped_titles: tuple[str, ...]  # fired rule_ids NOT in the loaded set
    ok: bool

    def reason(self) -> str:
        """Return a one-line, human-readable explanation of this result.

        On success it confirms the counts; on failure it lists the title-drop
        (at most five sample titles) and/or the count mismatch.
        """
        prefix = f"{self.corpus}: "
        if self.ok:
            return prefix + f"engine==scored ({self.engine_fires}), no title-drop"
        problems = []
        n_dropped = len(self.dropped_titles)
        if n_dropped:
            sample = list(self.dropped_titles)[:5]
            more = "…" if n_dropped > 5 else ""
            problems.append(
                f"{n_dropped} fired rule(s) outside the loaded set "
                f"(title-drop / two-source skew): {sample}" + more
            )
        if self.engine_fires != self.scored_fires:
            problems.append(f"engine fires ({self.engine_fires}) != scored fires ({self.scored_fires})")
        return prefix + "; ".join(problems)
49
+
50
+
51
def check_corpus(
    corpus: str,
    engine_fires: set[MatchRecord] | list[MatchRecord],
    loaded_titles: set[str],
) -> GateResult:
    """Compute the gate result for one corpus.

    Engine fires are reduced to distinct ``(rule_id, event_id)`` pairs. The
    scored pairs are those whose rule_id is in ``loaded_titles`` (mirroring
    what the scorer counts); rule_ids outside that set are reported, sorted,
    as dropped titles. The result is ``ok`` only when nothing was dropped and
    both pair counts are equal.
    """
    engine_pairs = {(fire.rule_id, fire.event_id) for fire in engine_fires}
    scored_pairs = set()
    unloaded = set()
    for pair in engine_pairs:
        if pair[0] in loaded_titles:
            scored_pairs.add(pair)
        else:
            unloaded.add(pair[0])
    dropped = tuple(sorted(unloaded))
    n_engine, n_scored = len(engine_pairs), len(scored_pairs)
    return GateResult(
        corpus=corpus,
        engine_fires=n_engine,
        scored_fires=n_scored,
        dropped_titles=dropped,
        ok=(not dropped) and (n_engine == n_scored),
    )
74
+
75
+
76
def assert_one_source(
    loaded_titles: set[str],
    attack_fires: set[MatchRecord] | list[MatchRecord],
    benign_fires: set[MatchRecord] | list[MatchRecord],
) -> list[GateResult]:
    """Run the gate on BOTH corpora; raise on any discrepancy.

    The 767->2 gap was benign-side, so the benign corpus MUST be checked too.

    Returns:
        ``[attack_result, benign_result]`` when both pass.

    Raises:
        AssertionError: Listing the reason of every failing corpus.
    """
    corpora = (("attack", attack_fires), ("benign", benign_fires))
    results = [check_corpus(name, fires, loaded_titles) for name, fires in corpora]
    complaints = [" - " + res.reason() for res in results if not res.ok]
    if not complaints:
        return results
    header = "FIX H acceptance gate FAILED (engine ruleset and scored ruleset disagree):\n"
    raise AssertionError(header + "\n".join(complaints))
@@ -0,0 +1,19 @@
1
+ from sigmaforge.records import MatchRecord, RuleScore
2
+
3
+
4
def score_rule(
    rule_id: str,
    fires: list[MatchRecord],
    n_malicious: int,
    n_benign: int,
    events_evaluated: int,
) -> RuleScore:
    """Build the confusion-matrix score of one rule from its fires.

    Zircolite concatenates filtered_rows across a rule's sigma_queries, so the
    same event can appear multiple times in `matches`. Fires are therefore
    deduplicated per (rule_id, event_id) BEFORE counting (the last duplicate
    wins); otherwise tp/fp would inflate past n_malicious/n_benign.

    ``fn`` and ``tn`` are the remainders of ``n_malicious`` / ``n_benign``,
    clamped at 0.
    """
    deduped = {}
    for fire in fires:
        deduped[(fire.rule_id, fire.event_id)] = fire
    labels = [fire.event_label for fire in deduped.values()]
    tp = labels.count("malicious")
    fp = labels.count("benign")
    return RuleScore(
        rule_id,
        tp=tp,
        fp=fp,
        tn=max(0, n_benign - fp),
        fn=max(0, n_malicious - tp),
        events_evaluated=events_evaluated,
    )
@@ -0,0 +1,28 @@
1
def events_evaluated_for_rule(events: list[dict], selection_fields: set[str]) -> int:
    """A2 coverage counter: number of events that carry every selection field.

    A field counts as carried when the event's value for it is neither ``None``
    (which includes a missing key) nor the empty string. This is what separates
    'low FP' from 'rule never ran'. With an empty ``selection_fields`` every
    event is counted.
    """
    covered = 0
    for event in events:
        for name in selection_fields:
            if event.get(name) in (None, ""):
                break  # one absent/empty field disqualifies the event
        else:
            covered += 1
    return covered
5
+
6
+
7
def benign_events_evaluated_for_rule(events: list[dict], selection_fields: set[str]) -> int:
    """BLOCKER-2 precision-tautology guard: coverage over the non-malicious events only.

    A precision of 1.0 with fp=0 says nothing about false positives if no benign
    event ever carried the rule's selection fields, because nothing could have
    produced an FP. This returns how many such exemplars the rule was exposed
    to, so the report can flag tautological precision.

    An event is treated as benign when its ``sigmaforge_label`` is anything
    other than ``"malicious"`` (an absent label included). A field is carried
    when its value is neither ``None`` nor the empty string.
    """
    exposed = 0
    for event in events:
        if event.get("sigmaforge_label") == "malicious":
            continue
        if any(event.get(name) in (None, "") for name in selection_fields):
            continue
        exposed += 1
    return exposed
16
+
17
+
18
def selection_fields(rule: dict) -> set[str]:
    """Extract the Sigma field names referenced by a rule's detection blocks.

    Every mapping-valued entry under ``detection`` is inspected (selection and
    filter blocks alike); the ``condition`` entry and any non-mapping entry
    (for example a list-valued block) are skipped. Field names may carry Sigma
    modifiers (e.g. ``'CommandLine|contains'``), which are stripped at the
    first pipe.

    Returns an empty set when the rule has no ``detection`` key, or when its
    value is ``None`` or otherwise not a mapping, instead of raising.
    """
    fields: set[str] = set()
    detection = rule.get("detection")
    # An empty `detection:` key parses to None; guard it (and any other
    # non-mapping value) so one malformed rule cannot abort the whole run.
    if not isinstance(detection, dict):
        return fields
    for key, block in detection.items():
        if key == "condition" or not isinstance(block, dict):
            continue
        for field in block:
            fields.add(str(field).split("|", 1)[0])
    return fields
@@ -0,0 +1,21 @@
1
+ from sigmaforge.records import RuleScore
2
+
3
+
4
def precision_or_unmeasured(s: RuleScore, min_events: int):
    """A12 floor + A2 coverage: report precision only when it is backed by data.

    Returns ``tp / (tp + fp)`` when ``s.events_evaluated`` reaches
    ``min_events`` and the rule fired at least once; otherwise returns the
    string ``"unmeasured"``.
    """
    if s.events_evaluated >= min_events:
        fired_total = s.tp + s.fp
        if fired_total:
            return s.tp / fired_total
    # Below the coverage floor, or no fires at all: there is no ratio to report.
    return "unmeasured"
10
+
11
+
12
def positive_control_ok(rule_fired: bool) -> bool:
    """A2: gate on the pinned known-malicious control event.

    The control MUST fire before any precision is trusted; if it did not, the
    field mapping/logsource is broken and precision is unmeasurable. The
    verdict is simply the ``rule_fired`` flag handed in by the caller.
    """
    control_passed = rule_fired
    return control_passed
16
+
17
+
18
def overfit_flag(fires_original: bool, fires_mutated: bool) -> bool:
    """A2 overfit guard: flag rules that match only the literal IOC.

    A behavioural rule fires on both the original event and its
    literal-IOC-mutated twin; a literal-string (IOC) rule fires on the original
    only. Returns True (overfit, literal-only) exactly when the original fired
    and the mutated twin did not.
    """
    if not fires_original:
        # Never fired on the original, so there is nothing to call overfit.
        return fires_original
    return not fires_mutated
@@ -0,0 +1,123 @@
1
+ """Per-technique recall (FIX B), sub-technique-granular (FIX B2).
2
+
3
+ The pooled recall `tp / n_attack_events` divides every rule's hits by the WHOLE
4
+ process-creation attack corpus. A single-technique rule can match at most the
5
+ events of its own technique, so a corpus-wide denominator caps its recall near
6
+ zero by construction (a recall-side tautology). FIX B measures each rule against
7
+ only the attack events of the rule's own ATT&CK technique(s).
8
+
9
+ FIX B2 — SUB-TECHNIQUE-GRANULAR scoping (no sibling dilution): technique IDs are
10
+ kept at full sub-technique granularity on BOTH sides (a rule tagged
11
+ ``attack.t1059.001`` yields ``T1059.001``; the corpus event keeps ``T1059.001``).
12
+ An attack event of technique X counts toward rule R's recall iff (ASYMMETRIC rule):
13
+
14
+ * X is EXACTLY one of R's (sub-)technique tags
15
+ (a T1059.001 rule ↔ T1059.001 events ONLY — NOT its T1059.003 siblings), OR
16
+ * R carries a BARE parent tag (T1059 with no sub-technique) and X is ANY child
17
+ of it (T1059.* or bare T1059) — a generic rule legitimately covers the whole
18
+ technique.
19
+
20
+ A T1059.001 rule is therefore NEVER scored against T1059.003 events (the sibling
21
+ dilution that inflated the denominator and deflated recall). A bare-T1059 rule IS
22
+ scored against all T1059.* events. Then:
23
+
24
+ denom = | attack events matching R per the rule above |
25
+ numer = | unique such events R fired on |
26
+ recall = numer / denom
27
+
28
+ A rule with no usable technique tag, or whose tags match ZERO attack events in
29
+ the corpus, is "unmeasured" (the sentinel string ``"unmeasured"``), NOT 0 and NOT
30
+ pooled — there is simply no matching event to measure it against in this corpus.
31
+
32
+ Identity contract: ``event_technique`` keys on the SAME event identity the
33
+ engine emits (``MatchRecord.event_id`` == ``_stable_event_id``), so a fire and
34
+ its ground-truth technique join correctly.
35
+ """
36
+
37
+ import re
38
+
39
UNMEASURED = "unmeasured"

# Tag granularity is preserved as declared: attack.t1059.001 -> T1059.001,
# while a bare attack.t1059 -> T1059. Matching ignores case.
_TECH_TAG = re.compile(r"^attack\.(t\d{4}(?:\.\d{3})?)$", re.IGNORECASE)


def rule_techniques(rule: dict) -> set[str]:
    """Collect the ATT&CK technique IDs named in a Sigma rule's ``tags``.

    FIX B2: IDs stay at the DECLARED granularity. ``attack.t1059.001`` yields
    ``{"T1059.001"}`` (not folded to T1059) and a bare ``attack.t1059`` yields
    ``{"T1059"}``. IDs are upper-cased. Tags that are not technique tags are
    ignored, and a missing or empty ``tags`` entry gives an empty set.
    """
    hits = (_TECH_TAG.match(str(tag)) for tag in rule.get("tags") or [])
    return {hit.group(1).upper() for hit in hits if hit}
59
+
60
+
61
+ def _event_matches_rule(event_tech: str | None, techniques: set[str]) -> bool:
62
+ """ASYMMETRIC match (FIX B2): does an event of ``event_tech`` count for a rule
63
+ whose technique set is ``techniques``?
64
+
65
+ - exact (sub-)technique match: ``T1059.001`` event ↔ ``T1059.001`` rule tag, OR
66
+ - bare-parent rule covers all children: a rule tagged bare ``T1059`` matches
67
+ any ``T1059`` or ``T1059.*`` event.
68
+ A sub-technique rule (``T1059.001``) does NOT match a sibling (``T1059.003``)
69
+ nor the bare parent's other children.
70
+ """
71
+ if event_tech is None:
72
+ return False
73
+ if event_tech in techniques:
74
+ return True
75
+ parent = event_tech.split(".", 1)[0]
76
+ return parent in techniques # bare-parent rule tag covers this child
77
+
78
+
79
def _technique_event_count_for_rule(
    techniques: set[str], technique_event_counts: dict[str, int]
) -> tuple[int, list[str]]:
    """Recall denominator for a rule, plus the corpus technique IDs feeding it.

    Applies the asymmetric rule to every corpus bucket with a positive count:
    a bare-parent tag (T1059) absorbs each T1059 / T1059.* bucket, whereas a
    sub-technique tag (T1059.001) absorbs only the exact T1059.001 bucket.

    Returns ``(denom, ids)`` where ``denom`` is the summed count of the
    absorbed buckets and ``ids`` is their technique IDs in sorted order.
    """
    denom = 0
    absorbed: list[str] = []
    for corpus_tech, n_events in technique_event_counts.items():
        if n_events > 0 and _event_matches_rule(corpus_tech, techniques):
            denom += n_events
            absorbed.append(corpus_tech)
    absorbed.sort()
    return denom, absorbed
93
+
94
+
95
def per_technique_recall(
    rule_id: str,
    techniques: set[str],
    fired_event_ids: set[str],
    event_technique: dict[str, str],
    technique_event_counts: dict[str, int],
) -> tuple[float | str, int, int, list[str]]:
    """Compute ``(recall, numer, denom, measured_techniques)`` for one rule.

    Args:
        rule_id: the rule being scored (not used in the computation itself).
        techniques: the rule's (sub-)technique set; empty means unmeasured.
        fired_event_ids: unique attack event_ids the rule fired on.
        event_technique: event_id -> ground-truth technique
            (sub-technique-granular).
        technique_event_counts: technique -> total attack PC events of that
            technique.

    Returns:
        ``(UNMEASURED, 0, 0, [])`` when the rule has no technique tag, or when
        no corpus technique matches its tags under the asymmetric rule
        (denom == 0). Otherwise ``(numer / denom, numer, denom, measured)``,
        where ``numer`` counts only the fires landing on an event whose
        technique matches the rule; a fire on an off-technique / sibling event
        (or on an event with no known technique) does NOT count.
    """
    if techniques:
        denom, measured = _technique_event_count_for_rule(techniques, technique_event_counts)
        if denom != 0:
            numer = 0
            for eid in fired_event_ids:
                if _event_matches_rule(event_technique.get(eid), techniques):
                    numer += 1
            return numer / denom, numer, denom, measured

    # No tags, or nothing in this corpus to measure the tags against.
    return UNMEASURED, 0, 0, []
@@ -0,0 +1,23 @@
1
+ from shipwright_kit.eval import EvalResult
2
+
3
+ from sigmaforge.records import RuleScore
4
+ from sigmaforge.score.gates import positive_control_ok, precision_or_unmeasured
5
+
6
+
7
def metrics(s: RuleScore) -> dict:
    """Return precision, recall, fpr and f1 for a rule's confusion matrix.

    A9: the numbers come from the validated ``shipwright_kit.eval.EvalResult``
    properties; precision/recall/fpr are deliberately not re-derived here.
    """
    evaluated = EvalResult(tp=s.tp, fp=s.fp, tn=s.tn, fn=s.fn)
    # Output key -> EvalResult attribute that supplies it.
    sources = (
        ("precision", "precision"),
        ("recall", "recall"),
        ("fpr", "false_positive_rate"),
        ("f1", "f1"),
    )
    return {key: getattr(evaluated, attr) for key, attr in sources}
16
+
17
+
18
def emit_precision(scores: list[RuleScore], positive_control_fired: bool, min_events: int) -> dict:
    """A2/A12: the ONLY sanctioned precision path.

    Returns a dict keyed by ``rule_id``. If the positive control did not fire
    (mapping broken), every rule maps to ``"unmeasured"`` and NO precision
    number is emitted. Otherwise each rule gets the floor-gated result of
    ``precision_or_unmeasured``.
    """
    control_fired = positive_control_ok(positive_control_fired)
    verdicts = {}
    for score in scores:
        rule_key = score.rule_id
        if control_fired:
            verdicts[rule_key] = precision_or_unmeasured(score, min_events)
        else:
            verdicts[rule_key] = "unmeasured"
    return verdicts
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: sigmaforge
3
+ Version: 0.1.0
4
+ Summary: Honest Sigma-rule backtest harness
5
+ Author-email: Christian Huhn <duathron@gmail.com>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: typer>=0.12.0
11
+ Requires-Dist: rich>=13.7.0
12
+ Requires-Dist: pydantic>=2.7.0
13
+ Requires-Dist: shipwright-kit>=0.8.0
14
+ Dynamic: license-file
15
+
16
+ # sigmaforge
17
+
18
+ **Honest Sigma-rule backtest harness.** Measures detection rules against real log
19
+ corpora and reports two numbers per rule — **recall** (does it catch attacks of its
20
+ ATT&CK sub-technique?) and **precision / false-positives** (does it fire on benign
21
+ activity?) — with honesty gates that return `unmeasured` instead of a fake `0` or a
22
+ tautological `1.0` when the data can't support a number.
23
+
24
+ [![CI](https://github.com/duathron/sigmaforge/actions/workflows/ci.yml/badge.svg)](https://github.com/duathron/sigmaforge/actions/workflows/ci.yml)
25
+ ![python](https://img.shields.io/badge/python-3.11%2B-blue)
26
+ ![license](https://img.shields.io/badge/license-MIT-green)
27
+
28
+ > [!NOTE]
29
+ > **Learning / portfolio project, built by directing AI coding agents.** Christian
30
+ > Huhn (photography → SOC career change) designed, reviewed, and gated the work; the
31
+ > implementation was AI-pair-programmed. It is an honest measurement harness, not a
32
+ > polished product — see *Status* below for exactly what works and what doesn't yet.
33
+
34
+ ## What problem it solves
35
+
36
+ Every SOC ships dozens to hundreds of detection rules and rarely measures them.
37
+ sigmaforge answers two questions with reproducible evidence:
38
+
39
+ - **Which rules are noise generators?** (high false-positives on legitimate activity)
40
+ - **Which rules catch nothing?** (zero recall against real attacks of their technique)
41
+
42
+ Example finding from a real run: *Suspicious Windows Service Tampering* produced 66
43
+ false-positives on a benign corpus — every one a Ninite / TeamViewer installer, not
44
+ an attack.
45
+
46
+ ## How it actually works
47
+
48
+ ```mermaid
49
+ flowchart LR
50
+ R[SigmaHQ rules] -->|partition high/critical| C[compile to one Zircolite ruleset]
51
+ C --> E[Zircolite engine]
52
+ A[(attack corpus<br/>sub-technique-labeled)] --> E
53
+ B[(benign corpus<br/>Nextron + OpTC)] --> E
54
+ E --> S[score: recall per technique<br/>+ precision/FP label-aware]
55
+ S --> G[honesty gates<br/>floor · positive-control · no-self-review]
56
+ G --> O[report.md + manifest]
57
+ ```
58
+
59
+ The real pipeline is **script-driven** (`scripts/run6_backtest.py` is the current
60
+ end-to-end path):
61
+
62
+ ```bash
63
+ uv run python scripts/compile_loaded_ruleset.py # rules -> one Zircolite ruleset
64
+ uv run python scripts/run6_backtest.py # backtest -> reports/run6.md
65
+ ```
66
+
67
+ > [!WARNING]
68
+ > The shipped CLI (`sigmaforge backtest`) is a **weaker, work-in-progress path** and
69
+ > is not the way the real reports were produced. Use the scripts above. The CLI is
70
+ > kept for the future one-command experience, not parity.
71
+
72
+ ## Status
73
+
74
+ | Area | State |
75
+ |------|-------|
76
+ | Recall (per sub-technique, no sibling dilution) | **Working** — 338/609 rules measurable, 70 fire (run5) |
77
+ | Precision / false-positives (label-aware, gated) | **Working** — 7/609 measurable on current benign corpus (run6) |
78
+ | Honesty gates (floor, positive-control, no-self-review) | **Working** |
79
+ | Reproducible manifest (run_hash, corpus SHAs, provenance) | **Working** |
80
+ | One-command CLI (`sigmaforge backtest`) | **WIP** — weaker than the scripts |
81
+ | Self-generated benign corpus | **Kit ready** (`scripts/selfgen/`), needs a Windows VM run |
82
+
83
+ > [!IMPORTANT]
84
+ > **The log corpora are not shipped.** They are large, separately licensed, and
85
+ > gitignored. `pip install sigmaforge` installs the harness code, not the data — a
86
+ > full end-to-end backtest needs the corpora and a local Zircolite checkout (also not
87
+ > bundled). The package is useful as a library / reference; the runnable pipeline
88
+ > needs the local setup documented in `scripts/`.
89
+
90
+ ## Install
91
+
92
+ ```bash
93
+ pip install sigmaforge
94
+ ```
95
+
96
+ Installs the harness package and the `sigmaforge` CLI. The detection engine
97
+ ([Zircolite](https://github.com/wagga40/Zircolite)) and the log corpora are obtained
98
+ separately (see above).
99
+
100
+ ## Corpora used (all verified, portfolio-safe licenses)
101
+
102
+ | Corpus | Role | License |
103
+ |--------|------|---------|
104
+ | [splunk/attack_data](https://github.com/splunk/attack_data) | recall (sub-technique-labeled attacks) | Apache-2.0 |
105
+ | [DARPA OpTC](https://github.com/FiveDirections/OpTC-data) | precision (real enterprise benign week) | Public domain |
106
+ | [NextronSystems/evtx-baseline](https://github.com/NextronSystems/evtx-baseline) | precision (goodware baseline) | Apache-2.0 |
107
+ | Self-generated (`scripts/selfgen/`) | precision (targeted admin/LOLBin noise) | your own lab |
108
+
109
+ ## Development
110
+
111
+ Built with the [Shipwright](https://github.com/duathron/shipwright) dev framework.
112
+
113
+ ```bash
114
+ uv sync --dev
115
+ uv run pytest # 108 tests
116
+ uv run ruff check .
117
+ ```
118
+
119
+ ## License
120
+
121
+ MIT © Christian Huhn. Corpus data retains its upstream license (see table above).
@@ -0,0 +1,31 @@
1
+ sigmaforge/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ sigmaforge/banner.py,sha256=esg3wB57cukVZ8guPCnG3KApopi4BXgyMgKRnx1DXpQ,413
3
+ sigmaforge/config.py,sha256=6l67AKgYb2h7lb3CnwiTivw9HFi47V8EZdH1_lZDBZw,863
4
+ sigmaforge/detect.py,sha256=D5diyPAXSQknxBVkenvOCyRgzAJ7J5D5EEYnZqFy60M,1401
5
+ sigmaforge/main.py,sha256=lJ7smiZ-SCeNuJuTKPRHhwuZ_mrMNVsmOl7Lw5mOrEk,2263
6
+ sigmaforge/orchestrate.py,sha256=Y93RmJMMVSt4bHMmEB4UkTxsYJZgDEE2Rnqo93E-UOw,7964
7
+ sigmaforge/records.py,sha256=L0lWV1IYOYoexd1rWohrVwbQEdmOpSkwEEV85cua1Kk,302
8
+ sigmaforge/runmanifest.py,sha256=omfAjgkHaVA_g45TJNb-BkpPiKDPyCJ4vGviRRjwlrE,759
9
+ sigmaforge/backtest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ sigmaforge/backtest/runner.py,sha256=_dEkj8m-G67NMC7G7nE7trIs4Z30VyX8i7QJ3RaQvXM,741
11
+ sigmaforge/crosscheck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ sigmaforge/crosscheck/chainsaw.py,sha256=VVzgnMbNLIURvqrWKMxRNwdJotDh91-19N7BTQskaIs,649
13
+ sigmaforge/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ sigmaforge/ingest/chunker.py,sha256=Pn82EGIqAJhMj29VWHcoY7-RlU7nGgqhTm65qOC0ny0,615
15
+ sigmaforge/ingest/ruleload.py,sha256=3s6VB4ScbfZ-RYVJiTmWJf2ec-eSyG_06xd1EjBL8F4,706
16
+ sigmaforge/ingest/zircolite_runner.py,sha256=eHP9ndwDZ-J4tNCmEjPEOqaA1w6UxzffDeSoZJ0pnUc,4526
17
+ sigmaforge/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ sigmaforge/report/render.py,sha256=5Z_0V4j4NTiit7UB757PO6NVH5GZJqLT12gG48I5_wg,4535
19
+ sigmaforge/score/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ sigmaforge/score/acceptance.py,sha256=Ck9aA5C-_L8HfXoCVrOHu_Pj9_ePoIEf0JTbrINPPXM,3923
21
+ sigmaforge/score/adapter.py,sha256=cvb6z0fMZTjxSk0pERuaySwB5j42JrOQL6R-E7iRA-g,811
22
+ sigmaforge/score/coverage.py,sha256=WDi-sS6uoL6NHvLGs_8n6w5qe7R3uzUQmFrTYX0Gxt0,1576
23
+ sigmaforge/score/gates.py,sha256=htzBavPzv5Bg0XJqJnYFSptnCGZv0sN1cehunWK9Rvw,924
24
+ sigmaforge/score/recall.py,sha256=URqTfP6sYFce2SwNvR0KsO_IuNx7ploiDtSSBtq_Ao0,5427
25
+ sigmaforge/score/scorer.py,sha256=vpDS04LOA8d9Hfsx5LxyEv15Gdl4DPchm-FYqLHLLT4,980
26
+ sigmaforge-0.1.0.dist-info/licenses/LICENSE,sha256=BVf5pqest078hZ5byAbwbvGWuPUYGwdeNp7gnRaJebU,1071
27
+ sigmaforge-0.1.0.dist-info/METADATA,sha256=YnsPfuCAO1bJ7-e4uHXjWhj9X1INjeMq3ac0Knu_O_M,5021
28
+ sigmaforge-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
29
+ sigmaforge-0.1.0.dist-info/entry_points.txt,sha256=-mfqUFO_dOojOBqMCxSDn521tzCBKZZqNHItaHIqkME,52
30
+ sigmaforge-0.1.0.dist-info/top_level.txt,sha256=6rhsK1MwVzIwuB9v78C9W4kNivGlm2Lh4W0FFH7gbJc,11
31
+ sigmaforge-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sigmaforge = sigmaforge.main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Christian Huhn
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ sigmaforge