crprotocol 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. crp/__init__.py +126 -0
  2. crp/__main__.py +8 -0
  3. crp/_typing.py +27 -0
  4. crp/_version.py +5 -0
  5. crp/adapters.py +31 -0
  6. crp/advanced/__init__.py +40 -0
  7. crp/advanced/auto_ingest.py +400 -0
  8. crp/advanced/cqs.py +235 -0
  9. crp/advanced/cross_window.py +477 -0
  10. crp/advanced/curator.py +265 -0
  11. crp/advanced/feedback.py +146 -0
  12. crp/advanced/hierarchical.py +211 -0
  13. crp/advanced/meta_learning.py +401 -0
  14. crp/advanced/parallel.py +98 -0
  15. crp/advanced/review_cycle.py +329 -0
  16. crp/advanced/scale_mode.py +129 -0
  17. crp/advanced/source_grounding.py +207 -0
  18. crp/ckf/__init__.py +35 -0
  19. crp/ckf/community.py +377 -0
  20. crp/ckf/fabric.py +445 -0
  21. crp/ckf/gc.py +175 -0
  22. crp/ckf/graph_walk.py +87 -0
  23. crp/ckf/merge.py +133 -0
  24. crp/ckf/pattern_query.py +122 -0
  25. crp/ckf/pubsub.py +128 -0
  26. crp/ckf/semantic.py +207 -0
  27. crp/cli/__init__.py +7 -0
  28. crp/cli/main.py +329 -0
  29. crp/cli/sidecar.py +929 -0
  30. crp/cli/startup.py +272 -0
  31. crp/continuation/__init__.py +103 -0
  32. crp/continuation/completion.py +348 -0
  33. crp/continuation/degradation.py +157 -0
  34. crp/continuation/document_map.py +160 -0
  35. crp/continuation/flow.py +109 -0
  36. crp/continuation/gap.py +419 -0
  37. crp/continuation/manager.py +484 -0
  38. crp/continuation/quality_monitor.py +179 -0
  39. crp/continuation/stitch.py +419 -0
  40. crp/continuation/trigger.py +142 -0
  41. crp/continuation/voice.py +157 -0
  42. crp/core/__init__.py +69 -0
  43. crp/core/batch.py +77 -0
  44. crp/core/circuit_breaker.py +116 -0
  45. crp/core/config.py +377 -0
  46. crp/core/context_tools.py +540 -0
  47. crp/core/dispatch_router.py +3977 -0
  48. crp/core/errors.py +128 -0
  49. crp/core/extraction_facade.py +384 -0
  50. crp/core/facilitator.py +713 -0
  51. crp/core/idempotency.py +215 -0
  52. crp/core/orchestrator.py +1435 -0
  53. crp/core/relay_strategies.py +613 -0
  54. crp/core/security_manager.py +140 -0
  55. crp/core/session.py +134 -0
  56. crp/core/task_intent.py +36 -0
  57. crp/core/window.py +363 -0
  58. crp/envelope/__init__.py +30 -0
  59. crp/envelope/builder.py +288 -0
  60. crp/envelope/decomposer.py +236 -0
  61. crp/envelope/formatter.py +168 -0
  62. crp/envelope/packer.py +211 -0
  63. crp/envelope/reranker.py +209 -0
  64. crp/envelope/scoring.py +310 -0
  65. crp/extraction/__init__.py +45 -0
  66. crp/extraction/complexity.py +96 -0
  67. crp/extraction/contradiction.py +132 -0
  68. crp/extraction/pipeline.py +360 -0
  69. crp/extraction/quality_gate.py +237 -0
  70. crp/extraction/stage1_regex.py +173 -0
  71. crp/extraction/stage2_statistical.py +244 -0
  72. crp/extraction/stage3_gliner.py +210 -0
  73. crp/extraction/stage4_uie.py +183 -0
  74. crp/extraction/stage5_discourse.py +175 -0
  75. crp/extraction/stage6_llm.py +178 -0
  76. crp/extraction/structured_output.py +219 -0
  77. crp/extraction/types.py +299 -0
  78. crp/license_guard.py +722 -0
  79. crp/observability/__init__.py +30 -0
  80. crp/observability/audit.py +118 -0
  81. crp/observability/events.py +233 -0
  82. crp/observability/metrics.py +264 -0
  83. crp/observability/quality.py +135 -0
  84. crp/observability/structured_logging.py +81 -0
  85. crp/observability/telemetry.py +117 -0
  86. crp/provenance/__init__.py +314 -0
  87. crp/provenance/_embeddings.py +97 -0
  88. crp/provenance/_types.py +378 -0
  89. crp/provenance/attribution_scorer.py +252 -0
  90. crp/provenance/claim_detector.py +229 -0
  91. crp/provenance/contradiction_detector.py +243 -0
  92. crp/provenance/distortion_detector.py +397 -0
  93. crp/provenance/entailment_verifier.py +358 -0
  94. crp/provenance/fabrication_detector.py +203 -0
  95. crp/provenance/hallucination_scorer.py +320 -0
  96. crp/provenance/omission_analyzer.py +106 -0
  97. crp/provenance/provenance_chain.py +205 -0
  98. crp/provenance/report_generator.py +440 -0
  99. crp/providers/__init__.py +43 -0
  100. crp/providers/anthropic.py +270 -0
  101. crp/providers/base.py +135 -0
  102. crp/providers/custom.py +63 -0
  103. crp/providers/diagnostic.py +251 -0
  104. crp/providers/llamacpp.py +224 -0
  105. crp/providers/manager.py +139 -0
  106. crp/providers/ollama.py +243 -0
  107. crp/providers/openai.py +628 -0
  108. crp/providers/tokenizers.py +48 -0
  109. crp/py.typed +0 -0
  110. crp/resources/__init__.py +53 -0
  111. crp/resources/adaptive_allocator.py +525 -0
  112. crp/resources/cost_model.py +388 -0
  113. crp/resources/overhead_manager.py +217 -0
  114. crp/resources/resource_manager.py +262 -0
  115. crp/schemas/__init__.py +20 -0
  116. crp/schemas/cost-estimate.json +33 -0
  117. crp/schemas/crp-error.json +43 -0
  118. crp/schemas/envelope-preview.json +40 -0
  119. crp/schemas/persisted-state-header.json +27 -0
  120. crp/schemas/quality-report.json +94 -0
  121. crp/schemas/session-handle.json +33 -0
  122. crp/schemas/session-status.json +57 -0
  123. crp/schemas/stream-event.json +18 -0
  124. crp/schemas/task-intent.json +42 -0
  125. crp/security/__init__.py +93 -0
  126. crp/security/audit_trail.py +392 -0
  127. crp/security/binding.py +192 -0
  128. crp/security/compliance.py +813 -0
  129. crp/security/consent.py +593 -0
  130. crp/security/embedding_defense.py +161 -0
  131. crp/security/encryption.py +202 -0
  132. crp/security/injection.py +335 -0
  133. crp/security/integrity.py +267 -0
  134. crp/security/privacy.py +662 -0
  135. crp/security/quarantine.py +249 -0
  136. crp/security/rbac.py +221 -0
  137. crp/security/validation.py +164 -0
  138. crp/state/__init__.py +31 -0
  139. crp/state/cold_storage.py +258 -0
  140. crp/state/compaction.py +263 -0
  141. crp/state/critical_state.py +104 -0
  142. crp/state/event_log.py +313 -0
  143. crp/state/fact.py +189 -0
  144. crp/state/serialization.py +189 -0
  145. crp/state/session_cleanup.py +77 -0
  146. crp/state/snapshot.py +290 -0
  147. crp/state/warm_store.py +346 -0
  148. crprotocol-2.0.0.dist-info/METADATA +1295 -0
  149. crprotocol-2.0.0.dist-info/RECORD +153 -0
  150. crprotocol-2.0.0.dist-info/WHEEL +4 -0
  151. crprotocol-2.0.0.dist-info/entry_points.txt +2 -0
  152. crprotocol-2.0.0.dist-info/licenses/LICENSE.md +170 -0
  153. crprotocol-2.0.0.dist-info/licenses/NOTICE +18 -0
@@ -0,0 +1,219 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Structured output handling — schema/grammar enforcement (§06 §6.9, 2J).
4
+
5
+ Supports: Outlines FSM, GBNF grammar, logit masking, fallback JSON repair.
6
+ All integrations are optional — graceful fallback if libraries unavailable.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ import re
14
+ from typing import Any
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # JSON repair (always available — no external deps)
21
+ # ---------------------------------------------------------------------------
22
+
23
# Comma directly before a closing brace/bracket (JSON forbids trailing commas).
_TRAILING_COMMA_RE = re.compile(r",\s*([}\]])")
# Identifier-like object key that is missing its double quotes.
_BARE_KEY_RE = re.compile(r"([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)(\s*:)")


def _is_valid_json(candidate: str) -> bool:
    """Return True when *candidate* parses as JSON."""
    try:
        json.loads(candidate)
    except (json.JSONDecodeError, ValueError):
        return False
    return True


def _close_truncated(text: str) -> str | None:
    """Append the closers a truncated JSON document is missing.

    Scans *text* while tracking double-quoted strings, so brackets inside
    string values are ignored, and closes containers innermost-first.
    Returns None when nothing is left open or the closers already present
    are mismatched.
    """
    pending: list[str] = []
    in_string = False
    escaped = False
    for ch in text:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]":
            if not pending or pending.pop() != ch:
                return None
    if not pending and not in_string:
        return None
    if in_string:
        if escaped:
            # A dangling backslash would escape the quote we are about to add.
            text = text[:-1]
        text += '"'
    return text + "".join(reversed(pending))


def repair_json(raw: str) -> str | None:
    """Best-effort repair of malformed JSON.

    Handles: markdown fences, trailing commas, unquoted identifier keys,
    single quotes, and truncated output (unclosed strings, objects, arrays).
    Repairs are tried from least to most invasive, and each one is also tried
    on the text *without* the single-quote rewrite, so an apostrophe inside an
    otherwise valid string no longer defeats the other repairs.

    Args:
        raw: Model output that is expected to contain one JSON document.

    Returns:
        A string that ``json.loads`` accepts, or None if unrecoverable.
    """
    # Strip markdown fences
    cleaned = re.sub(r"```(?:json)?\s*", "", raw).strip()
    cleaned = re.sub(r"```\s*$", "", cleaned).strip()

    # Try direct parse first
    if _is_valid_json(cleaned):
        return cleaned

    # Candidate base texts, least invasive first.  The quote rewrite is naive
    # (it also touches apostrophes inside strings), hence the untouched
    # variant always stays in the running.
    requoted = cleaned.replace("'", '"')
    bases: list[str] = []
    for variant in (
        cleaned,
        requoted,
        _BARE_KEY_RE.sub(r'\1"\2"\3', cleaned),
        _BARE_KEY_RE.sub(r'\1"\2"\3', requoted),
    ):
        if variant not in bases:
            bases.append(variant)

    # Pass 1: quote / key fixes alone (``cleaned`` itself was tried above).
    for base in bases[1:]:
        if _is_valid_json(base):
            return base

    # Pass 2: additionally remove trailing commas before } or ].
    for base in bases:
        candidate = _TRAILING_COMMA_RE.sub(r"\1", base)
        if _is_valid_json(candidate):
            return candidate

    # Pass 3: close truncated output.  Commas are stripped *after* closing so
    # a comma left dangling at the cut-off point is removed as well.
    for base in bases:
        closed = _close_truncated(base)
        if closed is None:
            continue
        for candidate in (closed, _TRAILING_COMMA_RE.sub(r"\1", closed)):
            if _is_valid_json(candidate):
                return candidate

    return None
69
+
70
+
71
def validate_json_schema(data: Any, schema: dict[str, Any]) -> list[str]:
    """Check *data* against a JSON Schema and collect the violations.

    Validation uses ``jsonschema``'s Draft 7 validator when that package can
    be imported.  When it cannot, the check is skipped and an empty list is
    returned, so an empty result does not by itself prove the data is valid.

    Returns:
        One message per schema violation, or ``[]``.
    """
    messages: list[str] = []
    try:
        import jsonschema  # type: ignore[import-untyped]

        for violation in jsonschema.Draft7Validator(schema).iter_errors(data):
            messages.append(violation.message)
    except ImportError:
        # jsonschema not installed — skip validation
        return []
    return messages
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Outlines FSM integration (optional)
85
+ # ---------------------------------------------------------------------------
86
+
87
class OutlinesFSMHandler:
    """Outlines-based constrained generation via finite state machine.

    ``outlines`` is an optional dependency: it is imported lazily and every
    method degrades to ``None`` / ``False`` when it is missing.
    """

    def __init__(self) -> None:
        # Tri-state cache: None until the import has been probed once.
        self._available: bool | None = None

    @property
    def is_available(self) -> bool:
        """Whether ``outlines`` can be imported (probed once, then cached)."""
        if self._available is None:
            try:
                import outlines  # type: ignore[import-untyped]  # noqa: F401
            except ImportError:
                self._available = False
            else:
                self._available = True
        return self._available

    def build_guide(self, schema: dict[str, Any]) -> Any:
        """Build an Outlines JSON guide from a JSON Schema.

        Returns:
            Whatever ``outlines.generate.json`` returns, or None when Outlines
            is unavailable or the call fails.
        """
        if not self.is_available:
            return None
        try:
            from outlines.generate import json as outlines_json  # type: ignore[import-untyped]

            # NOTE(review): in published Outlines releases generate.json()
            # appears to take the model as its first argument — confirm that
            # this single-argument call is valid for the pinned version.
            return outlines_json(schema)
        except Exception:
            # Deliberately best-effort, but keep the traceback so the cause of
            # the failure is visible in the logs instead of being discarded.
            logger.warning("Failed to build Outlines guide", exc_info=True)
            return None
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # GBNF grammar support (for llama.cpp providers)
119
+ # ---------------------------------------------------------------------------
120
+
121
def json_schema_to_gbnf(schema: dict[str, Any]) -> str | None:
    """Convert a simple JSON Schema to GBNF grammar string.

    Handles flat object schemas with string/number/integer/boolean/array
    properties.  Every listed property is required, in schema order.  Complex
    nested schemas require the full llama.cpp grammar converter; property
    types this function does not know are constrained to a JSON string.

    The whole ``root`` rule is emitted on a single line and property names
    are emitted as escaped string literals, both of which GBNF requires.

    Returns:
        The grammar text, or None when the schema declares no properties.
    """
    props = schema.get("properties", {})
    if not props:
        return None

    root_parts: list[str] = ['"{"', "ws"]
    used_rules: set[str] = set()

    for i, (name, prop_schema) in enumerate(props.items()):
        # Boolean sub-schemas (True/False) carry no type information.
        ptype = prop_schema.get("type", "string") if isinstance(prop_schema, dict) else "string"
        if isinstance(ptype, list):
            # e.g. ["string", "null"]: constrain to the first concrete type.
            ptype = next((t for t in ptype if t != "null"), "string")
        type_rule = _type_to_gbnf(ptype, name)
        used_rules.add(type_rule.split()[-1])

        if i > 0:
            root_parts.append('ws "," ws')
        # The key must appear in the output exactly as JSON would encode it.
        key_literal = _gbnf_literal(json.dumps(str(name), ensure_ascii=False))
        root_parts.append(f'{key_literal} ws ":"')
        root_parts.append(type_rule)

    root_parts.append('ws "}"')

    rules: list[str] = [
        "root ::= " + " ".join(root_parts),
        'ws ::= [ \\t\\n]*',
        'string ::= "\\"" [^"\\\\]* "\\""',
        'number ::= "-"? [0-9]+ ("." [0-9]+)?',
        'boolean ::= "true" | "false"',
    ]
    # Only define the extra rules that the root rule actually references.
    if "integer" in used_rules:
        rules.append('integer ::= "-"? [0-9]+')
    if "array" in used_rules:
        rules.append('value ::= string | number | boolean | "null"')
        rules.append('array ::= "[" ws (value (ws "," ws value)*)? ws "]"')

    return "\n".join(rules)


def _gbnf_literal(text: str) -> str:
    """Quote *text* as a GBNF string literal (escapes backslash and quote)."""
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _type_to_gbnf(json_type: str, _name: str) -> str:
    """Map a JSON Schema type name to ``ws <rule>`` for the value position.

    Unknown or non-string type descriptors fall back to ``ws string``.
    """
    mapping = {
        "string": "ws string",
        "number": "ws number",
        "integer": "ws integer",
        "boolean": "ws boolean",
        "array": "ws array",
    }
    if not isinstance(json_type, str):
        return "ws string"
    return mapping.get(json_type, "ws string")
158
+
159
+
160
+ # ---------------------------------------------------------------------------
161
+ # Composite handler
162
+ # ---------------------------------------------------------------------------
163
+
164
class StructuredOutputHandler:
    """Orchestrates structured-output enforcement.

    Priority order:
    1. Outlines FSM (if available and provider supports it)
    2. GBNF grammar (if provider is llama.cpp compatible)
    3. Logit masking (if provider supports token-level constraints)
    4. Fallback: post-hoc JSON repair + validation

    This class itself only exposes the builders for (1) and (2) and performs
    the fallback (4) in :meth:`enforce`.
    """

    def __init__(self) -> None:
        self._outlines = OutlinesFSMHandler()

    @property
    def outlines_available(self) -> bool:
        """True when the optional Outlines integration can be used."""
        return self._outlines.is_available

    def enforce(
        self,
        raw_output: str,
        schema: dict[str, Any] | None = None,
    ) -> tuple[Any | None, list[str]]:
        """Parse *raw_output* as JSON, repairing it if needed, then validate.

        Returns ``(parsed_data, errors)``:

        * unparseable even after repair -> ``(None, ["json_parse_failed"])``
        * no *schema*: ``errors`` is ``[]``, or ``["json_repaired"]`` when the
          text had to be repaired first
        * with *schema*: ``errors`` holds the schema violations only (a
          repair is not reported in this case)
        """
        was_repaired = False
        try:
            parsed = json.loads(raw_output)
        except (json.JSONDecodeError, ValueError):
            fixed = repair_json(raw_output)
            if fixed is None:
                return None, ["json_parse_failed"]
            parsed = json.loads(fixed)
            was_repaired = True

        if schema is not None:
            # Validate against schema
            return parsed, validate_json_schema(parsed, schema)

        # No schema — the parse outcome is the whole result
        notes = ["json_repaired"] if was_repaired else []
        return parsed, notes

    def build_gbnf(self, schema: dict[str, Any]) -> str | None:
        """Build a GBNF grammar string for llama.cpp providers."""
        return json_schema_to_gbnf(schema)

    def build_outlines_guide(self, schema: dict[str, Any]) -> Any:
        """Build an Outlines FSM guide (returns None if unavailable)."""
        return self._outlines.build_guide(schema)
@@ -0,0 +1,299 @@
1
+ # Copyright © 2025 Constantinos Vidiniotis. All rights reserved.
2
+ # Licensed under Elastic License 2.0 — see LICENSE.md for details.
3
+ """Extraction pipeline data types — Fact, FactEdge, FactGraph, ExtractionResult."""
4
+
5
from __future__ import annotations

import time
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, ClassVar
12
+
13
+
14
class ContentType(str, Enum):
    """Content complexity classification for pipeline routing.

    The ``str`` mixin makes every member compare equal to its string value,
    and each value is identical to the member name.
    """

    ENTITY_RICH = "ENTITY_RICH"
    REASONING_DENSE = "REASONING_DENSE"
    NARRATIVE = "NARRATIVE"
20
+
21
+
22
class RelationType(str, Enum):
    """Semantic relation types for FactEdge records.

    The ``str`` mixin makes every member compare equal to its string value,
    and each value is identical to the member name.
    """

    CONDITION_FOR = "CONDITION_FOR"
    CAUSE_EFFECT = "CAUSE_EFFECT"
    CONTRAST = "CONTRAST"
    CONCESSION = "CONCESSION"
    CONSEQUENCE = "CONSEQUENCE"
    ELABORATION = "ELABORATION"
    SEQUENCE = "SEQUENCE"
    # Used as the default relation of a FactEdge.
    RELATED = "RELATED"
33
+
34
+
35
@dataclass
class Fact:
    """Single extracted fact from the extraction pipeline.

    Lightweight record — embedding is computed lazily in Phase 4 (state layer).

    Metadata limits are enforced only by :meth:`set_metadata` and
    :meth:`validate_metadata`; assigning to ``metadata`` directly or passing
    it to the constructor performs no check.
    """

    # Metadata size limits (§audit M4).
    # ClassVar keeps these as class-level constants.  As plain annotated
    # attributes the dataclass decorator turned them into the first three
    # __init__ parameters, so Fact("some-id", "text") overwrote the limits.
    MAX_METADATA_KEYS: ClassVar[int] = 64
    MAX_KEY_LENGTH: ClassVar[int] = 128
    MAX_VALUE_SIZE: ClassVar[int] = 4096

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    text: str = ""
    category: str = ""
    source_window_id: str = ""
    confidence: float = 0.0
    extraction_stage: int = 0
    created_at: float = field(default_factory=time.time)
    metadata: dict[str, Any] = field(default_factory=dict)

    # Quality gate flags (set by post-extraction validation)
    flagged_confidence: bool = False
    confidence_flag_reason: str = ""

    # Supersession (set by contradiction detection)
    superseded_by: str | None = None
    supersession_confidence: float = 0.0

    def validate_metadata(self) -> None:
        """Enforce metadata size limits (§audit M4).

        Key and value sizes are measured on their ``str()`` form.

        Raises:
            ValueError: If there are too many keys, or a key or value is too
                long.
        """
        if len(self.metadata) > self.MAX_METADATA_KEYS:
            raise ValueError(
                f"Fact metadata exceeds {self.MAX_METADATA_KEYS} keys "
                f"(got {len(self.metadata)})"
            )
        for key, value in self.metadata.items():
            if len(str(key)) > self.MAX_KEY_LENGTH:
                raise ValueError(
                    f"Metadata key exceeds {self.MAX_KEY_LENGTH} chars: {str(key)[:50]}..."
                )
            val_str = str(value)
            if len(val_str) > self.MAX_VALUE_SIZE:
                raise ValueError(
                    f"Metadata value for '{key}' exceeds {self.MAX_VALUE_SIZE} chars "
                    f"(got {len(val_str)})"
                )

    def set_metadata(self, key: str, value: Any) -> None:
        """Set a metadata key with size validation.

        Raises:
            ValueError: If the key or value is too long, or adding a *new* key
                would exceed the key-count limit.  Overwriting an existing key
                is always allowed with respect to the count.
        """
        if len(str(key)) > self.MAX_KEY_LENGTH:
            raise ValueError(f"Metadata key exceeds {self.MAX_KEY_LENGTH} chars")
        if len(str(value)) > self.MAX_VALUE_SIZE:
            raise ValueError(f"Metadata value exceeds {self.MAX_VALUE_SIZE} chars")
        if key not in self.metadata and len(self.metadata) >= self.MAX_METADATA_KEYS:
            raise ValueError(f"Metadata exceeds {self.MAX_METADATA_KEYS} keys limit")
        self.metadata[key] = value
95
+
96
+
97
@dataclass
class FactEdge:
    """Directed relation between two facts or text spans.

    The edge points from ``source_id`` to ``target_id``.
    """

    # Random UUID4 string generated per instance unless supplied.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Endpoint identifiers; FactGraph.add_edge matches them against node ids.
    source_id: str = ""
    target_id: str = ""
    # Either a RelationType member or a free-form string label.
    relation_type: RelationType | str = RelationType.RELATED
    # presumably a score in [0, 1] — TODO confirm against the producers
    confidence: float = 0.0
    # presumably the number of the extraction stage that emitted the edge — confirm
    source_stage: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)
108
+
109
+
110
@dataclass
class FactGraph:
    """In-memory graph of facts and edges.

    ``nodes`` maps fact id to fact and ``edges`` is the flat edge list.  Two
    private dictionaries index the same edge objects by source and by target
    id; they are derived data, so they are excluded from ``repr`` and ``==``.
    """

    nodes: dict[str, Fact] = field(default_factory=dict)
    edges: list[FactEdge] = field(default_factory=list)
    # Edge indices for O(1) lookup (§audit L4)
    _edges_from: dict[str, list[FactEdge]] = field(
        default_factory=dict, repr=False, compare=False
    )
    _edges_to: dict[str, list[FactEdge]] = field(
        default_factory=dict, repr=False, compare=False
    )

    def __post_init__(self) -> None:
        # A graph built as FactGraph(nodes=..., edges=...) used to start with
        # empty indices, so every lookup ignored those edges.  Build the
        # indices here unless the caller supplied them explicitly.
        if self.edges and not self._edges_from and not self._edges_to:
            for edge in self.edges:
                self._index_edge(edge)

    def _index_edge(self, edge: FactEdge) -> None:
        """Record *edge* in both lookup indices."""
        self._edges_from.setdefault(edge.source_id, []).append(edge)
        self._edges_to.setdefault(edge.target_id, []).append(edge)

    def add_fact(self, fact: Fact) -> None:
        """Insert *fact*, replacing any node that has the same id."""
        self.nodes[fact.id] = fact

    def remove_fact(self, fact_id: str) -> None:
        """Remove a fact and all its edges from the graph (§audit2 STATE-H5).

        Unknown ids are ignored.
        """
        self.nodes.pop(fact_id, None)
        # Remove edges referencing this fact
        self.edges = [
            e for e in self.edges
            if e.source_id != fact_id and e.target_id != fact_id
        ]
        # Clean edge indices
        self._edges_from.pop(fact_id, None)
        self._edges_to.pop(fact_id, None)
        # Remove from other nodes' index entries, dropping emptied buckets
        for index in (self._edges_from, self._edges_to):
            for key in list(index):
                kept = [
                    e for e in index[key]
                    if e.source_id != fact_id and e.target_id != fact_id
                ]
                if kept:
                    index[key] = kept
                else:
                    del index[key]

    def add_edge(self, edge: FactEdge) -> None:
        """Add *edge* if both of its endpoints are nodes of this graph.

        Edges that reference a missing fact are silently skipped.
        """
        # Skip edges referencing non-existent facts (§audit G7)
        if edge.source_id not in self.nodes or edge.target_id not in self.nodes:
            return
        self.edges.append(edge)
        # Maintain O(1) edge indices (§audit L4)
        self._index_edge(edge)

    def edges_from(self, fact_id: str) -> list[FactEdge]:
        """Return a copy of the list of edges whose source is *fact_id*."""
        return list(self._edges_from.get(fact_id, []))

    def edges_to(self, fact_id: str) -> list[FactEdge]:
        """Return a copy of the list of edges whose target is *fact_id*."""
        return list(self._edges_to.get(fact_id, []))

    def subgraph_for(self, fact_ids: set[str], max_hops: int = 1) -> FactGraph:
        """Return subgraph containing *fact_ids* plus neighbours within *max_hops*.

        Edges are followed in both directions.  Ids that are not nodes of this
        graph are dropped from the result.  The subgraph shares the Fact and
        FactEdge objects with this graph.
        """
        visited: set[str] = set(fact_ids)
        frontier = set(fact_ids)
        for _ in range(max_hops):
            next_frontier: set[str] = set()
            for fid in frontier:
                for e in self._edges_from.get(fid, []):
                    if e.target_id not in visited:
                        next_frontier.add(e.target_id)
                for e in self._edges_to.get(fid, []):
                    if e.source_id not in visited:
                        next_frontier.add(e.source_id)
            visited |= next_frontier
            frontier = next_frontier
        sub_nodes = {fid: self.nodes[fid] for fid in visited if fid in self.nodes}
        sub_edges = [e for e in self.edges if e.source_id in visited and e.target_id in visited]
        # __post_init__ builds the subgraph's edge indices (§audit2 STATE-H4)
        return FactGraph(nodes=sub_nodes, edges=sub_edges)

    def serialize_for_envelope(self) -> str:
        """Plain-text serialisation for envelope packing.

        Emits one ``- <text>`` line per fact, each followed by one line per
        outgoing edge showing the relation label and the target's text.
        """
        lines: list[str] = []
        for fid, fact in self.nodes.items():
            lines.append(f"- {fact.text}")
            for edge in self.edges_from(fid):
                target = self.nodes.get(edge.target_id)
                if target is None:
                    continue
                rel = edge.relation_type
                # Format the enum's value explicitly, not the member itself.
                if isinstance(rel, Enum):
                    rel = rel.value
                lines.append(f" ↳ [{rel}] {target.text}")
        return "\n".join(lines)
192
+
193
+
194
class ValidationSeverity(str, Enum):
    """Severity level attached to a ValidationIssue.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
198
+
199
+
200
@dataclass
class ValidationIssue:
    """Single issue found by the quality gate."""

    # Free-form issue identifier; the set of values is not defined here.
    type: str = ""
    # Defaults to the lowest severity.
    severity: ValidationSeverity = ValidationSeverity.LOW
    # presumably a human-readable explanation — confirm against the quality gate
    detail: str = ""
207
+
208
+
209
@dataclass
class ValidationResult:
    """Result from one quality-gate tier."""

    # Number of the tier that produced this result.
    tier: int = 0
    # Defaults to True; nothing in this class derives it from ``issues``,
    # so the producer must set it explicitly.
    passed: bool = True
    issues: list[ValidationIssue] = field(default_factory=list)
216
+
217
+
218
@dataclass
class Contradiction:
    """A detected contradiction between two facts."""

    # The two facts involved; either may be None on a default-constructed record.
    fact_a: Fact | None = None
    fact_b: Fact | None = None
    # NOTE(review): the scales and exact meaning of the three scores below are
    # not defined in this module — confirm against the contradiction detector.
    similarity: float = 0.0
    content_diff: float = 0.0
    confidence: float = 0.0
227
+
228
+
229
@dataclass
class FactEvent:
    """Audit-log entry for fact lifecycle events.

    NOTE(review): entries are meant to be immutable, but the dataclass is not
    declared ``frozen``, so nothing here prevents mutation after creation.
    """

    event_id: int = 0
    # Wall-clock creation time from time.time() (seconds since the epoch).
    timestamp: float = field(default_factory=time.time)
    window_id: str = ""
    event_type: str = ""  # "created" | "superseded" | "compacted" | "archived" | "restored"
    # Id of the fact the event refers to.
    fact_id: str = ""
    payload: dict[str, Any] = field(default_factory=dict)
239
+
240
+
241
@dataclass
class ExtractionResult:
    """Complete extraction result from the graduated pipeline.

    The aggregate fields (``total_facts``, ``total_edges``,
    ``average_confidence``, ``relation_density``,
    ``facts_after_normalization``, ``fact_graph``) are only meaningful after
    :meth:`finalize` has been called.
    """

    extraction_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    source_window_id: str = ""
    timestamp: float = field(default_factory=time.time)

    # Extracted data
    facts: list[Fact] = field(default_factory=list)
    edges: list[FactEdge] = field(default_factory=list)
    fact_graph: FactGraph = field(default_factory=FactGraph)

    # Pipeline execution
    stages_run: list[int] = field(default_factory=list)
    stages_skipped: list[int] = field(default_factory=list)
    total_extraction_latency_ms: float = 0.0
    per_stage_latency: dict[int, float] = field(default_factory=dict)

    # Quality metrics
    total_facts: int = 0
    total_edges: int = 0
    average_confidence: float = 0.0
    entity_density: float = 0.0
    relation_density: float = 0.0

    # Content classification
    content_type: ContentType = ContentType.NARRATIVE
    discourse_markers_found: int = 0

    # Pipeline state (for self-calibration)
    stage_yields: dict[int, int] = field(default_factory=dict)
    escalation_triggers: list[str] = field(default_factory=list)

    # Quality gate
    quality_gate_passed: bool = True
    quality_issues: list[str] = field(default_factory=list)

    # Normalization
    facts_after_normalization: int = 0

    @property
    def success(self) -> bool:
        """True when the quality gate passed."""
        return self.quality_gate_passed

    def finalize(self) -> None:
        """Compute aggregate metrics from facts/edges lists.

        Also rebuilds ``fact_graph`` from scratch.  ``total_edges`` counts
        every entry of ``edges``, including any edge the graph itself rejects
        because one of its endpoints is not among ``facts``.
        ``entity_density`` is not computed here.
        """
        self.total_facts = len(self.facts)
        self.total_edges = len(self.edges)
        # Reset to 0.0 when there are no facts so that a repeated finalize()
        # cannot leave a stale average behind.
        if self.facts:
            self.average_confidence = sum(f.confidence for f in self.facts) / len(self.facts)
        else:
            self.average_confidence = 0.0
        self.relation_density = self.total_edges / max(self.total_facts, 1)
        self.facts_after_normalization = self.total_facts
        # Build graph
        graph = FactGraph()
        for fact in self.facts:
            graph.add_fact(fact)
        for edge in self.edges:
            graph.add_edge(edge)
        self.fact_graph = graph