hegelion 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hegelion/__init__.py +45 -0
- hegelion/core/__init__.py +29 -0
- hegelion/core/agent.py +166 -0
- hegelion/core/autocoding_state.py +293 -0
- hegelion/core/backends.py +442 -0
- hegelion/core/cache.py +92 -0
- hegelion/core/config.py +276 -0
- hegelion/core/core.py +649 -0
- hegelion/core/engine.py +865 -0
- hegelion/core/logging_utils.py +67 -0
- hegelion/core/models.py +293 -0
- hegelion/core/parsing.py +271 -0
- hegelion/core/personas.py +81 -0
- hegelion/core/prompt_autocoding.py +353 -0
- hegelion/core/prompt_dialectic.py +414 -0
- hegelion/core/prompts.py +127 -0
- hegelion/core/schema.py +67 -0
- hegelion/core/validation.py +68 -0
- hegelion/council.py +254 -0
- hegelion/examples_data/__init__.py +6 -0
- hegelion/examples_data/glm4_6_examples.jsonl +2 -0
- hegelion/judge.py +230 -0
- hegelion/mcp/__init__.py +3 -0
- hegelion/mcp/server.py +918 -0
- hegelion/scripts/hegelion_agent_cli.py +90 -0
- hegelion/scripts/hegelion_bench.py +117 -0
- hegelion/scripts/hegelion_cli.py +497 -0
- hegelion/scripts/hegelion_dataset.py +99 -0
- hegelion/scripts/hegelion_eval.py +137 -0
- hegelion/scripts/mcp_setup.py +150 -0
- hegelion/search_providers.py +151 -0
- hegelion/training/__init__.py +7 -0
- hegelion/training/datasets.py +123 -0
- hegelion/training/generator.py +232 -0
- hegelion/training/mlx_scu_trainer.py +379 -0
- hegelion/training/mlx_trainer.py +181 -0
- hegelion/training/unsloth_trainer.py +136 -0
- hegelion-0.4.0.dist-info/METADATA +295 -0
- hegelion-0.4.0.dist-info/RECORD +43 -0
- hegelion-0.4.0.dist-info/WHEEL +5 -0
- hegelion-0.4.0.dist-info/entry_points.txt +8 -0
- hegelion-0.4.0.dist-info/licenses/LICENSE +21 -0
- hegelion-0.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,67 @@
+"""Structured logging utilities for Hegelion."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import sys
+from typing import Any, Dict
+
+# Configure logging level from environment
+LOG_LEVEL = os.getenv("HEGELION_LOG_LEVEL", "WARNING").upper()
+
+# Create logger
+logger = logging.getLogger("hegelion")
+logger.setLevel(getattr(logging, LOG_LEVEL, logging.WARNING))
+
+
+# JSON formatter for structured logs
+class JSONFormatter(logging.Formatter):
+    """Format log records as JSON for structured logging."""
+
+    def format(self, record: logging.LogRecord) -> str:
+        log_data: Dict[str, Any] = {
+            "timestamp": self.formatTime(record, self.datefmt),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": record.getMessage(),
+        }
+
+        # Add exception info if present
+        if record.exc_info:
+            log_data["exception"] = self.formatException(record.exc_info)
+
+        # Add extra fields from record
+        if hasattr(record, "extra_fields"):
+            log_data.update(record.extra_fields)
+
+        return json.dumps(log_data)
+
+
+# Setup handler if not already configured
+if not logger.handlers:
+    handler = logging.StreamHandler(sys.stderr)
+    handler.setFormatter(JSONFormatter())
+    logger.addHandler(handler)
+
+
+def log_phase(phase: str, **kwargs: Any) -> None:
+    """Log a dialectical phase with structured data."""
+    extra = {"extra_fields": {"phase": phase, **kwargs}}
+    logger.info(f"Phase: {phase}", extra=extra)
+
+
+def log_error(error_type: str, message: str, **kwargs: Any) -> None:
+    """Log an error with structured data."""
+    extra = {"extra_fields": {"error_type": error_type, **kwargs}}
+    logger.error(message, extra=extra)
+
+
+def log_metric(metric_name: str, value: Any, **kwargs: Any) -> None:
+    """Log a metric with structured data."""
+    extra = {"extra_fields": {"metric": metric_name, "value": value, **kwargs}}
+    logger.debug(f"Metric: {metric_name}={value}", extra=extra)
+
+
+__all__ = ["logger", "log_phase", "log_error", "log_metric"]
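The helpers above are plain module-level functions. Below is a minimal usage sketch, assuming the import path `hegelion.core.logging_utils` from the file listing; the phase name, metric name, and extra fields are invented for illustration.

```python
import os

# HEGELION_LOG_LEVEL is read once at import time, so set it before importing.
os.environ["HEGELION_LOG_LEVEL"] = "DEBUG"

from hegelion.core.logging_utils import log_error, log_metric, log_phase

# Illustrative field names only; any keyword arguments end up in the JSON record.
log_phase("thesis", query="Is light a wave or a particle?", model="example-model")
log_metric("thesis_time_ms", 812.4, phase="thesis")

try:
    raise TimeoutError("backend did not respond")
except TimeoutError as exc:
    log_error("backend_timeout", str(exc), retryable=True)

# Each call writes one JSON object to stderr, e.g.
# {"timestamp": "...", "level": "INFO", "logger": "hegelion",
#  "message": "Phase: thesis", "phase": "thesis", "query": "...", "model": "example-model"}
```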
hegelion/core/models.py
ADDED
@@ -0,0 +1,293 @@
+"""Data models for Hegelion dialectical reasoning results."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+
+class ValidationError(Exception):
+    """Raised when validation fails."""
+
+    pass
+
+
+@dataclass
+class ContradictionResult:
+    """A structured contradiction extracted during antithesis."""
+
+    description: str
+    evidence: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary format."""
+        result = {"description": self.description}
+        if self.evidence:
+            result["evidence"] = self.evidence
+        return result
+
+
+@dataclass
+class ResearchProposal:
+    """A research proposal extracted during synthesis."""
+
+    description: str
+    testable_prediction: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary format."""
+        result = {"description": self.description}
+        if self.testable_prediction:
+            result["testable_prediction"] = self.testable_prediction
+        return result
+
+
+@dataclass
+class HegelionResult:
+    """
+    Main result object for Hegelion dialectical reasoning.
+
+    This is the public API output that excludes internal conflict scoring.
+    """
+
+    query: str = None  # Default to None to fail validation if missing (as per corrected tests)
+    mode: str = "synthesis"
+    thesis: str = ""
+    antithesis: str = ""
+    synthesis: str = ""
+    contradictions: List[Dict[str, Any]] = None
+    research_proposals: List[Dict[str, Any]] = None
+    metadata: Dict[str, Any] = None
+    trace: Optional[Dict[str, Any]] = None  # Full trace including raw LLM calls
+    timestamp: Optional[str] = None
+    validation_score: Optional[float] = None
+
+    def __post_init__(self):
+        if self.contradictions is None:
+            self.contradictions = []
+        if self.research_proposals is None:
+            self.research_proposals = []
+        # metadata defaults to None as per tests
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        metadata_val = self.metadata
+        if hasattr(metadata_val, "to_dict"):
+            metadata_val = metadata_val.to_dict()
+
+        result = {
+            "query": self.query,
+            "mode": self.mode,
+            "thesis": self.thesis,
+            "antithesis": self.antithesis,
+            "synthesis": self.synthesis,
+            "contradictions": self.contradictions,
+            "research_proposals": self.research_proposals,
+            "metadata": metadata_val,
+        }
+        if self.trace is not None:
+            result["trace"] = self.trace
+        return result
+
+    def model_dump(self) -> Dict[str, Any]:
+        """Alias for to_dict to satisfy Pydantic-style tests."""
+        data = self.to_dict()
+        data["timestamp"] = self.timestamp
+        data["validation_score"] = self.validation_score
+        return data
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> HegelionResult:
+        """Create a HegelionResult from a dictionary."""
+        metadata_data = data.get("metadata", {})
+        # Try to convert metadata to object if it looks like one, to satisfy tests
+        # that expect object access. But keep as dict if it fails or is empty.
+        # Note: The type hint says Dict, but tests expect object.
+        metadata_obj = metadata_data
+        if isinstance(metadata_data, dict) and "thesis_time_ms" in metadata_data:
+            try:
+                # Reconstruct HegelionMetadata
+                # We need to handle optional fields carefully
+                metadata_obj = HegelionMetadata(
+                    thesis_time_ms=metadata_data.get("thesis_time_ms", 0.0),
+                    antithesis_time_ms=metadata_data.get("antithesis_time_ms", 0.0),
+                    synthesis_time_ms=metadata_data.get("synthesis_time_ms"),
+                    total_time_ms=metadata_data.get("total_time_ms", 0.0),
+                    backend_provider=metadata_data.get("backend_provider"),
+                    backend_model=metadata_data.get("backend_model"),
+                    debug=metadata_data.get("debug"),
+                )
+            except Exception:
+                pass
+
+        return cls(
+            query=data.get("query", ""),
+            mode=data.get("mode", "synthesis"),
+            thesis=data.get("thesis", ""),
+            antithesis=data.get("antithesis", ""),
+            synthesis=data.get("synthesis", ""),
+            contradictions=data.get("contradictions", []),
+            research_proposals=data.get("research_proposals", []),
+            metadata=metadata_obj,
+            trace=data.get("trace"),
+            timestamp=data.get("timestamp"),
+            validation_score=data.get("validation_score"),
+        )
+
+
+@dataclass
+class HegelionTrace:
+    """Complete dialectical trace for debugging and analysis."""
+
+    thesis: str
+    antithesis: str
+    synthesis: Optional[str]
+    contradictions_found: int
+    research_proposals: List[str]
+    internal_conflict_score: Optional[float] = None  # Internal use only
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        result = {
+            "thesis": self.thesis,
+            "antithesis": self.antithesis,
+            "synthesis": self.synthesis,
+            "contradictions_found": self.contradictions_found,
+            "research_proposals": self.research_proposals,
+        }
+        # Only include conflict score in debug mode
+        if self.internal_conflict_score is not None:
+            result["internal_conflict_score"] = self.internal_conflict_score
+        return result
+
+
+@dataclass
+class HegelionMetadata:
+    """Metadata about Hegelion execution."""
+
+    thesis_time_ms: float
+    antithesis_time_ms: float
+    synthesis_time_ms: Optional[float]
+    total_time_ms: float
+    backend_provider: Optional[str] = None
+    backend_model: Optional[str] = None
+    debug: Optional[Dict[str, Any]] = None  # Debug information including internal scores
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        result = {
+            "thesis_time_ms": self.thesis_time_ms,
+            "antithesis_time_ms": self.antithesis_time_ms,
+            "synthesis_time_ms": self.synthesis_time_ms,
+            "total_time_ms": self.total_time_ms,
+        }
+        if self.backend_provider:
+            result["backend_provider"] = self.backend_provider
+        if self.backend_model:
+            result["backend_model"] = self.backend_model
+        if self.debug:
+            result["debug"] = self.debug
+        return result
+
+
+# Legacy output model (for backward compatibility, but deprecated)
+class HegelionOutput:
+    """
+    Legacy output model that includes conflict_score.
+
+    DEPRECATED: Use HegelionResult instead. This class is maintained for backward compatibility
+    but should not be used in new code.
+    """
+
+    def __init__(
+        self,
+        result: str,
+        mode: str,
+        conflict_score: float,
+        trace: HegelionTrace,
+        metadata: HegelionMetadata,
+    ):
+        self.result = result
+        self.mode = mode
+        self.conflict_score = conflict_score  # This field is deprecated
+        self.trace = trace
+        self.metadata = metadata
+
+    def to_hegelion_result(self, include_debug_conflict_score: bool = False) -> HegelionResult:
+        """Convert to the new HegelionResult format."""
+        debug_info = None
+        if include_debug_conflict_score:
+            debug_info = {"internal_conflict_score": self.conflict_score}
+
+        # Convert contradictions to structured format
+        contradictions = []
+        for i, contr_desc in enumerate(
+            self.trace.research_proposals[: self.trace.contradictions_found]
+        ):
+            contradictions.append({"description": contr_desc})
+
+        return HegelionResult(
+            query="",  # Not available in legacy format
+            mode=self.mode,
+            thesis=self.trace.thesis,
+            antithesis=self.trace.antithesis,
+            synthesis=self.trace.synthesis or "",
+            contradictions=contradictions,
+            research_proposals=[{"description": rp} for rp in self.trace.research_proposals],
+            metadata={
+                "thesis_time_ms": self.metadata.thesis_time_ms,
+                "antithesis_time_ms": self.metadata.antithesis_time_ms,
+                "synthesis_time_ms": self.metadata.synthesis_time_ms,
+                "total_time_ms": self.metadata.total_time_ms,
+                "debug": debug_info,
+            },
+            trace=self.trace.to_dict(),
+        )
+
+
+# Backwards compatibility alias (older tests / code expect DialecticOutput)
+# DialecticOutput used to be the public name — map it to the current HegelionResult
+DialecticOutput = HegelionResult
+
+
+@dataclass
+class PromptWorkflow:
+    query: str
+    thesis: str
+    antithesis: str
+    synthesis: str
+    instructions: Optional[str] = None
+
+    def model_dump(self) -> Dict[str, Any]:
+        return {
+            "query": self.query,
+            "thesis": self.thesis,
+            "antithesis": self.antithesis,
+            "synthesis": self.synthesis,
+            "instructions": self.instructions,
+        }
+
+
+@dataclass
+class WorkflowResult:
+    workflow: Dict[str, Any]
+    results: List[Dict[str, Any]]
+
+    def model_dump(self) -> Dict[str, Any]:
+        return {
+            "workflow": self.workflow,
+            "results": self.results,
+        }
+
+
+@dataclass
+class ResultMetadata:
+    source: str
+    confidence: str
+    tags: List[str]
+
+
+@dataclass
+class ConfidenceScore:
+    score: float
+    reasoning: str
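As a rough illustration of how these dataclasses fit together, the sketch below builds a HegelionResult with a HegelionMetadata object and round-trips it through to_dict() / from_dict(). All field values are invented; only the class and method names come from the diff above.

```python
import json

from hegelion.core.models import HegelionMetadata, HegelionResult

metadata = HegelionMetadata(
    thesis_time_ms=812.0,
    antithesis_time_ms=930.5,
    synthesis_time_ms=1104.2,
    total_time_ms=2846.7,
    backend_provider="example-provider",  # hypothetical value
    backend_model="example-model",        # hypothetical value
)

result = HegelionResult(
    query="Is light a wave or a particle?",
    thesis="Light is a wave.",
    antithesis="Light arrives as discrete quanta.",
    synthesis="Wave-particle duality reconciles both observations.",
    contradictions=[{"description": "Interference vs. the photoelectric effect"}],
    research_proposals=[{"description": "Repeat the double-slit experiment with single photons"}],
    metadata=metadata,
)

# to_dict() converts the metadata object because it exposes its own to_dict().
payload = json.dumps(result.to_dict(), indent=2)

# from_dict() rebuilds a HegelionMetadata whenever the timing keys are present.
restored = HegelionResult.from_dict(json.loads(payload))
assert isinstance(restored.metadata, HegelionMetadata)
assert restored.query == result.query
```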
hegelion/core/parsing.py
ADDED
@@ -0,0 +1,271 @@
+"""Parsing utilities for Hegelion dialectical reasoning."""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import List, Optional
+
+
+def parse_contradiction_header(text: str) -> Optional[str]:
+    """Parse a contradiction header line and extract the description.
+
+    Supports variations:
+    - CONTRADICTION: description
+    - **CONTRADICTION**: description
+    - **CONTRADICTION:** description
+    - Contradiction 1: description
+    - contradiction: description (case insensitive)
+    """
+    colon_index = text.find(":")
+    if colon_index == -1:
+        return None
+
+    # Strip markdown from the prefix only
+    prefix = text[:colon_index].strip()
+
+    # Remove leading and trailing markdown markers from the prefix
+    for marker in ["**", "__", "*", "_"]:
+        if prefix.startswith(marker):
+            prefix = prefix[len(marker) :].strip()
+        if prefix.endswith(marker):
+            prefix = prefix[: -len(marker)].strip()
+
+    prefix = prefix.upper()
+
+    # Remove numbering (e.g., "CONTRADICTION 1" -> "CONTRADICTION")
+    prefix_parts = prefix.split()
+    if prefix_parts and prefix_parts[0] == "CONTRADICTION":
+        # Valid contradiction header
+        detail = text[colon_index + 1 :].strip() or "Unspecified contradiction"
+
+        # Strip markdown from the description as well
+        for marker in ["**", "__", "*", "_"]:
+            if detail.startswith(marker):
+                detail = detail[len(marker) :].strip()
+                break
+
+        return detail
+
+    return None
+
+
+def strip_markdown_wrappers(text: str) -> str:
+    """Remove markdown formatting wrappers like **, __, *, _ from text."""
+    trimmed = text.strip()
+    if not trimmed:
+        return ""
+    markers = ("**", "__", "*", "_")
+    changed = True
+    while changed and trimmed:
+        changed = False
+        for marker in markers:
+            if (
+                trimmed.startswith(marker)
+                and trimmed.endswith(marker)
+                and len(trimmed) > 2 * len(marker)
+            ):
+                trimmed = trimmed[len(marker) : -len(marker)].strip()
+                changed = True
+    return trimmed
+
+
+def extract_contradictions(text: str) -> List[str]:
+    """Extract structured contradictions from antithesis text.
+
+    Supports formats:
+    CONTRADICTION: [description]
+    EVIDENCE: [supporting evidence]
+    **CONTRADICTION**: [description] (markdown)
+    Contradiction 1: [description] (numbered)
+
+    Handles multiline evidence by accumulating lines until next CONTRADICTION.
+    """
+    contradictions: List[str] = []
+    pending: Optional[str] = None
+    evidence_buffer: List[str] = []
+
+    for raw_line in text.splitlines():
+        stripped = raw_line.strip()
+        if not stripped:
+            continue
+
+        cleaned = strip_markdown_wrappers(stripped)
+        if not cleaned:
+            continue
+
+        # Check if this is a new contradiction header
+        header = parse_contradiction_header(cleaned)
+        if header:
+            # Save previous contradiction with accumulated evidence
+            if pending:
+                if evidence_buffer:
+                    combined_evidence = " ".join(evidence_buffer).strip()
+                    contradictions.append(f"{pending} — {combined_evidence}")
+                else:
+                    contradictions.append(pending)
+                evidence_buffer = []
+            pending = header
+            continue
+
+        if not pending:
+            continue
+
+        # Check if this is evidence
+        normalized = cleaned.upper()
+        if normalized.startswith("EVIDENCE"):
+            # Extract evidence text after colon
+            evidence_line = cleaned.split(":", 1)[1].strip() if ":" in cleaned else cleaned
+            if evidence_line:
+                evidence_buffer.append(evidence_line)
+        elif evidence_buffer:
+            # Continuation of evidence (multiline)
+            evidence_buffer.append(cleaned)
+
+    # Save final pending contradiction
+    if pending:
+        if evidence_buffer:
+            combined_evidence = " ".join(evidence_buffer).strip()
+            contradictions.append(f"{pending} — {combined_evidence}")
+        else:
+            contradictions.append(pending)
+
+    return contradictions
+
+
+def extract_research_proposals(text: str) -> List[str]:
+    """Extract research proposals from synthesis text.
+
+    Supports formats:
+    - RESEARCH_PROPOSAL: [description]
+    - TESTABLE_PREDICTION: [falsifiable claim]
+    - PREDICTION 1: [claim] (numbered)
+    - TEST_PREDICTION: [claim] (variations)
+
+    Handles multiline predictions by accumulating until next header.
+    """
+    proposals: List[str] = []
+    current: Optional[str] = None
+    prediction_buffer: List[str] = []
+
+    def _is_research_header(upper_line: str) -> bool:
+        """Check if line is a research proposal header."""
+        return upper_line.startswith("RESEARCH_PROPOSAL:") or upper_line.startswith(
+            "RESEARCH PROPOSAL:"
+        )
+
+    def _is_prediction_header(upper_line: str) -> bool:
+        """Check if line is a prediction header (with variations)."""
+        # Match: TESTABLE_PREDICTION, TESTABLE PREDICTION, TEST_PREDICTION,
+        # PREDICTION 1, PREDICTION:, etc.
+        if upper_line.startswith("TESTABLE") and "PREDICTION" in upper_line:
+            return True
+        if upper_line.startswith("TEST") and "PREDICTION" in upper_line:
+            return True
+        # Handle numbered predictions: "PREDICTION 1:", "PREDICTION 2:", etc.
+        if re.match(r"PREDICTION\s*\d*\s*:", upper_line):
+            return True
+        return False
+
+    for line in text.splitlines():
+        normalized = line.strip()
+        if not normalized:
+            continue
+
+        cleaned = strip_markdown_wrappers(normalized)
+        upper = cleaned.upper()
+
+        # New research proposal header: flush previous
+        if _is_research_header(upper):
+            # Flush any existing current proposal
+            if current:
+                if prediction_buffer:
+                    combined_pred = " ".join(prediction_buffer).strip()
+                    proposals.append(f"{current} | Prediction: {combined_pred}")
+                else:
+                    proposals.append(current)
+            elif prediction_buffer:
+                # Standalone prediction before a new proposal
+                combined_pred = " ".join(prediction_buffer).strip()
+                proposals.append(f"Prediction: {combined_pred}")
+
+            # Start new proposal
+            current = cleaned.split(":", 1)[1].strip() if ":" in cleaned else cleaned
+            prediction_buffer = []
+            continue
+
+        # Prediction header: start/replace prediction buffer
+        if _is_prediction_header(upper):
+            # If we have a current proposal, attach the prediction to it (don't flush yet)
+            # If we have a standalone prediction buffer, flush it first
+            if current is None and prediction_buffer:
+                # Flush standalone prediction before starting new one
+                combined_prev = " ".join(prediction_buffer).strip()
+                if combined_prev:
+                    proposals.append(f"Prediction: {combined_prev}")
+                prediction_buffer = []
+
+            # Start new prediction (will be attached to current proposal if exists)
+            prediction_text = cleaned.split(":", 1)[1].strip() if ":" in cleaned else ""
+            if prediction_text:
+                prediction_buffer = [prediction_text]
+            continue
+
+        # Continuation for multiline prediction
+        if prediction_buffer and cleaned:
+            prediction_buffer.append(cleaned)
+
+    # Flush tail
+    if current:
+        if prediction_buffer:
+            combined_pred = " ".join(prediction_buffer).strip()
+            proposals.append(f"{current} | Prediction: {combined_pred}")
+        else:
+            proposals.append(current)
+    elif prediction_buffer:
+        combined_pred = " ".join(prediction_buffer).strip()
+        proposals.append(f"Prediction: {combined_pred}")
+
+    return proposals
+
+
+def parse_conflict_value(response: str) -> float:
+    """Parse a conflict value from LLM response JSON."""
+    if not response:
+        return 0.0
+    candidates = [response.strip()]
+    candidates.extend(re.findall(r"\{.*?\}", response, flags=re.DOTALL))
+    for candidate in candidates:
+        try:
+            data = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+        conflict_value = data.get("conflict")
+        try:
+            value = float(conflict_value)
+        except (TypeError, ValueError):
+            continue
+        return float(max(0.0, min(1.0, value)))
+    return 0.0
+
+
+def conclusion_excerpt(text: str, max_paragraphs: int = 2, max_chars: int = 1500) -> str:
+    """Extract a conclusion excerpt from text for conflict analysis."""
+    paragraphs = [segment.strip() for segment in text.split("\n\n") if segment.strip()]
+    if not paragraphs:
+        excerpt = text.strip()
+    else:
+        excerpt = "\n\n".join(paragraphs[-max_paragraphs:])
+    if len(excerpt) > max_chars:
+        return excerpt[-max_chars:]
+    return excerpt
+
+
+__all__ = [
+    "extract_contradictions",
+    "extract_research_proposals",
+    "parse_conflict_value",
+    "conclusion_excerpt",
+    "parse_contradiction_header",
+    "strip_markdown_wrappers",
+]
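For reference, a short sketch of the parsers above applied to text in the documented CONTRADICTION / EVIDENCE and RESEARCH_PROPOSAL / TESTABLE_PREDICTION formats. The sample text is fabricated, and the outputs shown in comments are inferred from the code above rather than taken from package documentation.

```python
from hegelion.core.parsing import (
    extract_contradictions,
    extract_research_proposals,
    parse_conflict_value,
)

antithesis_text = """
**CONTRADICTION**: The thesis assumes observations are noise-free.
EVIDENCE: Section 2 reports a 12% measurement error.

Contradiction 2: The thesis conflates correlation with causation.
"""

synthesis_text = """
RESEARCH_PROPOSAL: Re-run the study with a calibrated sensor.
TESTABLE_PREDICTION: Measurement error drops below 3%.
"""

print(extract_contradictions(antithesis_text))
# ['The thesis assumes observations are noise-free. — Section 2 reports a 12% measurement error.',
#  'The thesis conflates correlation with causation.']

print(extract_research_proposals(synthesis_text))
# ['Re-run the study with a calibrated sensor. | Prediction: Measurement error drops below 3%.']

print(parse_conflict_value('{"conflict": 0.85}'))  # 0.85; values are clamped to [0.0, 1.0]
```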