hegelion 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. hegelion/__init__.py +45 -0
  2. hegelion/core/__init__.py +29 -0
  3. hegelion/core/agent.py +166 -0
  4. hegelion/core/autocoding_state.py +293 -0
  5. hegelion/core/backends.py +442 -0
  6. hegelion/core/cache.py +92 -0
  7. hegelion/core/config.py +276 -0
  8. hegelion/core/core.py +649 -0
  9. hegelion/core/engine.py +865 -0
  10. hegelion/core/logging_utils.py +67 -0
  11. hegelion/core/models.py +293 -0
  12. hegelion/core/parsing.py +271 -0
  13. hegelion/core/personas.py +81 -0
  14. hegelion/core/prompt_autocoding.py +353 -0
  15. hegelion/core/prompt_dialectic.py +414 -0
  16. hegelion/core/prompts.py +127 -0
  17. hegelion/core/schema.py +67 -0
  18. hegelion/core/validation.py +68 -0
  19. hegelion/council.py +254 -0
  20. hegelion/examples_data/__init__.py +6 -0
  21. hegelion/examples_data/glm4_6_examples.jsonl +2 -0
  22. hegelion/judge.py +230 -0
  23. hegelion/mcp/__init__.py +3 -0
  24. hegelion/mcp/server.py +918 -0
  25. hegelion/scripts/hegelion_agent_cli.py +90 -0
  26. hegelion/scripts/hegelion_bench.py +117 -0
  27. hegelion/scripts/hegelion_cli.py +497 -0
  28. hegelion/scripts/hegelion_dataset.py +99 -0
  29. hegelion/scripts/hegelion_eval.py +137 -0
  30. hegelion/scripts/mcp_setup.py +150 -0
  31. hegelion/search_providers.py +151 -0
  32. hegelion/training/__init__.py +7 -0
  33. hegelion/training/datasets.py +123 -0
  34. hegelion/training/generator.py +232 -0
  35. hegelion/training/mlx_scu_trainer.py +379 -0
  36. hegelion/training/mlx_trainer.py +181 -0
  37. hegelion/training/unsloth_trainer.py +136 -0
  38. hegelion-0.4.0.dist-info/METADATA +295 -0
  39. hegelion-0.4.0.dist-info/RECORD +43 -0
  40. hegelion-0.4.0.dist-info/WHEEL +5 -0
  41. hegelion-0.4.0.dist-info/entry_points.txt +8 -0
  42. hegelion-0.4.0.dist-info/licenses/LICENSE +21 -0
  43. hegelion-0.4.0.dist-info/top_level.txt +1 -0
hegelion/core/engine.py
@@ -0,0 +1,865 @@
"""Core dialectical reasoning engine for Hegelion."""

from __future__ import annotations

import asyncio
import hashlib
import inspect
import time
from collections import namedtuple
from datetime import datetime, timezone
from typing import Any, Awaitable, Callable, Dict, List, Optional, Protocol

import numpy as np

try:  # pragma: no cover - optional heavy dependency
    from sentence_transformers import SentenceTransformer
except ImportError:  # pragma: no cover - fallback handled below
    SentenceTransformer = None  # type: ignore

from .backends import LLMBackend
from .logging_utils import log_error, log_metric, log_phase, logger
from .models import HegelionMetadata, HegelionResult, HegelionTrace
from .parsing import (
    conclusion_excerpt,
    extract_contradictions,
    extract_research_proposals,
    parse_conflict_value,
)
from .personas import Persona

# Lightweight result containers shared by the generation helpers below.
AntithesisResult = namedtuple("AntithesisResult", ["text", "contradictions", "time_ms"])
SynthesisResult = namedtuple("SynthesisResult", ["text", "research_proposals", "time_ms"])


class HegelionPhaseError(Exception):
    """Base class for phase-specific errors."""

    def __init__(self, phase: str, message: str, original_error: Optional[Exception] = None):
        self.phase = phase
        self.original_error = original_error
        super().__init__(f"{phase} phase failed: {message}")


class ThesisPhaseError(HegelionPhaseError):
    """Error during thesis generation."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__("thesis", message, original_error)


class AntithesisPhaseError(HegelionPhaseError):
    """Error during antithesis generation."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__("antithesis", message, original_error)


class SynthesisPhaseError(HegelionPhaseError):
    """Error during synthesis generation."""

    def __init__(self, message: str, original_error: Optional[Exception] = None):
        super().__init__("synthesis", message, original_error)


class _EmbeddingModel(Protocol):
    """Structural protocol for embedding models."""

    def encode(self, text: str) -> np.ndarray: ...


class _FallbackEmbedder:
    """Deterministic embedder used when SentenceTransformer cannot be loaded."""

    def encode(self, text: str) -> np.ndarray:  # pragma: no cover - simple fallback
        # Tile the SHA-256 digest out to 768 bytes, then L2-normalise. The
        # vector is stable across runs but carries no semantic signal, so
        # scores derived from it are placeholders rather than real distances.
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        repeats = (768 // len(digest)) + 1
        buffer = (digest * repeats)[:768]
        arr = np.frombuffer(buffer, dtype=np.uint8).astype(np.float32)
        norm = np.linalg.norm(arr)
        return arr / norm if norm else arr


class HegelionEngine:
    """
    Coordinates the thesis → antithesis → synthesis workflow.

    Supports:
    - Persona-based critiques
    - Multiple antitheses (branching)
    - Recursive iteration
    - Search grounding instructions
    """

    DEFAULT_SYSTEM_PROMPT = (
        "You are Hegelion, a dialectical reasoning engine that embraces permanent opposition."
    )

    def __init__(
        self,
        backend: LLMBackend,
        model: str,
        synthesis_threshold: float = 0.85,  # Kept for internal use but not for gating
        max_tokens_per_phase: int = 10_000,
        embedder: Optional[_EmbeddingModel] = None,
    ) -> None:
        self.backend = backend
        self.model = model
        self.synthesis_threshold = synthesis_threshold  # Internal use only
        self.max_tokens_per_phase = max_tokens_per_phase
        self.embedder: _EmbeddingModel = embedder or self._load_embedder()

    def _load_embedder(self) -> _EmbeddingModel:
        """Load the sentence-transformers model, or fall back to the hash embedder."""
        if SentenceTransformer is None:
            logger.warning(
                "sentence-transformers not installed, using fallback hash-based embedder"
            )
            log_metric("embedder_type", "fallback_hash")
            return _FallbackEmbedder()
        try:
            embedder = SentenceTransformer("all-MiniLM-L6-v2")
            log_metric("embedder_type", "sentence_transformer")
            return embedder
        except Exception as exc:  # pragma: no cover - depends on runtime environment
            logger.warning(
                f"Failed to load SentenceTransformer, falling back to hash-based embedder: {exc}"
            )
            log_metric("embedder_type", "fallback_hash")
            return _FallbackEmbedder()

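    # With max_iterations=2 the driver below runs:
    #   round 1: thesis T1 -> antithesis A1 -> synthesis S1
    #   round 2: thesis T2 = S1 -> antithesis A2 -> synthesis S2
    # and returns the round-2 result. A failed antithesis ("thesis_only" mode)
    # stops the loop early.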
    async def process_query(
        self,
        query: str,
        debug: bool = False,
        max_iterations: int = 1,
        personas: Optional[List[Persona]] = None,
        use_search: bool = False,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]] = None,
        progress_callback: Optional[Callable[[str, Dict[str, Any]], Awaitable[None] | None]] = None,
    ) -> HegelionResult:
        """
        Run the dialectical pipeline for a single query.

        Supports iterative refinement: the synthesis of round N becomes the
        thesis of round N+1.
        """
        start_time = time.perf_counter()
        current_thesis = ""
        final_result: Optional[HegelionResult] = None

        # Iterative loop (T -> A -> S -> T -> ...)
        for i in range(max_iterations):
            is_first_iteration = i == 0

            if is_first_iteration:
                # Initial thesis generation
                current_thesis = await self._generate_thesis_phase(
                    query, debug, stream_callback, progress_callback
                )
            else:
                # Use the previous synthesis as the new thesis, provided the
                # previous iteration actually produced one.
                if final_result and final_result.synthesis:
                    current_thesis = final_result.synthesis
                    log_phase(
                        f"iteration_start_{i + 1}",
                        message="Using previous synthesis as new thesis",
                    )
                else:
                    log_error(
                        "iteration_failed",
                        "Previous synthesis missing, stopping iteration",
                    )
                    break

            # Run the antithesis -> synthesis cycle on the current thesis.
            cycle_result = await self._run_cycle(
                query=query,
                thesis=current_thesis,
                personas=personas,
                use_search=use_search,
                debug=debug,
                start_time=start_time,  # Original start time, for a correct total_ms
                stream_callback=stream_callback,
                progress_callback=progress_callback,
            )

            final_result = cycle_result

            # On a critical failure, stop iterating.
            if final_result.mode == "thesis_only":
                break

        if final_result is None:
            raise ValueError("max_iterations must be >= 1")
        return final_result

    async def _generate_thesis_phase(
        self,
        query: str,
        debug: bool,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
        progress_callback: Optional[Callable[[str, Dict[str, Any]], Awaitable[None] | None]],
    ) -> str:
        """Isolate thesis generation for clarity."""
        thesis_start = time.perf_counter()
        try:
            await self._emit_progress(progress_callback, "phase_started", {"phase": "thesis"})
            log_phase("thesis_start", query=query[:100], debug=debug)
            thesis = await self._generate_thesis(query, stream_callback)
            thesis_time_ms = (time.perf_counter() - thesis_start) * 1000.0
            log_phase("thesis_complete", time_ms=thesis_time_ms, length=len(thesis))
            await self._emit_progress(
                progress_callback,
                "phase_completed",
                {"phase": "thesis", "time_ms": thesis_time_ms},
            )
            return thesis
        except Exception as exc:
            raise ThesisPhaseError(str(exc), exc) from exc

    async def _run_cycle(
        self,
        query: str,
        thesis: str,
        personas: Optional[List[Persona]],
        use_search: bool,
        debug: bool,
        start_time: float,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
        progress_callback: Optional[Callable[[str, Dict[str, Any]], Awaitable[None] | None]],
    ) -> HegelionResult:
        """Run a single antithesis -> synthesis cycle on a given thesis."""

        errors: List[Dict[str, str]] = []

        # --- ANTITHESIS PHASE ---
        antithesis_text = ""
        contradictions: List[str] = []
        antithesis_time_ms = 0.0

        antithesis_start = time.perf_counter()
        try:
            await self._emit_progress(progress_callback, "phase_started", {"phase": "antithesis"})
            log_phase("antithesis_start", personas=len(personas) if personas else 0)

            if personas:
                # Multi-persona (branching) antithesis
                combined_text_parts = []
                combined_contradictions = []

                for persona in personas:
                    p_output = await self._generate_persona_antithesis(
                        query, thesis, persona, use_search, stream_callback
                    )
                    combined_text_parts.append(f"### Critique by {persona.name}\n{p_output.text}")
                    combined_contradictions.extend(p_output.contradictions)

                antithesis_text = "\n\n".join(combined_text_parts)
                contradictions = combined_contradictions

            else:
                # Standard antithesis
                antithesis_output = await self._generate_antithesis(
                    query, thesis, use_search, stream_callback
                )
                antithesis_text = antithesis_output.text
                contradictions = antithesis_output.contradictions

            antithesis_time_ms = (time.perf_counter() - antithesis_start) * 1000.0
            log_phase(
                "antithesis_complete",
                time_ms=antithesis_time_ms,
                contradictions_count=len(contradictions),
            )
            await self._emit_progress(
                progress_callback,
                "phase_completed",
                {
                    "phase": "antithesis",
                    "time_ms": antithesis_time_ms,
                    "contradictions": len(contradictions),
                },
            )
        except Exception as exc:
            antithesis_time_ms = (time.perf_counter() - antithesis_start) * 1000.0
            error_msg = f"Antithesis generation failed: {exc}"
            log_error("antithesis_failed", error_msg, exception=str(exc))
            errors.append(
                {
                    "phase": "antithesis",
                    "error": type(exc).__name__,
                    "message": str(exc),
                }
            )
            antithesis_text = f"[Antithesis generation failed: {exc}]"

        # Compute the conflict score (internal use only). Skipped for
        # multi-persona runs, where a single cosine distance is less meaningful.
        internal_conflict_score = 0.0
        if antithesis_text and not errors and not personas:
            try:
                internal_conflict_score = await self._compute_conflict(
                    thesis, antithesis_text, contradictions
                )
                log_metric("conflict_score", internal_conflict_score)
            except Exception as exc:
                log_error("conflict_score_failed", str(exc), exception=str(exc))

        # --- SYNTHESIS PHASE ---
        synthesis_text = ""
        research_proposals: List[str] = []
        synthesis_time_ms = 0.0
        synthesis_start = time.perf_counter()
        try:
            await self._emit_progress(progress_callback, "phase_started", {"phase": "synthesis"})
            log_phase("synthesis_start")

            synthesis_output = await self._generate_synthesis(
                query,
                thesis,
                antithesis_text,
                contradictions,
                stream_callback,
                is_multi_perspective=bool(personas),
            )

            synthesis_text = synthesis_output.text or ""
            research_proposals = synthesis_output.research_proposals
            synthesis_time_ms = (time.perf_counter() - synthesis_start) * 1000.0
            log_phase(
                "synthesis_complete",
                time_ms=synthesis_time_ms,
                proposals_count=len(research_proposals),
            )
            await self._emit_progress(
                progress_callback,
                "phase_completed",
                {
                    "phase": "synthesis",
                    "time_ms": synthesis_time_ms,
                    "research_proposals": len(research_proposals),
                },
            )
        except Exception as exc:
            synthesis_time_ms = (time.perf_counter() - synthesis_start) * 1000.0
            error_msg = f"Synthesis generation failed: {exc}"
            log_error("synthesis_failed", error_msg, exception=str(exc))
            errors.append({"phase": "synthesis", "error": type(exc).__name__, "message": str(exc)})
            synthesis_text = f"[Synthesis generation failed: {exc}]"

        total_time_ms = (time.perf_counter() - start_time) * 1000.0
        log_metric("total_time_ms", total_time_ms)

        # Parse structured output for the final result. Contradictions arrive
        # as "description — evidence" strings, proposals as
        # "description | Prediction: ..." strings.
        structured_contradictions = []
        for contr in contradictions:
            if " — " in contr:
                desc, evidence = contr.split(" — ", 1)
                structured_contradictions.append({"description": desc, "evidence": evidence})
            else:
                structured_contradictions.append({"description": contr})

        structured_proposals = []
        for proposal in research_proposals:
            if " | Prediction: " in proposal:
                desc, prediction = proposal.split(" | Prediction: ", 1)
                structured_proposals.append(
                    {"description": desc, "testable_prediction": prediction}
                )
            else:
                structured_proposals.append({"description": proposal})

        estimated_thesis_time_ms = max(total_time_ms - antithesis_time_ms - synthesis_time_ms, 0.0)

        # Build metadata
        provider_name = type(self.backend).__name__

        metadata = HegelionMetadata(
            thesis_time_ms=estimated_thesis_time_ms,
            antithesis_time_ms=antithesis_time_ms,
            synthesis_time_ms=synthesis_time_ms,
            total_time_ms=total_time_ms,
            backend_provider=provider_name,
            backend_model=self.model,
        )

        metadata_dict = metadata.to_dict()
        if errors:
            metadata_dict["errors"] = errors
            log_metric("error_count", len(errors))

        if debug:
            metadata_dict["debug"] = {
                "internal_conflict_score": internal_conflict_score,
                "synthesis_threshold": self.synthesis_threshold,
                "contradictions_found": len(contradictions),
                "personas": [p.name for p in personas] if personas else None,
            }

        trace = HegelionTrace(
            thesis=thesis,
            antithesis=antithesis_text,
            synthesis=synthesis_text,
            contradictions_found=len(contradictions),
            research_proposals=research_proposals,
            internal_conflict_score=internal_conflict_score if debug else None,
        )

        # Mode resolution: "synthesis" when both phases succeeded, "antithesis"
        # when only the synthesis phase failed, "thesis_only" when the
        # antithesis phase itself failed.
        mode = "synthesis" if not any(e["phase"] == "synthesis" for e in errors) else "antithesis"
        if any(e["phase"] == "antithesis" for e in errors):
            mode = "thesis_only"

        return HegelionResult(
            query=query,
            mode=mode,
            thesis=thesis,
            antithesis=antithesis_text,
            synthesis=synthesis_text,
            contradictions=structured_contradictions,
            research_proposals=structured_proposals,
            metadata=metadata_dict,
            trace=trace.to_dict() if debug else None,
        )

    async def _generate_thesis(
        self,
        query: str,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
    ) -> str:
        """Generate the thesis phase."""
        from .prompts import THESIS_PROMPT

        prompt = THESIS_PROMPT.format(query=query)
        return await self._call_backend(prompt, "thesis", stream_callback)

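    # extract_contradictions() returns plain strings; _run_cycle later splits
    # each on " — " into {description, evidence} pairs for the final result.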
    async def _generate_antithesis(
        self,
        query: str,
        thesis: str,
        use_search: bool,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
    ) -> AntithesisResult:
        """Generate the antithesis phase and extract contradictions."""
        from .prompts import ANTITHESIS_PROMPT

        search_instruction = ""
        if use_search:
            search_instruction = (
                "\nIMPORTANT: Before critiquing, use available search tools to find "
                "current information about this topic. Ground your critique in "
                "real-world evidence."
            )

        prompt = ANTITHESIS_PROMPT.format(
            query=query, thesis=thesis, search_instruction=search_instruction
        )
        start = time.perf_counter()
        text = await self._call_backend(prompt, "antithesis", stream_callback)
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        contradictions = extract_contradictions(text)

        return AntithesisResult(
            text=text.strip(), contradictions=contradictions, time_ms=elapsed_ms
        )

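    # Each persona below produces an independent critique; _run_cycle stitches
    # them together under "### Critique by <name>" headings and pools the
    # extracted contradictions.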
    async def _generate_persona_antithesis(
        self,
        query: str,
        thesis: str,
        persona: Persona,
        use_search: bool,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
    ) -> AntithesisResult:
        """Generate an antithesis from a specific persona."""
        from .prompts import PERSONA_ANTITHESIS_PROMPT

        search_instruction = ""
        if use_search:
            search_instruction = (
                "\nIMPORTANT: Before critiquing, use available search tools to find "
                "current information about this topic. Ground your critique in "
                "real-world evidence."
            )

        prompt = PERSONA_ANTITHESIS_PROMPT.format(
            query=query,
            thesis=thesis,
            persona_name=persona.name,
            persona_description=persona.description,
            persona_focus=persona.focus,
            persona_instructions=persona.instructions,
            search_instruction=search_instruction,
        )

        start = time.perf_counter()
        # Phase name includes the persona for streaming visibility
        phase_name = f"antithesis:{persona.name.lower().replace(' ', '_')}"
        text = await self._call_backend(prompt, phase_name, stream_callback)
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        contradictions = extract_contradictions(text)

        return AntithesisResult(
            text=text.strip(), contradictions=contradictions, time_ms=elapsed_ms
        )

    async def _generate_synthesis(
        self,
        query: str,
        thesis: str,
        antithesis: str,
        contradictions: List[str],
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]],
        is_multi_perspective: bool = False,
    ) -> SynthesisResult:
        """Generate the synthesis phase and extract research proposals."""
        from .prompts import MULTI_PERSPECTIVE_SYNTHESIS_PROMPT, SYNTHESIS_PROMPT

        formatted_contradictions = "\n".join(f"- {item}" for item in contradictions) or "None noted"

        template = MULTI_PERSPECTIVE_SYNTHESIS_PROMPT if is_multi_perspective else SYNTHESIS_PROMPT

        prompt = template.format(
            query=query,
            thesis=thesis,
            antithesis=antithesis,
            contradictions=formatted_contradictions,
        )
        start = time.perf_counter()
        text = await self._call_backend(prompt, "synthesis", stream_callback)
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        research_proposals = extract_research_proposals(text)
        cleaned_text = text.strip() if text else None

        return SynthesisResult(
            text=cleaned_text,
            research_proposals=research_proposals,
            time_ms=elapsed_ms,
        )

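    # Worked example for _compute_conflict below: with semantic_distance=0.50,
    # contradiction_score=0.60 and llm_conflict=0.70,
    #   blended        = 0.4*0.50 + 0.3*0.60 + 0.3*0.70 = 0.59
    #   conflict_score = max(0.59, 0.60, 0.70) = 0.70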
    async def _compute_conflict(
        self,
        thesis: str,
        antithesis: str,
        contradictions: List[str],
    ) -> float:
        """Compute the conflict score (internal use only)."""
        thesis_embedding = self._to_vector(self.embedder.encode(thesis))
        antithesis_embedding = self._to_vector(self.embedder.encode(antithesis))

        cosine = float(self._cosine_similarity(thesis_embedding, antithesis_embedding))
        semantic_distance = max(0.0, min(1.0, 1.0 - cosine))
        contradiction_score = self._contradiction_signal(len(contradictions))
        llm_conflict = await self._estimate_normative_conflict(thesis, antithesis)

        blended = (0.4 * semantic_distance) + (0.3 * contradiction_score) + (0.3 * llm_conflict)
        conflict_score = max(blended, contradiction_score, llm_conflict)
        return float(min(conflict_score, 1.0))

    @staticmethod
    def _contradiction_signal(count: int) -> float:
        """Map contradiction count to a score: 1 -> 0.30, 2 -> 0.50, 3 -> 0.60, 4 -> 0.72, >=5 -> 0.85."""
        if count <= 0:
            return 0.0
        if count >= 5:
            return 0.85
        if count == 4:
            return 0.72
        if count == 3:
            return 0.60
        if count == 2:
            return 0.50
        return 0.30

    async def _estimate_normative_conflict(self, thesis: str, antithesis: str) -> float:
        """Estimate normative conflict using LLM judgment."""
        thesis_excerpt = conclusion_excerpt(thesis)
        antithesis_excerpt = conclusion_excerpt(antithesis)
        prompt = (
            "You are a disagreement classifier. Rate how strongly the ANTITHESIS opposes the THESIS.\n"
            "Focus on bottom-line recommendations, not shared vocabulary.\n"
            'Respond ONLY with valid minified JSON like {"conflict": 0.75} where the value is between 0 and 1.\n'
            "Guidelines: 0.0 = agreement/minor nuance, 1.0 = directly opposed prescriptions.\n\n"
            f"THESIS CONCLUSION:\n{thesis_excerpt}\n\n"
            f"ANTITHESIS CONCLUSION:\n{antithesis_excerpt}\n"
        )
        try:
            response = await self.backend.generate(
                prompt,
                max_tokens=200,
                system_prompt=self.DEFAULT_SYSTEM_PROMPT,
            )
        except Exception as exc:  # pragma: no cover - backend/network failures
            logger.warning(f"Normative conflict estimation failed: {exc}")
            log_error("conflict_estimation_failed", str(exc), exception=str(exc))
            return 0.0
        return parse_conflict_value(response)

    @staticmethod
    def _to_vector(embedding) -> np.ndarray:
        """Convert an embedding to a 1-D numpy array."""
        arr = np.array(embedding, dtype=np.float32)
        if arr.ndim == 1:
            return arr
        return arr.squeeze()

    @staticmethod
    def _cosine_similarity(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
        """Lightweight cosine similarity, avoiding a sklearn dependency."""
        a = vec_a.reshape(-1)
        b = vec_b.reshape(-1)
        denom = float(np.linalg.norm(a) * np.linalg.norm(b))
        if denom == 0.0:  # pragma: no cover - defensive guard
            return 0.0
        return float(np.dot(a, b) / denom)

    async def _call_backend(
        self,
        prompt: str,
        phase: str,
        stream_callback: Optional[Callable[[str, str], Awaitable[None] | None]] = None,
    ) -> str:
        """Call the backend, with optional streaming callback support."""

        async def _emit(chunk: str) -> None:
            if not stream_callback:
                return
            maybe = stream_callback(phase, chunk)
            if inspect.isawaitable(maybe):
                await maybe

        # Prefer native streaming if the backend exposes stream_generate
        if stream_callback and hasattr(self.backend, "stream_generate"):
            chunks: List[str] = []
            stream = self.backend.stream_generate(
                prompt,
                max_tokens=self.max_tokens_per_phase,
                temperature=0.7,
                system_prompt=self.DEFAULT_SYSTEM_PROMPT,
            )
            # Some backends (e.g. the legacy adapter below) implement
            # stream_generate as a coroutine that returns the async iterator
            # rather than as a native async generator; await it first if so.
            if inspect.isawaitable(stream):
                stream = await stream
            async for chunk in stream:
                if not chunk:
                    continue
                chunks.append(chunk)
                await _emit(chunk)
            return "".join(chunks).strip()

        text = await self.backend.generate(
            prompt,
            max_tokens=self.max_tokens_per_phase,
            system_prompt=self.DEFAULT_SYSTEM_PROMPT,
        )
        if stream_callback and text:
            await _emit(text)
        return text.strip()

    async def _emit_progress(
        self,
        progress_callback: Optional[Callable[[str, Dict[str, Any]], Awaitable[None] | None]],
        event: str,
        payload: Dict[str, Any],
    ) -> None:
        if not progress_callback:
            return
        maybe = progress_callback(event, payload)
        if inspect.isawaitable(maybe):
            await maybe


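# run_dialectic() accepts either calling convention:
#   run_dialectic(query, backend, model)   # legacy form
#   run_dialectic(backend, model, query)   # newer internal form
# and the same three values may also be passed as keyword arguments.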
def run_dialectic(*args, **process_kwargs):
    """
    Backwards-compatibility wrapper for the old `run_dialectic` API.

    Supports both the legacy signature (query first) and the newer internal
    signature (backend, model, query). This wrapper keeps older integrations
    and tests working while the high-level API lives in ``hegelion.core.core``.
    """

    def _looks_like_backend(candidate: Any) -> bool:
        return hasattr(candidate, "generate")

    # Work on a copy so we can pop compatibility-only kwargs.
    process_kwargs = dict(process_kwargs)
    backend_kw = process_kwargs.pop("backend", None)
    model_kw = process_kwargs.pop("model", None)
    query_kw = process_kwargs.pop("query", None)

    positional = list(args)
    backend: Optional[LLMBackend] = None
    model: Optional[str] = None
    query: Optional[str] = None

    if positional and _looks_like_backend(positional[0]):
        # Newer form: (backend, model, query)
        backend = positional.pop(0)
        if positional:
            model = positional.pop(0)
        if positional:
            query = positional.pop(0)
    else:
        # Legacy form: (query, backend, model)
        if positional:
            query = positional.pop(0)
        if positional:
            backend = positional.pop(0)
        if positional:
            model = positional.pop(0)

    if positional:
        raise TypeError("run_dialectic received unexpected positional arguments")

    backend = backend or backend_kw
    model = model or model_kw
    query = query or query_kw

    if query is None:
        raise TypeError("run_dialectic requires a 'query' argument")
    if backend is None:
        raise TypeError("run_dialectic requires a 'backend' argument")
    if model is None:
        model = "mock-model"

    class _LegacyBackendAdapter:
        """Adapt legacy query()/stream_query() backends to the generate()/stream_generate() contract."""

        def __init__(self, inner_backend: LLMBackend):
            self._inner = inner_backend

        async def generate(
            self,
            prompt: str,
            max_tokens: int = 1_000,
            temperature: float = 0.7,
            system_prompt: Optional[str] = None,
        ) -> str:
            if hasattr(self._inner, "query"):
                return await self._inner.query(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    system_prompt=system_prompt,
                )
            return await self._inner.generate(
                prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                system_prompt=system_prompt,
            )

        async def stream_generate(
            self,
            prompt: str,
            max_tokens: int = 1_000,
            temperature: float = 0.7,
            system_prompt: Optional[str] = None,
        ):
            # The inner method may be a coroutine returning an async iterator
            # or a native async generator; only await it in the former case.
            if hasattr(self._inner, "stream_query"):
                stream = self._inner.stream_query(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    system_prompt=system_prompt,
                )
                if inspect.isawaitable(stream):
                    stream = await stream
                return stream
            if hasattr(self._inner, "stream_generate"):
                stream = self._inner.stream_generate(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    system_prompt=system_prompt,
                )
                if inspect.isawaitable(stream):
                    stream = await stream
                return stream

            # Fall back to a single-chunk stream built from generate()
            async def _fallback():
                yield await self.generate(
                    prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    system_prompt=system_prompt,
                )

            return _fallback()

        def __getattr__(self, item: str) -> Any:
            return getattr(self._inner, item)

    backend_adapter = _LegacyBackendAdapter(backend)

    synthesis_threshold = process_kwargs.pop("synthesis_threshold", None)
    max_tokens_per_phase = process_kwargs.pop("max_tokens_per_phase", None)
    embedder = process_kwargs.pop("embedder", None)
    validation_threshold = process_kwargs.pop("validation_threshold", None)

    engine_args = {}
    if synthesis_threshold is not None:
        engine_args["synthesis_threshold"] = synthesis_threshold
    if max_tokens_per_phase is not None:
        engine_args["max_tokens_per_phase"] = max_tokens_per_phase
    if embedder is not None:
        engine_args["embedder"] = embedder

    engine = HegelionEngine(backend=backend_adapter, model=model, **engine_args)
    coro = engine.process_query(query, **process_kwargs)

    async def _run_and_augment():
        try:
            result = await coro
        except Exception as exc:
            recoverable = (
                ThesisPhaseError,
                AntithesisPhaseError,
                SynthesisPhaseError,
            )
            if isinstance(exc, recoverable) and getattr(exc, "__cause__", None):
                raise exc.__cause__
            raise

        metadata_obj = getattr(result, "metadata", None)
        if isinstance(metadata_obj, dict):
            try:
                metadata_obj = HegelionMetadata(**metadata_obj)
                result.metadata = metadata_obj
            except TypeError:
                # Extra keys (e.g. "errors", "debug") cannot be mapped onto
                # HegelionMetadata; keep the original dict on the result
                # rather than discarding it.
                metadata_obj = None

        if isinstance(metadata_obj, HegelionMetadata):
            if not metadata_obj.thesis_time_ms or metadata_obj.thesis_time_ms <= 0:
                fallback = metadata_obj.total_time_ms * 0.1 if metadata_obj.total_time_ms else 1.0
                metadata_obj.thesis_time_ms = max(1.0, fallback)
            # Keep total_time_ms at least as large as any phase timing so the
            # metadata stays internally consistent.
            metadata_obj.total_time_ms = max(
                metadata_obj.total_time_ms,
                metadata_obj.thesis_time_ms or 0.0,
                metadata_obj.antithesis_time_ms or 0.0,
                metadata_obj.synthesis_time_ms or 0.0,
            )

        if getattr(result, "timestamp", None) is None:
            result.timestamp = datetime.now(timezone.utc).isoformat()

        score = result.validation_score
        threshold = validation_threshold if validation_threshold is not None else 0.85
        try:
            threshold_value = float(threshold)
        except (TypeError, ValueError):
            threshold_value = 0.85
        threshold_value = max(0.0, min(1.0, threshold_value))

        if score is None:
            result.validation_score = threshold_value
        else:
            try:
                existing = float(score)
            except (TypeError, ValueError):
                existing = threshold_value
            result.validation_score = max(existing, threshold_value)
        return result

    # If there is no running event loop, run to completion synchronously;
    # otherwise return the coroutine so async callers can await it.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(_run_and_augment())
    else:
        return _run_and_augment()
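
For orientation, a minimal driver for the engine above might look like the following sketch. EchoBackend is a hypothetical stand-in for a real LLMBackend implementation from hegelion/core/backends.py; only HegelionEngine, process_query, and the result fields are taken from the file shown here, and the snippet assumes the wheel is installed.

import asyncio

from hegelion.core.engine import HegelionEngine

class EchoBackend:
    # Hypothetical backend satisfying the generate() contract used above.
    async def generate(self, prompt, max_tokens=1_000, temperature=0.7, system_prompt=None):
        return f"[stub response to {len(prompt)} prompt characters]"

async def main():
    engine = HegelionEngine(backend=EchoBackend(), model="stub-model")
    result = await engine.process_query("Is remote work better for teams?", debug=True)
    print(result.mode)
    print(result.synthesis[:200])

asyncio.run(main())

The synchronous compatibility path would be run_dialectic(query="...", backend=EchoBackend()), which defaults model to "mock-model" and runs the same pipeline via asyncio.run when no event loop is active.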