hegelion-0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. hegelion/__init__.py +45 -0
  2. hegelion/core/__init__.py +29 -0
  3. hegelion/core/agent.py +166 -0
  4. hegelion/core/autocoding_state.py +293 -0
  5. hegelion/core/backends.py +442 -0
  6. hegelion/core/cache.py +92 -0
  7. hegelion/core/config.py +276 -0
  8. hegelion/core/core.py +649 -0
  9. hegelion/core/engine.py +865 -0
  10. hegelion/core/logging_utils.py +67 -0
  11. hegelion/core/models.py +293 -0
  12. hegelion/core/parsing.py +271 -0
  13. hegelion/core/personas.py +81 -0
  14. hegelion/core/prompt_autocoding.py +353 -0
  15. hegelion/core/prompt_dialectic.py +414 -0
  16. hegelion/core/prompts.py +127 -0
  17. hegelion/core/schema.py +67 -0
  18. hegelion/core/validation.py +68 -0
  19. hegelion/council.py +254 -0
  20. hegelion/examples_data/__init__.py +6 -0
  21. hegelion/examples_data/glm4_6_examples.jsonl +2 -0
  22. hegelion/judge.py +230 -0
  23. hegelion/mcp/__init__.py +3 -0
  24. hegelion/mcp/server.py +918 -0
  25. hegelion/scripts/hegelion_agent_cli.py +90 -0
  26. hegelion/scripts/hegelion_bench.py +117 -0
  27. hegelion/scripts/hegelion_cli.py +497 -0
  28. hegelion/scripts/hegelion_dataset.py +99 -0
  29. hegelion/scripts/hegelion_eval.py +137 -0
  30. hegelion/scripts/mcp_setup.py +150 -0
  31. hegelion/search_providers.py +151 -0
  32. hegelion/training/__init__.py +7 -0
  33. hegelion/training/datasets.py +123 -0
  34. hegelion/training/generator.py +232 -0
  35. hegelion/training/mlx_scu_trainer.py +379 -0
  36. hegelion/training/mlx_trainer.py +181 -0
  37. hegelion/training/unsloth_trainer.py +136 -0
  38. hegelion-0.4.0.dist-info/METADATA +295 -0
  39. hegelion-0.4.0.dist-info/RECORD +43 -0
  40. hegelion-0.4.0.dist-info/WHEEL +5 -0
  41. hegelion-0.4.0.dist-info/entry_points.txt +8 -0
  42. hegelion-0.4.0.dist-info/licenses/LICENSE +21 -0
  43. hegelion-0.4.0.dist-info/top_level.txt +1 -0
hegelion/scripts/hegelion_eval.py
@@ -0,0 +1,137 @@
+ #!/usr/bin/env python
+ """Evaluation CLI for comparing Hegelion benchmark runs."""
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Sequence
+
+ if __package__ is None or __package__ == "":  # pragma: no cover - direct execution fallback
+     sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from hegelion import HegelionResult
+
+
+ def build_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser(
+         description="Compare multiple Hegelion benchmark runs and generate a report."
+     )
+     parser.add_argument(
+         "results_files",
+         type=Path,
+         nargs="+",
+         help="Paths to one or more Hegelion JSONL results files.",
+     )
+     parser.add_argument(
+         "--output",
+         type=Path,
+         default=None,
+         help="Optional path to write the comparison report in Markdown format.",
+     )
+     return parser
+
+
+ def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
+     return build_parser().parse_args(argv)
+
+
+ def load_results(path: Path) -> List[HegelionResult]:
+     """Load Hegelion results from a JSONL file."""
+     results = []
+     with path.open("r", encoding="utf-8") as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 continue
+             data = json.loads(line)
+             results.append(HegelionResult(**data))
+     return results
+
+
+ def analyze_results(results: List[HegelionResult]) -> Dict[str, Any]:
+     """Calculate aggregate metrics for a list of results."""
+     if not results:
+         return {}
+
+     total_queries = len(results)
+     total_contradictions = sum(len(r.contradictions) for r in results)
+     total_proposals = sum(len(r.research_proposals) for r in results)
+     total_time = sum(r.metadata.get("total_time_ms", 0) for r in results)
+
+     conflict_scores = []
+     for r in results:
+         debug = r.metadata.get("debug", {})
+         if debug and "internal_conflict_score" in debug:
+             conflict_scores.append(debug["internal_conflict_score"])
+
+     avg_conflict_score = sum(conflict_scores) / len(conflict_scores) if conflict_scores else None
+
+     return {
+         "model": results[0].metadata.get("backend_model", "Unknown"),
+         "total_queries": total_queries,
+         "avg_contradictions": total_contradictions / total_queries,
+         "avg_proposals": total_proposals / total_queries,
+         "avg_time_ms": total_time / total_queries,
+         "avg_conflict_score": avg_conflict_score,
+     }
+
+
+ def generate_report(analysis: List[Dict[str, Any]]) -> str:
+     """Generate a Markdown report from the analysis."""
+     report = ["# Hegelion Evaluation Report", ""]
+
+     # Summary Table
+     report.append("## Summary")
+     report.append(
+         "| Model | Queries | Avg. Contradictions | Avg. Proposals | Avg. Time (ms) | Avg. Conflict Score |"
+     )
+     report.append(
+         "|-------|---------|---------------------|----------------|----------------|---------------------|"
+     )
+     for stats in analysis:
+         conflict_score_str = (
+             f"{stats['avg_conflict_score']:.3f}"
+             if stats["avg_conflict_score"] is not None
+             else "N/A"
+         )
+         report.append(
+             f"| {stats['model']} | {stats['total_queries']} | {stats['avg_contradictions']:.2f} | "
+             f"{stats['avg_proposals']:.2f} | {stats['avg_time_ms']:.0f} | {conflict_score_str} |"
+         )
+     report.append("")
+
+     return "\n".join(report)
+
+
+ def main(argv: Optional[Sequence[str]] = None) -> None:
+     args = parse_args(argv)
+
+     all_analysis = []
+     for results_file in args.results_files:
+         if not results_file.exists():
+             print(f"Error: File not found: {results_file}", file=sys.stderr)
+             continue
+
+         results = load_results(results_file)
+         if not results:
+             print(f"Warning: No results found in: {results_file}", file=sys.stderr)
+             continue
+
+         analysis = analyze_results(results)
+         all_analysis.append(analysis)
+
+     report = generate_report(all_analysis)
+
+     if args.output:
+         args.output.write_text(report, encoding="utf-8")
+         print(f"Report written to {args.output}")
+     else:
+         print(report)
+
+
+ if __name__ == "__main__":  # pragma: no cover - CLI entrypoint
+     main()
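The evaluation CLI above can also be driven programmatically, since `main()` accepts an explicit argument list. A minimal sketch (the JSONL file names are placeholders, and the console-script name registered in entry_points.txt is not shown in this diff, so the module is imported directly):

    from hegelion.scripts.hegelion_eval import main

    # Equivalent to: python -m hegelion.scripts.hegelion_eval run_a.jsonl run_b.jsonl --output report.md
    main(["run_a.jsonl", "run_b.jsonl", "--output", "report.md"])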
hegelion/scripts/mcp_setup.py
@@ -0,0 +1,150 @@
+ """Hegelion MCP Setup Logic and CLI helper."""
+
+ import sys
+ import json
+ import site
+ from pathlib import Path
+ import hegelion
+
+
+ USAGE_NOTE = """
+ Hegelion MCP setup
+ ------------------
+ Use this helper to generate the MCP snippet for Cursor / Claude Desktop.
+
+ Examples:
+   hegelion-setup-mcp # print JSON snippet
+   hegelion-setup-mcp --write # write to ./mcp_config.json
+   hegelion-setup-mcp --write "$HOME/Library/Application Support/Claude/claude_desktop_config.json" # macOS Claude Desktop
+
+ Note: After modifying the config, quit and reopen Claude Desktop for changes to take effect.
+ """
+
+
+ def get_python_path():
+     """Get the absolute path to the current python interpreter."""
+     return sys.executable
+
+
+ def is_installed_in_site_packages():
+     """Check if hegelion is installed in site-packages."""
+     package_path = Path(hegelion.__file__).parent
+     for site_package in site.getsitepackages():
+         if str(package_path).startswith(site_package):
+             return True
+     # Also check user site packages
+     if site.getusersitepackages() and str(package_path).startswith(site.getusersitepackages()):
+         return True
+     return False
+
+
+ def get_project_root():
+     """Get the absolute path to the project root (if running from source)."""
+     return Path(hegelion.__file__).parent.parent.absolute()
+
+
+ def generate_config(python_path, project_root, is_installed):
+     """Generate the MCP config."""
+
+     env = {}
+     if not is_installed:
+         # If not installed in site-packages, we likely need PYTHONPATH
+         env["PYTHONPATH"] = str(project_root)
+
+     config = {
+         "mcpServers": {
+             "hegelion": {
+                 "command": python_path,
+                 "args": ["-m", "hegelion.mcp.server"],
+             },
+         }
+     }
+
+     if env:
+         config["mcpServers"]["hegelion"]["env"] = env
+
+     return config
+
+
+ def print_setup_instructions(dry_run=False):
+     python_path = get_python_path()
+     is_installed = is_installed_in_site_packages()
+     project_root = get_project_root()
+
+     config = generate_config(python_path, project_root, is_installed)
+
+     snippet = config["mcpServers"]
+     json_output = json.dumps(snippet, indent=2)
+
+     print("\n" + "=" * 60)
+     print("MCP CONFIGURATION SNIPPET")
+     print("=" * 60)
+     print("Copy the snippet below into your 'Global MCP Settings' (Cursor)")
+     print("or your MCP configuration file:")
+     print("-" * 60)
+     print(json_output)
+     print("-" * 60)
+
+     print(
+         "Tools available: dialectical_workflow, dialectical_single_shot, thesis_prompt, antithesis_prompt, synthesis_prompt"
+     )
+     print("response_style options: json, sections, synthesis_only")
+     print("\nCommon config paths:")
+     print(" macOS Claude Desktop: ~/Library/Application Support/Claude/claude_desktop_config.json")
+     print(" Cursor: ~/.cursor/mcp_config.json")
+     print(" Windsurf: ~/.codeium/windsurf/mcp_config.json")
+     print("\n⚠️ Restart Required: Quit and reopen Claude Desktop after modifying the config.")
+
+     if not is_installed:
+         print(f"\nNOTE: Detected source installation at {project_root}")
+         print("Added PYTHONPATH to ensure the server runs correctly.")
+     else:
+         print("\nNOTE: Detected installed package.")
+
+
+ def _write_config(target: Path, snippet: dict) -> None:
+     target = target.expanduser()
+     target.parent.mkdir(parents=True, exist_ok=True)
+     if target.exists():
+         existing = {}
+         try:
+             existing = json.loads(target.read_text(encoding="utf-8"))
+         except Exception:
+             existing = {}
+         merged = existing.get("mcpServers", {})
+         merged.update(snippet)
+         payload = {"mcpServers": merged, **{k: v for k, v in existing.items() if k != "mcpServers"}}
+     else:
+         payload = {"mcpServers": snippet}
+     target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+     print(f"Wrote MCP config to {target}")
+
+
+ def main(argv=None):  # pragma: no cover - lightweight CLI
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description="Generate MCP config for Hegelion", epilog=USAGE_NOTE
+     )
+     parser.add_argument(
+         "--write",
+         nargs="?",
+         const="mcp_config.json",
+         help="Write to path (default: mcp_config.json in CWD)",
+     )
+     args = parser.parse_args(argv)
+
+     python_path = get_python_path()
+     is_installed = is_installed_in_site_packages()
+     project_root = get_project_root()
+     config = generate_config(python_path, project_root, is_installed)
+     snippet = config["mcpServers"]
+
+     if args.write:
+         _write_config(Path(args.write), snippet)
+     else:
+         print_setup_instructions()
+
+
+ if __name__ == "__main__":
+     main()
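The snippet this helper prints can also be generated in code; a minimal sketch using the functions above (the interpreter path shown in the comment is illustrative only):

    import json

    from hegelion.scripts.mcp_setup import (
        generate_config,
        get_project_root,
        get_python_path,
        is_installed_in_site_packages,
    )

    config = generate_config(get_python_path(), get_project_root(), is_installed_in_site_packages())
    # For an installed package this reduces to roughly:
    # {"mcpServers": {"hegelion": {"command": "/path/to/python",
    #                              "args": ["-m", "hegelion.mcp.server"]}}}
    print(json.dumps(config["mcpServers"], indent=2))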
hegelion/search_providers.py
@@ -0,0 +1,151 @@
+ """Search providers for grounding antithesis with real-world information."""
+
+ from __future__ import annotations
+
+ import os
+ import logging
+ from abc import ABC, abstractmethod
+ from typing import List, Optional
+
+ logger = logging.getLogger(__name__)
+
+
+ class SearchProvider(ABC):
+     """Abstract base class for search providers."""
+
+     @abstractmethod
+     async def search(self, query: str, max_results: int = 5) -> List[str]:
+         """Search for context snippets related to the query.
+
+         Args:
+             query: Search query string
+             max_results: Maximum number of results to return
+
+         Returns:
+             List of context snippets
+         """
+         pass
+
+
+ class TavilySearchProvider(SearchProvider):
+     """Tavily search provider - optimized for AI agents."""
+
+     def __init__(self, api_key: str):
+         self.api_key = api_key
+         try:
+             from tavily import TavilyClient
+
+             self.client = TavilyClient(api_key=api_key)
+         except ImportError:
+             raise RuntimeError("tavily-python not installed. Run: pip install tavily-python")
+
+     async def search(self, query: str, max_results: int = 5) -> List[str]:
+         """Search using Tavily's agent-optimized API."""
+         try:
+             response = self.client.search(
+                 query=query,
+                 search_depth="advanced",
+                 max_results=max_results,
+                 include_answer=True,
+                 include_raw_content=False,
+             )
+
+             snippets = []
+
+             # Add the AI-generated answer if available
+             if response.get("answer"):
+                 snippets.append(f"Summary: {response['answer']}")
+
+             # Add search results
+             for result in response.get("results", []):
+                 content = result.get("content", "").strip()
+                 url = result.get("url", "")
+                 if content:
+                     snippet = f"Source: {url}\n{content}"
+                     snippets.append(snippet)
+
+             return snippets[:max_results]
+
+         except Exception as e:
+             logger.warning(f"Tavily search failed: {e}")
+             return []
+
+
+ class DuckDuckGoSearchProvider(SearchProvider):
+     """DuckDuckGo search provider - free, no API key required."""
+
+     def __init__(self):
+         try:
+             from duckduckgo_search import DDGS
+
+             self.ddgs = DDGS()
+         except ImportError:
+             raise RuntimeError(
+                 "duckduckgo-search not installed. Run: pip install duckduckgo-search"
+             )
+
+     async def search(self, query: str, max_results: int = 5) -> List[str]:
+         """Search using DuckDuckGo's free API."""
+         try:
+             results = self.ddgs.text(keywords=query, max_results=max_results, safesearch="moderate")
+
+             snippets = []
+             for result in results:
+                 title = result.get("title", "")
+                 body = result.get("body", "")
+                 href = result.get("href", "")
+
+                 if body:
+                     snippet = f"Title: {title}\nSource: {href}\n{body}"
+                     snippets.append(snippet)
+
+             return snippets
+
+         except Exception as e:
+             logger.warning(f"DuckDuckGo search failed: {e}")
+             return []
+
+
+ def create_search_provider() -> Optional[SearchProvider]:
+     """Factory function to create the best available search provider.
+
+     Returns:
+         SearchProvider instance, or None if no providers available
+     """
+     # Try Tavily first (premium, agent-optimized)
+     tavily_key = os.environ.get("TAVILY_API_KEY")
+     if tavily_key:
+         try:
+             logger.info("Using Tavily search provider (premium)")
+             return TavilySearchProvider(tavily_key)
+         except RuntimeError as e:
+             logger.warning(f"Tavily provider failed: {e}")
+
+     # Fall back to DuckDuckGo (free)
+     try:
+         logger.info("Using DuckDuckGo search provider (free)")
+         return DuckDuckGoSearchProvider()
+     except RuntimeError as e:
+         logger.warning(f"DuckDuckGo provider failed: {e}")
+
+     logger.error(
+         "No search providers available. Install: pip install duckduckgo-search tavily-python"
+     )
+     return None
+
+
+ async def search_for_context(query: str, max_results: int = 5) -> List[str]:
+     """Search for context snippets to ground the antithesis.
+
+     Args:
+         query: Search query
+         max_results: Maximum results to return
+
+     Returns:
+         List of context snippets, empty list if search fails
+     """
+     provider = create_search_provider()
+     if not provider:
+         return []
+
+     return await provider.search(query, max_results)
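`search_for_context` is the high-level entry point for callers that ground the antithesis. A minimal usage sketch (assumes either `TAVILY_API_KEY` is set or `duckduckgo-search` is installed; the query string is only an example):

    import asyncio

    from hegelion.search_providers import search_for_context

    async def demo() -> None:
        # Returns an empty list if no provider is available or the search fails.
        snippets = await search_for_context("criticisms of remote work productivity", max_results=3)
        for snippet in snippets:
            print(snippet)
            print("---")

    asyncio.run(demo())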
hegelion/training/__init__.py
@@ -0,0 +1,7 @@
+ from .datasets import export_training_data, to_dpo_dataset, to_instruction_tuning_dataset
+
+ __all__ = [
+     "export_training_data",
+     "to_dpo_dataset",
+     "to_instruction_tuning_dataset",
+ ]
hegelion/training/datasets.py
@@ -0,0 +1,123 @@
+ """
+ Hegelion Datasets: Tools for converting dialectical results into training data.
+
+ This module enables Hegelion to be used as a generator for RLAIF (Reinforcement Learning
+ from AI Feedback) and DPO (Direct Preference Optimization) datasets.
+ """
+
+ from typing import List, Literal
+ import json
+ from pathlib import Path
+ from hegelion.core.models import HegelionResult
+
+
+ def to_dpo_dataset(
+     results: List[HegelionResult],
+     output_file: str | Path,
+     rejected_source: Literal["thesis", "antithesis", "both"] = "thesis",
+ ) -> None:
+     """
+     Convert a list of Hegelion results into a DPO (Direct Preference Optimization) dataset.
+
+     Format:
+         {
+             "prompt": "Query...",
+             "chosen": "Synthesis...",
+             "rejected": "Thesis/Antithesis..."
+         }
+
+     Args:
+         results: List of HegelionResult objects
+         output_file: Path to save the .jsonl file
+         rejected_source: Which part of the dialectic to treat as the "rejected" (inferior) response.
+             - 'thesis': The initial position (good, but not transcendent)
+             - 'antithesis': The critique (critical, but one-sided)
+             - 'both': Creates two examples per result (one vs thesis, one vs antithesis)
+     """
+
+     dataset = []
+
+     for res in results:
+         # Basic prompt format
+         prompt = f"Query: {res.query}\n\nProvide a comprehensive analysis."
+
+         # The "Chosen" response is always the Synthesis (the transcendent view)
+         chosen = res.synthesis
+
+         rejected_items = []
+         if rejected_source == "thesis" or rejected_source == "both":
+             rejected_items.append(res.thesis)
+
+         if rejected_source == "antithesis" or rejected_source == "both":
+             rejected_items.append(res.antithesis)
+
+         for rejected in rejected_items:
+             entry = {
+                 "prompt": prompt,
+                 "chosen": chosen,
+                 "rejected": rejected,
+                 "metadata": {
+                     "source": "hegelion-synthetic",
+                     "mode": res.mode,
+                     "contradictions_found": len(res.contradictions),
+                 },
+             }
+             dataset.append(entry)
+
+     # Write to JSONL
+     with open(output_file, "w", encoding="utf-8") as f:
+         for entry in dataset:
+             f.write(json.dumps(entry, ensure_ascii=False) + "\n")
+
+     print(f"Exported {len(dataset)} DPO pairs to {output_file}")
+
+
+ def to_instruction_tuning_dataset(results: List[HegelionResult], output_file: str | Path) -> None:
+     """
+     Convert results into standard instruction tuning format (Alpaca/ShareGPT style).
+
+     Format:
+         {
+             "instruction": "Query...",
+             "output": "Synthesis..."
+         }
+     """
+     dataset = []
+     for res in results:
+         entry = {
+             "instruction": res.query,
+             "input": "",
+             "output": res.synthesis,
+             "system": "You are a dialectical reasoner capable of synthesizing opposing viewpoints.",
+         }
+         dataset.append(entry)
+
+     with open(output_file, "w", encoding="utf-8") as f:
+         json.dump(dataset, f, indent=2, ensure_ascii=False)
+
+     print(f"Exported {len(dataset)} instruction tuning examples to {output_file}")
+
+
+ def export_training_data(
+     results: List[HegelionResult],
+     output_file: str | Path,
+     *,
+     format: Literal["dpo", "instruction"] = "dpo",
+     rejected_source: Literal["thesis", "antithesis", "both"] = "thesis",
+ ) -> None:
+     """
+     Convenience wrapper for exporting Hegelion results to common training formats.
+
+     Args:
+         results: List of HegelionResult objects.
+         output_file: Destination path (``.jsonl`` for DPO, ``.json`` for instruction tuning).
+         format: ``"dpo"`` (preference pairs) or ``"instruction"`` (Alpaca-style).
+         rejected_source: Which side to treat as the rejected answer for DPO exports.
+     """
+
+     if format == "dpo":
+         to_dpo_dataset(results, output_file, rejected_source=rejected_source)
+     elif format == "instruction":
+         to_instruction_tuning_dataset(results, output_file)
+     else:
+         raise ValueError("format must be 'dpo' or 'instruction'")
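A hedged sketch of how the exporters above might be used on saved results (the input and output file names are placeholders; results are re-hydrated the same way hegelion_eval.py does):

    import json

    from hegelion.core.models import HegelionResult
    from hegelion.training import export_training_data

    # Load previously saved results from a JSONL benchmark run.
    with open("hegelion_results.jsonl", "r", encoding="utf-8") as f:
        results = [HegelionResult(**json.loads(line)) for line in f if line.strip()]

    # Preference pairs: synthesis is "chosen", thesis and antithesis are "rejected".
    export_training_data(results, "hegelion_dpo.jsonl", format="dpo", rejected_source="both")

    # Alpaca-style instruction-tuning examples (query -> synthesis).
    export_training_data(results, "hegelion_sft.json", format="instruction")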