janus-labs 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. cli/__init__.py +1 -0
  2. cli/__main__.py +7 -0
  3. cli/clipboard.py +113 -0
  4. cli/main.py +690 -0
  5. cli/output.py +97 -0
  6. cli/submit.py +270 -0
  7. config/__init__.py +1 -0
  8. config/detection.py +72 -0
  9. forge/__init__.py +5 -0
  10. forge/behavior.py +35 -0
  11. forge/behaviors/BHV-002-refactor-complexity.yaml +25 -0
  12. forge/behaviors/BHV-003-error-handling.yaml +28 -0
  13. gauge/__init__.py +17 -0
  14. gauge/adapter.py +134 -0
  15. gauge/behaviors/__init__.py +11 -0
  16. gauge/behaviors/code_quality.py +73 -0
  17. gauge/behaviors/instruction_adherence.py +52 -0
  18. gauge/behaviors/test_cheating.py +178 -0
  19. gauge/governed_rollout.py +107 -0
  20. gauge/judge.py +179 -0
  21. gauge/qualitative.py +271 -0
  22. gauge/report.py +210 -0
  23. gauge/trust_elasticity.py +172 -0
  24. governance/__init__.py +14 -0
  25. governance/bridge.py +124 -0
  26. governance/memory.py +116 -0
  27. harness/__init__.py +1 -0
  28. harness/artifacts.py +195 -0
  29. harness/executor.py +51 -0
  30. harness/sandbox.py +40 -0
  31. harness/types.py +46 -0
  32. janus_labs/__init__.py +16 -0
  33. janus_labs/__main__.py +37 -0
  34. janus_labs-0.2.0.dist-info/METADATA +316 -0
  35. janus_labs-0.2.0.dist-info/RECORD +80 -0
  36. janus_labs-0.2.0.dist-info/WHEEL +5 -0
  37. janus_labs-0.2.0.dist-info/entry_points.txt +2 -0
  38. janus_labs-0.2.0.dist-info/licenses/LICENSE +201 -0
  39. janus_labs-0.2.0.dist-info/top_level.txt +11 -0
  40. janus_types.py +140 -0
  41. probe/__init__.py +19 -0
  42. probe/discovery.py +194 -0
  43. probe/explorer.py +236 -0
  44. probe/mutations.py +196 -0
  45. probe/tracer.py +193 -0
  46. scaffold/__init__.py +1 -0
  47. scaffold/scorer.py +321 -0
  48. scaffold/templates/BHV-001-test-cheating/.gitignore +4 -0
  49. scaffold/templates/BHV-001-test-cheating/src/__init__.py +0 -0
  50. scaffold/templates/BHV-001-test-cheating/src/calculator.py +24 -0
  51. scaffold/templates/BHV-001-test-cheating/tests/__init__.py +0 -0
  52. scaffold/templates/BHV-001-test-cheating/tests/test_calculator.py +35 -0
  53. scaffold/templates/default/.gitignore +4 -0
  54. scaffold/templates/default/src/__init__.py +0 -0
  55. scaffold/templates/default/src/main.py +23 -0
  56. scaffold/templates/default/tests/__init__.py +0 -0
  57. scaffold/templates/default/tests/test_main.py +32 -0
  58. scaffold/workspace.py +202 -0
  59. scaffold/workspaces/BHV-002-refactor-complexity/src/__init__.py +0 -0
  60. scaffold/workspaces/BHV-002-refactor-complexity/src/pricing.py +72 -0
  61. scaffold/workspaces/BHV-002-refactor-complexity/tests/__init__.py +0 -0
  62. scaffold/workspaces/BHV-002-refactor-complexity/tests/test_pricing.py +72 -0
  63. scaffold/workspaces/BHV-003-error-handling/src/__init__.py +0 -0
  64. scaffold/workspaces/BHV-003-error-handling/src/file_processor.py +100 -0
  65. scaffold/workspaces/BHV-003-error-handling/tests/__init__.py +0 -0
  66. scaffold/workspaces/BHV-003-error-handling/tests/test_file_processor.py +144 -0
  67. suite/__init__.py +16 -0
  68. suite/builtin/__init__.py +13 -0
  69. suite/builtin/hello_world.py +28 -0
  70. suite/builtin/refactor_storm.py +92 -0
  71. suite/comparison.py +274 -0
  72. suite/definition.py +51 -0
  73. suite/export/__init__.py +6 -0
  74. suite/export/github.py +58 -0
  75. suite/export/html.py +160 -0
  76. suite/export/json_export.py +65 -0
  77. suite/registry.py +20 -0
  78. suite/result.py +133 -0
  79. suite/runner.py +110 -0
  80. suite/thresholds.py +80 -0
governance/bridge.py ADDED
@@ -0,0 +1,124 @@
1
+ """Bridge between Janus v3.6 governance and Janus Labs Gauge layer."""
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from pathlib import Path
6
+ import sys
7
+ from typing import List, Optional
8
+
9
+ # Try to import from mcp-janus (full installation)
10
+ # Fall back to stubs for standalone operation
11
+ try:
12
+ MCP_JANUS_PATH = Path(__file__).resolve().parents[2] / "mcp-janus"
13
+ if MCP_JANUS_PATH.exists() and str(MCP_JANUS_PATH) not in sys.path:
14
+ sys.path.insert(0, str(MCP_JANUS_PATH))
15
+ from tools import foundation_check, handle_escalation, infer_confidence # noqa: E402
16
+ except ImportError:
17
+ # Standalone mode - use stubs
18
+ from janus_types import foundation_check, handle_escalation, infer_confidence # noqa: E402
19
+
20
+
21
class GovernanceDecision(Enum):
    """Closed set of outcomes a governance check can produce.

    Each member's value is the lowercase form of its name.
    """

    PASS = "pass"
    WARN = "warn"
    HALT = "halt"
26
+
27
+
28
@dataclass
class GovernanceContext:
    """Rollout state supplied to a governance evaluation.

    Only ``rollout_index`` and ``behavior_id`` are mandatory; every other
    field has a default so callers can provide as much context as they have.
    """

    # Index of the rollout being evaluated.
    # NOTE(review): presumably zero-based -- confirm against callers.
    rollout_index: int
    # Identifier of the behavior under evaluation.
    behavior_id: str
    # Description of the approach currently being attempted, if known.
    current_approach: Optional[str] = None
    # Previously attempted approaches, if any were recorded.
    approach_history: Optional[List[str]] = None
    # Free-form reasoning text, if available.
    reasoning_text: Optional[str] = None
    # Working directory for the evaluation; defaults to the current directory.
    target_dir: str = "."
37
+
38
+
39
@dataclass
class GovernanceResult:
    """Outcome of a single governance check."""

    # The decision reached (PASS / WARN / HALT).
    decision: GovernanceDecision
    # Name of the signal that triggered the decision, if any.
    trigger: Optional[str]
    # Raw signal values that accompanied the decision.
    signals: dict
    # Human-readable recommendation text.
    recommendation: str
    # Identifier of an escalation raised for this decision, when one exists.
    escalation_id: Optional[str] = None
    # Convenience flag for callers that only need to know whether to stop.
    should_halt: bool = False
48
+
49
+
50
def _to_decision(result_value: str) -> GovernanceDecision:
    """Translate a result label into a GovernanceDecision.

    Matching is case-insensitive. Any label other than "HALT" or "WARN"
    maps to ``GovernanceDecision.PASS``.
    """
    by_label = {
        "HALT": GovernanceDecision.HALT,
        "WARN": GovernanceDecision.WARN,
    }
    return by_label.get(result_value.upper(), GovernanceDecision.PASS)
57
+
58
+
59
def check_governance(context: GovernanceContext) -> GovernanceResult:
    """
    Run the governance check for one rollout iteration.

    Feeds the rollout context into ``foundation_check`` and normalizes
    whatever comes back (a plain string or a mapping) into a
    GovernanceResult. A HALT decision additionally goes through
    ``handle_escalation`` so an escalation id can be attached.

    Args:
        context: GovernanceContext with rollout state

    Returns:
        GovernanceResult with decision and metadata
    """
    confidence = None
    if context.reasoning_text:
        confidence, _ = infer_confidence(context.reasoning_text)

    history = context.approach_history
    # Having more than one recorded approach is what counts as a repeated
    # pattern here; the same condition gates the confidence history.
    has_repeats = bool(history and len(history) > 1)

    # Synthetic series, one value per recorded approach: starts at 0.9,
    # drops 0.1 per step and is floored at 0.1.
    confidence_history = (
        [max(0.1, 0.9 - (step * 0.1)) for step in range(len(history))]
        if has_repeats
        else None
    )

    iteration = context.rollout_index + 1
    result = foundation_check(
        iteration_count=iteration,
        same_pattern=has_repeats,
        merge_ready=False,
        current_approach=context.current_approach,
        approach_history=history,
        confidence=confidence,
        confidence_history=confidence_history,
    )

    escalation_id = None
    if isinstance(result, str):
        # String results carry the verdict as a prefix and double as the
        # recommendation text.
        recommendation = result
        signals = {"iteration": iteration}
        if result.startswith("HALT"):
            decision, trigger = GovernanceDecision.HALT, "iteration"
        elif result.startswith("WARN"):
            decision, trigger = GovernanceDecision.WARN, "iteration"
        else:
            decision, trigger = GovernanceDecision.PASS, "none"
    else:
        decision = _to_decision(str(result.get("result", "PASS")))
        trigger = result.get("trigger", "none")
        raw_signals = result.get("signals")
        # Anything that is not a dict is replaced by an empty mapping.
        signals = raw_signals if isinstance(raw_signals, dict) else {}
        recommendation = result.get("recommendation", "")

    halted = decision == GovernanceDecision.HALT
    if halted:
        esc_result = handle_escalation(result, {"target_dir": context.target_dir})
        if isinstance(esc_result, dict):
            escalation_id = esc_result.get("escalation_id")

    return GovernanceResult(
        decision=decision,
        trigger=trigger,
        signals=signals,
        recommendation=recommendation,
        escalation_id=escalation_id,
        should_halt=halted,
    )
governance/memory.py ADDED
@@ -0,0 +1,116 @@
1
+ """Persistence of governance decisions to Janus memory tiers."""
2
+
3
+ from datetime import datetime, timezone
4
+ from pathlib import Path
5
+ import sys
6
+ from typing import List, Optional
7
+
8
+
9
+ # Try mcp-janus memory module, fallback to local stubs for standalone operation
10
+ MCP_JANUS_PATH = Path(__file__).resolve().parents[2] / "mcp-janus"
11
+ if MCP_JANUS_PATH.exists() and str(MCP_JANUS_PATH) not in sys.path:
12
+ sys.path.insert(0, str(MCP_JANUS_PATH))
13
+
14
+ try:
15
+ from memory import read_tier, write_tier # noqa: E402
16
+ except ImportError:
17
+ from janus_types import read_tier, write_tier # noqa: E402
18
+
19
+ from governance.bridge import GovernanceResult # noqa: E402
20
+
21
+
22
def persist_governance_decision(
    result: GovernanceResult,
    behavior_id: str,
    rollout_index: int,
    target_dir: str = ".",
) -> bool:
    """
    Append a governance decision to the "governance" memory tier.

    The tier keeps a rolling log of at most 100 decisions under
    ``governance_decisions`` plus a copy of the newest one under
    ``last_decision``.

    Args:
        result: GovernanceResult from check_governance()
        behavior_id: ID of the behavior being evaluated
        rollout_index: Index of the rollout
        target_dir: Directory containing .janus/

    Returns:
        The success flag reported by ``write_tier``
    """
    tier = read_tier("governance", target_dir)
    if not isinstance(tier, dict):
        # Nothing usable stored yet: start a fresh tier document.
        tier = {"schema_version": "1.0.0"}

    log = tier.get("governance_decisions", [])
    if not isinstance(log, list):
        log = []

    record = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        behavior_id=behavior_id,
        rollout_index=rollout_index,
        decision=result.decision.value,
        trigger=result.trigger,
        signals=result.signals,
        escalation_id=result.escalation_id,
    )
    log.append(record)

    # Keep only the 100 most recent entries.
    if len(log) > 100:
        log = log[-100:]

    tier["governance_decisions"] = log
    tier["last_decision"] = record

    written, _errors = write_tier("governance", tier, target_dir)
    return written
68
+
69
+
70
def get_governance_history(
    behavior_id: Optional[str] = None,
    limit: int = 20,
    target_dir: str = ".",
) -> List[dict]:
    """
    Retrieve governance decision history.

    Args:
        behavior_id: Optional filter by behavior
        limit: Maximum entries to return; zero or a negative value yields
            an empty list
        target_dir: Directory containing .janus/

    Returns:
        List of governance decision entries (most recent first). Empty when
        the tier is missing or malformed.
    """
    # A non-positive limit must not reach the slice below: ``xs[-0:]`` is
    # the whole list and ``xs[-(-k):]`` drops the first k entries, neither
    # of which is "at most `limit` entries".
    if limit <= 0:
        return []

    current = read_tier("governance", target_dir)
    if not isinstance(current, dict):
        return []

    decisions = current.get("governance_decisions", [])
    if not isinstance(decisions, list):
        return []

    if behavior_id:
        decisions = [d for d in decisions if d.get("behavior_id") == behavior_id]

    # The stored log is oldest-first: take the tail, then flip it.
    return list(reversed(decisions[-limit:]))
98
+
99
+
100
def get_halt_count(behavior_id: Optional[str] = None, target_dir: str = ".") -> int:
    """
    Count how many stored governance decisions were HALTs.

    Looks at up to the 100 most recent entries returned by
    ``get_governance_history``.

    Args:
        behavior_id: Optional filter by behavior
        target_dir: Directory containing .janus/

    Returns:
        Count of HALT decisions
    """
    halts = 0
    for record in get_governance_history(
        behavior_id=behavior_id,
        limit=100,
        target_dir=target_dir,
    ):
        if record.get("decision") == "halt":
            halts += 1
    return halts
harness/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Harness components for Janus Labs."""
harness/artifacts.py ADDED
@@ -0,0 +1,195 @@
1
+ """Artifact collection for Janus Labs harness."""
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ import re
7
+ import subprocess
8
+ import time
9
+ from typing import Any, Callable
10
+
11
+ from .types import RunArtifactBundle, Message, ToolInvocation, GitDiff, TestReport, Timings
12
+
13
+
14
+ def _run_git(args: list[str], cwd: Path) -> str | None:
15
+ try:
16
+ result = subprocess.run(
17
+ ["git", *args],
18
+ cwd=str(cwd),
19
+ capture_output=True,
20
+ text=True,
21
+ check=True,
22
+ )
23
+ except (FileNotFoundError, subprocess.CalledProcessError):
24
+ return None
25
+ return result.stdout
26
+
27
+
28
class ArtifactCollector:
    """
    Collects all components of a RunArtifactBundle during agent execution.

    Messages and tool calls are recorded as they happen, the git diff and
    test report are captured on demand, and ``finalize`` assembles the
    bundle, substituting empty defaults for anything never captured.
    """

    def __init__(self):
        self.messages: list[Message] = []
        self.tool_traces: list[ToolInvocation] = []
        # Reference point for the total elapsed time computed in finalize().
        self.start_time: float = time.perf_counter()
        # Running sum of the durations passed to record_tool_call().
        self.tool_time_ms: int = 0
        self.repo_diff: GitDiff | None = None
        self.test_results: TestReport | None = None

    def _timestamp(self) -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def record_message(self, role: str, content: str) -> None:
        """Record a conversation message, stamped with the current time."""
        message: Message = {
            "role": role,
            "content": content,
            "timestamp": self._timestamp(),
        }
        self.messages.append(message)

    def record_tool_call(self, tool_name: str, args: dict, result: Any, duration_ms: int) -> None:
        """Record a tool invocation with timing.

        Results that are neither ``str`` nor ``dict`` are stored as their
        ``str()`` form; negative durations are clamped to 0.
        """
        if not isinstance(result, (str, dict)):
            result = str(result)
        trace: ToolInvocation = {
            "tool_name": tool_name,
            "arguments": args,
            "result": result,
            "duration_ms": max(int(duration_ms), 0),
            "timestamp": self._timestamp(),
        }
        self.tool_traces.append(trace)
        self.tool_time_ms += trace["duration_ms"]

    def capture_git_diff(self, repo_path: str) -> GitDiff:
        """Capture git diff from repo.

        Uses plain ``git diff``. Any failure degrades to an empty diff
        rather than raising. The result is also stored on ``self.repo_diff``.
        """
        repo = Path(repo_path).resolve()
        files_changed: list[str] = []
        insertions = 0
        deletions = 0
        patch = ""

        try:
            numstat = _run_git(["diff", "--numstat"], repo)
            if numstat:
                for line in numstat.splitlines():
                    parts = line.split("\t")
                    if len(parts) >= 3:
                        ins, dels, file_path = parts[0], parts[1], parts[2]
                        # Non-numeric counts are skipped, not parsed.
                        if ins.isdigit():
                            insertions += int(ins)
                        if dels.isdigit():
                            deletions += int(dels)
                        files_changed.append(file_path)

            # --name-only is preferred for the file list when available;
            # the numstat paths collected above are the fallback.
            name_only = _run_git(["diff", "--name-only"], repo)
            if name_only is not None:
                files_changed = [line for line in name_only.splitlines() if line.strip()]

            patch = _run_git(["diff"], repo) or ""
        except Exception:
            # Best effort: diff capture must never abort the run.
            files_changed = []
            insertions = 0
            deletions = 0
            patch = ""

        diff: GitDiff = {
            "files_changed": files_changed,
            "insertions": insertions,
            "deletions": deletions,
            "patch": patch,
        }
        self.repo_diff = diff
        return diff

    def capture_test_results(self, test_output: str, framework: str = "pytest") -> TestReport:
        """Parse test output into TestReport.

        Only pytest output is parsed; for any other framework the counts
        stay at 0. Framework names outside {"pytest", "jest", "other"} are
        reported as "other". The result is also stored on
        ``self.test_results``.
        """

        def _last_count(label: str) -> int:
            # The pytest summary ("3 passed, 1 failed in 0.02s") is the last
            # thing printed, so the final match wins over any earlier log
            # or captured-output text that happens to look similar.
            hits = re.findall(rf"(\d+)\s+{label}", test_output)
            return int(hits[-1]) if hits else 0

        passed = 0
        failed = 0
        skipped = 0

        if framework == "pytest":
            passed = _last_count("passed")
            failed = _last_count("failed")
            skipped = _last_count("skipped")

        report: TestReport = {
            "framework": framework if framework in {"pytest", "jest", "other"} else "other",
            "passed": passed,
            "failed": failed,
            "skipped": skipped,
            "output": test_output,
        }
        self.test_results = report
        return report

    def create_tool_wrapper(self, original_tool: Callable) -> Callable:
        """
        Wrap a tool function to automatically record invocations.

        The wrapper times the call, records it via ``record_tool_call`` and
        returns the tool's result unchanged. If the tool raises, the
        exception propagates and nothing is recorded.
        """
        # Not every callable has __name__ (functools.partial objects and
        # callable instances do not); fall back to the type name so a
        # finished call is never lost to an AttributeError.
        tool_name = getattr(original_tool, "__name__", None)
        if tool_name is None:
            tool_name = type(original_tool).__name__

        def wrapped(*args, **kwargs):
            start = time.perf_counter()
            result = original_tool(*args, **kwargs)
            duration_ms = int((time.perf_counter() - start) * 1000)
            call_args = {"args": list(args), "kwargs": kwargs}
            self.record_tool_call(tool_name, call_args, result, duration_ms)
            return result

        return wrapped

    def finalize(self, exit_code: str) -> RunArtifactBundle:
        """
        Finalize and return complete bundle.

        Guarantees:
        - transcript, tool_traces, repo_diff, test_results, timings and
          exit_code are all present
        - repo_diff / test_results fall back to empty defaults if never
          captured
        - every timing is at least 1 ms
        - an unrecognized exit_code is reported as "crash"
        """
        # Each timing is clamped to >= 1, so tool + model time can exceed
        # the total by a millisecond or two on very short runs.
        tool_time_ms = max(int(self.tool_time_ms), 1)
        elapsed_ms = int((time.perf_counter() - self.start_time) * 1000)
        total_ms = max(elapsed_ms, tool_time_ms, 1)
        model_time_ms = max(total_ms - tool_time_ms, 1)

        timings: Timings = {
            "total_ms": total_ms,
            "tool_time_ms": tool_time_ms,
            "model_time_ms": model_time_ms,
        }

        repo_diff = self.repo_diff or {
            "files_changed": [],
            "insertions": 0,
            "deletions": 0,
            "patch": "",
        }
        test_results = self.test_results or {
            "framework": "pytest",
            "passed": 0,
            "failed": 0,
            "skipped": 0,
            "output": "",
        }

        if exit_code not in {"success", "timeout", "crash", "halt"}:
            exit_code = "crash"

        bundle: RunArtifactBundle = {
            # Copies, so later recording does not mutate a returned bundle.
            "transcript": list(self.messages),
            "tool_traces": list(self.tool_traces),
            "repo_diff": repo_diff,
            "test_results": test_results,
            "timings": timings,
            "exit_code": exit_code,
        }
        return bundle
harness/executor.py ADDED
@@ -0,0 +1,51 @@
1
+ """Execution helpers for Janus Labs harness."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+ import subprocess
6
+ from typing import Sequence
7
+
8
+
9
+ def _run_git(args: Sequence[str], cwd: Path) -> bool:
10
+ try:
11
+ subprocess.run(
12
+ ["git", *args],
13
+ cwd=str(cwd),
14
+ capture_output=True,
15
+ text=True,
16
+ check=True,
17
+ )
18
+ except (FileNotFoundError, subprocess.CalledProcessError):
19
+ return False
20
+ return True
21
+
22
+
23
def init_fixture(fixture_path: str) -> bool:
    """
    Reset a fixture repository to a clean state.

    Runs, in order and stopping at the first failure:
    - git reset --hard HEAD
    - git clean -fd (remove untracked files)

    Args:
        fixture_path: Absolute path to fixture repo

    Returns:
        bool: True if both commands succeeded; False if the path does not
        exist, has no ``.git`` entry, or either command failed
    """
    repo_path = Path(fixture_path).resolve()
    if not (repo_path.exists() and (repo_path / ".git").exists()):
        return False

    steps = (
        ["reset", "--hard", "HEAD"],
        ["clean", "-fd"],
    )
    # all() short-circuits, so "clean" is skipped if "reset" fails.
    return all(_run_git(step, repo_path) for step in steps)
harness/sandbox.py ADDED
@@ -0,0 +1,40 @@
1
+ """Filesystem sandbox enforcement for agent writes."""
2
+ from pathlib import Path
3
+ from typing import Set
4
+
5
+
6
class Sandbox:
    """
    Write-path gatekeeper: checks candidate paths against a set of allowed
    roots and remembers every path that was rejected.
    """

    def __init__(self, allowed_paths: list[str]):
        """
        Args:
            allowed_paths: List of absolute paths agent can write to
        """
        # Roots are resolved once so later comparisons are like-for-like.
        self.allowed_paths: Set[Path] = {Path(p).resolve() for p in allowed_paths}
        # Despite the name, this holds only the rejected paths.
        self.write_log: list[Path] = []

    def validate_write(self, path: str) -> bool:
        """
        Check if path is within allowed sandbox.

        A path is accepted when, after resolution, it equals an allowed
        root or lies underneath one. Rejected paths are logged.

        Returns:
            bool: True if write is allowed
        """
        candidate = Path(path).resolve()
        permitted = any(
            candidate == root or root in candidate.parents
            for root in self.allowed_paths
        )
        if not permitted:
            self.write_log.append(candidate)
        return permitted

    def get_violations(self) -> list[Path]:
        """Return a copy of the list of rejected paths."""
        return list(self.write_log)

    def is_clean(self) -> bool:
        """Return True if no violations occurred."""
        return not self.write_log
harness/types.py ADDED
@@ -0,0 +1,46 @@
1
+ """Typed structures for RunArtifactBundle capture."""
2
+ from typing import TypedDict, Literal
3
+
4
+
5
class Message(TypedDict):
    """One conversation message in a run transcript."""

    # Who produced the message.
    role: Literal["user", "assistant", "system"]
    # Message text.
    content: str
    # ISO8601 timestamp.
    timestamp: str
9
+
10
+
11
+ class ToolInvocation(TypedDict):
12
+ tool_name: str
13
+ arguments: dict
14
+ result: str | dict
15
+ duration_ms: int
16
+ timestamp: str # ISO8601
17
+
18
+
19
class GitDiff(TypedDict):
    """Summary of repository changes."""

    # Paths of the files that changed.
    files_changed: list[str]
    # Total number of inserted lines.
    insertions: int
    # Total number of deleted lines.
    deletions: int
    # Full diff output.
    patch: str
24
+
25
+
26
class TestReport(TypedDict):
    """Outcome counts and raw output of a test run."""

    # Test framework that produced the output.
    framework: Literal["pytest", "jest", "other"]
    # Number of passing tests.
    passed: int
    # Number of failing tests.
    failed: int
    # Number of skipped tests.
    skipped: int
    # Full test output.
    output: str
32
+
33
+
34
class Timings(TypedDict):
    """Millisecond timing breakdown of a run."""

    # Total time for the run.
    total_ms: int
    # Time attributed to tool calls.
    tool_time_ms: int
    # Time attributed to the model.
    model_time_ms: int
38
+
39
+
40
class RunArtifactBundle(TypedDict):
    """Everything captured from a single agent run."""

    # Conversation messages, in recording order.
    transcript: list[Message]
    # Recorded tool calls, in recording order.
    tool_traces: list[ToolInvocation]
    # Repository changes.
    repo_diff: GitDiff
    # Parsed test outcome.
    test_results: TestReport
    # Timing breakdown.
    timings: Timings
    # How the run ended.
    exit_code: Literal["success", "timeout", "crash", "halt"]
janus_labs/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """
2
+ Janus Labs - 3DMark for AI Agents.
3
+
4
+ Benchmark and measure AI coding agent reliability with standardized,
5
+ reproducible tests.
6
+
7
+ This module provides the Python API. For CLI usage:
8
+ python -m janus_labs run --suite refactor-storm
9
+
10
+ Or if janus-labs is in your PATH:
11
+ janus-labs run --suite refactor-storm
12
+ """
13
+
14
+ __version__ = "0.2.0"
15
+
16
+ __all__ = ["__version__"]
janus_labs/__main__.py ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Module entry point for janus-labs CLI.
4
+
5
+ Enables running the CLI via:
6
+ python -m janus_labs <command> [args]
7
+
8
+ This is the recommended fallback if 'janus-labs' is not in your PATH.
9
+
10
+ Examples:
11
+ python -m janus_labs run --suite refactor-storm
12
+ python -m janus_labs bench --submit
13
+ python -m janus_labs submit result.json --github myhandle
14
+ """
15
+
16
+ import sys
17
+
18
+
19
def main():
    """Delegate to the CLI entry point.

    Returns whatever ``cli.main.main`` returns. If an ImportError surfaces
    while importing or running it, a troubleshooting message is written to
    stderr and 1 is returned instead.
    """
    try:
        from cli.main import main as cli_main
        return cli_main()
    except ImportError as e:
        # Provide helpful error if dependencies are missing
        help_lines = [
            f"Error: {e}",
            "",
            "Janus Labs requires additional dependencies.",
            "Install with: pip install janus-labs",
            "",
            "If you've already installed, ensure you're using the correct Python:",
            f" Current: {sys.executable}",
        ]
        for text in help_lines:
            print(text, file=sys.stderr)
        return 1
34
+
35
+
36
+ if __name__ == "__main__":
37
+ sys.exit(main())