onecoder 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onecoder/agent.py +95 -0
- onecoder/agentic_tool_search/__init__.py +0 -0
- onecoder/agentic_tool_search/dynamic_tool_search.py +64 -0
- onecoder/agentic_tool_search/registry.py +33 -0
- onecoder/agents/__init__.py +7 -0
- onecoder/agents/documentation_agent.py +12 -0
- onecoder/agents/file_reader_agent.py +19 -0
- onecoder/agents/file_writer_agent.py +19 -0
- onecoder/agents/orchestrator_agent.py +51 -0
- onecoder/agents/refactoring_agent.py +12 -0
- onecoder/agents/research_agent.py +31 -0
- onecoder/agents/task_suggestion_agent.py +88 -0
- onecoder/alignment.py +236 -0
- onecoder/api.py +162 -0
- onecoder/api_client.py +112 -0
- onecoder/backends/base.py +22 -0
- onecoder/backends/local_tui.py +65 -0
- onecoder/blackboard.py +102 -0
- onecoder/cli.py +108 -0
- onecoder/commands/__init__.py +1 -0
- onecoder/commands/auth.py +78 -0
- onecoder/commands/ci.py +29 -0
- onecoder/commands/delegate.py +557 -0
- onecoder/commands/doctor.py +40 -0
- onecoder/commands/issue.py +136 -0
- onecoder/commands/logs.py +45 -0
- onecoder/commands/project.py +270 -0
- onecoder/commands/server.py +170 -0
- onecoder/config_manager.py +87 -0
- onecoder/constants.py +9 -0
- onecoder/diagnostics/__init__.py +2 -0
- onecoder/diagnostics/env_scan.py +207 -0
- onecoder/discovery.py +101 -0
- onecoder/distillation.py +236 -0
- onecoder/evaluation/__init__.py +1 -0
- onecoder/evaluation/ttu.py +176 -0
- onecoder/governance/__init__.py +0 -0
- onecoder/governance/probllm.py +91 -0
- onecoder/hooks.py +74 -0
- onecoder/ipc_auth.py +200 -0
- onecoder/issues.py +188 -0
- onecoder/jules_client.py +343 -0
- onecoder/knowledge.py +106 -0
- onecoder/llm.py +61 -0
- onecoder/logger.py +42 -0
- onecoder/metrics.py +129 -0
- onecoder/models/delegation.py +46 -0
- onecoder/onboarding.py +264 -0
- onecoder/review.py +233 -0
- onecoder/services/delegation_service.py +209 -0
- onecoder/services/validation_service.py +104 -0
- onecoder/sessions.py +186 -0
- onecoder/sprint_collector.py +165 -0
- onecoder/sync.py +167 -0
- onecoder/tmux.py +86 -0
- onecoder/tools/__init__.py +10 -0
- onecoder/tools/executor.py +53 -0
- onecoder/tools/external_tools.py +106 -0
- onecoder/tools/file_tools.py +77 -0
- onecoder/tools/interface.py +25 -0
- onecoder/tools/jules_tools.py +122 -0
- onecoder/tools/kit_tools.py +122 -0
- onecoder/tools/registry.py +32 -0
- onecoder/tui/__init__.py +5 -0
- onecoder/tui/app.py +263 -0
- onecoder/tui/commands.py +150 -0
- onecoder/tui/widgets.py +92 -0
- onecoder/worktree.py +186 -0
- onecoder-0.0.2.dist-info/METADATA +17 -0
- onecoder-0.0.2.dist-info/RECORD +73 -0
- onecoder-0.0.2.dist-info/WHEEL +5 -0
- onecoder-0.0.2.dist-info/entry_points.txt +2 -0
- onecoder-0.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Dict, Any
|
|
5
|
+
|
|
6
|
+
class ConfigManager:
    """Read and write the per-user OneCoder configuration file.

    Settings are stored as a JSON object in ``~/.onecoder/config.json``.
    The file can hold an API token, so the directory is created with mode
    ``0o700`` and the file is created and kept at mode ``0o600``.
    """

    # Fallback GitHub App client ID used when neither the environment nor
    # the config file provides one.
    DEFAULT_GITHUB_CLIENT_ID = "Iv23limfvipYiMLhjhq1"

    def __init__(self):
        self.config_dir = Path.home() / ".onecoder"
        self.config_file = self.config_dir / "config.json"
        self._ensure_config_dir()

    def _ensure_config_dir(self):
        """Create the config directory with owner-only access if it is missing."""
        if not self.config_dir.exists():
            self.config_dir.mkdir(parents=True, exist_ok=True)
            # Set restrictive permissions
            os.chmod(self.config_dir, 0o700)

    def load_config(self) -> Dict[str, Any]:
        """Return the stored configuration.

        Returns:
            The parsed JSON object, or ``{}`` when the file is missing,
            unreadable, not valid JSON, or not a JSON object. Problems other
            than a missing file are reported on stdout.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                config = json.load(f)
        except FileNotFoundError:
            return {}
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading config: {e}")
            return {}
        if not isinstance(config, dict):
            # Every caller uses dict methods; a list or scalar would crash them.
            print("Error loading config: expected a JSON object")
            return {}
        return config

    def save_config(self, config: Dict[str, Any]):
        """Write ``config`` to the config file as indented JSON.

        Failures are reported on stdout instead of being raised.
        """
        try:
            # Serialize first so a non-serializable value cannot truncate an
            # existing config file.
            payload = json.dumps(config, indent=4)
            self._ensure_config_dir()
            # Create the file owner-only from the start rather than writing
            # it with default permissions and tightening them afterwards.
            fd = os.open(
                self.config_file,
                os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                0o600,
            )
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(payload)
            # The mode passed to os.open only applies to new files; tighten a
            # pre-existing file as well.
            os.chmod(self.config_file, 0o600)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving config: {e}")

    def get_token(self) -> Optional[str]:
        """Return the stored API token, or ``None`` when none is stored."""
        return self.load_config().get("api_token")

    def set_token(self, token: str):
        """Store ``token`` as the API token."""
        config = self.load_config()
        config["api_token"] = token
        self.save_config(config)

    def clear_token(self):
        """Remove the stored API token; the file is left untouched if absent."""
        config = self.load_config()
        if "api_token" in config:
            del config["api_token"]
            self.save_config(config)

    def get_user(self) -> Optional[Dict[str, Any]]:
        """Return the stored ``user`` entry, or ``None`` when none is stored."""
        return self.load_config().get("user")

    def set_user(self, user: Dict[str, Any]):
        """Store ``user`` under the ``user`` key."""
        config = self.load_config()
        config["user"] = user
        self.save_config(config)

    def get_model_config(self) -> Optional[Dict[str, Any]]:
        """Return the stored ``model`` entry, or ``None`` when none is stored."""
        return self.load_config().get("model")

    def set_model_config(self, model_config: Dict[str, Any]):
        """Store ``model_config`` under the ``model`` key."""
        config = self.load_config()
        config["model"] = model_config
        self.save_config(config)

    def get_github_client_id(self) -> str:
        """Get the GitHub Client ID from config or environment variable.

        Precedence: the ``GITHUB_CLIENT_ID`` environment variable, then the
        ``github_client_id`` config entry, then the production default. Empty
        values are skipped at every level so the result is always a
        non-empty string.
        """
        # Environment variable takes highest precedence
        env_id = os.getenv("GITHUB_CLIENT_ID")
        if env_id:
            return env_id

        # A stored null or empty string must not override the default.
        return self.load_config().get("github_client_id") or self.DEFAULT_GITHUB_CLIENT_ID

    def set_github_client_id(self, client_id: str):
        """Set the GitHub Client ID in the config file."""
        config = self.load_config()
        config["github_client_id"] = client_id
        self.save_config(config)
|
|
86
|
+
|
|
87
|
+
# Module-level instance for importers; constructing it creates ~/.onecoder if it is missing.
config_manager = ConfigManager()
|
onecoder/constants.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from .config_manager import config_manager
|
|
3
|
+
|
|
4
|
+
# Default to Production API URL
|
|
5
|
+
# Read once at import time; the ONECODER_API_URL environment variable overrides the default URL.
ONECODER_API_URL = os.getenv("ONECODER_API_URL", "https://api.onecoder.dev")
|
|
6
|
+
|
|
7
|
+
# Production GitHub App Client ID
|
|
8
|
+
# This is safe to share as it is a public identifier, not a secret.
|
|
9
|
+
# Resolved once, at import time, by config_manager (environment variable, then config file, then default).
GITHUB_CLIENT_ID = config_manager.get_github_client_id()
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Iterable, List, Optional
|
|
7
|
+
|
|
8
|
+
from onecoder.metrics import find_repo_root
|
|
9
|
+
|
|
10
|
+
# File names that count as env files when a component directory is scanned.
ENV_FILENAMES = [".env", ".env.local", ".dev.vars"]
# Keys checked for presence in every component and compared across components.
SHARED_SECRETS = ["GITHUB_CLIENT_ID", "JWT_SECRET", "GEMINI_API_KEY"]
# Component name -> directories, relative to the repository root, searched for env files.
COMPONENT_PATHS: Dict[str, List[Path]] = {
    "onecoder-api": [Path("onecoder-api")],
    "oneadmin": [Path("oneadmin/worker"), Path("oneadmin/client")],
    "oneui": [Path("oneui"), Path("components/devcenter")],
    "onewebsite": [Path("components/onewebsite")],
    "sprint-cli": [Path("sprint-cli")],
    "onecoder-cli": [Path("onecoder-cli")],
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class EnvFinding:
    """A single result reported by an environment check."""

    # Component the finding belongs to.
    component: str
    # Name of the check that produced the finding.
    check: str
    # Outcome label for the check.
    status: str
    # Human-readable description of the outcome.
    message: str
    # Env file the finding refers to, when it concerns one specific file.
    file: Optional[Path] = None
    # Optional identifier attached to the finding.
    tt_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Optional[str]]:
        """Return the finding as a JSON-serializable dict.

        ``file`` is converted to a string; ``file`` and ``tt_id`` are
        ``None`` when unset, which is why the values are optional strings.
        """
        return {
            "component": self.component,
            "check": self.check,
            "status": self.status,
            "message": self.message,
            "file": str(self.file) if self.file else None,
            "tt_id": self.tt_id,
        }
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EnvDoctor:
|
|
43
|
+
def __init__(self, repo_root: Optional[Path] = None) -> None:
|
|
44
|
+
self.repo_root = repo_root or find_repo_root()
|
|
45
|
+
|
|
46
|
+
def run(self) -> List[EnvFinding]:
|
|
47
|
+
findings: List[EnvFinding] = []
|
|
48
|
+
component_envs: Dict[str, Dict[str, str]] = {}
|
|
49
|
+
|
|
50
|
+
for name, rel_paths in COMPONENT_PATHS.items():
|
|
51
|
+
env_files: List[Path] = []
|
|
52
|
+
for rel_path in rel_paths:
|
|
53
|
+
env_files.extend(self._discover_env_files(self.repo_root / rel_path))
|
|
54
|
+
|
|
55
|
+
if not env_files:
|
|
56
|
+
findings.append(
|
|
57
|
+
EnvFinding(
|
|
58
|
+
component=name,
|
|
59
|
+
check="env_files",
|
|
60
|
+
status="warn",
|
|
61
|
+
message=f"No env files found (searched: {', '.join(str(self.repo_root / p) for p in rel_paths)})",
|
|
62
|
+
)
|
|
63
|
+
)
|
|
64
|
+
continue
|
|
65
|
+
|
|
66
|
+
merged_env: Dict[str, str] = {}
|
|
67
|
+
for env_file in env_files:
|
|
68
|
+
try:
|
|
69
|
+
merged_env.update(self._load_env_file(env_file))
|
|
70
|
+
findings.append(
|
|
71
|
+
EnvFinding(
|
|
72
|
+
component=name,
|
|
73
|
+
check="env_files",
|
|
74
|
+
status="pass",
|
|
75
|
+
message=f"Found {env_file.name}",
|
|
76
|
+
file=env_file,
|
|
77
|
+
)
|
|
78
|
+
)
|
|
79
|
+
except ValueError as exc:
|
|
80
|
+
findings.append(
|
|
81
|
+
EnvFinding(
|
|
82
|
+
component=name,
|
|
83
|
+
check="env_parse",
|
|
84
|
+
status="fail",
|
|
85
|
+
message=f"{env_file}: {exc}",
|
|
86
|
+
file=env_file,
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
component_envs[name] = merged_env
|
|
90
|
+
|
|
91
|
+
for key in SHARED_SECRETS:
|
|
92
|
+
if key not in merged_env:
|
|
93
|
+
findings.append(
|
|
94
|
+
EnvFinding(
|
|
95
|
+
component=name,
|
|
96
|
+
check=key,
|
|
97
|
+
status="warn",
|
|
98
|
+
message=f"{key} missing in {', '.join(f.name for f in env_files)}",
|
|
99
|
+
)
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
findings.extend(self._compare_shared_secrets(component_envs))
|
|
103
|
+
return findings
|
|
104
|
+
|
|
105
|
+
def write_artifact(self, findings: List[EnvFinding]) -> Path:
|
|
106
|
+
debug_dir = self.repo_root / ".onecoder" / "debug"
|
|
107
|
+
debug_dir.mkdir(parents=True, exist_ok=True)
|
|
108
|
+
artifact_path = debug_dir / "doctor-env.json"
|
|
109
|
+
artifact_path.write_text(json.dumps([f.to_dict() for f in findings], indent=2))
|
|
110
|
+
return artifact_path
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def to_json(findings: List[EnvFinding]) -> str:
|
|
114
|
+
return json.dumps([f.to_dict() for f in findings], indent=2)
|
|
115
|
+
|
|
116
|
+
@staticmethod
|
|
117
|
+
def has_failures(findings: List[EnvFinding]) -> bool:
|
|
118
|
+
return any(f.status == "fail" for f in findings)
|
|
119
|
+
|
|
120
|
+
@staticmethod
|
|
121
|
+
def _discover_env_files(component_root: Path) -> List[Path]:
|
|
122
|
+
files: List[Path] = []
|
|
123
|
+
if not component_root.exists():
|
|
124
|
+
return files
|
|
125
|
+
for filename in ENV_FILENAMES:
|
|
126
|
+
for candidate in component_root.rglob(filename):
|
|
127
|
+
if "node_modules" in candidate.parts or ".git" in candidate.parts:
|
|
128
|
+
continue
|
|
129
|
+
files.append(candidate)
|
|
130
|
+
return list(dict.fromkeys(files))
|
|
131
|
+
|
|
132
|
+
@staticmethod
|
|
133
|
+
def _load_env_file(path: Path) -> Dict[str, str]:
|
|
134
|
+
values: Dict[str, str] = {}
|
|
135
|
+
current_key: Optional[str] = None
|
|
136
|
+
multiline_buffer: List[str] = []
|
|
137
|
+
with path.open() as handle:
|
|
138
|
+
for line in handle:
|
|
139
|
+
raw_line = line.rstrip("\n")
|
|
140
|
+
stripped = raw_line.strip()
|
|
141
|
+
|
|
142
|
+
if current_key:
|
|
143
|
+
closing = stripped.endswith('"')
|
|
144
|
+
content = raw_line.rstrip()[:-1] if closing else raw_line
|
|
145
|
+
multiline_buffer.append(content)
|
|
146
|
+
if closing:
|
|
147
|
+
values[current_key] = "\n".join(multiline_buffer)
|
|
148
|
+
current_key = None
|
|
149
|
+
multiline_buffer = []
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
if not stripped or stripped.startswith("#"):
|
|
153
|
+
continue
|
|
154
|
+
if "=" not in stripped:
|
|
155
|
+
raise ValueError("Invalid line (missing '=')")
|
|
156
|
+
key, value = stripped.split("=", 1)
|
|
157
|
+
clean_value = value.strip()
|
|
158
|
+
values[key.strip()] = clean_value.strip('"').strip("'")
|
|
159
|
+
|
|
160
|
+
if clean_value.startswith('"') and not clean_value.endswith('"'):
|
|
161
|
+
current_key = key.strip()
|
|
162
|
+
multiline_buffer.append(clean_value.lstrip('"'))
|
|
163
|
+
if current_key:
|
|
164
|
+
raise ValueError(f"Unterminated multiline value for {current_key}")
|
|
165
|
+
return values
|
|
166
|
+
|
|
167
|
+
def _compare_shared_secrets(
|
|
168
|
+
self, component_envs: Dict[str, Dict[str, str]]
|
|
169
|
+
) -> List[EnvFinding]:
|
|
170
|
+
findings: List[EnvFinding] = []
|
|
171
|
+
for secret in SHARED_SECRETS:
|
|
172
|
+
values: Dict[str, str] = {
|
|
173
|
+
component: envs[secret]
|
|
174
|
+
for component, envs in component_envs.items()
|
|
175
|
+
if secret in envs
|
|
176
|
+
}
|
|
177
|
+
if len(values) <= 1:
|
|
178
|
+
continue
|
|
179
|
+
unique_values = set(values.values())
|
|
180
|
+
if len(unique_values) == 1:
|
|
181
|
+
continue
|
|
182
|
+
|
|
183
|
+
tt_id = "TT-032" if self._is_font_trap(unique_values) and secret == "GITHUB_CLIENT_ID" else None
|
|
184
|
+
message = "; ".join(
|
|
185
|
+
f"{component}={display_value}"
|
|
186
|
+
for component, display_value in values.items()
|
|
187
|
+
)
|
|
188
|
+
findings.append(
|
|
189
|
+
EnvFinding(
|
|
190
|
+
component="shared",
|
|
191
|
+
check=secret,
|
|
192
|
+
status="fail",
|
|
193
|
+
message=f"Mismatched {secret}: {message}",
|
|
194
|
+
tt_id=tt_id,
|
|
195
|
+
)
|
|
196
|
+
)
|
|
197
|
+
return findings
|
|
198
|
+
|
|
199
|
+
@staticmethod
|
|
200
|
+
def _is_font_trap(values: Iterable[str]) -> bool:
|
|
201
|
+
normalized = {_font_normalize(value) for value in values}
|
|
202
|
+
return len(normalized) < len(set(values))
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _font_normalize(value: str) -> str:
|
|
206
|
+
"""Normalize ambiguous glyphs so I/l mix-ups collapse to the same value."""
|
|
207
|
+
return value.replace("I", "1").replace("l", "1")
|
onecoder/discovery.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import pkgutil
|
|
3
|
+
import inspect
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Any
|
|
8
|
+
from .tools.interface import BaseTool
|
|
9
|
+
from .tools.registry import registry
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
class DiscoveryAgent:
    """
    DiscoveryAgent scans directories and modules to dynamically register tools.

    A function counts as a tool when its name ends in ``_tool`` or when it
    carries an ``_is_agentic_tool`` attribute. A tool whose name is already
    registered is left untouched, so the first registration wins.
    """

    # Naming-convention suffix that marks a function as a tool.
    _TOOL_SUFFIX = "_tool"

    def __init__(self, tools_package: str = "onecoder.tools"):
        self.tools_package = tools_package

    def discover_and_register(self):
        """
        Scans the tools package and register functions.
        Also scans paths in ONECODER_TOOLS_PATH environment variable.
        """
        logger.info("Starting tool discovery in %s", self.tools_package)

        # 1. Discover internal tools
        try:
            package = importlib.import_module(self.tools_package)
            for _loader, module_name, is_pkg in pkgutil.walk_packages(
                package.__path__, package.__name__ + "."
            ):
                if is_pkg:
                    continue
                try:
                    module = importlib.import_module(module_name)
                    self._scan_module(module)
                except Exception as e:
                    # One broken module must not stop discovery of the rest.
                    logger.error("Error scanning module %s: %s", module_name, e)
        except ImportError as e:
            logger.error("Could not import tools package %s: %s", self.tools_package, e)

        # 2. Discover external tools from ONECODER_TOOLS_PATH
        tools_path = os.environ.get("ONECODER_TOOLS_PATH")
        if not tools_path:
            return
        for path in tools_path.split(os.pathsep):
            if not path:
                continue
            p = Path(path).absolute()
            if not p.exists():
                logger.warning("External tools path does not exist: %s", p)
                continue

            logger.info("Scanning external tools path: %s", p)
            self._scan_directory(p)

    def _scan_directory(self, directory: Path):
        """Load every top-level ``.py`` file in ``directory`` and scan it.

        Each file is loaded from its own path under a unique module name.
        Importing by bare stem would return an unrelated module whenever the
        stem matches something already imported (for example ``json.py``),
        and the external file would never be loaded. Files whose names start
        with ``__`` are skipped, and files are processed in sorted order so
        that name conflicts resolve deterministically.
        """
        import importlib.util
        import sys

        # Kept on sys.path so external tool modules can import their siblings.
        if str(directory) not in sys.path:
            sys.path.insert(0, str(directory))

        for file in sorted(directory.glob("*.py")):
            if file.name.startswith("__"):
                continue

            module_name = file.stem
            # The path hash keeps equal stems from different directories apart.
            unique_name = f"_onecoder_external_{hash(str(file)) & 0xFFFFFFFF:08x}_{module_name}"
            try:
                module = sys.modules.get(unique_name)
                if module is None:
                    spec = importlib.util.spec_from_file_location(unique_name, file)
                    if spec is None or spec.loader is None:
                        raise ImportError(f"cannot create a module spec for {file}")
                    module = importlib.util.module_from_spec(spec)
                    # Register before executing, as the import system does;
                    # some code looks itself up in sys.modules while loading.
                    sys.modules[unique_name] = module
                    try:
                        spec.loader.exec_module(module)
                    except BaseException:
                        sys.modules.pop(unique_name, None)
                        raise
                self._scan_module(module)
            except Exception as e:
                logger.error("Error loading external tool module %s: %s", module_name, e)

    @staticmethod
    def _derive_tool_name(name: str, obj: Any) -> str:
        """Return the registry name for the function ``obj`` called ``name``.

        An explicit ``_tool_name`` attribute wins. Otherwise a single trailing
        ``_tool`` is removed; occurrences elsewhere in the name are kept. If
        that would leave an empty name, the function name is used unchanged.
        """
        if hasattr(obj, "_tool_name"):
            return obj._tool_name
        suffix = DiscoveryAgent._TOOL_SUFFIX
        if name.endswith(suffix):
            return name[: -len(suffix)] or name
        return name

    def _scan_module(self, module: Any):
        """Register every tool function found in ``module``.

        The first line-independent description comes from the function's
        docstring; a placeholder is used when there is none.
        """
        for name, obj in inspect.getmembers(module, inspect.isfunction):
            if not (name.endswith(self._TOOL_SUFFIX) or hasattr(obj, "_is_agentic_tool")):
                continue

            # Avoid registering if already exists
            tool_name = self._derive_tool_name(name, obj)
            if registry.get_tool(tool_name):
                continue

            # Extract metadata from docstring
            doc = inspect.getdoc(obj) or "No description provided."

            logger.debug("Discovered tool: %s in %s", tool_name, module.__name__)

            registry.register(BaseTool(name=tool_name, description=doc, func=obj))
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
    # Manual smoke test: run discovery with debug logging and list the result.
    logging.basicConfig(level=logging.DEBUG)
    agent = DiscoveryAgent("onecoder.tools")
    agent.discover_and_register()
    registered_names = [t.name for t in registry.list_tools()]
    print(f"Registered tools: {registered_names}")
|
onecoder/distillation.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import re
|
|
4
|
+
import datetime
|
|
5
|
+
from typing import Any, List, Dict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
class SessionCapture:
    """
    Captures session events (text, tool calls) for later distillation.

    Events are kept in memory and written to ``<directory>/.sprint/logs`` as
    one JSON file per ``save_session`` call.
    """

    def __init__(self, directory: str = "."):
        self.directory = directory
        self.logs_dir = os.path.join(directory, ".sprint", "logs")

        self.current_session_id = None
        self.events: List[Dict[str, Any]] = []

    def start_session(self, session_id: str):
        """Begin a new session, discarding events of the previous one."""
        os.makedirs(self.logs_dir, exist_ok=True)
        self.current_session_id = session_id
        self.events = []

    def log_event(self, event_data: Dict[str, Any]):
        """Logs an event within the current session."""
        self.events.append(
            {
                "timestamp": datetime.datetime.now().isoformat(),
                "data": event_data,
            }
        )

    def save_session(self):
        """Saves the current session logs to the .sprint/logs directory.

        Does nothing when no session has been started. The file is named
        ``session_<id>_<timestamp>.json``; characters in the id other than
        letters, digits, ``.``, ``_`` and ``-`` are replaced by ``_`` in the
        file name only, so an id containing path separators cannot break the
        save or place the file outside the logs directory. The JSON content
        keeps the original id.
        """
        if not self.current_session_id:
            return

        # The directory may have been removed since start_session().
        os.makedirs(self.logs_dir, exist_ok=True)

        safe_id = re.sub(r"[^A-Za-z0-9._-]", "_", str(self.current_session_id))
        stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        filepath = os.path.join(self.logs_dir, f"session_{safe_id}_{stamp}.json")

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "session_id": self.current_session_id,
                    "events": self.events,
                },
                f,
                indent=2,
            )

        print(f"Session logs saved to {filepath}")

    def distill_patterns(self) -> str:
        """
        [v0.1.0 Mechanical] Implementation for pattern capture.
        In the future, this will use an LLM to extract reusable skills.

        Returns:
            A summary string with the number of captured events whose data
            contains a ``tool_call`` entry.
        """
        tool_calls = sum(1 for event in self.events if "tool_call" in event.get("data", {}))
        return f"Captured {tool_calls} tool call patterns."
|
|
60
|
+
|
|
61
|
+
# Singleton instance for the API to use
|
|
62
|
+
# Built with the default directory ".", so its logs path is relative to the working directory.
capture_engine = SessionCapture()
|
|
63
|
+
|
|
64
|
+
class SprintDistiller:
    """
    Distills learnings from a sprint and updates project awareness.

    Learnings are taken from ``.sprint/<sprint_id>/RETRO.md`` (by an LLM when
    available, otherwise by a line-based fallback) and injected into
    ``ANTIGRAVITY.md`` at the repository root.
    """

    # Heading under which distilled learnings are collected.
    _DISTILLED_MARKER = "### 🛡️ Distilled from Sprints"
    # Existing heading after which the marker section is created.
    _AWARENESS_HEADER = "## 🛠️ Environmental Awareness (Learned)"
    # "## " headings containing one of these start a captured section.
    _SECTION_KEYWORDS = ("Learnings", "Learning", "Went Well", "To Improve", "Could Be Improved")
    _NUMBERED_ITEM = re.compile(r'^\d+\.\s+')

    def __init__(self, project_root: str = "."):
        self.project_root = self._find_repo_root(Path(project_root).absolute())
        self.antigravity_md = self.project_root / "ANTIGRAVITY.md"
        # Set by the extractors; True when the last extraction used the LLM.
        self._llm_used = False

    def _find_repo_root(self, start_path: Path) -> Path:
        """Traverses upwards to find the repository root.

        Returns the nearest directory, starting at ``start_path`` and
        including the filesystem root, that contains ``.sprint`` or ``.git``.
        Falls back to ``start_path`` when none does.
        """
        for candidate in (start_path, *start_path.parents):
            if (candidate / ".sprint").exists() or (candidate / ".git").exists():
                return candidate
        return start_path  # Fallback to start_path

    def distill_sprint(self, sprint_id: str) -> Dict[str, Any]:
        """
        Analyzes a finished sprint and extracts learnings.

        Returns:
            ``{"error": ...}`` when the sprint has no ``RETRO.md``. Otherwise
            a dict with ``sprint_id``, ``learnings_extracted``, ``updated``
            (the awareness file name when it was actually modified, else
            ``None``) and ``method`` (``"llm"`` or ``"mechanical"``).
        """
        retro_path = self.project_root / ".sprint" / sprint_id / "RETRO.md"

        if not retro_path.exists():
            return {"error": f"RETRO.md not found in {sprint_id}"}

        content = retro_path.read_text(encoding="utf-8")

        # Use LLM with mechanical fallback
        self._llm_used = False
        try:
            learnings = self._extract_learnings_llm(content)
        except Exception as e:
            # Deliberately broad: a missing dependency, a missing API key, a
            # network error or an unparseable answer all lead to the fallback.
            print(f"Warning: LLM distillation failed ({e}). Falling back to mechanical extraction.")
            learnings = self._extract_learnings_mechanical(content)

        updated = bool(learnings) and self._update_awareness(sprint_id, learnings)

        return {
            "sprint_id": sprint_id,
            "learnings_extracted": len(learnings),
            "updated": self.antigravity_md.name if updated else None,
            "method": "llm" if self._llm_used else "mechanical",
        }

    def _extract_learnings_llm(self, content: str) -> List[str]:
        """Uses LLM to semantically extract learnings from retro content.

        Raises:
            ValueError: when ``OPENROUTER_API_KEY`` is not set or the
                response cannot be parsed.
        """
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OPENROUTER_API_KEY not found")

        # Imported lazily, and only once a key is known to be available.
        import litellm

        model = "openrouter/xiaomi/mimo-v2-flash:free"

        prompt = (
            "\n"
            "Analyze the following Sprint Retro (RETRO.md) content and extract key engineering/architectural learnings.\n"
            "Focus on reusable patterns, failure modes to avoid, and optimized workflows.\n"
            "\n"
            "Format your response EXACTLY as a JSON list of strings. Each string should be a concise, action-oriented learning.\n"
            "DO NOT include any other text in your response.\n"
            "\n"
            "RETRO CONTENT:\n"
            f"{content}\n"
            "\n"
            'Example Output: ["Always use relative paths in sprint-cli", "Verify JWT presence before syncing secrets"]\n'
        )

        response = litellm.completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1",
            response_format={"type": "json_object"},  # Try to enforce JSON
        )

        learnings = self._parse_learnings(response.choices[0].message.content)
        self._llm_used = True
        return learnings

    @staticmethod
    def _parse_learnings(text: str) -> List[str]:
        """Turn an LLM response into a list of non-empty learning strings.

        Accepts a JSON list, a JSON object (the ``learnings`` key, then the
        ``items`` key, then the first value), or free text containing a
        bracketed JSON list. Non-string and blank items are dropped.

        Raises:
            ValueError: when no list can be recovered from ``text``.
        """
        try:
            data = json.loads(text)
        except (TypeError, ValueError):
            # Fallback: extract anything that looks like a list
            match = re.search(r'\[.*\]', text or "", re.DOTALL)
            if not match:
                raise ValueError("Could not parse LLM response as JSON list")
            data = json.loads(match.group(0))

        if isinstance(data, dict):
            # Handle a JSON object wrapping the list under a common key.
            if "learnings" in data:
                data = data["learnings"]
            elif "items" in data:
                data = data["items"]
            elif data:
                data = next(iter(data.values()))
            else:
                raise ValueError("Could not parse LLM response as JSON list")
            if isinstance(data, str):
                # A single learning given as a bare string, not a list.
                data = [data]

        if not isinstance(data, list):
            return []
        return [item.strip() for item in data if isinstance(item, str) and item.strip()]

    def _extract_learnings_mechanical(self, content: str) -> List[str]:
        """
        Extracts bullet points and numbered items from 'Learnings', 'To Improve', or 'Went Well' sections.
        This is a simple mechanical extraction fallback.

        A section starts at a ``## `` heading containing one of the section
        keywords and ends at the next ``## `` heading.
        """
        learnings = []
        capture = False
        for line in content.split("\n"):
            line_stripped = line.strip()
            if line_stripped.startswith("## "):
                # Every level-2 heading either opens or closes a section.
                capture = any(x in line_stripped for x in self._SECTION_KEYWORDS)
                continue
            if not capture:
                continue

            if line_stripped.startswith("- "):
                # Bullet points: - text
                learning = line_stripped[2:].strip()
            elif self._NUMBERED_ITEM.match(line_stripped):
                # Numbered lists: 1. text, 2. text, etc.
                learning = self._NUMBERED_ITEM.sub('', line_stripped).strip()
            else:
                continue
            if learning:
                learnings.append(learning)

        self._llm_used = False
        return learnings

    def _update_awareness(self, sprint_id: str, learnings: List[str]) -> bool:
        """
        Injects learnings into ANTIGRAVITY.md.

        If the distilled-marker heading exists, entries not yet present in
        the file are inserted directly below it. Otherwise the marker section
        is created below the awareness heading.

        Returns:
            True when the file was modified. False when the file is missing,
            no insertion point exists, or every entry is already present.
        """
        if not self.antigravity_md.exists():
            return False

        content = self.antigravity_md.read_text(encoding="utf-8")
        # dict.fromkeys removes repeated learnings while keeping their order.
        entries = list(
            dict.fromkeys(f"- {learning} (distilled from {sprint_id})" for learning in learnings)
        )

        if self._DISTILLED_MARKER in content:
            # Append to the existing section, but avoid duplicates
            target = self._DISTILLED_MARKER
            block = [entry for entry in entries if entry not in content]
        else:
            # Create the section after the awareness heading
            target = self._AWARENESS_HEADER
            block = ["", self._DISTILLED_MARKER, *entries, ""]

        if not block:
            return False

        new_lines = []
        injected = False
        for line in content.split("\n"):
            new_lines.append(line)
            # Insert once only, even if the target text occurs on several lines.
            if not injected and target in line:
                new_lines.extend(block)
                injected = True

        if injected:
            self.antigravity_md.write_text("\n".join(new_lines), encoding="utf-8")
        return injected
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .ttu import TTUEvaluator, TTUResult
|