codeledger 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeledger/__init__.py +4 -0
- codeledger/__main__.py +6 -0
- codeledger/classifier/__init__.py +21 -0
- codeledger/classifier/deferred.py +143 -0
- codeledger/classifier/rules.py +116 -0
- codeledger/classifier/session.py +60 -0
- codeledger/classifier/slm.py +19 -0
- codeledger/cli.py +510 -0
- codeledger/compressor/__init__.py +15 -0
- codeledger/compressor/scope_engine.py +73 -0
- codeledger/compressor/token_compressor.py +138 -0
- codeledger/config/__init__.py +19 -0
- codeledger/config/loader.py +141 -0
- codeledger/config/presets/cli_tool.yaml +62 -0
- codeledger/config/presets/data_pipeline.yaml +67 -0
- codeledger/config/presets/fullstack.yaml +80 -0
- codeledger/config/presets/minimal.yaml +48 -0
- codeledger/config/presets/ml_research.yaml +73 -0
- codeledger/config/presets/python_api.yaml +86 -0
- codeledger/config/presets/react_frontend.yaml +70 -0
- codeledger/config/schema.py +212 -0
- codeledger/generator/__init__.py +15 -0
- codeledger/generator/api_client.py +104 -0
- codeledger/generator/local_client.py +72 -0
- codeledger/generator/model_router.py +61 -0
- codeledger/generator/prompt_builder.py +125 -0
- codeledger/merge/__init__.py +15 -0
- codeledger/merge/deduplicator.py +100 -0
- codeledger/merge/extractor.py +131 -0
- codeledger/merge/merge_engine.py +127 -0
- codeledger/models/__init__.py +3 -0
- codeledger/models/inference.py +0 -0
- codeledger/parser/__init__.py +39 -0
- codeledger/parser/base.py +136 -0
- codeledger/parser/fallback.py +122 -0
- codeledger/parser/java_parser.py +10 -0
- codeledger/parser/js_parser.py +10 -0
- codeledger/parser/python_parser.py +239 -0
- codeledger/postprocess/__init__.py +23 -0
- codeledger/postprocess/file_manager.py +183 -0
- codeledger/postprocess/formatter.py +114 -0
- codeledger/postprocess/validator.py +130 -0
- codeledger/scanner/__init__.py +30 -0
- codeledger/scanner/change_dag.py +228 -0
- codeledger/scanner/dependency.py +124 -0
- codeledger/scanner/file_scanner.py +207 -0
- codeledger/scanner/snapshot.py +278 -0
- codeledger/templates/doc_template.md.j2 +23 -0
- codeledger/templates/merge_template.md.j2 +17 -0
- codeledger/templates/prompt_templates/merge.txt +35 -0
- codeledger/templates/prompt_templates/micro.txt +20 -0
- codeledger/templates/prompt_templates/refactor.txt +30 -0
- codeledger/templates/prompt_templates/standard.txt +19 -0
- codeledger-0.1.0.dist-info/METADATA +296 -0
- codeledger-0.1.0.dist-info/RECORD +58 -0
- codeledger-0.1.0.dist-info/WHEEL +4 -0
- codeledger-0.1.0.dist-info/entry_points.txt +2 -0
- codeledger-0.1.0.dist-info/licenses/LICENSE +21 -0
codeledger/__init__.py
ADDED
codeledger/__main__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Classifier package — session classification and change deferral."""
|
|
2
|
+
|
|
3
|
+
from codeledger.classifier.session import (
|
|
4
|
+
SessionClassification,
|
|
5
|
+
SessionType,
|
|
6
|
+
classify_session,
|
|
7
|
+
)
|
|
8
|
+
from codeledger.classifier.deferred import (
|
|
9
|
+
PendingChanges,
|
|
10
|
+
load_pending,
|
|
11
|
+
save_pending,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"PendingChanges",
|
|
16
|
+
"SessionClassification",
|
|
17
|
+
"SessionType",
|
|
18
|
+
"classify_session",
|
|
19
|
+
"load_pending",
|
|
20
|
+
"save_pending",
|
|
21
|
+
]
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Deferred change accumulator — buffers trivial sessions until flush threshold."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from ruamel.yaml import YAML
|
|
11
|
+
|
|
12
|
+
from codeledger.scanner.change_dag import ChangeMetrics
|
|
13
|
+
|
|
14
|
+
# Module-wide YAML reader/writer shared by the load and save helpers.
yaml = YAML()
# Emit nested collections in block style rather than inline flow style.
yaml.default_flow_style = False

# Path of the pending-changes buffer, relative to the project root.
PENDING_FILE = ".codeledger/.pending_changes.yaml"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class DeferredSession:
    """One trivial session whose documentation was postponed.

    Instances are plain records: they carry no behaviour beyond what
    ``@dataclass`` generates (``__init__``, ``__repr__``, ``__eq__``).
    """

    # Identifier assigned to the session at the moment it was deferred.
    session_id: str
    # When the session was deferred, kept as text.
    timestamp: str
    # Paths that were changed during the session.
    files_changed: list[str]
    # Short human-readable description of the change set.
    summary: str
    # Magnitude recorded for this session's changes.
    magnitude: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class PendingChanges:
    """Accumulated deferred changes waiting to be flushed.

    The three fields are kept in step by :meth:`add_session` and reset
    together by :meth:`flush`.
    """

    # Buffered sessions, in the order they were added.
    pending_sessions: list[DeferredSession] = field(default_factory=list)
    # Running sum of ``total_magnitude`` over the buffered sessions.
    accumulated_magnitude: float = 0.0
    # Number of sessions added since the last flush.
    sessions_deferred: int = 0

    def should_flush(
        self,
        flush_threshold: float = 0.40,
        max_deferred: int = 5,
    ) -> bool:
        """Check if accumulated changes should trigger a documentation flush.

        Args:
            flush_threshold: Flush once ``accumulated_magnitude`` reaches
                this value.
            max_deferred: Flush once this many sessions have been deferred.

        Returns:
            ``True`` when either limit has been reached, else ``False``.
        """
        return (
            self.sessions_deferred >= max_deferred
            or self.accumulated_magnitude >= flush_threshold
        )

    def add_session(
        self,
        changed_paths: list[str],
        metrics: ChangeMetrics,
        summary: str = "",
    ) -> None:
        """Add a deferred session to the pending buffer.

        Args:
            changed_paths: Paths changed in the session. The list is copied,
                so later mutation by the caller does not alter the record.
            metrics: Change metrics for the session; ``total_magnitude`` is
                added to the running total.
            summary: Optional description. When empty, one is built from
                the file and line counts in ``metrics``.
        """
        now = datetime.now(timezone.utc)
        # NOTE(review): second-level resolution — two sessions deferred in
        # the same second receive the same id.
        session_id = f"deferred_{now.strftime('%Y%m%d_%H%M%S')}"

        if not summary:
            summary = (
                f"{metrics.files_changed} files, "
                f"{metrics.lines_added}+ / {metrics.lines_removed}- lines"
            )

        session = DeferredSession(
            session_id=session_id,
            timestamp=now.isoformat(),
            # Copy so the buffered record does not alias the caller's list.
            files_changed=list(changed_paths),
            summary=summary,
            magnitude=metrics.total_magnitude,
        )

        self.pending_sessions.append(session)
        self.accumulated_magnitude += metrics.total_magnitude
        self.sessions_deferred += 1

    def flush(self) -> list[DeferredSession]:
        """Flush all pending sessions and return them. Resets the buffer.

        Returns:
            A new list holding the sessions that were buffered.
        """
        flushed = list(self.pending_sessions)
        self.pending_sessions = []
        self.accumulated_magnitude = 0.0
        self.sessions_deferred = 0
        return flushed

    def to_dict(self) -> dict:
        """Return a plain-dict form of the buffer suitable for serialisation.

        ``accumulated_magnitude`` is rounded to four decimal places.
        """
        return {
            "pending_sessions": [
                {
                    "session_id": s.session_id,
                    "timestamp": s.timestamp,
                    "files_changed": s.files_changed,
                    "summary": s.summary,
                    "magnitude": s.magnitude,
                }
                for s in self.pending_sessions
            ],
            "accumulated_magnitude": round(self.accumulated_magnitude, 4),
            "sessions_deferred": self.sessions_deferred,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "PendingChanges":
        """Rebuild a buffer from the mapping produced by :meth:`to_dict`.

        Optional keys that are missing *or* explicitly null fall back to
        their empty defaults, so a file containing a bare
        ``pending_sessions:`` line loads as an empty buffer.

        Raises:
            KeyError: If a session entry lacks ``session_id`` or
                ``timestamp``.
        """
        sessions = [
            DeferredSession(
                session_id=s["session_id"],
                timestamp=s["timestamp"],
                # ``or`` (not a .get default) so a null value is also covered.
                files_changed=list(s.get("files_changed") or []),
                summary=s.get("summary") or "",
                magnitude=s.get("magnitude") or 0.0,
            )
            for s in (data.get("pending_sessions") or [])
        ]
        return cls(
            pending_sessions=sessions,
            accumulated_magnitude=data.get("accumulated_magnitude") or 0.0,
            sessions_deferred=data.get("sessions_deferred") or 0,
        )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def load_pending(project_root: Path) -> PendingChanges:
    """Read the pending-changes buffer stored under ``project_root``.

    Returns:
        The buffer rebuilt from the file's contents, or an empty
        ``PendingChanges`` when the file is absent or parses to nothing.
    """
    pending_path = project_root / PENDING_FILE

    if pending_path.exists():
        with open(pending_path, "r", encoding="utf-8") as handle:
            parsed = yaml.load(handle)
        if parsed:
            return PendingChanges.from_dict(parsed)

    # Missing file or empty document: start from a clean buffer.
    return PendingChanges()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def save_pending(project_root: Path, pending: PendingChanges) -> None:
    """Write ``pending`` to the pending-changes file under ``project_root``.

    The containing directory is created first if it does not exist; any
    existing file is overwritten.
    """
    target = project_root / PENDING_FILE
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "w", encoding="utf-8") as handle:
        yaml.dump(pending.to_dict(), handle)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Rule-based session classifier — MVP classifier using configurable thresholds."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from codeledger.classifier.session import (
|
|
6
|
+
SESSION_BUDGETS,
|
|
7
|
+
SessionClassification,
|
|
8
|
+
SessionType,
|
|
9
|
+
)
|
|
10
|
+
from codeledger.scanner.change_dag import ChangeMetrics
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def classify_with_rules(
    metrics: ChangeMetrics,
    trivial_max_files: int = 2,
    trivial_max_lines: int = 30,
    minor_max_files: int = 5,
    minor_max_lines: int = 150,
    standard_max_files: int = 15,
    standard_max_lines: int = 500,
) -> SessionClassification:
    """Assign a session type to ``metrics`` by comparing it to thresholds.

    The checks run in a fixed order and the first match wins:

    1. Refactor — files were both deleted and created, at least three
       files changed, and ``abs(metrics.net_lines)`` is below 50.
    2. Trivial — within the trivial file/line limits, no new files, and
       no structural changes.
    3. Minor, then Standard — within the respective file/line limits.
    4. Major — everything else.

    The ``*_max_lines`` limits are compared against lines added plus
    lines removed. Token budgets come from ``SESSION_BUDGETS``.
    """

    def _classified(
        kind: SessionType, confidence: float, reason: str
    ) -> SessionClassification:
        # Attach the per-type token budgets to the chosen classification.
        budgets = SESSION_BUDGETS[kind]
        return SessionClassification(
            session_type=kind,
            confidence=confidence,
            input_token_budget=budgets[0],
            output_token_budget=budgets[1],
            reason=reason,
        )

    # Total churn (added + removed), not the signed net difference.
    lines_touched = metrics.lines_added + metrics.lines_removed

    # --- Refactor detection ---
    looks_like_refactor = (
        metrics.files_deleted > 0
        and metrics.new_files_created > 0
        and abs(metrics.net_lines) < 50
        and metrics.files_changed >= 3
    )
    if looks_like_refactor:
        return _classified(
            SessionType.REFACTOR,
            0.85,
            f"Refactor detected: {metrics.files_deleted} deleted, "
            f"{metrics.new_files_created} created, "
            f"low net line change ({metrics.net_lines})",
        )

    # Shared tail of the size-based reason strings.
    size_summary = f"{metrics.files_changed} files, {lines_touched} lines changed"

    # --- Trivial ---
    is_trivial = (
        metrics.files_changed <= trivial_max_files
        and lines_touched <= trivial_max_lines
        and metrics.new_files_created == 0
        and not metrics.has_structural_changes
    )
    if is_trivial:
        return _classified(SessionType.TRIVIAL, 0.90, f"Trivial: {size_summary}")

    # --- Minor / Standard ---
    size_tiers = (
        (SessionType.MINOR, "Minor", minor_max_files, minor_max_lines),
        (SessionType.STANDARD, "Standard", standard_max_files, standard_max_lines),
    )
    for kind, label, max_files, max_lines in size_tiers:
        if metrics.files_changed <= max_files and lines_touched <= max_lines:
            return _classified(kind, 0.85, f"{label}: {size_summary}")

    # --- Major ---
    return _classified(
        SessionType.MAJOR,
        0.80,
        f"Major: {size_summary}, {metrics.new_files_created} new files",
    )
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Session types and classification interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
from codeledger.scanner.change_dag import ChangeMetrics
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SessionType(str, Enum):
    """Closed set of session categories.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    TRIVIAL = "trivial"
    MINOR = "minor"
    STANDARD = "standard"
    MAJOR = "major"
    REFACTOR = "refactor"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SessionClassification:
    """Outcome of classifying one development session."""

    # Category assigned to the session.
    session_type: SessionType
    # Classifier confidence, 0.0 - 1.0.
    confidence: float
    # Token budget for the input side.
    input_token_budget: int
    # Token budget for the output side.
    output_token_budget: int
    # Human-readable explanation of why this category was chosen.
    reason: str

    @property
    def should_defer(self) -> bool:
        """Whether the session is trivial (the only type that is deferred)."""
        is_trivial = self.session_type == SessionType.TRIVIAL
        return is_trivial
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Token budgets per session type, stored as
# (input_token_budget, output_token_budget).
SESSION_BUDGETS: dict[SessionType, tuple[int, int]] = {
    SessionType.TRIVIAL: (0, 0),  # deferred, no generation
    SessionType.MINOR: (500, 1500),  # micro-doc
    SessionType.STANDARD: (2000, 5000),  # normal doc
    SessionType.MAJOR: (4000, 8000),  # comprehensive doc
    SessionType.REFACTOR: (1500, 3000),  # refactor-focused doc
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def classify_session(
    metrics: ChangeMetrics,
    use_slm: bool = False,
) -> SessionClassification:
    """Classify a development session from its change metrics.

    Args:
        metrics: Change metrics describing the session.
        use_slm: When true, try the SLM classifier first.

    Returns:
        The SLM classifier's result when ``use_slm`` is set and that path
        completes; otherwise the rule-based classifier's result. An
        ``ImportError`` raised anywhere on the SLM path selects the
        rule-based classifier instead.
    """
    if use_slm:
        try:
            from codeledger.classifier.slm import classify_with_slm

            slm_verdict = classify_with_slm(metrics)
        except ImportError:
            # SLM path unavailable: continue to the rule-based classifier.
            pass
        else:
            return slm_verdict

    from codeledger.classifier.rules import classify_with_rules

    return classify_with_rules(metrics)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""SLM-based classifier — optional enhanced classification using a local model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from codeledger.classifier.session import (
|
|
6
|
+
SessionClassification,
|
|
7
|
+
)
|
|
8
|
+
from codeledger.scanner.change_dag import ChangeMetrics
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def classify_with_slm(metrics: ChangeMetrics) -> SessionClassification:
    """Entry point for SLM-based session classification.

    As written, no model is invoked: the call is forwarded unchanged to
    the rule-based classifier and its result is returned.

    NOTE(review): presumably a placeholder until inference backed by the
    ``codeledger[slm]`` extra is wired in — confirm.
    """
    from codeledger.classifier.rules import classify_with_rules as rule_based

    return rule_based(metrics)
|