codeledger 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. codeledger/__init__.py +4 -0
  2. codeledger/__main__.py +6 -0
  3. codeledger/classifier/__init__.py +21 -0
  4. codeledger/classifier/deferred.py +143 -0
  5. codeledger/classifier/rules.py +116 -0
  6. codeledger/classifier/session.py +60 -0
  7. codeledger/classifier/slm.py +19 -0
  8. codeledger/cli.py +510 -0
  9. codeledger/compressor/__init__.py +15 -0
  10. codeledger/compressor/scope_engine.py +73 -0
  11. codeledger/compressor/token_compressor.py +138 -0
  12. codeledger/config/__init__.py +19 -0
  13. codeledger/config/loader.py +141 -0
  14. codeledger/config/presets/cli_tool.yaml +62 -0
  15. codeledger/config/presets/data_pipeline.yaml +67 -0
  16. codeledger/config/presets/fullstack.yaml +80 -0
  17. codeledger/config/presets/minimal.yaml +48 -0
  18. codeledger/config/presets/ml_research.yaml +73 -0
  19. codeledger/config/presets/python_api.yaml +86 -0
  20. codeledger/config/presets/react_frontend.yaml +70 -0
  21. codeledger/config/schema.py +212 -0
  22. codeledger/generator/__init__.py +15 -0
  23. codeledger/generator/api_client.py +104 -0
  24. codeledger/generator/local_client.py +72 -0
  25. codeledger/generator/model_router.py +61 -0
  26. codeledger/generator/prompt_builder.py +125 -0
  27. codeledger/merge/__init__.py +15 -0
  28. codeledger/merge/deduplicator.py +100 -0
  29. codeledger/merge/extractor.py +131 -0
  30. codeledger/merge/merge_engine.py +127 -0
  31. codeledger/models/__init__.py +3 -0
  32. codeledger/models/inference.py +0 -0
  33. codeledger/parser/__init__.py +39 -0
  34. codeledger/parser/base.py +136 -0
  35. codeledger/parser/fallback.py +122 -0
  36. codeledger/parser/java_parser.py +10 -0
  37. codeledger/parser/js_parser.py +10 -0
  38. codeledger/parser/python_parser.py +239 -0
  39. codeledger/postprocess/__init__.py +23 -0
  40. codeledger/postprocess/file_manager.py +183 -0
  41. codeledger/postprocess/formatter.py +114 -0
  42. codeledger/postprocess/validator.py +130 -0
  43. codeledger/scanner/__init__.py +30 -0
  44. codeledger/scanner/change_dag.py +228 -0
  45. codeledger/scanner/dependency.py +124 -0
  46. codeledger/scanner/file_scanner.py +207 -0
  47. codeledger/scanner/snapshot.py +278 -0
  48. codeledger/templates/doc_template.md.j2 +23 -0
  49. codeledger/templates/merge_template.md.j2 +17 -0
  50. codeledger/templates/prompt_templates/merge.txt +35 -0
  51. codeledger/templates/prompt_templates/micro.txt +20 -0
  52. codeledger/templates/prompt_templates/refactor.txt +30 -0
  53. codeledger/templates/prompt_templates/standard.txt +19 -0
  54. codeledger-0.1.0.dist-info/METADATA +296 -0
  55. codeledger-0.1.0.dist-info/RECORD +58 -0
  56. codeledger-0.1.0.dist-info/WHEEL +4 -0
  57. codeledger-0.1.0.dist-info/entry_points.txt +2 -0
  58. codeledger-0.1.0.dist-info/licenses/LICENSE +21 -0
codeledger/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """CodeLedger — Auto-generated code comprehension for AI-assisted development."""
2
+
3
# Package version string.
__version__ = "0.1.0"
# Canonical application name.
__app_name__ = "codeledger"
codeledger/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running codeledger as `python -m codeledger`."""
2
+
3
+ from codeledger.cli import app
4
+
5
if __name__ == "__main__":
    # Executed via `python -m codeledger`: hand control to the CLI app.
    app()
codeledger/classifier/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """Classifier package — session classification and change deferral."""
2
+
3
+ from codeledger.classifier.session import (
4
+ SessionClassification,
5
+ SessionType,
6
+ classify_session,
7
+ )
8
+ from codeledger.classifier.deferred import (
9
+ PendingChanges,
10
+ load_pending,
11
+ save_pending,
12
+ )
13
+
14
# Public API of the classifier package (kept in alphabetical order).
__all__ = [
    "PendingChanges",
    "SessionClassification",
    "SessionType",
    "classify_session",
    "load_pending",
    "save_pending",
]
codeledger/classifier/deferred.py ADDED
@@ -0,0 +1,143 @@
1
+ """Deferred change accumulator — buffers trivial sessions until flush threshold."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from ruamel.yaml import YAML
11
+
12
+ from codeledger.scanner.change_dag import ChangeMetrics
13
+
14
# Module-wide ruamel.yaml instance shared by load_pending() and save_pending().
yaml = YAML()
# Disable flow style on dump (presumably yields block-style YAML — see the
# ruamel.yaml documentation for the exact effect).
yaml.default_flow_style = False

# Location of the pending-changes buffer, relative to the project root.
PENDING_FILE = ".codeledger/.pending_changes.yaml"
18
+
19
+
20
@dataclass
class DeferredSession:
    """One trivial session whose documentation was postponed.

    Instances are plain value records; they are created by
    ``PendingChanges.add_session`` and rebuilt from YAML data by
    ``PendingChanges.from_dict``.
    """

    # Identifier of the deferred session (``deferred_<UTC timestamp>`` when
    # generated by ``PendingChanges.add_session``).
    session_id: str
    # Creation time as an ISO-8601 string.
    timestamp: str
    # Paths that changed during the session.
    files_changed: list[str]
    # Short human-readable description of the change.
    summary: str
    # Change magnitude contributed by this session.
    magnitude: float
29
+
30
+
31
@dataclass
class PendingChanges:
    """Accumulated deferred changes waiting to be flushed.

    The buffer collects ``DeferredSession`` records together with a running
    magnitude total and a session counter. ``to_dict`` / ``from_dict``
    convert the buffer to and from plain data for persistence.
    """

    # Sessions buffered since the last flush, in insertion order.
    pending_sessions: list[DeferredSession] = field(default_factory=list)
    # Running sum of the buffered sessions' magnitudes.
    accumulated_magnitude: float = 0.0
    # Number of sessions buffered since the last flush.
    sessions_deferred: int = 0

    def should_flush(
        self,
        flush_threshold: float = 0.40,
        max_deferred: int = 5,
    ) -> bool:
        """Check if accumulated changes should trigger a documentation flush.

        Args:
            flush_threshold: Accumulated magnitude at or above which a flush
                is due.
            max_deferred: Number of deferred sessions at or above which a
                flush is due.

        Returns:
            True when either limit has been reached, otherwise False.
        """
        return (
            self.sessions_deferred >= max_deferred
            or self.accumulated_magnitude >= flush_threshold
        )

    def add_session(
        self,
        changed_paths: list[str],
        metrics: ChangeMetrics,
        summary: str = "",
    ) -> None:
        """Add a deferred session to the pending buffer.

        Args:
            changed_paths: Paths changed in the session. The list is copied,
                so later mutation by the caller does not affect the buffer.
            metrics: Change metrics for the session; ``total_magnitude`` is
                added to the running total.
            summary: Optional description. When empty, one is derived from
                the file and line counts in ``metrics``.
        """
        now = datetime.now(timezone.utc)
        session_id = f"deferred_{now.strftime('%Y%m%d_%H%M%S')}"

        if not summary:
            summary = (
                f"{metrics.files_changed} files, "
                f"{metrics.lines_added}+ / {metrics.lines_removed}- lines"
            )

        session = DeferredSession(
            session_id=session_id,
            timestamp=now.isoformat(),
            # Copy: storing the caller's list by reference would let outside
            # mutation alter the buffered record (and a list object shared
            # between records may be serialised as YAML anchors/aliases).
            files_changed=list(changed_paths),
            summary=summary,
            magnitude=metrics.total_magnitude,
        )

        self.pending_sessions.append(session)
        self.accumulated_magnitude += metrics.total_magnitude
        self.sessions_deferred += 1

    def flush(self) -> list[DeferredSession]:
        """Flush all pending sessions and return them. Resets the buffer."""
        flushed = list(self.pending_sessions)
        self.pending_sessions = []
        self.accumulated_magnitude = 0.0
        self.sessions_deferred = 0
        return flushed

    def to_dict(self) -> dict:
        """Return the buffer as plain dicts/lists suitable for YAML dumping.

        ``accumulated_magnitude`` is rounded to 4 decimal places.
        """
        return {
            "pending_sessions": [
                {
                    "session_id": s.session_id,
                    "timestamp": s.timestamp,
                    "files_changed": list(s.files_changed),
                    "summary": s.summary,
                    "magnitude": s.magnitude,
                }
                for s in self.pending_sessions
            ],
            "accumulated_magnitude": round(self.accumulated_magnitude, 4),
            "sessions_deferred": self.sessions_deferred,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "PendingChanges":
        """Rebuild a buffer from the mapping produced by ``to_dict``.

        Missing keys and keys whose value is ``None`` (e.g. a bare
        ``pending_sessions:`` line in YAML) fall back to empty defaults.

        Raises:
            KeyError: If a session entry lacks ``session_id`` or
                ``timestamp``.
        """
        sessions = [
            DeferredSession(
                session_id=s["session_id"],
                timestamp=s["timestamp"],
                # ``or`` also covers explicit nulls, which ``.get(key, default)``
                # would pass through unchanged.
                files_changed=list(s.get("files_changed") or []),
                summary=s.get("summary") or "",
                magnitude=s.get("magnitude") or 0.0,
            )
            for s in (data.get("pending_sessions") or [])
        ]
        return cls(
            pending_sessions=sessions,
            accumulated_magnitude=data.get("accumulated_magnitude") or 0.0,
            sessions_deferred=data.get("sessions_deferred") or 0,
        )
120
+
121
+
122
def load_pending(project_root: Path) -> PendingChanges:
    """Load pending changes from disk.

    Args:
        project_root: Project directory containing the ``PENDING_FILE`` path.

    Returns:
        The buffer parsed from the pending-changes file, or an empty
        ``PendingChanges`` when the file is missing or holds no data.
    """
    filepath = project_root / PENDING_FILE

    # Open directly instead of checking exists() first: the file may vanish
    # between the check and the open. NotADirectoryError covers the case where
    # a parent path component is a regular file, which exists() also treated
    # as "missing".
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            data = yaml.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return PendingChanges()

    if not data:
        return PendingChanges()

    return PendingChanges.from_dict(data)
135
+
136
+
137
def save_pending(project_root: Path, pending: PendingChanges) -> None:
    """Write the pending-changes buffer to ``PENDING_FILE`` under *project_root*.

    The parent directory is created when it does not exist yet, and any
    existing file is overwritten.
    """
    target = project_root / PENDING_FILE
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open("w", encoding="utf-8") as handle:
        yaml.dump(pending.to_dict(), handle)
codeledger/classifier/rules.py ADDED
@@ -0,0 +1,116 @@
1
+ """Rule-based session classifier — MVP classifier using configurable thresholds."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codeledger.classifier.session import (
6
+ SESSION_BUDGETS,
7
+ SessionClassification,
8
+ SessionType,
9
+ )
10
+ from codeledger.scanner.change_dag import ChangeMetrics
11
+
12
+
13
def classify_with_rules(
    metrics: ChangeMetrics,
    trivial_max_files: int = 2,
    trivial_max_lines: int = 30,
    minor_max_files: int = 5,
    minor_max_lines: int = 150,
    standard_max_files: int = 15,
    standard_max_lines: int = 500,
) -> SessionClassification:
    """Classify a session by comparing change metrics with size thresholds.

    Checks run in this order and the first match wins:
      1. Refactor: at least one deleted file, at least one new file,
         ``abs(metrics.net_lines) < 50`` and at least 3 files changed.
      2. Trivial: within the trivial limits, no new files and no structural
         changes.
      3. Minor, then Standard: within the respective file and line limits.
      4. Major: everything else.

    All ``*_max_*`` limits are inclusive. Token budgets come from
    ``SESSION_BUDGETS`` for the chosen session type.

    Args:
        metrics: Change metrics for the session.
        trivial_max_files: Max changed files for a trivial session.
        trivial_max_lines: Max added + removed lines for a trivial session.
        minor_max_files: Max changed files for a minor session.
        minor_max_lines: Max added + removed lines for a minor session.
        standard_max_files: Max changed files for a standard session.
        standard_max_lines: Max added + removed lines for a standard session.

    Returns:
        The classification with a fixed per-category confidence and a
        human-readable reason.
    """

    def _classified(
        kind: SessionType, confidence: float, reason: str
    ) -> SessionClassification:
        # Attach the (input, output) token budget registered for this type.
        budget = SESSION_BUDGETS[kind]
        return SessionClassification(
            session_type=kind,
            confidence=confidence,
            input_token_budget=budget[0],
            output_token_budget=budget[1],
            reason=reason,
        )

    # NOTE: despite its name this is total churn (added + removed); it is not
    # the same value as ``metrics.net_lines`` used for refactor detection.
    net_lines = metrics.lines_added + metrics.lines_removed

    looks_like_refactor = (
        metrics.files_deleted > 0
        and metrics.new_files_created > 0
        and abs(metrics.net_lines) < 50
        and metrics.files_changed >= 3
    )
    if looks_like_refactor:
        return _classified(
            SessionType.REFACTOR,
            0.85,
            f"Refactor detected: {metrics.files_deleted} deleted, "
            f"{metrics.new_files_created} created, "
            f"low net line change ({metrics.net_lines})",
        )

    is_trivial = (
        metrics.files_changed <= trivial_max_files
        and net_lines <= trivial_max_lines
        and metrics.new_files_created == 0
        and not metrics.has_structural_changes
    )
    if is_trivial:
        return _classified(
            SessionType.TRIVIAL,
            0.90,
            f"Trivial: {metrics.files_changed} files, "
            f"{net_lines} lines changed",
        )

    if metrics.files_changed <= minor_max_files and net_lines <= minor_max_lines:
        return _classified(
            SessionType.MINOR,
            0.85,
            f"Minor: {metrics.files_changed} files, "
            f"{net_lines} lines changed",
        )

    if (
        metrics.files_changed <= standard_max_files
        and net_lines <= standard_max_lines
    ):
        return _classified(
            SessionType.STANDARD,
            0.85,
            f"Standard: {metrics.files_changed} files, "
            f"{net_lines} lines changed",
        )

    # Nothing smaller matched, so the session is major.
    return _classified(
        SessionType.MAJOR,
        0.80,
        f"Major: {metrics.files_changed} files, "
        f"{net_lines} lines changed, "
        f"{metrics.new_files_created} new files",
    )
codeledger/classifier/session.py ADDED
@@ -0,0 +1,60 @@
1
+ """Session types and classification interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+
8
+ from codeledger.scanner.change_dag import ChangeMetrics
9
+
10
+
11
class SessionType(str, Enum):
    """Categories a development session can be classified into.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    TRIVIAL = "trivial"
    MINOR = "minor"
    STANDARD = "standard"
    MAJOR = "major"
    REFACTOR = "refactor"
17
+
18
+
19
@dataclass
class SessionClassification:
    """Outcome of classifying one development session."""

    # Category assigned to the session.
    session_type: SessionType
    # Confidence in the assignment, in the range 0.0 - 1.0.
    confidence: float
    # Token budget for generation input.
    input_token_budget: int
    # Token budget for generation output.
    output_token_budget: int
    # Human-readable explanation of why this category was chosen.
    reason: str

    @property
    def should_defer(self) -> bool:
        """Return True when the session is classified as trivial."""
        return self.session_type == SessionType.TRIVIAL
32
+
33
+
34
# (input_token_budget, output_token_budget) for each session type.
SESSION_BUDGETS: dict[SessionType, tuple[int, int]] = {
    # Deferred, no generation.
    SessionType.TRIVIAL: (0, 0),
    # Micro-doc.
    SessionType.MINOR: (500, 1500),
    # Normal doc.
    SessionType.STANDARD: (2000, 5000),
    # Comprehensive doc.
    SessionType.MAJOR: (4000, 8000),
    # Refactor-focused doc.
    SessionType.REFACTOR: (1500, 3000),
}
42
+
43
+
44
def classify_session(
    metrics: ChangeMetrics,
    use_slm: bool = False,
) -> SessionClassification:
    """Classify a development session from its change metrics.

    Args:
        metrics: Change metrics describing the session.
        use_slm: When True, try the SLM classifier first.

    Returns:
        The SLM classifier's result when requested and usable, otherwise the
        rule-based classifier's result.
    """
    if use_slm:
        # An ImportError from either the import or the call itself selects
        # the rule-based path below instead of propagating.
        try:
            from codeledger.classifier.slm import classify_with_slm

            return classify_with_slm(metrics)
        except ImportError:
            pass

    # Imported lazily, as in the SLM branch above.
    from codeledger.classifier.rules import classify_with_rules

    return classify_with_rules(metrics)
codeledger/classifier/slm.py ADDED
@@ -0,0 +1,19 @@
1
+ """SLM-based classifier — optional enhanced classification using a local model."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codeledger.classifier.session import (
6
+ SessionClassification,
7
+ )
8
+ from codeledger.scanner.change_dag import ChangeMetrics
9
+
10
+
11
def classify_with_slm(metrics: ChangeMetrics) -> SessionClassification:
    """Classify a session via the SLM entry point.

    This is intended to use a local SLM (the `codeledger[slm]` extra). As
    written, the function loads no model and simply delegates to the
    rule-based classifier.

    Args:
        metrics: Change metrics describing the session.

    Returns:
        The classification produced by ``classify_with_rules``.
    """
    # Imported inside the function rather than at module level.
    from codeledger.classifier.rules import classify_with_rules as rule_based

    classification = rule_based(metrics)
    return classification