dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/lessons.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Lessons & constraints: machine-generated knowledge from observation patterns.
|
|
2
|
+
|
|
3
|
+
Lessons are extracted automatically when the observation layer detects
|
|
4
|
+
recurring patterns. Constraints are evidence-based invariants derived from
|
|
5
|
+
the dependency graph and git history.
|
|
6
|
+
|
|
7
|
+
Both are injected into resolved context so agents receive them automatically.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import time
|
|
13
|
+
from collections import defaultdict
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from .models.state import Lesson, ObservationLog, ObservedInvariant, SessionLog # noqa: F401
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def generate_lessons(
    sessions: List[SessionLog],
    observations: List[ObservationLog],
    module: Optional[str] = None,
) -> List[Lesson]:
    """Generate lessons from observation patterns.

    Patterns detected:
    - File resolved but never touched (noise candidate): predicted in 10+
      considered sessions and modified in none of them.
    - File touched but not in scope (scope gap): reported as
      touched-not-predicted in 3+ considered sessions.
    - Hotspot: the most frequently modified file, reported once at least
      5 sessions were considered and the file was modified in 3+ of them.

    Args:
        sessions: Session logs. A session is only considered when an
            observation with the same ``session_id`` exists.
        observations: Observation logs, looked up by ``session_id``.
        module: When given, only sessions whose ``scope_expr`` contains this
            string are considered.

    Returns:
        Lessons sorted by descending confidence.
    """
    lessons = []
    obs_by_session = {obs.session_id: obs for obs in observations}

    # Track per-file stats
    file_resolved: Dict[str, int] = defaultdict(int)
    file_touched: Dict[str, int] = defaultdict(int)
    file_gap: Dict[str, int] = defaultdict(int)  # touched but not predicted

    # Count only the sessions that actually feed the stats above. Counting
    # every observed session (ignoring the module filter) would inflate the
    # hotspot denominator with sessions that belong to other modules.
    total_obs = 0

    for session in sessions:
        if module and module not in session.scope_expr:
            continue

        obs = obs_by_session.get(session.session_id)
        if not obs:
            continue

        total_obs += 1
        for f in session.predicted_files:
            file_resolved[f] += 1
        for f in obs.actual_files_modified:
            file_touched[f] += 1
        for f in obs.touched_not_predicted:
            file_gap[f] += 1

    # All lessons produced by one call share a single creation timestamp.
    now = time.time()

    # Lesson: file resolved but never touched in 10+ observations
    for f, count in file_resolved.items():
        if count >= 10 and file_touched.get(f, 0) == 0:
            lessons.append(Lesson(
                trigger="resolved_never_touched",
                observation=f"Resolved {count} times, modified 0 times",
                lesson_text=(
                    f"{os.path.basename(f)} is consistently included but never modified. "
                    f"Consider reducing its budget priority."
                ),
                confidence=min(count / 20, 1.0),
                created=now,
            ))

    # Lesson: file touched but not in scope in 3+ observations
    for f, count in file_gap.items():
        if count >= 3:
            lessons.append(Lesson(
                trigger="touched_not_predicted",
                observation=f"Modified in {count} commits but not in scope includes",
                lesson_text=(
                    f"{os.path.basename(f)} is frequently needed but missing from scope. "
                    f"Consider adding to includes."
                ),
                confidence=min(count / 10, 1.0),
                created=now,
            ))

    # Lesson: most frequently modified file
    if file_touched and total_obs >= 5:
        top_file = max(file_touched, key=file_touched.get)
        top_count = file_touched[top_file]
        if top_count >= 3:
            ratio = top_count / total_obs
            lessons.append(Lesson(
                trigger="hotspot",
                observation=f"Modified in {top_count}/{total_obs} observations ({ratio:.0%})",
                lesson_text=(
                    f"{os.path.basename(top_file)} is the most frequently modified file "
                    f"(touched in {ratio:.0%} of sessions)."
                ),
                confidence=ratio,
                created=now,
            ))

    return sorted(lessons, key=lambda ls: -ls.confidence)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def detect_invariants(
    graph_edges: List[tuple],
    module: str,
    all_modules: List[str],
    commit_count: int = 0,
) -> List[ObservedInvariant]:
    """Derive "no import" boundary invariants for one module.

    Every entry of ``all_modules`` (other than ``module`` itself) that
    ``module`` has no import edge towards yields one invariant.

    Args:
        graph_edges: ``(src, dst)`` path pairs. The first ``/``-separated
            segment of each path is treated as its module; paths without a
            ``/`` are ignored.
        module: Module whose outgoing imports are inspected.
        all_modules: Candidate modules to check against.
        commit_count: Number of commits backing the observation. Confidence
            is ``commit_count / 100`` capped at 1.0, or 0.5 when no count
            is supplied.

    Returns:
        One ``ObservedInvariant`` per module that is never imported from.
    """
    # Collect the foreign modules this module reaches into.
    imported_modules = set()
    for source_path, target_path in graph_edges:
        source_top, source_sep, _ = source_path.partition("/")
        target_top, target_sep, _ = target_path.partition("/")
        if not (source_sep and target_sep):
            continue  # at least one side is a root-level path
        if source_top == module and target_top != module:
            imported_modules.add(target_top)

    # Confidence depends only on commit_count, so it is the same for every boundary.
    if commit_count > 0:
        confidence = round(min(commit_count / 100, 1.0), 2)
    else:
        confidence = round(0.5, 2)

    return [
        ObservedInvariant(
            boundary=f"{module} -> {candidate}",
            direction="no_import",
            held_since="",  # Would need git history to determine
            commit_count=commit_count,
            confidence=confidence,
        )
        for candidate in all_modules
        if candidate != module and candidate not in imported_modules
    ]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def format_lessons_for_context(lessons: List[Lesson], invariants: List[ObservedInvariant]) -> str:
    """Render lessons and invariants as text for injection into resolved context.

    At most the first five lessons are listed. Invariants are listed only
    when their confidence is at least 0.9, again capped at five. Each
    section is omitted when it has nothing to show.

    Returns:
        The sections joined by newlines, or an empty string.
    """
    lines = []

    if lessons:
        lines.append("## Lessons (from observed sessions)")
        lines.extend(f"- {entry.lesson_text}" for entry in lessons[:5])

    trusted = [inv for inv in invariants if inv.confidence >= 0.9] if invariants else []
    if trusted:
        lines.append("## Boundaries")
        lines.extend(
            f"- {inv.boundary}: no imports observed "
            f"({inv.commit_count} commits). Do not break."
            for inv in trusted[:5]
        )

    return "\n".join(lines)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def save_lessons(dot_dir: Path, module: str, lessons: List[Lesson]) -> None:
    """Write lessons to ``<dot_dir>/lessons/<module>.json``.

    The ``lessons`` directory is created when missing. Each lesson is
    stored as a JSON object; the file is UTF-8 with two-space indentation.
    """
    target_dir = dot_dir / "lessons"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Attributes persisted for every lesson, in output order.
    persisted = (
        "trigger",
        "observation",
        "lesson_text",
        "confidence",
        "created",
        "source_sessions",
        "acknowledged",
    )
    records = []
    for lesson in lessons:
        records.append({name: getattr(lesson, name) for name in persisted})

    payload = json.dumps(records, indent=2)
    out_file = target_dir / f"{module}.json"
    out_file.write_text(payload, encoding="utf-8")
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def load_lessons(dot_dir: Path, module: str) -> List[Lesson]:
    """Load lessons from .dotscope/lessons/<module>.json.

    Loading is best-effort: an empty list is returned when the file is
    missing or unreadable, is not valid UTF-8 or valid JSON, or holds
    entries that cannot be passed to ``Lesson`` as keyword arguments.
    """
    path = dot_dir / "lessons" / f"{module}.json"
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as OSError, which also closes the check-then-read race.
        data = json.loads(path.read_text(encoding="utf-8"))
        return [Lesson(**item) for item in data]
    except (OSError, ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def save_invariants(dot_dir: Path, module: str, invariants: List[ObservedInvariant]) -> None:
    """Write invariants to ``<dot_dir>/invariants/<module>.json``.

    The ``invariants`` directory is created when missing. Each invariant is
    stored as a JSON object; the file is UTF-8 with two-space indentation.
    """
    target_dir = dot_dir / "invariants"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Attributes persisted for every invariant, in output order.
    persisted = (
        "boundary",
        "direction",
        "held_since",
        "commit_count",
        "confidence",
        "violations",
    )
    records = []
    for invariant in invariants:
        records.append({name: getattr(invariant, name) for name in persisted})

    payload = json.dumps(records, indent=2)
    out_file = target_dir / f"{module}.json"
    out_file.write_text(payload, encoding="utf-8")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def load_invariants(dot_dir: Path, module: str) -> List[ObservedInvariant]:
    """Load invariants from .dotscope/invariants/<module>.json.

    Loading is best-effort: an empty list is returned when the file is
    missing or unreadable, is not valid UTF-8 or valid JSON, or holds
    entries that cannot be passed to ``ObservedInvariant`` as keyword
    arguments.
    """
    path = dot_dir / "invariants" / f"{module}.json"
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as OSError, which also closes the check-then-read race.
        data = json.loads(path.read_text(encoding="utf-8"))
        return [ObservedInvariant(**item) for item in data]
    except (OSError, ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
|
dotscope/matcher.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Task-to-scope matching via keyword overlap.
|
|
2
|
+
|
|
3
|
+
Given a natural language task description, find which scope(s) are most relevant.
|
|
4
|
+
Uses Jaccard similarity over keywords, with optional embedding fallback.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import List, Tuple
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Words ignored during matching: common English function words plus generic
# task verbs ("fix", "add", ...) that appear in most task descriptions.
_STOP_WORDS = {
    # articles and forms of "to be"
    "the", "a", "an",
    "is", "are", "was", "were", "be", "been", "being",
    # auxiliaries and modals
    "have", "has", "had", "do", "does", "did",
    "will", "would", "could", "should", "may", "might",
    "shall", "can", "need", "must",
    # prepositions
    "to", "of", "in", "for", "on", "with", "at", "by", "from", "as",
    "into", "about", "like", "through", "after", "before", "between",
    "under", "above",
    # pronouns and determiners
    "it", "its", "this", "that", "these", "those",
    "i", "me", "my", "we", "our", "you", "your",
    "he", "she", "they", "them",
    # conjunctions and adverbs
    "and", "but", "or", "not", "no", "if", "then", "so",
    "very", "just", "also", "only",
    # generic task verbs
    "fix", "add", "update", "change", "make", "get", "set", "new",
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def match_task(
    task: str,
    scopes: List[Tuple[str, List[str], str]],
    threshold: float = 0.05,
) -> List[Tuple[str, float]]:
    """Rank scopes by how relevant they are to a task description.

    Args:
        task: Natural language task description
        scopes: List of (scope_name, keywords, description) tuples
        threshold: Minimum score a scope needs to be returned

    Returns:
        List of (scope_name, score), highest score first. Empty when the
        task yields no tokens after tokenization.
    """
    task_words = _tokenize(task)
    if not task_words:
        return []

    scored = (
        (name, _score_scope(task_words, name, keywords, description))
        for name, keywords, description in scopes
    )
    kept = [(name, score) for name, score in scored if score >= threshold]

    # Stable sort: scopes with equal scores keep their input order.
    return sorted(kept, key=lambda pair: pair[1], reverse=True)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _score_scope(
    task_words: set,
    scope_name: str,
    keywords: List[str],
    description: str,
) -> float:
    """Score how well a scope matches a task.

    Combines:
    1. Jaccard similarity between task words and scope keywords (weight 0.6)
    2. Shared token between scope name and task (+0.25)
    3. Scope name occurring inside any single task word (+0.1)
    4. Fraction of description words present in the task (weight 0.15)

    Args:
        task_words: Tokens of the task description.
        scope_name: Name of the scope being scored.
        keywords: Scope keywords; each is tokenized before comparison.
        description: Free-text scope description.

    Returns:
        Score capped at 1.0.
    """
    score = 0.0

    # Keyword match (Jaccard similarity)
    kw_words = set()
    for kw in keywords:
        kw_words.update(_tokenize(kw))

    if kw_words:
        # The union is non-empty whenever kw_words is, so no zero division.
        jaccard = len(task_words & kw_words) / len(task_words | kw_words)
        score += jaccard * 0.6  # 60% weight on keyword match

    # Scope name match
    if _tokenize(scope_name) & task_words:
        score += 0.25  # 25% bonus for name match

    # Also check if the scope name appears as a substring of a task word.
    # Each word is tested on its own: joining the set would make the result
    # depend on set iteration order, which is not stable between runs. An
    # empty name is skipped, since "" is a substring of everything.
    needle = scope_name.lower()
    if needle and any(needle in word for word in task_words):
        score += 0.1

    # Description word overlap
    desc_words = _tokenize(description)
    if desc_words:
        desc_overlap = task_words & desc_words
        if desc_overlap:
            score += (len(desc_overlap) / len(desc_words)) * 0.15  # 15% weight

    return min(score, 1.0)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _tokenize(text: str) -> set:
    """Split text into a set of lowercase word tokens.

    Tokens are runs of letters, digits and underscores. Single-character
    tokens and stop words are discarded.
    """
    tokens = set()
    for match in re.finditer(r"[a-zA-Z0-9_]+", text.lower()):
        word = match.group()
        if len(word) <= 1 or word in _STOP_WORDS:
            continue
        tokens.add(word)
    return tokens
|