dotscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. dotscope/.scope +63 -0
  2. dotscope/__init__.py +3 -0
  3. dotscope/absorber.py +390 -0
  4. dotscope/assertions.py +128 -0
  5. dotscope/ast_analyzer.py +2 -0
  6. dotscope/backtest.py +2 -0
  7. dotscope/bench.py +141 -0
  8. dotscope/budget.py +3 -0
  9. dotscope/cache.py +2 -0
  10. dotscope/check/__init__.py +1 -0
  11. dotscope/check/acknowledge.py +2 -0
  12. dotscope/check/checker.py +3 -0
  13. dotscope/check/checks/__init__.py +1 -0
  14. dotscope/check/checks/antipattern.py +2 -0
  15. dotscope/check/checks/boundary.py +2 -0
  16. dotscope/check/checks/contracts.py +3 -0
  17. dotscope/check/checks/direction.py +2 -0
  18. dotscope/check/checks/intent.py +2 -0
  19. dotscope/check/checks/stability.py +2 -0
  20. dotscope/check/constraints.py +2 -0
  21. dotscope/check/models.py +15 -0
  22. dotscope/cli.py +1447 -0
  23. dotscope/composer.py +147 -0
  24. dotscope/constants.py +45 -0
  25. dotscope/context.py +60 -0
  26. dotscope/counterfactual.py +180 -0
  27. dotscope/debug.py +220 -0
  28. dotscope/discovery.py +104 -0
  29. dotscope/formatter.py +157 -0
  30. dotscope/graph.py +3 -0
  31. dotscope/health.py +212 -0
  32. dotscope/help.py +204 -0
  33. dotscope/history.py +6 -0
  34. dotscope/hooks.py +2 -0
  35. dotscope/ingest.py +858 -0
  36. dotscope/intent.py +618 -0
  37. dotscope/lessons.py +223 -0
  38. dotscope/matcher.py +104 -0
  39. dotscope/mcp_server.py +1081 -0
  40. dotscope/models/.scope +45 -0
  41. dotscope/models/__init__.py +7 -0
  42. dotscope/models/core.py +288 -0
  43. dotscope/models/history.py +73 -0
  44. dotscope/models/intent.py +213 -0
  45. dotscope/models/passes.py +58 -0
  46. dotscope/models/state.py +250 -0
  47. dotscope/models.py +9 -0
  48. dotscope/near_miss.py +3 -0
  49. dotscope/onboarding.py +2 -0
  50. dotscope/parser.py +387 -0
  51. dotscope/passes/.scope +105 -0
  52. dotscope/passes/__init__.py +1 -0
  53. dotscope/passes/ast_analyzer.py +508 -0
  54. dotscope/passes/backtest.py +198 -0
  55. dotscope/passes/budget_allocator.py +164 -0
  56. dotscope/passes/convention_compliance.py +40 -0
  57. dotscope/passes/convention_discovery.py +247 -0
  58. dotscope/passes/convention_parser.py +223 -0
  59. dotscope/passes/graph_builder.py +299 -0
  60. dotscope/passes/history_miner.py +336 -0
  61. dotscope/passes/incremental.py +149 -0
  62. dotscope/passes/lang/__init__.py +38 -0
  63. dotscope/passes/lang/_base.py +20 -0
  64. dotscope/passes/lang/_treesitter.py +93 -0
  65. dotscope/passes/lang/go.py +333 -0
  66. dotscope/passes/lang/javascript.py +348 -0
  67. dotscope/passes/lazy.py +152 -0
  68. dotscope/passes/semantic_diff.py +160 -0
  69. dotscope/passes/sentinel/__init__.py +1 -0
  70. dotscope/passes/sentinel/acknowledge.py +222 -0
  71. dotscope/passes/sentinel/checker.py +383 -0
  72. dotscope/passes/sentinel/checks/__init__.py +1 -0
  73. dotscope/passes/sentinel/checks/antipattern.py +84 -0
  74. dotscope/passes/sentinel/checks/boundary.py +46 -0
  75. dotscope/passes/sentinel/checks/contracts.py +148 -0
  76. dotscope/passes/sentinel/checks/convention.py +54 -0
  77. dotscope/passes/sentinel/checks/direction.py +71 -0
  78. dotscope/passes/sentinel/checks/intent.py +207 -0
  79. dotscope/passes/sentinel/checks/stability.py +66 -0
  80. dotscope/passes/sentinel/checks/voice.py +108 -0
  81. dotscope/passes/sentinel/constraints.py +472 -0
  82. dotscope/passes/sentinel/line_filter.py +88 -0
  83. dotscope/passes/sentinel/models.py +15 -0
  84. dotscope/passes/virtual.py +239 -0
  85. dotscope/passes/voice.py +162 -0
  86. dotscope/passes/voice_defaults.py +28 -0
  87. dotscope/passes/voice_discovery.py +245 -0
  88. dotscope/paths.py +32 -0
  89. dotscope/progress.py +44 -0
  90. dotscope/regression.py +147 -0
  91. dotscope/resolver.py +203 -0
  92. dotscope/scanner.py +246 -0
  93. dotscope/sessions.py +2 -0
  94. dotscope/storage/.scope +64 -0
  95. dotscope/storage/__init__.py +1 -0
  96. dotscope/storage/cache.py +114 -0
  97. dotscope/storage/claude_hooks.py +119 -0
  98. dotscope/storage/git_hooks.py +277 -0
  99. dotscope/storage/incremental_state.py +61 -0
  100. dotscope/storage/mcp_config.py +98 -0
  101. dotscope/storage/near_miss.py +183 -0
  102. dotscope/storage/onboarding.py +150 -0
  103. dotscope/storage/session_manager.py +195 -0
  104. dotscope/storage/timing.py +84 -0
  105. dotscope/timing.py +2 -0
  106. dotscope/tokens.py +53 -0
  107. dotscope/utility.py +123 -0
  108. dotscope/virtual.py +3 -0
  109. dotscope/visibility.py +664 -0
  110. dotscope-0.1.0.dist-info/METADATA +50 -0
  111. dotscope-0.1.0.dist-info/RECORD +114 -0
  112. dotscope-0.1.0.dist-info/WHEEL +4 -0
  113. dotscope-0.1.0.dist-info/entry_points.txt +3 -0
  114. dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/lessons.py ADDED
@@ -0,0 +1,223 @@
1
+ """Lessons & constraints: machine-generated knowledge from observation patterns.
2
+
3
+ Lessons are extracted automatically when the observation layer detects
4
+ recurring patterns. Constraints are evidence-based invariants derived from
5
+ the dependency graph and git history.
6
+
7
+ Both are injected into resolved context so agents receive them automatically.
8
+ """
9
+
10
+ import json
11
+ import os
12
+ import time
13
+ from collections import defaultdict
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional
16
+
17
+ from .models.state import Lesson, ObservationLog, ObservedInvariant, SessionLog # noqa: F401
18
+
19
+
20
def generate_lessons(
    sessions: List[SessionLog],
    observations: List[ObservationLog],
    module: Optional[str] = None,
) -> List[Lesson]:
    """Generate lessons from observation patterns.

    Sessions are joined to observations on ``session_id``; sessions without
    an observation are ignored. When ``module`` is given, sessions whose
    ``scope_expr`` does not contain it are ignored as well, and every
    statistic below (including the hotspot denominator) is computed over the
    remaining sessions only.

    Patterns detected:
    - File resolved 10+ times but never modified (noise candidate)
    - File modified but not predicted in 3+ observations (scope gap)
    - Most frequently modified file once 5+ sessions were observed (hotspot)

    Args:
        sessions: Session logs providing ``session_id``, ``scope_expr`` and
            ``predicted_files``.
        observations: Observation logs providing ``session_id``,
            ``actual_files_modified`` and ``touched_not_predicted``.
        module: Optional substring that ``session.scope_expr`` must contain.

    Returns:
        Lessons sorted by descending confidence.
    """
    obs_by_session = {obs.session_id: obs for obs in observations}

    # Per-file counters, accumulated over the sessions that pass the filters.
    file_resolved: Dict[str, int] = defaultdict(int)
    file_touched: Dict[str, int] = defaultdict(int)
    file_gap: Dict[str, int] = defaultdict(int)  # touched but not predicted
    total_obs = 0

    for session in sessions:
        if module and module not in session.scope_expr:
            continue

        obs = obs_by_session.get(session.session_id)
        if not obs:
            continue

        # Count only sessions that actually contributed to the statistics so
        # the hotspot ratio is not diluted by sessions of other modules.
        total_obs += 1

        for f in session.predicted_files:
            file_resolved[f] += 1
        for f in obs.actual_files_modified:
            file_touched[f] += 1
        for f in obs.touched_not_predicted:
            file_gap[f] += 1

    lessons = []
    now = time.time()  # one timestamp for every lesson produced by this run

    # Lesson: file resolved in 10+ observations but never touched
    for f, count in file_resolved.items():
        if count >= 10 and file_touched.get(f, 0) == 0:
            lessons.append(Lesson(
                trigger="resolved_never_touched",
                observation=f"Resolved {count} times, modified 0 times",
                lesson_text=(
                    f"{os.path.basename(f)} is consistently included but never modified. "
                    f"Consider reducing its budget priority."
                ),
                confidence=min(count / 20, 1.0),
                created=now,
            ))

    # Lesson: file touched but not in scope in 3+ observations
    for f, count in file_gap.items():
        if count >= 3:
            lessons.append(Lesson(
                trigger="touched_not_predicted",
                observation=f"Modified in {count} commits but not in scope includes",
                lesson_text=(
                    f"{os.path.basename(f)} is frequently needed but missing from scope. "
                    f"Consider adding to includes."
                ),
                confidence=min(count / 10, 1.0),
                created=now,
            ))

    # Lesson: most frequently modified file
    if file_touched and total_obs >= 5:
        top_file = max(file_touched, key=file_touched.get)
        top_count = file_touched[top_file]
        if top_count >= 3:
            ratio = top_count / total_obs
            lessons.append(Lesson(
                trigger="hotspot",
                observation=f"Modified in {top_count}/{total_obs} observations ({ratio:.0%})",
                lesson_text=(
                    f"{os.path.basename(top_file)} is the most frequently modified file "
                    f"(touched in {ratio:.0%} of sessions)."
                ),
                confidence=ratio,
                created=now,
            ))

    return sorted(lessons, key=lambda ls: -ls.confidence)
102
+
103
+
104
def detect_invariants(
    graph_edges: List[tuple],
    module: str,
    all_modules: List[str],
    commit_count: int = 0,
) -> List[ObservedInvariant]:
    """Derive "no import" boundary invariants for ``module``.

    Every entry of ``all_modules`` (other than ``module`` itself) that
    ``module`` never imports from, according to ``graph_edges``, is reported
    as an observed invariant.

    Args:
        graph_edges: ``(source, target)`` pairs of "/"-separated paths; the
            first path segment is treated as the module name.
        module: The module whose outgoing imports are inspected.
        all_modules: Candidate modules to check against.
        commit_count: Number of commits backing the observation; scales the
            confidence (100+ commits gives 1.0, an unknown count gives 0.5).

    Returns:
        One ``ObservedInvariant`` per module that is never imported from.
    """
    # Collect the top-level segments of everything this module imports.
    imported_modules = set()
    for source_path, target_path in graph_edges:
        source_head, source_sep, _ = source_path.partition("/")
        target_head, target_sep, _ = target_path.partition("/")
        # Paths without a "/" carry no module segment and are ignored.
        if not source_sep or source_head != module:
            continue
        if target_sep and target_head != module:
            imported_modules.add(target_head)

    def _confidence() -> float:
        if commit_count > 0:
            return round(min(commit_count / 100, 1.0), 2)
        return round(0.5, 2)

    return [
        ObservedInvariant(
            boundary=f"{module} -> {candidate}",
            direction="no_import",
            held_since="",  # Would need git history to determine
            commit_count=commit_count,
            confidence=_confidence(),
        )
        for candidate in all_modules
        if candidate != module and candidate not in imported_modules
    ]
139
+
140
+
141
def format_lessons_for_context(lessons: List[Lesson], invariants: List[ObservedInvariant]) -> str:
    """Render lessons and invariants as text for injection into resolved context.

    At most the first five lessons are listed. Invariants are listed only
    when their confidence is at least 0.9, again capped at five entries.

    Returns:
        The rendered sections joined by newlines, or "" when nothing applies.
    """
    lines = []

    if lessons:
        lines.append("## Lessons (from observed sessions)")
        lines.extend(f"- {entry.lesson_text}" for entry in lessons[:5])

    trusted = [inv for inv in invariants if inv.confidence >= 0.9] if invariants else []
    if trusted:
        lines.append("## Boundaries")
        lines.extend(
            f"- {inv.boundary}: no imports observed ({inv.commit_count} commits). Do not break."
            for inv in trusted[:5]
        )

    return "\n".join(lines)
161
+
162
+
163
def save_lessons(dot_dir: Path, module: str, lessons: List[Lesson]) -> None:
    """Write ``lessons`` as a JSON array to ``<dot_dir>/lessons/<module>.json``.

    The ``lessons`` directory is created when missing and any existing file
    for ``module`` is overwritten.
    """
    target_dir = dot_dir / "lessons"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Serialized attributes, in the order they appear in the JSON objects.
    field_names = (
        "trigger",
        "observation",
        "lesson_text",
        "confidence",
        "created",
        "source_sessions",
        "acknowledged",
    )
    payload = [
        {name: getattr(lesson, name) for name in field_names}
        for lesson in lessons
    ]

    target = target_dir / f"{module}.json"
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
181
+
182
+
183
def load_lessons(dot_dir: Path, module: str) -> List[Lesson]:
    """Load lessons from .dotscope/lessons/<module>.json.

    Loading is best-effort: a missing file, a file that is not valid UTF-8,
    malformed JSON, or JSON whose entries cannot be passed to ``Lesson`` as
    keyword arguments all yield an empty list.

    Args:
        dot_dir: The ``.dotscope`` directory.
        module: Module name used as the file stem.

    Returns:
        The stored lessons, or ``[]`` when none can be read.
    """
    path = dot_dir / "lessons" / f"{module}.json"
    # is_file() also rejects a directory that happens to carry this name.
    if not path.is_file():
        return []
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
        return [Lesson(**item) for item in data]
    except (ValueError, TypeError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so every kind of corrupt file is handled the same way.
        return []
193
+
194
+
195
def save_invariants(dot_dir: Path, module: str, invariants: List[ObservedInvariant]) -> None:
    """Write ``invariants`` as a JSON array to ``<dot_dir>/invariants/<module>.json``.

    The ``invariants`` directory is created when missing and any existing
    file for ``module`` is overwritten.
    """
    target_dir = dot_dir / "invariants"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Serialized attributes, in the order they appear in the JSON objects.
    field_names = (
        "boundary",
        "direction",
        "held_since",
        "commit_count",
        "confidence",
        "violations",
    )
    payload = [
        {name: getattr(invariant, name) for name in field_names}
        for invariant in invariants
    ]

    target = target_dir / f"{module}.json"
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
212
+
213
+
214
def load_invariants(dot_dir: Path, module: str) -> List[ObservedInvariant]:
    """Load invariants from .dotscope/invariants/<module>.json.

    Loading is best-effort: a missing file, a file that is not valid UTF-8,
    malformed JSON, or JSON whose entries cannot be passed to
    ``ObservedInvariant`` as keyword arguments all yield an empty list.

    Args:
        dot_dir: The ``.dotscope`` directory.
        module: Module name used as the file stem.

    Returns:
        The stored invariants, or ``[]`` when none can be read.
    """
    path = dot_dir / "invariants" / f"{module}.json"
    # is_file() also rejects a directory that happens to carry this name.
    if not path.is_file():
        return []
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
        return [ObservedInvariant(**item) for item in data]
    except (ValueError, TypeError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so every kind of corrupt file is handled the same way.
        return []
dotscope/matcher.py ADDED
@@ -0,0 +1,104 @@
1
+ """Task-to-scope matching via keyword overlap.
2
+
3
+ Given a natural language task description, find which scope(s) are most relevant.
4
+ Uses Jaccard similarity over keywords, plus scope-name and description-overlap bonuses.
5
+ """
6
+
7
+
8
+ import re
9
+ from typing import List, Tuple
10
+
11
+
12
+ # Common stop words to exclude from matching
13
+ _STOP_WORDS = {
14
+ "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
15
+ "have", "has", "had", "do", "does", "did", "will", "would", "could",
16
+ "should", "may", "might", "shall", "can", "need", "must", "to", "of",
17
+ "in", "for", "on", "with", "at", "by", "from", "as", "into", "about",
18
+ "like", "through", "after", "before", "between", "under", "above",
19
+ "it", "its", "this", "that", "these", "those", "i", "me", "my", "we",
20
+ "our", "you", "your", "he", "she", "they", "them", "and", "but", "or",
21
+ "not", "no", "if", "then", "so", "very", "just", "also", "only",
22
+ "fix", "add", "update", "change", "make", "get", "set", "new",
23
+ }
24
+
25
+
26
def match_task(
    task: str,
    scopes: List[Tuple[str, List[str], str]],
    threshold: float = 0.05,
) -> List[Tuple[str, float]]:
    """Rank scopes by how relevant they are to a task description.

    Args:
        task: Natural language task description
        scopes: List of (scope_name, keywords, description) tuples
        threshold: Minimum score a scope needs to appear in the results

    Returns:
        List of (scope_name, score), highest score first; empty when the
        task contains no usable words.
    """
    task_words = _tokenize(task)
    if not task_words:
        return []

    # Score every scope lazily, then keep the ones that clear the threshold.
    scored = (
        (scope_name, _score_scope(task_words, scope_name, scope_keywords, scope_description))
        for scope_name, scope_keywords, scope_description in scopes
    )
    ranked = [pair for pair in scored if pair[1] >= threshold]
    ranked.sort(key=lambda pair: -pair[1])
    return ranked
53
+
54
+
55
def _score_scope(
    task_words: set,
    scope_name: str,
    keywords: List[str],
    description: str,
) -> float:
    """Score how well a scope matches a task.

    Combines:
    1. Jaccard similarity between task words and scope keywords (weight 0.6)
    2. Token overlap between scope name and task (+0.25), plus +0.1 when the
       lowercased scope name occurs inside any single task word
    3. Share of description words that occur in the task (weight 0.15)

    Args:
        task_words: Tokenized task description.
        scope_name: Name of the scope being scored.
        keywords: Keywords declared for the scope.
        description: Free-text description of the scope.

    Returns:
        Score capped at 1.0.
    """
    score = 0.0

    # Keyword match (Jaccard similarity)
    kw_words = set()
    for kw in keywords:
        kw_words.update(_tokenize(kw))

    if kw_words:
        # The union is non-empty whenever kw_words is, so no division guard
        # is needed here.
        jaccard = len(task_words & kw_words) / len(task_words | kw_words)
        score += jaccard * 0.6  # 60% weight on keyword match

    # Scope name match
    if _tokenize(scope_name) & task_words:
        score += 0.25  # 25% bonus for name match

    # Also check if the scope name appears as a substring of a task word.
    # Each word is tested on its own: joining the set into one string would
    # make the outcome depend on set iteration order, which varies between
    # interpreter runs. An empty name is skipped because "" is a substring
    # of everything.
    name_lower = scope_name.lower()
    if name_lower and any(name_lower in word for word in task_words):
        score += 0.1

    # Description word overlap
    desc_words = _tokenize(description)
    if desc_words:
        score += (len(task_words & desc_words) / len(desc_words)) * 0.15  # 15% weight

    return min(score, 1.0)
99
+
100
+
101
def _tokenize(text: str) -> set:
    """Split ``text`` into a set of lowercase word tokens.

    Tokens are runs of ASCII letters, digits and underscores. Single-character
    tokens and stop words are dropped.
    """
    tokens = set()
    for candidate in re.findall(r"[a-zA-Z0-9_]+", text.lower()):
        if len(candidate) <= 1 or candidate in _STOP_WORDS:
            continue
        tokens.add(candidate)
    return tokens