dotscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dotscope/.scope +63 -0
- dotscope/__init__.py +3 -0
- dotscope/absorber.py +390 -0
- dotscope/assertions.py +128 -0
- dotscope/ast_analyzer.py +2 -0
- dotscope/backtest.py +2 -0
- dotscope/bench.py +141 -0
- dotscope/budget.py +3 -0
- dotscope/cache.py +2 -0
- dotscope/check/__init__.py +1 -0
- dotscope/check/acknowledge.py +2 -0
- dotscope/check/checker.py +3 -0
- dotscope/check/checks/__init__.py +1 -0
- dotscope/check/checks/antipattern.py +2 -0
- dotscope/check/checks/boundary.py +2 -0
- dotscope/check/checks/contracts.py +3 -0
- dotscope/check/checks/direction.py +2 -0
- dotscope/check/checks/intent.py +2 -0
- dotscope/check/checks/stability.py +2 -0
- dotscope/check/constraints.py +2 -0
- dotscope/check/models.py +15 -0
- dotscope/cli.py +1447 -0
- dotscope/composer.py +147 -0
- dotscope/constants.py +45 -0
- dotscope/context.py +60 -0
- dotscope/counterfactual.py +180 -0
- dotscope/debug.py +220 -0
- dotscope/discovery.py +104 -0
- dotscope/formatter.py +157 -0
- dotscope/graph.py +3 -0
- dotscope/health.py +212 -0
- dotscope/help.py +204 -0
- dotscope/history.py +6 -0
- dotscope/hooks.py +2 -0
- dotscope/ingest.py +858 -0
- dotscope/intent.py +618 -0
- dotscope/lessons.py +223 -0
- dotscope/matcher.py +104 -0
- dotscope/mcp_server.py +1081 -0
- dotscope/models/.scope +45 -0
- dotscope/models/__init__.py +7 -0
- dotscope/models/core.py +288 -0
- dotscope/models/history.py +73 -0
- dotscope/models/intent.py +213 -0
- dotscope/models/passes.py +58 -0
- dotscope/models/state.py +250 -0
- dotscope/models.py +9 -0
- dotscope/near_miss.py +3 -0
- dotscope/onboarding.py +2 -0
- dotscope/parser.py +387 -0
- dotscope/passes/.scope +105 -0
- dotscope/passes/__init__.py +1 -0
- dotscope/passes/ast_analyzer.py +508 -0
- dotscope/passes/backtest.py +198 -0
- dotscope/passes/budget_allocator.py +164 -0
- dotscope/passes/convention_compliance.py +40 -0
- dotscope/passes/convention_discovery.py +247 -0
- dotscope/passes/convention_parser.py +223 -0
- dotscope/passes/graph_builder.py +299 -0
- dotscope/passes/history_miner.py +336 -0
- dotscope/passes/incremental.py +149 -0
- dotscope/passes/lang/__init__.py +38 -0
- dotscope/passes/lang/_base.py +20 -0
- dotscope/passes/lang/_treesitter.py +93 -0
- dotscope/passes/lang/go.py +333 -0
- dotscope/passes/lang/javascript.py +348 -0
- dotscope/passes/lazy.py +152 -0
- dotscope/passes/semantic_diff.py +160 -0
- dotscope/passes/sentinel/__init__.py +1 -0
- dotscope/passes/sentinel/acknowledge.py +222 -0
- dotscope/passes/sentinel/checker.py +383 -0
- dotscope/passes/sentinel/checks/__init__.py +1 -0
- dotscope/passes/sentinel/checks/antipattern.py +84 -0
- dotscope/passes/sentinel/checks/boundary.py +46 -0
- dotscope/passes/sentinel/checks/contracts.py +148 -0
- dotscope/passes/sentinel/checks/convention.py +54 -0
- dotscope/passes/sentinel/checks/direction.py +71 -0
- dotscope/passes/sentinel/checks/intent.py +207 -0
- dotscope/passes/sentinel/checks/stability.py +66 -0
- dotscope/passes/sentinel/checks/voice.py +108 -0
- dotscope/passes/sentinel/constraints.py +472 -0
- dotscope/passes/sentinel/line_filter.py +88 -0
- dotscope/passes/sentinel/models.py +15 -0
- dotscope/passes/virtual.py +239 -0
- dotscope/passes/voice.py +162 -0
- dotscope/passes/voice_defaults.py +28 -0
- dotscope/passes/voice_discovery.py +245 -0
- dotscope/paths.py +32 -0
- dotscope/progress.py +44 -0
- dotscope/regression.py +147 -0
- dotscope/resolver.py +203 -0
- dotscope/scanner.py +246 -0
- dotscope/sessions.py +2 -0
- dotscope/storage/.scope +64 -0
- dotscope/storage/__init__.py +1 -0
- dotscope/storage/cache.py +114 -0
- dotscope/storage/claude_hooks.py +119 -0
- dotscope/storage/git_hooks.py +277 -0
- dotscope/storage/incremental_state.py +61 -0
- dotscope/storage/mcp_config.py +98 -0
- dotscope/storage/near_miss.py +183 -0
- dotscope/storage/onboarding.py +150 -0
- dotscope/storage/session_manager.py +195 -0
- dotscope/storage/timing.py +84 -0
- dotscope/timing.py +2 -0
- dotscope/tokens.py +53 -0
- dotscope/utility.py +123 -0
- dotscope/virtual.py +3 -0
- dotscope/visibility.py +664 -0
- dotscope-0.1.0.dist-info/METADATA +50 -0
- dotscope-0.1.0.dist-info/RECORD +114 -0
- dotscope-0.1.0.dist-info/WHEEL +4 -0
- dotscope-0.1.0.dist-info/entry_points.txt +3 -0
- dotscope-0.1.0.dist-info/licenses/LICENSE +21 -0
dotscope/.scope
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
description: Root interfaces and orchestrators
|
|
2
|
+
includes:
|
|
3
|
+
- dotscope/cli.py
|
|
4
|
+
- dotscope/mcp_server.py
|
|
5
|
+
- dotscope/composer.py
|
|
6
|
+
- dotscope/resolver.py
|
|
7
|
+
- dotscope/intent.py
|
|
8
|
+
- dotscope/ingest.py
|
|
9
|
+
- dotscope/formatter.py
|
|
10
|
+
- dotscope/discovery.py
|
|
11
|
+
- dotscope/parser.py
|
|
12
|
+
- dotscope/visibility.py
|
|
13
|
+
excludes:
|
|
14
|
+
- dotscope/__pycache__/
|
|
15
|
+
- dotscope/models/
|
|
16
|
+
- dotscope/passes/
|
|
17
|
+
- dotscope/storage/
|
|
18
|
+
context: |
|
|
19
|
+
The root dotscope/ directory holds interfaces and orchestrators.
|
|
20
|
+
Heavy lifting is delegated to passes/. State to storage/. Types to models/.
|
|
21
|
+
|
|
22
|
+
## cli.py — Human Interface
|
|
23
|
+
All CLI commands. Dispatches to functional modules. 20+ commands including
|
|
24
|
+
resolve, check, intent, ingest, test-compiler, bench, debug, health.
|
|
25
|
+
|
|
26
|
+
## mcp_server.py — Agent Interface
|
|
27
|
+
FastMCP server. 16 tools: resolve_scope, dotscope_check, dotscope_debug,
|
|
28
|
+
dotscope_acknowledge, match_scope, session_summary, etc.
|
|
29
|
+
Every resolve_scope call is tracked. Constraints injected prophylactically.
|
|
30
|
+
|
|
31
|
+
## composer.py — Scope Algebra
|
|
32
|
+
Parses expressions like auth+payments-tests&api@context.
|
|
33
|
+
Operators: + (union), - (subtract), & (intersect), @ (modifier).
|
|
34
|
+
|
|
35
|
+
## resolver.py — File Resolution
|
|
36
|
+
Resolves a ScopeConfig to concrete files by expanding glob patterns,
|
|
37
|
+
applying excludes, and computing token estimates.
|
|
38
|
+
|
|
39
|
+
## ingest.py — Codebase Ingestion Orchestrator
|
|
40
|
+
Coordinates the full ingest pipeline: graph → history → docs → synthesis →
|
|
41
|
+
backtest → auto-correct → write. The largest orchestrator.
|
|
42
|
+
|
|
43
|
+
## parser.py — YAML Parser
|
|
44
|
+
Minimal YAML subset parser. Zero dependencies. Handles the specific
|
|
45
|
+
patterns .scope and .scopes files use.
|
|
46
|
+
|
|
47
|
+
## visibility.py — DX Features
|
|
48
|
+
SessionTracker, attribution hints, health nudges, counterfactual
|
|
49
|
+
integration. The glue between observation data and agent-facing output.
|
|
50
|
+
|
|
51
|
+
## Gotchas
|
|
52
|
+
Many root files are backward-compat stubs (graph.py, history.py, etc.)
|
|
53
|
+
that re-export from passes/ or storage/. New code should import from
|
|
54
|
+
the canonical location (passes.graph_builder, not graph).
|
|
55
|
+
related:
|
|
56
|
+
- dotscope/models/.scope
|
|
57
|
+
- dotscope/passes/.scope
|
|
58
|
+
- dotscope/storage/.scope
|
|
59
|
+
tags:
|
|
60
|
+
- cli
|
|
61
|
+
- mcp
|
|
62
|
+
- interfaces
|
|
63
|
+
tokens_estimate: 18000
|
dotscope/__init__.py
ADDED
dotscope/absorber.py
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""Documentation absorber: extract architectural knowledge from existing docs.
|
|
2
|
+
|
|
3
|
+
Scans for and extracts knowledge from:
|
|
4
|
+
- README.md, ARCHITECTURE.md, CONTRIBUTING.md
|
|
5
|
+
- Docstrings (module-level and class-level)
|
|
6
|
+
- Inline comments with signals: NOTE, HACK, TODO, WARNING, FIXME, XXX, IMPORTANT
|
|
7
|
+
- Type hints and function signatures (contract-like information)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
from .constants import SKIP_DIRS, SOURCE_EXTS
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class DocFragment:
    """One piece of documentation pulled out of the codebase."""
    source: str  # file path, relative to the repo root
    content: str  # the extracted text itself
    kind: str  # e.g. "readme", "docstring", "comment", "architecture"
    relevance_module: str  # top-level directory this fragment belongs to
    priority: int = 0  # larger means more important (warnings > notes > general)


@dataclass
class AbsorptionResult:
    """Everything the absorber found, plus a per-module index."""
    fragments: List[DocFragment] = field(default_factory=list)
    by_module: Dict[str, List[DocFragment]] = field(default_factory=dict)
    doc_files_found: List[str] = field(default_factory=list)

    def for_module(self, module: str) -> List[DocFragment]:
        """Return the module's fragments, most important first."""
        found = self.by_module.get(module, [])
        return sorted(found, key=lambda frag: frag.priority, reverse=True)

    def synthesize_context(self, module: str, max_chars: int = 2000) -> str:
        """Build a clean context string for a module from its fragments.

        Fragments are emitted grouped by kind, in the order
        architecture > docstrings > comments > readme, and filepath-style
        "[...] " prefixes are stripped — agents need the knowledge, not
        where it came from.
        """
        ordered = self.for_module(module)
        if not ordered:
            return ""

        # Bucket by kind; unknown kinds sort last (rank 99).
        rank = {"architecture": 0, "docstring": 1, "comment": 2, "readme": 3}
        buckets: Dict[str, List[DocFragment]] = {}
        for fragment in ordered:
            buckets.setdefault(fragment.kind, []).append(fragment)

        pieces: List[str] = []
        used = 0
        for kind in sorted(buckets, key=lambda k: rank.get(k, 99)):
            for fragment in buckets[kind]:
                text = fragment.content
                # Drop a leading "[path] " prefix if present.
                if text.startswith("[") and "] " in text:
                    text = text.split("] ", 1)[1]
                # Very short fragments are noise.
                if len(text.strip()) < 10:
                    continue
                # Stop taking fragments of this kind once the budget is hit.
                if used + len(text) > max_chars:
                    break
                pieces.append(text)
                used += len(text)

        return "\n".join(pieces)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Documentation files to look for at the repository root, in scan-priority
# order — earlier entries are joined onto the root and absorbed first by
# _absorb_doc_files. Per-module READMEs are discovered separately via a walk.
_DOC_FILES = [
    "ARCHITECTURE.md",
    "CONTRIBUTING.md",
    "README.md",
    "DESIGN.md",
    "AGENTS.md",
    "docs/architecture.md",
    "docs/design.md",
    "docs/contributing.md",
]
|
|
87
|
+
|
|
88
|
+
# Comment signal patterns and their priorities.
|
|
89
|
+
# Use (.+?) non-greedy to avoid capturing trailing inline comments.
|
|
90
|
+
_SIGNAL_PATTERN = r"\s*:?\s*(.+?)(?:\s+#.*|\s+//.*)?$"
|
|
91
|
+
_COMMENT_SIGNALS = [
|
|
92
|
+
(re.compile(r"#\s*(WARNING|DANGER|CRITICAL)" + _SIGNAL_PATTERN, re.I), 10),
|
|
93
|
+
(re.compile(r"#\s*(IMPORTANT|INVARIANT)" + _SIGNAL_PATTERN, re.I), 9),
|
|
94
|
+
(re.compile(r"#\s*(HACK|WORKAROUND)" + _SIGNAL_PATTERN, re.I), 8),
|
|
95
|
+
(re.compile(r"#\s*(NOTE|NB)" + _SIGNAL_PATTERN, re.I), 6),
|
|
96
|
+
(re.compile(r"#\s*(TODO|FIXME|XXX)" + _SIGNAL_PATTERN, re.I), 5),
|
|
97
|
+
(re.compile(r"//\s*(WARNING|DANGER|CRITICAL)" + _SIGNAL_PATTERN, re.I), 10),
|
|
98
|
+
(re.compile(r"//\s*(IMPORTANT|INVARIANT)" + _SIGNAL_PATTERN, re.I), 9),
|
|
99
|
+
(re.compile(r"//\s*(HACK|WORKAROUND)" + _SIGNAL_PATTERN, re.I), 8),
|
|
100
|
+
(re.compile(r"//\s*(NOTE|NB)" + _SIGNAL_PATTERN, re.I), 6),
|
|
101
|
+
(re.compile(r"//\s*(TODO|FIXME|XXX)" + _SIGNAL_PATTERN, re.I), 5),
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def absorb_docs(root: str, apis: Optional[Dict] = None) -> AbsorptionResult:
    """Scan a codebase and absorb all architectural documentation.

    Gathers fragments from doc files, docstrings, signal comments, and
    (when available) AST-extracted API surfaces, then indexes them by module.

    Args:
        root: Repository root
        apis: Optional dict of {rel_path: ModuleAPI} from AST analysis.
            When provided, AST data replaces the regex docstring scan.
    """
    root = os.path.abspath(root)
    result = AbsorptionResult()

    # Doc files first; then code-derived knowledge (AST when we have it,
    # regex docstring extraction as the fallback); then signal comments.
    _absorb_doc_files(root, result)
    if apis:
        _absorb_from_ast(root, apis, result)
    else:
        _absorb_docstrings(root, result)
    _absorb_signal_comments(root, result)

    # Index every fragment under its module for fast per-module lookup.
    for fragment in result.fragments:
        result.by_module.setdefault(fragment.relevance_module, []).append(fragment)

    return result
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _absorb_from_ast(root: str, apis: Dict, result: AbsorptionResult) -> None:
    """Extract documentation fragments from AST-analyzed modules."""
    for rel_path, api in apis.items():
        segments = rel_path.split(os.sep)
        module = segments[0] if len(segments) > 1 else "_root"

        def _add(text: str, kind: str, priority: int) -> None:
            # All fragments for this file share the same source and module.
            result.fragments.append(DocFragment(
                source=rel_path,
                content=text,
                kind=kind,
                relevance_module=module,
                priority=priority,
            ))

        # Module docstring (from AST, more reliable than regex).
        if api.docstring and len(api.docstring) > 20:
            _add(api.docstring, "docstring", 4)

        public_classes = [c for c in api.classes if c.is_public]
        public_functions = [f for f in api.functions if f.is_public]

        # Public API surface: classes first, then free functions.
        surface: List[str] = []
        for cls in public_classes:
            base_part = f"({', '.join(cls.bases)})" if cls.bases else ""
            decs = ", ".join(f"@{d}" for d in cls.decorators[:3])
            dec_part = f" [{decs}]" if decs else ""
            visible = [m for m in cls.methods if not m.startswith("_")]
            surface.append(
                f"{cls.name}{base_part}{dec_part} — {len(visible)} public method(s)"
            )
        for fn in public_functions:
            sig = ", ".join(fn.params[:5])
            ret_part = f" -> {fn.return_type}" if fn.return_type else ""
            decs = ", ".join(f"@{d}" for d in fn.decorators[:2])
            dec_part = f" [{decs}]" if decs else ""
            surface.append(f"{fn.name}({sig}){ret_part}{dec_part}")
        if surface:
            _add("\n".join(surface), "api_surface", 8)

        # Class hierarchies for public classes that inherit from something.
        for cls in public_classes:
            if cls.bases:
                _add(
                    f"{cls.name} inherits from {', '.join(cls.bases)}",
                    "class_hierarchy",
                    7,
                )

        # Framework-significant decorator usage (all functions, not just
        # public ones); one fragment per matching decorator.
        significant = {"dataclass", "abstractmethod", "property",
                       "staticmethod", "classmethod", "app.route",
                       "pytest.fixture", "lru_cache", "cached_property"}
        for fn in api.functions:
            for dec in fn.decorators:
                if any(marker in dec for marker in significant):
                    _add(f"@{dec} on {fn.name}", "decorator_pattern", 6)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _absorb_doc_files(root: str, result: AbsorptionResult) -> None:
    """Find and absorb documentation files."""
    # Repo-root docs: split into markdown sections and route each section
    # to whichever module its text appears to mention.
    for doc_name in _DOC_FILES:
        doc_path = os.path.join(root, doc_name)
        if not os.path.isfile(doc_path):
            continue
        result.doc_files_found.append(doc_name)
        content = _read_file(doc_path)
        if not content:
            continue
        high_priority = "ARCHITECTURE" in doc_name.upper()
        for heading, body in _split_markdown_sections(content):
            module = _guess_module_from_text(heading + " " + body, root)
            result.fragments.append(DocFragment(
                source=doc_name,
                content=f"From {doc_name} — {heading}:\n{body[:500]}",
                kind="readme" if "README" in doc_name else "architecture",
                relevance_module=module or "_root",
                priority=7 if high_priority else 5,
            ))

    # Per-module docs (e.g. auth/README.md): the whole file, truncated,
    # attributed to the file's top-level directory.
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
        rel_dir = os.path.relpath(dirpath, root)
        if rel_dir == ".":
            continue

        for name in filenames:
            if name.upper() not in ("README.MD", "ARCHITECTURE.MD", "DESIGN.MD"):
                continue
            rel_path = os.path.join(rel_dir, name)
            content = _read_file(os.path.join(dirpath, name))
            if not content:
                continue
            result.doc_files_found.append(rel_path)
            result.fragments.append(DocFragment(
                source=rel_path,
                content=content[:1000],
                kind="readme",
                relevance_module=rel_dir.split(os.sep)[0],
                priority=8,  # module-specific docs outrank generic ones
            ))
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _absorb_docstrings(root: str, result: AbsorptionResult) -> None:
    """Extract module-level docstrings from Python files (regex fallback path)."""
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]

        for name in filenames:
            if not name.endswith(".py"):
                continue

            full_path = os.path.join(dirpath, name)
            docstring = _extract_module_docstring(full_path)
            # Skip missing or trivially short docstrings (noise).
            if not docstring or len(docstring) <= 20:
                continue

            rel_path = os.path.relpath(full_path, root)
            segments = rel_path.split(os.sep)
            result.fragments.append(DocFragment(
                source=rel_path,
                content=docstring,
                kind="docstring",
                relevance_module=segments[0] if len(segments) > 1 else "_root",
                priority=4,
            ))
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _absorb_signal_comments(root: str, result: AbsorptionResult) -> None:
    """Find comments with signal words (WARNING, NOTE, HACK, TODO, etc.)."""
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]

        for name in filenames:
            # Only scan recognized source-file extensions.
            if os.path.splitext(name)[1].lower() not in SOURCE_EXTS:
                continue

            full_path = os.path.join(dirpath, name)
            rel_path = os.path.relpath(full_path, root)
            segments = rel_path.split(os.sep)
            module = segments[0] if len(segments) > 1 else "_root"

            for text, priority in _extract_signal_comments(full_path):
                result.fragments.append(DocFragment(
                    source=rel_path,
                    content=text,
                    kind="comment",
                    relevance_module=module,
                    priority=priority,
                ))
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _extract_module_docstring(filepath: str) -> Optional[str]:
    """Extract the module-level docstring from a Python file.

    Returns None when the file is unreadable or has no leading
    triple-quoted string.
    """
    text = _read_file(filepath)
    if not text:
        return None

    # A triple-quoted string at the top of the file, allowing any number
    # of leading comment lines (shebang, encoding cookie) before it.
    match = re.match(
        r'^(?:\s*#[^\n]*\n)*\s*(?:"""(.*?)"""|\'\'\'(.*?)\'\'\')',
        text,
        re.DOTALL,
    )
    if not match:
        return None
    return (match.group(1) or match.group(2) or "").strip()
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _extract_signal_comments(filepath: str) -> List[Tuple[str, int]]:
    """Extract (text, priority) pairs for signal comments in a source file."""
    text = _read_file(filepath)
    if not text:
        return []

    found: List[Tuple[str, int]] = []
    for raw in text.splitlines():
        stripped = raw.strip()
        for pattern, priority in _COMMENT_SIGNALS:
            match = pattern.search(stripped)
            if match is None:
                continue
            label = match.group(1).upper()
            body = match.group(2).strip()
            found.append((f"{label}: {body}", priority))
            break  # record at most one signal per line

    return found
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _split_markdown_sections(content: str) -> List[Tuple[str, str]]:
|
|
346
|
+
"""Split markdown content into (heading, body) sections."""
|
|
347
|
+
sections = []
|
|
348
|
+
current_heading = "Introduction"
|
|
349
|
+
current_body: List[str] = []
|
|
350
|
+
|
|
351
|
+
for line in content.splitlines():
|
|
352
|
+
if line.startswith("#"):
|
|
353
|
+
if current_body:
|
|
354
|
+
sections.append((current_heading, "\n".join(current_body).strip()))
|
|
355
|
+
current_heading = line.lstrip("#").strip()
|
|
356
|
+
current_body = []
|
|
357
|
+
else:
|
|
358
|
+
current_body.append(line)
|
|
359
|
+
|
|
360
|
+
if current_body:
|
|
361
|
+
sections.append((current_heading, "\n".join(current_body).strip()))
|
|
362
|
+
|
|
363
|
+
return sections
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _guess_module_from_text(text: str, root: str) -> Optional[str]:
    """Guess which module a text fragment is about from directory names.

    Returns the first non-skipped top-level directory whose name appears
    (case-insensitively) in the text, or None.
    """
    lowered = text.lower()

    try:
        entries = os.listdir(root)
    except OSError:
        return None

    for entry in entries:
        if entry in SKIP_DIRS:
            continue
        if not os.path.isdir(os.path.join(root, entry)):
            continue
        if entry.lower() in lowered:
            return entry

    return None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _read_file(path: str) -> Optional[str]:
|
|
385
|
+
"""Read a file safely."""
|
|
386
|
+
try:
|
|
387
|
+
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
|
388
|
+
return f.read()
|
|
389
|
+
except (IOError, OSError):
|
|
390
|
+
return None
|
dotscope/assertions.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Architectural assertions: compiler-grade guarantees on scope resolution.
|
|
2
|
+
|
|
3
|
+
Assertions prevent silent context corruption. If a critical file gets
|
|
4
|
+
dropped by token budgeting, dotscope raises an error instead of serving
|
|
5
|
+
incomplete context.
|
|
6
|
+
|
|
7
|
+
Defined in intent.yaml (project-wide) or .scope files (per-scope).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from .models.intent import Assertion, ContextExhaustionError # noqa: F401
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_assertions(repo_root: str, scope_name: str = "") -> List[Assertion]:
    """Load assertions from intent.yaml and the scope's .scope file.

    Args:
        repo_root: Repository root
        scope_name: Current scope being resolved (for per-scope assertions)
    """
    import os

    found: List[Assertion] = []

    # 1. Project-wide assertions from intent.yaml. Parse errors here
    # propagate: a broken intent.yaml should be loud.
    intent_path = os.path.join(repo_root, "intent.yaml")
    if os.path.exists(intent_path):
        from .parser import _parse_yaml
        with open(intent_path, "r", encoding="utf-8") as f:
            parsed = _parse_yaml(f.read())
        for item in _to_list_of_dicts(parsed.get("assertions", [])):
            found.append(_parse_assertion(item))

    # 2. Per-scope assertions from the scope's .scope file.
    if scope_name:
        from .discovery import find_scope
        scope_path = find_scope(scope_name, repo_root)
        if scope_path:
            try:
                from .parser import _parse_yaml
                with open(scope_path, "r", encoding="utf-8") as f:
                    raw = _parse_yaml(f.read()).get("assertions", {})
                if isinstance(raw, dict):
                    assertion = Assertion(scope=scope_name)
                    assertion.ensure_includes = _str_list(raw.get("ensure_includes", []))
                    assertion.ensure_context_contains = _str_list(
                        raw.get("ensure_context_contains", [])
                    )
                    assertion.ensure_constraints = bool(raw.get("ensure_constraints", False))
                    if (assertion.ensure_includes
                            or assertion.ensure_context_contains
                            or assertion.ensure_constraints):
                        found.append(assertion)
            except Exception:
                # Per-scope assertions are best-effort; a malformed .scope
                # file must not break resolution.
                pass

    return found
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_required_files(
    assertions: List[Assertion],
    scope_name: str,
) -> set:
    """Return set of files that must be included (infinite utility).

    An assertion applies when it is global ("*") or targets this scope.
    """
    return {
        path
        for a in assertions
        if a.scope in ("*", scope_name)
        for path in a.ensure_includes
    }
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def check_output_assertions(
    resolved_context: str,
    constraints: list,
    assertions: List[Assertion],
    scope_name: str,
) -> Optional[ContextExhaustionError]:
    """Check non-file assertions against resolved output.

    Returns the first violated assertion as a ContextExhaustionError,
    or None when every applicable assertion holds.
    """
    # Substring checks are case-insensitive; lower the haystack once.
    context_lower = resolved_context.lower()

    for a in assertions:
        # Only global ("*") assertions and those for this scope apply.
        if a.scope not in ("*", scope_name):
            continue

        for substring in a.ensure_context_contains:
            if substring.lower() not in context_lower:
                return ContextExhaustionError(
                    assertion_type="ensure_context_contains",
                    detail=f"Context must contain '{substring}'",
                    reason=a.reason,
                    suggestion=f"Add '{substring}' to scope context or check context truncation",
                )

        if a.ensure_constraints and not constraints:
            return ContextExhaustionError(
                assertion_type="ensure_constraints",
                detail="Resolve response must include constraints",
                reason=a.reason,
                suggestion="Check that invariants.json and intent.yaml exist",
            )

    return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# Internals
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
def _parse_assertion(item: dict) -> Assertion:
    """Build an Assertion from one intent.yaml entry, with safe defaults."""
    scope = str(item.get("scope", "*"))
    includes = _str_list(item.get("ensure_includes", []))
    contains = _str_list(item.get("ensure_context_contains", []))
    needs_constraints = bool(item.get("ensure_constraints", False))
    reason = str(item.get("reason", ""))
    return Assertion(
        scope=scope,
        ensure_includes=includes,
        ensure_context_contains=contains,
        ensure_constraints=needs_constraints,
        reason=reason,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _str_list(val: object) -> List[str]:
|
|
117
|
+
if isinstance(val, list):
|
|
118
|
+
return [str(v) for v in val]
|
|
119
|
+
if isinstance(val, str) and val:
|
|
120
|
+
return [v.strip() for v in val.split(",")]
|
|
121
|
+
return []
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _to_list_of_dicts(val: object) -> List[dict]:
|
|
125
|
+
"""Handle both parsed lists and raw YAML structures."""
|
|
126
|
+
if isinstance(val, list):
|
|
127
|
+
return [v for v in val if isinstance(v, dict)]
|
|
128
|
+
return []
|
dotscope/ast_analyzer.py
ADDED
dotscope/backtest.py
ADDED