reporails-cli 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. reporails_cli/.env.example +1 -0
  2. reporails_cli/__init__.py +24 -0
  3. reporails_cli/bundled/.semgrepignore +51 -0
  4. reporails_cli/bundled/__init__.py +31 -0
  5. reporails_cli/bundled/capability-patterns.yml +54 -0
  6. reporails_cli/bundled/levels.yml +99 -0
  7. reporails_cli/core/__init__.py +35 -0
  8. reporails_cli/core/agents.py +147 -0
  9. reporails_cli/core/applicability.py +150 -0
  10. reporails_cli/core/bootstrap.py +147 -0
  11. reporails_cli/core/cache.py +352 -0
  12. reporails_cli/core/capability.py +245 -0
  13. reporails_cli/core/discover.py +362 -0
  14. reporails_cli/core/engine.py +177 -0
  15. reporails_cli/core/init.py +309 -0
  16. reporails_cli/core/levels.py +177 -0
  17. reporails_cli/core/models.py +329 -0
  18. reporails_cli/core/opengrep/__init__.py +34 -0
  19. reporails_cli/core/opengrep/runner.py +203 -0
  20. reporails_cli/core/opengrep/semgrepignore.py +39 -0
  21. reporails_cli/core/opengrep/templates.py +138 -0
  22. reporails_cli/core/registry.py +155 -0
  23. reporails_cli/core/sarif.py +181 -0
  24. reporails_cli/core/scorer.py +178 -0
  25. reporails_cli/core/semantic.py +193 -0
  26. reporails_cli/core/utils.py +139 -0
  27. reporails_cli/formatters/__init__.py +19 -0
  28. reporails_cli/formatters/json.py +137 -0
  29. reporails_cli/formatters/mcp.py +68 -0
  30. reporails_cli/formatters/text/__init__.py +32 -0
  31. reporails_cli/formatters/text/box.py +89 -0
  32. reporails_cli/formatters/text/chars.py +42 -0
  33. reporails_cli/formatters/text/compact.py +119 -0
  34. reporails_cli/formatters/text/components.py +117 -0
  35. reporails_cli/formatters/text/full.py +135 -0
  36. reporails_cli/formatters/text/rules.py +50 -0
  37. reporails_cli/formatters/text/violations.py +92 -0
  38. reporails_cli/interfaces/__init__.py +1 -0
  39. reporails_cli/interfaces/cli/__init__.py +7 -0
  40. reporails_cli/interfaces/cli/main.py +352 -0
  41. reporails_cli/interfaces/mcp/__init__.py +5 -0
  42. reporails_cli/interfaces/mcp/server.py +194 -0
  43. reporails_cli/interfaces/mcp/tools.py +136 -0
  44. reporails_cli/py.typed +0 -0
  45. reporails_cli/templates/__init__.py +65 -0
  46. reporails_cli/templates/cli_box.txt +10 -0
  47. reporails_cli/templates/cli_cta.txt +4 -0
  48. reporails_cli/templates/cli_delta.txt +1 -0
  49. reporails_cli/templates/cli_file_header.txt +1 -0
  50. reporails_cli/templates/cli_legend.txt +1 -0
  51. reporails_cli/templates/cli_pending.txt +3 -0
  52. reporails_cli/templates/cli_violation.txt +1 -0
  53. reporails_cli/templates/cli_working.txt +2 -0
  54. reporails_cli-0.0.1.dist-info/METADATA +108 -0
  55. reporails_cli-0.0.1.dist-info/RECORD +58 -0
  56. reporails_cli-0.0.1.dist-info/WHEEL +4 -0
  57. reporails_cli-0.0.1.dist-info/entry_points.txt +3 -0
  58. reporails_cli-0.0.1.dist-info/licenses/LICENSE +201 -0
reporails_cli/core/capability.py
@@ -0,0 +1,245 @@
+ """Capability detection - determines project capability level.
+
+ Two-phase detection:
+ 1. Filesystem (applicability.py) - directory/file existence
+ 2. Content (this module) - OpenGrep pattern matching
+ """
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ from reporails_cli.core.levels import capability_score_to_level, detect_orphan_features
+ from reporails_cli.core.models import (
+     CapabilityResult,
+     ContentFeatures,
+     DetectedFeatures,
+     Level,
+ )
+
+ # Capability weights for scoring
+ CAPABILITY_WEIGHTS: dict[str, int] = {
+     "has_instruction_file": 1,
+     "has_sections": 1,
+     "has_imports": 1,
+     "has_explicit_constraints": 1,
+     "has_rules_dir": 2,
+     "has_path_scoped_rules": 1,
+     "has_shared_files": 1,
+     "component_count_3plus": 1,
+     "has_backbone": 2,
+ }
+ # Max: 11 points
+
+
+ def detect_features_content(sarif: dict[str, Any]) -> ContentFeatures:
+     """Parse OpenGrep SARIF output to detect content features.
+
+     Args:
+         sarif: SARIF output from capability pattern detection
+
+     Returns:
+         ContentFeatures with detected flags
+     """
+     has_sections = False
+     has_imports = False
+     has_explicit_constraints = False
+     has_path_scoped_rules = False
+
+     for run in sarif.get("runs", []):
+         for result in run.get("results", []):
+             rule_id = result.get("ruleId", "")
+
+             if "has-sections" in rule_id:
+                 has_sections = True
+             elif "has-imports" in rule_id:
+                 has_imports = True
+             elif "has-explicit-constraints" in rule_id:
+                 has_explicit_constraints = True
+             elif "has-path-scoped-rules" in rule_id:
+                 has_path_scoped_rules = True
+
+     return ContentFeatures(
+         has_sections=has_sections,
+         has_imports=has_imports,
+         has_explicit_constraints=has_explicit_constraints,
+         has_path_scoped_rules=has_path_scoped_rules,
+     )
+
+
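For reference, a minimal sketch of the SARIF shape detect_features_content() consumes. The ruleId value here is hypothetical; the function only does substring matching on the detector names shown above, so any OpenGrep rule id containing "has-sections" trips the same flag:

    from reporails_cli.core.capability import detect_features_content

    # Hypothetical OpenGrep SARIF output with a single finding.
    sarif = {
        "runs": [
            {"results": [{"ruleId": "capability.has-sections"}]},
        ]
    }
    features = detect_features_content(sarif)
    print(features.has_sections)  # True; the other three flags remain False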
+ def calculate_capability_score(features: DetectedFeatures) -> int:
+     """Calculate capability score from features.
+
+     Args:
+         features: Detected project features
+
+     Returns:
+         Score from 0 to 11
+     """
+     score = 0
+
+     # Phase 1 features (filesystem)
+     if features.has_instruction_file or features.has_claude_md:
+         score += CAPABILITY_WEIGHTS["has_instruction_file"]
+     if features.has_rules_dir:
+         score += CAPABILITY_WEIGHTS["has_rules_dir"]
+     if features.has_shared_files:
+         score += CAPABILITY_WEIGHTS["has_shared_files"]
+     if features.component_count >= 3:
+         score += CAPABILITY_WEIGHTS["component_count_3plus"]
+     if features.has_backbone:
+         score += CAPABILITY_WEIGHTS["has_backbone"]
+
+     # Phase 2 features (content)
+     if features.has_sections:
+         score += CAPABILITY_WEIGHTS["has_sections"]
+     if features.has_imports:
+         score += CAPABILITY_WEIGHTS["has_imports"]
+     if features.has_explicit_constraints:
+         score += CAPABILITY_WEIGHTS["has_explicit_constraints"]
+     if features.has_path_scoped_rules:
+         score += CAPABILITY_WEIGHTS["has_path_scoped_rules"]
+
+     return score
+
+
+ def calculate_filesystem_score(features: DetectedFeatures) -> int:
+     """Calculate capability score from filesystem features only.
+
+     Used for early rule filtering before OpenGrep runs.
+     Returns a conservative estimate (may be lower than the final level).
+
+     Args:
+         features: Detected project features (filesystem only)
+
+     Returns:
+         Score from 0 to 7 (filesystem features max)
+     """
+     score = 0
+
+     if features.has_instruction_file or features.has_claude_md:
+         score += CAPABILITY_WEIGHTS["has_instruction_file"]
+     if features.has_rules_dir:
+         score += CAPABILITY_WEIGHTS["has_rules_dir"]
+     if features.has_shared_files:
+         score += CAPABILITY_WEIGHTS["has_shared_files"]
+     if features.component_count >= 3:
+         score += CAPABILITY_WEIGHTS["component_count_3plus"]
+     if features.has_backbone:
+         score += CAPABILITY_WEIGHTS["has_backbone"]
+
+     return score
+
+
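Since both scorers only read attributes off the features object, the arithmetic can be exercised with a duck-typed stand-in. A sketch: SimpleNamespace is not the package's DetectedFeatures model, and the attribute set below is taken from the reads shown above:

    from types import SimpleNamespace

    from reporails_cli.core.capability import calculate_capability_score

    # Stand-in carrying only the attributes the scorer reads.
    features = SimpleNamespace(
        has_instruction_file=True, has_claude_md=False,  # +1
        has_rules_dir=True,                              # +2
        has_shared_files=False,
        component_count=4,                               # +1 (>= 3)
        has_backbone=False,
        has_sections=True,                               # +1
        has_imports=False,
        has_explicit_constraints=True,                   # +1
        has_path_scoped_rules=False,
    )
    print(calculate_capability_score(features))  # 6 of a possible 11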
+ def estimate_preliminary_level(features: DetectedFeatures) -> Level:
+     """Estimate capability level from filesystem features only.
+
+     Conservative estimate for early rule filtering.
+     Final level is determined after content analysis.
+
+     Args:
+         features: Detected project features (filesystem only)
+
+     Returns:
+         Preliminary Level (may be lower than final)
+     """
+     score = calculate_filesystem_score(features)
+     return capability_score_to_level(score)
+
+
+ def merge_content_features(
+     features: DetectedFeatures,
+     content_features: ContentFeatures,
+ ) -> DetectedFeatures:
+     """Merge content features into the main features object.
+
+     Args:
+         features: Base features from filesystem detection
+         content_features: Features from content detection
+
+     Returns:
+         Updated DetectedFeatures
+     """
+     features.has_sections = content_features.has_sections
+     features.has_imports = features.has_imports or content_features.has_imports
+     features.has_explicit_constraints = content_features.has_explicit_constraints
+     features.has_path_scoped_rules = content_features.has_path_scoped_rules
+     return features
+
+
+ def determine_capability_level(
+     features: DetectedFeatures,
+     content_features: ContentFeatures | None = None,
+ ) -> CapabilityResult:
+     """Determine capability level from features.
+
+     Two-phase pipeline:
+     1. Filesystem features (already in features)
+     2. Content features (merged in)
+
+     Args:
+         features: Detected project features
+         content_features: Optional content features to merge
+
+     Returns:
+         CapabilityResult with level, score, and summary
+     """
+     # Merge content features if provided
+     if content_features:
+         merge_content_features(features, content_features)
+
+     # Calculate score and level
+     score = calculate_capability_score(features)
+     level = capability_score_to_level(score)
+
+     # Check for orphan features
+     has_orphan = detect_orphan_features(features, level)
+
+     # Generate summary
+     summary = get_feature_summary(features)
+
+     return CapabilityResult(
+         features=features,
+         capability_score=score,
+         level=level,
+         has_orphan_features=has_orphan,
+         feature_summary=summary,
+     )
+
+
+ def get_feature_summary(features: DetectedFeatures) -> str:
+     """Generate human-readable summary of detected features.
+
+     Args:
+         features: Detected project features
+
+     Returns:
+         Summary string for display
+     """
+     parts = []
+
+     # File count
+     file_count = features.instruction_file_count
+     if file_count == 0:
+         parts.append("No instruction files")
+     elif file_count == 1:
+         parts.append("1 instruction file")
+     else:
+         parts.append(f"{file_count} instruction files")
+
+     # Features present
+     feature_list = []
+     if features.has_rules_dir:
+         feature_list.append(".claude/rules/")
+     if features.has_backbone:
+         feature_list.append("backbone.yml")
+     if features.has_shared_files:
+         feature_list.append("shared files")
+     if features.has_hierarchical_structure:
+         feature_list.append("hierarchical")
+
+     if feature_list:
+         parts.append(" + ".join(feature_list))
+
+     return ", ".join(parts) if parts else "No features detected"
reporails_cli/core/discover.py
@@ -0,0 +1,362 @@
+ """Discovery engine - analyze instruction files and generate backbone.
+
+ Deterministic discovery of:
+ - Component structure (from directory hierarchy)
+ - File references (from markdown content)
+ - Dependencies (imports between instruction files)
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import re
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import Any
+
+ import yaml
+
+ from reporails_cli.core.agents import DetectedAgent, detect_agents, get_all_instruction_files
+
+
+ @dataclass
+ class FileReference:
+     """A reference to another file found in instruction content."""
+
+     path: str
+     line_number: int
+     context: str  # The line containing the reference
+
+
+ @dataclass
+ class Component:
+     """A discovered component (directory with instruction files)."""
+
+     id: str  # Dot-separated path: "langgraph.app.agents"
+     root: Path
+     instruction_files: list[Path] = field(default_factory=list)
+     imports: list[str] = field(default_factory=list)  # Referenced files
+     children: list[str] = field(default_factory=list)  # Child component IDs
+     parent: str | None = None
+     content_hash: str | None = None
+
+
+ @dataclass
+ class DiscoveryResult:
+     """Result of discovery operation."""
+
+     target: Path
+     discovered_at: str
+     agents: list[DetectedAgent]
+     components: dict[str, Component]
+     shared_files: list[str]
+     total_instruction_files: int
+     total_references: int
+
+
+ # Patterns for extracting file references from markdown
+ REFERENCE_PATTERNS = [
+     # Backtick paths: `path/to/file.ext` or `./path/to/file`
+     re.compile(r"`([./][\w\-./]+\.\w+)`"),
+     re.compile(r"`(\.?\.?/[\w\-./]+)`"),
+     # Markdown links: [text](path/to/file)
+     re.compile(r"\[.*?\]\(([./][\w\-./]+\.?\w*)\)"),
+     # Read/See commands: Read `file` or See "file"
+     re.compile(r"(?:Read|See|Check|Load)\s+[`'\"]([^`'\"]+)[`'\"]", re.IGNORECASE),
+     # Numbered lists with paths: 1. Read `.shared/sys.yml`
+     re.compile(r"^\s*\d+\.\s+.*?[`'\"]([./][\w\-./]+)[`'\"]", re.MULTILINE),
+ ]
+
+
+ def extract_references(content: str) -> list[FileReference]:
+     """
+     Extract file references from instruction file content.
+
+     Deterministic: uses regex patterns to find explicit path references.
+
+     Args:
+         content: Markdown content to analyze
+
+     Returns:
+         List of file references found
+     """
+     references: list[FileReference] = []
+     lines = content.split("\n")
+
+     for line_num, line in enumerate(lines, 1):
+         for pattern in REFERENCE_PATTERNS:
+             for match in pattern.finditer(line):
+                 path = match.group(1)
+                 # Filter out obvious non-paths
+                 if _is_valid_path_reference(path):
+                     references.append(
+                         FileReference(
+                             path=path,
+                             line_number=line_num,
+                             context=line.strip()[:100],
+                         )
+                     )
+
+     return references
+
+
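A quick sketch of the extractor on a sample snippet. Note that several of the patterns above can match the same line (the backtick, Read-command, and numbered-list forms all hit line 2 here), so the raw list may contain duplicates; discover_components() deduplicates imports later:

    from reporails_cli.core.discover import extract_references

    sample = """\
    # Agent setup
    1. Read `.shared/sys.yml`
    See also [the rules](./rules/base.md).
    """
    for ref in extract_references(sample):
        print(ref.line_number, ref.path)
    # line 2 yields .shared/sys.yml (more than once), line 3 yields ./rules/base.md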
+ def _is_valid_path_reference(path: str) -> bool:
+     """Check if a string looks like a valid file path reference."""
+     # Must have at least one slash or dot
+     if "/" not in path and "." not in path:
+         return False
+     # Filter out URLs
+     if path.startswith("http://") or path.startswith("https://"):
+         return False
+     # Reject path traversal attempts with excessive parent refs (../../../etc)
+     if path.count("..") > 2:
+         return False
+     # Reject absolute paths
+     if path.startswith("/"):
+         return False
+     # Filter out common false positives
+     false_positives = {"e.g.", "i.e.", "etc.", "vs.", "v1", "v2"}
+     return path.lower() not in false_positives
+
+
+ def _is_path_within_bounds(path: str, project_root: Path) -> bool:
+     """Check if resolved path stays within project bounds."""
+     try:
+         resolved = (project_root / path).resolve()
+         return resolved.is_relative_to(project_root.resolve())
+     except (ValueError, OSError):
+         return False
+
+
+ def compute_content_hash(file_path: Path) -> str:
+     """Compute SHA256 hash of file content."""
+     content = file_path.read_bytes()
+     return f"sha256:{hashlib.sha256(content).hexdigest()[:16]}"
+
+
+ def discover_components(target: Path, instruction_files: list[Path]) -> dict[str, Component]:
+     """
+     Discover components from instruction file locations.
+
+     Component = directory containing instruction file(s).
+     Hierarchy derived from directory structure.
+
+     Args:
+         target: Project root
+         instruction_files: All discovered instruction files
+
+     Returns:
+         Dict mapping component ID to Component
+     """
+     components: dict[str, Component] = {}
+
+     for file_path in instruction_files:
+         # Get directory containing the instruction file
+         component_dir = file_path.parent
+         relative_dir = component_dir.relative_to(target)
+
+         # Create component ID from path
+         if relative_dir == Path("."):
+             component_id = "root"
+         else:
+             component_id = str(relative_dir).replace("/", ".").replace("\\", ".")
+
+         # Create or update component
+         if component_id not in components:
+             components[component_id] = Component(
+                 id=component_id,
+                 root=component_dir,
+             )
+
+         component = components[component_id]
+         component.instruction_files.append(file_path)
+
+         # Extract references from file content
+         content = file_path.read_text(encoding="utf-8")
+         refs = extract_references(content)
+         # Filter: only include references that stay within project bounds
+         valid_refs = [r.path for r in refs if _is_path_within_bounds(r.path, target)]
+         component.imports.extend(valid_refs)
+
+         # Compute content hash (from the component's first instruction file)
+         if component.content_hash is None:
+             component.content_hash = compute_content_hash(file_path)
+
+     # Deduplicate imports
+     for component in components.values():
+         component.imports = sorted(set(component.imports))
+
+     # Build parent-child relationships
+     _build_hierarchy(components)
+
+     return components
+
+
+ def _build_hierarchy(components: dict[str, Component]) -> None:
+     """Build parent-child relationships between components."""
+     component_ids = sorted(components.keys())
+
+     for comp_id in component_ids:
+         component = components[comp_id]
+
+         # Find parent (longest matching dotted prefix)
+         parts = comp_id.split(".")
+         for i in range(len(parts) - 1, 0, -1):
+             parent_id = ".".join(parts[:i])
+             if parent_id in components:
+                 component.parent = parent_id
+                 components[parent_id].children.append(comp_id)
+                 break
+
+
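A self-contained sketch of discovery over a throwaway tree (the directory and file names are hypothetical). The nested file produces an "app.agents" component; since no instruction file sits in app/ itself, the prefix walk finds no parent for it:

    import tempfile
    from pathlib import Path

    from reporails_cli.core.discover import discover_components

    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        (root / "app" / "agents").mkdir(parents=True)
        (root / "CLAUDE.md").write_text("See `./app/agents/CLAUDE.md`\n")
        (root / "app" / "agents" / "CLAUDE.md").write_text("# Agents\n")

        files = [root / "CLAUDE.md", root / "app" / "agents" / "CLAUDE.md"]
        components = discover_components(root, files)
        print(sorted(components))          # ['app.agents', 'root']
        print(components["root"].imports)  # ['./app/agents/CLAUDE.md']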
+ def find_shared_files(components: dict[str, Component], target: Path) -> list[str]:
+     """
+     Identify shared files (referenced by multiple components).
+
+     Args:
+         components: Discovered components
+         target: Project root
+
+     Returns:
+         List of shared file paths
+     """
+     # Count references to each file
+     ref_counts: dict[str, int] = {}
+     for component in components.values():
+         for ref in component.imports:
+             ref_counts[ref] = ref_counts.get(ref, 0) + 1
+
+     # Files referenced by 2+ components are shared
+     shared = [path for path, count in ref_counts.items() if count >= 2]
+
+     # Also include common shared directories
+     for pattern in [".shared/**/*", ".ai/shared/**/*", "shared/**/*"]:
+         for path in target.glob(pattern):
+             if path.is_file():
+                 rel_path = str(path.relative_to(target))
+                 if rel_path not in shared:
+                     shared.append(rel_path)
+
+     return sorted(set(shared))
+
+
+ def run_discovery(target: Path) -> DiscoveryResult:
+     """
+     Run full discovery on target directory.
+
+     Deterministic analysis:
+     1. Detect which coding agents are configured
+     2. Find all instruction files
+     3. Extract references from content
+     4. Build component hierarchy
+     5. Identify shared files
+
+     Args:
+         target: Project root to analyze
+
+     Returns:
+         DiscoveryResult with full analysis
+     """
+     # Detect agents
+     agents = detect_agents(target)
+
+     # Get all instruction files
+     instruction_files = get_all_instruction_files(target)
+
+     # Discover components
+     components = discover_components(target, instruction_files)
+
+     # Find shared files
+     shared_files = find_shared_files(components, target)
+
+     # Count total references
+     total_refs = sum(len(c.imports) for c in components.values())
+
+     return DiscoveryResult(
+         target=target,
+         discovered_at=datetime.now(UTC).isoformat(),
+         agents=agents,
+         components=components,
+         shared_files=shared_files,
+         total_instruction_files=len(instruction_files),
+         total_references=total_refs,
+     )
+
+
+ def generate_backbone_yaml(result: DiscoveryResult) -> str:
+     """
+     Generate backbone.yml content from discovery result.
+
+     Args:
+         result: Discovery result
+
+     Returns:
+         YAML string for backbone file
+     """
+     data: dict[str, Any] = {
+         "version": 1,
+         "generated_at": result.discovered_at,
+         "generator": "ails discover",
+         "target": str(result.target),
+         "agents": {},
+         "components": {},
+         "shared": result.shared_files,
+         "stats": {
+             "total_instruction_files": result.total_instruction_files,
+             "total_components": len(result.components),
+             "total_references": result.total_references,
+         },
+     }
+
+     # Add detected agents
+     for agent in result.agents:
+         data["agents"][agent.agent_type.id] = {
+             "name": agent.agent_type.name,
+             "instruction_files": [
+                 str(f.relative_to(result.target)) for f in agent.instruction_files
+             ],
+             "config_files": [str(f.relative_to(result.target)) for f in agent.config_files],
+             "rule_files": [str(f.relative_to(result.target)) for f in agent.rule_files],
+         }
+
+     # Add components
+     for comp_id, component in sorted(result.components.items()):
+         comp_data: dict[str, Any] = {
+             "root": str(component.root.relative_to(result.target)),
+             "instruction_files": [
+                 str(f.relative_to(result.target)) for f in component.instruction_files
+             ],
+             "content_hash": component.content_hash,
+         }
+         if component.imports:
+             comp_data["imports"] = component.imports
+         if component.parent:
+             comp_data["parent"] = component.parent
+         if component.children:
+             comp_data["children"] = component.children
+
+         data["components"][comp_id] = comp_data
+
+     yaml_output: str = yaml.dump(
+         data, default_flow_style=False, sort_keys=False, allow_unicode=True
+     )
+     return yaml_output
+
+
+ def save_backbone(target: Path, content: str) -> Path:
+     """
+     Save backbone.yml to target's .reporails directory.
+
+     Args:
+         target: Project root
+         content: YAML content
+
+     Returns:
+         Path to saved file
+     """
+     backbone_dir = target / ".reporails"
+     backbone_dir.mkdir(parents=True, exist_ok=True)
+
+     backbone_path = backbone_dir / "backbone.yml"
+     backbone_path.write_text(content, encoding="utf-8")
+
+     return backbone_path
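End to end, the functions above compose into a single pipeline; a minimal sketch, assuming the package is installed and run against an existing project directory:

    from pathlib import Path

    from reporails_cli.core.discover import (
        generate_backbone_yaml,
        run_discovery,
        save_backbone,
    )

    result = run_discovery(Path("."))           # agents, components, shared files
    print(f"{result.total_instruction_files} instruction files, "
          f"{len(result.components)} components")

    yaml_text = generate_backbone_yaml(result)  # serialize the discovered structure
    path = save_backbone(Path("."), yaml_text)  # writes .reporails/backbone.yml
    print(f"backbone written to {path}")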