raise-cli 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. raise_cli/__init__.py +38 -0
  2. raise_cli/__main__.py +30 -0
  3. raise_cli/adapters/__init__.py +91 -0
  4. raise_cli/adapters/declarative/__init__.py +26 -0
  5. raise_cli/adapters/declarative/adapter.py +267 -0
  6. raise_cli/adapters/declarative/discovery.py +94 -0
  7. raise_cli/adapters/declarative/expressions.py +150 -0
  8. raise_cli/adapters/declarative/reference/__init__.py +1 -0
  9. raise_cli/adapters/declarative/reference/github.yaml +143 -0
  10. raise_cli/adapters/declarative/schema.py +98 -0
  11. raise_cli/adapters/filesystem.py +299 -0
  12. raise_cli/adapters/mcp_bridge.py +10 -0
  13. raise_cli/adapters/mcp_confluence.py +246 -0
  14. raise_cli/adapters/mcp_jira.py +405 -0
  15. raise_cli/adapters/models.py +205 -0
  16. raise_cli/adapters/protocols.py +180 -0
  17. raise_cli/adapters/registry.py +90 -0
  18. raise_cli/adapters/sync.py +149 -0
  19. raise_cli/agents/__init__.py +14 -0
  20. raise_cli/agents/antigravity.yaml +8 -0
  21. raise_cli/agents/claude.yaml +8 -0
  22. raise_cli/agents/copilot.yaml +8 -0
  23. raise_cli/agents/copilot_plugin.py +124 -0
  24. raise_cli/agents/cursor.yaml +7 -0
  25. raise_cli/agents/roo.yaml +8 -0
  26. raise_cli/agents/windsurf.yaml +8 -0
  27. raise_cli/artifacts/__init__.py +30 -0
  28. raise_cli/artifacts/models.py +43 -0
  29. raise_cli/artifacts/reader.py +55 -0
  30. raise_cli/artifacts/renderer.py +104 -0
  31. raise_cli/artifacts/story_design.py +69 -0
  32. raise_cli/artifacts/writer.py +45 -0
  33. raise_cli/backlog/__init__.py +1 -0
  34. raise_cli/backlog/sync.py +115 -0
  35. raise_cli/cli/__init__.py +3 -0
  36. raise_cli/cli/commands/__init__.py +3 -0
  37. raise_cli/cli/commands/_resolve.py +153 -0
  38. raise_cli/cli/commands/adapters.py +362 -0
  39. raise_cli/cli/commands/artifact.py +137 -0
  40. raise_cli/cli/commands/backlog.py +333 -0
  41. raise_cli/cli/commands/base.py +31 -0
  42. raise_cli/cli/commands/discover.py +551 -0
  43. raise_cli/cli/commands/docs.py +130 -0
  44. raise_cli/cli/commands/doctor.py +177 -0
  45. raise_cli/cli/commands/gate.py +223 -0
  46. raise_cli/cli/commands/graph.py +1086 -0
  47. raise_cli/cli/commands/info.py +81 -0
  48. raise_cli/cli/commands/init.py +746 -0
  49. raise_cli/cli/commands/journal.py +167 -0
  50. raise_cli/cli/commands/mcp.py +524 -0
  51. raise_cli/cli/commands/memory.py +467 -0
  52. raise_cli/cli/commands/pattern.py +348 -0
  53. raise_cli/cli/commands/profile.py +59 -0
  54. raise_cli/cli/commands/publish.py +80 -0
  55. raise_cli/cli/commands/release.py +338 -0
  56. raise_cli/cli/commands/session.py +528 -0
  57. raise_cli/cli/commands/signal.py +410 -0
  58. raise_cli/cli/commands/skill.py +350 -0
  59. raise_cli/cli/commands/skill_set.py +145 -0
  60. raise_cli/cli/error_handler.py +158 -0
  61. raise_cli/cli/main.py +163 -0
  62. raise_cli/compat.py +66 -0
  63. raise_cli/config/__init__.py +41 -0
  64. raise_cli/config/agent_plugin.py +105 -0
  65. raise_cli/config/agent_registry.py +233 -0
  66. raise_cli/config/agents.py +120 -0
  67. raise_cli/config/ide.py +32 -0
  68. raise_cli/config/paths.py +379 -0
  69. raise_cli/config/settings.py +180 -0
  70. raise_cli/context/__init__.py +42 -0
  71. raise_cli/context/analyzers/__init__.py +16 -0
  72. raise_cli/context/analyzers/models.py +36 -0
  73. raise_cli/context/analyzers/protocol.py +43 -0
  74. raise_cli/context/analyzers/python.py +292 -0
  75. raise_cli/context/builder.py +1569 -0
  76. raise_cli/context/diff.py +213 -0
  77. raise_cli/context/extractors/__init__.py +13 -0
  78. raise_cli/context/extractors/skills.py +121 -0
  79. raise_cli/core/__init__.py +37 -0
  80. raise_cli/core/files.py +66 -0
  81. raise_cli/core/text.py +174 -0
  82. raise_cli/core/tools.py +441 -0
  83. raise_cli/discovery/__init__.py +50 -0
  84. raise_cli/discovery/analyzer.py +691 -0
  85. raise_cli/discovery/drift.py +355 -0
  86. raise_cli/discovery/scanner.py +1687 -0
  87. raise_cli/doctor/__init__.py +4 -0
  88. raise_cli/doctor/checks/__init__.py +1 -0
  89. raise_cli/doctor/checks/environment.py +110 -0
  90. raise_cli/doctor/checks/project.py +238 -0
  91. raise_cli/doctor/fix.py +80 -0
  92. raise_cli/doctor/models.py +56 -0
  93. raise_cli/doctor/protocol.py +43 -0
  94. raise_cli/doctor/registry.py +100 -0
  95. raise_cli/doctor/report.py +141 -0
  96. raise_cli/doctor/runner.py +95 -0
  97. raise_cli/engines/__init__.py +3 -0
  98. raise_cli/exceptions.py +215 -0
  99. raise_cli/gates/__init__.py +19 -0
  100. raise_cli/gates/builtin/__init__.py +1 -0
  101. raise_cli/gates/builtin/coverage.py +52 -0
  102. raise_cli/gates/builtin/lint.py +48 -0
  103. raise_cli/gates/builtin/tests.py +48 -0
  104. raise_cli/gates/builtin/types.py +48 -0
  105. raise_cli/gates/models.py +40 -0
  106. raise_cli/gates/protocol.py +41 -0
  107. raise_cli/gates/registry.py +141 -0
  108. raise_cli/governance/__init__.py +11 -0
  109. raise_cli/governance/extractor.py +412 -0
  110. raise_cli/governance/models.py +134 -0
  111. raise_cli/governance/parsers/__init__.py +35 -0
  112. raise_cli/governance/parsers/_convert.py +38 -0
  113. raise_cli/governance/parsers/adr.py +274 -0
  114. raise_cli/governance/parsers/backlog.py +356 -0
  115. raise_cli/governance/parsers/constitution.py +119 -0
  116. raise_cli/governance/parsers/epic.py +323 -0
  117. raise_cli/governance/parsers/glossary.py +316 -0
  118. raise_cli/governance/parsers/guardrails.py +345 -0
  119. raise_cli/governance/parsers/prd.py +112 -0
  120. raise_cli/governance/parsers/roadmap.py +118 -0
  121. raise_cli/governance/parsers/vision.py +116 -0
  122. raise_cli/graph/__init__.py +1 -0
  123. raise_cli/graph/backends/__init__.py +57 -0
  124. raise_cli/graph/backends/api.py +137 -0
  125. raise_cli/graph/backends/dual.py +139 -0
  126. raise_cli/graph/backends/pending.py +84 -0
  127. raise_cli/handlers/__init__.py +3 -0
  128. raise_cli/hooks/__init__.py +54 -0
  129. raise_cli/hooks/builtin/__init__.py +1 -0
  130. raise_cli/hooks/builtin/backlog.py +216 -0
  131. raise_cli/hooks/builtin/gate_bridge.py +83 -0
  132. raise_cli/hooks/builtin/jira_sync.py +127 -0
  133. raise_cli/hooks/builtin/memory.py +117 -0
  134. raise_cli/hooks/builtin/telemetry.py +72 -0
  135. raise_cli/hooks/emitter.py +184 -0
  136. raise_cli/hooks/events.py +262 -0
  137. raise_cli/hooks/protocol.py +38 -0
  138. raise_cli/hooks/registry.py +117 -0
  139. raise_cli/mcp/__init__.py +33 -0
  140. raise_cli/mcp/bridge.py +218 -0
  141. raise_cli/mcp/models.py +43 -0
  142. raise_cli/mcp/registry.py +77 -0
  143. raise_cli/mcp/schema.py +41 -0
  144. raise_cli/memory/__init__.py +58 -0
  145. raise_cli/memory/loader.py +247 -0
  146. raise_cli/memory/migration.py +241 -0
  147. raise_cli/memory/models.py +169 -0
  148. raise_cli/memory/writer.py +598 -0
  149. raise_cli/onboarding/__init__.py +103 -0
  150. raise_cli/onboarding/bootstrap.py +324 -0
  151. raise_cli/onboarding/claudemd.py +17 -0
  152. raise_cli/onboarding/conventions.py +742 -0
  153. raise_cli/onboarding/detection.py +374 -0
  154. raise_cli/onboarding/governance.py +443 -0
  155. raise_cli/onboarding/instructions.py +672 -0
  156. raise_cli/onboarding/manifest.py +201 -0
  157. raise_cli/onboarding/memory_md.py +399 -0
  158. raise_cli/onboarding/migration.py +207 -0
  159. raise_cli/onboarding/profile.py +624 -0
  160. raise_cli/onboarding/skill_conflict.py +100 -0
  161. raise_cli/onboarding/skill_manifest.py +176 -0
  162. raise_cli/onboarding/skills.py +437 -0
  163. raise_cli/onboarding/workflows.py +101 -0
  164. raise_cli/output/__init__.py +28 -0
  165. raise_cli/output/console.py +394 -0
  166. raise_cli/output/formatters/__init__.py +9 -0
  167. raise_cli/output/formatters/adapters.py +135 -0
  168. raise_cli/output/formatters/discover.py +439 -0
  169. raise_cli/output/formatters/skill.py +298 -0
  170. raise_cli/publish/__init__.py +3 -0
  171. raise_cli/publish/changelog.py +80 -0
  172. raise_cli/publish/check.py +179 -0
  173. raise_cli/publish/version.py +172 -0
  174. raise_cli/rai_base/__init__.py +22 -0
  175. raise_cli/rai_base/framework/__init__.py +7 -0
  176. raise_cli/rai_base/framework/methodology.yaml +233 -0
  177. raise_cli/rai_base/governance/__init__.py +1 -0
  178. raise_cli/rai_base/governance/architecture/__init__.py +1 -0
  179. raise_cli/rai_base/governance/architecture/domain-model.md +20 -0
  180. raise_cli/rai_base/governance/architecture/system-context.md +34 -0
  181. raise_cli/rai_base/governance/architecture/system-design.md +24 -0
  182. raise_cli/rai_base/governance/backlog.md +8 -0
  183. raise_cli/rai_base/governance/guardrails.md +17 -0
  184. raise_cli/rai_base/governance/prd.md +25 -0
  185. raise_cli/rai_base/governance/vision.md +16 -0
  186. raise_cli/rai_base/identity/__init__.py +8 -0
  187. raise_cli/rai_base/identity/core.md +119 -0
  188. raise_cli/rai_base/identity/perspective.md +119 -0
  189. raise_cli/rai_base/memory/__init__.py +7 -0
  190. raise_cli/rai_base/memory/patterns-base.jsonl +55 -0
  191. raise_cli/schemas/__init__.py +3 -0
  192. raise_cli/schemas/journal.py +49 -0
  193. raise_cli/schemas/session_state.py +117 -0
  194. raise_cli/session/__init__.py +5 -0
  195. raise_cli/session/bundle.py +820 -0
  196. raise_cli/session/close.py +268 -0
  197. raise_cli/session/journal.py +119 -0
  198. raise_cli/session/resolver.py +126 -0
  199. raise_cli/session/state.py +187 -0
  200. raise_cli/skills/__init__.py +44 -0
  201. raise_cli/skills/locator.py +141 -0
  202. raise_cli/skills/name_checker.py +199 -0
  203. raise_cli/skills/parser.py +145 -0
  204. raise_cli/skills/scaffold.py +212 -0
  205. raise_cli/skills/schema.py +132 -0
  206. raise_cli/skills/skillsets.py +195 -0
  207. raise_cli/skills/validator.py +197 -0
  208. raise_cli/skills_base/__init__.py +80 -0
  209. raise_cli/skills_base/contract-template.md +60 -0
  210. raise_cli/skills_base/preamble.md +37 -0
  211. raise_cli/skills_base/rai-architecture-review/SKILL.md +137 -0
  212. raise_cli/skills_base/rai-debug/SKILL.md +171 -0
  213. raise_cli/skills_base/rai-discover/SKILL.md +167 -0
  214. raise_cli/skills_base/rai-discover-document/SKILL.md +128 -0
  215. raise_cli/skills_base/rai-discover-scan/SKILL.md +147 -0
  216. raise_cli/skills_base/rai-discover-start/SKILL.md +145 -0
  217. raise_cli/skills_base/rai-discover-validate/SKILL.md +142 -0
  218. raise_cli/skills_base/rai-docs-update/SKILL.md +142 -0
  219. raise_cli/skills_base/rai-doctor/SKILL.md +120 -0
  220. raise_cli/skills_base/rai-epic-close/SKILL.md +165 -0
  221. raise_cli/skills_base/rai-epic-close/templates/retrospective.md +68 -0
  222. raise_cli/skills_base/rai-epic-design/SKILL.md +146 -0
  223. raise_cli/skills_base/rai-epic-design/templates/design.md +24 -0
  224. raise_cli/skills_base/rai-epic-design/templates/scope.md +76 -0
  225. raise_cli/skills_base/rai-epic-plan/SKILL.md +153 -0
  226. raise_cli/skills_base/rai-epic-plan/_references/sequencing-strategies.md +67 -0
  227. raise_cli/skills_base/rai-epic-plan/templates/plan-section.md +49 -0
  228. raise_cli/skills_base/rai-epic-run/SKILL.md +208 -0
  229. raise_cli/skills_base/rai-epic-start/SKILL.md +136 -0
  230. raise_cli/skills_base/rai-epic-start/templates/brief.md +34 -0
  231. raise_cli/skills_base/rai-mcp-add/SKILL.md +176 -0
  232. raise_cli/skills_base/rai-mcp-remove/SKILL.md +120 -0
  233. raise_cli/skills_base/rai-mcp-status/SKILL.md +147 -0
  234. raise_cli/skills_base/rai-problem-shape/SKILL.md +138 -0
  235. raise_cli/skills_base/rai-project-create/SKILL.md +144 -0
  236. raise_cli/skills_base/rai-project-onboard/SKILL.md +162 -0
  237. raise_cli/skills_base/rai-quality-review/SKILL.md +189 -0
  238. raise_cli/skills_base/rai-research/SKILL.md +143 -0
  239. raise_cli/skills_base/rai-research/references/research-prompt-template.md +317 -0
  240. raise_cli/skills_base/rai-session-close/SKILL.md +176 -0
  241. raise_cli/skills_base/rai-session-start/SKILL.md +110 -0
  242. raise_cli/skills_base/rai-story-close/SKILL.md +198 -0
  243. raise_cli/skills_base/rai-story-design/SKILL.md +203 -0
  244. raise_cli/skills_base/rai-story-design/references/tech-design-story-v2.md +293 -0
  245. raise_cli/skills_base/rai-story-implement/SKILL.md +115 -0
  246. raise_cli/skills_base/rai-story-plan/SKILL.md +135 -0
  247. raise_cli/skills_base/rai-story-review/SKILL.md +178 -0
  248. raise_cli/skills_base/rai-story-run/SKILL.md +282 -0
  249. raise_cli/skills_base/rai-story-start/SKILL.md +166 -0
  250. raise_cli/skills_base/rai-story-start/templates/story.md +38 -0
  251. raise_cli/skills_base/rai-welcome/SKILL.md +134 -0
  252. raise_cli/telemetry/__init__.py +42 -0
  253. raise_cli/telemetry/schemas.py +285 -0
  254. raise_cli/telemetry/writer.py +217 -0
  255. raise_cli/tier/__init__.py +0 -0
  256. raise_cli/tier/context.py +134 -0
  257. raise_cli/viz/__init__.py +7 -0
  258. raise_cli/viz/generator.py +406 -0
  259. raise_cli-2.2.1.dist-info/METADATA +433 -0
  260. raise_cli-2.2.1.dist-info/RECORD +264 -0
  261. raise_cli-2.2.1.dist-info/WHEEL +4 -0
  262. raise_cli-2.2.1.dist-info/entry_points.txt +40 -0
  263. raise_cli-2.2.1.dist-info/licenses/LICENSE +190 -0
  264. raise_cli-2.2.1.dist-info/licenses/NOTICE +4 -0
@@ -0,0 +1,742 @@
1
+ """Convention detection for brownfield projects.
2
+
3
+ Detects code style, naming, and structure conventions by analyzing source files
4
+ and reporting findings with confidence scores.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ import time
11
+ from collections import Counter
12
+ from enum import StrEnum
13
+ from pathlib import Path
14
+ from typing import Literal
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from raise_cli.core.files import should_exclude_dir
19
+
20
+
21
+ class Confidence(StrEnum):
22
+ """Confidence level for a detected convention.
23
+
24
+ Confidence is based on both consistency ratio and sample size:
25
+ - HIGH: >90% consistency AND >10 samples
26
+ - MEDIUM: 70-90% consistency OR 5-10 samples
27
+ - LOW: <70% consistency OR <5 samples
28
+ """
29
+
30
+ HIGH = "high"
31
+ MEDIUM = "medium"
32
+ LOW = "low"
33
+
34
+
35
class IndentationConvention(BaseModel):
    """Indentation convention observed across the analyzed files.

    Attributes:
        style: "spaces", "tabs", or "mixed" when both kinds were found.
        width: Number of spaces per indent level; None for tabs or mixed.
        confidence: Confidence level assigned to this detection.
        sample_count: Number of files that contributed an indentation sample.
        consistent_count: Number of those files agreeing with the detected
            convention.
    """

    style: Literal["spaces", "tabs", "mixed"]
    width: int | None = None
    confidence: Confidence
    sample_count: int
    consistent_count: int
51
+
52
+
53
class QuoteConvention(BaseModel):
    """Preferred string-quote style observed in the analyzed files.

    Attributes:
        style: "single", "double", or "mixed" when neither clearly dominates.
        confidence: Confidence level assigned to this detection.
        sample_count: Number of string literals that were counted.
        consistent_count: Number of literals using the detected style.
    """

    style: Literal["single", "double", "mixed"]
    confidence: Confidence
    sample_count: int
    consistent_count: int
67
+
68
+
69
class LineLengthConvention(BaseModel):
    """Typical maximum line length observed in the analyzed files.

    Attributes:
        max_length: The 80th-percentile line length, snapped by
            ``detect_line_length`` to the nearest common limit
            (79, 88, 100 or 120).
        confidence: Confidence level assigned to this detection.
        sample_count: Number of lines that were measured.
    """

    max_length: int
    confidence: Confidence
    sample_count: int
81
+
82
+
83
class StyleConventions(BaseModel):
    """Bundle of the code-style conventions detected for a project.

    Holds one result each for indentation, string-quote style and
    line length.
    """

    indentation: IndentationConvention
    quote_style: QuoteConvention
    line_length: LineLengthConvention
92
+
93
+
94
class NamingConvention(BaseModel):
    """Naming pattern detected for one kind of symbol.

    Attributes:
        pattern: The dominant pattern ("snake_case", "camelCase",
            "PascalCase", "UPPER_SNAKE_CASE") or "mixed".
        confidence: Confidence level assigned to this detection.
        sample_count: Number of symbol names that were classified.
        consistent_count: Number of names following the detected pattern.
    """

    pattern: Literal[
        "snake_case", "camelCase", "PascalCase", "UPPER_SNAKE_CASE", "mixed"
    ]
    confidence: Confidence
    sample_count: int
    consistent_count: int
110
+
111
+
112
class NamingConventions(BaseModel):
    """Naming conventions grouped by symbol kind.

    Functions, classes and constants are reported separately because each
    kind usually follows its own pattern.
    """

    functions: NamingConvention
    classes: NamingConvention
    constants: NamingConvention
122
+
123
+
124
class StructureConventions(BaseModel):
    """Layout conventions detected for the project tree.

    Attributes:
        source_dir: Source root relative to the project, e.g. "src/mypackage";
            None when no source package was found.
        test_dir: Test directory name, e.g. "tests"; None when absent.
        has_src_layout: True when the project has a top-level ``src/``
            directory.
        common_patterns: Recurring directory patterns that were found.
    """

    source_dir: str | None = None
    test_dir: str | None = None
    has_src_layout: bool = False
    common_patterns: list[str] = Field(default_factory=list)
138
+
139
+
140
class ConventionResult(BaseModel):
    """Aggregate outcome of a convention-detection run.

    This is the value returned by detect_conventions().

    Attributes:
        style: Code-style findings (indentation, quotes, line length).
        naming: Naming findings per symbol kind.
        structure: Project-layout findings.
        overall_confidence: Lowest confidence across key conventions.
        files_analyzed: Total number of Python files analyzed.
        analysis_time_ms: Duration of the analysis in milliseconds.
    """

    style: StyleConventions
    naming: NamingConventions
    structure: StructureConventions
    overall_confidence: Confidence
    files_analyzed: int
    analysis_time_ms: int = 0
160
+
161
+
162
+ # =============================================================================
163
+ # Confidence Calculation
164
+ # =============================================================================
165
+
166
+
167
def calculate_confidence(consistent: int, total: int) -> Confidence:
    """Grade a detection from its consistency ratio and its sample size.

    Both how uniform the samples are and how many samples exist matter:

    - fewer than 5 samples (including none) -> LOW, too little data
    - 5 to 10 samples -> MEDIUM at best, even when fully consistent
    - more than 10 samples -> HIGH above 90% consistency
    - in every case, at least 70% consistency is needed for MEDIUM;
      anything below that is LOW

    Args:
        consistent: Number of samples matching the detected convention.
        total: Total number of samples analyzed.

    Returns:
        Confidence level (HIGH, MEDIUM, or LOW).

    Examples:
        >>> calculate_confidence(4, 4)  # 100% but only 4 samples
        Confidence.LOW
        >>> calculate_confidence(10, 10)  # 100% but only 10 samples
        Confidence.MEDIUM
        >>> calculate_confidence(95, 100)  # 95% with 100 samples
        Confidence.HIGH
    """
    # Too few samples to conclude anything; this also keeps the division
    # below safe when there are no samples at all.
    if total < 5:
        return Confidence.LOW

    share = consistent / total

    # Below 70% agreement the convention is weak regardless of sample size.
    if share < 0.7:
        return Confidence.LOW

    # HIGH needs both a large sample and more than 90% agreement.
    if total > 10 and share > 0.9:
        return Confidence.HIGH

    return Confidence.MEDIUM
216
+
217
+
218
+ # =============================================================================
219
+ # File Collection
220
+ # =============================================================================
221
+
222
+
223
def collect_python_files(directory: Path, max_files: int = 200) -> list[Path]:
    """Collect Python files from a directory recursively.

    Directory entries are visited in sorted order so that, when the
    ``max_files`` cap is hit, the same subset of files is returned on every
    run and platform (``Path.iterdir`` itself yields entries in arbitrary
    order).

    Args:
        directory: Root directory to scan.
        max_files: Maximum number of files to return (for performance).

    Returns:
        List of paths to ``.py`` files, at most ``max_files`` long. Empty
        when ``directory`` is not a directory.
    """
    if not directory.is_dir():
        return []

    files: list[Path] = []

    def _collect(path: Path) -> None:
        if len(files) >= max_files:
            return
        try:
            for item in sorted(path.iterdir()):
                if len(files) >= max_files:
                    return
                if item.is_dir():
                    if not should_exclude_dir(item):
                        _collect(item)
                elif item.is_file() and item.suffix == ".py":
                    files.append(item)
        except OSError:
            # Best-effort scan: an unreadable directory (PermissionError) or
            # one that vanished or became invalid mid-scan is skipped rather
            # than aborting the whole collection.
            pass

    _collect(directory)
    return files
255
+
256
+
257
+ # =============================================================================
258
+ # Style Detection
259
+ # =============================================================================
260
+
261
+
262
+ def _get_first_indent(file_path: Path) -> tuple[str, int] | None:
263
+ """Get the first indentation character and width from a file.
264
+
265
+ Returns:
266
+ Tuple of (char, width) where char is 'tab' or 'space', or None if no indent found.
267
+ """
268
+ try:
269
+ content = file_path.read_text(encoding="utf-8", errors="ignore")
270
+ except (OSError, UnicodeDecodeError):
271
+ return None
272
+
273
+ for line in content.splitlines():
274
+ if not line or line[0] not in (" ", "\t"):
275
+ continue
276
+ if line[0] == "\t":
277
+ return ("tab", 0)
278
+ # Count leading spaces
279
+ stripped = line.lstrip(" ")
280
+ indent = len(line) - len(stripped)
281
+ if indent > 0:
282
+ return ("space", indent)
283
+
284
+ return None
285
+
286
+
287
+ def _determine_indent_style(
288
+ tabs_count: int, spaces_count: int, indent_widths: list[int]
289
+ ) -> tuple[Literal["spaces", "tabs", "mixed"], int | None, int]:
290
+ """Determine indentation style from collected samples.
291
+
292
+ Returns:
293
+ Tuple of (style, width, consistent_count).
294
+ """
295
+ if tabs_count > 0 and spaces_count > 0:
296
+ return ("mixed", None, 0)
297
+
298
+ if tabs_count > spaces_count:
299
+ return ("tabs", None, tabs_count)
300
+
301
+ # Spaces style - find most common width
302
+ if indent_widths:
303
+ width_counts = Counter(indent_widths)
304
+ width = width_counts.most_common(1)[0][0]
305
+ consistent = width_counts[width]
306
+ return ("spaces", width, consistent)
307
+
308
+ return ("spaces", 4, spaces_count)
309
+
310
+
311
def detect_indentation(files: list[Path]) -> IndentationConvention:
    """Detect the indentation convention used by a set of Python files.

    Each file contributes at most one sample: the indentation of its first
    indented line. Files without a sample are left out of the statistics.

    Args:
        files: List of Python files to analyze.

    Returns:
        IndentationConvention with detected style and confidence. When no
        file yields a sample, a LOW-confidence default of 4 spaces is
        returned.
    """
    samples = [
        sample for sample in map(_get_first_indent, files) if sample is not None
    ]

    if not samples:
        return IndentationConvention(
            style="spaces",
            width=4,
            confidence=Confidence.LOW,
            sample_count=0,
            consistent_count=0,
        )

    # Everything that is not a tab sample is a space sample carrying a width.
    space_widths = [width for kind, width in samples if kind != "tab"]
    tab_files = len(samples) - len(space_widths)

    style, width, consistent = _determine_indent_style(
        tab_files, len(space_widths), space_widths
    )

    return IndentationConvention(
        style=style,
        width=width,
        confidence=calculate_confidence(consistent, len(samples)),
        sample_count=len(samples),
        consistent_count=consistent,
    )
360
+
361
+
362
# Regex to find string literals (simple version - not perfect but good enough)
STRING_PATTERN = re.compile(r"""(?<!\\)(["'])(?:(?!\1|\\).|\\.)*\1""")

# Triple-quoted blocks (docstrings and multi-line strings). They are removed
# before counting: STRING_PATTERN would otherwise read each one as several
# short double- or single-quoted strings and skew the statistics.
_TRIPLE_QUOTED_PATTERN = re.compile(r"(\"\"\"|''').*?\1", re.DOTALL)


def detect_quotes(files: list[Path]) -> QuoteConvention:
    """Detect quote style convention from Python files.

    Counts single- versus double-quoted string literals. Triple-quoted
    blocks such as docstrings are stripped first so they do not bias the
    count. The result is "mixed" when the more frequent quote character
    accounts for less than 70% of all literals.

    This is a regex heuristic, not a tokenizer: quote characters inside
    comments can still be counted.

    Args:
        files: List of Python files to analyze. Unreadable files are skipped.

    Returns:
        QuoteConvention with detected style and confidence. When no literal
        is found, a LOW-confidence default of "double" is returned.
    """
    single_count = 0
    double_count = 0

    for file_path in files:
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except (OSError, UnicodeDecodeError):
            continue

        # Replace with a space so tokens on either side do not fuse together.
        content = _TRIPLE_QUOTED_PATTERN.sub(" ", content)
        for match in STRING_PATTERN.finditer(content):
            if match.group(1) == "'":
                single_count += 1
            else:
                double_count += 1

    total = single_count + double_count

    if total == 0:
        return QuoteConvention(
            style="double",
            confidence=Confidence.LOW,
            sample_count=0,
            consistent_count=0,
        )

    style: Literal["single", "double", "mixed"]
    if max(single_count, double_count) / total < 0.7:
        # Neither quote character clearly dominates.
        style = "mixed"
        consistent = 0
    elif single_count > double_count:
        style = "single"
        consistent = single_count
    else:
        style = "double"
        consistent = double_count

    confidence = calculate_confidence(consistent, total)

    return QuoteConvention(
        style=style,
        confidence=confidence,
        sample_count=total,
        consistent_count=consistent,
    )
432
+
433
+
434
def detect_line_length(files: list[Path]) -> LineLengthConvention:
    """Estimate the project's line-length limit from Python files.

    Takes the 80th percentile of the measured line lengths and snaps it to
    the nearest common limit (79, 88, 100 or 120). Blank lines and lines
    that are only a comment are not measured.

    Args:
        files: List of Python files to analyze. Unreadable files are skipped.

    Returns:
        LineLengthConvention with detected max length and confidence. When
        no line is measured, a LOW-confidence default of 88 is returned.
    """
    lengths: list[int] = []

    for file_path in files:
        try:
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except (OSError, UnicodeDecodeError):
            continue
        # Measure the full line, including its indentation.
        lengths.extend(
            len(raw)
            for raw in text.splitlines()
            if (body := raw.strip()) and not body.startswith("#")
        )

    sample_count = len(lengths)
    if sample_count == 0:
        return LineLengthConvention(
            max_length=88,
            confidence=Confidence.LOW,
            sample_count=0,
        )

    # 80th percentile, with the index clamped to the last element.
    ordered = sorted(lengths)
    position = min(int(sample_count * 0.8), sample_count - 1)
    percentile = ordered[position]

    # Snap to the closest of the usual limits (79, 88, 100, 120).
    nearest = min([79, 88, 100, 120], key=lambda limit: abs(limit - percentile))

    # Every measured line counts as consistent, so only sample size matters.
    return LineLengthConvention(
        max_length=nearest,
        confidence=calculate_confidence(sample_count, sample_count),
        sample_count=sample_count,
    )
482
+
483
+
484
+ # =============================================================================
485
+ # Naming Detection
486
+ # =============================================================================
487
+
488
+
489
# Patterns that pull symbol names out of source text. Each one is anchored
# at the start of a line, so only unindented (module-level) statements match.
# Top-level "def name(":
FUNCTION_PATTERN = re.compile(r"^def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", re.M)
# Top-level "class Name(" or "class Name:":
CLASS_PATTERN = re.compile(r"^class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[\(:]", re.M)
# Top-level all-uppercase name followed by "=" or ":" (annotated assignment):
CONSTANT_PATTERN = re.compile(r"^([A-Z][A-Z0-9_]*)\s*[=:]", re.M)
493
+
494
+
495
def classify_name(
    name: str,
) -> Literal["snake_case", "camelCase", "PascalCase", "UPPER_SNAKE_CASE", "mixed"]:
    """Work out which naming pattern an identifier follows.

    Leading underscores (private and dunder markers) are dropped before the
    name is examined. A name made only of underscores counts as snake_case.

    Args:
        name: The identifier name to classify.

    Returns:
        The detected naming pattern, or "mixed" when none applies.
    """
    bare = name.lstrip("_")
    if name.startswith("_") and not bare:
        return "snake_case"

    # An all-uppercase name is UPPER_SNAKE_CASE only when it is longer than
    # one character; a lone capital such as "X" falls through to PascalCase.
    if re.match(r"^[A-Z][A-Z0-9_]*$", bare):
        if len(bare) > 1 or "_" in bare:
            return "UPPER_SNAKE_CASE"

    # Order matters: snake_case is tried before camelCase so that an
    # all-lowercase word is reported as snake_case.
    ordered_checks: tuple[
        tuple[str, Literal["PascalCase", "snake_case", "camelCase"]], ...
    ] = (
        (r"^[A-Z][a-zA-Z0-9]*$", "PascalCase"),
        (r"^[a-z][a-z0-9_]*$", "snake_case"),
        (r"^[a-z][a-zA-Z0-9]*$", "camelCase"),
    )
    for regex, label in ordered_checks:
        if re.match(regex, bare):
            return label

    return "mixed"
523
+
524
+
525
def _detect_naming_for_pattern(
    files: list[Path],
    pattern: re.Pattern[str],
    expected_style: Literal[
        "snake_case", "camelCase", "PascalCase", "UPPER_SNAKE_CASE", "mixed"
    ],
) -> NamingConvention:
    """Detect the dominant naming pattern among symbols found by a regex.

    Args:
        files: List of Python files to analyze. Unreadable files are skipped.
        pattern: Regex whose first group captures a symbol name.
        expected_style: Pattern to report when no symbol is found at all.

    Returns:
        NamingConvention with detected pattern and confidence. With no
        symbols, ``expected_style`` is returned with LOW confidence.
    """
    tally: Counter[str] = Counter()

    for file_path in files:
        try:
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except (OSError, UnicodeDecodeError):
            continue
        tally.update(classify_name(found.group(1)) for found in pattern.finditer(text))

    total = sum(tally.values())
    if not total:
        return NamingConvention(
            pattern=expected_style,
            confidence=Confidence.LOW,
            sample_count=0,
            consistent_count=0,
        )

    # The most frequent style wins.
    ((winner, consistent),) = tally.most_common(1)

    # Guard: anything outside the allowed literal values is reported as mixed.
    allowed = ("snake_case", "camelCase", "PascalCase", "UPPER_SNAKE_CASE", "mixed")
    if winner not in allowed:
        winner = "mixed"

    return NamingConvention(
        pattern=winner,  # type: ignore[arg-type]
        confidence=calculate_confidence(consistent, total),
        sample_count=total,
        consistent_count=consistent,
    )
584
+
585
+
586
def detect_naming(files: list[Path]) -> NamingConventions:
    """Detect naming conventions for functions, classes, and constants.

    Each symbol kind is analyzed on its own, with the pattern conventional
    for that kind used as the fallback when no symbols are found.

    Args:
        files: List of Python files to analyze.

    Returns:
        NamingConventions with detected patterns for each symbol type.
    """
    function_naming = _detect_naming_for_pattern(
        files, FUNCTION_PATTERN, "snake_case"
    )
    class_naming = _detect_naming_for_pattern(files, CLASS_PATTERN, "PascalCase")
    constant_naming = _detect_naming_for_pattern(
        files, CONSTANT_PATTERN, "UPPER_SNAKE_CASE"
    )
    return NamingConventions(
        functions=function_naming,
        classes=class_naming,
        constants=constant_naming,
    )
602
+
603
+
604
+ # =============================================================================
605
+ # Structure Detection
606
+ # =============================================================================
607
+
608
+
609
def detect_structure(directory: Path) -> StructureConventions:
    """Detect project structure conventions.

    Identifies the source directory, the test directory, and the
    sub-directories found directly inside the source directory.

    Directory listings are sorted by name before the first matching package
    is picked, so the result does not depend on the (arbitrary) order in
    which the filesystem yields entries.

    Args:
        directory: Root directory to analyze. If it is not an existing
            directory, every probe fails and an empty result is returned
            instead of raising.

    Returns:
        StructureConventions with the detected source directory (or None),
        the test directory (or None), whether a ``src/`` directory exists,
        and at most 10 source sub-directory names (alphabetical, each with a
        trailing slash).
    """
    source_dir: str | None = None
    test_dir: str | None = None
    common_patterns: list[str] = []

    # Check for src/ layout
    src_dir = directory / "src"
    has_src_layout = src_dir.is_dir()
    if has_src_layout:
        # Pick the alphabetically first package inside src/
        for item in sorted(src_dir.iterdir()):
            if (
                item.is_dir()
                and not item.name.startswith(".")
                and (item / "__init__.py").exists()
            ):
                source_dir = f"src/{item.name}"
                break
        if not source_dir:
            # src/ exists but holds no package: report src/ itself
            source_dir = "src"

    # If no src/, look for a package at the root. Guarded with is_dir() like
    # every other probe here so a missing root yields an empty result.
    if not source_dir and directory.is_dir():
        non_source_names = {"tests", "test", "docs", "build", "dist"}
        for item in sorted(directory.iterdir()):
            if (
                item.is_dir()
                and not item.name.startswith(".")
                and item.name not in non_source_names
                and (item / "__init__.py").exists()
            ):
                source_dir = item.name
                break

    # Find test directory ("tests" takes precedence over "test")
    for test_name in ("tests", "test"):
        if (directory / test_name).is_dir():
            test_dir = test_name
            break

    # Find common patterns (subdirectories that appear in source)
    if source_dir:
        source_path = directory / source_dir
        if source_path.is_dir():
            # Skip private/dunder directories (e.g. __pycache__) and hidden
            # dot-directories (e.g. tool caches); neither is project structure.
            common_patterns = [
                f"{item.name}/"
                for item in source_path.iterdir()
                if item.is_dir() and not item.name.startswith(("_", "."))
            ]

    return StructureConventions(
        source_dir=source_dir,
        test_dir=test_dir,
        has_src_layout=has_src_layout,
        common_patterns=sorted(common_patterns)[:10],  # Limit to first 10 by name
    )
674
+
675
+
676
+ # =============================================================================
677
+ # Main Detection Function
678
+ # =============================================================================
679
+
680
+
681
def detect_conventions(directory: Path) -> ConventionResult:
    """Run every convention detector against a project directory.

    Main entry point for convention detection: collects the project's
    Python files, runs the style, naming, and structure detectors, and
    combines their output into a single result.

    Args:
        directory: Root directory of the project to analyze.

    Returns:
        ConventionResult carrying the style, naming, and structure
        conventions, an overall confidence, the number of files analyzed,
        and the elapsed analysis time in milliseconds.

    Example:
        >>> result = detect_conventions(Path("/path/to/project"))
        >>> print(result.style.indentation.width)  # e.g., 4
        >>> print(result.naming.functions.pattern)  # e.g., "snake_case"
    """
    started = time.perf_counter()

    python_files = collect_python_files(directory)

    # Style conventions
    indent_result = detect_indentation(python_files)
    quote_result = detect_quotes(python_files)
    length_result = detect_line_length(python_files)
    style_result = StyleConventions(
        indentation=indent_result,
        quote_style=quote_result,
        line_length=length_result,
    )

    # Naming and structure conventions
    naming_result = detect_naming(python_files)
    structure_result = detect_structure(directory)

    # Overall confidence is the weakest of the key conventions: the first
    # of LOW, MEDIUM that appears wins, otherwise HIGH.
    key_levels = (indent_result.confidence, naming_result.functions.confidence)
    overall_level = next(
        (
            level
            for level in (Confidence.LOW, Confidence.MEDIUM)
            if level in key_levels
        ),
        Confidence.HIGH,
    )

    duration_ms = int((time.perf_counter() - started) * 1000)

    return ConventionResult(
        style=style_result,
        naming=naming_result,
        structure=structure_result,
        overall_confidence=overall_level,
        files_analyzed=len(python_files),
        analysis_time_ms=duration_ms,
    )