raise-cli 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. raise_cli/__init__.py +38 -0
  2. raise_cli/__main__.py +30 -0
  3. raise_cli/adapters/__init__.py +91 -0
  4. raise_cli/adapters/declarative/__init__.py +26 -0
  5. raise_cli/adapters/declarative/adapter.py +267 -0
  6. raise_cli/adapters/declarative/discovery.py +94 -0
  7. raise_cli/adapters/declarative/expressions.py +150 -0
  8. raise_cli/adapters/declarative/reference/__init__.py +1 -0
  9. raise_cli/adapters/declarative/reference/github.yaml +143 -0
  10. raise_cli/adapters/declarative/schema.py +98 -0
  11. raise_cli/adapters/filesystem.py +299 -0
  12. raise_cli/adapters/mcp_bridge.py +10 -0
  13. raise_cli/adapters/mcp_confluence.py +246 -0
  14. raise_cli/adapters/mcp_jira.py +405 -0
  15. raise_cli/adapters/models.py +205 -0
  16. raise_cli/adapters/protocols.py +180 -0
  17. raise_cli/adapters/registry.py +90 -0
  18. raise_cli/adapters/sync.py +149 -0
  19. raise_cli/agents/__init__.py +14 -0
  20. raise_cli/agents/antigravity.yaml +8 -0
  21. raise_cli/agents/claude.yaml +8 -0
  22. raise_cli/agents/copilot.yaml +8 -0
  23. raise_cli/agents/copilot_plugin.py +124 -0
  24. raise_cli/agents/cursor.yaml +7 -0
  25. raise_cli/agents/roo.yaml +8 -0
  26. raise_cli/agents/windsurf.yaml +8 -0
  27. raise_cli/artifacts/__init__.py +30 -0
  28. raise_cli/artifacts/models.py +43 -0
  29. raise_cli/artifacts/reader.py +55 -0
  30. raise_cli/artifacts/renderer.py +104 -0
  31. raise_cli/artifacts/story_design.py +69 -0
  32. raise_cli/artifacts/writer.py +45 -0
  33. raise_cli/backlog/__init__.py +1 -0
  34. raise_cli/backlog/sync.py +115 -0
  35. raise_cli/cli/__init__.py +3 -0
  36. raise_cli/cli/commands/__init__.py +3 -0
  37. raise_cli/cli/commands/_resolve.py +153 -0
  38. raise_cli/cli/commands/adapters.py +362 -0
  39. raise_cli/cli/commands/artifact.py +137 -0
  40. raise_cli/cli/commands/backlog.py +333 -0
  41. raise_cli/cli/commands/base.py +31 -0
  42. raise_cli/cli/commands/discover.py +551 -0
  43. raise_cli/cli/commands/docs.py +130 -0
  44. raise_cli/cli/commands/doctor.py +177 -0
  45. raise_cli/cli/commands/gate.py +223 -0
  46. raise_cli/cli/commands/graph.py +1086 -0
  47. raise_cli/cli/commands/info.py +81 -0
  48. raise_cli/cli/commands/init.py +746 -0
  49. raise_cli/cli/commands/journal.py +167 -0
  50. raise_cli/cli/commands/mcp.py +524 -0
  51. raise_cli/cli/commands/memory.py +467 -0
  52. raise_cli/cli/commands/pattern.py +348 -0
  53. raise_cli/cli/commands/profile.py +59 -0
  54. raise_cli/cli/commands/publish.py +80 -0
  55. raise_cli/cli/commands/release.py +338 -0
  56. raise_cli/cli/commands/session.py +528 -0
  57. raise_cli/cli/commands/signal.py +410 -0
  58. raise_cli/cli/commands/skill.py +350 -0
  59. raise_cli/cli/commands/skill_set.py +145 -0
  60. raise_cli/cli/error_handler.py +158 -0
  61. raise_cli/cli/main.py +163 -0
  62. raise_cli/compat.py +66 -0
  63. raise_cli/config/__init__.py +41 -0
  64. raise_cli/config/agent_plugin.py +105 -0
  65. raise_cli/config/agent_registry.py +233 -0
  66. raise_cli/config/agents.py +120 -0
  67. raise_cli/config/ide.py +32 -0
  68. raise_cli/config/paths.py +379 -0
  69. raise_cli/config/settings.py +180 -0
  70. raise_cli/context/__init__.py +42 -0
  71. raise_cli/context/analyzers/__init__.py +16 -0
  72. raise_cli/context/analyzers/models.py +36 -0
  73. raise_cli/context/analyzers/protocol.py +43 -0
  74. raise_cli/context/analyzers/python.py +292 -0
  75. raise_cli/context/builder.py +1569 -0
  76. raise_cli/context/diff.py +213 -0
  77. raise_cli/context/extractors/__init__.py +13 -0
  78. raise_cli/context/extractors/skills.py +121 -0
  79. raise_cli/core/__init__.py +37 -0
  80. raise_cli/core/files.py +66 -0
  81. raise_cli/core/text.py +174 -0
  82. raise_cli/core/tools.py +441 -0
  83. raise_cli/discovery/__init__.py +50 -0
  84. raise_cli/discovery/analyzer.py +691 -0
  85. raise_cli/discovery/drift.py +355 -0
  86. raise_cli/discovery/scanner.py +1687 -0
  87. raise_cli/doctor/__init__.py +4 -0
  88. raise_cli/doctor/checks/__init__.py +1 -0
  89. raise_cli/doctor/checks/environment.py +110 -0
  90. raise_cli/doctor/checks/project.py +238 -0
  91. raise_cli/doctor/fix.py +80 -0
  92. raise_cli/doctor/models.py +56 -0
  93. raise_cli/doctor/protocol.py +43 -0
  94. raise_cli/doctor/registry.py +100 -0
  95. raise_cli/doctor/report.py +141 -0
  96. raise_cli/doctor/runner.py +95 -0
  97. raise_cli/engines/__init__.py +3 -0
  98. raise_cli/exceptions.py +215 -0
  99. raise_cli/gates/__init__.py +19 -0
  100. raise_cli/gates/builtin/__init__.py +1 -0
  101. raise_cli/gates/builtin/coverage.py +52 -0
  102. raise_cli/gates/builtin/lint.py +48 -0
  103. raise_cli/gates/builtin/tests.py +48 -0
  104. raise_cli/gates/builtin/types.py +48 -0
  105. raise_cli/gates/models.py +40 -0
  106. raise_cli/gates/protocol.py +41 -0
  107. raise_cli/gates/registry.py +141 -0
  108. raise_cli/governance/__init__.py +11 -0
  109. raise_cli/governance/extractor.py +412 -0
  110. raise_cli/governance/models.py +134 -0
  111. raise_cli/governance/parsers/__init__.py +35 -0
  112. raise_cli/governance/parsers/_convert.py +38 -0
  113. raise_cli/governance/parsers/adr.py +274 -0
  114. raise_cli/governance/parsers/backlog.py +356 -0
  115. raise_cli/governance/parsers/constitution.py +119 -0
  116. raise_cli/governance/parsers/epic.py +323 -0
  117. raise_cli/governance/parsers/glossary.py +316 -0
  118. raise_cli/governance/parsers/guardrails.py +345 -0
  119. raise_cli/governance/parsers/prd.py +112 -0
  120. raise_cli/governance/parsers/roadmap.py +118 -0
  121. raise_cli/governance/parsers/vision.py +116 -0
  122. raise_cli/graph/__init__.py +1 -0
  123. raise_cli/graph/backends/__init__.py +57 -0
  124. raise_cli/graph/backends/api.py +137 -0
  125. raise_cli/graph/backends/dual.py +139 -0
  126. raise_cli/graph/backends/pending.py +84 -0
  127. raise_cli/handlers/__init__.py +3 -0
  128. raise_cli/hooks/__init__.py +54 -0
  129. raise_cli/hooks/builtin/__init__.py +1 -0
  130. raise_cli/hooks/builtin/backlog.py +216 -0
  131. raise_cli/hooks/builtin/gate_bridge.py +83 -0
  132. raise_cli/hooks/builtin/jira_sync.py +127 -0
  133. raise_cli/hooks/builtin/memory.py +117 -0
  134. raise_cli/hooks/builtin/telemetry.py +72 -0
  135. raise_cli/hooks/emitter.py +184 -0
  136. raise_cli/hooks/events.py +262 -0
  137. raise_cli/hooks/protocol.py +38 -0
  138. raise_cli/hooks/registry.py +117 -0
  139. raise_cli/mcp/__init__.py +33 -0
  140. raise_cli/mcp/bridge.py +218 -0
  141. raise_cli/mcp/models.py +43 -0
  142. raise_cli/mcp/registry.py +77 -0
  143. raise_cli/mcp/schema.py +41 -0
  144. raise_cli/memory/__init__.py +58 -0
  145. raise_cli/memory/loader.py +247 -0
  146. raise_cli/memory/migration.py +241 -0
  147. raise_cli/memory/models.py +169 -0
  148. raise_cli/memory/writer.py +598 -0
  149. raise_cli/onboarding/__init__.py +103 -0
  150. raise_cli/onboarding/bootstrap.py +324 -0
  151. raise_cli/onboarding/claudemd.py +17 -0
  152. raise_cli/onboarding/conventions.py +742 -0
  153. raise_cli/onboarding/detection.py +374 -0
  154. raise_cli/onboarding/governance.py +443 -0
  155. raise_cli/onboarding/instructions.py +672 -0
  156. raise_cli/onboarding/manifest.py +201 -0
  157. raise_cli/onboarding/memory_md.py +399 -0
  158. raise_cli/onboarding/migration.py +207 -0
  159. raise_cli/onboarding/profile.py +624 -0
  160. raise_cli/onboarding/skill_conflict.py +100 -0
  161. raise_cli/onboarding/skill_manifest.py +176 -0
  162. raise_cli/onboarding/skills.py +437 -0
  163. raise_cli/onboarding/workflows.py +101 -0
  164. raise_cli/output/__init__.py +28 -0
  165. raise_cli/output/console.py +394 -0
  166. raise_cli/output/formatters/__init__.py +9 -0
  167. raise_cli/output/formatters/adapters.py +135 -0
  168. raise_cli/output/formatters/discover.py +439 -0
  169. raise_cli/output/formatters/skill.py +298 -0
  170. raise_cli/publish/__init__.py +3 -0
  171. raise_cli/publish/changelog.py +80 -0
  172. raise_cli/publish/check.py +179 -0
  173. raise_cli/publish/version.py +172 -0
  174. raise_cli/rai_base/__init__.py +22 -0
  175. raise_cli/rai_base/framework/__init__.py +7 -0
  176. raise_cli/rai_base/framework/methodology.yaml +233 -0
  177. raise_cli/rai_base/governance/__init__.py +1 -0
  178. raise_cli/rai_base/governance/architecture/__init__.py +1 -0
  179. raise_cli/rai_base/governance/architecture/domain-model.md +20 -0
  180. raise_cli/rai_base/governance/architecture/system-context.md +34 -0
  181. raise_cli/rai_base/governance/architecture/system-design.md +24 -0
  182. raise_cli/rai_base/governance/backlog.md +8 -0
  183. raise_cli/rai_base/governance/guardrails.md +17 -0
  184. raise_cli/rai_base/governance/prd.md +25 -0
  185. raise_cli/rai_base/governance/vision.md +16 -0
  186. raise_cli/rai_base/identity/__init__.py +8 -0
  187. raise_cli/rai_base/identity/core.md +119 -0
  188. raise_cli/rai_base/identity/perspective.md +119 -0
  189. raise_cli/rai_base/memory/__init__.py +7 -0
  190. raise_cli/rai_base/memory/patterns-base.jsonl +55 -0
  191. raise_cli/schemas/__init__.py +3 -0
  192. raise_cli/schemas/journal.py +49 -0
  193. raise_cli/schemas/session_state.py +117 -0
  194. raise_cli/session/__init__.py +5 -0
  195. raise_cli/session/bundle.py +820 -0
  196. raise_cli/session/close.py +268 -0
  197. raise_cli/session/journal.py +119 -0
  198. raise_cli/session/resolver.py +126 -0
  199. raise_cli/session/state.py +187 -0
  200. raise_cli/skills/__init__.py +44 -0
  201. raise_cli/skills/locator.py +141 -0
  202. raise_cli/skills/name_checker.py +199 -0
  203. raise_cli/skills/parser.py +145 -0
  204. raise_cli/skills/scaffold.py +212 -0
  205. raise_cli/skills/schema.py +132 -0
  206. raise_cli/skills/skillsets.py +195 -0
  207. raise_cli/skills/validator.py +197 -0
  208. raise_cli/skills_base/__init__.py +80 -0
  209. raise_cli/skills_base/contract-template.md +60 -0
  210. raise_cli/skills_base/preamble.md +37 -0
  211. raise_cli/skills_base/rai-architecture-review/SKILL.md +137 -0
  212. raise_cli/skills_base/rai-debug/SKILL.md +171 -0
  213. raise_cli/skills_base/rai-discover/SKILL.md +167 -0
  214. raise_cli/skills_base/rai-discover-document/SKILL.md +128 -0
  215. raise_cli/skills_base/rai-discover-scan/SKILL.md +147 -0
  216. raise_cli/skills_base/rai-discover-start/SKILL.md +145 -0
  217. raise_cli/skills_base/rai-discover-validate/SKILL.md +142 -0
  218. raise_cli/skills_base/rai-docs-update/SKILL.md +142 -0
  219. raise_cli/skills_base/rai-doctor/SKILL.md +120 -0
  220. raise_cli/skills_base/rai-epic-close/SKILL.md +165 -0
  221. raise_cli/skills_base/rai-epic-close/templates/retrospective.md +68 -0
  222. raise_cli/skills_base/rai-epic-design/SKILL.md +146 -0
  223. raise_cli/skills_base/rai-epic-design/templates/design.md +24 -0
  224. raise_cli/skills_base/rai-epic-design/templates/scope.md +76 -0
  225. raise_cli/skills_base/rai-epic-plan/SKILL.md +153 -0
  226. raise_cli/skills_base/rai-epic-plan/_references/sequencing-strategies.md +67 -0
  227. raise_cli/skills_base/rai-epic-plan/templates/plan-section.md +49 -0
  228. raise_cli/skills_base/rai-epic-run/SKILL.md +208 -0
  229. raise_cli/skills_base/rai-epic-start/SKILL.md +136 -0
  230. raise_cli/skills_base/rai-epic-start/templates/brief.md +34 -0
  231. raise_cli/skills_base/rai-mcp-add/SKILL.md +176 -0
  232. raise_cli/skills_base/rai-mcp-remove/SKILL.md +120 -0
  233. raise_cli/skills_base/rai-mcp-status/SKILL.md +147 -0
  234. raise_cli/skills_base/rai-problem-shape/SKILL.md +138 -0
  235. raise_cli/skills_base/rai-project-create/SKILL.md +144 -0
  236. raise_cli/skills_base/rai-project-onboard/SKILL.md +162 -0
  237. raise_cli/skills_base/rai-quality-review/SKILL.md +189 -0
  238. raise_cli/skills_base/rai-research/SKILL.md +143 -0
  239. raise_cli/skills_base/rai-research/references/research-prompt-template.md +317 -0
  240. raise_cli/skills_base/rai-session-close/SKILL.md +176 -0
  241. raise_cli/skills_base/rai-session-start/SKILL.md +110 -0
  242. raise_cli/skills_base/rai-story-close/SKILL.md +198 -0
  243. raise_cli/skills_base/rai-story-design/SKILL.md +203 -0
  244. raise_cli/skills_base/rai-story-design/references/tech-design-story-v2.md +293 -0
  245. raise_cli/skills_base/rai-story-implement/SKILL.md +115 -0
  246. raise_cli/skills_base/rai-story-plan/SKILL.md +135 -0
  247. raise_cli/skills_base/rai-story-review/SKILL.md +178 -0
  248. raise_cli/skills_base/rai-story-run/SKILL.md +282 -0
  249. raise_cli/skills_base/rai-story-start/SKILL.md +166 -0
  250. raise_cli/skills_base/rai-story-start/templates/story.md +38 -0
  251. raise_cli/skills_base/rai-welcome/SKILL.md +134 -0
  252. raise_cli/telemetry/__init__.py +42 -0
  253. raise_cli/telemetry/schemas.py +285 -0
  254. raise_cli/telemetry/writer.py +217 -0
  255. raise_cli/tier/__init__.py +0 -0
  256. raise_cli/tier/context.py +134 -0
  257. raise_cli/viz/__init__.py +7 -0
  258. raise_cli/viz/generator.py +406 -0
  259. raise_cli-2.2.1.dist-info/METADATA +433 -0
  260. raise_cli-2.2.1.dist-info/RECORD +264 -0
  261. raise_cli-2.2.1.dist-info/WHEEL +4 -0
  262. raise_cli-2.2.1.dist-info/entry_points.txt +40 -0
  263. raise_cli-2.2.1.dist-info/licenses/LICENSE +190 -0
  264. raise_cli-2.2.1.dist-info/licenses/NOTICE +4 -0
@@ -0,0 +1,691 @@
1
+ """Deterministic analyzer for discovery scan results.
2
+
3
+ Enriches raw scan output with confidence scores, path-based categories,
4
+ hierarchical folding (methods into classes), and module grouping for
5
+ parallel AI synthesis. No AI inference required — all signals are deterministic.
6
+
7
+ Architecture: E13 Discovery improvement (discover-validate-scaling story)
8
+
9
+ Example:
10
+ >>> from raise_cli.discovery.analyzer import compute_confidence, match_path_category
11
+ >>> from raise_cli.discovery.scanner import Symbol
12
+ >>> sym = Symbol(name="Foo", kind="class", file="src/schemas/foo.py",
13
+ ... line=1, signature="class Foo(BaseModel)")
14
+ >>> cat = match_path_category(sym.file)
15
+ >>> result = compute_confidence(sym, cat)
16
+ >>> result.tier
17
+ 'low'
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from pathlib import PurePosixPath
23
+ from typing import Literal
24
+
25
+ from pydantic import BaseModel, Field
26
+
27
+ from raise_cli.discovery.scanner import ScanResult, Symbol
28
+
29
# ── Type aliases ──────────────────────────────────────────────────────────

# The three tiers a confidence score is bucketed into by compute_confidence().
ConfidenceTier = Literal["high", "medium", "low"]

# ── Category mapping constants ────────────────────────────────────────────

# Directory pattern -> category, consumed by match_path_category().
# A pattern only counts when it sits on a directory boundary (start of the
# path or right after a "/"); among matching patterns the longest one wins,
# and on equal length the entry listed first here is kept.
DEFAULT_CATEGORY_MAP: dict[str, str] = {
    # Python (raise-cli conventions)
    "cli/commands/": "command",
    "cli/": "utility",
    "schemas/": "schema",
    "models/": "model",
    "output/": "formatter",
    "governance/": "parser",
    "context/": "builder",
    "discovery/": "service",
    "memory/": "service",
    "onboarding/": "service",
    "config/": "utility",
    "core/": "utility",
    "telemetry/": "service",
    # Laravel/PHP
    "Controllers/": "controller",
    "Models/": "model",
    "Middleware/": "middleware",
    "Providers/": "provider",
    "Services/": "service",
    "Requests/": "schema",
    "Resources/": "formatter",
    "routes/": "route",
    "Migrations/": "migration",
    # Svelte/TS/JS
    "components/": "component",
    "stores/": "store",
    "lib/": "utility",
    "utils/": "utility",
    "types/": "schema",
    "hooks/": "utility",
    "api/": "service",
    # C#/.NET (Clean Architecture conventions — leaf directories only,
    # avoid broad layer dirs like Infrastructure/ that shadow more specific ones)
    "Repositories/": "repository",
    "Handlers/": "service",
    "Commands/": "command",
    "Queries/": "query",
    "Validators/": "validator",
}

# Name fragment -> category, consumed by determine_category().
# Entries are tried in insertion order and the first hit is returned, so the
# order of this dict is significant. "test_" is matched as a name prefix,
# "Test" as a prefix on classes only; every other key is matched as a suffix.
NAME_CATEGORY_OVERRIDES: dict[str, str] = {
    "Error": "exception",
    "Warning": "exception",
    "Settings": "config",
    "Config": "config",
    "Test": "test",
    "test_": "test",
    # C#/.NET name suffixes
    "Handler": "service",
    "Repository": "repository",
    "RepositoryAsync": "repository",
    "Command": "command",
    "Query": "query",
    "Validator": "validator",
    "Controller": "controller",
    "Middleware": "middleware",
    "Extension": "utility",
    "Factory": "utility",
}

# Base-class name -> category.
# compute_confidence() scans the keys in insertion order and records the first
# one that occurs as a substring of the symbol's signature; determine_category()
# then maps that recorded key to its category.
BASE_CLASS_CATEGORIES: dict[str, str] = {
    "BaseModel": "model",
    "Exception": "exception",
    "BaseSettings": "config",
    "TypedDict": "schema",
    # C#/.NET common base classes
    "ControllerBase": "controller",
    "Controller": "controller",
    "DbContext": "service",
    "IRequestHandler": "service",
}

# C# name suffixes that indicate clear semantic intent.
# When present, confidence gets a +15 boost (same as parent context).
# Only consulted for files ending in ".cs", against the last dot-separated
# segment of the symbol name.
CSHARP_SEMANTIC_SUFFIXES: frozenset[str] = frozenset(
    {
        "Handler",
        "Repository",
        "RepositoryAsync",
        "Command",
        "Query",
        "Validator",
        "Controller",
        "Middleware",
        "Factory",
        "Extension",
        "Service",
        "Manager",
    }
)
127
+
128
+
129
+ # ── Pydantic models ──────────────────────────────────────────────────────
130
+
131
+
132
class ConfidenceSignals(BaseModel):
    """Deterministic signals used to compute confidence score.

    Each signal maps to a specific condition detected in the source symbol.
    All signals are boolean or simple values — no AI inference involved.

    Every field defaults to its "not detected" value; compute_confidence()
    only overwrites a field when the corresponding check passes.
    """

    # True when the symbol carries a non-empty docstring.
    has_docstring: bool = False
    # len() of the docstring; stays 0 when there is no docstring.
    docstring_length: int = 0
    # True when the signature contains an annotation marker
    # ("->" or ": " in general; ": " or "<" for C# files).
    has_type_annotations: bool = False
    # True when a path category was resolved for the symbol's file.
    path_matches_convention: bool = False
    # First BASE_CLASS_CATEGORIES key found inside the signature, if any.
    known_base_class: str | None = None
    # True when the name's casing fits the convention checked for its kind.
    name_follows_convention: bool = False
    # True when the symbol has a parent (e.g. a method inside a class).
    parent_validated: bool = False
    has_semantic_suffix: bool = False  # C#: name ends with known semantic suffix
147
+
148
+
149
class ConfidenceResult(BaseModel):
    """Confidence assessment for a component.

    Attributes:
        score: Confidence score from 0 to 100 (the bounds are enforced by
            the field constraints).
        tier: Derived tier — high (>=70), medium (40-69), low (<40).
        signals: Individual signals that contributed to the score.
    """

    score: int = Field(ge=0, le=100)
    # Not computed by this model: whoever builds the result supplies the tier
    # (compute_confidence() derives it from the score with the thresholds above).
    tier: ConfidenceTier
    signals: ConfidenceSignals
161
+
162
+
163
class AnalyzedComponent(BaseModel):
    """A component enriched with deterministic analysis.

    Attributes:
        id: Component ID of the form "comp-<module>-<name>" as built by
            build_hierarchy(), e.g. "comp-raise_cli.discovery.scanner-Symbol".
            Module-kind entries use the literal suffix "module" instead of
            the symbol name.
        name: Symbol name.
        kind: Symbol kind (class, function, module, ...). Methods are folded
            into their class by build_hierarchy() rather than emitted as
            components of their own.
        file: Relative path to source file.
        line: Line number (1-indexed).
        signature: Full signature string.
        module: Dotted module path derived from the file path (source prefix
            and known code extension stripped).
        confidence: Confidence assessment.
        auto_category: Deterministic category from path/name conventions.
        auto_purpose: First sentence of docstring, or empty string.
        depends_on: Dependencies extracted from signature.
        internal: Whether this is an internal (underscore-prefixed) symbol.
        methods: Method names if kind=class (folded in).
        docstring: Original docstring, if available.
    """

    id: str
    name: str
    kind: str
    file: str
    line: int
    signature: str
    module: str
    confidence: ConfidenceResult
    auto_category: str
    auto_purpose: str
    # default_factory gives every instance its own list.
    depends_on: list[str] = Field(default_factory=list)
    internal: bool = False
    methods: list[str] = Field(default_factory=list)
    docstring: str | None = None
197
+
198
+
199
class AnalysisResult(BaseModel):
    """Complete analysis output — deterministic, no AI needed.

    Attributes:
        scan_summary: Aggregate scan statistics; each value is either a
            count or a list of strings.
        confidence_distribution: Count of components per confidence tier.
        categories: Count of components per category.
        components: All analyzed components.
        module_groups: Component IDs grouped by source file (for parallel AI
            synthesis batches). Defaults to an empty dict.
    """

    scan_summary: dict[str, int | list[str]]
    confidence_distribution: dict[str, int]
    categories: dict[str, int]
    components: list[AnalyzedComponent]
    # Same shape as the mapping returned by group_by_module(): file -> IDs.
    module_groups: dict[str, list[str]] = Field(default_factory=dict)
215
+
216
+
217
+ # ── Functions ─────────────────────────────────────────────────────────────
218
+
219
+
220
def match_path_category(
    file_path: str,
    category_map: dict[str, str] | None = None,
) -> str | None:
    """Match a file path against convention-based category patterns.

    A pattern matches when it occurs in the path on a directory boundary,
    i.e. at the very start of the path or immediately after a "/". This
    keeps "cli/" from matching inside "raise_cli/". Among all matching
    patterns the longest one wins, so "cli/commands/" beats "cli/"; when two
    matching patterns have the same length, the one listed first in the map
    is kept.

    Backslashes in ``file_path`` are treated as "/" before matching, so
    Windows-style paths resolve to the same category as their POSIX form
    (patterns themselves are always written with "/").

    Args:
        file_path: Relative path to the source file.
        category_map: Custom category map. If None, uses DEFAULT_CATEGORY_MAP.

    Returns:
        Category string if a match is found, None otherwise.

    Example:
        >>> match_path_category("src/raise_cli/cli/commands/discover.py")
        'command'
        >>> match_path_category("src/raise_cli/unknown/foo.py")
    """
    categories = category_map if category_map is not None else DEFAULT_CATEGORY_MAP

    # Patterns use forward slashes; normalize so "src\\schemas\\foo.py"
    # is matched exactly like "src/schemas/foo.py".
    normalized = file_path.replace("\\", "/")

    best_match: str | None = None
    best_length = 0

    for pattern, category in categories.items():
        # Only a strictly longer pattern can replace the current best, which
        # also preserves "first listed wins" for equal-length patterns.
        if len(pattern) <= best_length:
            continue
        # Directory boundary: pattern opens the path, or follows a "/".
        if normalized.startswith(pattern) or f"/{pattern}" in normalized:
            best_match = category
            best_length = len(pattern)

    return best_match
265
+
266
+
267
def compute_confidence(
    symbol: Symbol,
    path_category: str | None,
) -> ConfidenceResult:
    """Score how much deterministic evidence backs up a symbol.

    Points awarded per signal:
        1. Non-empty docstring: +30, plus +10 more if longer than 20 chars
        2. Type-annotation marker in the signature: +10
        3. File path matched a known convention: +20
        4. Known base class named in the signature: +10
        5. Name casing follows the language convention: +5
        6. Symbol has a parent (method inside a class): +15
        7. Semantic name suffix [C# files only]: +15

    The points add up to 100 for non-C# symbols and to 115 for C# symbols;
    the final score is capped at 100.

    Tier thresholds:
        - High: score >= 70
        - Medium: 40 <= score < 70
        - Low: score < 40

    C# note: XML doc comments (///) are not yet extracted by the scanner
    (tracked in RAISE-225). Until then, Signal 1 will always be 0 for C#
    symbols. Signals 2, 5, and 7 compensate for this gap.

    Args:
        symbol: The Symbol to score.
        path_category: Category from match_path_category(), or None.

    Returns:
        ConfidenceResult with score, tier, and detailed signals.
    """
    signals = ConfidenceSignals()
    points = 0
    signature = symbol.signature
    csharp = symbol.file.endswith(".cs")

    # Signal 1: docstring present (+30), substantial docstring (+10 extra)
    doc = symbol.docstring
    if doc:
        signals.has_docstring = True
        signals.docstring_length = len(doc)
        points += 40 if len(doc) > 20 else 30

    # Signal 2: annotation markers (+10)
    # C# counts ": " and generics "<" (Task<T>, IRequestHandler<Q,R>);
    # everything else counts a return arrow "->" or a parameter hint ": ".
    annotation_markers = (": ", "<") if csharp else ("->", ": ")
    if any(marker in signature for marker in annotation_markers):
        signals.has_type_annotations = True
        points += 10

    # Signal 3: path convention (+20)
    if path_category:
        signals.path_matches_convention = True
        points += 20

    # Signal 4: first known base class mentioned in the signature (+10)
    matched_base = next(
        (base for base in BASE_CLASS_CATEGORIES if base in signature), None
    )
    if matched_base is not None:
        signals.known_base_class = matched_base
        points += 10

    # Signal 5: naming convention (+5)
    # C#: public symbols are PascalCase (classes AND methods); namespace
    # qualifiers are stripped first. Otherwise: classes start uppercase,
    # functions/methods are all lowercase.
    if csharp:
        leaf_name = symbol.name.split(".")[-1]
        conventional = bool(leaf_name) and leaf_name[0].isupper()
    elif symbol.kind == "class":
        conventional = bool(symbol.name) and symbol.name[0].isupper()
    else:
        conventional = symbol.kind in ("function", "method") and symbol.name.islower()
    if conventional:
        signals.name_follows_convention = True
        points += 5

    # Signal 6: parent class context (+15)
    if symbol.parent:
        signals.parent_validated = True
        points += 15

    # Signal 7: semantic suffix, C# only (+15)
    # Handler, Repository, Command, ... are deliberate architectural markers.
    if csharp and leaf_name.endswith(tuple(CSHARP_SEMANTIC_SUFFIXES)):
        signals.has_semantic_suffix = True
        points += 15

    # Cap, then bucket into a tier.
    points = min(points, 100)
    tier: ConfidenceTier = (
        "high" if points >= 70 else "medium" if points >= 40 else "low"
    )

    return ConfidenceResult(score=points, tier=tier, signals=signals)
381
+
382
+
383
def extract_first_sentence(docstring: str | None) -> str:
    """Return the leading sentence of a docstring.

    Only the first line of the stripped docstring is considered. The result
    runs up to and including the first "." on that line; if the line has no
    period, the whole line is returned.

    Args:
        docstring: Raw docstring text, or None.

    Returns:
        The first sentence or first line as described above, or an empty
        string when the docstring is None, empty, or whitespace only.
    """
    stripped = (docstring or "").strip()
    if not stripped:
        return ""
    headline = stripped.partition("\n")[0].strip()
    # partition() yields an empty separator when there is no period, so the
    # concatenation below is either "sentence." or the untouched headline.
    sentence, period, _rest = headline.partition(".")
    return sentence + period
405
+
406
+
407
def determine_category(
    name: str,
    kind: str,
    path_category: str | None,
    base_class: str | None = None,
) -> str:
    """Pick a component category from name, base class, and path signals.

    The first source that yields a category wins, in this order:
    name override → base class → path convention → "other".

    Name overrides are tried in the order they are listed in
    NAME_CATEGORY_OVERRIDES: "test_" is a name prefix, "Test" is a name
    prefix that only applies to classes, and every other entry is a suffix.

    Args:
        name: Symbol name.
        kind: Symbol kind (class, function, etc.).
        path_category: Category from match_path_category(), or None.
        base_class: Known base class from confidence signals, or None.

    Returns:
        Category string.
    """
    # 1) Name-based overrides, first hit wins.
    for marker, category in NAME_CATEGORY_OVERRIDES.items():
        if marker == "test_":
            hit = name.startswith(marker)
        elif marker == "Test":
            hit = kind == "class" and name.startswith(marker)
        else:
            hit = name.endswith(marker)
        if hit:
            return category

    # 2) Recognized base class.
    if base_class and base_class in BASE_CLASS_CATEGORIES:
        return BASE_CLASS_CATEGORIES[base_class]

    # 3) Path convention, else the catch-all bucket.
    return path_category or "other"
444
+
445
+
446
+ _SOURCE_PREFIXES = ("src", "app", "lib")
447
+ _CODE_EXTENSIONS = {
448
+ ".py",
449
+ ".php",
450
+ ".ts",
451
+ ".tsx",
452
+ ".js",
453
+ ".jsx",
454
+ ".svelte",
455
+ ".cs",
456
+ ".dart",
457
+ }
458
+
459
+
460
+ def _file_to_module(file_path: str) -> str:
461
+ """Convert a file path to a dotted module path.
462
+
463
+ Strips common source prefixes (src/, app/, lib/) and known code
464
+ extensions (.py, .php, .ts, .tsx, .js, .jsx, .svelte).
465
+
466
+ Args:
467
+ file_path: Relative file path (e.g., "src/raise_cli/discovery/scanner.py"
468
+ or "app/Http/Controllers/UserController.php").
469
+
470
+ Returns:
471
+ Dotted module path (e.g., "raise_cli.discovery.scanner"
472
+ or "Http.Controllers.UserController").
473
+ """
474
+ # Normalize Windows backslashes before PurePosixPath — paths may arrive
475
+ # with backslashes on Windows or from PHP/C# namespace-derived paths.
476
+ p = PurePosixPath(file_path.replace("\\", "/"))
477
+ parts = list(p.parts)
478
+ # Strip common source prefixes
479
+ if parts and parts[0] in _SOURCE_PREFIXES:
480
+ parts = parts[1:]
481
+ # Remove known code extension from last part
482
+ if parts:
483
+ last = PurePosixPath(parts[-1])
484
+ if last.suffix in _CODE_EXTENSIONS:
485
+ parts[-1] = last.stem
486
+ return ".".join(parts)
487
+
488
+
489
def _make_component(
    symbol: Symbol,
    *,
    id_name: str,
    methods: list[str],
) -> AnalyzedComponent:
    """Wrap a symbol in an AnalyzedComponent with placeholder analysis fields.

    Confidence, category, and purpose are filled with neutral defaults
    (score 0 / "low", "other", ""); they are meant to be overwritten by later
    pipeline stages.
    """
    module = _file_to_module(symbol.file)
    return AnalyzedComponent(
        id=f"comp-{module}-{id_name}",
        name=symbol.name,
        kind=symbol.kind,
        file=symbol.file,
        line=symbol.line,
        signature=symbol.signature,
        module=module,
        confidence=ConfidenceResult(score=0, tier="low", signals=ConfidenceSignals()),
        auto_category="other",
        auto_purpose="",
        internal=symbol.name.startswith("_"),
        methods=methods,
        docstring=symbol.docstring,
    )


def build_hierarchy(symbols: list[Symbol]) -> list[AnalyzedComponent]:
    """Fold methods into their parent classes.

    Classes become single units with a methods list.
    Standalone functions and modules remain individual units.
    Methods with missing parent classes are dropped.

    Classes are identified by (file, name), not by name alone, so two classes
    that share a name but live in different files each produce their own unit
    and only receive the methods declared in their own file. Keying by name
    alone would silently collapse such classes into one unit and mix their
    methods together.

    Args:
        symbols: List of Symbol objects to organize.

    Returns:
        List of AnalyzedComponent units with methods folded into classes.
        Class units come first (in order of first appearance), followed by
        all non-class, non-method symbols in input order.
    """
    # Pass 1: collect classes. A repeated (file, name) keeps its first
    # position but the last symbol seen, as a plain dict assignment does.
    class_symbols: dict[tuple[str, str], Symbol] = {}
    for s in symbols:
        if s.kind == "class":
            class_symbols[(s.file, s.name)] = s

    # Pass 2: attach method names to the class declared in the same file.
    # Done after pass 1 so a method listed before its class is still folded.
    class_methods: dict[tuple[str, str], list[str]] = {key: [] for key in class_symbols}
    for s in symbols:
        if s.kind == "method" and s.parent:
            bucket = class_methods.get((s.file, s.parent))
            if bucket is not None:  # parent class unknown -> method is dropped
                bucket.append(s.name)

    # Class units (with methods folded in)
    units: list[AnalyzedComponent] = [
        _make_component(class_sym, id_name=class_sym.name, methods=class_methods[key])
        for key, class_sym in class_symbols.items()
    ]

    # Standalone symbols: everything that's not class or method
    # (exclude-based routing — future kinds automatically become standalone)
    for s in symbols:
        if s.kind in ("class", "method"):
            continue
        # Use "module" as suffix for module-level entries to avoid
        # collisions with same-named functions (e.g., test_version.py
        # has both module "test_version" and function "test_version")
        id_name = "module" if s.kind == "module" else s.name
        units.append(_make_component(s, id_name=id_name, methods=[]))

    return units
568
+
569
+
570
def _build_hierarchy_with_symbols(
    symbols: list[Symbol],
) -> tuple[list[AnalyzedComponent], dict[str, Symbol]]:
    """Build hierarchy and return a map of component ID → original Symbol.

    Used internally by analyze() to preserve original Symbol objects
    for type-safe confidence scoring.

    Args:
        symbols: Symbols to fold into components via build_hierarchy().

    Returns:
        Tuple of (components, symbol_map). symbol_map has one entry per
        distinct component ID; when several components share an ID, the
        symbol of the first one is kept.
    """
    units = build_hierarchy(symbols)

    # Key on (name, file, kind, line) rather than (name, file) alone.
    # A file can hold several symbols with the same name (e.g. a module
    # entry and a same-named function, or a method and a same-named
    # standalone function); a name+file key would let one silently
    # replace the other and hand the wrong Symbol to the scorer.
    sym_lookup: dict[tuple[str, str, str, int], Symbol] = {}
    for s in symbols:
        sym_lookup[(s.name, s.file, s.kind, s.line)] = s

    symbol_map: dict[str, Symbol] = {}
    for unit in units:
        # analyze() keeps the first component for a duplicated ID, so the
        # first mapping must win here too; otherwise the surviving
        # component would be scored with a later duplicate's Symbol.
        if unit.id in symbol_map:
            continue
        original = sym_lookup.get((unit.name, unit.file, unit.kind, unit.line))
        if original is not None:
            symbol_map[unit.id] = original
        else:
            # Fallback: create a minimal Symbol (shouldn't happen normally,
            # since every unit is built from one of the input symbols).
            symbol_map[unit.id] = Symbol(
                name=unit.name,
                kind="class",
                file=unit.file,
                line=unit.line,
                signature=unit.signature,
                docstring=unit.docstring,
            )
    return units, symbol_map
600
+
601
+
602
def group_by_module(components: list[AnalyzedComponent]) -> dict[str, list[str]]:
    """Bucket component IDs under the source file they were found in.

    Each bucket is intended to be handled as one batch for parallel AI
    synthesis.

    Args:
        components: List of analyzed components.

    Returns:
        Dict mapping file path to the IDs of the components in that file.
        Files appear in first-seen order and IDs keep their input order.
    """
    by_file: dict[str, list[str]] = {}
    for component in components:
        bucket = by_file.get(component.file)
        if bucket is None:
            # First component seen for this file: start a new bucket.
            by_file[component.file] = [component.id]
        else:
            bucket.append(component.id)
    return by_file
617
+
618
+
619
def analyze(
    scan_result: ScanResult,
    category_map: dict[str, str] | None = None,
) -> AnalysisResult:
    """Execute the deterministic analysis pipeline end to end.

    Steps, in order: split public from internal symbols → build the
    component hierarchy → drop duplicate component IDs → score confidence,
    categorize and extract a purpose for each component → tally statistics
    and group components by file.

    Args:
        scan_result: Raw scan output from raise discover scan.
        category_map: Optional custom path-to-category mapping.

    Returns:
        AnalysisResult with scored, categorized, module-grouped components.
    """
    import warnings

    every_symbol = scan_result.symbols

    # Split once on the leading-underscore convention; only the public
    # half goes through the pipeline, the internal half is just counted.
    exposed: list[Symbol] = []
    hidden: list[Symbol] = []
    for sym in every_symbol:
        if sym.name.startswith("_"):
            hidden.append(sym)
        else:
            exposed.append(sym)

    # Fold methods into classes; the returned map lets scoring reuse the
    # original Symbol behind every component.
    components, originals = _build_hierarchy_with_symbols(exposed)

    # Duplicate IDs can occur with generated dirs (.astro/, __pycache__/)
    # or Windows paths. The first occurrence wins; later ones are reported
    # and dropped.
    first_file_for_id: dict[str, str] = {}
    unique_components: list[AnalyzedComponent] = []
    for component in components:
        if component.id not in first_file_for_id:
            first_file_for_id[component.id] = component.file
            unique_components.append(component)
            continue
        warnings.warn(
            f"Duplicate component ID '{component.id}' in {component.file} "
            f"(already seen in {first_file_for_id[component.id]}) — skipping duplicate.",
            stacklevel=2,
        )
    components = unique_components

    # Confidence, category and purpose are written back onto each component.
    for component in components:
        path_category = match_path_category(component.file, category_map)
        confidence = compute_confidence(originals[component.id], path_category)
        component.confidence = confidence
        component.auto_category = determine_category(
            component.name,
            component.kind,
            path_category,
            confidence.signals.known_base_class,
        )
        component.auto_purpose = extract_first_sentence(component.docstring)

    # Tally how many components landed in each tier and category.
    per_tier: dict[str, int] = {"high": 0, "medium": 0, "low": 0}
    per_category: dict[str, int] = {}
    for component in components:
        per_tier[component.confidence.tier] += 1
        category = component.auto_category
        per_category[category] = per_category.get(category, 0) + 1

    summary = {
        "files_scanned": scan_result.files_scanned,
        "total_symbols": len(every_symbol),
        "public_symbols": len(exposed),
        "internal_symbols": len(hidden),
        "errors": scan_result.errors,
    }
    return AnalysisResult(
        scan_summary=summary,
        confidence_distribution=per_tier,
        categories=per_category,
        components=components,
        module_groups=group_by_module(components),
    )