raise-cli 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raise_cli/__init__.py +38 -0
- raise_cli/__main__.py +30 -0
- raise_cli/adapters/__init__.py +91 -0
- raise_cli/adapters/declarative/__init__.py +26 -0
- raise_cli/adapters/declarative/adapter.py +267 -0
- raise_cli/adapters/declarative/discovery.py +94 -0
- raise_cli/adapters/declarative/expressions.py +150 -0
- raise_cli/adapters/declarative/reference/__init__.py +1 -0
- raise_cli/adapters/declarative/reference/github.yaml +143 -0
- raise_cli/adapters/declarative/schema.py +98 -0
- raise_cli/adapters/filesystem.py +299 -0
- raise_cli/adapters/mcp_bridge.py +10 -0
- raise_cli/adapters/mcp_confluence.py +246 -0
- raise_cli/adapters/mcp_jira.py +405 -0
- raise_cli/adapters/models.py +205 -0
- raise_cli/adapters/protocols.py +180 -0
- raise_cli/adapters/registry.py +90 -0
- raise_cli/adapters/sync.py +149 -0
- raise_cli/agents/__init__.py +14 -0
- raise_cli/agents/antigravity.yaml +8 -0
- raise_cli/agents/claude.yaml +8 -0
- raise_cli/agents/copilot.yaml +8 -0
- raise_cli/agents/copilot_plugin.py +124 -0
- raise_cli/agents/cursor.yaml +7 -0
- raise_cli/agents/roo.yaml +8 -0
- raise_cli/agents/windsurf.yaml +8 -0
- raise_cli/artifacts/__init__.py +30 -0
- raise_cli/artifacts/models.py +43 -0
- raise_cli/artifacts/reader.py +55 -0
- raise_cli/artifacts/renderer.py +104 -0
- raise_cli/artifacts/story_design.py +69 -0
- raise_cli/artifacts/writer.py +45 -0
- raise_cli/backlog/__init__.py +1 -0
- raise_cli/backlog/sync.py +115 -0
- raise_cli/cli/__init__.py +3 -0
- raise_cli/cli/commands/__init__.py +3 -0
- raise_cli/cli/commands/_resolve.py +153 -0
- raise_cli/cli/commands/adapters.py +362 -0
- raise_cli/cli/commands/artifact.py +137 -0
- raise_cli/cli/commands/backlog.py +333 -0
- raise_cli/cli/commands/base.py +31 -0
- raise_cli/cli/commands/discover.py +551 -0
- raise_cli/cli/commands/docs.py +130 -0
- raise_cli/cli/commands/doctor.py +177 -0
- raise_cli/cli/commands/gate.py +223 -0
- raise_cli/cli/commands/graph.py +1086 -0
- raise_cli/cli/commands/info.py +81 -0
- raise_cli/cli/commands/init.py +746 -0
- raise_cli/cli/commands/journal.py +167 -0
- raise_cli/cli/commands/mcp.py +524 -0
- raise_cli/cli/commands/memory.py +467 -0
- raise_cli/cli/commands/pattern.py +348 -0
- raise_cli/cli/commands/profile.py +59 -0
- raise_cli/cli/commands/publish.py +80 -0
- raise_cli/cli/commands/release.py +338 -0
- raise_cli/cli/commands/session.py +528 -0
- raise_cli/cli/commands/signal.py +410 -0
- raise_cli/cli/commands/skill.py +350 -0
- raise_cli/cli/commands/skill_set.py +145 -0
- raise_cli/cli/error_handler.py +158 -0
- raise_cli/cli/main.py +163 -0
- raise_cli/compat.py +66 -0
- raise_cli/config/__init__.py +41 -0
- raise_cli/config/agent_plugin.py +105 -0
- raise_cli/config/agent_registry.py +233 -0
- raise_cli/config/agents.py +120 -0
- raise_cli/config/ide.py +32 -0
- raise_cli/config/paths.py +379 -0
- raise_cli/config/settings.py +180 -0
- raise_cli/context/__init__.py +42 -0
- raise_cli/context/analyzers/__init__.py +16 -0
- raise_cli/context/analyzers/models.py +36 -0
- raise_cli/context/analyzers/protocol.py +43 -0
- raise_cli/context/analyzers/python.py +292 -0
- raise_cli/context/builder.py +1569 -0
- raise_cli/context/diff.py +213 -0
- raise_cli/context/extractors/__init__.py +13 -0
- raise_cli/context/extractors/skills.py +121 -0
- raise_cli/core/__init__.py +37 -0
- raise_cli/core/files.py +66 -0
- raise_cli/core/text.py +174 -0
- raise_cli/core/tools.py +441 -0
- raise_cli/discovery/__init__.py +50 -0
- raise_cli/discovery/analyzer.py +691 -0
- raise_cli/discovery/drift.py +355 -0
- raise_cli/discovery/scanner.py +1687 -0
- raise_cli/doctor/__init__.py +4 -0
- raise_cli/doctor/checks/__init__.py +1 -0
- raise_cli/doctor/checks/environment.py +110 -0
- raise_cli/doctor/checks/project.py +238 -0
- raise_cli/doctor/fix.py +80 -0
- raise_cli/doctor/models.py +56 -0
- raise_cli/doctor/protocol.py +43 -0
- raise_cli/doctor/registry.py +100 -0
- raise_cli/doctor/report.py +141 -0
- raise_cli/doctor/runner.py +95 -0
- raise_cli/engines/__init__.py +3 -0
- raise_cli/exceptions.py +215 -0
- raise_cli/gates/__init__.py +19 -0
- raise_cli/gates/builtin/__init__.py +1 -0
- raise_cli/gates/builtin/coverage.py +52 -0
- raise_cli/gates/builtin/lint.py +48 -0
- raise_cli/gates/builtin/tests.py +48 -0
- raise_cli/gates/builtin/types.py +48 -0
- raise_cli/gates/models.py +40 -0
- raise_cli/gates/protocol.py +41 -0
- raise_cli/gates/registry.py +141 -0
- raise_cli/governance/__init__.py +11 -0
- raise_cli/governance/extractor.py +412 -0
- raise_cli/governance/models.py +134 -0
- raise_cli/governance/parsers/__init__.py +35 -0
- raise_cli/governance/parsers/_convert.py +38 -0
- raise_cli/governance/parsers/adr.py +274 -0
- raise_cli/governance/parsers/backlog.py +356 -0
- raise_cli/governance/parsers/constitution.py +119 -0
- raise_cli/governance/parsers/epic.py +323 -0
- raise_cli/governance/parsers/glossary.py +316 -0
- raise_cli/governance/parsers/guardrails.py +345 -0
- raise_cli/governance/parsers/prd.py +112 -0
- raise_cli/governance/parsers/roadmap.py +118 -0
- raise_cli/governance/parsers/vision.py +116 -0
- raise_cli/graph/__init__.py +1 -0
- raise_cli/graph/backends/__init__.py +57 -0
- raise_cli/graph/backends/api.py +137 -0
- raise_cli/graph/backends/dual.py +139 -0
- raise_cli/graph/backends/pending.py +84 -0
- raise_cli/handlers/__init__.py +3 -0
- raise_cli/hooks/__init__.py +54 -0
- raise_cli/hooks/builtin/__init__.py +1 -0
- raise_cli/hooks/builtin/backlog.py +216 -0
- raise_cli/hooks/builtin/gate_bridge.py +83 -0
- raise_cli/hooks/builtin/jira_sync.py +127 -0
- raise_cli/hooks/builtin/memory.py +117 -0
- raise_cli/hooks/builtin/telemetry.py +72 -0
- raise_cli/hooks/emitter.py +184 -0
- raise_cli/hooks/events.py +262 -0
- raise_cli/hooks/protocol.py +38 -0
- raise_cli/hooks/registry.py +117 -0
- raise_cli/mcp/__init__.py +33 -0
- raise_cli/mcp/bridge.py +218 -0
- raise_cli/mcp/models.py +43 -0
- raise_cli/mcp/registry.py +77 -0
- raise_cli/mcp/schema.py +41 -0
- raise_cli/memory/__init__.py +58 -0
- raise_cli/memory/loader.py +247 -0
- raise_cli/memory/migration.py +241 -0
- raise_cli/memory/models.py +169 -0
- raise_cli/memory/writer.py +598 -0
- raise_cli/onboarding/__init__.py +103 -0
- raise_cli/onboarding/bootstrap.py +324 -0
- raise_cli/onboarding/claudemd.py +17 -0
- raise_cli/onboarding/conventions.py +742 -0
- raise_cli/onboarding/detection.py +374 -0
- raise_cli/onboarding/governance.py +443 -0
- raise_cli/onboarding/instructions.py +672 -0
- raise_cli/onboarding/manifest.py +201 -0
- raise_cli/onboarding/memory_md.py +399 -0
- raise_cli/onboarding/migration.py +207 -0
- raise_cli/onboarding/profile.py +624 -0
- raise_cli/onboarding/skill_conflict.py +100 -0
- raise_cli/onboarding/skill_manifest.py +176 -0
- raise_cli/onboarding/skills.py +437 -0
- raise_cli/onboarding/workflows.py +101 -0
- raise_cli/output/__init__.py +28 -0
- raise_cli/output/console.py +394 -0
- raise_cli/output/formatters/__init__.py +9 -0
- raise_cli/output/formatters/adapters.py +135 -0
- raise_cli/output/formatters/discover.py +439 -0
- raise_cli/output/formatters/skill.py +298 -0
- raise_cli/publish/__init__.py +3 -0
- raise_cli/publish/changelog.py +80 -0
- raise_cli/publish/check.py +179 -0
- raise_cli/publish/version.py +172 -0
- raise_cli/rai_base/__init__.py +22 -0
- raise_cli/rai_base/framework/__init__.py +7 -0
- raise_cli/rai_base/framework/methodology.yaml +233 -0
- raise_cli/rai_base/governance/__init__.py +1 -0
- raise_cli/rai_base/governance/architecture/__init__.py +1 -0
- raise_cli/rai_base/governance/architecture/domain-model.md +20 -0
- raise_cli/rai_base/governance/architecture/system-context.md +34 -0
- raise_cli/rai_base/governance/architecture/system-design.md +24 -0
- raise_cli/rai_base/governance/backlog.md +8 -0
- raise_cli/rai_base/governance/guardrails.md +17 -0
- raise_cli/rai_base/governance/prd.md +25 -0
- raise_cli/rai_base/governance/vision.md +16 -0
- raise_cli/rai_base/identity/__init__.py +8 -0
- raise_cli/rai_base/identity/core.md +119 -0
- raise_cli/rai_base/identity/perspective.md +119 -0
- raise_cli/rai_base/memory/__init__.py +7 -0
- raise_cli/rai_base/memory/patterns-base.jsonl +55 -0
- raise_cli/schemas/__init__.py +3 -0
- raise_cli/schemas/journal.py +49 -0
- raise_cli/schemas/session_state.py +117 -0
- raise_cli/session/__init__.py +5 -0
- raise_cli/session/bundle.py +820 -0
- raise_cli/session/close.py +268 -0
- raise_cli/session/journal.py +119 -0
- raise_cli/session/resolver.py +126 -0
- raise_cli/session/state.py +187 -0
- raise_cli/skills/__init__.py +44 -0
- raise_cli/skills/locator.py +141 -0
- raise_cli/skills/name_checker.py +199 -0
- raise_cli/skills/parser.py +145 -0
- raise_cli/skills/scaffold.py +212 -0
- raise_cli/skills/schema.py +132 -0
- raise_cli/skills/skillsets.py +195 -0
- raise_cli/skills/validator.py +197 -0
- raise_cli/skills_base/__init__.py +80 -0
- raise_cli/skills_base/contract-template.md +60 -0
- raise_cli/skills_base/preamble.md +37 -0
- raise_cli/skills_base/rai-architecture-review/SKILL.md +137 -0
- raise_cli/skills_base/rai-debug/SKILL.md +171 -0
- raise_cli/skills_base/rai-discover/SKILL.md +167 -0
- raise_cli/skills_base/rai-discover-document/SKILL.md +128 -0
- raise_cli/skills_base/rai-discover-scan/SKILL.md +147 -0
- raise_cli/skills_base/rai-discover-start/SKILL.md +145 -0
- raise_cli/skills_base/rai-discover-validate/SKILL.md +142 -0
- raise_cli/skills_base/rai-docs-update/SKILL.md +142 -0
- raise_cli/skills_base/rai-doctor/SKILL.md +120 -0
- raise_cli/skills_base/rai-epic-close/SKILL.md +165 -0
- raise_cli/skills_base/rai-epic-close/templates/retrospective.md +68 -0
- raise_cli/skills_base/rai-epic-design/SKILL.md +146 -0
- raise_cli/skills_base/rai-epic-design/templates/design.md +24 -0
- raise_cli/skills_base/rai-epic-design/templates/scope.md +76 -0
- raise_cli/skills_base/rai-epic-plan/SKILL.md +153 -0
- raise_cli/skills_base/rai-epic-plan/_references/sequencing-strategies.md +67 -0
- raise_cli/skills_base/rai-epic-plan/templates/plan-section.md +49 -0
- raise_cli/skills_base/rai-epic-run/SKILL.md +208 -0
- raise_cli/skills_base/rai-epic-start/SKILL.md +136 -0
- raise_cli/skills_base/rai-epic-start/templates/brief.md +34 -0
- raise_cli/skills_base/rai-mcp-add/SKILL.md +176 -0
- raise_cli/skills_base/rai-mcp-remove/SKILL.md +120 -0
- raise_cli/skills_base/rai-mcp-status/SKILL.md +147 -0
- raise_cli/skills_base/rai-problem-shape/SKILL.md +138 -0
- raise_cli/skills_base/rai-project-create/SKILL.md +144 -0
- raise_cli/skills_base/rai-project-onboard/SKILL.md +162 -0
- raise_cli/skills_base/rai-quality-review/SKILL.md +189 -0
- raise_cli/skills_base/rai-research/SKILL.md +143 -0
- raise_cli/skills_base/rai-research/references/research-prompt-template.md +317 -0
- raise_cli/skills_base/rai-session-close/SKILL.md +176 -0
- raise_cli/skills_base/rai-session-start/SKILL.md +110 -0
- raise_cli/skills_base/rai-story-close/SKILL.md +198 -0
- raise_cli/skills_base/rai-story-design/SKILL.md +203 -0
- raise_cli/skills_base/rai-story-design/references/tech-design-story-v2.md +293 -0
- raise_cli/skills_base/rai-story-implement/SKILL.md +115 -0
- raise_cli/skills_base/rai-story-plan/SKILL.md +135 -0
- raise_cli/skills_base/rai-story-review/SKILL.md +178 -0
- raise_cli/skills_base/rai-story-run/SKILL.md +282 -0
- raise_cli/skills_base/rai-story-start/SKILL.md +166 -0
- raise_cli/skills_base/rai-story-start/templates/story.md +38 -0
- raise_cli/skills_base/rai-welcome/SKILL.md +134 -0
- raise_cli/telemetry/__init__.py +42 -0
- raise_cli/telemetry/schemas.py +285 -0
- raise_cli/telemetry/writer.py +217 -0
- raise_cli/tier/__init__.py +0 -0
- raise_cli/tier/context.py +134 -0
- raise_cli/viz/__init__.py +7 -0
- raise_cli/viz/generator.py +406 -0
- raise_cli-2.2.1.dist-info/METADATA +433 -0
- raise_cli-2.2.1.dist-info/RECORD +264 -0
- raise_cli-2.2.1.dist-info/WHEEL +4 -0
- raise_cli-2.2.1.dist-info/entry_points.txt +40 -0
- raise_cli-2.2.1.dist-info/licenses/LICENSE +190 -0
- raise_cli-2.2.1.dist-info/licenses/NOTICE +4 -0
|
@@ -0,0 +1,691 @@
|
|
|
1
|
+
"""Deterministic analyzer for discovery scan results.
|
|
2
|
+
|
|
3
|
+
Enriches raw scan output with confidence scores, path-based categories,
|
|
4
|
+
hierarchical folding (methods into classes), and module grouping for
|
|
5
|
+
parallel AI synthesis. No AI inference required — all signals are deterministic.
|
|
6
|
+
|
|
7
|
+
Architecture: E13 Discovery improvement (discover-validate-scaling story)
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
>>> from raise_cli.discovery.analyzer import compute_confidence, match_path_category
|
|
11
|
+
>>> from raise_cli.discovery.scanner import Symbol
|
|
12
|
+
>>> sym = Symbol(name="Foo", kind="class", file="src/schemas/foo.py",
|
|
13
|
+
... line=1, signature="class Foo(BaseModel)")
|
|
14
|
+
>>> cat = match_path_category(sym.file)
|
|
15
|
+
>>> result = compute_confidence(sym, cat)
|
|
16
|
+
>>> result.tier
|
|
17
|
+
'high'
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from pathlib import PurePosixPath
|
|
23
|
+
from typing import Literal
|
|
24
|
+
|
|
25
|
+
from pydantic import BaseModel, Field
|
|
26
|
+
|
|
27
|
+
from raise_cli.discovery.scanner import ScanResult, Symbol
|
|
28
|
+
|
|
29
|
+
# ── Type aliases ──────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
# Closed set of tier labels stored on ConfidenceResult.tier.
# compute_confidence() assigns them from the score: >= 70 "high",
# >= 40 "medium", otherwise "low".
ConfidenceTier = Literal["high", "medium", "low"]
|
|
32
|
+
|
|
33
|
+
# ── Category mapping constants ────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
# Directory fragment -> category. Used by match_path_category() whenever the
# caller does not pass its own map. Keys keep their trailing "/" and are
# compared case-sensitively ("models/" and "Models/" are distinct entries).
# A key only matches at the start of a path or right after a "/", and when
# several keys match the same path the longest key wins.
DEFAULT_CATEGORY_MAP: dict[str, str] = {
    # Python (raise-cli conventions)
    "cli/commands/": "command",
    "cli/": "utility",
    "schemas/": "schema",
    "models/": "model",
    "output/": "formatter",
    "governance/": "parser",
    "context/": "builder",
    "discovery/": "service",
    "memory/": "service",
    "onboarding/": "service",
    "config/": "utility",
    "core/": "utility",
    "telemetry/": "service",
    # Laravel/PHP
    "Controllers/": "controller",
    "Models/": "model",
    "Middleware/": "middleware",
    "Providers/": "provider",
    "Services/": "service",
    "Requests/": "schema",
    "Resources/": "formatter",
    "routes/": "route",
    "Migrations/": "migration",
    # Svelte/TS/JS
    "components/": "component",
    "stores/": "store",
    "lib/": "utility",
    "utils/": "utility",
    "types/": "schema",
    "hooks/": "utility",
    "api/": "service",
    # C#/.NET (Clean Architecture conventions — leaf directories only,
    # avoid broad layer dirs like Infrastructure/ that shadow more specific ones)
    "Repositories/": "repository",
    "Handlers/": "service",
    "Commands/": "command",
    "Queries/": "query",
    "Validators/": "validator",
}
|
|
76
|
+
|
|
77
|
+
# Name marker -> category, consulted first by determine_category().
# Entries are tried in insertion order and the first hit wins, so the order
# of this dict is part of the behavior.
#   - "test_" is matched as a name prefix.
#   - "Test" is matched as a name prefix, and only for symbols of kind "class".
#   - every other key is matched as a name suffix.
NAME_CATEGORY_OVERRIDES: dict[str, str] = {
    "Error": "exception",
    "Warning": "exception",
    "Settings": "config",
    "Config": "config",
    "Test": "test",
    "test_": "test",
    # C#/.NET name suffixes
    "Handler": "service",
    "Repository": "repository",
    "RepositoryAsync": "repository",
    "Command": "command",
    "Query": "query",
    "Validator": "validator",
    "Controller": "controller",
    "Middleware": "middleware",
    "Extension": "utility",
    "Factory": "utility",
}
|
|
96
|
+
|
|
97
|
+
# Base-class name -> category.
# compute_confidence() looks for each key as a plain substring of the symbol
# signature, in insertion order, and records only the first key found —
# which is why "ControllerBase" is listed before "Controller".
# determine_category() then maps the recorded key to its category.
BASE_CLASS_CATEGORIES: dict[str, str] = {
    "BaseModel": "model",
    "Exception": "exception",
    "BaseSettings": "config",
    "TypedDict": "schema",
    # C#/.NET common base classes
    "ControllerBase": "controller",
    "Controller": "controller",
    "DbContext": "service",
    "IRequestHandler": "service",
}
|
|
108
|
+
|
|
109
|
+
# C# name suffixes that indicate clear semantic intent.
# When present, confidence gets a +15 boost (same as parent context).
# compute_confidence() only checks these for files ending in ".cs", and
# compares against the last dot-separated segment of the symbol name.
CSHARP_SEMANTIC_SUFFIXES: frozenset[str] = frozenset(
    {
        "Handler",
        "Repository",
        "RepositoryAsync",
        "Command",
        "Query",
        "Validator",
        "Controller",
        "Middleware",
        "Factory",
        "Extension",
        "Service",
        "Manager",
    }
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ── Pydantic models ──────────────────────────────────────────────────────
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ConfidenceSignals(BaseModel):
    """Deterministic signals used to compute confidence score.

    Each signal maps to a specific condition detected in the source symbol.
    All signals are boolean or simple values — no AI inference involved.

    Every field defaults to its "not detected" value; compute_confidence()
    creates an instance with defaults and sets the fields it observes.
    """

    # True when the symbol has a non-empty docstring.
    has_docstring: bool = False
    # len() of the docstring; stays 0 when there is no docstring.
    docstring_length: int = 0
    # True when the signature contains a type marker: "->" or ": "
    # (for ".cs" files: ": " or "<").
    has_type_annotations: bool = False
    # True when a path category was resolved for the symbol's file.
    path_matches_convention: bool = False
    # First BASE_CLASS_CATEGORIES key found in the signature, if any.
    known_base_class: str | None = None
    # True when the name casing matches the convention checked for the
    # symbol's language and kind.
    name_follows_convention: bool = False
    # True when the symbol has a parent (e.g. a method inside a class).
    parent_validated: bool = False
    has_semantic_suffix: bool = False  # C#: name ends with known semantic suffix
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class ConfidenceResult(BaseModel):
    """Confidence assessment for a component.

    The model validates only the score range. It does not derive ``tier``
    from ``score`` — callers supply both (compute_confidence() keeps them
    consistent using the thresholds below).

    Attributes:
        score: Confidence score from 0 to 100.
        tier: Derived tier — high (>=70), medium (40-69), low (<40).
        signals: Individual signals that contributed to the score.
    """

    # Rejected by validation when outside 0..100 (inclusive).
    score: int = Field(ge=0, le=100)
    tier: ConfidenceTier
    signals: ConfidenceSignals
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class AnalyzedComponent(BaseModel):
    """A component enriched with deterministic analysis.

    Attributes:
        id: Unique component ID. For class components build_hierarchy()
            uses the form "comp-<dotted module>-<class name>".
        name: Symbol name.
        kind: Symbol kind (class, function, method, module).
        file: Relative path to source file.
        line: Line number (1-indexed).
        signature: Full signature string.
        module: Dotted module path derived from the file path
            (see _file_to_module(); not limited to Python files).
        confidence: Confidence assessment.
        auto_category: Deterministic category from path/name conventions.
        auto_purpose: First sentence of docstring, or empty string.
        depends_on: Dependencies extracted from signature.
        internal: Whether this is an internal (underscore-prefixed) symbol.
        methods: Method names if kind=class (folded in).
        docstring: Original docstring, if available.
    """

    # --- identity and location (required) ---
    id: str
    name: str
    kind: str
    file: str
    line: int
    signature: str
    module: str
    # --- analysis output (required) ---
    confidence: ConfidenceResult
    auto_category: str
    auto_purpose: str
    # --- optional extras; list defaults use default_factory so each
    # instance gets its own list ---
    depends_on: list[str] = Field(default_factory=list)
    internal: bool = False
    methods: list[str] = Field(default_factory=list)
    docstring: str | None = None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class AnalysisResult(BaseModel):
    """Complete analysis output — deterministic, no AI needed.

    Attributes:
        scan_summary: Aggregate scan statistics.
        confidence_distribution: Count of components per confidence tier.
        categories: Count of components per category.
        components: All analyzed components.
        module_groups: Components grouped by source file (for parallel AI synthesis batches).
    """

    # Values are either a count or a list of strings, per the annotation.
    scan_summary: dict[str, int | list[str]]
    confidence_distribution: dict[str, int]
    categories: dict[str, int]
    components: list[AnalyzedComponent]
    # Optional; defaults to a fresh empty dict per instance.
    module_groups: dict[str, list[str]] = Field(default_factory=dict)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ── Functions ─────────────────────────────────────────────────────────────
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def match_path_category(
|
|
221
|
+
file_path: str,
|
|
222
|
+
category_map: dict[str, str] | None = None,
|
|
223
|
+
) -> str | None:
|
|
224
|
+
"""Match a file path against convention-based category patterns.
|
|
225
|
+
|
|
226
|
+
Uses longest-prefix matching to ensure more specific paths
|
|
227
|
+
(e.g., "cli/commands/") win over less specific ones (e.g., "cli/").
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
file_path: Relative path to the source file.
|
|
231
|
+
category_map: Custom category map. If None, uses DEFAULT_CATEGORY_MAP.
|
|
232
|
+
|
|
233
|
+
Returns:
|
|
234
|
+
Category string if a match is found, None otherwise.
|
|
235
|
+
|
|
236
|
+
Example:
|
|
237
|
+
>>> match_path_category("src/raise_cli/cli/commands/discover.py")
|
|
238
|
+
'command'
|
|
239
|
+
>>> match_path_category("src/raise_cli/unknown/foo.py")
|
|
240
|
+
"""
|
|
241
|
+
categories = category_map if category_map is not None else DEFAULT_CATEGORY_MAP
|
|
242
|
+
|
|
243
|
+
# Match on directory boundaries: pattern must be preceded by "/" or be at
|
|
244
|
+
# the start of the path. This prevents "cli/" matching "raise_cli/".
|
|
245
|
+
# Check all occurrences of the pattern (not just the first).
|
|
246
|
+
best_match: str | None = None
|
|
247
|
+
best_length = 0
|
|
248
|
+
|
|
249
|
+
for pattern, category in categories.items():
|
|
250
|
+
# Search all occurrences of pattern in file_path
|
|
251
|
+
start = 0
|
|
252
|
+
while True:
|
|
253
|
+
idx = file_path.find(pattern, start)
|
|
254
|
+
if idx < 0:
|
|
255
|
+
break
|
|
256
|
+
# Ensure directory boundary (preceded by "/" or at start)
|
|
257
|
+
if idx == 0 or file_path[idx - 1] == "/":
|
|
258
|
+
if len(pattern) > best_length:
|
|
259
|
+
best_match = category
|
|
260
|
+
best_length = len(pattern)
|
|
261
|
+
break # Found valid match for this pattern
|
|
262
|
+
start = idx + 1
|
|
263
|
+
|
|
264
|
+
return best_match
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def compute_confidence(
    symbol: Symbol,
    path_category: str | None,
) -> ConfidenceResult:
    """Score how confidently a symbol can be classified without AI help.

    Each detected signal adds a fixed number of points; the sum is capped
    at 100:
        - Has docstring: +30
        - Substantial docstring (>20 chars): +10
        - Has type annotations in signature: +10
        - Path matches a known convention: +20
        - Known base class in signature: +10
        - Name follows convention: +5
        - Parent class context (methods): +15
        - Semantic suffix in name [C# only]: +15

    Tier thresholds:
        - High: score >= 70
        - Medium: 40 <= score < 70
        - Low: score < 40

    C# note: XML doc comments (///) are not yet extracted by the scanner
    (tracked in RAISE-225). Until then, the docstring signal will always be
    0 for C# symbols; the type-annotation, naming and semantic-suffix
    signals compensate for this gap.

    Args:
        symbol: The Symbol to score.
        path_category: Category from match_path_category(), or None.

    Returns:
        ConfidenceResult with score, tier, and detailed signals.
    """
    signals = ConfidenceSignals()
    is_csharp = symbol.file.endswith(".cs")
    total = 0

    # Docstring: +30 for any, +10 more when longer than 20 characters.
    doc = symbol.docstring
    if doc:
        signals.has_docstring = True
        signals.docstring_length = len(doc)
        total += 40 if len(doc) > 20 else 30

    # Type annotations: Python uses '->' / ': '; C# uses ': ' or a generic '<'.
    signature = symbol.signature
    annotation_markers = (": ", "<") if is_csharp else ("->", ": ")
    if any(marker in signature for marker in annotation_markers):
        signals.has_type_annotations = True
        total += 10

    # Path convention.
    if path_category:
        signals.path_matches_convention = True
        total += 20

    # Known base class: first key (in map order) found anywhere in the signature.
    matched_base = next(
        (base for base in BASE_CLASS_CATEGORIES if base in signature),
        None,
    )
    if matched_base is not None:
        signals.known_base_class = matched_base
        total += 10

    # Naming convention and (C# only) semantic suffix.
    name = symbol.name
    has_suffix = False
    if is_csharp:
        # Names may be namespace-qualified; judge the last segment only.
        short_name = name.split(".")[-1]
        conventional = bool(short_name) and short_name[0].isupper()
        has_suffix = short_name.endswith(tuple(CSHARP_SEMANTIC_SUFFIXES))
    elif symbol.kind == "class":
        # Python classes: PascalCase (leading uppercase letter).
        conventional = bool(name) and name[0].isupper()
    else:
        # Python functions/methods: snake_case (all lowercase).
        conventional = symbol.kind in ("function", "method") and name.islower()

    if conventional:
        signals.name_follows_convention = True
        total += 5

    # Parent context (e.g. a method that belongs to a class).
    if symbol.parent:
        signals.parent_validated = True
        total += 15

    if has_suffix:
        signals.has_semantic_suffix = True
        total += 15

    score = min(total, 100)
    tier: ConfidenceTier = (
        "high" if score >= 70 else "medium" if score >= 40 else "low"
    )
    return ConfidenceResult(score=score, tier=tier, signals=signals)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def extract_first_sentence(docstring: str | None) -> str:
|
|
384
|
+
"""Extract the first sentence from a docstring.
|
|
385
|
+
|
|
386
|
+
Args:
|
|
387
|
+
docstring: Raw docstring text, or None.
|
|
388
|
+
|
|
389
|
+
Returns:
|
|
390
|
+
First sentence (up to first period), or first line if no period.
|
|
391
|
+
Empty string if docstring is None or empty.
|
|
392
|
+
"""
|
|
393
|
+
if not docstring:
|
|
394
|
+
return ""
|
|
395
|
+
text = docstring.strip()
|
|
396
|
+
if not text:
|
|
397
|
+
return ""
|
|
398
|
+
# Take first line
|
|
399
|
+
first_line = text.split("\n")[0].strip()
|
|
400
|
+
# If it contains a period, take up to and including the first period
|
|
401
|
+
dot_idx = first_line.find(".")
|
|
402
|
+
if dot_idx >= 0:
|
|
403
|
+
return first_line[: dot_idx + 1]
|
|
404
|
+
return first_line
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def determine_category(
    name: str,
    kind: str,
    path_category: str | None,
    base_class: str | None = None,
) -> str:
    """Pick a component category from the strongest available hint.

    Hints are tried in this order and the first one that applies wins:
    name override → base class → path convention → "other".

    Name overrides are checked in the order they appear in
    NAME_CATEGORY_OVERRIDES. "test_" is a name prefix, "Test" is a name
    prefix that only applies to classes, and every other marker is a name
    suffix.

    Args:
        name: Symbol name.
        kind: Symbol kind (class, function, etc.).
        path_category: Category from match_path_category(), or None.
        base_class: Known base class from confidence signals, or None.

    Returns:
        Category string.
    """
    # 1) Name-based overrides.
    for marker, category in NAME_CATEGORY_OVERRIDES.items():
        if marker == "test_":
            matched = name.startswith("test_")
        elif marker == "Test":
            matched = kind == "class" and name.startswith("Test")
        else:
            matched = name.endswith(marker)
        if matched:
            return category

    # 2) Recognized base class.
    if base_class and base_class in BASE_CLASS_CATEGORIES:
        return BASE_CLASS_CATEGORIES[base_class]

    # 3) Path convention, else the catch-all bucket.
    return path_category or "other"
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
_SOURCE_PREFIXES = ("src", "app", "lib")
|
|
447
|
+
_CODE_EXTENSIONS = {
|
|
448
|
+
".py",
|
|
449
|
+
".php",
|
|
450
|
+
".ts",
|
|
451
|
+
".tsx",
|
|
452
|
+
".js",
|
|
453
|
+
".jsx",
|
|
454
|
+
".svelte",
|
|
455
|
+
".cs",
|
|
456
|
+
".dart",
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _file_to_module(file_path: str) -> str:
|
|
461
|
+
"""Convert a file path to a dotted module path.
|
|
462
|
+
|
|
463
|
+
Strips common source prefixes (src/, app/, lib/) and known code
|
|
464
|
+
extensions (.py, .php, .ts, .tsx, .js, .jsx, .svelte).
|
|
465
|
+
|
|
466
|
+
Args:
|
|
467
|
+
file_path: Relative file path (e.g., "src/raise_cli/discovery/scanner.py"
|
|
468
|
+
or "app/Http/Controllers/UserController.php").
|
|
469
|
+
|
|
470
|
+
Returns:
|
|
471
|
+
Dotted module path (e.g., "raise_cli.discovery.scanner"
|
|
472
|
+
or "Http.Controllers.UserController").
|
|
473
|
+
"""
|
|
474
|
+
# Normalize Windows backslashes before PurePosixPath — paths may arrive
|
|
475
|
+
# with backslashes on Windows or from PHP/C# namespace-derived paths.
|
|
476
|
+
p = PurePosixPath(file_path.replace("\\", "/"))
|
|
477
|
+
parts = list(p.parts)
|
|
478
|
+
# Strip common source prefixes
|
|
479
|
+
if parts and parts[0] in _SOURCE_PREFIXES:
|
|
480
|
+
parts = parts[1:]
|
|
481
|
+
# Remove known code extension from last part
|
|
482
|
+
if parts:
|
|
483
|
+
last = PurePosixPath(parts[-1])
|
|
484
|
+
if last.suffix in _CODE_EXTENSIONS:
|
|
485
|
+
parts[-1] = last.stem
|
|
486
|
+
return ".".join(parts)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def build_hierarchy(symbols: list[Symbol]) -> list[AnalyzedComponent]:
    """Fold methods into their parent classes.

    Classes become single units with a methods list.
    Standalone functions and modules remain individual units.
    Methods with missing parent classes are dropped.

    Classes are identified by ``(file, name)`` rather than by name alone,
    so two classes that share a name but live in different files each
    produce their own component and only receive the methods declared in
    their own file.

    Every returned component carries placeholder analysis values
    (score 0 / tier "low" confidence, category "other", empty purpose).

    Args:
        symbols: List of Symbol objects to organize.

    Returns:
        List of AnalyzedComponent units with methods folded into classes.
        Class units come first (in first-seen order), followed by all
        non-class, non-method symbols in input order.
    """
    # Keyed by (file, class name): a bare-name key would let same-named
    # classes from different files overwrite each other and pool methods.
    class_symbols: dict[tuple[str, str], Symbol] = {}
    class_methods: dict[tuple[str, str], list[Symbol]] = {}

    for s in symbols:
        if s.kind == "class":
            key = (s.file, s.name)
            class_symbols[key] = s
            class_methods.setdefault(key, [])
        elif s.kind == "method" and s.parent:
            class_methods.setdefault((s.file, s.parent), []).append(s)

    # Class units, with the names of their methods folded in.
    units: list[AnalyzedComponent] = [
        _symbol_to_component(class_sym, [m.name for m in class_methods[key]])
        for key, class_sym in class_symbols.items()
    ]

    # Standalone symbols: everything that's not class or method
    # (exclude-based routing — future kinds automatically become standalone).
    units.extend(
        _symbol_to_component(s, [])
        for s in symbols
        if s.kind not in ("class", "method")
    )

    return units


def _symbol_to_component(sym: Symbol, method_names: list[str]) -> AnalyzedComponent:
    """Build an AnalyzedComponent with placeholder analysis fields from a Symbol."""
    module = _file_to_module(sym.file)
    # Use "module" as suffix for module-level entries to avoid collisions
    # with same-named functions (e.g., test_version.py has both module
    # "test_version" and function "test_version").
    id_name = "module" if sym.kind == "module" else sym.name
    return AnalyzedComponent(
        id=f"comp-{module}-{id_name}",
        name=sym.name,
        kind=sym.kind,
        file=sym.file,
        line=sym.line,
        signature=sym.signature,
        module=module,
        confidence=ConfidenceResult(
            score=0, tier="low", signals=ConfidenceSignals()
        ),
        auto_category="other",
        auto_purpose="",
        internal=sym.name.startswith("_"),
        methods=method_names,
        docstring=sym.docstring,
    )
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def _build_hierarchy_with_symbols(
    symbols: list[Symbol],
) -> tuple[list[AnalyzedComponent], dict[str, Symbol]]:
    """Build hierarchy and return a map of component ID → original Symbol.

    Used internally by analyze() to preserve original Symbol objects
    for type-safe confidence scoring.

    Args:
        symbols: Symbols to fold into components via build_hierarchy().

    Returns:
        A ``(units, symbol_map)`` tuple. ``symbol_map`` has one entry per
        distinct component ID; when several units share an ID, the entry
        belongs to the first of them.
    """
    units = build_hierarchy(symbols)

    # Index symbols by (kind, name, file, line). Name and file alone are not
    # unique: a module and a function, or a class and a method, can share
    # both within one file, which would pair a unit with the wrong Symbol.
    sym_lookup: dict[tuple[str, str, str, int], Symbol] = {}
    for s in symbols:
        sym_lookup[(s.kind, s.name, s.file, s.line)] = s

    symbol_map: dict[str, Symbol] = {}
    for unit in units:
        if unit.id in symbol_map:
            # analyze() keeps the first unit for a duplicated ID, so the
            # map must keep that unit's Symbol rather than a later one.
            continue
        original = sym_lookup.get((unit.kind, unit.name, unit.file, unit.line))
        if original is None:
            # Fallback: create a minimal Symbol (shouldn't happen normally).
            # Mirror the unit's own kind instead of assuming a class.
            original = Symbol(
                name=unit.name,
                kind=unit.kind,
                file=unit.file,
                line=unit.line,
                signature=unit.signature,
                docstring=unit.docstring,
            )
        symbol_map[unit.id] = original
    return units, symbol_map
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def group_by_module(components: list[AnalyzedComponent]) -> dict[str, list[str]]:
    """Bucket component IDs under the file each component was found in.

    Each module group becomes a batch for parallel AI synthesis.

    Args:
        components: List of analyzed components.

    Returns:
        Dict mapping file path to list of component IDs. Files appear in
        first-seen order and IDs keep the order of ``components``.
    """
    by_file: dict[str, list[str]] = {}
    for component in components:
        source_file = component.file
        if source_file not in by_file:
            by_file[source_file] = []
        by_file[source_file].append(component.id)
    return by_file
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def analyze(
    scan_result: ScanResult,
    category_map: dict[str, str] | None = None,
) -> AnalysisResult:
    """Run the full deterministic analysis pipeline.

    Pipeline: filter internal → build hierarchy → score confidence →
    categorize → group by module.

    Symbols whose name starts with "_" are counted as internal and left
    out of the component list. Components that end up with an ID already
    used by an earlier component are skipped with a warning.

    Args:
        scan_result: Raw scan output from raise discover scan.
        category_map: Optional custom path-to-category mapping.

    Returns:
        AnalysisResult with scored, categorized, module-grouped components.
    """
    import warnings

    every_symbol = scan_result.symbols

    # Split once into public vs. underscore-prefixed (internal) symbols.
    public = []
    internal = []
    for sym in every_symbol:
        (internal if sym.name.startswith("_") else public).append(sym)

    summary = {
        "files_scanned": scan_result.files_scanned,
        "total_symbols": len(every_symbol),
        "public_symbols": len(public),
        "internal_symbols": len(internal),
        "errors": scan_result.errors,
    }

    # Fold methods into classes; the map lets scoring reuse original Symbols.
    candidates, symbol_map = _build_hierarchy_with_symbols(public)

    # Deduplicate IDs — can occur with generated dirs (.astro/, __pycache__/)
    # or Windows paths. Keep first occurrence, warn about duplicates.
    first_file_for_id: dict[str, str] = {}
    components: list[AnalyzedComponent] = []
    for comp in candidates:
        if comp.id not in first_file_for_id:
            first_file_for_id[comp.id] = comp.file
            components.append(comp)
            continue
        warnings.warn(
            f"Duplicate component ID '{comp.id}' in {comp.file} "
            f"(already seen in {first_file_for_id[comp.id]}) — skipping duplicate.",
            stacklevel=2,
        )

    # Score confidence + categorize + extract purpose
    for comp in components:
        path_category = match_path_category(comp.file, category_map)
        confidence = compute_confidence(symbol_map[comp.id], path_category)
        comp.confidence = confidence
        comp.auto_category = determine_category(
            comp.name,
            comp.kind,
            path_category,
            confidence.signals.known_base_class,
        )
        comp.auto_purpose = extract_first_sentence(comp.docstring)

    # Aggregate statistics
    tier_counts: dict[str, int] = dict.fromkeys(("high", "medium", "low"), 0)
    cat_counts: dict[str, int] = {}
    for comp in components:
        tier_counts[comp.confidence.tier] += 1
        category = comp.auto_category
        cat_counts[category] = cat_counts.get(category, 0) + 1

    return AnalysisResult(
        scan_summary=summary,
        confidence_distribution=tier_counts,
        categories=cat_counts,
        components=components,
        module_groups=group_by_module(components),
    )
|