component-mapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- component_mapper/__init__.py +4 -0
- component_mapper/cache/__init__.py +0 -0
- component_mapper/cache/mapping_cache.py +72 -0
- component_mapper/config.py +247 -0
- component_mapper/mcp/__init__.py +0 -0
- component_mapper/mcp/official_client.py +182 -0
- component_mapper/mcp/registry_fetcher.py +214 -0
- component_mapper/models.py +159 -0
- component_mapper/pipeline.py +182 -0
- component_mapper/registry/__init__.py +0 -0
- component_mapper/registry/astro_generator.py +390 -0
- component_mapper/registry/custom_registry.py +127 -0
- component_mapper/registry/prop_mapper.py +370 -0
- component_mapper/registry/signature_index.py +694 -0
- component_mapper/stages/__init__.py +0 -0
- component_mapper/stages/astro_stage.py +122 -0
- component_mapper/stages/cache_lookup.py +93 -0
- component_mapper/stages/llm_mapper.py +509 -0
- component_mapper/stages/structural_match.py +145 -0
- component_mapper/utils/__init__.py +0 -0
- component_mapper/utils/similarity.py +69 -0
- component_mapper/utils/source_parser.py +292 -0
- component_mapper-0.1.0.dist-info/METADATA +16 -0
- component_mapper-0.1.0.dist-info/RECORD +25 -0
- component_mapper-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import aiohttp
|
|
7
|
+
import aiofiles
|
|
8
|
+
from component_mapper.config import RegistryConfig
|
|
9
|
+
from component_mapper.models import RegistrySource
|
|
10
|
+
|
|
11
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RegistryFetcher:
    """Fetch component registry JSON over HTTP with memory and disk caching.

    Every lookup goes through three tiers, in order:

    1. an in-process dict, valid for ``config.http_cache_ttl_hours``;
    2. a JSON file under ``.cache/registry_http`` whose mtime is younger
       than the same TTL;
    3. an HTTP GET, limited to ``config.max_concurrent_fetches`` concurrent
       requests by a semaphore.

    A fetch that fails (network error, non-200 status, or a body that is not
    a JSON object) yields the placeholder ``{"name": <name>, "files": []}``.
    The placeholder is remembered in memory so the same component is not
    re-requested over and over, but it is never written to disk: a transient
    outage must not be served from the disk cache for the whole TTL.
    """

    def __init__(self, config: RegistryConfig):
        """Store *config* and create the on-disk cache directory.

        The cache directory is relative to the current working directory and
        is created eagerly (including parents) if it does not exist.
        """
        self.config = config
        self._memory_cache: dict[str, dict] = {}
        self._cache_timestamps: dict[str, float] = {}
        self._semaphore = asyncio.Semaphore(config.max_concurrent_fetches)
        self._disk_cache_dir = Path(".cache/registry_http")
        self._disk_cache_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _placeholder(name: str) -> dict:
        """Return the empty registry entry used when a fetch fails."""
        return {"name": name, "files": []}

    @staticmethod
    def _safe_filename_part(value: str) -> str:
        """Make *value* safe to embed in a cache file name.

        Anything other than alphanumerics, ``.``, ``_`` and ``-`` becomes
        ``_``. This keeps names containing path separators from breaking the
        write or pointing outside the cache directory; ordinary component
        names are returned unchanged, so existing cache files stay valid.
        """
        return "".join(
            ch if (ch.isalnum() or ch in "._-") else "_" for ch in value
        )

    def _ttl_seconds(self) -> float:
        """Return the cache time-to-live in seconds."""
        return self.config.http_cache_ttl_hours * 3600

    def _remember(self, cache_key: str, data: dict) -> None:
        """Record *data* in the memory cache with the current timestamp."""
        self._memory_cache[cache_key] = data
        self._cache_timestamps[cache_key] = time.time()

    def _cache_key(self, name: str, source: RegistrySource) -> str:
        """Return the memory-cache key for *name* in *source*."""
        return f"{source.value}:{name}"

    def _disk_cache_path(self, name: str, source: RegistrySource) -> Path:
        """Return the disk-cache file path for *name* in *source*."""
        safe_name = self._safe_filename_part(name)
        return self._disk_cache_dir / f"{source.value}_{safe_name}.json"

    def _is_cache_fresh(self, cache_key: str) -> bool:
        """Return True if the memory entry for *cache_key* is within the TTL.

        A key with no recorded timestamp is treated as written at epoch 0.
        """
        ts = self._cache_timestamps.get(cache_key, 0)
        return (time.time() - ts) < self._ttl_seconds()

    def _base_url(self, source: RegistrySource) -> str:
        """Return the registry base URL for *source*.

        Non-shadcn sources use the custom registry URL when one is
        configured and fall back to the shadcn URL otherwise.
        """
        if source == RegistrySource.SHADCN:
            return self.config.shadcn_registry_base_url
        return (
            self.config.custom_registry_base_url
            or self.config.shadcn_registry_base_url
        )

    async def _read_disk_cache(self, disk_path: Path) -> dict | None:
        """Return the cached JSON object at *disk_path*, or None on a miss.

        A miss is any of: file absent, file older than the TTL, file
        unreadable, content not valid JSON, or content not a JSON object.
        """
        try:
            # A single stat() instead of exists()+stat(): the file may be
            # removed between the two calls.
            age = time.time() - disk_path.stat().st_mtime
        except OSError:
            return None
        if age >= self._ttl_seconds():
            return None

        try:
            async with aiofiles.open(disk_path, "r", encoding="utf-8") as f:
                data = json.loads(await f.read())
        except (OSError, ValueError) as exc:
            # Best effort: a corrupt cache file only costs one refetch.
            logger.debug(
                "Ignoring unreadable registry cache file %s: %s", disk_path, exc
            )
            return None
        return data if isinstance(data, dict) else None

    async def _write_disk_cache(self, disk_path: Path, data: dict) -> None:
        """Write *data* to *disk_path* as JSON; failures are logged only."""
        try:
            async with aiofiles.open(disk_path, "w", encoding="utf-8") as f:
                await f.write(json.dumps(data))
        except (OSError, TypeError, ValueError) as exc:
            logger.debug(
                "Could not write registry cache file %s: %s", disk_path, exc
            )

    async def _get_json(self, url: str) -> tuple[int, object]:
        """GET *url* and return ``(status, parsed_body)``.

        The body is parsed as JSON regardless of the response content type
        and only when the status is 200; otherwise the body is ``None``.
        Network, timeout and decoding errors propagate to the caller.
        """
        timeout = aiohttp.ClientTimeout(total=self.config.fetch_timeout_seconds)
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=timeout) as resp:
                if resp.status != 200:
                    return resp.status, None
                return resp.status, await resp.json(content_type=None)

    async def _store_result(
        self,
        cache_key: str,
        disk_path: Path,
        name: str,
        payload: object,
    ) -> dict:
        """Cache the outcome of an HTTP fetch and return the data to serve.

        *payload* is the parsed body of a successful fetch, or ``None`` when
        the fetch failed. Only a real JSON object is persisted to disk.
        """
        if isinstance(payload, dict):
            self._remember(cache_key, payload)
            await self._write_disk_cache(disk_path, payload)
            return payload

        if payload is not None:
            logger.warning(
                "Registry response for %s is not a JSON object; ignoring", name
            )
        fallback = self._placeholder(name)
        self._remember(cache_key, fallback)
        return fallback

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def fetch_component(
        self,
        name: str,
        source: RegistrySource = RegistrySource.SHADCN,
    ) -> dict:
        """Return the registry JSON for component *name* from *source*.

        Never raises for network or HTTP errors; those are logged and the
        placeholder ``{"name": name, "files": []}`` is returned instead.
        """
        cache_key = self._cache_key(name, source)

        # Memory cache
        if cache_key in self._memory_cache and self._is_cache_fresh(cache_key):
            return self._memory_cache[cache_key]

        # Disk cache
        disk_path = self._disk_cache_path(name, source)
        cached = await self._read_disk_cache(disk_path)
        if cached is not None:
            self._remember(cache_key, cached)
            return cached

        # HTTP fetch
        url = f"{self._base_url(source)}/{name}.json"
        payload: object = None
        async with self._semaphore:
            try:
                status, body = await self._get_json(url)
            except Exception as exc:  # boundary: any fetch failure is non-fatal
                logger.warning("Registry fetch failed for %s: %s", name, exc)
            else:
                if status == 200:
                    payload = body
                else:
                    logger.warning("Registry fetch %s returned %d", url, status)

        return await self._store_result(cache_key, disk_path, name, payload)

    async def fetch_many(
        self,
        names: list[str],
        source: RegistrySource = RegistrySource.SHADCN,
    ) -> dict[str, dict]:
        """Concurrent fetch under semaphore. Returns name -> registry JSON.

        Duplicate names are fetched once. A name whose fetch raised maps to
        the empty placeholder entry.
        """
        unique_names = list(dict.fromkeys(names))
        results = await asyncio.gather(
            *(self.fetch_component(n, source) for n in unique_names),
            return_exceptions=True,
        )
        return {
            name: (r if isinstance(r, dict) else self._placeholder(name))
            for name, r in zip(unique_names, results)
        }

    async def fetch_source_code(self, name: str) -> str:
        """Return the ``content`` of the first file in the shadcn registry entry.

        Returns ``""`` when the entry has no files, the first file entry is
        not an object, or its content is missing or null.
        """
        data = await self.fetch_component(name, RegistrySource.SHADCN)
        files = data.get("files") or []
        first = files[0] if files else None
        if isinstance(first, dict):
            return first.get("content") or ""
        return ""

    async def fetch_from_external(
        self,
        url_template: str,
        component_name: str,
        registry_name: str,
    ) -> dict:
        """Fetch a component from an external registry using its URL template.

        url_template uses {name} as the placeholder, e.g.
        "https://bundui.io/r/{name}.json" -> "https://bundui.io/r/pagination.json"

        Uses the same memory/disk/HTTP tiers and the same failure placeholder
        as ``fetch_component``.
        """
        cache_key = f"external:{registry_name}:{component_name}"

        if cache_key in self._memory_cache and self._is_cache_fresh(cache_key):
            return self._memory_cache[cache_key]

        safe_registry = self._safe_filename_part(registry_name)
        safe_component = self._safe_filename_part(component_name)
        disk_path = (
            self._disk_cache_dir / f"ext_{safe_registry}_{safe_component}.json"
        )
        cached = await self._read_disk_cache(disk_path)
        if cached is not None:
            self._remember(cache_key, cached)
            return cached

        url = url_template.replace("{name}", component_name)
        payload: object = None
        async with self._semaphore:
            try:
                status, body = await self._get_json(url)
            except Exception as exc:  # boundary: any fetch failure is non-fatal
                logger.warning(
                    "External registry fetch failed %s/%s: %s",
                    registry_name, component_name, exc,
                )
            else:
                if status == 200:
                    payload = body
                    logger.debug(
                        "Fetched %s/%s from external registry",
                        registry_name, component_name,
                    )
                else:
                    logger.warning(
                        "External registry %s returned %d for %s",
                        registry_name, status, component_name,
                    )

        return await self._store_result(
            cache_key, disk_path, component_name, payload
        )

    async def fetch_all_external(
        self,
        external_registries: list,
    ) -> dict[str, dict]:
        """Fetch all components from all external registries concurrently.

        Registries whose ``open_source`` attribute is falsy are skipped.
        Returns dict keyed by "registry_name/component_name".
        """
        pending = {}
        for reg in external_registries:
            if not reg.open_source:
                continue
            for comp in reg.components:
                key = f"{reg.name}/{comp}"
                if key in pending:
                    # Same registry/component listed twice: overwriting the
                    # entry would leave the first coroutine un-awaited.
                    continue
                pending[key] = self.fetch_from_external(
                    reg.url_template, comp, reg.name
                )

        if not pending:
            return {}

        results = await asyncio.gather(*pending.values(), return_exceptions=True)
        return {
            key: (r if isinstance(r, dict) else {"name": key, "files": []})
            for key, r in zip(pending, results)
        }
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from segment_classifier.models import ClassifiedSegment, ComponentType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MappingStage(str, Enum):
    """Label recording how a segment's mapping was produced.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    # Outcomes that yield a mapped component.
    CACHE_HIT = "cache_hit"
    STRUCTURAL_MATCH = "structural_match"
    LLM_MAPPED = "llm_mapped"
    LLM_NOVEL = "llm_novel"

    # No mapping was produced.
    UNRESOLVED = "unresolved"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class InteractivityMode(str, Enum):
    """Interactivity level of a component.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    STATIC = "static"
    INTERACTIVE = "interactive"
    PARTIAL = "partial"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RegistrySource(str, Enum):
    """Origin of a component definition.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    SHADCN = "shadcn"
    CUSTOM = "custom"
    NOVEL = "novel"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class PropDefinition(BaseModel):
    """Declaration of a single component prop.

    Only ``name`` and ``type`` are required; a prop is optional unless
    ``required`` is set.
    """

    # Required fields.
    name: str
    type: str

    # Optional metadata with defaults.
    required: bool = False
    default_value: str | None = None
    description: str = ""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ComponentSignature(BaseModel):
    """Signature of one registry component.

    Groups four kinds of data: identity, DOM-structure descriptors,
    classification metadata with prop definitions, and the import/install
    details for using the component.
    """

    # Identity.
    component_name: str
    registry_source: RegistrySource

    # Structural descriptors (all required).
    dom_skeleton: str
    root_element: str
    required_children: list[str]
    optional_children: list[str]
    structural_class_tokens: list[str]
    typical_nesting_depth: int
    child_tag_counts: dict[str, int]
    unique_tag_count: int

    # Classification metadata.
    compatible_component_types: list[ComponentType]
    interactivity: InteractivityMode = InteractivityMode.STATIC
    description: str = ""

    # Prop definitions; each instance gets its own empty list by default.
    props: list[PropDefinition] = Field(default_factory=list)

    # Integration details.
    astro_import: str
    install_command: str
    requires_client_directive: bool = False
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CustomComponentDefinition(BaseModel):
    """Definition of a component kept in the custom registry.

    Unlike ``ComponentSignature``, ``props`` is required here and
    ``install_command`` defaults to an empty string.
    """

    # Required fields.
    name: str
    dom_skeleton: str
    structural_class_tokens: list[str]
    compatible_component_types: list[ComponentType]
    props: list[PropDefinition]
    astro_import: str

    # Optional fields with defaults.
    install_command: str = ""
    interactivity: InteractivityMode = InteractivityMode.STATIC
    description: str = ""
    source: str = "manual"
    confidence: float = 1.0
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class RankedCandidate(BaseModel):
    """A candidate component together with its match scores.

    Every score is validated to lie in the closed interval [0.0, 1.0].
    """

    # The candidate being scored.
    component_name: str
    registry_source: RegistrySource
    signature: ComponentSignature

    # Individual scores and the combined score.
    structural_score: float = Field(ge=0.0, le=1.0)
    type_score: float = Field(ge=0.0, le=1.0)
    class_token_score: float = Field(ge=0.0, le=1.0)
    composite_score: float = Field(ge=0.0, le=1.0)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PropMapping(BaseModel):
    """Result of mapping props; every field defaults to empty/False.

    The list fields use ``default_factory`` so instances never share a list.
    """

    # Free-form mapping entries; the dict schema is not fixed by this model.
    mappings: list[dict[str, Any]] = Field(default_factory=list)
    has_ambiguous: bool = False
    unmapped_props: list[str] = Field(default_factory=list)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class AstroImport(BaseModel):
    """One import entry of a generated Astro component.

    ``is_default`` is False unless set explicitly.
    """

    identifier: str
    source: str
    is_default: bool = False
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class AstroComponent(BaseModel):
    """A generated Astro component file and its install requirements."""

    # Identity and location.
    component_name: str
    file_path: str

    # File content: the parts and the assembled whole.
    frontmatter: str
    template: str
    imports: list[AstroImport]
    full_file_content: str

    # Commands associated with this component.
    install_commands: list[str]

    # Optional flags.
    client_directive: str | None = None
    is_collection_item: bool = False
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ContentCollectionSchema(BaseModel):
    """Schema description for a content collection.

    All three fields are required strings.
    """

    collection_name: str
    zod_schema: str
    example_entry: str
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class MappingCacheRecord(BaseModel):
    """One mapping-cache entry, identified by ``fingerprint_hash``."""

    # Identifier of the entry.
    fingerprint_hash: str

    # The cached mapping decision.
    component_name: str
    registry_source: RegistrySource
    prop_mapping: PropMapping
    mapping_stage: MappingStage
    confidence: float

    # Starts at 1 when the record is created without an explicit value.
    hit_count: int = 1
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class MappedComponent(BaseModel):
    """A segment that has been mapped to a component.

    Combines the segment's identity and classification, the chosen
    component and how it was chosen, the prop mapping, the generated Astro
    output, and optional LLM details.
    """

    # Source segment and its classification.
    segment_id: str
    page_url: str
    component_type: ComponentType
    classification_stage: str

    # Chosen component and how the mapping was made.
    component_name: str
    registry_source: RegistrySource
    mapping_stage: MappingStage
    mapping_confidence: float

    prop_mapping: PropMapping

    # Generated output; the collection schema is optional.
    astro_component: AstroComponent
    content_collection_schema: ContentCollectionSchema | None = None

    # Optional LLM details; None when not provided.
    llm_model_used: str | None = None
    llm_reasoning: str | None = None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class PipelineRunResult(BaseModel):
    """Summary of one pipeline run; every field is required."""

    # Inputs and outcomes.
    total_segments: int
    mapped: list[MappedComponent]
    unresolved: list[ClassifiedSegment]

    # Count per mapping stage.
    stage_breakdown: dict[MappingStage, int]

    # Call counters.
    llm_calls_made: int
    llm_model_usage: dict[str, int]
    mcp_calls_made: int

    # Rates as fractions.
    cache_hit_rate: float
    structural_match_rate: float

    # Install manifest and the component names that were used.
    install_commands: list[str]
    unique_components_used: list[str]
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from component_mapper.config import MapperSettings
|
|
4
|
+
from component_mapper.models import (
|
|
5
|
+
PipelineRunResult,
|
|
6
|
+
MappingStage,
|
|
7
|
+
)
|
|
8
|
+
from component_mapper.mcp.official_client import OfficialMCPClient
|
|
9
|
+
from component_mapper.mcp.registry_fetcher import RegistryFetcher
|
|
10
|
+
from component_mapper.registry.signature_index import SignatureIndex
|
|
11
|
+
from component_mapper.registry.custom_registry import CustomRegistry
|
|
12
|
+
from component_mapper.cache.mapping_cache import MappingCache
|
|
13
|
+
from component_mapper.stages.cache_lookup import CacheLookupStage
|
|
14
|
+
from component_mapper.stages.structural_match import StructuralMatchStage
|
|
15
|
+
from component_mapper.stages.llm_mapper import LLMMapperStage
|
|
16
|
+
from component_mapper.stages.astro_stage import AstroStage
|
|
17
|
+
from segment_classifier.models import ClassifiedSegment
|
|
18
|
+
|
|
19
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MapperPipeline:
    """Wire the mapping stages together and run segments through them.

    Intended call order: construct, ``await initialize()``,
    ``await run(segments)``, then ``await shutdown()``.
    """

    def __init__(self, settings: MapperSettings):
        """Build every collaborator and stage from *settings*.

        No I/O is awaited here; connecting and loading happen in
        ``initialize()``.
        """
        self.settings = settings
        self.mcp_client = OfficialMCPClient(settings.mcp)
        self.fetcher = RegistryFetcher(settings.registry)
        self.custom_registry = CustomRegistry()
        self.mapping_cache = MappingCache(
            settings.mapping_cache.cache_path,
            settings.mapping_cache.auto_persist_every,
        )
        self.signature_index = SignatureIndex(settings, self.fetcher, self.mcp_client)

        self.cache_stage = CacheLookupStage(self.mapping_cache, self.signature_index)
        self.structural_stage = StructuralMatchStage(
            self.signature_index, self.mapping_cache
        )
        self.llm_stage = LLMMapperStage(
            settings, self.signature_index, self.custom_registry, self.mapping_cache
        )
        self.astro_stage = AstroStage(settings, self.signature_index)

    async def initialize(self) -> None:
        """One-time setup. Call before run()."""
        logger.info("Initializing MapperPipeline")

        # Connect MCP (non-fatal if unavailable)
        await self.mcp_client.connect()

        # Load the mapping cache and the custom registry concurrently.
        await asyncio.gather(
            self.mapping_cache.load(),
            self.custom_registry.load(
                self.settings.signature_index.custom_registry_path
            ),
        )

        # Build signature index (uses cache if fresh, else rebuilds)
        await self.signature_index.build()

        # Merge custom components into index (priority: custom first)
        self.signature_index.merge_custom(self.custom_registry.get_all())

        logger.info(
            "Pipeline initialized: %d components in index, %d mapping cache records",
            len(self.signature_index.get_all_component_names()),
            self.mapping_cache.size,
        )

    async def run(self, segments: list[ClassifiedSegment]) -> PipelineRunResult:
        """Map all segments. Returns PipelineRunResult.

        Stages run in order: cache lookup, structural match on the cache
        misses, LLM mapping on the ambiguous and novel segments, then Astro
        generation over everything that was mapped. An empty *segments* list
        returns an all-zero result without invoking any stage.
        """
        if not segments:
            return PipelineRunResult(
                total_segments=0,
                mapped=[],
                unresolved=[],
                stage_breakdown={s: 0 for s in MappingStage},
                llm_calls_made=0,
                llm_model_usage={},
                mcp_calls_made=self.mcp_client.calls_made,
                cache_hit_rate=0.0,
                structural_match_rate=0.0,
                install_commands=[],
                unique_components_used=[],
            )

        # Non-zero from here on, so the rate divisions below are safe.
        total = len(segments)
        logger.info("Starting pipeline run: %d segments", total)

        # Stage 1: Cache lookup
        hits, misses = await self.cache_stage.process(segments)

        # Stage 2: Structural match
        direct_matches, ambiguous, novel = await self.structural_stage.process(misses)

        # Stage 3: LLM mapping
        llm_mapped, unresolved = await self.llm_stage.process(ambiguous, novel)

        # Stage 4: Astro generation
        all_mapped = hits + direct_matches + llm_mapped
        all_mapped = await self.astro_stage.process(all_mapped)

        # Collect install manifest and per-stage counts in a single pass.
        install_set: set[str] = set()
        component_names: set[str] = set()
        breakdown: dict[MappingStage, int] = {s: 0 for s in MappingStage}
        for comp in all_mapped:
            if comp.astro_component:
                # Empty command strings are skipped.
                install_set.update(
                    cmd for cmd in comp.astro_component.install_commands if cmd
                )
            component_names.add(comp.component_name)
            breakdown[comp.mapping_stage] = breakdown.get(comp.mapping_stage, 0) + 1
        breakdown[MappingStage.UNRESOLVED] = len(unresolved)

        cache_hit_rate = len(hits) / total
        # Share of cache misses resolved by the structural stage.
        miss_count = len(misses)
        structural_match_rate = len(direct_matches) / miss_count if miss_count else 0.0

        logger.info(
            "Pipeline complete: %d mapped, %d unresolved | "
            "cache=%.0f%% structural=%.0f%% llm=%d calls",
            len(all_mapped),
            len(unresolved),
            cache_hit_rate * 100,
            structural_match_rate * 100,
            self.llm_stage.calls_made,
        )

        return PipelineRunResult(
            total_segments=total,
            mapped=all_mapped,
            unresolved=unresolved,
            stage_breakdown=breakdown,
            llm_calls_made=self.llm_stage.calls_made,
            llm_model_usage=self.llm_stage.get_model_usage(),
            mcp_calls_made=self.mcp_client.calls_made,
            cache_hit_rate=cache_hit_rate,
            structural_match_rate=structural_match_rate,
            install_commands=sorted(install_set),
            unique_components_used=sorted(component_names),
        )

    async def shutdown(self) -> None:
        """Persist state and install components. Call after run().

        The MCP client is always disconnected, even when persisting raises;
        a persistence error still propagates to the caller. A failure while
        installing components is logged and does not propagate.
        """
        logger.info("Shutting down pipeline")

        try:
            # Persist caches
            await asyncio.gather(
                self.mapping_cache.persist(),
                self.custom_registry.persist(
                    self.settings.signature_index.custom_registry_path
                ),
            )

            # Install unique shadcn components via MCP.
            # NOTE(review): this selects every shadcn component in the
            # signature index, not only those used by a run - confirm that
            # is intended.
            unique_shadcn = []
            for name in self.signature_index.get_all_component_names():
                signature = self.signature_index.get_signature(name)
                if (
                    signature is not None
                    and signature.registry_source.value == "shadcn"
                ):
                    unique_shadcn.append(name)

            if unique_shadcn and self.mcp_client._connected:
                try:
                    results = await self.mcp_client.install_components(unique_shadcn)
                    success_count = sum(1 for v in results.values() if v)
                    logger.info(
                        "Installed %d/%d shadcn components",
                        success_count,
                        len(unique_shadcn),
                    )
                except Exception as exc:  # installation is best-effort
                    logger.warning("Component install failed: %s", exc)
        finally:
            # Release the MCP connection on every exit path.
            await self.mcp_client.disconnect()

        logger.info("Pipeline shutdown complete")
|
|
File without changes
|