component-mapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- component_mapper/__init__.py +4 -0
- component_mapper/cache/__init__.py +0 -0
- component_mapper/cache/mapping_cache.py +72 -0
- component_mapper/config.py +247 -0
- component_mapper/mcp/__init__.py +0 -0
- component_mapper/mcp/official_client.py +182 -0
- component_mapper/mcp/registry_fetcher.py +214 -0
- component_mapper/models.py +159 -0
- component_mapper/pipeline.py +182 -0
- component_mapper/registry/__init__.py +0 -0
- component_mapper/registry/astro_generator.py +390 -0
- component_mapper/registry/custom_registry.py +127 -0
- component_mapper/registry/prop_mapper.py +370 -0
- component_mapper/registry/signature_index.py +694 -0
- component_mapper/stages/__init__.py +0 -0
- component_mapper/stages/astro_stage.py +122 -0
- component_mapper/stages/cache_lookup.py +93 -0
- component_mapper/stages/llm_mapper.py +509 -0
- component_mapper/stages/structural_match.py +145 -0
- component_mapper/utils/__init__.py +0 -0
- component_mapper/utils/similarity.py +69 -0
- component_mapper/utils/source_parser.py +292 -0
- component_mapper-0.1.0.dist-info/METADATA +16 -0
- component_mapper-0.1.0.dist-info/RECORD +25 -0
- component_mapper-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from component_mapper.config import MapperSettings
|
|
4
|
+
from component_mapper.models import MappedComponent, AstroComponent
|
|
5
|
+
from component_mapper.registry.signature_index import SignatureIndex
|
|
6
|
+
from component_mapper.registry.astro_generator import (
|
|
7
|
+
generate_astro_component,
|
|
8
|
+
generate_content_collection_schema,
|
|
9
|
+
COLLECTION_TYPE_TO_NAME,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AstroStage:
    """Pipeline stage that attaches generated Astro components to mappings.

    For every ``MappedComponent`` the stage asks the ``SignatureIndex`` for the
    signature of ``component_name`` and, when one exists, generates an
    ``AstroComponent`` for it. Depending on the settings it then writes the
    generated files to disk and attaches content-collection schemas.
    """

    def __init__(self, settings: MapperSettings, index: SignatureIndex):
        self._settings = settings
        self._index = index

    async def process(
        self,
        mapped: list[MappedComponent],
    ) -> list[MappedComponent]:
        """Enrich all MappedComponents with astro_component, write files to disk.

        Components that share a ``component_name`` reuse the result recorded
        for the first component with that name, so generation runs at most
        once per name.

        Args:
            mapped: Components to enrich. An empty list returns ``[]``.

        Returns:
            One entry per input component, in input order. Entries are copies
            carrying the generated Astro component when generation succeeded
            and the unchanged input component otherwise.
        """
        if not mapped:
            return []

        # None marks a name for which no Astro component could be produced.
        seen_astro: dict[str, AstroComponent | None] = {}
        enriched: list[MappedComponent] = []

        for component in mapped:
            name = component.component_name

            if name in seen_astro:
                cached = seen_astro[name]
                if cached is None:
                    # Nothing reusable was recorded for this name. Keep the
                    # component as-is instead of overwriting an Astro
                    # component it may already carry with None.
                    enriched.append(component)
                else:
                    enriched.append(
                        component.model_copy(update={"astro_component": cached})
                    )
                continue

            updated = self._enrich_one(component)
            if updated is None:
                # Generation was skipped or failed: pass the component through
                # and remember whatever it already carries (possibly None).
                updated = component
            seen_astro[name] = updated.astro_component
            enriched.append(updated)

        # Write to disk
        astro_root = self._settings.astro_project_root
        if astro_root:
            await self._write_files(enriched)

        if self._settings.generate_collection_schemas:
            await self._attach_collection_schemas(enriched)

        return enriched

    def _enrich_one(self, component: MappedComponent) -> MappedComponent | None:
        """Return a copy of *component* with a freshly generated Astro component.

        Returns ``None`` when the index has no signature for the component or
        when generation fails for any reason; failures are logged at debug
        level and never raised.
        """
        sig = self._index.get_signature(component.component_name)
        if sig is None:
            return None

        try:
            # Imported lazily inside the try block so that a missing
            # segment_classifier package degrades to "no Astro component"
            # instead of failing the stage.
            from segment_classifier.models import ClassifiedSegment

            # NOTE(review): the segment id doubles as fingerprint_hash and
            # raw_html is empty; confirm the generator needs neither of the
            # real values.
            seg = ClassifiedSegment(
                segment_id=component.segment_id,
                page_url=component.page_url,
                component_type=component.component_type,
                classification_stage=component.classification_stage,
                fingerprint_hash=component.segment_id,
                raw_html="",
            )
            astro = generate_astro_component(
                seg, sig, component.prop_mapping, component.component_name
            )
            return component.model_copy(update={"astro_component": astro})
        except Exception as exc:
            logger.debug(
                "Astro enrich failed for %s: %s", component.segment_id, exc
            )
            return None

    async def _write_files(self, mapped: list[MappedComponent]) -> None:
        """Write each distinct generated Astro file below the project root.

        Components without an Astro component are skipped, and every target
        path is written at most once per call. Write failures are logged as
        warnings and do not abort the remaining writes.
        """
        root = self._settings.astro_project_root
        if not root:
            # Nothing to write to; avoids Path(None) raising TypeError.
            return

        astro_root = Path(root)
        written: set[str] = set()
        for component in mapped:
            astro = component.astro_component
            if astro is None:
                continue
            file_path = astro_root / astro.file_path
            if str(file_path) in written:
                continue
            try:
                file_path.parent.mkdir(parents=True, exist_ok=True)
                # Explicit UTF-8 so the output does not depend on the
                # platform's locale encoding.
                file_path.write_text(astro.full_file_content, encoding="utf-8")
                written.add(str(file_path))
                logger.debug("Wrote %s", file_path)
            except Exception as exc:
                # Deliberately best-effort: one bad file must not stop the rest.
                logger.warning("Failed to write %s: %s", file_path, exc)

    async def _attach_collection_schemas(self, mapped: list[MappedComponent]) -> None:
        """Attach a content-collection schema to components of collection types.

        Only components whose ``component_type`` is a key of
        ``COLLECTION_TYPE_TO_NAME`` are touched. The schema is generated once
        per collection name, from the first such component that has a
        signature, and reused for later components of the same collection.
        Components are modified in place.
        """
        seen_collections: dict[str, object] = {}
        for component in mapped:
            ct = component.component_type
            if ct not in COLLECTION_TYPE_TO_NAME:
                continue
            collection_name = COLLECTION_TYPE_TO_NAME[ct]

            if collection_name in seen_collections:
                # Reuse existing schema
                self._set_schema(component, seen_collections[collection_name])
                continue

            sig = self._index.get_signature(component.component_name)
            if not sig:
                continue
            try:
                schema = generate_content_collection_schema(
                    ct, component.prop_mapping, sig
                )
            except Exception as exc:
                logger.debug("Schema gen failed: %s", exc)
                continue
            seen_collections[collection_name] = schema
            self._set_schema(component, schema)

    @staticmethod
    def _set_schema(component: MappedComponent, schema: object) -> None:
        """Set ``content_collection_schema`` on *component*, logging failures."""
        try:
            # object.__setattr__ bypasses any __setattr__ defined by the model
            # class. NOTE(review): presumably needed because the model rejects
            # normal assignment of this attribute; confirm.
            object.__setattr__(component, "content_collection_schema", schema)
        except Exception as exc:
            logger.debug(
                "Could not attach collection schema to %s: %s",
                component.segment_id,
                exc,
            )
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from component_mapper.models import (
|
|
4
|
+
MappedComponent,
|
|
5
|
+
MappingStage,
|
|
6
|
+
AstroComponent,
|
|
7
|
+
)
|
|
8
|
+
from component_mapper.cache.mapping_cache import MappingCache
|
|
9
|
+
from component_mapper.registry.signature_index import SignatureIndex
|
|
10
|
+
from component_mapper.registry.astro_generator import generate_astro_component
|
|
11
|
+
from segment_classifier.models import ClassifiedSegment
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CacheLookupStage:
    """Stage that resolves classified segments against the mapping cache."""

    def __init__(self, cache: MappingCache, index: SignatureIndex):
        self._cache = cache
        self._index = index

    async def process(
        self,
        segments: list[ClassifiedSegment],
    ) -> tuple[list[MappedComponent], list[ClassifiedSegment]]:
        """Look up every segment concurrently and split the outcome.

        Returns:
            ``(cache_hits, cache_misses)``: the mapped components built from
            cached records, and the segments for which no record was found.
        """
        outcomes = await asyncio.gather(
            *[self._lookup(candidate) for candidate in segments]
        )

        hits: list[MappedComponent] = []
        misses: list[ClassifiedSegment] = []
        for candidate, outcome in zip(segments, outcomes):
            if outcome is None:
                misses.append(candidate)
                continue
            hits.append(outcome)

        logger.info("Cache lookup: %d hits, %d misses", len(hits), len(misses))
        return hits, misses

    async def _lookup(self, segment: ClassifiedSegment) -> MappedComponent | None:
        """Return a cache-hit ``MappedComponent`` for *segment*, or ``None``.

        A hit also bumps the record's hit counter in the cache.
        """
        record = await self._cache.get(segment.fingerprint_hash)
        if record is None:
            return None

        await self._cache.increment_hit(segment.fingerprint_hash)

        return MappedComponent(
            segment_id=segment.segment_id,
            page_url=segment.page_url,
            component_type=segment.component_type,
            classification_stage=segment.classification_stage,
            component_name=record.component_name,
            registry_source=record.registry_source,
            mapping_stage=MappingStage.CACHE_HIT,
            mapping_confidence=record.confidence,
            prop_mapping=record.prop_mapping,
            astro_component=self._astro_for_hit(segment, record),
        )

    def _astro_for_hit(self, segment: ClassifiedSegment, record) -> AstroComponent:
        """Rebuild the Astro component for a cached record.

        Falls back to the minimal placeholder when the index has no signature
        for the cached component name or when regeneration raises.
        """
        signature = self._index.get_signature(record.component_name)
        if signature is None:
            # No signature known: only a placeholder can be produced.
            return _minimal_astro(record.component_name)

        try:
            return generate_astro_component(
                segment, signature, record.prop_mapping, record.component_name
            )
        except Exception as exc:
            logger.debug(
                "Failed to regenerate astro for cache hit %s: %s",
                segment.segment_id,
                exc,
            )
            return _minimal_astro(record.component_name)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _minimal_astro(component_name: str) -> AstroComponent:
    """Build a placeholder ``AstroComponent`` for *component_name*.

    The name is split on ``-`` and ``_``; each part is passed through
    ``str.capitalize`` and the parts are concatenated to form the component
    name used in the file path, frontmatter and template.
    """
    words = component_name.replace("-", "_").split("_")
    pascal = "".join(map(str.capitalize, words))

    body = f"---\n// {pascal} (from cache)\n---\n\n<{pascal} />\n"
    placeholder = AstroComponent(
        component_name=pascal,
        file_path=f"src/components/{pascal}.astro",
        frontmatter=f"// {pascal}",
        template=f"<{pascal} />",
        imports=[],
        full_file_content=body,
        install_commands=[],
    )
    return placeholder
|