component-mapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from component_mapper.config import MapperSettings
4
+ from component_mapper.models import MappedComponent, AstroComponent
5
+ from component_mapper.registry.signature_index import SignatureIndex
6
+ from component_mapper.registry.astro_generator import (
7
+ generate_astro_component,
8
+ generate_content_collection_schema,
9
+ COLLECTION_TYPE_TO_NAME,
10
+ )
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class AstroStage:
    """Pipeline stage that attaches Astro components to mapped components.

    For every ``MappedComponent`` the stage looks up the component's
    signature in the ``SignatureIndex`` and generates an ``AstroComponent``
    from it. Depending on the settings it then writes the generated files
    below ``astro_project_root`` and attaches content collection schemas.
    """

    def __init__(self, settings: MapperSettings, index: SignatureIndex):
        """Store the settings and signature index used by the stage."""
        self._settings = settings
        self._index = index

    async def process(
        self,
        mapped: list[MappedComponent],
    ) -> list[MappedComponent]:
        """Enrich all MappedComponents with astro_component, write files to disk.

        Generation runs once per ``component_name``; later components with
        the same name reuse the first result. Components for which nothing
        could be generated are passed through unchanged.

        Args:
            mapped: Components to enrich. The list itself is not modified,
                but components returned unchanged may later receive a
                ``content_collection_schema`` attribute in place.

        Returns:
            A new list with one entry per input component, in input order.
        """
        if not mapped:
            return []

        # component_name -> generated component; None records "nothing could
        # be generated" so the same failing generation is not retried.
        seen_astro: dict[str, AstroComponent | None] = {}
        enriched: list[MappedComponent] = []

        for component in mapped:
            name = component.component_name

            if name in seen_astro:
                cached = seen_astro[name]
                if cached is None:
                    # Do not overwrite whatever astro_component this
                    # component already carries with a cached None.
                    enriched.append(component)
                else:
                    enriched.append(
                        component.model_copy(update={"astro_component": cached})
                    )
                continue

            astro = self._generate_astro(component)
            if astro is not None:
                enriched.append(
                    component.model_copy(update={"astro_component": astro})
                )
                seen_astro[name] = astro
            else:
                enriched.append(component)
                seen_astro[name] = component.astro_component

        # Write to disk
        astro_root = self._settings.astro_project_root
        if astro_root:
            await self._write_files(enriched)

        if self._settings.generate_collection_schemas:
            await self._attach_collection_schemas(enriched)

        return enriched

    def _generate_astro(self, component: MappedComponent) -> AstroComponent | None:
        """Generate the Astro component for *component*, or None on failure.

        Returns None when the index has no signature for the component or
        when generation raises; the failure is logged at debug level.
        """
        sig = self._index.get_signature(component.component_name)
        if sig is None:
            return None

        try:
            # Imported lazily inside the try so that a failing import is
            # handled like any other generation failure.
            from segment_classifier.models import ClassifiedSegment

            # The generator takes a ClassifiedSegment, so one is rebuilt from
            # the mapped component; segment_id doubles as the fingerprint
            # hash and no raw HTML is supplied.
            seg = ClassifiedSegment(
                segment_id=component.segment_id,
                page_url=component.page_url,
                component_type=component.component_type,
                classification_stage=component.classification_stage,
                fingerprint_hash=component.segment_id,
                raw_html="",
            )
            return generate_astro_component(
                seg, sig, component.prop_mapping, component.component_name
            )
        except Exception as exc:
            logger.debug("Astro enrich failed for %s: %s", component.segment_id, exc)
            return None

    async def _write_files(self, mapped: list[MappedComponent]) -> None:
        """Write each distinct generated Astro file below the project root.

        Components without an ``astro_component`` are skipped and every
        target path is written at most once. Writing is best effort: a
        failure is logged as a warning and the remaining files are still
        attempted.
        """
        astro_root = Path(self._settings.astro_project_root)
        written: set[str] = set()
        for component in mapped:
            astro = component.astro_component
            if astro is None:
                continue
            file_path = astro_root / astro.file_path
            if str(file_path) in written:
                continue
            try:
                file_path.parent.mkdir(parents=True, exist_ok=True)
                # Explicit UTF-8 so the output does not depend on the
                # platform's locale encoding.
                file_path.write_text(astro.full_file_content, encoding="utf-8")
            except Exception as exc:
                logger.warning("Failed to write %s: %s", file_path, exc)
            else:
                written.add(str(file_path))
                logger.debug("Wrote %s", file_path)

    async def _attach_collection_schemas(self, mapped: list[MappedComponent]) -> None:
        """Attach a content collection schema to eligible components in place.

        Only components whose ``component_type`` is a key of
        ``COLLECTION_TYPE_TO_NAME`` are considered. One schema is generated
        per collection name and shared by all components of that collection.
        """
        seen_collections: dict[str, object] = {}
        for component in mapped:
            ct = component.component_type
            if ct not in COLLECTION_TYPE_TO_NAME:
                continue
            collection_name = COLLECTION_TYPE_TO_NAME[ct]

            if collection_name in seen_collections:
                # Reuse existing schema
                self._attach_schema(component, seen_collections[collection_name])
                continue

            sig = self._index.get_signature(component.component_name)
            if not sig:
                continue
            try:
                schema = generate_content_collection_schema(
                    ct, component.prop_mapping, sig
                )
            except Exception as exc:
                logger.debug("Schema gen failed: %s", exc)
                continue
            seen_collections[collection_name] = schema
            self._attach_schema(component, schema)

    @staticmethod
    def _attach_schema(component: MappedComponent, schema: object) -> None:
        """Set ``content_collection_schema`` on *component*, logging failures."""
        try:
            # object.__setattr__ bypasses the model's own __setattr__.
            # NOTE(review): presumably needed because MappedComponent is
            # frozen or does not declare this field - confirm.
            object.__setattr__(component, "content_collection_schema", schema)
        except Exception as exc:
            logger.debug(
                "Could not attach collection schema to %s: %s",
                component.segment_id,
                exc,
            )
@@ -0,0 +1,93 @@
1
+ import asyncio
2
+ import logging
3
+ from component_mapper.models import (
4
+ MappedComponent,
5
+ MappingStage,
6
+ AstroComponent,
7
+ )
8
+ from component_mapper.cache.mapping_cache import MappingCache
9
+ from component_mapper.registry.signature_index import SignatureIndex
10
+ from component_mapper.registry.astro_generator import generate_astro_component
11
+ from segment_classifier.models import ClassifiedSegment
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class CacheLookupStage:
    """Pipeline stage that resolves classified segments from the mapping cache.

    Segments whose ``fingerprint_hash`` has a cached record become
    ``MappedComponent`` objects marked ``MappingStage.CACHE_HIT``; all other
    segments are returned as misses.
    """

    def __init__(self, cache: MappingCache, index: SignatureIndex):
        """Store the mapping cache and signature index used by the stage."""
        self._cache = cache
        self._index = index

    async def process(
        self,
        segments: list[ClassifiedSegment],
    ) -> tuple[list[MappedComponent], list[ClassifiedSegment]]:
        """Returns (cache_hits, cache_misses).

        All lookups are awaited together with ``asyncio.gather``; both
        returned lists keep the relative order of ``segments``.
        """
        results = await asyncio.gather(*(self._lookup(seg) for seg in segments))

        hits: list[MappedComponent] = []
        misses: list[ClassifiedSegment] = []
        for seg, result in zip(segments, results):
            if result is not None:
                hits.append(result)
            else:
                misses.append(seg)

        logger.info("Cache lookup: %d hits, %d misses", len(hits), len(misses))
        return hits, misses

    async def _lookup(self, segment: ClassifiedSegment) -> MappedComponent | None:
        """Return the cached mapping for *segment*, or None on a cache miss.

        Errors raised by the cache read itself propagate to the caller.
        """
        record = await self._cache.get(segment.fingerprint_hash)
        if record is None:
            return None

        # The hit counter is bookkeeping only: the record is already in
        # hand, so a failing counter update must not discard a valid hit
        # (and, through gather, abort the whole batch).
        try:
            await self._cache.increment_hit(segment.fingerprint_hash)
        except Exception as exc:
            logger.warning(
                "Failed to record cache hit for %s: %s", segment.segment_id, exc
            )

        # Try to rebuild AstroComponent from cached signature
        sig = self._index.get_signature(record.component_name)

        if sig is None:
            # Build a minimal AstroComponent placeholder
            astro = _minimal_astro(record.component_name)
        else:
            try:
                astro = generate_astro_component(
                    segment, sig, record.prop_mapping, record.component_name
                )
            except Exception as exc:
                logger.debug(
                    "Failed to regenerate astro for cache hit %s: %s",
                    segment.segment_id,
                    exc,
                )
                # Fall back to the placeholder rather than losing the hit.
                astro = _minimal_astro(record.component_name)

        return MappedComponent(
            segment_id=segment.segment_id,
            page_url=segment.page_url,
            component_type=segment.component_type,
            classification_stage=segment.classification_stage,
            component_name=record.component_name,
            registry_source=record.registry_source,
            mapping_stage=MappingStage.CACHE_HIT,
            mapping_confidence=record.confidence,
            prop_mapping=record.prop_mapping,
            astro_component=astro,
        )
78
+
79
+
80
def _minimal_astro(component_name: str) -> AstroComponent:
    """Build a placeholder AstroComponent for *component_name*.

    The name is split into words on ``-`` and ``_`` and each word is passed
    through ``str.capitalize`` (first character upper-cased, the rest
    lower-cased); the joined result names the component, its file under
    ``src/components/`` and its self-closing template tag.
    """
    words = component_name.replace("-", "_").split("_")
    pascal = "".join(map(str.capitalize, words))

    # Frontmatter fence with a marker comment, then the bare tag.
    content = f"---\n// {pascal} (from cache)\n---\n\n<{pascal} />\n"

    return AstroComponent(
        component_name=pascal,
        file_path=f"src/components/{pascal}.astro",
        frontmatter=f"// {pascal}",
        template=f"<{pascal} />",
        imports=[],
        full_file_content=content,
        install_commands=[],
    )