component-mapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ from difflib import SequenceMatcher
2
+ from component_mapper.models import RankedCandidate
3
+
4
+
5
def jaccard_similarity(set_a: set, set_b: set) -> float:
    """Return the Jaccard index (intersection over union) of two sets.

    Args:
        set_a: First set.
        set_b: Second set.

    Returns:
        ``len(set_a & set_b) / len(set_a | set_b)``. Two empty sets are
        treated as identical and yield ``1.0``.
    """
    union = set_a | set_b
    if not union:
        # The union is empty only when both inputs are empty, so this single
        # check covers the "both empty" case and guards the division below.
        return 1.0
    return len(set_a & set_b) / len(union)
13
+
14
+
15
def skeleton_similarity(skeleton_a: str, skeleton_b: str) -> float:
    """Return a 0..1 similarity ratio between two skeleton strings.

    The value is ``difflib.SequenceMatcher.ratio()`` (twice the number of
    matching characters divided by the combined length); it is a similarity,
    not an edit distance.

    Args:
        skeleton_a: First skeleton string.
        skeleton_b: Second skeleton string.

    Returns:
        ``1.0`` when both strings are empty, ``0.0`` when exactly one is
        empty, otherwise the SequenceMatcher ratio.
    """
    if not skeleton_a and not skeleton_b:
        return 1.0
    if not skeleton_a or not skeleton_b:
        return 0.0
    # autojunk=False: the default heuristic kicks in for sequences of 200+
    # items and ignores "popular" characters. Skeletons use a tiny alphabet,
    # so on long inputs nearly every character is popular and near-identical
    # strings can score 0.0.
    return SequenceMatcher(None, skeleton_a, skeleton_b, autojunk=False).ratio()
22
+
23
+
24
def composite_score(
    structural: float,
    class_tokens: float,
    type_compat: float,
    weights: tuple[float, float, float] = (0.5, 0.3, 0.2),
) -> float:
    """Combine three sub-scores into one weighted score clamped to [0, 1].

    Args:
        structural: Structural sub-score.
        class_tokens: Class-token sub-score.
        type_compat: Type-compatibility sub-score.
        weights: Multipliers applied to the three sub-scores, in the same
            order as the positional arguments.

    Returns:
        The weighted sum, limited to the range ``0.0`` to ``1.0``.
    """
    structural_weight, class_weight, type_weight = weights
    weighted_total = (
        structural_weight * structural
        + class_weight * class_tokens
        + type_weight * type_compat
    )
    # Clamp the upper bound first, then the lower bound.
    capped = min(1.0, weighted_total)
    return max(0.0, capped)
34
+
35
+
36
def tfidf_cosine_similarity(vec_a: list[float], vec_b: list[float]) -> float:
    """Return the cosine similarity between two TF-IDF vectors.

    NumPy is used when it can be imported; otherwise an equivalent pure-Python
    computation runs. Both paths behave the same way on every input.

    Args:
        vec_a: First vector.
        vec_b: Second vector; expected to have the same length as ``vec_a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector is empty or
        has zero norm.

    Raises:
        ValueError: If both vectors have non-zero norm but different lengths.
    """
    if not vec_a or not vec_b:
        return 0.0

    # Keep the try body to the import alone so that only a missing NumPy
    # selects the fallback.
    try:
        import numpy as np
    except ImportError:
        np = None

    if np is not None:
        a = np.array(vec_a, dtype=float)
        b = np.array(vec_b, dtype=float)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))

    # Pure-Python fallback, ordered like the NumPy path: zero-norm check first,
    # then the dot product.
    norm_a = sum(x * x for x in vec_a) ** 0.5
    norm_b = sum(y * y for y in vec_b) ** 0.5
    if norm_a == 0 or norm_b == 0:
        return 0.0
    # strict=True raises ValueError on a length mismatch, matching np.dot,
    # instead of silently truncating to the shorter vector.
    dot = sum(x * y for x, y in zip(vec_a, vec_b, strict=True))
    return dot / (norm_a * norm_b)
59
+
60
+
61
def rank_candidates(
    candidates: list[RankedCandidate],
    top_k: int = 4,
    min_threshold: float = 0.40,
) -> list[RankedCandidate]:
    """Return the best-scoring candidates at or above a threshold.

    Args:
        candidates: Candidates to rank; only their ``composite_score``
            attribute is read. The input list is not modified.
        top_k: Maximum number of candidates to return. Zero or a negative
            value yields an empty list.
        min_threshold: Candidates scoring below this value are dropped.

    Returns:
        A new list of at most ``top_k`` candidates sorted by
        ``composite_score`` in descending order. Candidates with equal scores
        keep their original relative order.
    """
    if top_k <= 0:
        # A negative bound would otherwise slice as "all but the last k".
        return []
    ranked = sorted(
        (c for c in candidates if c.composite_score >= min_threshold),
        key=lambda c: c.composite_score,
        reverse=True,
    )
    return ranked[:top_k]
@@ -0,0 +1,292 @@
1
+ import re
2
+ from dataclasses import dataclass, field
3
+ from component_mapper.models import PropDefinition, InteractivityMode
4
+
5
+
6
# Whole-word, case-insensitive match for class-name words that name a
# structural role (card, grid, nav, ...).
STRUCTURAL_CLASS_PATTERN = re.compile(
    r"\b(card|grid|list|item|hero|nav|menu|header|footer|sidebar|"
    r"form|modal|badge|price|rating|carousel|pagination|search|"
    r"feature|testimonial|cta|faq|pricing|article|media|table|"
    r"product|blog|news|collection|section|widget)\b",
    flags=re.I,
)

# Body (group 1) of an `interface ...Props...` or `type ...Props... =`
# declaration. The body stops at the first closing brace, so nested object
# types are cut short.
PROPS_INTERFACE_PATTERN = re.compile(
    r"(?:interface\s+\w*Props\w*|type\s+\w*Props\w*\s*=)\s*\{([^}]+)\}",
    flags=re.S,
)

# Identifiers whose presence marks a component as interactive.
INTERACTIVE_HOOKS_PATTERN = re.compile(
    r"\b(useState|useEffect|onClick|onChange|useRef|useCallback|useReducer)\b"
)

# Handler names that only mark a component as partially interactive.
PARTIAL_INTERACTIVE_PATTERN = re.compile(
    r"\b(onMouseEnter|onFocus|onBlur|onHover)\b"
)

# `return ( ... )` followed by an optional semicolon and a closing brace;
# group 1 is the text between the parentheses.
RETURN_BLOCK_PATTERN = re.compile(
    r"\breturn\s*\((.*?)\)\s*[;]?\s*\}",
    flags=re.S,
)

# `return <Tag ...` without parentheses; group 1 runs up to the line that
# holds the next closing brace.
RETURN_BLOCK_ALT_PATTERN = re.compile(
    r"\breturn\s*(<[A-Za-z].*?)(?=\n\s*\})",
    flags=re.S,
)

# Capitalised component rendered behind `&&` inside a `{...}` expression.
CONDITIONAL_COMPONENT_PATTERN = re.compile(r"\{[^}]*&&\s*<([A-Z][A-Za-z0-9]*)")

# Capitalised component in the "then" branch of a ternary expression.
TERNARY_COMPONENT_PATTERN = re.compile(r"\?[^:]*<([A-Z][A-Za-z0-9]*)[^:]*:")

# Value of a quoted `className="..."` / `className='...'` attribute.
CLASSNAME_PATTERN = re.compile(r'className=["\']([^"\']+)["\']')
28
+
29
+
30
@dataclass
class ParsedSource:
    """Structural facts extracted from one component source file.

    Every field has a default, so ``ParsedSource()`` with no arguments is a
    valid "nothing extracted" value.
    """

    # Skeleton string describing the nesting of the returned markup.
    dom_skeleton: str = ""
    # Tag name of the outermost element.
    root_element: str = "div"
    # Sub-component names split into required and optional.
    required_children: list[str] = field(default_factory=list)
    optional_children: list[str] = field(default_factory=list)
    # Structural class-name tokens found in the source.
    structural_class_tokens: list[str] = field(default_factory=list)
    # Nesting depth of the markup.
    typical_nesting_depth: int = 0
    # Tag name -> number of occurrences.
    child_tag_counts: dict[str, int] = field(default_factory=dict)
    # Interactivity classification; static unless set otherwise.
    interactivity: InteractivityMode = InteractivityMode.STATIC
    # Prop definitions of the component.
    props: list[PropDefinition] = field(default_factory=list)
41
+
42
+
43
def parse_source(source_code: str) -> ParsedSource:
    """Parse TypeScript/TSX source into a ParsedSource.

    Parsing is best-effort: any exception raised while extracting is logged
    and whatever was collected up to that point is returned, so malformed
    input never propagates an error to the caller.

    Args:
        source_code: Full text of the component source file.

    Returns:
        A ParsedSource. Fields that could not be extracted keep their
        dataclass defaults.
    """
    result = ParsedSource()
    try:
        jsx_block = _extract_return_block(source_code)
        if jsx_block:
            result.dom_skeleton = _parse_jsx_skeleton(jsx_block, max_depth=5)
            result.root_element = _extract_root_element(jsx_block)
            result.typical_nesting_depth = _measure_nesting_depth(jsx_block)
            result.child_tag_counts = _count_child_tags(jsx_block)
            result.required_children, result.optional_children = _extract_children(
                jsx_block
            )

        # Class names are gathered from the whole file, not only the returned
        # JSX; tokens are lower-cased, de-duplicated and sorted.
        result.structural_class_tokens = sorted(
            {
                token.lower()
                for cls_string in CLASSNAME_PATTERN.findall(source_code)
                for token in STRUCTURAL_CLASS_PATTERN.findall(cls_string)
            }
        )

        result.interactivity = _detect_interactivity(source_code)
        result.props = _extract_props(source_code)
    except Exception:
        # Stay best-effort, but leave a trace instead of failing silently.
        import logging

        logging.getLogger(__name__).warning(
            "parse_source failed; returning partial result", exc_info=True
        )
    return result
69
+
70
+
71
def _extract_return_block(source: str) -> str:
    """Return the JSX that a component function returns.

    The parenthesised ``return ( ... )`` form is tried first, then the bare
    ``return <Tag ...`` form. If neither matches, everything from the first
    tag-looking token to the end of ``source`` is returned. An empty string
    means nothing JSX-like was found.
    """
    for pattern in (RETURN_BLOCK_PATTERN, RETURN_BLOCK_ALT_PATTERN):
        found = pattern.search(source)
        if found:
            return found.group(1).strip()

    # Last resort: start at the first thing that looks like an opening tag.
    first_tag = re.search(r"<[A-Z][A-Za-z]*|<[a-z]+[\s>/]", source)
    return source[first_tag.start() :] if first_tag else ""
84
+
85
+
86
+ def _extract_root_element(jsx_block: str) -> str:
87
+ """Extract outermost JSX element tag name."""
88
+ m = re.match(r"<([A-Za-z][A-Za-z0-9]*)", jsx_block.strip())
89
+ if m:
90
+ return m.group(1).lower()
91
+ return "div"
92
+
93
+
94
+ def _parse_jsx_skeleton(jsx: str, max_depth: int) -> str:
95
+ """Recursively parse JSX into skeleton string."""
96
+ # Build token stream
97
+ tokens = []
98
+ for m in re.finditer(r"<(/?)([A-Za-z][A-Za-z0-9]*)([^>]*)(/?)>", jsx):
99
+ is_close = m.group(1) == "/"
100
+ tag = m.group(2)
101
+ # attrs = m.group(3) - Unused
102
+
103
+ is_self_close = m.group(4) == "/" or tag.lower() in (
104
+ "input",
105
+ "img",
106
+ "br",
107
+ "hr",
108
+ "meta",
109
+ "link",
110
+ )
111
+
112
+ if is_close:
113
+ tokens.append(("close", tag, m.start()))
114
+ elif is_self_close:
115
+ tokens.append(("self", tag, m.start()))
116
+ else:
117
+ tokens.append(("open", tag, m.start()))
118
+
119
+ if not tokens:
120
+ return ""
121
+
122
+ def build_tree(pos: int, depth: int) -> tuple[str, int]:
123
+ if pos >= len(tokens) or depth > max_depth:
124
+ return "", pos
125
+
126
+ kind, tag, _ = tokens[pos]
127
+ tag_lower = tag.lower()
128
+
129
+ if kind == "close":
130
+ return "", pos
131
+
132
+ if kind == "self":
133
+ return tag_lower, pos + 1
134
+
135
+ # kind == 'open'
136
+ children = []
137
+ i = pos + 1
138
+ while i < len(tokens):
139
+ k, t, _ = tokens[i]
140
+ if k == "close" and t.lower() == tag_lower:
141
+ i += 1
142
+ break
143
+ child_str, i = build_tree(i, depth + 1)
144
+ if child_str:
145
+ children.append(child_str)
146
+
147
+ if not children:
148
+ return tag_lower, i
149
+ elif len(children) == 1:
150
+ return f"{tag_lower}>{children[0]}", i
151
+ else:
152
+ return f"{tag_lower}>[{'+'.join(children)}]", i
153
+
154
+ result, _ = build_tree(0, 0)
155
+ return result
156
+
157
+
158
+ def _measure_nesting_depth(jsx: str) -> int:
159
+ """Count max nesting depth of JSX tags."""
160
+ depth = 0
161
+ max_depth = 0
162
+ for m in re.finditer(r"<(/?)([A-Za-z][A-Za-z0-9]*)([^>]*)(/?)>", jsx):
163
+ is_close = m.group(1) == "/"
164
+ is_self = m.group(4) == "/" or m.group(2).lower() in (
165
+ "input",
166
+ "img",
167
+ "br",
168
+ "hr",
169
+ )
170
+ if is_close:
171
+ depth = max(0, depth - 1)
172
+ elif not is_self:
173
+ depth += 1
174
+ max_depth = max(max_depth, depth)
175
+ return max_depth
176
+
177
+
178
+ def _count_child_tags(jsx: str) -> dict[str, int]:
179
+ """Count occurrences of each lowercase HTML tag."""
180
+ counts: dict[str, int] = {}
181
+ html_tags = {
182
+ "div",
183
+ "span",
184
+ "p",
185
+ "h1",
186
+ "h2",
187
+ "h3",
188
+ "h4",
189
+ "h5",
190
+ "h6",
191
+ "a",
192
+ "img",
193
+ "button",
194
+ "input",
195
+ "form",
196
+ "ul",
197
+ "ol",
198
+ "li",
199
+ "table",
200
+ "tr",
201
+ "td",
202
+ "th",
203
+ "section",
204
+ "article",
205
+ "nav",
206
+ "header",
207
+ "footer",
208
+ "main",
209
+ "aside",
210
+ "figure",
211
+ "figcaption",
212
+ "label",
213
+ "select",
214
+ "textarea",
215
+ "picture",
216
+ "source",
217
+ }
218
+ for m in re.finditer(r"<([A-Za-z][A-Za-z0-9]*)", jsx):
219
+ tag = m.group(1).lower()
220
+ if tag in html_tags:
221
+ counts[tag] = counts.get(tag, 0) + 1
222
+ return counts
223
+
224
+
225
def _extract_children(jsx: str) -> tuple[list[str], list[str]]:
    """Split the capitalised components used in ``jsx`` into two groups.

    A component is optional when it appears behind ``&&`` inside a ``{...}``
    expression or in the "then" branch of a ternary; every other referenced
    component is required.

    Returns:
        ``(required, optional)``, each a sorted list of component names.
    """
    # Every capitalised tag name of two or more characters.
    referenced = set(re.findall(r"<([A-Z][A-Za-z0-9]+)", jsx))

    conditional = {
        hit.group(1) for hit in CONDITIONAL_COMPONENT_PATTERN.finditer(jsx)
    }
    conditional |= {hit.group(1) for hit in TERNARY_COMPONENT_PATTERN.finditer(jsx)}

    return sorted(referenced - conditional), sorted(referenced & conditional)
240
+
241
+
242
def _extract_props(source: str) -> list[PropDefinition]:
    """Parse the first Props interface/type in ``source`` into definitions.

    The declaration body is split on newlines and semicolons, which also
    handles single-line interfaces. A line of the form ``name: Type`` or
    ``name?: Type`` becomes one PropDefinition; anything else is skipped.

    A default value comes from an ``@default <value>`` tag, either in a
    trailing comment on the field's own line or in the comment lines directly
    above the field.

    Args:
        source: Full text of the component source file.

    Returns:
        One PropDefinition per recognised field, in declaration order; empty
        when no Props declaration is found. ``description`` is always ``""``.
    """
    m = PROPS_INTERFACE_PATTERN.search(source)
    if not m:
        return []

    props = []
    # @default seen in comment lines that precede the next field.
    pending_default = None

    for raw_field in re.split(r"[;\n]", m.group(1)):
        raw_field = raw_field.strip()
        if not raw_field:
            continue

        default_m = re.search(r"@default\s+(\S+)", raw_field)
        # Drop a comment terminator glued to the value ("@default 4*/").
        found_default = (
            (default_m.group(1).removesuffix("*/") or None) if default_m else None
        )

        if raw_field.startswith(("//", "/*", "*")):
            # Comment line: remember its default for the field that follows.
            if found_default is not None:
                pending_default = found_default
            continue

        # A remembered default applies only to the line right after the
        # comment, whether or not that line turns out to be a field.
        inherited_default, pending_default = pending_default, None

        # Cut a trailing comment so it does not end up inside the type.
        declaration = re.split(r"\s+(?://|/\*)", raw_field, maxsplit=1)[0]

        # Match: propName?: Type or propName: Type
        pm = re.match(r"(\w+)(\?)?\s*:\s*(.+?)(?:,)?\s*$", declaration)
        if not pm:
            continue

        name = pm.group(1)
        optional = pm.group(2) == "?"
        type_str = pm.group(3).strip().rstrip(",")

        # A default on the field's own line wins over one from the comment
        # above it.
        default_val = found_default if found_default is not None else inherited_default

        props.append(
            PropDefinition(
                name=name,
                type=type_str,
                required=not optional,
                default_value=default_val,
                description="",
            )
        )

    return props
284
+
285
+
286
def _detect_interactivity(source: str) -> InteractivityMode:
    """Classify a component by the hooks and event handlers it mentions.

    The checks run from strongest to weakest signal and the first hit wins;
    a source with no hits is static.
    """
    checks = (
        (INTERACTIVE_HOOKS_PATTERN, InteractivityMode.INTERACTIVE),
        (PARTIAL_INTERACTIVE_PATTERN, InteractivityMode.PARTIAL),
    )
    for pattern, mode in checks:
        if pattern.search(source):
            return mode
    return InteractivityMode.STATIC
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: component-mapper
3
+ Version: 0.1.0
4
+ Summary: Maps ClassifiedSegment objects to Shadcn UI components and Astro wrappers
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: aiofiles>=23.0
7
+ Requires-Dist: aiohttp>=3.9
8
+ Requires-Dist: beautifulsoup4>=4.12
9
+ Requires-Dist: litellm>=1.40
10
+ Requires-Dist: numpy>=1.26
11
+ Requires-Dist: page-segmenter>=0.1.2
12
+ Requires-Dist: pydantic-settings>=2.2
13
+ Requires-Dist: pydantic>=2.7
14
+ Requires-Dist: scikit-learn>=1.5
15
+ Requires-Dist: scipy>=1.13
16
+ Requires-Dist: segment-classifier>=0.1.1
@@ -0,0 +1,25 @@
1
+ component_mapper/__init__.py,sha256=kJlD6dEQKNEDSltN6rwgSdd5bC4ZVRXC11zT1neaBnk,152
2
+ component_mapper/config.py,sha256=NFhui7bMMfHv2Tbg_zxvOaYz4fX-SHlC_wX3lv282CU,38662
3
+ component_mapper/models.py,sha256=GOc_B1HlO18YGVAOHyPGMAv8c9ZpBbnN_SeDgUZ4aaU,3875
4
+ component_mapper/pipeline.py,sha256=QwvPrbQcJ7Lw9BL5Cvwog_TaV0fwtsZetX51-Ldc_lw,7039
5
+ component_mapper/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ component_mapper/cache/mapping_cache.py,sha256=Z17DKZEz2uElxhBlIwcdjKAq7h-daeeIF0LpDPfyVdQ,2721
7
+ component_mapper/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ component_mapper/mcp/official_client.py,sha256=_DeuqF2GOHX0Waxxt_MMijQ1hKuRsbuppn7G7jHmffY,6925
9
+ component_mapper/mcp/registry_fetcher.py,sha256=0PFTNngKftXjwxS7EAJ1gFB5oORRbhALB7DBLcoXtdA,8171
10
+ component_mapper/registry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ component_mapper/registry/astro_generator.py,sha256=Koswgxdni0WFvVEnnMRWN2hT4UaJJJ2c_x5raBp0clc,12963
12
+ component_mapper/registry/custom_registry.py,sha256=p6oLy7petEGsrQU_BaM2yAJmLvGaLkrtPiW1fpwkivU,4582
13
+ component_mapper/registry/prop_mapper.py,sha256=Eh2sNhFn8hn0i21_tVH7gLyBeW1PY-JIFhFJzyI2eTY,11751
14
+ component_mapper/registry/signature_index.py,sha256=QKa2aGxTtT69tyS0gSYLPeod19hsvuWqqSa8yv_B_kg,24168
15
+ component_mapper/stages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ component_mapper/stages/astro_stage.py,sha256=C-aqtmekq-Ad4_Pcx4P10lU-l0JpMr7TqDbGI-ibRPc,4870
17
+ component_mapper/stages/cache_lookup.py,sha256=QvabZBZtt2bPVKD2VSzMBSOqN7AbVpmzgzm_1iVxvA4,3271
18
+ component_mapper/stages/llm_mapper.py,sha256=5Ei-ONKgFDvY7BaSCHzb2kzFyukN44guSzP_PiM9sP0,18524
19
+ component_mapper/stages/structural_match.py,sha256=VoU9Xy2oYASdvDDzOjj6_CeNTKdX7cXEtxVt-6RvwXc,5114
20
+ component_mapper/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ component_mapper/utils/similarity.py,sha256=pbSWmo21hbcRxg5tsmFshRMpic0NU1ICbugQFvhHI68,2285
22
+ component_mapper/utils/source_parser.py,sha256=oczvBLqC3Z4KxefA75qRQ3jaZjBJtxIwpK78-6qUFaI,9024
23
+ component_mapper-0.1.0.dist-info/METADATA,sha256=A_-xeaSQlRNtJKoXkeVtWNBO0h8dGOuqzGzA-3GW3Rg,522
24
+ component_mapper-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
25
+ component_mapper-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any