rebly-sections 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/assets/data/block-patterns.csv +160 -0
  2. package/assets/data/component-library/GDPR-modal.liquid +183 -0
  3. package/assets/data/component-library/Ishi_parallaxblockstyle1.liquid +331 -0
  4. package/assets/data/component-library/_index.csv +157 -19
  5. package/assets/data/component-library/about.liquid +1557 -0
  6. package/assets/data/component-library/adv-header.liquid +344 -0
  7. package/assets/data/component-library/adv-navigation.liquid +542 -0
  8. package/assets/data/component-library/announcement-bar.liquid +42 -60
  9. package/assets/data/component-library/article.liquid +242 -0
  10. package/assets/data/component-library/axeo-perfume-cosmetics-store-shopify-theme-about.liquid +1557 -0
  11. package/assets/data/component-library/basel-gl_newsletter_pets.liquid +612 -0
  12. package/assets/data/component-library/bixbang-fullpackage-collection-template.liquid +990 -0
  13. package/assets/data/component-library/blog-sidebar-article.liquid +51 -0
  14. package/assets/data/component-library/blog-sidebar-deals.liquid +189 -0
  15. package/assets/data/component-library/blog-sidebar-instagram.liquid +126 -0
  16. package/assets/data/component-library/blog-sidebar-tags.liquid +30 -0
  17. package/assets/data/component-library/blog.liquid +371 -0
  18. package/assets/data/component-library/brands-page.liquid +114 -0
  19. package/assets/data/component-library/cake-shop-shopify-theme-for-bakery-and-cafe-home-support-blo.liquid +780 -0
  20. package/assets/data/component-library/collection-template-promotion.liquid +1139 -0
  21. package/assets/data/component-library/collection-template.liquid +146 -0
  22. package/assets/data/component-library/contact-us.liquid +663 -0
  23. package/assets/data/component-library/contact.liquid +256 -0
  24. package/assets/data/component-library/copyright_payment.liquid +95 -0
  25. package/assets/data/component-library/custom-content.liquid +832 -0
  26. package/assets/data/component-library/faq-template-3.liquid +1014 -0
  27. package/assets/data/component-library/footer-model-1.liquid +503 -0
  28. package/assets/data/component-library/footer-model-10.liquid +210 -0
  29. package/assets/data/component-library/footer-model-2.liquid +460 -0
  30. package/assets/data/component-library/footer-model-3.liquid +548 -0
  31. package/assets/data/component-library/footer-model-4.liquid +455 -0
  32. package/assets/data/component-library/footer-model-5.liquid +407 -0
  33. package/assets/data/component-library/footer-model-6.liquid +543 -0
  34. package/assets/data/component-library/footer-model-7.liquid +345 -0
  35. package/assets/data/component-library/footer-model-8.liquid +279 -0
  36. package/assets/data/component-library/footer-model-9.liquid +376 -0
  37. package/assets/data/component-library/gallery.liquid +236 -0
  38. package/assets/data/component-library/gecko-shopify-v5-7-6-nulled-manual_blog.liquid +720 -0
  39. package/assets/data/component-library/gl_newsletter_pets.liquid +612 -0
  40. package/assets/data/component-library/gp-logo-list.liquid +362 -0
  41. package/assets/data/component-library/grid-banner-type-3-b.liquid +655 -0
  42. package/assets/data/component-library/header-model-1.liquid +427 -0
  43. package/assets/data/component-library/header-model-10.liquid +599 -0
  44. package/assets/data/component-library/header-model-2.liquid +633 -0
  45. package/assets/data/component-library/header-model-3.liquid +415 -0
  46. package/assets/data/component-library/header-model-4.liquid +754 -0
  47. package/assets/data/component-library/header-model-5.liquid +562 -0
  48. package/assets/data/component-library/header-model-6.liquid +713 -0
  49. package/assets/data/component-library/header-model-7.liquid +743 -0
  50. package/assets/data/component-library/header-model-8.liquid +500 -0
  51. package/assets/data/component-library/header-model-9.liquid +506 -0
  52. package/assets/data/component-library/home-blog-posts-1.liquid +399 -0
  53. package/assets/data/component-library/home-blog-posts-2.liquid +393 -0
  54. package/assets/data/component-library/home-blog-posts-3.liquid +545 -0
  55. package/assets/data/component-library/home-brand-slider.liquid +224 -0
  56. package/assets/data/component-library/home-circled-block.liquid +332 -0
  57. package/assets/data/component-library/home-contact-block-1.liquid +395 -0
  58. package/assets/data/component-library/home-contact-block-2.liquid +372 -0
  59. package/assets/data/component-library/home-content-block-1.liquid +320 -0
  60. package/assets/data/component-library/home-donut-chart.liquid +335 -0
  61. package/assets/data/component-library/home-fade-in-banner.liquid +277 -0
  62. package/assets/data/component-library/home-faq-model.liquid +323 -0
  63. package/assets/data/component-library/home-featured-blog.liquid +1462 -0
  64. package/assets/data/component-library/home-featured-collections.liquid +484 -0
  65. package/assets/data/component-library/home-gallery-block1.liquid +276 -0
  66. package/assets/data/component-library/home-gallery-block2.liquid +396 -0
  67. package/assets/data/component-library/home-grid-banner-type-1.liquid +371 -0
  68. package/assets/data/component-library/home-grid-banner-type-2.liquid +362 -0
  69. package/assets/data/component-library/home-grid-banner-type-3.liquid +374 -0
  70. package/assets/data/component-library/home-grid-banner-type-4.liquid +900 -0
  71. package/assets/data/component-library/home-grid-banner-type-5.liquid +368 -0
  72. package/assets/data/component-library/home-grid-banner-type-6.liquid +382 -0
  73. package/assets/data/component-library/home-grid-banner-type-7.liquid +371 -0
  74. package/assets/data/component-library/home-hotspot-with-product-carousel.liquid +1425 -0
  75. package/assets/data/component-library/home-image-gallery.liquid +1087 -0
  76. package/assets/data/component-library/home-instagram.liquid +356 -0
  77. package/assets/data/component-library/home-newsletter.liquid +246 -0
  78. package/assets/data/component-library/home-number-counter.liquid +790 -0
  79. package/assets/data/component-library/home-price-table.liquid +416 -0
  80. package/assets/data/component-library/home-pricing-table.liquid +1076 -0
  81. package/assets/data/component-library/home-product-grid.liquid +413 -0
  82. package/assets/data/component-library/home-product-tab-1.liquid +528 -0
  83. package/assets/data/component-library/home-product-tab-2.liquid +342 -0
  84. package/assets/data/component-library/home-product-tab-3.liquid +357 -0
  85. package/assets/data/component-library/home-product-vertical-carousel.liquid +477 -0
  86. package/assets/data/component-library/home-quotes-1.liquid +274 -0
  87. package/assets/data/component-library/home-quotes-2.liquid +239 -0
  88. package/assets/data/component-library/home-quotes-3.liquid +244 -0
  89. package/assets/data/component-library/home-quotes-4.liquid +258 -0
  90. package/assets/data/component-library/home-slider-width-promo-images.liquid +1377 -0
  91. package/assets/data/component-library/home-slideshow-type-1.liquid +656 -0
  92. package/assets/data/component-library/home-slideshow-type-2.liquid +570 -0
  93. package/assets/data/component-library/home-specification-block-1.liquid +468 -0
  94. package/assets/data/component-library/home-specification-block-2.liquid +291 -0
  95. package/assets/data/component-library/home-specification-block-3.liquid +429 -0
  96. package/assets/data/component-library/home-support-block.liquid +392 -0
  97. package/assets/data/component-library/home-testimonial.liquid +1348 -0
  98. package/assets/data/component-library/home-video-banner.liquid +317 -0
  99. package/assets/data/component-library/home-wide-banner.liquid +327 -0
  100. package/assets/data/component-library/icon-with-content.liquid +478 -0
  101. package/assets/data/component-library/instafeed.liquid +1 -0
  102. package/assets/data/component-library/kea-ecommerce-interior-furniture-shopify-theme-about.liquid +1300 -0
  103. package/assets/data/component-library/kidslife-responsive-shopify-theme-home-number-counter.liquid +729 -0
  104. package/assets/data/component-library/logo-bar.liquid +314 -0
  105. package/assets/data/component-library/lookbook.liquid +367 -0
  106. package/assets/data/component-library/manual_blog.liquid +724 -0
  107. package/assets/data/component-library/navigation-etc.liquid +642 -0
  108. package/assets/data/component-library/newsletter.liquid +246 -0
  109. package/assets/data/component-library/order-form.liquid +96 -0
  110. package/assets/data/component-library/page-catev1-template.liquid +344 -0
  111. package/assets/data/component-library/popup_video.liquid +396 -0
  112. package/assets/data/component-library/product-sidebar-bestsellers.liquid +99 -0
  113. package/assets/data/component-library/product-sidebar-deals.liquid +158 -0
  114. package/assets/data/component-library/product-template-2.liquid +629 -0
  115. package/assets/data/component-library/product-template-3.liquid +670 -0
  116. package/assets/data/component-library/product-template-4.liquid +627 -0
  117. package/assets/data/component-library/product-template-5.liquid +652 -0
  118. package/assets/data/component-library/product-template.liquid +698 -0
  119. package/assets/data/component-library/rich-text.liquid +541 -0
  120. package/assets/data/component-library/section-countdown-v2.liquid +215 -0
  121. package/assets/data/component-library/services.liquid +596 -0
  122. package/assets/data/component-library/shipping_info.liquid +327 -0
  123. package/assets/data/component-library/sidebar-bestsellers.liquid +109 -0
  124. package/assets/data/component-library/sidebar-category.liquid +105 -0
  125. package/assets/data/component-library/sidebar-colors.liquid +104 -0
  126. package/assets/data/component-library/single_product_feature.liquid +1892 -0
  127. package/assets/data/component-library/social-links-menu.liquid +244 -0
  128. package/assets/data/component-library/someone-purchased.liquid +190 -0
  129. package/assets/data/component-library/special-offer-area.liquid +530 -0
  130. package/assets/data/component-library/theno-minimal-clean-watch-store-shopify-theme-page-catev1-te.liquid +344 -0
  131. package/assets/data/component-library/top-bar-type-1.liquid +200 -0
  132. package/assets/data/component-library/top-bar-type-10.liquid +395 -0
  133. package/assets/data/component-library/top-bar-type-11.liquid +395 -0
  134. package/assets/data/component-library/top-bar-type-2.liquid +106 -0
  135. package/assets/data/component-library/top-bar-type-3.liquid +205 -0
  136. package/assets/data/component-library/top-countdown-bar.liquid +116 -0
  137. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-image-gallery.liquid +783 -0
  138. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-pricing-table.liquid +1043 -0
  139. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-testimonial.liquid +1338 -0
  140. package/assets/data/component-library/video.liquid +511 -0
  141. package/assets/data/component-library/waffy-spices-dry-fruits-store-shopify-theme-v-1-1-contact-us.liquid +523 -0
  142. package/assets/data/design-tokens.csv +93 -57
  143. package/assets/data/schema-library.csv +48 -46
  144. package/assets/data/settings-profiles.csv +235 -0
  145. package/assets/data/shopify-best-practices.csv +58 -36
  146. package/assets/scripts/backfill-component-index.py +102 -0
  147. package/assets/scripts/core.py +30 -8
  148. package/assets/scripts/fix-schema-library.py +42 -0
  149. package/assets/scripts/kb-analyzer-helpers.py +136 -0
  150. package/assets/scripts/kb-analyzer.py +186 -0
  151. package/assets/scripts/kb-builder.py +32 -63
  152. package/assets/scripts/kb-constants.py +62 -0
  153. package/assets/scripts/kb-extractor-helpers.py +178 -0
  154. package/assets/scripts/kb-extractor.py +106 -170
  155. package/assets/scripts/kb-synthesizer.py +251 -0
  156. package/assets/scripts/quality-gate-checks.py +55 -0
  157. package/assets/scripts/quality-gate.py +56 -2
  158. package/assets/scripts/section-generator-helpers.py +74 -0
  159. package/assets/scripts/section-generator.py +59 -49
  160. package/assets/templates/generation-prompt.md +78 -14
  161. package/package.json +1 -1
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env python3
2
+ """Parse/extract functions for kb-analyzer.py — schema, CSS, Liquid, HTML extraction."""
3
+
4
+ import importlib.util
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+
9
+ # Import classify_section from shared constants
10
+ _constants_path = Path(__file__).parent / 'kb-constants.py'
11
+ _cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
12
+ _constants = importlib.util.module_from_spec(_cspec)
13
+ _cspec.loader.exec_module(_constants)
14
+ classify_section = _constants.classify_section
15
+
16
+ # ─── Schema Extraction ────────────────────────────────────────────────────────
17
+
18
# Matches the body between {% schema %} and {% endschema %}, tolerating the
# whitespace-control dashes ({%- ... -%}).
SCHEMA_RE = re.compile(r'\{%-?\s*schema\s*-?%\}(.*?)\{%-?\s*endschema\s*-?%\}', re.DOTALL)


def _as_list(value) -> list:
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def extract_schema_data(content: str) -> dict:
    """Extract structured data from the first {% schema %} block in *content*.

    Returns an empty dict when there is no schema block, when its body is not
    valid JSON, or when the JSON is not an object. Otherwise returns a dict
    with the keys ``schema_name``, ``settings_count``, ``blocks_count``,
    ``setting_types``, ``setting_ids``, ``block_types``, ``has_presets`` and
    ``has_app_block``.
    """
    match = SCHEMA_RE.search(content)
    if not match:
        return {}
    try:
        schema = json.loads(match.group(1).strip())
    except json.JSONDecodeError:
        return {}
    # Valid JSON that is not an object (e.g. a bare list) has no schema keys.
    if not isinstance(schema, dict):
        return {}

    # "settings"/"blocks" may be missing, null, or malformed; treat anything
    # that is not a list as empty instead of raising.
    raw_blocks = _as_list(schema.get('blocks'))
    settings = [s for s in _as_list(schema.get('settings')) if isinstance(s, dict)]
    blocks = [b for b in raw_blocks if isinstance(b, dict)]

    # A name given as a mapping (e.g. {"en": "Hero"}) collapses to its first value.
    name = schema.get('name', '')
    if isinstance(name, dict):
        name = next(iter(name.values()), '')

    return {
        'schema_name': str(name),
        'settings_count': sum(1 for s in settings if s.get('type')),
        'blocks_count': len(raw_blocks),
        'setting_types': sorted({s['type'] for s in settings if s.get('type')}),
        'setting_ids': [s['id'] for s in settings if s.get('id')],
        'block_types': sorted({b['type'] for b in blocks if b.get('type')}),
        'has_presets': bool(schema.get('presets')),
        'has_app_block': any(b.get('type') == '@app' for b in blocks),
    }
42
+
43
+
44
+ # ─── CSS Extraction ──────────────────────────────────────────────────────────
45
+
46
STYLE_RE = re.compile(r'<style[^>]*>(.*?)</style>', re.DOTALL | re.IGNORECASE)
# Captures the custom-property name of a var() reference, with or without a
# fallback value: var(--x), var( --x ), var(--x, 10px).
CSS_VAR_RE = re.compile(r'var\(\s*(--[\w-]+)\s*[,)]')
BREAKPOINT_RE = re.compile(r'@media[^{]*?(\d{3,4})px')
CSS_CLASS_RE = re.compile(r'\.([\w-]+)\s*[{,]')
# Leading "--segment-" of a custom-property name, e.g. "--color-" in "--color-text".
_CSS_VAR_PREFIX_RE = re.compile(r'--\w+-')


def extract_css_data(content: str) -> dict:
    """Extract CSS variables, breakpoints and classes from all <style> blocks.

    Every ``<style>...</style>`` element in *content* is scanned (not only the
    first). Returns a dict with:

    - ``css_vars``: sorted unique custom-property names referenced via var()
    - ``css_var_prefixes``: sorted unique leading ``--segment-`` prefixes
    - ``breakpoints``: sorted unique 3-4 digit pixel values found in @media rules
    - ``css_classes``: sorted unique class names, capped at the first 20
    """
    css = '\n'.join(STYLE_RE.findall(content))
    css_vars = CSS_VAR_RE.findall(css)

    prefixes = set()
    for var_name in css_vars:
        prefix_match = _CSS_VAR_PREFIX_RE.match(var_name)
        if prefix_match:
            prefixes.add(prefix_match.group(0))

    return {
        'css_vars': sorted(set(css_vars)),
        'css_var_prefixes': sorted(prefixes),
        'breakpoints': sorted({int(px) for px in BREAKPOINT_RE.findall(css)}),
        'css_classes': sorted(set(CSS_CLASS_RE.findall(css)))[:20],  # cap to avoid noise
    }
69
+
70
+
71
+ # ─── Liquid Tag Extraction ───────────────────────────────────────────────────
72
+
73
# Captures the word that follows an opening "{%" or "{%-" delimiter.
LIQUID_TAG_RE = re.compile(r'\{%-?\s*([\w]+)')

# Tag names that are reported; any other captured word is ignored.
KNOWN_TAGS = frozenset({
    'if', 'unless', 'elsif', 'else', 'endif',
    'for', 'endfor',
    'assign', 'capture', 'endcapture',
    'render', 'include',
    'form', 'endform',
    'paginate', 'endpaginate',
    'case', 'when', 'endcase',
    'comment', 'endcomment',
    'liquid', 'echo', 'cycle', 'tablerow',
    'schema', 'endschema',
    'style', 'endstyle',
})


def extract_liquid_tags(content: str) -> list:
    """Return the sorted, de-duplicated known Liquid tag names found in *content*."""
    used = {hit.group(1) for hit in LIQUID_TAG_RE.finditer(content)}
    return sorted(KNOWN_TAGS.intersection(used))
87
+
88
+
89
+ # ─── HTML Element Extraction ─────────────────────────────────────────────────
90
+
91
# Opening tags of the element names of interest, matched case-insensitively;
# the trailing \b keeps e.g. "<pre" from matching as "<p".
HTML_ELEMENT_RE = re.compile(
    r'<(section|div|h[1-6]|p|a|img|video|button|span|ul|ol|li|'
    r'form|input|textarea|nav|header|footer|main|article|aside)\b',
    re.IGNORECASE
)


def extract_html_elements(content: str) -> list:
    """Return the sorted, lower-cased set of recognised HTML element names in *content*."""
    names = set()
    for hit in HTML_ELEMENT_RE.finditer(content):
        names.add(hit.group(1).lower())
    return sorted(names)
101
+
102
+
103
+ # ─── Full Section Parser ─────────────────────────────────────────────────────
104
+
105
def parse_section(liquid_path: Path, theme_slug: str) -> dict | None:
    """Build one analysis record from a .liquid section file.

    Reads the file as UTF-8 (undecodable bytes are replaced), then combines
    the schema, CSS, Liquid-tag and HTML-element extractors with the section
    category derived from the file stem and schema name.

    Returns None when the file cannot be read or when schema extraction
    yields nothing.
    """
    try:
        source = liquid_path.read_text(encoding='utf-8', errors='replace')
    except Exception:
        return None

    schema_info = extract_schema_data(source)
    if not schema_info:
        # No usable schema block: skip the file.
        return None

    css_info = extract_css_data(source)
    liquid_tags = extract_liquid_tags(source)
    html_elements = extract_html_elements(source)

    # Handle i18n names like {"en": "Hero"} by taking the first value.
    display_name = schema_info.get('schema_name', '')
    if isinstance(display_name, dict):
        display_name = str(next(iter(display_name.values()), ''))

    record = {
        'theme_slug': theme_slug,
        'filename': liquid_path.name,
        'category': classify_section(liquid_path.stem, str(display_name)),
    }
    record.update(schema_info)
    record.update(css_info)
    record['liquid_tags'] = liquid_tags
    record['html_elements'] = html_elements
    return record
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ KB Analyzer — parse extracted sections, produce analysis.jsonl + aggregate-stats.json.
4
+
5
+ Usage:
6
+ python3 kb-analyzer.py --input themes/extracted/ --output themes/
7
+ python3 kb-analyzer.py --input themes/extracted/ --output themes/ --resume
8
+ """
9
+
10
+ import argparse
11
+ import importlib.util
12
+ import json
13
+ import sys
14
+ from collections import Counter, defaultdict
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ # Import parse helpers
19
+ _helpers_path = Path(__file__).parent / 'kb-analyzer-helpers.py'
20
+ _hspec = importlib.util.spec_from_file_location('kb_analyzer_helpers', _helpers_path)
21
+ _helpers = importlib.util.module_from_spec(_hspec)
22
+ _hspec.loader.exec_module(_helpers)
23
+ parse_section = _helpers.parse_section
24
+
25
+
26
def load_existing_keys(jsonl_path: Path) -> set:
    """Load existing ``theme_slug:filename`` keys from a JSONL file for resume support.

    Returns an empty set when the file does not exist. Blank lines, lines that
    are not valid JSON, and records that are not objects or lack either key
    are skipped.
    """
    keys = set()
    if not jsonl_path.exists():
        return keys
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
                keys.add(f"{rec['theme_slug']}:{rec['filename']}")
            except (json.JSONDecodeError, KeyError, TypeError):
                # TypeError: the line is valid JSON but not an object
                # (list, string, number), so it cannot be indexed by key.
                continue
    return keys
42
+
43
+
44
def analyze_all(input_dir: Path, output_dir: Path, resume: bool = True):
    """Parse every theme's sections into analysis.jsonl, then rebuild aggregate stats.

    A theme is any direct sub-directory of *input_dir* that contains a
    ``sections`` entry. With *resume* enabled, records already present in the
    JSONL are skipped and new ones are appended; otherwise the file is
    rewritten from scratch.
    """
    jsonl_path = output_dir / 'analysis.jsonl'
    stats_path = output_dir / 'aggregate-stats.json'

    # Keys of records written by a previous run (empty when not resuming).
    done = load_existing_keys(jsonl_path) if resume else set()
    if done:
        print(f"Resume: {len(done)} records already processed")

    themes = sorted(
        entry for entry in input_dir.iterdir()
        if entry.is_dir() and (entry / 'sections').exists()
    )
    total_themes = len(themes)
    written = 0
    skipped = 0
    errors = 0

    # Append only when there is something to resume from; otherwise start fresh.
    open_mode = 'a' if resume and done else 'w'
    with open(jsonl_path, open_mode, encoding='utf-8') as out:
        for idx, theme_dir in enumerate(themes, 1):
            theme_slug = theme_dir.name
            per_theme = 0

            for liquid in sorted((theme_dir / 'sections').glob('*.liquid')):
                if f"{theme_slug}:{liquid.name}" in done:
                    skipped += 1
                    continue

                record = parse_section(liquid, theme_slug)
                if not record:
                    errors += 1
                    continue

                out.write(json.dumps(record, ensure_ascii=False) + '\n')
                written += 1
                per_theme += 1

            # Progress line every 50 themes and for the last theme.
            if idx % 50 == 0 or idx == total_themes:
                print(f"[{idx}/{total_themes}] {theme_slug}: {per_theme} sections")

    print(f"\nAnalysis complete: {written} new records, {skipped} resumed, {errors} parse errors")

    # Stats are recomputed from the complete JSONL, including resumed records.
    aggregate_stats(jsonl_path, stats_path)
91
+
92
+
93
def aggregate_stats(jsonl_path: Path, stats_path: Path):
    """Read the analysis JSONL, compute per-category frequency stats, write JSON.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. For each category the output holds the record count,
    the most common setting types (with an integer percentage of the
    category's records), setting ids, block-type combinations, CSS variable
    prefixes and breakpoints. The result, stamped with the current UTC time,
    is written to *stats_path* and a one-line summary is printed.
    """
    cat_setting_types = defaultdict(Counter)
    cat_setting_ids = defaultdict(Counter)
    cat_block_combos = defaultdict(Counter)
    cat_css_prefixes = defaultdict(Counter)
    cat_breakpoints = defaultdict(Counter)
    cat_counts = Counter()
    total = 0

    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object cannot be a section record.
            if not isinstance(rec, dict):
                continue

            total += 1
            cat = rec.get('category', 'other')
            cat_counts[cat] += 1

            for st in rec.get('setting_types', []):
                cat_setting_types[cat][st] += 1
            for sid in rec.get('setting_ids', []):
                cat_setting_ids[cat][sid] += 1

            # A section's set of block types is counted as one combination.
            block_combo = '|'.join(sorted(rec.get('block_types', [])))
            if block_combo:
                cat_block_combos[cat][block_combo] += 1

            for prefix in rec.get('css_var_prefixes', []):
                cat_css_prefixes[cat][prefix] += 1
            for bp in rec.get('breakpoints', []):
                cat_breakpoints[cat][str(bp)] += 1

    # Build output
    categories = {}
    for cat in sorted(cat_counts):
        count = cat_counts[cat]
        categories[cat] = {
            'count': count,
            'top_setting_types': [
                # Integer arithmetic: int(c / count * 100) loses a point to
                # float rounding (e.g. 29 of 100 came out as 28).
                {'type': t, 'count': c, 'pct': c * 100 // count}
                for t, c in cat_setting_types[cat].most_common(20)
            ],
            'top_setting_ids': [
                {'id': sid, 'count': c}
                for sid, c in cat_setting_ids[cat].most_common(20)
            ],
            'top_block_combos': [
                {'combo': combo, 'count': c}
                for combo, c in cat_block_combos[cat].most_common(10)
            ],
            'top_css_var_prefixes': [
                {'prefix': p, 'count': c}
                for p, c in cat_css_prefixes[cat].most_common(10)
            ],
            'breakpoint_usage': dict(cat_breakpoints[cat].most_common(10)),
        }

    output = {
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'total_sections': total,
        'categories': categories,
    }
    stats_path.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding='utf-8')
    print(f"Aggregate stats: {total} sections across {len(categories)} categories -> {stats_path}")
163
+
164
+
165
def main():
    """Parse command-line options and run the analysis.

    Creates the output directory if needed, exits with status 1 when the
    input directory does not exist, and otherwise calls analyze_all().
    Resume is on unless --no-resume is given.
    """
    cli = argparse.ArgumentParser(description="Analyze extracted Shopify sections")
    cli.add_argument('--input', default='themes/extracted/', help='Extracted themes dir')
    cli.add_argument('--output', default='themes/', help='Output dir for JSONL + stats')
    # --resume is already the default; only --no-resume changes the behaviour.
    cli.add_argument(
        '--resume',
        action='store_true',
        default=True,
        help='Skip already-analyzed sections (default: on)',
    )
    cli.add_argument('--no-resume', action='store_true', help='Re-analyze everything')
    opts = cli.parse_args()

    source_dir = Path(opts.input)
    target_dir = Path(opts.output)
    target_dir.mkdir(parents=True, exist_ok=True)

    if not source_dir.exists():
        print(f"Input dir not found: {source_dir}")
        sys.exit(1)

    analyze_all(source_dir, target_dir, resume=not opts.no_resume)
183
+
184
+
185
+ if __name__ == '__main__':
186
+ main()
@@ -9,67 +9,20 @@ Usage:
9
9
 
10
10
  import argparse
11
11
  import csv
12
+ import importlib.util
12
13
  import json
13
14
  import re
14
15
  import shutil
15
16
  from pathlib import Path
16
17
 
17
- # ─── Section Type Classification ──────────────────────────────────────────────
18
-
19
- SECTION_TYPES = {
20
- "hero": ["hero", "banner", "slider", "slideshow", "cover", "header-image", "fullscreen"],
21
- "about": ["about", "team", "story", "brand", "mission", "who-we-are"],
22
- "features": ["feature", "benefit", "highlight", "why-us", "column", "multi-column"],
23
- "testimonial": ["testimonial", "review", "quote", "feedback", "customer"],
24
- "faq": ["faq", "question", "accordion", "help", "support"],
25
- "cta": ["cta", "call-to-action", "promo", "promotion", "offer"],
26
- "newsletter": ["newsletter", "email", "subscribe", "signup", "form"],
27
- "product-grid": ["product", "collection", "shop", "catalog", "grid"],
28
- "blog": ["blog", "article", "post", "news", "journal"],
29
- "countdown": ["countdown", "timer", "hurry", "limited", "sale-end"],
30
- "trust": ["trust", "badge", "guarantee", "security", "payment"],
31
- "stats": ["stat", "counter", "number", "metric", "achievement"],
32
- "logo": ["logo", "brand", "partner", "client", "sponsor"],
33
- "gallery": ["gallery", "image", "photo", "portfolio", "masonry"],
34
- "video": ["video", "youtube", "vimeo", "embed", "media"],
35
- "pricing": ["pricing", "plan", "tier", "subscription"],
36
- "map": ["map", "location", "store", "contact", "direction"],
37
- "announcement": ["announcement", "bar", "notification", "alert", "strip"],
38
- "rich-text": ["rich-text", "text", "content", "wysiwyg", "body"],
39
- "collection": ["collection", "category", "department", "browse"],
40
- }
41
-
42
- EXPECTED_SETTINGS = {
43
- "hero": ["heading", "subheading", "image", "button_label", "button_link", "color_scheme", "padding", "min_height"],
44
- "about": ["heading", "content", "image", "layout", "color_scheme", "padding"],
45
- "features": ["heading", "columns", "color_scheme", "padding"],
46
- "testimonial": ["heading", "color_scheme", "padding"],
47
- "faq": ["heading", "color_scheme", "padding"],
48
- "cta": ["heading", "button_label", "button_link", "color_scheme", "padding"],
49
- "newsletter": ["heading", "button_label", "color_scheme", "padding"],
50
- "product-grid":["collection", "products_per_row", "color_scheme", "padding"],
51
- "blog": ["blog", "posts_shown", "color_scheme", "padding"],
52
- "countdown": ["heading", "end_date", "color_scheme", "padding"],
53
- "video": ["video_url", "heading", "color_scheme", "padding"],
54
- "pricing": ["heading", "color_scheme", "padding"],
55
- "gallery": ["heading", "columns", "color_scheme", "padding"],
56
- "stats": ["heading", "color_scheme", "padding"],
57
- "logo": ["heading", "color_scheme", "padding"],
58
- "announcement":["text", "link", "link_text", "color_scheme"],
59
- "rich-text": ["heading", "content", "color_scheme", "padding"],
60
- "collection": ["heading", "color_scheme", "padding"],
61
- "trust": ["heading", "color_scheme", "padding"],
62
- "map": ["heading", "address", "color_scheme", "padding"],
63
- }
64
-
65
-
66
- def classify_section(filename: str, schema_name: str) -> str:
67
- """Classify section type based on filename + schema name."""
68
- combined = (filename + " " + schema_name).lower().replace('-', ' ').replace('_', ' ')
69
- for section_type, keywords in SECTION_TYPES.items():
70
- if any(kw in combined for kw in keywords):
71
- return section_type
72
- return "other"
18
+ # Import shared constants (DRY)
19
+ _constants_path = Path(__file__).parent / 'kb-constants.py'
20
+ _cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
21
+ _constants = importlib.util.module_from_spec(_cspec)
22
+ _cspec.loader.exec_module(_constants)
23
+ SECTION_TYPES = _constants.SECTION_TYPES
24
+ EXPECTED_SETTINGS = _constants.EXPECTED_SETTINGS
25
+ classify_section = _constants.classify_section
73
26
 
74
27
 
75
28
  def score_completeness(section_type: str, setting_ids: list) -> int:
@@ -187,14 +140,23 @@ def save_index(index_path: Path, index: dict):
187
140
  writer.writerows(rows)
188
141
 
189
142
 
143
+ SCHEMA_FIELDNAMES = ['No', 'Type', 'Category', 'Returns', 'Keywords', 'Required Props',
144
+ 'Optional Props', 'Default Behavior', 'JSON Example', 'Constraints', 'Notes']
145
+
146
+
190
147
  def save_schema_library(schema_path: Path, new_settings: list):
191
148
  """Append unique schema setting types to schema-library.csv."""
192
149
  existing_types = set()
193
- if schema_path.exists():
150
+ max_no = 0
151
+ if schema_path.exists() and schema_path.stat().st_size > 0:
194
152
  with open(schema_path, newline='', encoding='utf-8') as f:
195
153
  reader = csv.DictReader(f)
196
154
  for row in reader:
197
155
  existing_types.add(row.get('Type', ''))
156
+ try:
157
+ max_no = max(max_no, int(row.get('No', 0)))
158
+ except (ValueError, TypeError):
159
+ pass
198
160
 
199
161
  new_rows = []
200
162
  seen = set(existing_types)
@@ -203,24 +165,28 @@ def save_schema_library(schema_path: Path, new_settings: list):
203
165
  continue
204
166
  t = s.get('type', '')
205
167
  if t and t not in seen:
168
+ max_no += 1
206
169
  new_rows.append({
170
+ 'No': max_no,
207
171
  'Type': t,
208
172
  'Category': 'basic',
209
173
  'Returns': '',
210
- 'Label': s.get('label', ''),
174
+ 'Keywords': t,
211
175
  'Required Props': '',
212
176
  'Optional Props': str(s.get('info', '')),
213
- 'Default': str(s.get('default', '')),
177
+ 'Default Behavior': str(s.get('default', '')),
214
178
  'JSON Example': json.dumps({k: s.get(k) for k in ['type', 'id', 'label'] if s.get(k)}, ensure_ascii=False),
179
+ 'Constraints': '',
215
180
  'Notes': '',
216
- 'Use Case': '',
217
- 'Keywords': t,
218
181
  })
219
182
  seen.add(t)
220
183
 
221
184
  if new_rows:
185
+ file_is_empty = not schema_path.exists() or schema_path.stat().st_size == 0
222
186
  with open(schema_path, 'a', newline='', encoding='utf-8') as f:
223
- writer = csv.DictWriter(f, fieldnames=new_rows[0].keys())
187
+ writer = csv.DictWriter(f, fieldnames=SCHEMA_FIELDNAMES)
188
+ if file_is_empty:
189
+ writer.writeheader()
224
190
  for row in new_rows:
225
191
  writer.writerow(row)
226
192
 
@@ -263,6 +229,9 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
263
229
  dest = component_dir / f"{slug}.liquid"
264
230
  dest.write_text(parsed['content'], encoding='utf-8')
265
231
 
232
+ # Capture membership BEFORE mutation for accurate stats
233
+ is_new = slug not in index
234
+
266
235
  # Update index
267
236
  index[slug] = {
268
237
  'No': row_no,
@@ -284,7 +253,7 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
284
253
  # Update schema library
285
254
  save_schema_library(schema_path, parsed['settings'])
286
255
 
287
- stats['added' if slug not in index else 'updated'] += 1
256
+ stats['added' if is_new else 'updated'] += 1
288
257
  action = '🆕' if next_no > max((int(r.get('No', 0) or 0) for r in index.values() if r.get('Slug') != slug), default=0) else '⬆️'
289
258
  print(f" {action} [{section_type:15s}] {slug} (completeness: {completeness}%)")
290
259
 
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ """Shared constants for KB pipeline scripts (DRY: single source of truth)."""
3
+
4
+ # Section type classification keywords
5
SECTION_TYPES = {
    "hero": ["hero", "banner", "slider", "slideshow", "cover", "header-image", "fullscreen"],
    "about": ["about", "team", "story", "brand", "mission", "who-we-are"],
    "features": ["feature", "benefit", "highlight", "why-us", "column", "multi-column"],
    "testimonial": ["testimonial", "review", "quote", "feedback", "customer"],
    "faq": ["faq", "question", "accordion", "help", "support"],
    "cta": ["cta", "call-to-action", "promo", "promotion", "offer"],
    "newsletter": ["newsletter", "email", "subscribe", "signup", "form"],
    "product-grid": ["product", "collection", "shop", "catalog", "grid"],
    "blog": ["blog", "article", "post", "news", "journal"],
    "countdown": ["countdown", "timer", "hurry", "limited", "sale-end"],
    "trust": ["trust", "badge", "guarantee", "security", "payment"],
    "stats": ["stat", "counter", "number", "metric", "achievement"],
    "logo": ["logo", "brand", "partner", "client", "sponsor"],
    "gallery": ["gallery", "image", "photo", "portfolio", "masonry"],
    "video": ["video", "youtube", "vimeo", "embed", "media"],
    "pricing": ["pricing", "plan", "tier", "subscription"],
    "map": ["map", "location", "store", "contact", "direction"],
    "announcement": ["announcement", "bar", "notification", "alert", "strip"],
    "rich-text": ["rich-text", "text", "content", "wysiwyg", "body"],
    "collection": ["collection", "category", "department", "browse"],
    "header": ["header", "navigation", "nav", "menu", "topbar"],
    "footer": ["footer", "bottom", "copyright"],
    "sidebar": ["sidebar", "widget", "aside"],
}

# Expected settings per section type (for completeness scoring)
EXPECTED_SETTINGS = {
    "hero": ["heading", "subheading", "image", "button_label", "button_link", "color_scheme", "padding", "min_height"],
    "about": ["heading", "content", "image", "layout", "color_scheme", "padding"],
    "features": ["heading", "columns", "color_scheme", "padding"],
    "testimonial": ["heading", "color_scheme", "padding"],
    "faq": ["heading", "color_scheme", "padding"],
    "cta": ["heading", "button_label", "button_link", "color_scheme", "padding"],
    "newsletter": ["heading", "button_label", "color_scheme", "padding"],
    "product-grid": ["collection", "products_per_row", "color_scheme", "padding"],
    "blog": ["blog", "posts_shown", "color_scheme", "padding"],
    "countdown": ["heading", "end_date", "color_scheme", "padding"],
    "video": ["video_url", "heading", "color_scheme", "padding"],
    "pricing": ["heading", "color_scheme", "padding"],
    "gallery": ["heading", "columns", "color_scheme", "padding"],
    "stats": ["heading", "color_scheme", "padding"],
    "logo": ["heading", "color_scheme", "padding"],
    "announcement": ["text", "link", "link_text", "color_scheme"],
    "rich-text": ["heading", "content", "color_scheme", "padding"],
    "collection": ["heading", "color_scheme", "padding"],
    "trust": ["heading", "color_scheme", "padding"],
    "map": ["heading", "address", "color_scheme", "padding"],
}


def _normalize_label(text: str) -> str:
    """Lower-case *text* and turn hyphens/underscores into spaces."""
    return text.lower().replace('-', ' ').replace('_', ' ')


def classify_section(filename: str, schema_name: str) -> str:
    """Classify section type based on filename + schema name.

    Both inputs are joined and normalised (lower-case, ``-``/``_`` become
    spaces). Each keyword is normalised the same way before the substring
    test, so hyphenated keywords such as "call-to-action" can match.
    Categories are tried in SECTION_TYPES order and the first one with a
    matching keyword wins; "other" is returned when nothing matches.
    """
    combined = _normalize_label(filename + " " + schema_name)
    for section_type, keywords in SECTION_TYPES.items():
        # Without normalising the keyword, any keyword containing "-" could
        # never be found in the hyphen-free combined string.
        if any(_normalize_label(kw) in combined for kw in keywords):
            return section_type
    return "other"