rebly-sections 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/data/block-patterns.csv +160 -0
- package/assets/data/component-library/GDPR-modal.liquid +183 -0
- package/assets/data/component-library/Ishi_parallaxblockstyle1.liquid +331 -0
- package/assets/data/component-library/_index.csv +157 -19
- package/assets/data/component-library/about.liquid +1557 -0
- package/assets/data/component-library/adv-header.liquid +344 -0
- package/assets/data/component-library/adv-navigation.liquid +542 -0
- package/assets/data/component-library/announcement-bar.liquid +42 -60
- package/assets/data/component-library/article.liquid +242 -0
- package/assets/data/component-library/axeo-perfume-cosmetics-store-shopify-theme-about.liquid +1557 -0
- package/assets/data/component-library/basel-gl_newsletter_pets.liquid +612 -0
- package/assets/data/component-library/bixbang-fullpackage-collection-template.liquid +990 -0
- package/assets/data/component-library/blog-sidebar-article.liquid +51 -0
- package/assets/data/component-library/blog-sidebar-deals.liquid +189 -0
- package/assets/data/component-library/blog-sidebar-instagram.liquid +126 -0
- package/assets/data/component-library/blog-sidebar-tags.liquid +30 -0
- package/assets/data/component-library/blog.liquid +371 -0
- package/assets/data/component-library/brands-page.liquid +114 -0
- package/assets/data/component-library/cake-shop-shopify-theme-for-bakery-and-cafe-home-support-blo.liquid +780 -0
- package/assets/data/component-library/collection-template-promotion.liquid +1139 -0
- package/assets/data/component-library/collection-template.liquid +146 -0
- package/assets/data/component-library/contact-us.liquid +663 -0
- package/assets/data/component-library/contact.liquid +256 -0
- package/assets/data/component-library/copyright_payment.liquid +95 -0
- package/assets/data/component-library/custom-content.liquid +832 -0
- package/assets/data/component-library/faq-template-3.liquid +1014 -0
- package/assets/data/component-library/footer-model-1.liquid +503 -0
- package/assets/data/component-library/footer-model-10.liquid +210 -0
- package/assets/data/component-library/footer-model-2.liquid +460 -0
- package/assets/data/component-library/footer-model-3.liquid +548 -0
- package/assets/data/component-library/footer-model-4.liquid +455 -0
- package/assets/data/component-library/footer-model-5.liquid +407 -0
- package/assets/data/component-library/footer-model-6.liquid +543 -0
- package/assets/data/component-library/footer-model-7.liquid +345 -0
- package/assets/data/component-library/footer-model-8.liquid +279 -0
- package/assets/data/component-library/footer-model-9.liquid +376 -0
- package/assets/data/component-library/gallery.liquid +236 -0
- package/assets/data/component-library/gecko-shopify-v5-7-6-nulled-manual_blog.liquid +720 -0
- package/assets/data/component-library/gl_newsletter_pets.liquid +612 -0
- package/assets/data/component-library/gp-logo-list.liquid +362 -0
- package/assets/data/component-library/grid-banner-type-3-b.liquid +655 -0
- package/assets/data/component-library/header-model-1.liquid +427 -0
- package/assets/data/component-library/header-model-10.liquid +599 -0
- package/assets/data/component-library/header-model-2.liquid +633 -0
- package/assets/data/component-library/header-model-3.liquid +415 -0
- package/assets/data/component-library/header-model-4.liquid +754 -0
- package/assets/data/component-library/header-model-5.liquid +562 -0
- package/assets/data/component-library/header-model-6.liquid +713 -0
- package/assets/data/component-library/header-model-7.liquid +743 -0
- package/assets/data/component-library/header-model-8.liquid +500 -0
- package/assets/data/component-library/header-model-9.liquid +506 -0
- package/assets/data/component-library/home-blog-posts-1.liquid +399 -0
- package/assets/data/component-library/home-blog-posts-2.liquid +393 -0
- package/assets/data/component-library/home-blog-posts-3.liquid +545 -0
- package/assets/data/component-library/home-brand-slider.liquid +224 -0
- package/assets/data/component-library/home-circled-block.liquid +332 -0
- package/assets/data/component-library/home-contact-block-1.liquid +395 -0
- package/assets/data/component-library/home-contact-block-2.liquid +372 -0
- package/assets/data/component-library/home-content-block-1.liquid +320 -0
- package/assets/data/component-library/home-donut-chart.liquid +335 -0
- package/assets/data/component-library/home-fade-in-banner.liquid +277 -0
- package/assets/data/component-library/home-faq-model.liquid +323 -0
- package/assets/data/component-library/home-featured-blog.liquid +1462 -0
- package/assets/data/component-library/home-featured-collections.liquid +484 -0
- package/assets/data/component-library/home-gallery-block1.liquid +276 -0
- package/assets/data/component-library/home-gallery-block2.liquid +396 -0
- package/assets/data/component-library/home-grid-banner-type-1.liquid +371 -0
- package/assets/data/component-library/home-grid-banner-type-2.liquid +362 -0
- package/assets/data/component-library/home-grid-banner-type-3.liquid +374 -0
- package/assets/data/component-library/home-grid-banner-type-4.liquid +900 -0
- package/assets/data/component-library/home-grid-banner-type-5.liquid +368 -0
- package/assets/data/component-library/home-grid-banner-type-6.liquid +382 -0
- package/assets/data/component-library/home-grid-banner-type-7.liquid +371 -0
- package/assets/data/component-library/home-hotspot-with-product-carousel.liquid +1425 -0
- package/assets/data/component-library/home-image-gallery.liquid +1087 -0
- package/assets/data/component-library/home-instagram.liquid +356 -0
- package/assets/data/component-library/home-newsletter.liquid +246 -0
- package/assets/data/component-library/home-number-counter.liquid +790 -0
- package/assets/data/component-library/home-price-table.liquid +416 -0
- package/assets/data/component-library/home-pricing-table.liquid +1076 -0
- package/assets/data/component-library/home-product-grid.liquid +413 -0
- package/assets/data/component-library/home-product-tab-1.liquid +528 -0
- package/assets/data/component-library/home-product-tab-2.liquid +342 -0
- package/assets/data/component-library/home-product-tab-3.liquid +357 -0
- package/assets/data/component-library/home-product-vertical-carousel.liquid +477 -0
- package/assets/data/component-library/home-quotes-1.liquid +274 -0
- package/assets/data/component-library/home-quotes-2.liquid +239 -0
- package/assets/data/component-library/home-quotes-3.liquid +244 -0
- package/assets/data/component-library/home-quotes-4.liquid +258 -0
- package/assets/data/component-library/home-slider-width-promo-images.liquid +1377 -0
- package/assets/data/component-library/home-slideshow-type-1.liquid +656 -0
- package/assets/data/component-library/home-slideshow-type-2.liquid +570 -0
- package/assets/data/component-library/home-specification-block-1.liquid +468 -0
- package/assets/data/component-library/home-specification-block-2.liquid +291 -0
- package/assets/data/component-library/home-specification-block-3.liquid +429 -0
- package/assets/data/component-library/home-support-block.liquid +392 -0
- package/assets/data/component-library/home-testimonial.liquid +1348 -0
- package/assets/data/component-library/home-video-banner.liquid +317 -0
- package/assets/data/component-library/home-wide-banner.liquid +327 -0
- package/assets/data/component-library/icon-with-content.liquid +478 -0
- package/assets/data/component-library/instafeed.liquid +1 -0
- package/assets/data/component-library/kea-ecommerce-interior-furniture-shopify-theme-about.liquid +1300 -0
- package/assets/data/component-library/kidslife-responsive-shopify-theme-home-number-counter.liquid +729 -0
- package/assets/data/component-library/logo-bar.liquid +314 -0
- package/assets/data/component-library/lookbook.liquid +367 -0
- package/assets/data/component-library/manual_blog.liquid +724 -0
- package/assets/data/component-library/navigation-etc.liquid +642 -0
- package/assets/data/component-library/newsletter.liquid +246 -0
- package/assets/data/component-library/order-form.liquid +96 -0
- package/assets/data/component-library/page-catev1-template.liquid +344 -0
- package/assets/data/component-library/popup_video.liquid +396 -0
- package/assets/data/component-library/product-sidebar-bestsellers.liquid +99 -0
- package/assets/data/component-library/product-sidebar-deals.liquid +158 -0
- package/assets/data/component-library/product-template-2.liquid +629 -0
- package/assets/data/component-library/product-template-3.liquid +670 -0
- package/assets/data/component-library/product-template-4.liquid +627 -0
- package/assets/data/component-library/product-template-5.liquid +652 -0
- package/assets/data/component-library/product-template.liquid +698 -0
- package/assets/data/component-library/rich-text.liquid +541 -0
- package/assets/data/component-library/section-countdown-v2.liquid +215 -0
- package/assets/data/component-library/services.liquid +596 -0
- package/assets/data/component-library/shipping_info.liquid +327 -0
- package/assets/data/component-library/sidebar-bestsellers.liquid +109 -0
- package/assets/data/component-library/sidebar-category.liquid +105 -0
- package/assets/data/component-library/sidebar-colors.liquid +104 -0
- package/assets/data/component-library/single_product_feature.liquid +1892 -0
- package/assets/data/component-library/social-links-menu.liquid +244 -0
- package/assets/data/component-library/someone-purchased.liquid +190 -0
- package/assets/data/component-library/special-offer-area.liquid +530 -0
- package/assets/data/component-library/theno-minimal-clean-watch-store-shopify-theme-page-catev1-te.liquid +344 -0
- package/assets/data/component-library/top-bar-type-1.liquid +200 -0
- package/assets/data/component-library/top-bar-type-10.liquid +395 -0
- package/assets/data/component-library/top-bar-type-11.liquid +395 -0
- package/assets/data/component-library/top-bar-type-2.liquid +106 -0
- package/assets/data/component-library/top-bar-type-3.liquid +205 -0
- package/assets/data/component-library/top-countdown-bar.liquid +116 -0
- package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-image-gallery.liquid +783 -0
- package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-pricing-table.liquid +1043 -0
- package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-testimonial.liquid +1338 -0
- package/assets/data/component-library/video.liquid +511 -0
- package/assets/data/component-library/waffy-spices-dry-fruits-store-shopify-theme-v-1-1-contact-us.liquid +523 -0
- package/assets/data/design-tokens.csv +93 -57
- package/assets/data/schema-library.csv +48 -46
- package/assets/data/settings-profiles.csv +235 -0
- package/assets/data/shopify-best-practices.csv +58 -36
- package/assets/scripts/backfill-component-index.py +102 -0
- package/assets/scripts/core.py +30 -8
- package/assets/scripts/fix-schema-library.py +42 -0
- package/assets/scripts/kb-analyzer-helpers.py +136 -0
- package/assets/scripts/kb-analyzer.py +186 -0
- package/assets/scripts/kb-builder.py +32 -63
- package/assets/scripts/kb-constants.py +62 -0
- package/assets/scripts/kb-extractor-helpers.py +178 -0
- package/assets/scripts/kb-extractor.py +106 -170
- package/assets/scripts/kb-synthesizer.py +251 -0
- package/assets/scripts/quality-gate-checks.py +55 -0
- package/assets/scripts/quality-gate.py +56 -2
- package/assets/scripts/section-generator-helpers.py +74 -0
- package/assets/scripts/section-generator.py +59 -49
- package/assets/templates/generation-prompt.md +78 -14
- package/package.json +1 -1
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Parse/extract functions for kb-analyzer.py — schema, CSS, Liquid, HTML extraction."""
|
|
3
|
+
|
|
4
|
+
import importlib.util
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
# Import classify_section from shared constants
|
|
10
|
+
_constants_path = Path(__file__).parent / 'kb-constants.py'
|
|
11
|
+
_cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
|
|
12
|
+
_constants = importlib.util.module_from_spec(_cspec)
|
|
13
|
+
_cspec.loader.exec_module(_constants)
|
|
14
|
+
classify_section = _constants.classify_section
|
|
15
|
+
|
|
16
|
+
# ─── Schema Extraction ────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
SCHEMA_RE = re.compile(r'\{%-?\s*schema\s*-?%\}(.*?)\{%-?\s*endschema\s*-?%\}', re.DOTALL)


def extract_schema_data(content: str) -> dict:
    """Extract structured data from the first ``{% schema %}`` block.

    Args:
        content: Full text of a Liquid section file.

    Returns:
        A dict with the keys ``schema_name``, ``settings_count``,
        ``blocks_count``, ``setting_types``, ``setting_ids``, ``block_types``,
        ``has_presets`` and ``has_app_block``.  An empty dict is returned when
        there is no schema block, when its body is not valid JSON, or when the
        JSON document is not an object.
    """
    match = SCHEMA_RE.search(content)
    if not match:
        return {}
    try:
        schema = json.loads(match.group(1).strip())
    except json.JSONDecodeError:
        return {}
    # Valid JSON is not necessarily an object (e.g. "[]" or "null"); anything
    # else has no settings/blocks to inspect.
    if not isinstance(schema, dict):
        return {}

    # "settings" / "blocks" may be missing, null, or otherwise malformed.
    raw_settings = schema.get('settings')
    settings = [s for s in raw_settings if isinstance(s, dict)] if isinstance(raw_settings, list) else []
    raw_blocks = schema.get('blocks')
    blocks = raw_blocks if isinstance(raw_blocks, list) else []
    block_dicts = [b for b in blocks if isinstance(b, dict)]

    # Translated names look like {"en": "Hero"}: use the first value.
    name = schema.get('name', '')
    if isinstance(name, dict):
        name = next(iter(name.values()), '')

    return {
        'schema_name': str(name),
        'settings_count': sum(1 for s in settings if s.get('type')),
        'blocks_count': len(blocks),
        'setting_types': sorted({s['type'] for s in settings if s.get('type')}),
        'setting_ids': [s['id'] for s in settings if s.get('id')],
        'block_types': sorted({b['type'] for b in block_dicts if b.get('type')}),
        'has_presets': bool(schema.get('presets')),
        'has_app_block': any(b.get('type') == '@app' for b in block_dicts),
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ─── CSS Extraction ──────────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
STYLE_RE = re.compile(r'<style[^>]*>(.*?)</style>', re.DOTALL | re.IGNORECASE)
# Capture the custom-property name only, so that usages with a fallback
# ("var(--gap, 8px)") or inner whitespace ("var( --gap )") are also found.
CSS_VAR_RE = re.compile(r'var\(\s*(--[\w-]+)')
BREAKPOINT_RE = re.compile(r'@media[^{]*?(\d{3,4})px')
CSS_CLASS_RE = re.compile(r'\.([\w-]+)\s*[{,]')
# First dash-terminated segment of a custom property: "--color-primary" -> "--color-".
_CSS_PREFIX_RE = re.compile(r'(--[\w]+-)')


def extract_css_data(content: str) -> dict:
    """Extract CSS variables, breakpoints and class names from <style> blocks.

    Every ``<style>`` element in *content* is scanned, not only the first one.

    Args:
        content: Full text of a Liquid section file.

    Returns:
        A dict with:
        ``css_vars``          sorted unique custom properties referenced via ``var()``;
        ``css_var_prefixes``  sorted unique first segments of those properties;
        ``breakpoints``       sorted unique 3-4 digit pixel values found in ``@media`` rules;
        ``css_classes``       the first 20 class selectors in sorted order.
        All lists are empty when the file has no ``<style>`` element.
    """
    css = '\n'.join(STYLE_RE.findall(content))
    css_vars = CSS_VAR_RE.findall(css)
    prefix_matches = (_CSS_PREFIX_RE.match(v) for v in css_vars)
    prefixes = sorted({m.group(1) for m in prefix_matches if m})
    breakpoints = sorted({int(px) for px in BREAKPOINT_RE.findall(css)})
    classes = sorted(set(CSS_CLASS_RE.findall(css)))[:20]  # cap to avoid noise
    return {
        'css_vars': sorted(set(css_vars)),
        'css_var_prefixes': prefixes,
        'breakpoints': breakpoints,
        'css_classes': classes,
    }
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ─── Liquid Tag Extraction ───────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
LIQUID_TAG_RE = re.compile(r'\{%-?\s*([\w]+)')
# Tags reported by extract_liquid_tags(); any other tag name is ignored.
# Every block tag listed here is paired with its closing tag.
KNOWN_TAGS = frozenset({
    'if', 'unless', 'elsif', 'else', 'endif', 'endunless', 'for', 'endfor',
    'assign', 'capture', 'endcapture', 'render', 'include',
    'form', 'endform', 'paginate', 'endpaginate', 'case', 'when',
    'endcase', 'comment', 'endcomment', 'liquid', 'echo', 'cycle',
    'tablerow', 'endtablerow', 'schema', 'endschema', 'style', 'endstyle',
})


def extract_liquid_tags(content: str) -> list:
    """Return the sorted, de-duplicated known Liquid tags used in *content*.

    Only the tag name directly after ``{%`` / ``{%-`` is considered; output
    expressions (``{{ ... }}``) and names outside ``KNOWN_TAGS`` are ignored.
    """
    found = set(LIQUID_TAG_RE.findall(content))
    return sorted(found & KNOWN_TAGS)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ─── HTML Element Extraction ─────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
# The tag name must not be followed by another name character or a hyphen;
# a plain \b would let custom elements such as <header-drawer> count as <header>.
HTML_ELEMENT_RE = re.compile(
    r'<(section|div|h[1-6]|p|a|img|video|button|span|ul|ol|li|'
    r'form|input|textarea|nav|header|footer|main|article|aside)(?![\w-])',
    re.IGNORECASE
)


def extract_html_elements(content: str) -> list:
    """Return the sorted, lower-cased set of tracked HTML element names.

    Only opening tags of the elements listed in ``HTML_ELEMENT_RE`` are
    counted; closing tags and hyphenated custom elements are ignored.
    """
    return sorted({name.lower() for name in HTML_ELEMENT_RE.findall(content)})
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ─── Full Section Parser ─────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
def parse_section(liquid_path: Path, theme_slug: str) -> dict | None:
|
|
106
|
+
"""Parse a .liquid file and return structured record for analysis.jsonl.
|
|
107
|
+
|
|
108
|
+
Returns None if the file has no valid schema block.
|
|
109
|
+
"""
|
|
110
|
+
try:
|
|
111
|
+
content = liquid_path.read_text(encoding='utf-8', errors='replace')
|
|
112
|
+
except Exception:
|
|
113
|
+
return None
|
|
114
|
+
|
|
115
|
+
schema = extract_schema_data(content)
|
|
116
|
+
if not schema:
|
|
117
|
+
return None # skip files without schema
|
|
118
|
+
|
|
119
|
+
css = extract_css_data(content)
|
|
120
|
+
tags = extract_liquid_tags(content)
|
|
121
|
+
elements = extract_html_elements(content)
|
|
122
|
+
schema_name = schema.get('schema_name', '')
|
|
123
|
+
# Handle i18n names like {"en": "Hero"} — extract first string value
|
|
124
|
+
if isinstance(schema_name, dict):
|
|
125
|
+
schema_name = str(next(iter(schema_name.values()), ''))
|
|
126
|
+
category = classify_section(liquid_path.stem, str(schema_name))
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
'theme_slug': theme_slug,
|
|
130
|
+
'filename': liquid_path.name,
|
|
131
|
+
'category': category,
|
|
132
|
+
**schema,
|
|
133
|
+
**css,
|
|
134
|
+
'liquid_tags': tags,
|
|
135
|
+
'html_elements': elements,
|
|
136
|
+
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
KB Analyzer — parse extracted sections, produce analysis.jsonl + aggregate-stats.json.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python3 kb-analyzer.py --input themes/extracted/ --output themes/
|
|
7
|
+
python3 kb-analyzer.py --input themes/extracted/ --output themes/ --resume
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import importlib.util
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
from collections import Counter, defaultdict
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# Import parse helpers
|
|
19
|
+
_helpers_path = Path(__file__).parent / 'kb-analyzer-helpers.py'
|
|
20
|
+
_hspec = importlib.util.spec_from_file_location('kb_analyzer_helpers', _helpers_path)
|
|
21
|
+
_helpers = importlib.util.module_from_spec(_hspec)
|
|
22
|
+
_hspec.loader.exec_module(_helpers)
|
|
23
|
+
parse_section = _helpers.parse_section
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def load_existing_keys(jsonl_path: Path) -> set:
    """Load the ``theme_slug:filename`` keys already present in a JSONL file.

    Used for resume support.  Blank lines, lines that are not valid JSON,
    JSON values that are not objects, and objects missing either key are
    skipped.

    Args:
        jsonl_path: Path of the analysis JSONL file; it may not exist yet.

    Returns:
        A set of ``"<theme_slug>:<filename>"`` strings (empty if the file is
        absent).
    """
    keys = set()
    if not jsonl_path.exists():
        return keys
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
                keys.add(f"{rec['theme_slug']}:{rec['filename']}")
            except (json.JSONDecodeError, KeyError, TypeError):
                # TypeError: the line is valid JSON but not an object
                # (e.g. a list or a number), so it cannot be a record.
                continue
    return keys
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def analyze_all(input_dir: Path, output_dir: Path, resume: bool = True):
    """Walk all extracted themes, parse sections, write JSONL + aggregate stats.

    Args:
        input_dir: Directory whose sub-directories are themes; only those
            containing a ``sections`` entry are processed.
        output_dir: Directory receiving ``analysis.jsonl`` and
            ``aggregate-stats.json``.
        resume: When true, records already present in ``analysis.jsonl`` are
            skipped and new ones are appended; otherwise the file is rewritten.
    """
    jsonl_path = output_dir / 'analysis.jsonl'
    stats_path = output_dir / 'aggregate-stats.json'

    # Resume: load already-processed keys
    existing_keys = load_existing_keys(jsonl_path) if resume else set()
    if existing_keys:
        print(f"Resume: {len(existing_keys)} records already processed")

    # Find all theme directories
    theme_dirs = sorted(d for d in input_dir.iterdir()
                        if d.is_dir() and (d / 'sections').exists())
    total_themes = len(theme_dirs)
    total_sections = 0
    skipped = 0
    errors = 0  # files for which parse_section returned nothing

    mode = 'a' if resume and existing_keys else 'w'

    # An interrupted run can leave a final line without its newline.
    # Appending straight after it would fuse the first new record onto that
    # partial line, so terminate it first.
    needs_newline = False
    if mode == 'a':
        with open(jsonl_path, 'rb') as tail:
            tail.seek(-1, 2)  # last byte; the file is non-empty in append mode
            needs_newline = tail.read(1) != b'\n'

    # Stream-write JSONL
    with open(jsonl_path, mode, encoding='utf-8') as jf:
        if needs_newline:
            jf.write('\n')

        for i, theme_dir in enumerate(theme_dirs, 1):
            theme_slug = theme_dir.name
            sections = sorted((theme_dir / 'sections').glob('*.liquid'))
            theme_count = 0

            for liquid in sections:
                key = f"{theme_slug}:{liquid.name}"
                if key in existing_keys:
                    skipped += 1
                    continue

                record = parse_section(liquid, theme_slug)
                if record:
                    jf.write(json.dumps(record, ensure_ascii=False) + '\n')
                    total_sections += 1
                    theme_count += 1
                else:
                    errors += 1

            # Progress line every 50 themes and for the last one.
            if i % 50 == 0 or i == total_themes:
                print(f"[{i}/{total_themes}] {theme_slug}: {theme_count} sections")

    print(f"\nAnalysis complete: {total_sections} new records, {skipped} resumed, {errors} parse errors")

    # Generate aggregate stats from full JSONL
    aggregate_stats(jsonl_path, stats_path)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def aggregate_stats(jsonl_path: Path, stats_path: Path):
    """Read JSONL, compute per-category frequency stats, write JSON.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are ignored.

    Args:
        jsonl_path: Analysis file with one JSON record per line.
        stats_path: Destination of the aggregated JSON document, which has
            the keys ``generated_at``, ``total_sections`` and ``categories``.
    """
    cat_setting_types = defaultdict(Counter)
    cat_setting_ids = defaultdict(Counter)
    cat_block_combos = defaultdict(Counter)
    cat_css_prefixes = defaultdict(Counter)
    cat_breakpoints = defaultdict(Counter)
    cat_counts = Counter()
    total = 0

    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(rec, dict):
                continue  # valid JSON, but not a record

            total += 1
            cat = rec.get('category', 'other')
            cat_counts[cat] += 1

            cat_setting_types[cat].update(rec.get('setting_types', []))
            cat_setting_ids[cat].update(rec.get('setting_ids', []))

            # The set of block types is tracked as one order-independent key.
            block_combo = '|'.join(sorted(rec.get('block_types', [])))
            if block_combo:
                cat_block_combos[cat][block_combo] += 1

            cat_css_prefixes[cat].update(rec.get('css_var_prefixes', []))
            cat_breakpoints[cat].update(str(bp) for bp in rec.get('breakpoints', []))

    # Build output
    categories = {}
    for cat in sorted(cat_counts):
        count = cat_counts[cat]
        categories[cat] = {
            'count': count,
            'top_setting_types': [
                # Integer arithmetic: int(c / count * 100) truncates values
                # such as 29/100 to 28 because of float rounding.
                {'type': t, 'count': c, 'pct': c * 100 // count}
                for t, c in cat_setting_types[cat].most_common(20)
            ],
            'top_setting_ids': [
                {'id': sid, 'count': c}
                for sid, c in cat_setting_ids[cat].most_common(20)
            ],
            'top_block_combos': [
                {'combo': combo, 'count': c}
                for combo, c in cat_block_combos[cat].most_common(10)
            ],
            'top_css_var_prefixes': [
                {'prefix': p, 'count': c}
                for p, c in cat_css_prefixes[cat].most_common(10)
            ],
            'breakpoint_usage': dict(cat_breakpoints[cat].most_common(10)),
        }

    output = {
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'total_sections': total,
        'categories': categories,
    }
    stats_path.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding='utf-8')
    print(f"Aggregate stats: {total} sections across {len(categories)} categories -> {stats_path}")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def main():
    """Command-line entry point: parse arguments and run the analysis.

    Exits with status 1 when the input directory does not exist or is not a
    directory.  The output directory is created only after the input has
    been validated, so a failed invocation leaves nothing behind.
    """
    parser = argparse.ArgumentParser(description="Analyze extracted Shopify sections")
    parser.add_argument('--input', default='themes/extracted/', help='Extracted themes dir')
    parser.add_argument('--output', default='themes/', help='Output dir for JSONL + stats')
    # Resuming is the default; --resume is accepted for explicitness and only
    # --no-resume changes behaviour.
    parser.add_argument('--resume', action='store_true', default=True,
                        help='Skip already-analyzed sections (default: on)')
    parser.add_argument('--no-resume', action='store_true', help='Re-analyze everything')
    args = parser.parse_args()

    input_dir = Path(args.input)
    if not input_dir.is_dir():
        print(f"Input dir not found: {input_dir}", file=sys.stderr)
        sys.exit(1)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    analyze_all(input_dir, output_dir, resume=not args.no_resume)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
if __name__ == '__main__':
|
|
186
|
+
main()
|
|
@@ -9,67 +9,20 @@ Usage:
|
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
11
|
import csv
|
|
12
|
+
import importlib.util
|
|
12
13
|
import json
|
|
13
14
|
import re
|
|
14
15
|
import shutil
|
|
15
16
|
from pathlib import Path
|
|
16
17
|
|
|
17
|
-
#
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"cta": ["cta", "call-to-action", "promo", "promotion", "offer"],
|
|
26
|
-
"newsletter": ["newsletter", "email", "subscribe", "signup", "form"],
|
|
27
|
-
"product-grid": ["product", "collection", "shop", "catalog", "grid"],
|
|
28
|
-
"blog": ["blog", "article", "post", "news", "journal"],
|
|
29
|
-
"countdown": ["countdown", "timer", "hurry", "limited", "sale-end"],
|
|
30
|
-
"trust": ["trust", "badge", "guarantee", "security", "payment"],
|
|
31
|
-
"stats": ["stat", "counter", "number", "metric", "achievement"],
|
|
32
|
-
"logo": ["logo", "brand", "partner", "client", "sponsor"],
|
|
33
|
-
"gallery": ["gallery", "image", "photo", "portfolio", "masonry"],
|
|
34
|
-
"video": ["video", "youtube", "vimeo", "embed", "media"],
|
|
35
|
-
"pricing": ["pricing", "plan", "tier", "subscription"],
|
|
36
|
-
"map": ["map", "location", "store", "contact", "direction"],
|
|
37
|
-
"announcement": ["announcement", "bar", "notification", "alert", "strip"],
|
|
38
|
-
"rich-text": ["rich-text", "text", "content", "wysiwyg", "body"],
|
|
39
|
-
"collection": ["collection", "category", "department", "browse"],
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
EXPECTED_SETTINGS = {
|
|
43
|
-
"hero": ["heading", "subheading", "image", "button_label", "button_link", "color_scheme", "padding", "min_height"],
|
|
44
|
-
"about": ["heading", "content", "image", "layout", "color_scheme", "padding"],
|
|
45
|
-
"features": ["heading", "columns", "color_scheme", "padding"],
|
|
46
|
-
"testimonial": ["heading", "color_scheme", "padding"],
|
|
47
|
-
"faq": ["heading", "color_scheme", "padding"],
|
|
48
|
-
"cta": ["heading", "button_label", "button_link", "color_scheme", "padding"],
|
|
49
|
-
"newsletter": ["heading", "button_label", "color_scheme", "padding"],
|
|
50
|
-
"product-grid":["collection", "products_per_row", "color_scheme", "padding"],
|
|
51
|
-
"blog": ["blog", "posts_shown", "color_scheme", "padding"],
|
|
52
|
-
"countdown": ["heading", "end_date", "color_scheme", "padding"],
|
|
53
|
-
"video": ["video_url", "heading", "color_scheme", "padding"],
|
|
54
|
-
"pricing": ["heading", "color_scheme", "padding"],
|
|
55
|
-
"gallery": ["heading", "columns", "color_scheme", "padding"],
|
|
56
|
-
"stats": ["heading", "color_scheme", "padding"],
|
|
57
|
-
"logo": ["heading", "color_scheme", "padding"],
|
|
58
|
-
"announcement":["text", "link", "link_text", "color_scheme"],
|
|
59
|
-
"rich-text": ["heading", "content", "color_scheme", "padding"],
|
|
60
|
-
"collection": ["heading", "color_scheme", "padding"],
|
|
61
|
-
"trust": ["heading", "color_scheme", "padding"],
|
|
62
|
-
"map": ["heading", "address", "color_scheme", "padding"],
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def classify_section(filename: str, schema_name: str) -> str:
|
|
67
|
-
"""Classify section type based on filename + schema name."""
|
|
68
|
-
combined = (filename + " " + schema_name).lower().replace('-', ' ').replace('_', ' ')
|
|
69
|
-
for section_type, keywords in SECTION_TYPES.items():
|
|
70
|
-
if any(kw in combined for kw in keywords):
|
|
71
|
-
return section_type
|
|
72
|
-
return "other"
|
|
18
|
+
# Import shared constants (DRY)
|
|
19
|
+
_constants_path = Path(__file__).parent / 'kb-constants.py'
|
|
20
|
+
_cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
|
|
21
|
+
_constants = importlib.util.module_from_spec(_cspec)
|
|
22
|
+
_cspec.loader.exec_module(_constants)
|
|
23
|
+
SECTION_TYPES = _constants.SECTION_TYPES
|
|
24
|
+
EXPECTED_SETTINGS = _constants.EXPECTED_SETTINGS
|
|
25
|
+
classify_section = _constants.classify_section
|
|
73
26
|
|
|
74
27
|
|
|
75
28
|
def score_completeness(section_type: str, setting_ids: list) -> int:
|
|
@@ -187,14 +140,23 @@ def save_index(index_path: Path, index: dict):
|
|
|
187
140
|
writer.writerows(rows)
|
|
188
141
|
|
|
189
142
|
|
|
143
|
+
SCHEMA_FIELDNAMES = ['No', 'Type', 'Category', 'Returns', 'Keywords', 'Required Props',
|
|
144
|
+
'Optional Props', 'Default Behavior', 'JSON Example', 'Constraints', 'Notes']
|
|
145
|
+
|
|
146
|
+
|
|
190
147
|
def save_schema_library(schema_path: Path, new_settings: list):
|
|
191
148
|
"""Append unique schema setting types to schema-library.csv."""
|
|
192
149
|
existing_types = set()
|
|
193
|
-
|
|
150
|
+
max_no = 0
|
|
151
|
+
if schema_path.exists() and schema_path.stat().st_size > 0:
|
|
194
152
|
with open(schema_path, newline='', encoding='utf-8') as f:
|
|
195
153
|
reader = csv.DictReader(f)
|
|
196
154
|
for row in reader:
|
|
197
155
|
existing_types.add(row.get('Type', ''))
|
|
156
|
+
try:
|
|
157
|
+
max_no = max(max_no, int(row.get('No', 0)))
|
|
158
|
+
except (ValueError, TypeError):
|
|
159
|
+
pass
|
|
198
160
|
|
|
199
161
|
new_rows = []
|
|
200
162
|
seen = set(existing_types)
|
|
@@ -203,24 +165,28 @@ def save_schema_library(schema_path: Path, new_settings: list):
|
|
|
203
165
|
continue
|
|
204
166
|
t = s.get('type', '')
|
|
205
167
|
if t and t not in seen:
|
|
168
|
+
max_no += 1
|
|
206
169
|
new_rows.append({
|
|
170
|
+
'No': max_no,
|
|
207
171
|
'Type': t,
|
|
208
172
|
'Category': 'basic',
|
|
209
173
|
'Returns': '',
|
|
210
|
-
'
|
|
174
|
+
'Keywords': t,
|
|
211
175
|
'Required Props': '',
|
|
212
176
|
'Optional Props': str(s.get('info', '')),
|
|
213
|
-
'Default': str(s.get('default', '')),
|
|
177
|
+
'Default Behavior': str(s.get('default', '')),
|
|
214
178
|
'JSON Example': json.dumps({k: s.get(k) for k in ['type', 'id', 'label'] if s.get(k)}, ensure_ascii=False),
|
|
179
|
+
'Constraints': '',
|
|
215
180
|
'Notes': '',
|
|
216
|
-
'Use Case': '',
|
|
217
|
-
'Keywords': t,
|
|
218
181
|
})
|
|
219
182
|
seen.add(t)
|
|
220
183
|
|
|
221
184
|
if new_rows:
|
|
185
|
+
file_is_empty = not schema_path.exists() or schema_path.stat().st_size == 0
|
|
222
186
|
with open(schema_path, 'a', newline='', encoding='utf-8') as f:
|
|
223
|
-
writer = csv.DictWriter(f, fieldnames=
|
|
187
|
+
writer = csv.DictWriter(f, fieldnames=SCHEMA_FIELDNAMES)
|
|
188
|
+
if file_is_empty:
|
|
189
|
+
writer.writeheader()
|
|
224
190
|
for row in new_rows:
|
|
225
191
|
writer.writerow(row)
|
|
226
192
|
|
|
@@ -263,6 +229,9 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
|
|
|
263
229
|
dest = component_dir / f"{slug}.liquid"
|
|
264
230
|
dest.write_text(parsed['content'], encoding='utf-8')
|
|
265
231
|
|
|
232
|
+
# Capture membership BEFORE mutation for accurate stats
|
|
233
|
+
is_new = slug not in index
|
|
234
|
+
|
|
266
235
|
# Update index
|
|
267
236
|
index[slug] = {
|
|
268
237
|
'No': row_no,
|
|
@@ -284,7 +253,7 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
|
|
|
284
253
|
# Update schema library
|
|
285
254
|
save_schema_library(schema_path, parsed['settings'])
|
|
286
255
|
|
|
287
|
-
stats['added' if
|
|
256
|
+
stats['added' if is_new else 'updated'] += 1
|
|
288
257
|
action = '🆕' if next_no > max((int(r.get('No', 0) or 0) for r in index.values() if r.get('Slug') != slug), default=0) else '⬆️'
|
|
289
258
|
print(f" {action} [{section_type:15s}] {slug} (completeness: {completeness}%)")
|
|
290
259
|
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Shared constants for KB pipeline scripts (DRY: single source of truth)."""
|
|
3
|
+
|
|
4
|
+
# Section type classification keywords.
# Order matters: classify_section() returns the FIRST type whose keyword
# matches, following this dict's insertion order.
SECTION_TYPES = {
    "hero": ["hero", "banner", "slider", "slideshow", "cover", "header-image", "fullscreen"],
    "about": ["about", "team", "story", "brand", "mission", "who-we-are"],
    "features": ["feature", "benefit", "highlight", "why-us", "column", "multi-column"],
    "testimonial": ["testimonial", "review", "quote", "feedback", "customer"],
    "faq": ["faq", "question", "accordion", "help", "support"],
    "cta": ["cta", "call-to-action", "promo", "promotion", "offer"],
    "newsletter": ["newsletter", "email", "subscribe", "signup", "form"],
    "product-grid": ["product", "collection", "shop", "catalog", "grid"],
    "blog": ["blog", "article", "post", "news", "journal"],
    "countdown": ["countdown", "timer", "hurry", "limited", "sale-end"],
    "trust": ["trust", "badge", "guarantee", "security", "payment"],
    "stats": ["stat", "counter", "number", "metric", "achievement"],
    "logo": ["logo", "brand", "partner", "client", "sponsor"],
    "gallery": ["gallery", "image", "photo", "portfolio", "masonry"],
    "video": ["video", "youtube", "vimeo", "embed", "media"],
    "pricing": ["pricing", "plan", "tier", "subscription"],
    "map": ["map", "location", "store", "contact", "direction"],
    "announcement": ["announcement", "bar", "notification", "alert", "strip"],
    "rich-text": ["rich-text", "text", "content", "wysiwyg", "body"],
    "collection": ["collection", "category", "department", "browse"],
    "header": ["header", "navigation", "nav", "menu", "topbar"],
    "footer": ["footer", "bottom", "copyright"],
    "sidebar": ["sidebar", "widget", "aside"],
}

# Expected settings per section type (for completeness scoring)
EXPECTED_SETTINGS = {
    "hero": [
        "heading", "subheading", "image", "button_label", "button_link",
        "color_scheme", "padding", "min_height",
    ],
    "about": ["heading", "content", "image", "layout", "color_scheme", "padding"],
    "features": ["heading", "columns", "color_scheme", "padding"],
    "testimonial": ["heading", "color_scheme", "padding"],
    "faq": ["heading", "color_scheme", "padding"],
    "cta": ["heading", "button_label", "button_link", "color_scheme", "padding"],
    "newsletter": ["heading", "button_label", "color_scheme", "padding"],
    "product-grid": ["collection", "products_per_row", "color_scheme", "padding"],
    "blog": ["blog", "posts_shown", "color_scheme", "padding"],
    "countdown": ["heading", "end_date", "color_scheme", "padding"],
    "video": ["video_url", "heading", "color_scheme", "padding"],
    "pricing": ["heading", "color_scheme", "padding"],
    "gallery": ["heading", "columns", "color_scheme", "padding"],
    "stats": ["heading", "color_scheme", "padding"],
    "logo": ["heading", "color_scheme", "padding"],
    "announcement": ["text", "link", "link_text", "color_scheme"],
    "rich-text": ["heading", "content", "color_scheme", "padding"],
    "collection": ["heading", "color_scheme", "padding"],
    "trust": ["heading", "color_scheme", "padding"],
    "map": ["heading", "address", "color_scheme", "padding"],
}


def _normalize_label(text: str) -> str:
    """Lower-case *text* and turn '-' / '_' separators into spaces."""
    return text.lower().replace('-', ' ').replace('_', ' ')


def classify_section(filename: str, schema_name: str) -> str:
    """Classify section type based on filename + schema name.

    The filename and schema name are joined, lower-cased and have their
    '-' / '_' separators replaced by spaces. Each keyword in SECTION_TYPES
    is normalised the same way before the substring test, so hyphenated
    keywords such as "call-to-action" or "header-image" can actually match
    (comparing them raw against the de-hyphenated input never could).

    Args:
        filename: Section file name (with or without extension).
        schema_name: Name taken from the section's schema; may be empty.

    Returns:
        The first key of SECTION_TYPES (in insertion order) that has a
        keyword occurring as a substring of the combined text, or "other"
        when nothing matches.
    """
    combined = _normalize_label(filename + " " + schema_name)
    for section_type, keywords in SECTION_TYPES.items():
        # Normalise the keyword too; otherwise any keyword containing '-'
        # is unmatchable because the input no longer contains hyphens.
        if any(_normalize_label(kw) in combined for kw in keywords):
            return section_type
    return "other"
|