npm - rebly-sections - Versions diffs - 1.0.1 → 1.2.0 - Mend

rebly-sections 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161)

package/assets/scripts/kb-analyzer-helpers.py ADDED Viewed

@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""Parse/extract functions for kb-analyzer.py — schema, CSS, Liquid, HTML extraction."""
+import importlib.util
+import json
+import re
+from pathlib import Path
+# Import classify_section from shared constants
+_constants_path = Path(__file__).parent / 'kb-constants.py'
+_cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
+_constants = importlib.util.module_from_spec(_cspec)
+_cspec.loader.exec_module(_constants)
+classify_section = _constants.classify_section
+# ─── Schema Extraction ────────────────────────────────────────────────────────
+SCHEMA_RE = re.compile(r'\{%-?\s*schema\s*-?%\}(.*?)\{%-?\s*endschema\s*-?%\}', re.DOTALL)
+def extract_schema_data(content: str) -> dict:
+    """Extract structured data from {% schema %} block."""
+    match = SCHEMA_RE.search(content)
+    if not match:
+        return {}
+    try:
+        schema = json.loads(match.group(1).strip())
+    except json.JSONDecodeError:
+        return {}
+    settings = schema.get('settings', [])
+    blocks = schema.get('blocks', [])
+    return {
+        'schema_name': str(schema.get('name', '')) if not isinstance(schema.get('name'), dict) else str(next(iter(schema.get('name', {}).values()), '')),
+        'settings_count': len([s for s in settings if isinstance(s, dict) and s.get('type')]),
+        'blocks_count': len(blocks),
+        'setting_types': sorted(set(s.get('type', '') for s in settings if isinstance(s, dict) and s.get('type'))),
+        'setting_ids': [s.get('id', '') for s in settings if isinstance(s, dict) and s.get('id')],
+        'block_types': sorted(set(b.get('type', '') for b in blocks if isinstance(b, dict) and b.get('type'))),
+        'has_presets': bool(schema.get('presets')),
+        'has_app_block': any(b.get('type') == '@app' for b in blocks if isinstance(b, dict)),
+    }
+# ─── CSS Extraction ──────────────────────────────────────────────────────────
+STYLE_RE = re.compile(r'<style[^>]*>(.*?)</style>', re.DOTALL | re.IGNORECASE)
+CSS_VAR_RE = re.compile(r'var\((--[\w-]+)\)')
+BREAKPOINT_RE = re.compile(r'@media[^{]*?(\d{3,4})px')
+CSS_CLASS_RE = re.compile(r'\.([\w-]+)\s*[{,]')
+def extract_css_data(content: str) -> dict:
+    """Extract CSS variables, breakpoints, classes from <style> blocks."""
+    style_match = STYLE_RE.search(content)
+    css = style_match.group(1) if style_match else ''
+    css_vars = CSS_VAR_RE.findall(css)
+    prefixes = sorted(set(
+        re.match(r'(--[\w]+-)', v).group(1)
+        for v in css_vars if re.match(r'(--[\w]+-)', v)
+    ))
+    breakpoints = sorted(set(int(m) for m in BREAKPOINT_RE.findall(css)))
+    classes = sorted(set(CSS_CLASS_RE.findall(css)))[:20]  # cap to avoid noise
+    return {
+        'css_vars': sorted(set(css_vars)),
+        'css_var_prefixes': prefixes,
+        'breakpoints': breakpoints,
+        'css_classes': classes,
+    }
+# ─── Liquid Tag Extraction ───────────────────────────────────────────────────
+LIQUID_TAG_RE = re.compile(r'\{%-?\s*([\w]+)')
+KNOWN_TAGS = frozenset({
+    'if', 'unless', 'elsif', 'else', 'endif', 'for', 'endfor',
+    'assign', 'capture', 'endcapture', 'render', 'include',
+    'form', 'endform', 'paginate', 'endpaginate', 'case', 'when',
+    'endcase', 'comment', 'endcomment', 'liquid', 'echo', 'cycle',
+    'tablerow', 'schema', 'endschema', 'style', 'endstyle',
+})
+def extract_liquid_tags(content: str) -> list:
+    """Extract known Liquid tags used in content."""
+    found = set(LIQUID_TAG_RE.findall(content))
+    return sorted(found & KNOWN_TAGS)
+# ─── HTML Element Extraction ─────────────────────────────────────────────────
+HTML_ELEMENT_RE = re.compile(
+    r'<(section|div|h[1-6]|p|a|img|video|button|span|ul|ol|li|'
+    r'form|input|textarea|nav|header|footer|main|article|aside)\b',
+    re.IGNORECASE
+)
+def extract_html_elements(content: str) -> list:
+    """Extract HTML element types used."""
+    return sorted(set(m.lower() for m in HTML_ELEMENT_RE.findall(content)))
+# ─── Full Section Parser ─────────────────────────────────────────────────────
+def parse_section(liquid_path: Path, theme_slug: str) -> dict | None:
+    """Parse a .liquid file and return structured record for analysis.jsonl.
+    Returns None if the file has no valid schema block.
+    """
+    try:
+        content = liquid_path.read_text(encoding='utf-8', errors='replace')
+    except Exception:
+        return None
+    schema = extract_schema_data(content)
+    if not schema:
+        return None  # skip files without schema
+    css = extract_css_data(content)
+    tags = extract_liquid_tags(content)
+    elements = extract_html_elements(content)
+    schema_name = schema.get('schema_name', '')
+    # Handle i18n names like {"en": "Hero"} — extract first string value
+    if isinstance(schema_name, dict):
+        schema_name = str(next(iter(schema_name.values()), ''))
+    category = classify_section(liquid_path.stem, str(schema_name))
+    return {
+        'theme_slug': theme_slug,
+        'filename': liquid_path.name,
+        'category': category,
+        **schema,
+        **css,
+        'liquid_tags': tags,
+        'html_elements': elements,
+    }

package/assets/scripts/kb-analyzer.py ADDED Viewed

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""
+KB Analyzer — parse extracted sections, produce analysis.jsonl + aggregate-stats.json.
+Usage:
+    python3 kb-analyzer.py --input themes/extracted/ --output themes/
+    python3 kb-analyzer.py --input themes/extracted/ --output themes/ --resume
+"""
+import argparse
+import importlib.util
+import json
+import sys
+from collections import Counter, defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+# Import parse helpers
+_helpers_path = Path(__file__).parent / 'kb-analyzer-helpers.py'
+_hspec = importlib.util.spec_from_file_location('kb_analyzer_helpers', _helpers_path)
+_helpers = importlib.util.module_from_spec(_hspec)
+_hspec.loader.exec_module(_helpers)
+parse_section = _helpers.parse_section
+def load_existing_keys(jsonl_path: Path) -> set:
+    """Load existing theme_slug+filename keys from JSONL for resume support."""
+    keys = set()
+    if not jsonl_path.exists():
+        return keys
+    with open(jsonl_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                rec = json.loads(line)
+                keys.add(f"{rec['theme_slug']}:{rec['filename']}")
+            except (json.JSONDecodeError, KeyError):
+                pass
+    return keys
+def analyze_all(input_dir: Path, output_dir: Path, resume: bool = True):
+    """Walk all extracted themes, parse sections, write JSONL + aggregate stats."""
+    jsonl_path = output_dir / 'analysis.jsonl'
+    stats_path = output_dir / 'aggregate-stats.json'
+    # Resume: load already-processed keys
+    existing_keys = load_existing_keys(jsonl_path) if resume else set()
+    if existing_keys:
+        print(f"Resume: {len(existing_keys)} records already processed")
+    # Find all theme directories
+    theme_dirs = sorted(d for d in input_dir.iterdir()
+                        if d.is_dir() and (d / 'sections').exists())
+    total_themes = len(theme_dirs)
+    total_sections = 0
+    skipped = 0
+    errors = 0
+    # Stream-write JSONL
+    mode = 'a' if resume and existing_keys else 'w'
+    with open(jsonl_path, mode, encoding='utf-8') as jf:
+        for i, theme_dir in enumerate(theme_dirs, 1):
+            theme_slug = theme_dir.name
+            sections = sorted((theme_dir / 'sections').glob('*.liquid'))
+            theme_count = 0
+            for liquid in sections:
+                key = f"{theme_slug}:{liquid.name}"
+                if key in existing_keys:
+                    skipped += 1
+                    continue
+                record = parse_section(liquid, theme_slug)
+                if record:
+                    jf.write(json.dumps(record, ensure_ascii=False) + '\n')
+                    total_sections += 1
+                    theme_count += 1
+                else:
+                    errors += 1
+            if i % 50 == 0 or i == total_themes:
+                print(f"[{i}/{total_themes}] {theme_slug}: {theme_count} sections")
+    print(f"\nAnalysis complete: {total_sections} new records, {skipped} resumed, {errors} parse errors")
+    # Generate aggregate stats from full JSONL
+    aggregate_stats(jsonl_path, stats_path)
+def aggregate_stats(jsonl_path: Path, stats_path: Path):
+    """Read JSONL, compute per-category frequency stats, write JSON."""
+    cat_setting_types = defaultdict(Counter)
+    cat_setting_ids = defaultdict(Counter)
+    cat_block_combos = defaultdict(Counter)
+    cat_css_prefixes = defaultdict(Counter)
+    cat_breakpoints = defaultdict(Counter)
+    cat_counts = Counter()
+    total = 0
+    with open(jsonl_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                rec = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            total += 1
+            cat = rec.get('category', 'other')
+            cat_counts[cat] += 1
+            for st in rec.get('setting_types', []):
+                cat_setting_types[cat][st] += 1
+            for sid in rec.get('setting_ids', []):
+                cat_setting_ids[cat][sid] += 1
+            block_combo = '|'.join(sorted(rec.get('block_types', [])))
+            if block_combo:
+                cat_block_combos[cat][block_combo] += 1
+            for prefix in rec.get('css_var_prefixes', []):
+                cat_css_prefixes[cat][prefix] += 1
+            for bp in rec.get('breakpoints', []):
+                cat_breakpoints[cat][str(bp)] += 1
+    # Build output
+    categories = {}
+    for cat in sorted(cat_counts.keys()):
+        count = cat_counts[cat]
+        categories[cat] = {
+            'count': count,
+            'top_setting_types': [
+                {'type': t, 'count': c, 'pct': int(c / count * 100)}
+                for t, c in cat_setting_types[cat].most_common(20)
+            ],
+            'top_setting_ids': [
+                {'id': sid, 'count': c}
+                for sid, c in cat_setting_ids[cat].most_common(20)
+            ],
+            'top_block_combos': [
+                {'combo': combo, 'count': c}
+                for combo, c in cat_block_combos[cat].most_common(10)
+            ],
+            'top_css_var_prefixes': [
+                {'prefix': p, 'count': c}
+                for p, c in cat_css_prefixes[cat].most_common(10)
+            ],
+            'breakpoint_usage': dict(cat_breakpoints[cat].most_common(10)),
+        }
+    output = {
+        'generated_at': datetime.now(timezone.utc).isoformat(),
+        'total_sections': total,
+        'categories': categories,
+    }
+    stats_path.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding='utf-8')
+    print(f"Aggregate stats: {total} sections across {len(categories)} categories -> {stats_path}")
+def main():
+    parser = argparse.ArgumentParser(description="Analyze extracted Shopify sections")
+    parser.add_argument('--input', default='themes/extracted/', help='Extracted themes dir')
+    parser.add_argument('--output', default='themes/', help='Output dir for JSONL + stats')
+    parser.add_argument('--resume', action='store_true', default=True,
+                        help='Skip already-analyzed sections (default: on)')
+    parser.add_argument('--no-resume', action='store_true', help='Re-analyze everything')
+    args = parser.parse_args()
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    if not input_dir.exists():
+        print(f"Input dir not found: {input_dir}")
+        sys.exit(1)
+    analyze_all(input_dir, output_dir, resume=not args.no_resume)
+if __name__ == '__main__':
+    main()

package/assets/scripts/kb-builder.py CHANGED Viewed

@@ -9,67 +9,20 @@ Usage:
 import argparse
 import csv
+import importlib.util
 import json
 import re
 import shutil
 from pathlib import Path
-# ─── Section Type Classification ──────────────────────────────────────────────
-SECTION_TYPES = {
-    "hero":          ["hero", "banner", "slider", "slideshow", "cover", "header-image", "fullscreen"],
-    "about":         ["about", "team", "story", "brand", "mission", "who-we-are"],
-    "features":      ["feature", "benefit", "highlight", "why-us", "column", "multi-column"],
-    "testimonial":   ["testimonial", "review", "quote", "feedback", "customer"],
-    "faq":           ["faq", "question", "accordion", "help", "support"],
-    "cta":           ["cta", "call-to-action", "promo", "promotion", "offer"],
-    "newsletter":    ["newsletter", "email", "subscribe", "signup", "form"],
-    "product-grid":  ["product", "collection", "shop", "catalog", "grid"],
-    "blog":          ["blog", "article", "post", "news", "journal"],
-    "countdown":     ["countdown", "timer", "hurry", "limited", "sale-end"],
-    "trust":         ["trust", "badge", "guarantee", "security", "payment"],
-    "stats":         ["stat", "counter", "number", "metric", "achievement"],
-    "logo":          ["logo", "brand", "partner", "client", "sponsor"],
-    "gallery":       ["gallery", "image", "photo", "portfolio", "masonry"],
-    "video":         ["video", "youtube", "vimeo", "embed", "media"],
-    "pricing":       ["pricing", "plan", "tier", "subscription"],
-    "map":           ["map", "location", "store", "contact", "direction"],
-    "announcement":  ["announcement", "bar", "notification", "alert", "strip"],
-    "rich-text":     ["rich-text", "text", "content", "wysiwyg", "body"],
-    "collection":    ["collection", "category", "department", "browse"],
-}
-EXPECTED_SETTINGS = {
-    "hero":        ["heading", "subheading", "image", "button_label", "button_link", "color_scheme", "padding", "min_height"],
-    "about":       ["heading", "content", "image", "layout", "color_scheme", "padding"],
-    "features":    ["heading", "columns", "color_scheme", "padding"],
-    "testimonial": ["heading", "color_scheme", "padding"],
-    "faq":         ["heading", "color_scheme", "padding"],
-    "cta":         ["heading", "button_label", "button_link", "color_scheme", "padding"],
-    "newsletter":  ["heading", "button_label", "color_scheme", "padding"],
-    "product-grid":["collection", "products_per_row", "color_scheme", "padding"],
-    "blog":        ["blog", "posts_shown", "color_scheme", "padding"],
-    "countdown":   ["heading", "end_date", "color_scheme", "padding"],
-    "video":       ["video_url", "heading", "color_scheme", "padding"],
-    "pricing":     ["heading", "color_scheme", "padding"],
-    "gallery":     ["heading", "columns", "color_scheme", "padding"],
-    "stats":       ["heading", "color_scheme", "padding"],
-    "logo":        ["heading", "color_scheme", "padding"],
-    "announcement":["text", "link", "link_text", "color_scheme"],
-    "rich-text":   ["heading", "content", "color_scheme", "padding"],
-    "collection":  ["heading", "color_scheme", "padding"],
-    "trust":       ["heading", "color_scheme", "padding"],
-    "map":         ["heading", "address", "color_scheme", "padding"],
-}
-def classify_section(filename: str, schema_name: str) -> str:
-    """Classify section type based on filename + schema name."""
-    combined = (filename + " " + schema_name).lower().replace('-', ' ').replace('_', ' ')
-    for section_type, keywords in SECTION_TYPES.items():
-        if any(kw in combined for kw in keywords):
-            return section_type
-    return "other"
+# Import shared constants (DRY)
+_constants_path = Path(__file__).parent / 'kb-constants.py'
+_cspec = importlib.util.spec_from_file_location('kb_constants', _constants_path)
+_constants = importlib.util.module_from_spec(_cspec)
+_cspec.loader.exec_module(_constants)
+SECTION_TYPES = _constants.SECTION_TYPES
+EXPECTED_SETTINGS = _constants.EXPECTED_SETTINGS
+classify_section = _constants.classify_section
 def score_completeness(section_type: str, setting_ids: list) -> int:
@@ -187,14 +140,23 @@ def save_index(index_path: Path, index: dict):
         writer.writerows(rows)
+SCHEMA_FIELDNAMES = ['No', 'Type', 'Category', 'Returns', 'Keywords', 'Required Props',
+                     'Optional Props', 'Default Behavior', 'JSON Example', 'Constraints', 'Notes']
 def save_schema_library(schema_path: Path, new_settings: list):
     """Append unique schema setting types to schema-library.csv."""
     existing_types = set()
-    if schema_path.exists():
+    max_no = 0
+    if schema_path.exists() and schema_path.stat().st_size > 0:
         with open(schema_path, newline='', encoding='utf-8') as f:
             reader = csv.DictReader(f)
             for row in reader:
                 existing_types.add(row.get('Type', ''))
+                try:
+                    max_no = max(max_no, int(row.get('No', 0)))
+                except (ValueError, TypeError):
+                    pass
     new_rows = []
     seen = set(existing_types)
@@ -203,24 +165,28 @@ def save_schema_library(schema_path: Path, new_settings: list):
             continue
         t = s.get('type', '')
         if t and t not in seen:
+            max_no += 1
             new_rows.append({
+                'No': max_no,
                 'Type': t,
                 'Category': 'basic',
                 'Returns': '',
-                'Label': s.get('label', ''),
+                'Keywords': t,
                 'Required Props': '',
                 'Optional Props': str(s.get('info', '')),
-                'Default': str(s.get('default', '')),
+                'Default Behavior': str(s.get('default', '')),
                 'JSON Example': json.dumps({k: s.get(k) for k in ['type', 'id', 'label'] if s.get(k)}, ensure_ascii=False),
+                'Constraints': '',
                 'Notes': '',
-                'Use Case': '',
-                'Keywords': t,
             })
             seen.add(t)
     if new_rows:
+        file_is_empty = not schema_path.exists() or schema_path.stat().st_size == 0
         with open(schema_path, 'a', newline='', encoding='utf-8') as f:
-            writer = csv.DictWriter(f, fieldnames=new_rows[0].keys())
+            writer = csv.DictWriter(f, fieldnames=SCHEMA_FIELDNAMES)
+            if file_is_empty:
+                writer.writeheader()
             for row in new_rows:
                 writer.writerow(row)
@@ -263,6 +229,9 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
         dest = component_dir / f"{slug}.liquid"
         dest.write_text(parsed['content'], encoding='utf-8')
+        # Capture membership BEFORE mutation for accurate stats
+        is_new = slug not in index
         # Update index
         index[slug] = {
             'No': row_no,
@@ -284,7 +253,7 @@ def process_theme(theme_dir: Path, kb_dir: Path, stats: dict):
         # Update schema library
         save_schema_library(schema_path, parsed['settings'])
-        stats['added' if slug not in index else 'updated'] += 1
+        stats['added' if is_new else 'updated'] += 1
         action = '🆕' if next_no > max((int(r.get('No', 0) or 0) for r in index.values() if r.get('Slug') != slug), default=0) else '⬆️'
         print(f"  {action} [{section_type:15s}] {slug} (completeness: {completeness}%)")

package/assets/scripts/kb-constants.py ADDED Viewed

@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""Shared constants for KB pipeline scripts (DRY: single source of truth)."""
+# Section type classification keywords
+SECTION_TYPES = {
+    "hero":          ["hero", "banner", "slider", "slideshow", "cover", "header-image", "fullscreen"],
+    "about":         ["about", "team", "story", "brand", "mission", "who-we-are"],
+    "features":      ["feature", "benefit", "highlight", "why-us", "column", "multi-column"],
+    "testimonial":   ["testimonial", "review", "quote", "feedback", "customer"],
+    "faq":           ["faq", "question", "accordion", "help", "support"],
+    "cta":           ["cta", "call-to-action", "promo", "promotion", "offer"],
+    "newsletter":    ["newsletter", "email", "subscribe", "signup", "form"],
+    "product-grid":  ["product", "collection", "shop", "catalog", "grid"],
+    "blog":          ["blog", "article", "post", "news", "journal"],
+    "countdown":     ["countdown", "timer", "hurry", "limited", "sale-end"],
+    "trust":         ["trust", "badge", "guarantee", "security", "payment"],
+    "stats":         ["stat", "counter", "number", "metric", "achievement"],
+    "logo":          ["logo", "brand", "partner", "client", "sponsor"],
+    "gallery":       ["gallery", "image", "photo", "portfolio", "masonry"],
+    "video":         ["video", "youtube", "vimeo", "embed", "media"],
+    "pricing":       ["pricing", "plan", "tier", "subscription"],
+    "map":           ["map", "location", "store", "contact", "direction"],
+    "announcement":  ["announcement", "bar", "notification", "alert", "strip"],
+    "rich-text":     ["rich-text", "text", "content", "wysiwyg", "body"],
+    "collection":    ["collection", "category", "department", "browse"],
+    "header":        ["header", "navigation", "nav", "menu", "topbar"],
+    "footer":        ["footer", "bottom", "copyright"],
+    "sidebar":       ["sidebar", "widget", "aside"],
+}
+# Expected settings per section type (for completeness scoring)
+EXPECTED_SETTINGS = {
+    "hero":        ["heading", "subheading", "image", "button_label", "button_link", "color_scheme", "padding", "min_height"],
+    "about":       ["heading", "content", "image", "layout", "color_scheme", "padding"],
+    "features":    ["heading", "columns", "color_scheme", "padding"],
+    "testimonial": ["heading", "color_scheme", "padding"],
+    "faq":         ["heading", "color_scheme", "padding"],
+    "cta":         ["heading", "button_label", "button_link", "color_scheme", "padding"],
+    "newsletter":  ["heading", "button_label", "color_scheme", "padding"],
+    "product-grid":["collection", "products_per_row", "color_scheme", "padding"],
+    "blog":        ["blog", "posts_shown", "color_scheme", "padding"],
+    "countdown":   ["heading", "end_date", "color_scheme", "padding"],
+    "video":       ["video_url", "heading", "color_scheme", "padding"],
+    "pricing":     ["heading", "color_scheme", "padding"],
+    "gallery":     ["heading", "columns", "color_scheme", "padding"],
+    "stats":       ["heading", "color_scheme", "padding"],
+    "logo":        ["heading", "color_scheme", "padding"],
+    "announcement":["text", "link", "link_text", "color_scheme"],
+    "rich-text":   ["heading", "content", "color_scheme", "padding"],
+    "collection":  ["heading", "color_scheme", "padding"],
+    "trust":       ["heading", "color_scheme", "padding"],
+    "map":         ["heading", "address", "color_scheme", "padding"],
+}
+def classify_section(filename: str, schema_name: str) -> str:
+    """Classify section type based on filename + schema name."""
+    combined = (filename + " " + schema_name).lower().replace('-', ' ').replace('_', ' ')
+    for section_type, keywords in SECTION_TYPES.items():
+        if any(kw in combined for kw in keywords):
+            return section_type
+    return "other"