rebly-sections 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/assets/data/block-patterns.csv +160 -0
  2. package/assets/data/component-library/GDPR-modal.liquid +183 -0
  3. package/assets/data/component-library/Ishi_parallaxblockstyle1.liquid +331 -0
  4. package/assets/data/component-library/_index.csv +157 -19
  5. package/assets/data/component-library/about.liquid +1557 -0
  6. package/assets/data/component-library/adv-header.liquid +344 -0
  7. package/assets/data/component-library/adv-navigation.liquid +542 -0
  8. package/assets/data/component-library/announcement-bar.liquid +42 -60
  9. package/assets/data/component-library/article.liquid +242 -0
  10. package/assets/data/component-library/axeo-perfume-cosmetics-store-shopify-theme-about.liquid +1557 -0
  11. package/assets/data/component-library/basel-gl_newsletter_pets.liquid +612 -0
  12. package/assets/data/component-library/bixbang-fullpackage-collection-template.liquid +990 -0
  13. package/assets/data/component-library/blog-sidebar-article.liquid +51 -0
  14. package/assets/data/component-library/blog-sidebar-deals.liquid +189 -0
  15. package/assets/data/component-library/blog-sidebar-instagram.liquid +126 -0
  16. package/assets/data/component-library/blog-sidebar-tags.liquid +30 -0
  17. package/assets/data/component-library/blog.liquid +371 -0
  18. package/assets/data/component-library/brands-page.liquid +114 -0
  19. package/assets/data/component-library/cake-shop-shopify-theme-for-bakery-and-cafe-home-support-blo.liquid +780 -0
  20. package/assets/data/component-library/collection-template-promotion.liquid +1139 -0
  21. package/assets/data/component-library/collection-template.liquid +146 -0
  22. package/assets/data/component-library/contact-us.liquid +663 -0
  23. package/assets/data/component-library/contact.liquid +256 -0
  24. package/assets/data/component-library/copyright_payment.liquid +95 -0
  25. package/assets/data/component-library/custom-content.liquid +832 -0
  26. package/assets/data/component-library/faq-template-3.liquid +1014 -0
  27. package/assets/data/component-library/footer-model-1.liquid +503 -0
  28. package/assets/data/component-library/footer-model-10.liquid +210 -0
  29. package/assets/data/component-library/footer-model-2.liquid +460 -0
  30. package/assets/data/component-library/footer-model-3.liquid +548 -0
  31. package/assets/data/component-library/footer-model-4.liquid +455 -0
  32. package/assets/data/component-library/footer-model-5.liquid +407 -0
  33. package/assets/data/component-library/footer-model-6.liquid +543 -0
  34. package/assets/data/component-library/footer-model-7.liquid +345 -0
  35. package/assets/data/component-library/footer-model-8.liquid +279 -0
  36. package/assets/data/component-library/footer-model-9.liquid +376 -0
  37. package/assets/data/component-library/gallery.liquid +236 -0
  38. package/assets/data/component-library/gecko-shopify-v5-7-6-nulled-manual_blog.liquid +720 -0
  39. package/assets/data/component-library/gl_newsletter_pets.liquid +612 -0
  40. package/assets/data/component-library/gp-logo-list.liquid +362 -0
  41. package/assets/data/component-library/grid-banner-type-3-b.liquid +655 -0
  42. package/assets/data/component-library/header-model-1.liquid +427 -0
  43. package/assets/data/component-library/header-model-10.liquid +599 -0
  44. package/assets/data/component-library/header-model-2.liquid +633 -0
  45. package/assets/data/component-library/header-model-3.liquid +415 -0
  46. package/assets/data/component-library/header-model-4.liquid +754 -0
  47. package/assets/data/component-library/header-model-5.liquid +562 -0
  48. package/assets/data/component-library/header-model-6.liquid +713 -0
  49. package/assets/data/component-library/header-model-7.liquid +743 -0
  50. package/assets/data/component-library/header-model-8.liquid +500 -0
  51. package/assets/data/component-library/header-model-9.liquid +506 -0
  52. package/assets/data/component-library/home-blog-posts-1.liquid +399 -0
  53. package/assets/data/component-library/home-blog-posts-2.liquid +393 -0
  54. package/assets/data/component-library/home-blog-posts-3.liquid +545 -0
  55. package/assets/data/component-library/home-brand-slider.liquid +224 -0
  56. package/assets/data/component-library/home-circled-block.liquid +332 -0
  57. package/assets/data/component-library/home-contact-block-1.liquid +395 -0
  58. package/assets/data/component-library/home-contact-block-2.liquid +372 -0
  59. package/assets/data/component-library/home-content-block-1.liquid +320 -0
  60. package/assets/data/component-library/home-donut-chart.liquid +335 -0
  61. package/assets/data/component-library/home-fade-in-banner.liquid +277 -0
  62. package/assets/data/component-library/home-faq-model.liquid +323 -0
  63. package/assets/data/component-library/home-featured-blog.liquid +1462 -0
  64. package/assets/data/component-library/home-featured-collections.liquid +484 -0
  65. package/assets/data/component-library/home-gallery-block1.liquid +276 -0
  66. package/assets/data/component-library/home-gallery-block2.liquid +396 -0
  67. package/assets/data/component-library/home-grid-banner-type-1.liquid +371 -0
  68. package/assets/data/component-library/home-grid-banner-type-2.liquid +362 -0
  69. package/assets/data/component-library/home-grid-banner-type-3.liquid +374 -0
  70. package/assets/data/component-library/home-grid-banner-type-4.liquid +900 -0
  71. package/assets/data/component-library/home-grid-banner-type-5.liquid +368 -0
  72. package/assets/data/component-library/home-grid-banner-type-6.liquid +382 -0
  73. package/assets/data/component-library/home-grid-banner-type-7.liquid +371 -0
  74. package/assets/data/component-library/home-hotspot-with-product-carousel.liquid +1425 -0
  75. package/assets/data/component-library/home-image-gallery.liquid +1087 -0
  76. package/assets/data/component-library/home-instagram.liquid +356 -0
  77. package/assets/data/component-library/home-newsletter.liquid +246 -0
  78. package/assets/data/component-library/home-number-counter.liquid +790 -0
  79. package/assets/data/component-library/home-price-table.liquid +416 -0
  80. package/assets/data/component-library/home-pricing-table.liquid +1076 -0
  81. package/assets/data/component-library/home-product-grid.liquid +413 -0
  82. package/assets/data/component-library/home-product-tab-1.liquid +528 -0
  83. package/assets/data/component-library/home-product-tab-2.liquid +342 -0
  84. package/assets/data/component-library/home-product-tab-3.liquid +357 -0
  85. package/assets/data/component-library/home-product-vertical-carousel.liquid +477 -0
  86. package/assets/data/component-library/home-quotes-1.liquid +274 -0
  87. package/assets/data/component-library/home-quotes-2.liquid +239 -0
  88. package/assets/data/component-library/home-quotes-3.liquid +244 -0
  89. package/assets/data/component-library/home-quotes-4.liquid +258 -0
  90. package/assets/data/component-library/home-slider-width-promo-images.liquid +1377 -0
  91. package/assets/data/component-library/home-slideshow-type-1.liquid +656 -0
  92. package/assets/data/component-library/home-slideshow-type-2.liquid +570 -0
  93. package/assets/data/component-library/home-specification-block-1.liquid +468 -0
  94. package/assets/data/component-library/home-specification-block-2.liquid +291 -0
  95. package/assets/data/component-library/home-specification-block-3.liquid +429 -0
  96. package/assets/data/component-library/home-support-block.liquid +392 -0
  97. package/assets/data/component-library/home-testimonial.liquid +1348 -0
  98. package/assets/data/component-library/home-video-banner.liquid +317 -0
  99. package/assets/data/component-library/home-wide-banner.liquid +327 -0
  100. package/assets/data/component-library/icon-with-content.liquid +478 -0
  101. package/assets/data/component-library/instafeed.liquid +1 -0
  102. package/assets/data/component-library/kea-ecommerce-interior-furniture-shopify-theme-about.liquid +1300 -0
  103. package/assets/data/component-library/kidslife-responsive-shopify-theme-home-number-counter.liquid +729 -0
  104. package/assets/data/component-library/logo-bar.liquid +314 -0
  105. package/assets/data/component-library/lookbook.liquid +367 -0
  106. package/assets/data/component-library/manual_blog.liquid +724 -0
  107. package/assets/data/component-library/navigation-etc.liquid +642 -0
  108. package/assets/data/component-library/newsletter.liquid +246 -0
  109. package/assets/data/component-library/order-form.liquid +96 -0
  110. package/assets/data/component-library/page-catev1-template.liquid +344 -0
  111. package/assets/data/component-library/popup_video.liquid +396 -0
  112. package/assets/data/component-library/product-sidebar-bestsellers.liquid +99 -0
  113. package/assets/data/component-library/product-sidebar-deals.liquid +158 -0
  114. package/assets/data/component-library/product-template-2.liquid +629 -0
  115. package/assets/data/component-library/product-template-3.liquid +670 -0
  116. package/assets/data/component-library/product-template-4.liquid +627 -0
  117. package/assets/data/component-library/product-template-5.liquid +652 -0
  118. package/assets/data/component-library/product-template.liquid +698 -0
  119. package/assets/data/component-library/rich-text.liquid +541 -0
  120. package/assets/data/component-library/section-countdown-v2.liquid +215 -0
  121. package/assets/data/component-library/services.liquid +596 -0
  122. package/assets/data/component-library/shipping_info.liquid +327 -0
  123. package/assets/data/component-library/sidebar-bestsellers.liquid +109 -0
  124. package/assets/data/component-library/sidebar-category.liquid +105 -0
  125. package/assets/data/component-library/sidebar-colors.liquid +104 -0
  126. package/assets/data/component-library/single_product_feature.liquid +1892 -0
  127. package/assets/data/component-library/social-links-menu.liquid +244 -0
  128. package/assets/data/component-library/someone-purchased.liquid +190 -0
  129. package/assets/data/component-library/special-offer-area.liquid +530 -0
  130. package/assets/data/component-library/theno-minimal-clean-watch-store-shopify-theme-page-catev1-te.liquid +344 -0
  131. package/assets/data/component-library/top-bar-type-1.liquid +200 -0
  132. package/assets/data/component-library/top-bar-type-10.liquid +395 -0
  133. package/assets/data/component-library/top-bar-type-11.liquid +395 -0
  134. package/assets/data/component-library/top-bar-type-2.liquid +106 -0
  135. package/assets/data/component-library/top-bar-type-3.liquid +205 -0
  136. package/assets/data/component-library/top-countdown-bar.liquid +116 -0
  137. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-image-gallery.liquid +783 -0
  138. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-pricing-table.liquid +1043 -0
  139. package/assets/data/component-library/trixe-solar-responsive-shopify-template-home-testimonial.liquid +1338 -0
  140. package/assets/data/component-library/video.liquid +511 -0
  141. package/assets/data/component-library/waffy-spices-dry-fruits-store-shopify-theme-v-1-1-contact-us.liquid +523 -0
  142. package/assets/data/design-tokens.csv +93 -57
  143. package/assets/data/schema-library.csv +48 -46
  144. package/assets/data/settings-profiles.csv +235 -0
  145. package/assets/data/shopify-best-practices.csv +58 -36
  146. package/assets/scripts/backfill-component-index.py +102 -0
  147. package/assets/scripts/core.py +30 -8
  148. package/assets/scripts/fix-schema-library.py +42 -0
  149. package/assets/scripts/kb-analyzer-helpers.py +136 -0
  150. package/assets/scripts/kb-analyzer.py +186 -0
  151. package/assets/scripts/kb-builder.py +32 -63
  152. package/assets/scripts/kb-constants.py +62 -0
  153. package/assets/scripts/kb-extractor-helpers.py +178 -0
  154. package/assets/scripts/kb-extractor.py +106 -170
  155. package/assets/scripts/kb-synthesizer.py +251 -0
  156. package/assets/scripts/quality-gate-checks.py +55 -0
  157. package/assets/scripts/quality-gate.py +56 -2
  158. package/assets/scripts/section-generator-helpers.py +74 -0
  159. package/assets/scripts/section-generator.py +59 -49
  160. package/assets/templates/generation-prompt.md +78 -14
  161. package/package.json +1 -1
@@ -0,0 +1,178 @@
1
+ #!/usr/bin/env python3
2
+ """Helpers for kb-extractor.py — zip handling, flat extraction, dedup, manifest, report."""
3
+
4
+ import hashlib
5
+ import json
6
+ import re
7
+ import shutil
8
+ import zipfile
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+
12
+
13
+ # ─── Zip Extraction Logic ────────────────────────────────────────────────────
14
+
15
+ def find_shopify_root(base: Path) -> Path | None:
16
+ """Find the directory that contains sections/*.liquid files."""
17
+ if (base / 'sections').exists() and list((base / 'sections').glob('*.liquid')):
18
+ return base
19
+ for child in sorted(base.iterdir()):
20
+ if child.is_dir() and (child / 'sections').exists():
21
+ if list((child / 'sections').glob('*.liquid')):
22
+ return child
23
+ return None
24
+
25
+
26
def extract_zip_safe(zip_path: Path, dest: Path) -> bool:
    """Extract ``zip_path`` into ``dest``, skipping junk and unsafe members.

    A member is skipped when its name starts with ``__MACOSX``, starts with
    ``/``, or has a ``..`` path component. Names that merely contain two
    consecutive dots (e.g. ``promo..banner.liquid``) are kept, because they
    cannot escape ``dest``.

    Args:
        zip_path: Archive to read.
        dest: Target directory; created (with parents) if missing.

    Returns:
        ``True`` when extraction completed, ``False`` when the archive could
        not be opened or extracted. No exception reaches the caller.
    """
    def is_safe(name: str) -> bool:
        if name.startswith('__MACOSX') or name.startswith('/'):
            return False
        # Judge traversal per path component; backslashes are treated as
        # separators too so Windows-style "..\\" entries are also rejected.
        return '..' not in name.replace('\\', '/').split('/')

    try:
        dest.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(zip_path, 'r') as zf:
            members = [m for m in zf.namelist() if is_safe(m)]
            zf.extractall(dest, members=members)
        return True
    except Exception:
        # Deliberately best-effort: one unreadable archive (BadZipFile is an
        # Exception subclass) must not abort a whole batch run.
        return False
39
+
40
+
41
+ def find_inner_zip(base: Path) -> Path | None:
42
+ """Find most likely theme zip: install-me-*.zip > largest non-demo zip."""
43
+ for f in base.rglob('install-me-*.zip'):
44
+ return f
45
+ zips = [f for f in base.rglob('*.zip') if '__MACOSX' not in str(f)]
46
+ if not zips:
47
+ return None
48
+ excluded = {'demo', 'assets', 'preview', 'documentation', 'docs', 'samples'}
49
+ candidates = [z for z in zips if not any(e in z.stem.lower() for e in excluded)]
50
+ if not candidates:
51
+ candidates = zips
52
+ return max(candidates, key=lambda z: z.stat().st_size)
53
+
54
+
55
def extract_theme(zip_path: Path, tmp_dir: Path) -> Path | None:
    """Extract a theme zip, following nested zips up to 3 levels deep.

    Each level is unpacked into ``<tmp_dir>/<name>/_level<N>``. After every
    extraction the result is checked with ``find_shopify_root``; if no theme
    root is found, ``find_inner_zip`` chooses the next archive to open.

    Args:
        zip_path: Outer archive to process.
        tmp_dir: Scratch directory that receives the extracted files.

    Returns:
        The directory that contains ``sections/*.liquid``, or ``None`` when
        extraction fails, no inner zip is available, or no theme root is
        found within 3 levels.
    """
    # Name the scratch directory from theme_slug() truncated to 50 chars:
    # kb-extractor.py removes ``tmp_dir / slug[:50]`` after each theme, so a
    # differently derived name would never be cleaned up per theme.
    # The fallback keeps ``dest`` from collapsing to ``tmp_dir`` itself when
    # the slug is empty (file name without any letters or digits).
    theme_name = theme_slug(zip_path)[:50] or '_unnamed'
    dest = tmp_dir / theme_name
    if dest.exists():
        # Discard leftovers from a previous attempt on the same theme.
        shutil.rmtree(dest)

    current_zip = zip_path
    for level in range(3):
        level_dest = dest / f'_level{level}'
        if not extract_zip_safe(current_zip, level_dest):
            return None
        root = find_shopify_root(level_dest)
        if root:
            return root
        inner = find_inner_zip(level_dest)
        if not inner:
            return None
        current_zip = inner
    return None
75
+
76
+
77
def theme_slug(zip_path: Path) -> str:
    """Derive a filesystem-safe identifier from the zip's file name.

    The stem is lower-cased, every run of characters outside ``a-z0-9`` is
    collapsed to a single ``-``, leading and trailing dashes are removed,
    and the result is cut to at most 60 characters.
    """
    lowered = zip_path.stem.lower()
    dashed = re.sub(r'[^a-z0-9]+', '-', lowered)
    return dashed.strip('-')[:60]
82
+
83
+
84
def content_hash(path: Path) -> str:
    """Return the first 16 hex characters of the file's SHA-256 digest.

    The whole file is read into memory; the shortened digest serves as a
    dedup key for section files.
    """
    digest = hashlib.sha256()
    digest.update(path.read_bytes())
    return digest.hexdigest()[:16]
87
+
88
+
89
def already_extracted(out_dir: Path, slug: str) -> bool:
    """Report whether ``out_dir/<slug>`` exists and holds a ``manifest.json``."""
    theme_dir = out_dir / slug
    if not theme_dir.exists():
        return False
    manifest_file = theme_dir / 'manifest.json'
    return manifest_file.exists()
93
+
94
+
95
def extract_sections_flat(theme_root: Path, out_dir: Path, slug: str,
                          source_zip: str, zip_type: str) -> dict:
    """Copy a theme's distinct section files into ``out_dir/<slug>/sections/``.

    Files matching ``theme_root/sections/*.liquid`` are visited in sorted
    order. A file whose ``content_hash`` was already seen in this theme is
    counted as a duplicate and not copied. A ``manifest.json`` describing the
    run is then written to ``out_dir/<slug>/``.

    Returns:
        The manifest dict (slug, source zip, zip type, found/unique/deduped
        counts and a UTC ISO timestamp).
    """
    theme_dir = out_dir / slug
    target = theme_dir / 'sections'
    target.mkdir(parents=True, exist_ok=True)

    section_files = sorted((theme_root / 'sections').glob('*.liquid'))
    kept_digests = set()
    for section_file in section_files:
        digest = content_hash(section_file)
        if digest not in kept_digests:
            kept_digests.add(digest)
            shutil.copy2(section_file, target / section_file.name)

    total_found = len(section_files)
    total_unique = len(kept_digests)

    # Per-theme manifest; its presence marks the theme as extracted.
    manifest = {
        "slug": slug,
        "source_zip": source_zip,
        "zip_type": zip_type,
        "sections_found": total_found,
        "sections_unique": total_unique,
        "sections_deduped": total_found - total_unique,
        "extracted_at": datetime.now(timezone.utc).isoformat(),
    }
    (theme_dir / 'manifest.json').write_text(
        json.dumps(manifest, indent=2), encoding='utf-8')

    return manifest
134
+
135
+
136
def write_extraction_report(out_dir: Path, stats: dict, theme_manifests: list):
    """Write ``out_dir/extraction-report.md`` and return its path as a string.

    The Markdown report contains run totals taken from ``stats``, the number
    of themes per ``zip_type``, and a table of up to 20 themes ordered by
    unique section count (highest first).

    Args:
        out_dir: Directory that receives the report file.
        stats: Run counters; missing keys are reported as 0.
        theme_manifests: Per-theme manifest dicts; the table rows require the
            ``slug``, ``sections_unique`` and ``zip_type`` keys.
    """
    report_file = out_dir / 'extraction-report.md'

    # Single pass over the manifests: themes per type plus section totals.
    per_type = {}
    unique_total = 0
    deduped_total = 0
    for entry in theme_manifests:
        kind = entry.get('zip_type', '?')
        per_type[kind] = per_type.get(kind, 0) + 1
        unique_total += entry.get('sections_unique', 0)
        deduped_total += entry.get('sections_deduped', 0)

    header = [
        "# Extraction Report",
        f"\nGenerated: {datetime.now(timezone.utc).isoformat()}",
        "",
        "## Summary",
        f"- Total zips processed: {stats.get('total', 0)}",
        f"- Extracted: {stats.get('extracted', 0)}",
        f"- Skipped (no sections): {stats.get('skipped', 0)}",
        f"- Skipped (resume): {stats.get('resumed', 0)}",
        f"- Failed: {stats.get('failed', 0)}",
        f"- Unique sections: {unique_total}",
        f"- Deduped (within-theme): {deduped_total}",
        "",
        "## Type Breakdown",
    ]
    breakdown = [f"- Type {kind}: {count}"
                 for kind, count in sorted(per_type.items())]

    ranked = sorted(theme_manifests,
                    key=lambda entry: entry.get('sections_unique', 0),
                    reverse=True)[:20]
    table = [
        "",
        "## Top Themes by Section Count",
        "| Theme | Sections | Type |",
        "|-------|----------|------|",
    ]
    table.extend(
        f"| {entry['slug']} | {entry['sections_unique']} | {entry['zip_type']} |"
        for entry in ranked
    )

    report_file.write_text("\n".join(header + breakdown + table), encoding='utf-8')
    return str(report_file)
@@ -19,91 +19,21 @@ import importlib.util
19
19
  import json
20
20
  import shutil
21
21
  import sys
22
- import zipfile
23
22
  from pathlib import Path
24
23
 
24
+ # Import all helpers (extraction logic, dedup, manifest, report)
25
+ _helpers_path = Path(__file__).parent / 'kb-extractor-helpers.py'
26
+ _hspec = importlib.util.spec_from_file_location('kb_extractor_helpers', _helpers_path)
27
+ _helpers = importlib.util.module_from_spec(_hspec)
28
+ _hspec.loader.exec_module(_helpers)
29
+ theme_slug_fn = _helpers.theme_slug
30
+ already_extracted = _helpers.already_extracted
31
+ extract_sections_flat = _helpers.extract_sections_flat
32
+ write_extraction_report = _helpers.write_extraction_report
33
+ extract_theme = _helpers.extract_theme
25
34
 
26
35
  TMP_DIR = Path('themes/tmp')
27
36
 
28
- # ─── Extraction Logic ─────────────────────────────────────────────────────────
29
-
30
- def find_shopify_root(base: Path) -> Path | None:
31
- """Find the directory that contains sections/*.liquid files."""
32
- # Check base itself
33
- if (base / 'sections').exists() and list((base / 'sections').glob('*.liquid')):
34
- return base
35
- # Check one level deep
36
- for child in sorted(base.iterdir()):
37
- if child.is_dir() and (child / 'sections').exists():
38
- if list((child / 'sections').glob('*.liquid')):
39
- return child
40
- return None
41
-
42
-
43
- def extract_zip_safe(zip_path: Path, dest: Path) -> bool:
44
- """Extract a zip file safely, return True if successful."""
45
- try:
46
- dest.mkdir(parents=True, exist_ok=True)
47
- with zipfile.ZipFile(zip_path, 'r') as zf:
48
- # Filter out dangerous paths and MACOSX junk
49
- members = [m for m in zf.namelist()
50
- if not m.startswith('__MACOSX')
51
- and '..' not in m
52
- and not m.startswith('/')]
53
- zf.extractall(dest, members=members)
54
- return True
55
- except zipfile.BadZipFile:
56
- return False
57
- except Exception as e:
58
- print(f" āš ļø Extract error: {e}")
59
- return False
60
-
61
-
62
- def find_inner_zip(base: Path) -> Path | None:
63
- """Find the first inner zip file (install-me pattern or any zip)."""
64
- # Prefer install-me-*.zip pattern
65
- for f in base.rglob('install-me-*.zip'):
66
- return f
67
- # Fallback: any zip not in __MACOSX
68
- for f in base.rglob('*.zip'):
69
- if '__MACOSX' not in str(f):
70
- return f
71
- return None
72
-
73
-
74
- def extract_theme(zip_path: Path, tmp_dir: Path) -> Path | None:
75
- """
76
- Extract a theme zip to tmp_dir, handling all 3 types.
77
- Returns the path to the Shopify theme root (containing sections/) or None.
78
- """
79
- theme_name = zip_path.stem.lower().replace(' ', '-')[:50]
80
- dest = tmp_dir / theme_name
81
- # Clean previous attempt
82
- if dest.exists():
83
- shutil.rmtree(dest)
84
-
85
- # Level 1 extract
86
- if not extract_zip_safe(zip_path, dest):
87
- return None
88
-
89
- # Check if already a Shopify theme (Type A)
90
- root = find_shopify_root(dest)
91
- if root:
92
- return root
93
-
94
- # Level 2: find and extract inner zip (Type B & C)
95
- inner = find_inner_zip(dest)
96
- if not inner:
97
- return None
98
-
99
- inner_dest = dest / '_inner'
100
- if not extract_zip_safe(inner, inner_dest):
101
- return None
102
-
103
- root = find_shopify_root(inner_dest)
104
- return root
105
-
106
-
107
37
  # ─── KB Builder Integration ───────────────────────────────────────────────────
108
38
 
109
39
  def load_kb_builder(scripts_dir: Path):
@@ -119,46 +49,31 @@ def load_kb_builder(scripts_dir: Path):
119
49
 
120
50
  # ─── Main ─────────────────────────────────────────────────────────────────────
121
51
 
122
- def process_zip(zip_path: Path, kb_dir: Path, tmp_dir: Path, kb_builder, stats: dict, dry_run: bool = False):
123
- """Extract a single zip, build KB from it, cleanup."""
124
- print(f"\nšŸ“¦ {zip_path.name}")
52
+ def build_queue(args, allowed_types, skip_collections):
53
+ """Build the zip processing queue from manifest, source, or single zip."""
54
+ queue = [] # list of (zip_path, zip_type, source_str)
125
55
 
126
- if dry_run:
127
- print(f" [DRY RUN] Would extract: {zip_path.name}")
128
- stats['dry_run'] += 1
129
- return
130
-
131
- theme_root = extract_theme(zip_path, tmp_dir)
56
+ if args.zip:
57
+ p = Path(args.zip)
58
+ queue.append((p, '?', str(p)))
132
59
 
133
- if not theme_root:
134
- print(f" āŒ No valid Shopify sections found — skipping")
135
- stats['skipped'] += 1
136
- # Cleanup failed extraction
137
- theme_tmp = tmp_dir / zip_path.stem.lower().replace(' ', '-')[:50]
138
- if theme_tmp.exists():
139
- shutil.rmtree(theme_tmp, ignore_errors=True)
140
- return
60
+ elif args.manifest:
61
+ manifest = json.loads(Path(args.manifest).read_text(encoding='utf-8'))
62
+ source_base = Path(manifest.get('source', '.'))
63
+ for coll_name, coll_data in manifest.get('collections', {}).items():
64
+ if coll_name in skip_collections:
65
+ continue
66
+ for entry in coll_data.get('files', []):
67
+ if entry.get('type') in allowed_types:
68
+ zip_path = source_base / entry['path']
69
+ if zip_path.exists():
70
+ queue.append((zip_path, entry['type'], entry['path']))
141
71
 
142
- sections = list((theme_root / 'sections').glob('*.liquid'))
143
- print(f" āœ… Found {len(sections)} sections → building KB...")
144
- stats['found'] += 1
145
-
146
- # Run kb-builder
147
- if kb_builder:
148
- bstats = {'added': 0, 'updated': 0, 'skipped': 0}
149
- kb_builder.process_theme(theme_root, kb_dir, bstats)
150
- stats['sections_added'] += bstats['added']
151
- stats['sections_updated'] += bstats['updated']
152
- print(f" šŸ“ +{bstats['added']} new, ↑{bstats['updated']} updated, ā­ {bstats['skipped']} skipped")
153
- else:
154
- print(f" āš ļø kb-builder not found — extracted to {theme_root} (not cleaning up)")
155
- return
72
+ elif args.source:
73
+ source = Path(args.source)
74
+ queue = [(f, '?', str(f)) for f in source.rglob('*.zip')]
156
75
 
157
- # Cleanup to save disk space
158
- parent = theme_tmp = tmp_dir / zip_path.stem.lower().replace(' ', '-')[:50]
159
- if parent.exists():
160
- shutil.rmtree(parent, ignore_errors=True)
161
- print(f" šŸ—‘ļø Cleaned up temp files")
76
+ return queue
162
77
 
163
78
 
164
79
  def main():
@@ -167,80 +82,101 @@ def main():
167
82
  parser.add_argument('--zip', help='Path to single zip file to process')
168
83
  parser.add_argument('--source', help='Path to shopify-theme-data/ (process all zips directly)')
169
84
  parser.add_argument('--kb', default='src/rebly-sections/data/', help='Path to KB data dir')
85
+ parser.add_argument('--out', default='', help='Flat extraction output dir (e.g. themes/extracted/)')
170
86
  parser.add_argument('--tmp', default='themes/tmp/', help='Temp extraction dir')
171
87
  parser.add_argument('--batch-size', type=int, default=10, help='Process N zips at a time')
172
88
  parser.add_argument('--dry-run', action='store_true', help='Preview without extracting')
173
89
  parser.add_argument('--types', default='A,B,C', help='Zip types to process (A,B,C)')
90
+ parser.add_argument('--skip-types', nargs='*', default=['WP', 'RAR', 'INVALID'],
91
+ help='Zip types to skip')
92
+ parser.add_argument('--skip-collections', nargs='*', default=[
93
+ '150+ Shopify Codes', '150+ Shopify Liquid Codes'],
94
+ help='Collection names to skip entirely')
95
+ parser.add_argument('--resume', action='store_true', default=True,
96
+ help='Skip already-extracted themes (default: on)')
174
97
  args = parser.parse_args()
175
98
 
176
99
  kb_dir = Path(args.kb)
177
100
  tmp_dir = Path(args.tmp)
178
101
  tmp_dir.mkdir(parents=True, exist_ok=True)
102
+ out_dir = Path(args.out) if args.out else None
103
+ if out_dir:
104
+ out_dir.mkdir(parents=True, exist_ok=True)
179
105
 
180
- # Load kb-builder
106
+ # Load kb-builder (only needed for legacy KB mode, not flat extraction)
181
107
  scripts_dir = Path(__file__).parent
182
- kb_builder = load_kb_builder(scripts_dir)
183
- if not kb_builder and not args.dry_run:
184
- print("āŒ kb-builder.py not found in scripts dir. Run from correct directory.")
185
- sys.exit(1)
186
-
187
- allowed_types = set(args.types.upper().split(','))
188
- stats = {'found': 0, 'skipped': 0, 'dry_run': 0, 'sections_added': 0, 'sections_updated': 0}
108
+ kb_builder = load_kb_builder(scripts_dir) if not out_dir else None
189
109
 
190
- # Build zip queue
191
- queue = []
110
+ allowed_types = set(args.types.upper().split(',')) - set(args.skip_types)
111
+ skip_colls = set(args.skip_collections) if args.skip_collections else set()
112
+ queue = build_queue(args, allowed_types, skip_colls)
192
113
 
193
- if args.zip:
194
- queue = [Path(args.zip)]
195
-
196
- elif args.manifest:
197
- manifest = json.loads(Path(args.manifest).read_text(encoding='utf-8'))
198
- source_base = Path(manifest.get('source', '.'))
199
- for coll_name, coll_data in manifest.get('collections', {}).items():
200
- for entry in coll_data.get('files', []):
201
- if entry.get('type') in allowed_types:
202
- zip_path = source_base / entry['path']
203
- if zip_path.exists():
204
- queue.append(zip_path)
205
-
206
- elif args.source:
207
- source = Path(args.source)
208
- queue = [f for f in source.rglob('*.zip')]
209
-
210
- else:
114
+ if not queue:
211
115
  parser.print_help()
212
116
  return
213
117
 
214
118
  total = len(queue)
215
- print(f"šŸš€ Processing {total} zip files (batch size: {args.batch_size})")
216
-
217
- # Process in batches
218
- for i in range(0, total, args.batch_size):
219
- batch = queue[i:i + args.batch_size]
220
- print(f"\n{'='*50}")
221
- print(f"Batch {i // args.batch_size + 1} of {(total + args.batch_size - 1) // args.batch_size}")
222
- print(f"{'='*50}")
223
-
224
- for zip_path in batch:
225
- process_zip(zip_path, kb_dir, tmp_dir, kb_builder, stats, args.dry_run)
119
+ stats = {'total': total, 'extracted': 0, 'skipped': 0, 'resumed': 0,
120
+ 'failed': 0, 'sections_added': 0, 'sections_updated': 0}
121
+ theme_manifests = []
122
+
123
+ print(f"Processing {total} zip files (batch size: {args.batch_size})")
124
+
125
+ for i, (zip_path, zip_type, source_str) in enumerate(queue, 1):
126
+ slug = theme_slug_fn(zip_path)
127
+ print(f"\n[{i}/{total}] {zip_path.name} [{zip_type}]")
128
+
129
+ if args.dry_run:
130
+ print(f" [DRY RUN] Would extract: {zip_path.name}")
131
+ continue
132
+
133
+ # Resume: skip already-extracted themes
134
+ if out_dir and args.resume and already_extracted(out_dir, slug):
135
+ print(f" -- Already extracted, skipping")
136
+ stats['resumed'] += 1
137
+ continue
138
+
139
+ try:
140
+ theme_root = extract_theme(zip_path, tmp_dir)
141
+ except Exception as e:
142
+ print(f" FAIL: {e}")
143
+ stats['failed'] += 1
144
+ continue
145
+
146
+ if not theme_root:
147
+ print(f" No sections found, skipping")
148
+ stats['skipped'] += 1
149
+ elif out_dir:
150
+ # Flat extraction mode
151
+ m = extract_sections_flat(theme_root, out_dir, slug, source_str, zip_type)
152
+ theme_manifests.append(m)
153
+ stats['extracted'] += 1
154
+ print(f" OK: {m['sections_unique']} unique sections ({m['sections_deduped']} deduped)")
155
+ elif kb_builder:
156
+ # Legacy KB builder mode
157
+ bstats = {'added': 0, 'updated': 0, 'skipped': 0}
158
+ kb_builder.process_theme(theme_root, kb_dir, bstats)
159
+ stats['extracted'] += 1
160
+ stats['sections_added'] += bstats['added']
161
+ stats['sections_updated'] += bstats['updated']
162
+ print(f" +{bstats['added']} new, ^{bstats['updated']} updated")
163
+
164
+ # Cleanup tmp for this theme
165
+ theme_tmp = tmp_dir / slug[:50]
166
+ if theme_tmp.exists():
167
+ shutil.rmtree(theme_tmp, ignore_errors=True)
226
168
 
227
169
  # Final cleanup
228
170
  if tmp_dir.exists() and not args.dry_run:
229
- remaining = list(tmp_dir.iterdir())
230
- if remaining:
231
- print(f"\nāš ļø {len(remaining)} unclean dirs in tmp — cleaning up...")
232
- shutil.rmtree(tmp_dir, ignore_errors=True)
233
-
234
- print(f"""
235
- ╔══════════════════════════════════════╗
236
- ā•‘ KB Extractor Complete ā•‘
237
- ╠══════════════════════════════════════╣
238
- ā•‘ āœ… Themes processed: {stats['found']:>4} ā•‘
239
- ā•‘ āŒ Skipped (no sections): {stats['skipped']:>4} ā•‘
240
- ā•‘ šŸ†• Sections added: {stats['sections_added']:>4} ā•‘
241
- ā•‘ ā¬†ļø Sections updated: {stats['sections_updated']:>4} ā•‘
242
- ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
243
- """)
171
+ shutil.rmtree(tmp_dir, ignore_errors=True)
172
+
173
+ # Write extraction report
174
+ if out_dir and theme_manifests:
175
+ report_path = write_extraction_report(out_dir, stats, theme_manifests)
176
+ print(f"\nReport: {report_path}")
177
+
178
+ print(f"\nExtracted: {stats['extracted']} | Skipped: {stats['skipped']} | "
179
+ f"Resumed: {stats['resumed']} | Failed: {stats['failed']}")
244
180
 
245
181
 
246
182
  if __name__ == '__main__':