@flitzrrr/agent-skills 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursorrules +2 -2
- package/.github/copilot-instructions.md +59 -0
- package/.lovable +1 -1
- package/AGENTS.md +2 -2
- package/CHEATSHEET.md +84 -86
- package/CLAUDE.md +2 -2
- package/LICENSE +27 -0
- package/README.md +191 -99
- package/bin/build-catalog.js +208 -0
- package/bin/cli.js +7 -3
- package/bin/rebuild-symlinks.js +161 -0
- package/bin/sync-docs.js +147 -0
- package/bin/sync-skills.sh +17 -0
- package/bin/test-cli.js +115 -0
- package/bin/update-wiki.js +102 -0
- package/package.json +9 -2
- package/skills/dispatch-parallel-agents/skill.md +95 -0
- package/skills/execute-work-package/SKILL.md +300 -0
- package/skills/execute-work-package/scripts/start-l4l-oci.sh +75 -0
- package/skills/execute-work-package/tpl-execution-blueprint.md +39 -0
- package/skills/execute-work-package/tpl-execution-digest.md +24 -0
- package/skills/execute-work-package/tpl-implementer-execute-prompt.md +57 -0
- package/skills/execute-work-package/tpl-implementer-preflight-prompt.md +66 -0
- package/skills/product-description-seo/CROSS-SELL.md +31 -0
- package/skills/product-description-seo/KEYWORDS.md +35 -0
- package/skills/product-description-seo/SKILL.md +361 -0
- package/skills/product-description-seo/scripts/analyze_catalog.py +136 -0
- package/skills/product-description-seo/scripts/check_quality.py +204 -0
- package/skills/product-description-seo/scripts/extract_category.py +88 -0
- package/skills/product-description-seo/scripts/track_progress.py +140 -0
- package/skills/product-description-seo/scripts/update_catalog.py +80 -0
- package/skills/product-description-seo/scripts/validate_json.py +87 -0
- package/skills/systematic-debugging/skill.md +87 -0
- package/skills/tob-gh-cli/SKILL.md +71 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: product-description-seo
|
|
3
|
+
description: "End-to-end skill for SEO-optimized product descriptions: inventory analysis, generation, quality assurance, catalog update, and deployment. Use whenever the user wants to analyze, write, check, update, or deploy product descriptions — whether for a single product, a category, or an entire catalog. Triggers on: 'product description', 'Produktbeschreibung', 'SEO text', 'catalog text', 'update descriptions', 'which descriptions are missing', 'thin content', 'QA check', 'description quality', or when the user names a product category and wants texts for it. Also triggers on 'deploy' or 'commit' in the context of product descriptions."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Product Description SEO — End-to-End Workflow
|
|
7
|
+
|
|
8
|
+
A complete pipeline for turning thin or missing product descriptions into SEO-optimized, structured content. Works with any JSON-based product catalog, any language, any industry.
|
|
9
|
+
|
|
10
|
+
## Quick Reference
|
|
11
|
+
|
|
12
|
+
| Phase | Task | Tool |
|
|
13
|
+
| -------------- | ------------------------------------------------ | ----------------------------- |
|
|
14
|
+
| 1. Inventory | Analyze catalog, find thin descriptions | `scripts/analyze_catalog.py` |
|
|
15
|
+
| 2. Progress | Check current completion status | `scripts/track_progress.py` |
|
|
16
|
+
| 3. Extraction | Pull category products as batch-ready JSON | `scripts/extract_category.py` |
|
|
17
|
+
| 4. Generation | Write descriptions using 4-paragraph structure | See structure below |
|
|
18
|
+
| 5. QA | Automated quality check (8 criteria) | `scripts/check_quality.py` |
|
|
19
|
+
| 6. Update | Write descriptions back to catalog JSON | `scripts/update_catalog.py` |
|
|
20
|
+
| 7. Validation | Verify JSON structure after update | `scripts/validate_json.py` |
|
|
21
|
+
| 8. Rendering | Adapt frontend for multi-paragraph text | Code guidance below |
|
|
22
|
+
| 9. Deploy | Feature branch, PR, merge | Git workflow |
|
|
23
|
+
|
|
24
|
+
## Setup
|
|
25
|
+
|
|
26
|
+
To tailor the skill to a specific catalog, you can create a `product-seo-config.json` in the working directory before first use. This step is optional — all scripts work without it and fall back to sensible defaults.
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"catalog_path": "path/to/catalog/index.json",
|
|
31
|
+
"company": {
|
|
32
|
+
"name": "Company Name",
|
|
33
|
+
"founded": "1990",
|
|
34
|
+
"location": "City, Country",
|
|
35
|
+
"expertise": "short description of domain expertise",
|
|
36
|
+
"usp": "key selling points for the closing paragraph"
|
|
37
|
+
},
|
|
38
|
+
"fields": {
|
|
39
|
+
"description": "beschreibung",
|
|
40
|
+
"category": "kategorieName",
|
|
41
|
+
"sku": "sku",
|
|
42
|
+
"name": "name",
|
|
43
|
+
"status": "status",
|
|
44
|
+
"status_active_value": "aktiv"
|
|
45
|
+
},
|
|
46
|
+
"seo": {
|
|
47
|
+
"min_words": 200,
|
|
48
|
+
"max_words": 350,
|
|
49
|
+
"language": "de",
|
|
50
|
+
"primary_keyword_pattern": "{product_name} + {industry_term}",
|
|
51
|
+
"banned_words": ["best-in-class", "unparalleled", "revolutionary", "world-leading"]
|
|
52
|
+
},
|
|
53
|
+
"target_audience": "procurement managers, facility managers, technical buyers",
|
|
54
|
+
"tone": "professional, expert, trustworthy — like an experienced technical consultant",
|
|
55
|
+
"priorities": {
|
|
56
|
+
"1": ["Core Category A", "Core Category B"],
|
|
57
|
+
"2": ["Secondary Category C"],
|
|
58
|
+
"3": []
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Catalog Format
|
|
64
|
+
|
|
65
|
+
The catalog must be a JSON file with a `products` array. Field names are configurable via `product-seo-config.json`:
|
|
66
|
+
|
|
67
|
+
```json
|
|
68
|
+
{
|
|
69
|
+
"products": [
|
|
70
|
+
{
|
|
71
|
+
"sku": "401",
|
|
72
|
+
"name": "Product Name",
|
|
73
|
+
"beschreibung": "Current description text...",
|
|
74
|
+
"kategorie": "category-slug",
|
|
75
|
+
"kategorieName": "Category Display Name",
|
|
76
|
+
"preis": 29.99,
|
|
77
|
+
"einheit": "piece",
|
|
78
|
+
"status": "aktiv",
|
|
79
|
+
"slug": "product-name-401"
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Only the description field is updated. Everything else stays untouched.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Phase 1: Inventory
|
|
90
|
+
|
|
91
|
+
Understand the current state of the catalog — which descriptions are too thin, which categories need work.
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# Full catalog overview
|
|
95
|
+
python scripts/analyze_catalog.py <catalog.json>
|
|
96
|
+
|
|
97
|
+
# Single category (fuzzy match — "Tools" finds "Power Tools" etc.)
|
|
98
|
+
python scripts/analyze_catalog.py <catalog.json> --category "Tools"
|
|
99
|
+
|
|
100
|
+
# Custom word threshold
|
|
101
|
+
python scripts/analyze_catalog.py <catalog.json> --min-words 150
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Output: per-category statistics (average word count, products below threshold), the top-20 thinnest descriptions, and, when filtering by category, a JSON export ready for batch prompting.
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Phase 2: Progress
|
|
109
|
+
|
|
110
|
+
Track the completion status of the description update campaign.
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
python scripts/track_progress.py <catalog.json>
|
|
114
|
+
|
|
115
|
+
# With priority configuration
|
|
116
|
+
python scripts/track_progress.py <catalog.json> --config product-seo-config.json
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Output: progress bar, per-priority breakdown, next-action queue showing which categories and products to tackle next.
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Phase 3: Extraction
|
|
124
|
+
|
|
125
|
+
Pull products from a category as prompt-ready JSON.
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# First 8 products
|
|
129
|
+
python scripts/extract_category.py <catalog.json> "Power Tools" --limit 8
|
|
130
|
+
|
|
131
|
+
# Thinnest descriptions first (most urgent)
|
|
132
|
+
python scripts/extract_category.py <catalog.json> "Power Tools" --thin-first
|
|
133
|
+
|
|
134
|
+
# Pagination for large categories
|
|
135
|
+
python scripts/extract_category.py <catalog.json> "Accessories" --offset 8 --limit 8
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Category matching is fuzzy — partial matches work. If no match is found, available categories are listed.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Phase 4: Generation
|
|
143
|
+
|
|
144
|
+
Write descriptions following the **4-paragraph structure**. Process at most **5-8 products per batch** to maintain quality.
|
|
145
|
+
|
|
146
|
+
### 4-Paragraph Structure
|
|
147
|
+
|
|
148
|
+
**Paragraph 1 — Introduction and Value (50-80 words)**
|
|
149
|
+
Focus keyword (product name + industry/category term) in the **first sentence**. What is the product? What problem does it solve? Why does the target audience need it?
|
|
150
|
+
|
|
151
|
+
The first 155 characters must work standalone as a meta description — complete thought, ending with a period.
|
|
152
|
+
|
|
153
|
+
**Paragraph 2 — Technical Specifications (60-100 words)**
|
|
154
|
+
Materials, dimensions, weight, capacity, special features. **Every** technical fact from the existing description must be preserved 1:1. Never omit data, never fabricate specs. If source data is vague, keep it vague.
|
|
155
|
+
|
|
156
|
+
**Paragraph 3 — Application and Practice (40-60 words)**
|
|
157
|
+
Typical use cases, ideal conditions, when to deploy. Mention 1-2 complementary products from the same catalog (cross-sell). Use CROSS-SELL.md as reference if available.
|
|
158
|
+
|
|
159
|
+
**Paragraph 4 — Quality and Service (30-50 words)**
|
|
160
|
+
Company expertise, personal consultation, delivery/support promise. Soft CTA: "Contact us for personalized advice." No aggressive sales pressure. Adapt to the company info from config.
|
|
161
|
+
|
|
162
|
+
### Variant Rule
|
|
163
|
+
|
|
164
|
+
Products with color, size, or material variants **must get unique texts**. The variant attribute must be mentioned and contextualized — never copy the same text across variants. Example: a red variant could mention visibility advantages; a compact variant could highlight space efficiency.
|
|
165
|
+
|
|
166
|
+
### Tone
|
|
167
|
+
|
|
168
|
+
- Professional, expert, trustworthy — like an experienced technical consultant
|
|
169
|
+
- Formal address (German: "Sie"; English: naturally formal; adapt to language)
|
|
170
|
+
- **Banned words:** best-in-class, unparalleled, revolutionary, sensational, unmatched, premium (without proof), world-leading, perfect, unique (without proof)
|
|
171
|
+
- Superlatives only with concrete evidence (e.g., "proven for over 10 years")
|
|
172
|
+
- Adapt to `target_audience` and `tone` from config
|
|
173
|
+
|
|
174
|
+
### SEO Keywords
|
|
175
|
+
|
|
176
|
+
- **Primary:** [Product Name] + industry/category term
|
|
177
|
+
- **Secondary:** [Category Name], industry-standard terms
|
|
178
|
+
- **Longtail:** "[Product Name] buy/purchase", "[Product Name] for [use case]"
|
|
179
|
+
- Maximum 3x focus keyword per text — no keyword stuffing
|
|
180
|
+
- Use KEYWORDS.md for category-specific targets if available
|
|
181
|
+
|
|
182
|
+
### Output Format
|
|
183
|
+
|
|
184
|
+
Save descriptions as a JSON array:
|
|
185
|
+
|
|
186
|
+
```json
|
|
187
|
+
[
|
|
188
|
+
{
|
|
189
|
+
"sku": "401",
|
|
190
|
+
"name": "Product Name",
|
|
191
|
+
"beschreibung": "Paragraph 1...\n\nParagraph 2...\n\nParagraph 3...\n\nParagraph 4..."
|
|
192
|
+
}
|
|
193
|
+
]
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Save as `updates-<category-slug>.json`.
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Phase 5: Quality Assurance
|
|
201
|
+
|
|
202
|
+
Automated check against all requirements:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
# Standard check
|
|
206
|
+
python scripts/check_quality.py updates-tools.json
|
|
207
|
+
|
|
208
|
+
# Strict mode (warnings become failures)
|
|
209
|
+
python scripts/check_quality.py updates-tools.json --strict
|
|
210
|
+
|
|
211
|
+
# Custom word range
|
|
212
|
+
python scripts/check_quality.py updates-tools.json --min-words 150 --max-words 400
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Checks per description:
|
|
216
|
+
|
|
217
|
+
1. Word count within configured range (default 200-350)
|
|
218
|
+
2. 4-paragraph structure (exactly 3x `\n\n` separator)
|
|
219
|
+
3. Focus keyword (product name) in first sentence
|
|
220
|
+
4. No banned superlatives
|
|
221
|
+
5. Cross-sell reference present (company name or "combination with")
|
|
222
|
+
6. Formal address (no informal pronouns in German texts)
|
|
223
|
+
7. Meta-description-ready first 155 characters
|
|
224
|
+
8. Plain text only (no HTML, no Markdown, no bullet points)
|
|
225
|
+
|
|
226
|
+
Fix any failures, re-check. Proceed to Phase 6 only when all checks pass.
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Phase 6: Update
|
|
231
|
+
|
|
232
|
+
Write QA-approved descriptions back to the catalog:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
python scripts/update_catalog.py <catalog.json> updates-tools.json
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The script:
|
|
239
|
+
1. Creates an automatic backup (`catalog.json.backup_YYYYMMDD_HHMMSS`)
|
|
240
|
+
2. Matches updates by SKU
|
|
241
|
+
3. Reports word count before -> after per product
|
|
242
|
+
4. Flags SKUs not found in catalog
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## Phase 7: Validation
|
|
247
|
+
|
|
248
|
+
Verify JSON integrity after the update:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
python scripts/validate_json.py <catalog.json>
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Checks: valid JSON parse, required fields on every product, no duplicate SKUs, no HTML tags in descriptions, valid slug format.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Phase 8: Rendering
|
|
259
|
+
|
|
260
|
+
If the frontend renders descriptions as a single element, adapt it for multi-paragraph text. Paragraphs are separated by `\n\n` in the JSON.
|
|
261
|
+
|
|
262
|
+
**Svelte:**
|
|
263
|
+
```svelte
|
|
264
|
+
{#each description.split('\n\n') as paragraph}
|
|
265
|
+
<p class="description">{paragraph}</p>
|
|
266
|
+
{/each}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
**React:**
|
|
270
|
+
```jsx
|
|
271
|
+
{description.split('\n\n').map((p, i) => (
|
|
272
|
+
<p key={i} className="description">{p}</p>
|
|
273
|
+
))}
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Vue:**
|
|
277
|
+
```vue
|
|
278
|
+
<p v-for="(p, i) in description.split('\n\n')" :key="i" class="description">{{ p }}</p>
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Meta description** — use the first paragraph, truncated:
|
|
282
|
+
```javascript
|
|
283
|
+
const metaDescription = description.split('\n\n')[0].slice(0, 155);
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
This change only needs to happen once and applies to all products.
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Phase 9: Deploy
|
|
291
|
+
|
|
292
|
+
Standard git workflow — never push directly to main.
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
git checkout -b product-descriptions/<category-slug>
|
|
296
|
+
git add path/to/catalog.json
|
|
297
|
+
git commit -m "content: Update product descriptions for <Category>"
|
|
298
|
+
gh pr create --title "Content: Product descriptions <Category>"
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
One PR per category or batch to keep reviews manageable.
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Example: Full Run
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
# 1. Where do we stand?
|
|
309
|
+
python scripts/track_progress.py catalog.json
|
|
310
|
+
|
|
311
|
+
# 2. Next category
|
|
312
|
+
python scripts/extract_category.py catalog.json "Tools" --thin-first
|
|
313
|
+
|
|
314
|
+
# 3. Generate descriptions (4-paragraph structure)
|
|
315
|
+
# -> save as updates-tools.json
|
|
316
|
+
|
|
317
|
+
# 4. Quality check
|
|
318
|
+
python scripts/check_quality.py updates-tools.json
|
|
319
|
+
|
|
320
|
+
# 5. Write to catalog
|
|
321
|
+
python scripts/update_catalog.py catalog.json updates-tools.json
|
|
322
|
+
|
|
323
|
+
# 6. Validate JSON
|
|
324
|
+
python scripts/validate_json.py catalog.json
|
|
325
|
+
|
|
326
|
+
# 7. Commit and PR
|
|
327
|
+
git checkout -b product-descriptions/tools
|
|
328
|
+
git add catalog.json
|
|
329
|
+
git commit -m "content: Update product descriptions Tools (8 products)"
|
|
330
|
+
gh pr create --title "Content: Product descriptions Tools"
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## Optional Reference Files
|
|
336
|
+
|
|
337
|
+
Not required, but improve output quality when present alongside the skill:
|
|
338
|
+
|
|
339
|
+
| File | Purpose |
|
|
340
|
+
| ----------------------- | ---------------------------------------------------------------- |
|
|
341
|
+
| KEYWORDS.md | Category-specific keyword targets (primary, secondary, longtail) |
|
|
342
|
+
| CROSS-SELL.md | Cross-sell matrix defining which categories reference each other |
|
|
343
|
+
| product-seo-config.json | Company context, field mappings, priorities |
|
|
344
|
+
|
|
345
|
+
Templates for KEYWORDS.md and CROSS-SELL.md are included — fill them in for your catalog.
|
|
346
|
+
|
|
347
|
+
## Scripts
|
|
348
|
+
|
|
349
|
+
| Script | Purpose |
|
|
350
|
+
| ----------------------------- | ----------------------------------------------------- |
|
|
351
|
+
| `scripts/analyze_catalog.py` | Catalog analysis (thinnest descriptions, word counts) |
|
|
352
|
+
| `scripts/track_progress.py` | Progress tracking with priority support |
|
|
353
|
+
| `scripts/extract_category.py` | Category extraction for batch prompting (fuzzy match) |
|
|
354
|
+
| `scripts/check_quality.py` | Automated QA with 8 configurable checks |
|
|
355
|
+
| `scripts/update_catalog.py` | Write descriptions to catalog with automatic backup |
|
|
356
|
+
| `scripts/validate_json.py` | JSON structure validation |
|
|
357
|
+
|
|
358
|
+
## Dependencies
|
|
359
|
+
|
|
360
|
+
- **Python 3.10+** for all scripts
|
|
361
|
+
- **gh CLI** (optional) for PR creation in Phase 9
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Analyze a JSON product catalog for thin or missing descriptions.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python analyze_catalog.py <catalog.json> [--category <name>] [--min-words 200]
|
|
6
|
+
|
|
7
|
+
Shows per-category stats, identifies the thinnest descriptions,
|
|
8
|
+
and exports batch-ready JSON when filtering by category.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
import argparse
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
|
|
16
|
+
# Catalog field names used by this script.
# NOTE: this script does not load product-seo-config.json; to analyze a
# catalog with different field names, edit the values below.
DESC_FIELD = "beschreibung"      # product description text
CAT_FIELD = "kategorieName"      # preferred category field
CAT_FIELD_ALT = "kategorie"      # fallback category field
SKU_FIELD = "sku"                # product identifier
NAME_FIELD = "name"              # product display name
STATUS_FIELD = "status"          # product status field
STATUS_ACTIVE = "aktiv"          # status value that marks a product as active
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def count_words(text: str) -> int:
|
|
27
|
+
if not text or not text.strip():
|
|
28
|
+
return 0
|
|
29
|
+
return len(text.split())
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def analyze(catalog_path: str, category_filter: str | None = None, min_words: int = 200) -> None:
    """Print a description-length report for the active products of a catalog.

    Args:
        catalog_path: Path to a UTF-8 JSON file whose top-level object has a
            ``products`` list.
        category_filter: Optional case-insensitive substring. When given, only
            products whose ``CAT_FIELD`` or ``CAT_FIELD_ALT`` value contains it
            are analyzed, and a JSON export of those products is printed after
            the report.
        min_words: Descriptions with fewer words than this are counted as
            below threshold.

    Products that carry a status field with a value other than
    ``STATUS_ACTIVE`` are ignored. If nothing is left to analyze, the
    categories found in the catalog are listed instead of a report.
    """
    with open(catalog_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    all_entries = data.get("products", [])

    def category_of(product: dict, fallback: str) -> str:
        # JSON null / empty values are treated like a missing key so they
        # cannot break sorting or column formatting further down.
        return str(product.get(CAT_FIELD) or product.get(CAT_FIELD_ALT) or fallback)

    products = [p for p in all_entries
                if p.get(STATUS_FIELD, STATUS_ACTIVE) == STATUS_ACTIVE]

    if category_filter:
        needle = category_filter.lower()
        # A substring test already covers exact equality.
        products = [
            p for p in products
            if needle in str(p.get(CAT_FIELD_ALT) or "").lower()
            or needle in str(p.get(CAT_FIELD) or "").lower()
        ]

    if not products:
        print("No matching products found.")
        known_categories = sorted({category_of(p, "?") for p in all_entries})
        if known_categories:
            print("Available categories:")
            for name in known_categories:
                print(f" - {name}")
        return

    # Group one summary record per product under its category.
    categories = defaultdict(list)
    for p in products:
        cat = category_of(p, "Unknown")
        description = p.get(DESC_FIELD, "")
        categories[cat].append({
            SKU_FIELD: p.get(SKU_FIELD, ""),
            NAME_FIELD: p.get(NAME_FIELD, ""),
            "word_count": count_words(description),
            DESC_FIELD: description,
            "_category": cat,
        })

    def average_words(records: list) -> float:
        return sum(r["word_count"] for r in records) / len(records)

    # Flattened in category-insertion order; the stable sort below then keeps
    # ties in that order.
    all_products = [r for records in categories.values() for r in records]

    total = len(products)  # always >= 1 here because of the early return above
    below_threshold = sum(1 for r in all_products if r["word_count"] < min_words)
    below_pct = below_threshold * 100 // total
    above_pct = (total - below_threshold) * 100 // total

    rule = "=" * 70
    print(rule)
    print("CATALOG DESCRIPTION ANALYSIS")
    print(rule)
    print(f"Active products: {total}")
    print(f"Below {min_words} words: {below_threshold} ({below_pct}%)")
    print(f"Above {min_words} words: {total - below_threshold} ({above_pct}%)")
    print()

    threshold_label = f"< {min_words}"
    print(f"{'Category':<45} {'Count':>5} {'Avg Words':>9} {threshold_label:>7}")
    print(f"{'-' * 45} {'-' * 5} {'-' * 9} {'-' * 7}")

    # Categories with the lowest average word count come first.
    for cat_name, records in sorted(categories.items(), key=lambda item: average_words(item[1])):
        below = sum(1 for r in records if r["word_count"] < min_words)
        print(f"{cat_name:<45} {len(records):>5} {average_words(records):>9.1f} {below:>7}")

    # Thinnest descriptions
    print(f"\n{rule}")
    print("THINNEST DESCRIPTIONS (Top 20)")
    print(rule)

    all_products.sort(key=lambda r: r["word_count"])

    for rank, r in enumerate(all_products[:20], start=1):
        print(f"\n{rank}. {r[NAME_FIELD]} (SKU {r[SKU_FIELD]}) — {r['_category']}")
        print(f" Words: {r['word_count']}")
        # The description may be JSON null; treat it as empty for the preview.
        desc = r[DESC_FIELD] or ""
        desc_preview = desc[:120] + "..." if len(desc) > 120 else (desc or "(empty)")
        print(f' Text: "{desc_preview}"')

    # JSON export when filtering by category
    if category_filter:
        print(f"\n{rule}")
        print(f"BATCH-READY JSON (Category: {category_filter})")
        print(rule)
        export = [
            {
                SKU_FIELD: r[SKU_FIELD],
                NAME_FIELD: r[NAME_FIELD],
                DESC_FIELD: r[DESC_FIELD],
                "word_count": r["word_count"],
            }
            for r in all_products
        ]
        print(json.dumps(export, ensure_ascii=False, indent=2))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def main():
    """Parse command-line arguments and run the catalog analysis.

    Exits with status 1 and a one-line message on stderr when the catalog
    file cannot be opened or does not contain valid JSON.
    """
    parser = argparse.ArgumentParser(description="Analyze catalog descriptions")
    parser.add_argument("catalog_path", help="Path to catalog JSON file")
    parser.add_argument("--category", "-c", help="Filter by category name (fuzzy match)")
    parser.add_argument("--min-words", "-m", type=int, default=200,
                        help="Minimum word count threshold (default: 200)")
    args = parser.parse_args()

    try:
        analyze(args.catalog_path, args.category, args.min_words)
    except (FileNotFoundError, PermissionError, IsADirectoryError) as err:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Error: cannot read catalog '{args.catalog_path}': {err}")
    except json.JSONDecodeError as err:
        sys.exit(f"Error: '{args.catalog_path}' is not valid JSON: {err}")


if __name__ == "__main__":
    main()
|