@pinkpixel/sugarstitch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +59 -0
  2. package/LICENSE +21 -0
  3. package/OVERVIEW.md +306 -0
  4. package/README.md +462 -0
  5. package/assets/banner_dark.png +0 -0
  6. package/assets/banner_light.png +0 -0
  7. package/assets/logo.png +0 -0
  8. package/assets/screenshot_cli.png +0 -0
  9. package/assets/screenshot_completed.png +0 -0
  10. package/assets/screenshot_homepage.png +0 -0
  11. package/assets/screenshot_scraping.png +0 -0
  12. package/dist/index.js +216 -0
  13. package/dist/scraper.js +719 -0
  14. package/dist/server.js +1272 -0
  15. package/package.json +26 -0
  16. package/public/favicon.png +0 -0
  17. package/scripts/add-shebang.js +11 -0
  18. package/src/index.ts +217 -0
  19. package/src/scraper.ts +903 -0
  20. package/src/server.ts +1319 -0
  21. package/tsconfig.json +12 -0
  22. package/website/astro.config.mjs +5 -0
  23. package/website/package-lock.json +6358 -0
  24. package/website/package.json +18 -0
  25. package/website/public/banner_dark.png +0 -0
  26. package/website/public/banner_light.png +0 -0
  27. package/website/public/favicon.png +0 -0
  28. package/website/public/screenshot_cli.png +0 -0
  29. package/website/public/screenshot_completed.png +0 -0
  30. package/website/public/screenshot_homepage.png +0 -0
  31. package/website/public/screenshot_scraping.png +0 -0
  32. package/website/src/layouts/DocsLayout.astro +142 -0
  33. package/website/src/pages/docs/install.astro +96 -0
  34. package/website/src/pages/docs/use-the-app.astro +131 -0
  35. package/website/src/pages/index.astro +94 -0
  36. package/website/src/styles/site.css +611 -0
  37. package/website/tsconfig.json +3 -0
  38. package/website/wrangler.toml +6 -0
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "sugarstitch-docs",
3
+ "private": true,
4
+ "type": "module",
5
+ "scripts": {
6
+ "dev": "astro dev",
7
+ "build": "astro build",
8
+ "preview": "astro preview",
9
+ "cf:dev": "npm run build && wrangler pages dev dist",
10
+ "deploy": "npm run build && wrangler pages deploy dist --project-name sugarstitch-docs"
11
+ },
12
+ "dependencies": {
13
+ "astro": "^5.5.5"
14
+ },
15
+ "devDependencies": {
16
+ "wrangler": "^4.10.0"
17
+ }
18
+ }
Binary file
Binary file
Binary file
@@ -0,0 +1,142 @@
1
+ ---
2
+ import '../styles/site.css';
3
+
4
/** One anchor entry shown in the "On this page" list. */
interface TocEntry {
  href: string;
  label: string;
}

/** Inputs accepted by the docs layout. */
export interface Props {
  /** Text rendered in the document `<title>`. */
  title: string;
  /** Value of the `description` meta tag. */
  description: string;
  /** Compared against each sidebar link's `href` to pick the active link. */
  currentPath: string;
  /** When set, the layout renders a heading panel with this `<h1>`. */
  pageTitle?: string;
  /** Optional paragraph shown under `pageTitle`. */
  pageIntro?: string;
  /** Optional in-page anchors listed inside the heading panel. */
  toc?: TocEntry[];
}
12
+
13
// Unpack the layout inputs; `toc` falls back to an empty list when the page does not pass one.
const layoutProps = Astro.props;
const { title, description, currentPath, pageTitle, pageIntro } = layoutProps;
const { toc = [] } = layoutProps;
14
+
15
// Links listed under the single "Start Here" sidebar group.
const startHereLinks = [
  { href: '/', label: 'Overview' },
  { href: '/docs/install/', label: 'Installation' },
  { href: '/docs/use-the-app/', label: 'Use the App' },
];

// Sidebar navigation: each group renders a title followed by its links.
const navGroups = [{ title: 'Start Here', links: startHereLinks }];
25
+ ---
26
+
27
+ <!doctype html>
28
+ <html lang="en">
29
+ <head>
30
+ <meta charset="utf-8" />
31
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
32
+ <meta name="description" content={description} />
33
+ <title>{title}</title>
34
+ <link rel="icon" type="image/png" href="/favicon.png" />
35
+ </head>
36
+ <body>
37
+ <div class="page-shell">
38
+ <header class="topbar">
39
+ <a class="brand-link" href="/">
40
+ <img src="/favicon.png" alt="SugarStitch icon" />
41
+ <span>SugarStitch Docs</span>
42
+ </a>
43
+ <div class="topbar-actions">
44
+ <a class="pill-link" href="/docs/install/">Install</a>
45
+ <button class="theme-toggle" type="button" data-theme-toggle aria-label="Toggle color theme">
46
+ <span data-theme-icon>Moon</span>
47
+ <span data-theme-label>Dark mode</span>
48
+ </button>
49
+ </div>
50
+ </header>
51
+
52
+ <div class="main-grid">
53
+ <aside class="sidebar">
54
+ <span class="eyebrow">Sweet little scraping station</span>
55
{/*
  Sidebar navigation: one labeled <nav> per group. The link whose href equals
  currentPath gets the `active` class and aria-current="page", so the current
  page is exposed to assistive technology and not only through styling.
*/}
{navGroups.map((group) => (
  <>
    <div class="nav-group-title">{group.title}</div>
    <nav aria-label={group.title}>
      {group.links.map((link) => {
        const isActive = currentPath === link.href;
        return (
          <a
            href={link.href}
            class:list={['nav-link', isActive ? 'active' : '']}
            aria-current={isActive ? 'page' : undefined}
          >
            {link.label}
          </a>
        );
      })}
    </nav>
  </>
))}
73
+ <div class="sidebar-note">
74
+ SugarStitch is happiest on regular HTML pattern pages with article content, images, or linked PDFs already present in the markup.
75
+ </div>
76
+ </aside>
77
+
78
+ <main class="content-column">
79
{/*
  Optional page header: rendered only when pageTitle is provided. The table of
  contents appears only when toc has entries; its <nav> is labeled so it can be
  told apart from the sidebar navigation landmarks.
*/}
{pageTitle && (
  <section class="content-panel">
    <div class="page-heading">
      <span class="section-label">Documentation</span>
      <h1>{pageTitle}</h1>
      {pageIntro && <p>{pageIntro}</p>}
    </div>
    {toc.length > 0 && (
      <div class="toc">
        <div class="toc-title">On this page</div>
        <nav aria-label="On this page">
          {toc.map((item) => (
            <a href={item.href}>{item.label}</a>
          ))}
        </nav>
      </div>
    )}
  </section>
)}
98
+
99
+ <slot />
100
+ </main>
101
+ </div>
102
+
103
+ <footer class="footer">
104
+ SugarStitch turns pattern pages into local JSON, text, image, and PDF artifacts with a CLI and a cozy browser UI.
105
+ </footer>
106
+ </div>
107
+
108
+ <script is:inline>
109
// Theme toggle for the docs shell: applies the saved theme on load and flips
// between 'light' and 'dark' when the toggle button is clicked.
const storageKey = 'sugarstitch-docs-theme';
const root = document.documentElement;
const toggle = document.querySelector('[data-theme-toggle]');
const label = document.querySelector('[data-theme-label]');
const icon = document.querySelector('[data-theme-icon]');

// localStorage access can throw when storage is blocked or unavailable.
// Reads fall back to "nothing stored" so the script keeps running.
const readStoredTheme = () => {
  try {
    return localStorage.getItem(storageKey);
  } catch {
    return null;
  }
};

// Persisting the choice is best-effort: a failed write must not stop the
// theme from being applied for the current page view.
const writeStoredTheme = (theme) => {
  try {
    localStorage.setItem(storageKey, theme);
  } catch {
    // Ignore: the theme still applies, it just is not remembered.
  }
};

// The button advertises the theme a click would switch TO, not the current one.
const updateToggle = (theme) => {
  if (!label || !icon) return;
  const dark = theme === 'dark';
  label.textContent = dark ? 'Light mode' : 'Dark mode';
  icon.textContent = dark ? 'Sun' : 'Moon';
};

// Returns the stored theme when it is a recognized value, otherwise 'light'.
const preferredTheme = () => {
  const stored = readStoredTheme();
  if (stored === 'light' || stored === 'dark') return stored;
  return 'light';
};

// Writes the theme to <html data-theme="..."> and syncs the button text.
const applyTheme = (theme) => {
  root.setAttribute('data-theme', theme);
  updateToggle(theme);
};

applyTheme(preferredTheme());

toggle?.addEventListener('click', () => {
  const nextTheme = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
  writeStoredTheme(nextTheme);
  applyTheme(nextTheme);
});
140
+ </script>
141
+ </body>
142
+ </html>
@@ -0,0 +1,96 @@
1
+ ---
2
+ import DocsLayout from '../../layouts/DocsLayout.astro';
3
+
4
// "On this page" anchors for the install guide, as [href, label] pairs.
const tocPairs: [string, string][] = [
  ['#requirements', 'Requirements'],
  ['#npm-install', 'Install with npm'],
  ['#clone-repo', 'Clone the repo'],
  ['#run-ui', 'Run the local UI'],
];

// Shape expected by the layout's `toc` prop.
const toc = tocPairs.map(([href, label]) => ({ href, label }));
10
+ ---
11
+
12
+ <DocsLayout
13
+ title="Install SugarStitch"
14
+ description="Install SugarStitch globally from npm or by cloning the repository."
15
+ currentPath="/docs/install/"
16
+ pageTitle="Install SugarStitch"
17
+ pageIntro="Pick the quick global npm route if you mainly want the CLI, or clone the repository if you want the local UI and source code together."
18
+ toc={toc}
19
+ >
20
+ <section class="content-panel" id="requirements">
21
+ <h2>Requirements</h2>
22
+ <p>SugarStitch runs on Node.js and uses npm scripts for the local development flow.</p>
23
+ <ul>
24
+ <li>Node.js 20 or newer is the safest baseline for the current TypeScript setup.</li>
25
+ <li>npm is used for installation and running scripts.</li>
26
+ <li>An internet connection is needed when you scrape live pattern pages.</li>
27
+ </ul>
28
+ <div class="callout">
29
+ <strong>Best fit:</strong> SugarStitch works best on traditional HTML pattern pages where the article content is already present in the fetched markup, rather than rendered by a JavaScript app after load.
30
+ </div>
31
+ </section>
32
+
33
+ <section class="content-panel" id="npm-install">
34
+ <h2>Install with npm</h2>
35
+ <p>This is the fastest path when you just want to run the scraper from your terminal.</p>
36
+
37
+ <h3>1. Install the package globally</h3>
38
+ <pre class="code-block"><code>npm install -g @pinkpixel/sugarstitch</code></pre>
39
+
40
+ <h3>2. Run a quick scrape</h3>
41
+ <pre class="code-block"><code>sugarstitch --url "https://example.com/pattern"</code></pre>
42
+
43
+ <h3>3. Add a preset when the site structure is recognizable</h3>
44
+ <pre class="code-block"><code>sugarstitch --url "https://example.com/pattern" --preset wordpress</code></pre>
45
+
46
+ <div class="callout">
47
+ <strong>Preset tip:</strong> Start with <span class="inline-code">generic</span> for unknown sites, <span class="inline-code">wordpress</span> for blog-style posts, and <span class="inline-code">woocommerce</span> for product-style pattern pages.
48
+ </div>
49
+ </section>
50
+
51
+ <section class="content-panel" id="clone-repo">
52
+ <h2>Clone the repo</h2>
53
+ <p>This path is best if you want the browser UI, local scripts, and source files in one place.</p>
54
+
55
+ <h3>1. Clone and install dependencies</h3>
56
+ <pre class="code-block"><code>git clone https://github.com/pinkpixel-dev/sugarstitch.git
57
+ cd sugarstitch
58
+ npm install</code></pre>
59
+
60
+ <h3>2. Optional: build the CLI output</h3>
61
+ <pre class="code-block"><code>npm run build</code></pre>
62
+
63
+ <h3>3. Run the CLI from source</h3>
64
+ <pre class="code-block"><code>npm run scrape -- --url "https://example.com/pattern"</code></pre>
65
+
66
+ <article class="screenshot-card">
67
+ <h3>What the terminal flow looks like</h3>
68
+ <img src="/screenshot_cli.png" alt="CLI output showing the SugarStitch banner and scrape progress" />
69
+ <p class="caption">The CLI prints the SugarStitch banner and progress messages while it fetches pages, downloads assets, and writes local files.</p>
70
+ </article>
71
+ </section>
72
+
73
+ <section class="content-panel" id="run-ui">
74
+ <h2>Run the local UI</h2>
75
+ <p>If you prefer forms over flags, the local browser UI gives you the same scraping controls in a calmer, more visual workflow.</p>
76
+
77
+ <h3>Start the server</h3>
78
+ <pre class="code-block"><code>npm run ui</code></pre>
79
+
80
+ <h3>Open the app</h3>
81
+ <pre class="code-block"><code>http://localhost:4177</code></pre>
82
+
83
+ <div class="two-up">
84
+ <article class="screenshot-card">
85
+ <h3>Homepage</h3>
86
+ <img src="/screenshot_homepage.png" alt="SugarStitch homepage showing the scrape form and sidebar guide" />
87
+ <p class="caption">The homepage includes mode selection, output controls, presets, saved profiles, and advanced selector overrides.</p>
88
+ </article>
89
+ <article class="screenshot-card">
90
+ <h3>In-progress state</h3>
91
+ <img src="/screenshot_scraping.png" alt="SugarStitch scraping progress overlay" />
92
+ <p class="caption">While a preview or scrape is running, the UI swaps into a progress overlay so you know the run is active.</p>
93
+ </article>
94
+ </div>
95
+ </section>
96
+ </DocsLayout>
@@ -0,0 +1,131 @@
1
+ ---
2
+ import DocsLayout from '../../layouts/DocsLayout.astro';
3
+
4
// "On this page" anchors for the usage guide, as [href, label] pairs.
const tocPairs: [string, string][] = [
  ['#quick-start', 'Quick start'],
  ['#cli-workflow', 'CLI workflow'],
  ['#ui-workflow', 'UI workflow'],
  ['#crawl-mode', 'Discovery crawl mode'],
  ['#output', 'Output structure'],
];

// Shape expected by the layout's `toc` prop.
const toc = tocPairs.map(([href, label]) => ({ href, label }));
11
+ ---
12
+
13
+ <DocsLayout
14
+ title="Use the SugarStitch App"
15
+ description="Learn how to use SugarStitch from the CLI or local UI."
16
+ currentPath="/docs/use-the-app/"
17
+ pageTitle="Use the app"
18
+ pageIntro="SugarStitch can scrape a single pattern page, a list of saved URLs, or a discovery page that branches into many pattern links. The CLI and the local UI map to the same core scraper."
19
+ toc={toc}
20
+ >
21
+ <section class="content-panel" id="quick-start">
22
+ <h2>Quick start</h2>
23
+ <p>If you want the shortest path from install to results, start with one pattern page and a preset that matches the site structure.</p>
24
+ <pre class="code-block"><code>npm run scrape -- --url "https://example.com/pattern" --preset wordpress</code></pre>
25
+ <ul>
26
+ <li>Use <span class="inline-code">--url</span> for one page.</li>
27
+ <li>Use <span class="inline-code">--file</span> for a plain text list of URLs.</li>
28
+ <li>Use <span class="inline-code">--preview</span> first if you want to validate extraction before files are written.</li>
29
+ </ul>
30
+ </section>
31
+
32
+ <section class="content-panel" id="cli-workflow">
33
+ <h2>CLI workflow</h2>
34
+
35
+ <h3>Scrape one pattern page</h3>
36
+ <pre class="code-block"><code>npm run scrape -- --url "https://example.com/pattern" --preset generic</code></pre>
37
+
38
+ <h3>Scrape many URLs from a file</h3>
39
+ <pre class="code-block"><code>npm run scrape -- --file urls.txt</code></pre>
40
+
41
+ <h3>Preview without writing files</h3>
42
+ <pre class="code-block"><code>npm run scrape -- --url "https://example.com/pattern" --profile tildas-world --preview</code></pre>
43
+
44
+ <h3>Send output to a different folder</h3>
45
+ <pre class="code-block"><code>npm run scrape -- --url "https://example.com/pattern" --output-dir ./exports --output patterns.json</code></pre>
46
+
47
+ <div class="callout">
48
+ <strong>Saved profiles:</strong> point SugarStitch at a profile with <span class="inline-code">--profile &lt;id&gt;</span> when a site already has known selector overrides you want to reuse.
49
+ </div>
50
+
51
+ <article class="screenshot-card">
52
+ <h3>Terminal-first runs</h3>
53
+ <img src="/screenshot_cli.png" alt="Terminal screenshot showing SugarStitch CLI output" />
54
+ <p class="caption">The CLI is great for quick checks, automation, and repeatable batch runs where you already know the source URLs.</p>
55
+ </article>
56
+ </section>
57
+
58
+ <section class="content-panel" id="ui-workflow">
59
+ <h2>UI workflow</h2>
60
+ <p>The local UI is ideal when you want to test presets, compare overrides, or paste batches into a form instead of remembering flags.</p>
61
+
62
+ <ol>
63
+ <li>Run <span class="inline-code">npm run ui</span> and open <span class="inline-code">http://localhost:4177</span>.</li>
64
+ <li>Choose <span class="inline-code">Single URL</span> or paste multiple URLs.</li>
65
+ <li>Pick a saved profile or selector preset.</li>
66
+ <li>Add output settings or advanced selector overrides if needed.</li>
67
+ <li>Click <span class="inline-code">Test Selectors</span> first or go straight to <span class="inline-code">Start Scraping</span>.</li>
68
+ </ol>
69
+
70
+ <div class="screenshot-grid">
71
+ <article class="screenshot-card">
72
+ <h3>Form overview</h3>
73
+ <img src="/screenshot_homepage.png" alt="SugarStitch homepage showing the main scraping form" />
74
+ <p class="caption">Single URL mode, multi-URL paste mode, presets, and profile loading all live on the main screen.</p>
75
+ </article>
76
+ <article class="screenshot-card">
77
+ <h3>Finished run</h3>
78
+ <img src="/screenshot_completed.png" alt="SugarStitch completed run summary with log output" />
79
+ <p class="caption">After the scrape completes, the UI shows counts, logs, output paths, and the pattern titles that were captured.</p>
80
+ </article>
81
+ </div>
82
+ </section>
83
+
84
+ <section class="content-panel" id="crawl-mode">
85
+ <h2>Discovery crawl mode</h2>
86
+ <p>Use discovery crawl mode when you have a listing page, archive page, or “free patterns” hub instead of a direct pattern URL.</p>
87
+
88
+ <pre class="code-block"><code>npm run scrape -- \
89
+ --url "https://www.tildasworld.com/free-patterns/" \
90
+ --preset wordpress \
91
+ --crawl \
92
+ --crawl-depth 2 \
93
+ --crawl-pattern "free_pattern|pattern|quilt|pillow" \
94
+ --crawl-language english \
95
+ --crawl-paginate</code></pre>
96
+
97
+ <ul>
98
+ <li><span class="inline-code">--crawl</span> turns discovery mode on.</li>
99
+ <li><span class="inline-code">--crawl-depth</span> controls how many link levels deep the crawler follows.</li>
100
+ <li><span class="inline-code">--crawl-pattern</span> narrows what discovered links are allowed through.</li>
101
+ <li><span class="inline-code">--crawl-language</span> helps when a site mixes multiple language sections.</li>
102
+ <li><span class="inline-code">--crawl-paginate</span> adds paginated listing pages like <span class="inline-code">/page/2/</span> before discovery continues.</li>
103
+ </ul>
104
+
105
+ <article class="screenshot-card">
106
+ <h3>Progress while crawling or scraping</h3>
107
+ <img src="/screenshot_scraping.png" alt="SugarStitch loading state shown during scraping" />
108
+ <p class="caption">The UI keeps the run focused with a progress state while it fetches candidate pages and downloads files.</p>
109
+ </article>
110
+ </section>
111
+
112
+ <section class="content-panel" id="output">
113
+ <h2>Output structure</h2>
114
+ <p>A successful run writes one JSON entry per scraped page and may also save page text, images, and PDFs into local folders.</p>
115
+
116
+ <pre class="code-block"><code>{`{
117
+ "title": "Pattern Title",
118
+ "description": "Short description from the page",
119
+ "materials": ["Cotton fabric", "Stuffing", "Thread"],
120
+ "instructions": ["Cut the pieces", "Sew the body", "Stuff and close"],
121
+ "sourceUrl": "https://example.com/pattern",
122
+ "localImages": ["images/pattern_title/image_1.jpg"],
123
+ "localPdfs": ["pdfs/pattern_title/pattern.pdf"],
124
+ "localTextFile": "texts/pattern_title/pattern.txt"
125
+ }`}</code></pre>
126
+
127
+ <div class="callout">
128
+ <strong>Typical output folder:</strong> expect the JSON file plus <span class="inline-code">images/</span>, <span class="inline-code">pdfs/</span>, and <span class="inline-code">texts/</span> folders inside the selected output directory.
129
+ </div>
130
+ </section>
131
+ </DocsLayout>
@@ -0,0 +1,94 @@
1
+ ---
2
+ import DocsLayout from '../layouts/DocsLayout.astro';
3
+ ---
4
+
5
+ <DocsLayout
6
+ title="SugarStitch Docs"
7
+ description="Installation and usage docs for SugarStitch, the fiber arts pattern scraper."
8
+ currentPath="/"
9
+ >
10
+ <section class="hero-panel">
11
+ <div class="hero-grid">
12
+ <div class="hero-copy">
13
+ <span class="hero-eyebrow">Pattern scraping made sweet</span>
14
+ <h1>SugarStitch Documentation</h1>
15
+ <p>
16
+ SugarStitch is a TypeScript scraper for fiber arts pattern pages, with both a command-line workflow and
17
+ a local browser UI. These docs cover how to install it, run it, and pick the right flow for single pages,
18
+ batch jobs, and discovery crawls.
19
+ </p>
20
+ <div class="hero-actions">
21
+ <a class="cta cta-primary" href="/docs/install/">Install SugarStitch</a>
22
+ <a class="cta cta-secondary" href="/docs/use-the-app/">Learn the workflow</a>
23
+ </div>
24
+ </div>
25
+
26
+ <div class="hero-banner">
27
+ <img class="banner-light" src="/banner_light.png" alt="SugarStitch banner for light mode" />
28
+ <img class="banner-dark" src="/banner_dark.png" alt="SugarStitch banner for dark mode" />
29
+ </div>
30
+ </div>
31
+ </section>
32
+
33
+ <section class="hero-stats">
34
+ <article class="stat-card">
35
+ <strong>CLI and UI</strong>
36
+ <span>Use a quick terminal command or work from a local browser form at <span class="inline-code">localhost:4177</span>.</span>
37
+ </article>
38
+ <article class="stat-card">
39
+ <strong>Site-aware scraping</strong>
40
+ <span>Start with <span class="inline-code">generic</span>, <span class="inline-code">wordpress</span>, or <span class="inline-code">woocommerce</span> presets, then add selector overrides only where needed.</span>
41
+ </article>
42
+ <article class="stat-card">
43
+ <strong>Local outputs</strong>
44
+ <span>Each run can save JSON plus <span class="inline-code">images/</span>, <span class="inline-code">pdfs/</span>, and <span class="inline-code">texts/</span> for review.</span>
45
+ </article>
46
+ </section>
47
+
48
+ <section class="feature-grid">
49
+ <article class="feature-card">
50
+ <h3>Install your way</h3>
51
+ <p>Grab it globally from npm for the quickest path, or clone the repo if you want the UI and source side by side while you iterate.</p>
52
+ </article>
53
+ <article class="feature-card">
54
+ <h3>Scrape one page or many</h3>
55
+ <p>Feed SugarStitch a single pattern URL, a text file full of URLs, or a listing page that needs discovery crawl mode first.</p>
56
+ </article>
57
+ <article class="feature-card">
58
+ <h3>Tune extraction gently</h3>
59
+ <p>Saved profiles and advanced selector overrides help when a site is close to working but needs one or two custom selectors to land cleanly.</p>
60
+ </article>
61
+ </section>
62
+
63
+ <section class="content-panel">
64
+ <div class="page-heading">
65
+ <span class="section-label">What it looks like</span>
66
+ </div>
67
+
68
+ <div class="screenshot-grid">
69
+ <article class="screenshot-card">
70
+ <h3>Homepage</h3>
71
+ <img src="/screenshot_homepage.png" alt="SugarStitch homepage with scrape form and saved profiles panel" />
72
+ <p class="caption">The local UI starts with a form-first workflow for URLs, presets, output options, and discovery crawl settings.</p>
73
+ </article>
74
+ <article class="screenshot-card">
75
+ <h3>CLI</h3>
76
+ <img src="/screenshot_cli.png" alt="SugarStitch CLI" />
77
+ <p class="caption">The terminal flow is perfect for quick runs, automation, or batch scraping from a saved URL list.</p>
78
+ </article>
79
+ </div>
80
+ </section>
81
+
82
+ <section class="card-grid">
83
+ <article class="card">
84
+ <h3>Start with installation</h3>
85
+ <p>The install guide walks through both the global npm path and the clone-and-run development path, including the commands you actually need.</p>
86
+ <a class="cta cta-secondary" href="/docs/install/">Open installation guide</a>
87
+ </article>
88
+ <article class="card">
89
+ <h3>Move straight into usage</h3>
90
+ <p>The usage guide covers single-page scrapes, batch files, preview mode, crawl settings, and how the browser UI maps onto the CLI flags.</p>
91
+ <a class="cta cta-secondary" href="/docs/use-the-app/">Open usage guide</a>
92
+ </article>
93
+ </section>
94
+ </DocsLayout>