@sambitcreate/parsely-cli 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -113
- package/dist/app.js +104 -26
- package/dist/cli.js +36 -2
- package/dist/components/Banner.d.ts +9 -1
- package/dist/components/Banner.js +18 -8
- package/dist/components/ErrorDisplay.js +3 -2
- package/dist/components/Footer.d.ts +2 -1
- package/dist/components/Footer.js +28 -4
- package/dist/components/LandingScreen.d.ts +8 -0
- package/dist/components/LandingScreen.js +89 -0
- package/dist/components/LoadingScreen.d.ts +6 -0
- package/dist/components/LoadingScreen.js +19 -0
- package/dist/components/Panel.d.ts +9 -0
- package/dist/components/Panel.js +6 -0
- package/dist/components/PhaseRail.d.ts +9 -0
- package/dist/components/PhaseRail.js +88 -0
- package/dist/components/RecipeCard.d.ts +4 -1
- package/dist/components/RecipeCard.js +202 -12
- package/dist/components/ScrapingStatus.d.ts +2 -1
- package/dist/components/ScrapingStatus.js +25 -8
- package/dist/components/URLInput.d.ts +4 -1
- package/dist/components/URLInput.js +46 -18
- package/dist/components/Welcome.d.ts +6 -1
- package/dist/components/Welcome.js +5 -2
- package/dist/hooks/useDisplayPalette.d.ts +1 -0
- package/dist/hooks/useDisplayPalette.js +15 -0
- package/dist/hooks/useTerminalViewport.d.ts +6 -0
- package/dist/hooks/useTerminalViewport.js +23 -0
- package/dist/services/scraper.d.ts +9 -1
- package/dist/services/scraper.js +290 -58
- package/dist/theme.d.ts +88 -28
- package/dist/theme.js +122 -27
- package/dist/utils/helpers.d.ts +4 -0
- package/dist/utils/helpers.js +30 -0
- package/dist/utils/shortcuts.d.ts +6 -0
- package/dist/utils/shortcuts.js +15 -0
- package/dist/utils/terminal.d.ts +8 -0
- package/dist/utils/terminal.js +114 -0
- package/dist/utils/text-layout.d.ts +1 -0
- package/dist/utils/text-layout.js +63 -0
- package/package.json +12 -8
- package/public/parsely-logo.svg +1 -0
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
|
|
2
2
|
import { Box, Text } from 'ink';
|
|
3
|
+
import { Panel } from './Panel.js';
|
|
3
4
|
import { theme } from '../theme.js';
|
|
4
|
-
export function Welcome() {
|
|
5
|
-
return (_jsxs(Box, { flexDirection: "column",
|
|
5
|
+
/**
 * Renders the welcome copy: an intro panel and, unless `minimal` is set,
 * a "What happens next" workflow panel.
 *
 * @param props.compact When true, the closing Schema.org tip is left out of the workflow panel.
 * @param props.minimal When true, shorter intro copy is used and the workflow panel is not rendered.
 */
export function Welcome({ compact = false, minimal = false }) {
    const { colors, symbols } = theme;
    const headline = minimal
        ? 'Paste a recipe page to extract the cookable bits.'
        : 'Turn any recipe page into a clean cooking brief.';
    const blurb = minimal
        ? 'Parsely pulls out timing, ingredients, and steps without the surrounding clutter.'
        : 'Parsely strips away popups, rambling intros, and clutter so you can focus on timing, ingredients, and steps.';
    const introPanel = _jsx(Panel, {
        title: headline,
        eyebrow: "Recipe deck",
        accentColor: colors.primary,
        children: _jsx(Text, { color: colors.text, children: blurb }),
    });
    // Built lazily so nothing is created for the workflow panel in minimal mode.
    const renderWorkflowPanel = () => {
        const steps = [
            " Try JSON-LD and other structured recipe markup first.",
            " Fall back to AI only when the page needs rescue.",
            " Plate the result in a terminal-friendly cooking layout.",
        ].map((step) => _jsxs(Text, { color: colors.text, children: [symbols.bullet, step] }));
        const tip = compact
            ? false
            : _jsx(Box, {
                marginTop: 1,
                children: _jsx(Text, {
                    color: colors.muted,
                    children: "Tip: most dedicated recipe sites work immediately if they publish Schema.org metadata.",
                }),
            });
        return _jsxs(Panel, {
            title: "What happens next",
            eyebrow: "Workflow",
            accentColor: colors.secondary,
            marginTop: 1,
            children: [...steps, tip],
        });
    };
    const workflowPanel = minimal ? false : renderWorkflowPanel();
    return _jsxs(Box, { flexDirection: "column", children: [introPanel, workflowPanel] });
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * React hook that, while mounted, writes the output of `setDefaultTerminalBackground(color)`
 * to stdout and writes the output of `resetDefaultTerminalBackground()` on cleanup.
 * It does nothing when stdout is not a TTY or `shouldUseDisplayPalette()` returns false.
 */
export declare function useDisplayPalette(color: string): void;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { useStdout } from 'ink';
|
|
2
|
+
import { useEffect } from 'react';
|
|
3
|
+
import { resetDefaultTerminalBackground, setDefaultTerminalBackground, shouldUseDisplayPalette, } from '../utils/terminal.js';
|
|
4
|
+
/**
 * Applies `color` as the terminal's default background for as long as the
 * calling component is mounted, then restores it.
 *
 * The effect is skipped when stdout is not a TTY or when
 * `shouldUseDisplayPalette()` reports that the palette should not be used.
 *
 * @param color Value handed to `setDefaultTerminalBackground`.
 */
export function useDisplayPalette(color) {
    const terminal = useStdout();
    const { stdout, write } = terminal;
    useEffect(() => {
        const paletteEnabled = stdout.isTTY && shouldUseDisplayPalette();
        if (!paletteEnabled) {
            return undefined;
        }
        write(setDefaultTerminalBackground(color));
        // Cleanup runs on unmount and before the effect re-runs with new dependencies.
        const restoreBackground = () => {
            write(resetDefaultTerminalBackground());
        };
        return restoreBackground;
    }, [color, stdout, write]);
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { useStdout } from 'ink';
|
|
2
|
+
import { useEffect, useState } from 'react';
|
|
3
|
+
/**
 * Reads the terminal size from a stdout-like stream.
 *
 * @param stdout Object exposing optional `columns` and `rows`.
 * @returns `{ width, height }`, falling back to 100 columns and 32 rows when a
 *          dimension is `null` or `undefined`.
 */
function getViewport(stdout) {
    const { columns, rows } = stdout;
    const width = columns ?? 100;
    const height = rows ?? 32;
    return { width, height };
}
|
|
9
|
+
/**
 * React hook returning the current terminal viewport (`{ width, height }`).
 *
 * The value is read once for the initial state, refreshed when the effect
 * mounts, and refreshed again on every stdout `resize` event. The listener is
 * removed on cleanup.
 */
export function useTerminalViewport() {
    const { stdout } = useStdout();
    const [viewport, setViewport] = useState(() => getViewport(stdout));
    useEffect(() => {
        const syncViewport = () => {
            setViewport(getViewport(stdout));
        };
        // Re-read immediately in case the size changed between render and effect.
        syncViewport();
        stdout.on('resize', syncViewport);
        const unsubscribe = () => {
            stdout.off('resize', syncViewport);
        };
        return unsubscribe;
    }, [stdout]);
    return viewport;
}
|
|
@@ -18,9 +18,17 @@ export interface ScrapeStatus {
|
|
|
18
18
|
message: string;
|
|
19
19
|
recipe?: Recipe;
|
|
20
20
|
}
|
|
21
|
+
/**
|
|
22
|
+
* Walk through JSON-LD script blocks and return the first Recipe object found.
|
|
23
|
+
* Handles direct Recipe type, @graph arrays, and nested lists.
|
|
24
|
+
*/
|
|
25
|
+
export declare function findRecipeJson(scripts: string[]): Record<string, unknown> | null;
|
|
26
|
+
/**
 * Reports whether `html` contains any of the substrings `cf_chl`, `window._cf_chl_opt`
 * or `cf-mitigated`, which the scraper treats as signs of a browser challenge page.
 */
export declare function containsBrowserChallenge(html: string): boolean;
|
|
27
|
+
/** Normalizes a raw recipe object (as parsed from the AI response) into a `Recipe` tagged with source `'ai'`. */
export declare function normalizeAiRecipe(recipe: Record<string, unknown>): Recipe;
|
|
28
|
+
/**
 * Collects the page's `application/ld+json` script blocks, looks for a Recipe object in them,
 * and returns it normalized with source `'browser'`, or `null` when none is found.
 */
export declare function extractRecipeFromHtml(html: string): Recipe | null;
|
|
21
29
|
/**
|
|
22
30
|
* Scrape a recipe from the given URL.
|
|
23
31
|
* Tries Puppeteer-based browser scraping first, falls back to OpenAI.
|
|
24
32
|
* Calls `onStatus` with progress updates so the TUI can reflect each phase.
|
|
25
33
|
*/
|
|
26
|
-
export declare function scrapeRecipe(url: string, onStatus: (status: ScrapeStatus) => void): Promise<Recipe>;
|
|
34
|
+
// `signal` is optional. When it is aborted the scrape rejects with an Error whose
// `name` is 'AbortError' (message 'Scrape aborted').
export declare function scrapeRecipe(url: string, onStatus: (status: ScrapeStatus) => void, signal?: AbortSignal): Promise<Recipe>;
|
package/dist/services/scraper.js
CHANGED
|
@@ -1,9 +1,20 @@
|
|
|
1
1
|
import puppeteer from 'puppeteer-core';
|
|
2
2
|
import * as cheerio from 'cheerio';
|
|
3
3
|
import OpenAI from 'openai';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { loadConfig } from '../utils/helpers.js';
|
|
4
|
+
import { constants as fsConstants } from 'node:fs';
|
|
5
|
+
import { access } from 'node:fs/promises';
|
|
6
|
+
import { loadConfig, normalizeRecipeUrl, sanitizeTerminalText } from '../utils/helpers.js';
|
|
7
|
+
const BROWSER_ARGS = [
|
|
8
|
+
'--no-sandbox',
|
|
9
|
+
'--disable-setuid-sandbox',
|
|
10
|
+
'--disable-blink-features=AutomationControlled',
|
|
11
|
+
];
|
|
12
|
+
const BROWSER_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 ' +
|
|
13
|
+
'(KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36';
|
|
14
|
+
const PAGE_TIMEOUT_MS = 20_000;
|
|
15
|
+
const NETWORK_IDLE_TIMEOUT_MS = 5_000;
|
|
16
|
+
const AI_TIMEOUT_MS = 30_000;
|
|
17
|
+
const AI_SOURCE_LIMIT = 120_000;
|
|
7
18
|
/* ------------------------------------------------------------------ */
|
|
8
19
|
/* JSON-LD helpers */
|
|
9
20
|
/* ------------------------------------------------------------------ */
|
|
@@ -11,7 +22,7 @@ import { loadConfig } from '../utils/helpers.js';
|
|
|
11
22
|
* Walk through JSON-LD script blocks and return the first Recipe object found.
|
|
12
23
|
* Handles direct Recipe type, @graph arrays, and nested lists.
|
|
13
24
|
*/
|
|
14
|
-
function findRecipeJson(scripts) {
|
|
25
|
+
export function findRecipeJson(scripts) {
|
|
15
26
|
for (const raw of scripts) {
|
|
16
27
|
let data;
|
|
17
28
|
try {
|
|
@@ -43,6 +54,98 @@ function findRecipeJson(scripts) {
|
|
|
43
54
|
}
|
|
44
55
|
return null;
|
|
45
56
|
}
|
|
57
|
+
/**
 * Checks page markup for substrings the scraper treats as a browser challenge.
 *
 * @param html Page source to inspect.
 * @returns `true` when any challenge marker occurs in `html`.
 */
export function containsBrowserChallenge(html) {
    const challengeMarkers = ['cf_chl', 'window._cf_chl_opt', 'cf-mitigated'];
    return challengeMarkers.some((marker) => html.includes(marker));
}
|
|
62
|
+
/**
 * Turns an arbitrary value into clean single-line text.
 *
 * Non-strings and blank strings yield `undefined`. Otherwise the trimmed value
 * is passed through `sanitizeTerminalText`, parsed as HTML so only its text
 * content remains, and runs of whitespace are collapsed to single spaces.
 *
 * @param value Candidate text (any type).
 * @returns The cleaned text, or `undefined` when nothing is left.
 */
function normalizeText(value) {
    const trimmed = typeof value === 'string' ? value.trim() : '';
    if (trimmed === '') {
        return undefined;
    }
    // Wrapping in <body> lets cheerio strip any markup embedded in the text.
    const dom = cheerio.load(`<body>${sanitizeTerminalText(trimmed)}</body>`);
    const collapsed = dom('body').text().replace(/\s+/g, ' ').trim();
    return collapsed === '' ? undefined : collapsed;
}
|
|
74
|
+
/**
 * Normalizes one recipe instruction.
 *
 * - A string is cleaned with `normalizeText` and returned as a string.
 * - An object becomes `{ text?, itemListElement? }`, where `itemListElement`
 *   holds `{ text }` entries for each nested item that has usable text.
 * - Anything else, or an object with no usable text at all, yields `undefined`.
 *
 * @param value Raw instruction (string, step object, or anything else).
 */
function normalizeInstruction(value) {
    if (typeof value === 'string') {
        return normalizeText(value);
    }
    if (typeof value !== 'object' || !value) {
        return undefined;
    }
    const text = normalizeText(value.text);
    const nestedSteps = [];
    if (Array.isArray(value.itemListElement)) {
        for (const item of value.itemListElement) {
            const itemText = normalizeText(item?.text);
            if (itemText) {
                nestedSteps.push({ text: itemText });
            }
        }
    }
    const hasNestedSteps = nestedSteps.length > 0;
    if (!text && !hasNestedSteps) {
        return undefined;
    }
    // Keys are added in the order text, itemListElement.
    const step = {};
    if (text) {
        step.text = text;
    }
    if (hasNestedSteps) {
        step.itemListElement = nestedSteps;
    }
    return step;
}
|
|
98
|
+
/**
 * Normalizes `recipeInstructions`, which may be a list of steps or a single step.
 *
 * @param value Array of raw instructions, or one raw instruction.
 * @returns The non-empty list of normalized steps, or `undefined` when no step survives.
 */
function normalizeInstructions(value) {
    // Treat a lone instruction as a one-element list so both shapes share one path.
    const candidates = Array.isArray(value) ? value : [value];
    const steps = [];
    for (const candidate of candidates) {
        const step = normalizeInstruction(candidate);
        if (step) {
            steps.push(step);
        }
    }
    return steps.length > 0 ? steps : undefined;
}
|
|
108
|
+
/**
 * Builds a normalized recipe from a loosely-typed payload (JSON-LD or AI output).
 *
 * Text fields go through `normalizeText`, duration fields are trimmed and
 * passed through `sanitizeTerminalText`, and instructions go through
 * `normalizeInstructions`.
 *
 * @param recipe Raw payload. `null`/`undefined` is treated as an empty object,
 *               because `JSON.parse` on an AI response can legitimately return `null`.
 * @param source Origin tag stored on the result (`'browser'` or `'ai'`).
 * @returns Recipe object; fields without usable content are `undefined`.
 */
function normalizeRecipePayload(recipe, source) {
    const payload = recipe ?? {};
    // Durations that are blank after cleaning are reported as undefined (not ''),
    // matching how normalizeText reports empty text.
    const normalizeDuration = (value) => {
        if (typeof value !== 'string') {
            return undefined;
        }
        const cleaned = sanitizeTerminalText(value.trim());
        return cleaned || undefined;
    };
    const recipeIngredient = Array.isArray(payload.recipeIngredient)
        ? payload.recipeIngredient
            .map((item) => normalizeText(item))
            .filter((item) => Boolean(item))
        : undefined;
    return {
        name: normalizeText(payload.name),
        prepTime: normalizeDuration(payload.prepTime),
        cookTime: normalizeDuration(payload.cookTime),
        totalTime: normalizeDuration(payload.totalTime),
        recipeIngredient,
        recipeInstructions: normalizeInstructions(payload.recipeInstructions),
        source,
    };
}
|
|
124
|
+
/**
 * Normalizes a recipe found in page markup and tags it with source `'browser'`.
 *
 * @param recipe Raw recipe object.
 */
function normalizeBrowserRecipe(recipe) {
    const origin = 'browser';
    return normalizeRecipePayload(recipe, origin);
}
|
|
127
|
+
/**
 * Normalizes a recipe object produced by the AI fallback and tags it with source `'ai'`.
 *
 * @param recipe Raw recipe object.
 */
export function normalizeAiRecipe(recipe) {
    const origin = 'ai';
    return normalizeRecipePayload(recipe, origin);
}
|
|
130
|
+
/**
 * Tells whether a normalized recipe carries any usable data.
 *
 * @param recipe Normalized recipe.
 * @returns `true` when the recipe has a name, any timing field, or at least one
 *          ingredient or instruction.
 */
function hasRecipeContent(recipe) {
    const contentSignals = [
        recipe.name,
        recipe.prepTime,
        recipe.cookTime,
        recipe.totalTime,
        recipe.recipeIngredient?.length,
        recipe.recipeInstructions?.length,
    ];
    return contentSignals.some((signal) => Boolean(signal));
}
|
|
138
|
+
/**
 * Extracts a recipe from a page's JSON-LD metadata.
 *
 * @param html Full page source.
 * @returns The normalized recipe (source `'browser'`) from the first Recipe object
 *          found in the `application/ld+json` script blocks, or `null` when there is none.
 */
export function extractRecipeFromHtml(html) {
    const $ = cheerio.load(html);
    const jsonLdBlocks = $('script[type="application/ld+json"]')
        .toArray()
        .map((node) => $(node).text())
        // Empty script bodies carry nothing to parse.
        .filter((text) => Boolean(text));
    const match = findRecipeJson(jsonLdBlocks);
    if (!match) {
        return null;
    }
    return normalizeBrowserRecipe(match);
}
|
|
46
149
|
/* ------------------------------------------------------------------ */
|
|
47
150
|
/* Chrome detection */
|
|
48
151
|
/* ------------------------------------------------------------------ */
|
|
@@ -54,86 +157,203 @@ const CHROME_PATHS = [
|
|
|
54
157
|
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
55
158
|
'/Applications/Chromium.app/Contents/MacOS/Chromium',
|
|
56
159
|
];
|
|
57
|
-
function findChrome() {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
160
|
+
/**
 * Locates a Chrome/Chromium executable.
 *
 * Candidates are probed in order: `PUPPETEER_EXECUTABLE_PATH`, `CHROME_PATH`,
 * then the entries of `CHROME_PATHS`. Unset variables are skipped.
 *
 * @returns The first candidate that passes an `X_OK` access check, or `null`.
 */
async function findChrome() {
    const isExecutable = async (executable) => {
        try {
            await access(executable, fsConstants.X_OK);
            return true;
        }
        catch {
            // Missing or non-executable: the caller moves on to the next location.
            return false;
        }
    };
    const envOverrides = [
        process.env['PUPPETEER_EXECUTABLE_PATH'],
        process.env['CHROME_PATH'],
    ];
    const searchOrder = [...envOverrides, ...CHROME_PATHS].filter((entry) => Boolean(entry));
    // Probed one at a time so earlier candidates win.
    for (const executable of searchOrder) {
        if (await isExecutable(executable)) {
            return executable;
        }
    }
    return null;
}
|
|
177
|
+
/**
 * Creates the error used to signal a cancelled scrape.
 *
 * @returns A new `Error` with message `'Scrape aborted'` and `name` set to `'AbortError'`.
 */
function createAbortError() {
    return Object.assign(new Error('Scrape aborted'), { name: 'AbortError' });
}
|
|
182
|
+
/**
 * Guard used between scraping steps.
 *
 * @param signal Optional abort signal.
 * @throws The error from `createAbortError()` when `signal` is already aborted.
 */
function throwIfAborted(signal) {
    const alreadyAborted = Boolean(signal?.aborted);
    if (!alreadyAborted) {
        return;
    }
    throw createAbortError();
}
|
|
187
|
+
/**
 * Prepares a freshly opened page before navigation: sets the user agent and
 * `accept-language` header, and registers a script that runs in every new
 * document to override `navigator.webdriver`, `navigator.languages` and
 * `navigator.plugins`.
 *
 * @param page Page object exposing `setUserAgent`, `setExtraHTTPHeaders` and `evaluateOnNewDocument`.
 */
async function configurePage(page) {
    await page.setUserAgent(BROWSER_USER_AGENT);
    await page.setExtraHTTPHeaders({ 'accept-language': 'en-US,en;q=0.9' });
    await page.evaluateOnNewDocument(() => {
        // Kept self-contained: this callback must not reference anything outside itself.
        const overrides = {
            webdriver: () => false,
            languages: () => ['en-US', 'en'],
            plugins: () => [1, 2, 3],
        };
        for (const [property, get] of Object.entries(overrides)) {
            Object.defineProperty(navigator, property, { get });
        }
    });
}
|
|
196
|
+
/**
 * Derives an abort signal that fires when the caller's signal aborts or when
 * `timeoutMs` elapses, whichever happens first.
 *
 * @param signal Optional parent signal; if it is already aborted the derived signal starts aborted.
 * @param timeoutMs Delay before the derived signal aborts on its own.
 * @returns `{ signal, cleanup }`. `cleanup` clears the timer and detaches the
 *          listener from the parent signal; callers should always invoke it.
 */
function createTimedSignal(signal, timeoutMs) {
    const linked = new AbortController();
    const abortLinked = () => linked.abort();
    if (signal?.aborted) {
        abortLinked();
    }
    signal?.addEventListener('abort', abortLinked, { once: true });
    const timer = setTimeout(() => {
        abortLinked();
    }, timeoutMs);
    const cleanup = () => {
        clearTimeout(timer);
        signal?.removeEventListener('abort', abortLinked);
    };
    return { signal: linked.signal, cleanup };
}
|
|
214
|
+
/**
 * Picks the error to raise after a timed operation was cut short.
 *
 * @param message Message for the timeout case.
 * @param signal Caller's abort signal (optional).
 * @returns The abort error when the caller's signal is aborted, otherwise `new Error(message)`.
 */
function formatTimeoutError(message, signal) {
    if (signal?.aborted) {
        return createAbortError();
    }
    return new Error(message);
}
|
|
217
|
+
/**
 * Trims page source and caps it at `AI_SOURCE_LIMIT` characters.
 *
 * @param value Page source text.
 * @returns The trimmed text, truncated to the limit.
 */
function limitAiSource(value) {
    const trimmed = value.trim();
    return trimmed.slice(0, AI_SOURCE_LIMIT);
}
|
|
220
|
+
async function fetchAiSource(url, signal) {
|
|
221
|
+
throwIfAborted(signal);
|
|
222
|
+
const { signal: timedSignal, cleanup } = createTimedSignal(signal, PAGE_TIMEOUT_MS);
|
|
64
223
|
try {
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
224
|
+
const response = await fetch(url, {
|
|
225
|
+
headers: {
|
|
226
|
+
'accept-language': 'en-US,en;q=0.9',
|
|
227
|
+
'user-agent': BROWSER_USER_AGENT,
|
|
228
|
+
},
|
|
229
|
+
signal: timedSignal,
|
|
230
|
+
});
|
|
231
|
+
if (!response.ok) {
|
|
232
|
+
throw new Error(`Failed to load recipe page for AI fallback (${response.status})`);
|
|
233
|
+
}
|
|
234
|
+
return limitAiSource(await response.text());
|
|
235
|
+
}
|
|
236
|
+
catch (error) {
|
|
237
|
+
if (timedSignal.aborted) {
|
|
238
|
+
throw formatTimeoutError('Timed out loading recipe page for AI fallback', signal);
|
|
239
|
+
}
|
|
240
|
+
throw error;
|
|
241
|
+
}
|
|
242
|
+
finally {
|
|
243
|
+
cleanup();
|
|
68
244
|
}
|
|
69
|
-
catch { /* not found */ }
|
|
70
|
-
return null;
|
|
71
245
|
}
|
|
72
246
|
/* ------------------------------------------------------------------ */
|
|
73
247
|
/* Scraping strategies */
|
|
74
248
|
/* ------------------------------------------------------------------ */
|
|
75
|
-
async function scrapeWithBrowser(url) {
|
|
76
|
-
|
|
249
|
+
async function scrapeWithBrowser(url, onStatus, signal) {
|
|
250
|
+
throwIfAborted(signal);
|
|
251
|
+
const chromePath = await findChrome();
|
|
77
252
|
if (!chromePath)
|
|
78
|
-
return null; // No browser available – skip to AI
|
|
253
|
+
return { recipe: null }; // No browser available – skip to AI
|
|
79
254
|
let browser = null;
|
|
255
|
+
let settledHtml;
|
|
256
|
+
const onAbort = async () => {
|
|
257
|
+
if (browser) {
|
|
258
|
+
try {
|
|
259
|
+
await browser.close();
|
|
260
|
+
}
|
|
261
|
+
catch {
|
|
262
|
+
// Ignore close errors when aborting.
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
};
|
|
266
|
+
signal?.addEventListener('abort', onAbort, { once: true });
|
|
80
267
|
try {
|
|
81
268
|
browser = await puppeteer.launch({
|
|
82
269
|
headless: true,
|
|
83
270
|
executablePath: chromePath,
|
|
84
|
-
args:
|
|
271
|
+
args: BROWSER_ARGS,
|
|
85
272
|
});
|
|
273
|
+
throwIfAborted(signal);
|
|
86
274
|
const page = await browser.newPage();
|
|
87
|
-
await page
|
|
275
|
+
await configurePage(page);
|
|
276
|
+
onStatus?.({ phase: 'browser', message: 'Loading recipe page…' });
|
|
277
|
+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: PAGE_TIMEOUT_MS });
|
|
278
|
+
await page.waitForNetworkIdle({ idleTime: 500, timeout: NETWORK_IDLE_TIMEOUT_MS }).catch(() => undefined);
|
|
279
|
+
throwIfAborted(signal);
|
|
88
280
|
const html = await page.content();
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
return null;
|
|
101
|
-
return { ...recipe, source: 'browser' };
|
|
281
|
+
if (containsBrowserChallenge(html)) {
|
|
282
|
+
onStatus?.({ phase: 'browser', message: 'Browser challenge detected, retrying page parsing…' });
|
|
283
|
+
await page.waitForFunction(() => !document.documentElement.outerHTML.includes('cf_chl'), { timeout: 5_000 }).catch(() => undefined);
|
|
284
|
+
}
|
|
285
|
+
throwIfAborted(signal);
|
|
286
|
+
settledHtml = await page.content();
|
|
287
|
+
onStatus?.({ phase: 'parsing', message: 'Scanning recipe schema and JSON-LD blocks…' });
|
|
288
|
+
return {
|
|
289
|
+
recipe: extractRecipeFromHtml(settledHtml),
|
|
290
|
+
html: settledHtml,
|
|
291
|
+
};
|
|
102
292
|
}
|
|
103
|
-
catch {
|
|
293
|
+
catch (error) {
|
|
294
|
+
if (signal?.aborted || (error instanceof Error && error.name === 'AbortError')) {
|
|
295
|
+
throw createAbortError();
|
|
296
|
+
}
|
|
297
|
+
onStatus?.({ phase: 'browser', message: 'Browser extraction failed. Preparing AI fallback…' });
|
|
298
|
+
return { recipe: null, html: settledHtml };
|
|
299
|
+
}
|
|
300
|
+
finally {
|
|
301
|
+
signal?.removeEventListener('abort', onAbort);
|
|
104
302
|
if (browser) {
|
|
105
303
|
try {
|
|
106
304
|
await browser.close();
|
|
107
305
|
}
|
|
108
|
-
catch {
|
|
306
|
+
catch {
|
|
307
|
+
// Ignore close errors during teardown.
|
|
308
|
+
}
|
|
109
309
|
}
|
|
110
|
-
return null;
|
|
111
310
|
}
|
|
112
311
|
}
|
|
113
|
-
async function scrapeWithAI(url) {
|
|
312
|
+
async function scrapeWithAI(url, pageSource, signal) {
|
|
313
|
+
throwIfAborted(signal);
|
|
114
314
|
const { openaiApiKey } = loadConfig();
|
|
115
315
|
if (!openaiApiKey || openaiApiKey === 'YOUR_API_KEY_HERE') {
|
|
116
316
|
throw new Error('OpenAI API key not found. Create a .env.local file with OPENAI_API_KEY=your_key');
|
|
117
317
|
}
|
|
118
318
|
const client = new OpenAI({ apiKey: openaiApiKey });
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
319
|
+
const { signal: timedSignal, cleanup } = createTimedSignal(signal, AI_TIMEOUT_MS);
|
|
320
|
+
let response;
|
|
321
|
+
try {
|
|
322
|
+
response = await client.chat.completions.create({
|
|
323
|
+
model: 'gpt-4o-mini',
|
|
324
|
+
messages: [
|
|
325
|
+
{
|
|
326
|
+
role: 'system',
|
|
327
|
+
content: 'You extract recipe data from supplied page content. Use only the provided page content. ' +
|
|
328
|
+
'Return a JSON object with optional name, prepTime, cookTime, totalTime, recipeIngredient, and recipeInstructions fields.',
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
role: 'user',
|
|
332
|
+
content: `Recipe URL: ${url}\n\n` +
|
|
333
|
+
'Page content:\n' +
|
|
334
|
+
pageSource,
|
|
335
|
+
},
|
|
336
|
+
],
|
|
337
|
+
response_format: { type: 'json_object' },
|
|
338
|
+
}, { signal: timedSignal });
|
|
339
|
+
}
|
|
340
|
+
catch (error) {
|
|
341
|
+
if (timedSignal.aborted) {
|
|
342
|
+
throw formatTimeoutError('AI recipe extraction timed out', signal);
|
|
343
|
+
}
|
|
344
|
+
throw error;
|
|
345
|
+
}
|
|
346
|
+
finally {
|
|
347
|
+
cleanup();
|
|
348
|
+
}
|
|
132
349
|
const content = response.choices[0]?.message?.content;
|
|
133
350
|
if (!content)
|
|
134
351
|
throw new Error('AI returned empty response');
|
|
135
|
-
const recipe = JSON.parse(content);
|
|
136
|
-
|
|
352
|
+
const recipe = normalizeAiRecipe(JSON.parse(content));
|
|
353
|
+
if (!hasRecipeContent(recipe)) {
|
|
354
|
+
throw new Error('AI could not extract recipe data from the page');
|
|
355
|
+
}
|
|
356
|
+
return recipe;
|
|
137
357
|
}
|
|
138
358
|
/* ------------------------------------------------------------------ */
|
|
139
359
|
/* Public orchestrator */
|
|
@@ -143,22 +363,34 @@ async function scrapeWithAI(url) {
|
|
|
143
363
|
* Tries Puppeteer-based browser scraping first, falls back to OpenAI.
|
|
144
364
|
* Calls `onStatus` with progress updates so the TUI can reflect each phase.
|
|
145
365
|
*/
|
|
146
|
-
export async function scrapeRecipe(url, onStatus) {
|
|
366
|
+
export async function scrapeRecipe(url, onStatus, signal) {
|
|
367
|
+
const normalizedUrl = normalizeRecipeUrl(url);
|
|
368
|
+
if (!normalizedUrl) {
|
|
369
|
+
const error = new Error('Invalid URL. Please enter a valid http or https recipe URL.');
|
|
370
|
+
onStatus({ phase: 'error', message: error.message });
|
|
371
|
+
throw error;
|
|
372
|
+
}
|
|
147
373
|
// Phase 1 – browser scraping
|
|
148
374
|
onStatus({ phase: 'browser', message: 'Launching browser\u2026' });
|
|
149
|
-
const browserResult = await scrapeWithBrowser(
|
|
150
|
-
if (browserResult) {
|
|
151
|
-
onStatus({ phase: 'done', message: 'Recipe found!', recipe: browserResult });
|
|
152
|
-
return browserResult;
|
|
375
|
+
const browserResult = await scrapeWithBrowser(normalizedUrl, onStatus, signal);
|
|
376
|
+
if (browserResult.recipe) {
|
|
377
|
+
onStatus({ phase: 'done', message: 'Recipe found!', recipe: browserResult.recipe });
|
|
378
|
+
return browserResult.recipe;
|
|
153
379
|
}
|
|
154
380
|
// Phase 2 – AI fallback
|
|
155
381
|
onStatus({ phase: 'ai', message: 'Falling back to AI scraper\u2026' });
|
|
156
382
|
try {
|
|
157
|
-
const
|
|
383
|
+
const pageSource = browserResult.html && browserResult.html.trim()
|
|
384
|
+
? limitAiSource(browserResult.html)
|
|
385
|
+
: await fetchAiSource(normalizedUrl, signal);
|
|
386
|
+
const aiResult = await scrapeWithAI(normalizedUrl, pageSource, signal);
|
|
158
387
|
onStatus({ phase: 'done', message: 'Recipe extracted via AI!', recipe: aiResult });
|
|
159
388
|
return aiResult;
|
|
160
389
|
}
|
|
161
390
|
catch (error) {
|
|
391
|
+
if (signal?.aborted || (error instanceof Error && error.name === 'AbortError')) {
|
|
392
|
+
throw createAbortError();
|
|
393
|
+
}
|
|
162
394
|
const message = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
163
395
|
onStatus({ phase: 'error', message });
|
|
164
396
|
throw error;
|
package/dist/theme.d.ts
CHANGED
|
@@ -1,31 +1,91 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
1
|
+
export type ThemeMode = 'light' | 'dark';
|
|
2
|
+
declare const themes: {
|
|
3
|
+
readonly light: {
|
|
4
|
+
readonly mode: "light";
|
|
5
|
+
readonly colors: {
|
|
6
|
+
readonly brand: "#009c3f";
|
|
7
|
+
readonly primary: "#0aa043";
|
|
8
|
+
readonly secondary: "#ffbf69";
|
|
9
|
+
readonly accent: "#ff7f50";
|
|
10
|
+
readonly text: "#17311d";
|
|
11
|
+
readonly muted: "#5f7564";
|
|
12
|
+
readonly subtle: "#7a8b7a";
|
|
13
|
+
readonly error: "#c24141";
|
|
14
|
+
readonly success: "#0aa043";
|
|
15
|
+
readonly warning: "#b7791f";
|
|
16
|
+
readonly info: "#2563eb";
|
|
17
|
+
readonly banner: "#009c3f";
|
|
18
|
+
readonly border: "#b7cbb3";
|
|
19
|
+
readonly borderFocus: "#0aa043";
|
|
20
|
+
readonly label: "#b7791f";
|
|
21
|
+
readonly chip: "#e4efe0";
|
|
22
|
+
readonly recipePaper: "#FDFFF7";
|
|
23
|
+
readonly recipeText: "#0aa043";
|
|
24
|
+
readonly recipeMuted: "#5f7564";
|
|
25
|
+
readonly recipeSubtle: "#43684b";
|
|
26
|
+
readonly recipeBorder: "#0aa043";
|
|
27
|
+
readonly recipeSoft: "#dcead5";
|
|
28
|
+
readonly recipePanel: "#f7fbef";
|
|
29
|
+
};
|
|
30
|
+
readonly symbols: {
|
|
31
|
+
readonly bullet: "•";
|
|
32
|
+
readonly arrow: "→";
|
|
33
|
+
readonly check: "✓";
|
|
34
|
+
readonly cross: "✗";
|
|
35
|
+
readonly dot: "·";
|
|
36
|
+
readonly ellipsis: "…";
|
|
37
|
+
readonly line: "─";
|
|
38
|
+
readonly active: "◉";
|
|
39
|
+
readonly pending: "○";
|
|
40
|
+
readonly skip: "−";
|
|
41
|
+
};
|
|
20
42
|
};
|
|
21
|
-
readonly
|
|
22
|
-
readonly
|
|
23
|
-
readonly
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
43
|
+
readonly dark: {
|
|
44
|
+
readonly mode: "dark";
|
|
45
|
+
readonly colors: {
|
|
46
|
+
readonly brand: "#009c3f";
|
|
47
|
+
readonly primary: "#86c06c";
|
|
48
|
+
readonly secondary: "#ffbf69";
|
|
49
|
+
readonly accent: "#ff7f50";
|
|
50
|
+
readonly text: "#e7eef8";
|
|
51
|
+
readonly muted: "#97a3b0";
|
|
52
|
+
readonly subtle: "#64748b";
|
|
53
|
+
readonly error: "#ff7b7b";
|
|
54
|
+
readonly success: "#7bd389";
|
|
55
|
+
readonly warning: "#ffd166";
|
|
56
|
+
readonly info: "#6ec5ff";
|
|
57
|
+
readonly banner: "#e7eef8";
|
|
58
|
+
readonly border: "#2c394d";
|
|
59
|
+
readonly borderFocus: "#86c06c";
|
|
60
|
+
readonly label: "#ffbf69";
|
|
61
|
+
readonly chip: "#192132";
|
|
62
|
+
readonly recipePaper: "#0F1729";
|
|
63
|
+
readonly recipeText: "#8de58b";
|
|
64
|
+
readonly recipeMuted: "#b7c2d4";
|
|
65
|
+
readonly recipeSubtle: "#8b99ad";
|
|
66
|
+
readonly recipeBorder: "#009c3f";
|
|
67
|
+
readonly recipeSoft: "#2b3c34";
|
|
68
|
+
readonly recipePanel: "#111b30";
|
|
69
|
+
};
|
|
70
|
+
readonly symbols: {
|
|
71
|
+
readonly bullet: "•";
|
|
72
|
+
readonly arrow: "→";
|
|
73
|
+
readonly check: "✓";
|
|
74
|
+
readonly cross: "✗";
|
|
75
|
+
readonly dot: "·";
|
|
76
|
+
readonly ellipsis: "…";
|
|
77
|
+
readonly line: "─";
|
|
78
|
+
readonly active: "◉";
|
|
79
|
+
readonly pending: "○";
|
|
80
|
+
readonly skip: "−";
|
|
81
|
+
};
|
|
29
82
|
};
|
|
30
83
|
};
|
|
31
|
-
export type Theme = typeof
|
|
84
|
+
export type Theme = (typeof themes)[ThemeMode];
|
|
85
|
+
export declare function resolveInitialThemeMode(env?: NodeJS.ProcessEnv): ThemeMode;
|
|
86
|
+
export declare function detectPreferredThemeMode(env?: NodeJS.ProcessEnv): Promise<ThemeMode>;
|
|
87
|
+
export declare function getTheme(mode: ThemeMode): Theme;
|
|
88
|
+
export declare function setActiveTheme(mode: ThemeMode): Theme;
|
|
89
|
+
export declare function toggleThemeMode(mode: ThemeMode): ThemeMode;
|
|
90
|
+
export declare const theme: Theme;
|
|
91
|
+
export {};
|