soustack 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -6
- package/dist/cli/index.js +60 -12
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.mts +55 -6
- package/dist/index.d.ts +55 -6
- package/dist/index.js +65 -12
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +65 -13
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1983,14 +1983,30 @@ function extractRecipe(html) {
|
|
|
1983
1983
|
return extractRecipeBrowser(html);
|
|
1984
1984
|
}
|
|
1985
1985
|
const jsonLdRecipe = extractJsonLd(html);
|
|
1986
|
-
|
|
1987
|
-
|
|
1986
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
1987
|
+
try {
|
|
1988
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
1989
|
+
if (globalFetch) {
|
|
1990
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
|
|
1991
|
+
});
|
|
1992
|
+
}
|
|
1993
|
+
} catch {
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1988
1996
|
if (jsonLdRecipe) {
|
|
1989
1997
|
return { recipe: jsonLdRecipe, source: "jsonld" };
|
|
1990
1998
|
}
|
|
1991
1999
|
const microdataRecipe = extractMicrodata(html);
|
|
1992
|
-
|
|
1993
|
-
|
|
2000
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
2001
|
+
try {
|
|
2002
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2003
|
+
if (globalFetch) {
|
|
2004
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
|
|
2005
|
+
});
|
|
2006
|
+
}
|
|
2007
|
+
} catch {
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
1994
2010
|
if (microdataRecipe) {
|
|
1995
2011
|
return { recipe: microdataRecipe, source: "microdata" };
|
|
1996
2012
|
}
|
|
@@ -1999,20 +2015,52 @@ function extractRecipe(html) {
|
|
|
1999
2015
|
|
|
2000
2016
|
// src/scraper/index.ts
|
|
2001
2017
|
async function scrapeRecipe(url, options = {}) {
|
|
2002
|
-
|
|
2003
|
-
|
|
2018
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
2019
|
+
try {
|
|
2020
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2021
|
+
if (globalFetch) {
|
|
2022
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
|
|
2023
|
+
});
|
|
2024
|
+
}
|
|
2025
|
+
} catch {
|
|
2026
|
+
}
|
|
2027
|
+
}
|
|
2004
2028
|
const html = await fetchPage(url, options);
|
|
2005
|
-
|
|
2006
|
-
|
|
2029
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
2030
|
+
try {
|
|
2031
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2032
|
+
if (globalFetch) {
|
|
2033
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
|
|
2034
|
+
});
|
|
2035
|
+
}
|
|
2036
|
+
} catch {
|
|
2037
|
+
}
|
|
2038
|
+
}
|
|
2007
2039
|
const { recipe } = extractRecipe(html);
|
|
2008
|
-
|
|
2009
|
-
|
|
2040
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
2041
|
+
try {
|
|
2042
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2043
|
+
if (globalFetch) {
|
|
2044
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
|
|
2045
|
+
});
|
|
2046
|
+
}
|
|
2047
|
+
} catch {
|
|
2048
|
+
}
|
|
2049
|
+
}
|
|
2010
2050
|
if (!recipe) {
|
|
2011
2051
|
throw new Error("No Schema.org recipe data found in page");
|
|
2012
2052
|
}
|
|
2013
2053
|
const soustackRecipe = fromSchemaOrg(recipe);
|
|
2014
|
-
|
|
2015
|
-
|
|
2054
|
+
if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
|
|
2055
|
+
try {
|
|
2056
|
+
const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
|
|
2057
|
+
if (globalFetch) {
|
|
2058
|
+
globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
|
|
2059
|
+
});
|
|
2060
|
+
}
|
|
2061
|
+
} catch {
|
|
2062
|
+
}
|
|
2063
|
+
}
|
|
2016
2064
|
if (!soustackRecipe) {
|
|
2017
2065
|
throw new Error("Schema.org data did not include a valid recipe");
|
|
2018
2066
|
}
|
|
@@ -2029,6 +2077,10 @@ function extractRecipeFromHTML(html) {
|
|
|
2029
2077
|
}
|
|
2030
2078
|
return soustackRecipe;
|
|
2031
2079
|
}
|
|
2080
|
+
/**
 * Runs `extractRecipe` on the supplied markup and hands back only the
 * `recipe` property of its result; any other property on that result
 * is discarded.
 *
 * @param {*} html - Passed unchanged to `extractRecipe` (presumably an
 *   HTML string; confirm against callers).
 * @returns {*} The value of `recipe` on the object `extractRecipe` returns.
 */
function extractSchemaOrgRecipeFromHTML(html) {
  const extraction = extractRecipe(html);
  return extraction.recipe;
}
|
|
2032
2084
|
|
|
2033
2085
|
// src/parsers/yield.ts
|
|
2034
2086
|
var RANGE_PATTERN = /^(\d+)(?:\s*(?:[-–—]|to)\s*)(\d+)\s+(.+)$/i;
|
|
@@ -2272,6 +2324,6 @@ function wordToNumber(word) {
|
|
|
2272
2324
|
return null;
|
|
2273
2325
|
}
|
|
2274
2326
|
|
|
2275
|
-
export { extractRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
|
|
2327
|
+
export { extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, formatDuration, formatYield2 as formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield2 as parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
|
|
2276
2328
|
//# sourceMappingURL=index.mjs.map
|
|
2277
2329
|
//# sourceMappingURL=index.mjs.map
|