soustack 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -46,19 +46,23 @@ npm install soustack
46
46
  - **Schema.org Conversion**:
47
47
  - `fromSchemaOrg()` (Schema.org JSON-LD → Soustack)
48
48
  - `toSchemaOrg()` (Soustack → Schema.org JSON-LD)
49
- - **Web Scraping**: `scrapeRecipe()` fetches a recipe page and extracts Schema.org recipe data from:
50
- - JSON-LD (`<script type="application/ld+json">`)
51
- - Microdata (`itemscope/itemtype`)
49
+ - **Web Scraping**:
50
+ - `scrapeRecipe()` fetches a recipe page, extracts its Schema.org recipe data, and returns it in Soustack format (Node.js only)
51
+ - `extractRecipeFromHTML()` extracts recipe data from an HTML string and returns it in Soustack format (browser & Node.js compatible)
52
+ - `extractSchemaOrgRecipeFromHTML()` extracts raw Schema.org recipe data from an HTML string (browser & Node.js compatible)
53
+ - Supports JSON-LD (`<script type="application/ld+json">`) and Microdata (`itemscope/itemtype`)
52
54
 
53
55
  ## Programmatic Usage
54
56
 
55
57
  ```ts
56
58
  import {
57
59
  scrapeRecipe,
60
+ extractRecipeFromHTML,
61
+ extractSchemaOrgRecipeFromHTML,
58
62
  fromSchemaOrg,
59
63
  toSchemaOrg,
60
64
  validateRecipe,
61
- scaleRecipe
65
+ scaleRecipe,
62
66
  } from 'soustack';
63
67
 
64
68
  // Validate a Soustack recipe JSON object
@@ -67,9 +71,20 @@ validateRecipe(recipe);
67
71
  // Scale a recipe to a target yield amount (returns a "computed recipe")
68
72
  const computed = scaleRecipe(recipe, 2);
69
73
 
70
- // Scrape a URL into a Soustack recipe (throws if no recipe is found)
74
+ // Scrape a URL into a Soustack recipe (Node.js only, throws if no recipe is found)
71
75
  const scraped = await scrapeRecipe('https://example.com/recipe');
72
76
 
77
+ // Extract recipe from HTML string (browser & Node.js compatible)
78
+ // Option 1: Get Soustack format directly
79
+ const html = await fetch('https://example.com/recipe').then((r) => r.text());
80
+ const recipe = extractRecipeFromHTML(html);
81
+
82
+ // Option 2: Get Schema.org format first (for inspection/modification)
83
+ const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
84
+ if (schemaOrgRecipe) {
85
+ const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
86
+ }
87
+
73
88
  // Convert Schema.org → Soustack
74
89
  const soustack = fromSchemaOrg(schemaOrgJsonLd);
75
90
 
@@ -88,9 +103,13 @@ const soustackRecipe = fromSchemaOrg(schemaOrgJsonLd);
88
103
  const schemaOrgRecipe = toSchemaOrg(soustackRecipe);
89
104
  ```
90
105
 
91
- ## 🧰 Scraping Options
106
+ ## 🧰 Web Scraping
107
+
108
+ ### Node.js: `scrapeRecipe()`
92
109
 
93
- `scrapeRecipe(url, options)` supports basic fetch tuning:
110
+ `scrapeRecipe(url, options)` fetches a recipe page, extracts its Schema.org data, and returns a Soustack recipe. **Node.js only**: in browsers, cross-origin requests to recipe sites are typically blocked by CORS.
111
+
112
+ Options:
94
113
 
95
114
  - `timeout` (ms, default `10000`)
96
115
  - `userAgent` (string, optional)
@@ -101,10 +120,56 @@ import { scrapeRecipe } from 'soustack';
101
120
 
102
121
  const recipe = await scrapeRecipe('https://example.com/recipe', {
103
122
  timeout: 15000,
104
- maxRetries: 3
123
+ maxRetries: 3,
105
124
  });
106
125
  ```
107
126
 
127
+ ### Browser: `extractRecipeFromHTML()` and `extractSchemaOrgRecipeFromHTML()`
128
+
129
+ #### `extractRecipeFromHTML()` - Returns Soustack Format
130
+
131
+ `extractRecipeFromHTML(html)` extracts recipe data from an HTML string and returns it in Soustack format. **Works in both browser and Node.js**. Perfect for browser usage where you fetch HTML yourself (with cookies/session for authenticated content).
132
+
133
+ ```ts
134
+ import { extractRecipeFromHTML } from 'soustack';
135
+
136
+ // In browser: fetch the HTML yourself (same-origin requests carry your cookies/session; cross-origin pages must still allow CORS)
137
+ const response = await fetch('https://example.com/recipe');
138
+ const html = await response.text();
139
+ const recipe = extractRecipeFromHTML(html); // Already in Soustack format
140
+ ```
141
+
142
+ #### `extractSchemaOrgRecipeFromHTML()` - Returns Schema.org Format
143
+
144
+ `extractSchemaOrgRecipeFromHTML(html)` extracts the raw Schema.org recipe data from HTML. Returns `null` if no recipe is found. Use this when you need to inspect, debug, or modify the Schema.org data before converting to Soustack format.
145
+
146
+ ```ts
147
+ import { extractSchemaOrgRecipeFromHTML, fromSchemaOrg } from 'soustack';
148
+
149
+ // In browser: fetch HTML yourself
150
+ const response = await fetch('https://example.com/recipe');
151
+ const html = await response.text();
152
+
153
+ // Extract Schema.org format (for inspection/modification)
154
+ const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
155
+
156
+ if (schemaOrgRecipe) {
157
+ // Inspect or modify Schema.org data before converting
158
+ console.log('Found recipe:', schemaOrgRecipe.name);
159
+
160
+ // Convert to Soustack format when ready
161
+ const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
162
+ }
163
+ ```
164
+
165
+ **Why use these functions in browsers?**
166
+
167
+ - ✅ You control the request — fetch the HTML yourself (same-origin, CORS-enabled, or via your own proxy)
168
+ - ✅ Works with authenticated/paywalled content — uses browser cookies
169
+ - ✅ Smaller bundle — no Node.js dependencies
170
+ - ✅ Universal — works in both browser and Node.js environments
171
+ - ✅ Flexible — choose Schema.org format for inspection/modification, or Soustack format for direct use
172
+
108
173
  ### CLI
109
174
 
110
175
  ```bash
package/dist/cli/index.js CHANGED
@@ -1313,6 +1313,8 @@ function extractRecipeNode(input) {
1313
1313
  function hasRecipeType(value) {
1314
1314
  if (!value) return false;
1315
1315
  const types = Array.isArray(value) ? value : [value];
1316
+ fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "fromSchemaOrg.ts:95", message: "hasRecipeType check", data: { types, typesLower: types.map((t) => typeof t === "string" ? t.toLowerCase() : t), isMatch: types.some((e) => typeof e === "string" && e.toLowerCase() === "recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
1317
+ });
1316
1318
  return types.some(
1317
1319
  (entry) => typeof entry === "string" && entry.toLowerCase() === "recipe"
1318
1320
  );
@@ -1669,18 +1671,26 @@ var DEFAULT_USER_AGENTS = [
1669
1671
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
1670
1672
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
1671
1673
  ];
1672
- var fetchImpl = null;
1673
- async function ensureFetch() {
1674
- if (!fetchImpl) {
1675
- fetchImpl = import('node-fetch').then((mod) => mod.default);
1676
- }
1677
- return fetchImpl;
1678
- }
1679
1674
  function chooseUserAgent(provided) {
1680
1675
  if (provided) return provided;
1681
1676
  const index = Math.floor(Math.random() * DEFAULT_USER_AGENTS.length);
1682
1677
  return DEFAULT_USER_AGENTS[index];
1683
1678
  }
1679
+ function resolveFetch(fetchFn) {
1680
+ if (fetchFn) {
1681
+ return fetchFn;
1682
+ }
1683
+ const globalFetch = globalThis.fetch;
1684
+ if (!globalFetch) {
1685
+ throw new Error(
1686
+ "A global fetch implementation is not available. Provide window.fetch in browsers or upgrade to Node 18+."
1687
+ );
1688
+ }
1689
+ return globalFetch;
1690
+ }
1691
+ function isBrowserEnvironment() {
1692
+ return typeof globalThis.document !== "undefined";
1693
+ }
1684
1694
  function isClientError(error) {
1685
1695
  if (typeof error.status === "number") {
1686
1696
  return error.status >= 400 && error.status < 500;
@@ -1694,25 +1704,40 @@ async function fetchPage(url, options = {}) {
1694
1704
  const {
1695
1705
  timeout = 1e4,
1696
1706
  userAgent,
1697
- maxRetries = 2
1707
+ maxRetries = 2,
1708
+ fetchFn
1698
1709
  } = options;
1699
1710
  let lastError = null;
1711
+ const resolvedFetch = resolveFetch(fetchFn);
1712
+ const isBrowser2 = isBrowserEnvironment();
1700
1713
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
1701
1714
  const controller = new AbortController();
1702
1715
  const timeoutId = setTimeout(() => controller.abort(), timeout);
1703
1716
  try {
1704
- const fetch = await ensureFetch();
1705
1717
  const headers = {
1706
- "User-Agent": chooseUserAgent(userAgent),
1707
1718
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
1708
1719
  "Accept-Language": "en-US,en;q=0.5"
1709
1720
  };
1710
- const response = await fetch(url, {
1721
+ if (!isBrowser2) {
1722
+ headers["User-Agent"] = chooseUserAgent(userAgent);
1723
+ }
1724
+ const requestInit = {
1711
1725
  headers,
1712
1726
  signal: controller.signal,
1713
1727
  redirect: "follow"
1714
- });
1728
+ };
1729
+ const response = await resolvedFetch(url, requestInit);
1715
1730
  clearTimeout(timeoutId);
1731
+ if (response && (typeof process === "undefined" || process.env.NODE_ENV !== "test")) {
1732
+ try {
1733
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1734
+ if (globalFetch) {
1735
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
1736
+ });
1737
+ }
1738
+ } catch {
1739
+ }
1740
+ }
1716
1741
  if (!response.ok) {
1717
1742
  const error = new Error(
1718
1743
  `HTTP ${response.status}: ${response.statusText}`
@@ -1720,7 +1745,18 @@ async function fetchPage(url, options = {}) {
1720
1745
  error.status = response.status;
1721
1746
  throw error;
1722
1747
  }
1723
- return await response.text();
1748
+ const html = await response.text();
1749
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
1750
+ try {
1751
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1752
+ if (globalFetch) {
1753
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
1754
+ });
1755
+ }
1756
+ } catch {
1757
+ }
1758
+ }
1759
+ return html;
1724
1760
  } catch (err) {
1725
1761
  clearTimeout(timeoutId);
1726
1762
  lastError = err instanceof Error ? err : new Error(String(err));
@@ -1747,6 +1783,8 @@ function isRecipeNode(value) {
1747
1783
  return false;
1748
1784
  }
1749
1785
  const type = value["@type"];
1786
+ fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
1787
+ });
1750
1788
  if (typeof type === "string") {
1751
1789
  return RECIPE_TYPES.has(type.toLowerCase());
1752
1790
  }
@@ -1774,14 +1812,20 @@ function normalizeText(value) {
1774
1812
  function extractJsonLd(html) {
1775
1813
  const $ = cheerio.load(html);
1776
1814
  const scripts = $('script[type="application/ld+json"]');
1815
+ fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:8", message: "JSON-LD scripts found", data: { scriptCount: scripts.length }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
1816
+ });
1777
1817
  const candidates = [];
1778
1818
  scripts.each((_, element) => {
1779
1819
  const content = $(element).html();
1780
1820
  if (!content) return;
1781
1821
  const parsed = safeJsonParse(content);
1782
1822
  if (!parsed) return;
1823
+ fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:18", message: "JSON-LD parsed", data: { hasGraph: !!(parsed && typeof parsed === "object" && "@graph" in parsed), type: parsed && typeof parsed === "object" && "@type" in parsed ? parsed["@type"] : void 0 }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
1824
+ });
1783
1825
  collectCandidates(parsed, candidates);
1784
1826
  });
1827
+ fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:22", message: "JSON-LD candidates", data: { candidateCount: candidates.length, candidateTypes: candidates.map((c) => c["@type"]) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
1828
+ });
1785
1829
  return candidates[0] ?? null;
1786
1830
  }
1787
1831
  function collectCandidates(payload, bucket) {
@@ -1853,13 +1897,139 @@ function findPropertyValue($, context, prop) {
1853
1897
  return normalizeText(node.attr("content")) || normalizeText(node.attr("href")) || normalizeText(node.attr("src")) || normalizeText(node.text());
1854
1898
  }
1855
1899
 
1900
+ // src/scraper/extractors/browser.ts
1901
+ var SIMPLE_PROPS2 = ["name", "description", "image", "recipeYield", "prepTime", "cookTime", "totalTime"];
1902
+ function extractRecipeBrowser(html) {
1903
+ const jsonLdRecipe = extractJsonLdBrowser(html);
1904
+ if (jsonLdRecipe) {
1905
+ return { recipe: jsonLdRecipe, source: "jsonld" };
1906
+ }
1907
+ const microdataRecipe = extractMicrodataBrowser(html);
1908
+ if (microdataRecipe) {
1909
+ return { recipe: microdataRecipe, source: "microdata" };
1910
+ }
1911
+ return { recipe: null, source: null };
1912
+ }
1913
+ function extractJsonLdBrowser(html) {
1914
+ if (typeof globalThis.DOMParser === "undefined") {
1915
+ return null;
1916
+ }
1917
+ const parser = new globalThis.DOMParser();
1918
+ const doc = parser.parseFromString(html, "text/html");
1919
+ const scripts = doc.querySelectorAll('script[type="application/ld+json"]');
1920
+ const candidates = [];
1921
+ scripts.forEach((script) => {
1922
+ const content = script.textContent;
1923
+ if (!content) return;
1924
+ const parsed = safeJsonParse(content);
1925
+ if (!parsed) return;
1926
+ collectCandidates2(parsed, candidates);
1927
+ });
1928
+ return candidates[0] ?? null;
1929
+ }
1930
+ function extractMicrodataBrowser(html) {
1931
+ if (typeof globalThis.DOMParser === "undefined") {
1932
+ return null;
1933
+ }
1934
+ const parser = new globalThis.DOMParser();
1935
+ const doc = parser.parseFromString(html, "text/html");
1936
+ const recipeEl = doc.querySelector('[itemscope][itemtype*="schema.org/Recipe"]');
1937
+ if (!recipeEl) {
1938
+ return null;
1939
+ }
1940
+ const recipe = {
1941
+ "@type": "Recipe"
1942
+ };
1943
+ SIMPLE_PROPS2.forEach((prop) => {
1944
+ const value = findPropertyValue2(recipeEl, prop);
1945
+ if (value) {
1946
+ recipe[prop] = value;
1947
+ }
1948
+ });
1949
+ const ingredients = [];
1950
+ recipeEl.querySelectorAll('[itemprop="recipeIngredient"]').forEach((el) => {
1951
+ const text = normalizeText(
1952
+ el.getAttribute("content") || el.textContent || void 0
1953
+ );
1954
+ if (text) ingredients.push(text);
1955
+ });
1956
+ if (ingredients.length) {
1957
+ recipe.recipeIngredient = ingredients;
1958
+ }
1959
+ const instructions = [];
1960
+ recipeEl.querySelectorAll('[itemprop="recipeInstructions"]').forEach((el) => {
1961
+ const text = normalizeText(el.getAttribute("content")) || normalizeText(el.querySelector('[itemprop="text"]')?.textContent || void 0) || normalizeText(el.textContent || void 0);
1962
+ if (text) instructions.push(text);
1963
+ });
1964
+ if (instructions.length) {
1965
+ recipe.recipeInstructions = instructions;
1966
+ }
1967
+ if (recipe.name || ingredients.length) {
1968
+ return recipe;
1969
+ }
1970
+ return null;
1971
+ }
1972
+ function findPropertyValue2(context, prop) {
1973
+ const node = context.querySelector(`[itemprop="${prop}"]`);
1974
+ if (!node) return void 0;
1975
+ return normalizeText(node.getAttribute("content")) || normalizeText(node.getAttribute("href")) || normalizeText(node.getAttribute("src")) || normalizeText(node.textContent || void 0);
1976
+ }
1977
+ function collectCandidates2(payload, bucket) {
1978
+ if (!payload) return;
1979
+ if (Array.isArray(payload)) {
1980
+ payload.forEach((entry) => collectCandidates2(entry, bucket));
1981
+ return;
1982
+ }
1983
+ if (typeof payload !== "object") {
1984
+ return;
1985
+ }
1986
+ if (isRecipeNode(payload)) {
1987
+ bucket.push(payload);
1988
+ return;
1989
+ }
1990
+ const graph = payload["@graph"];
1991
+ if (Array.isArray(graph)) {
1992
+ graph.forEach((entry) => collectCandidates2(entry, bucket));
1993
+ }
1994
+ }
1995
+
1856
1996
  // src/scraper/extractors/index.ts
1997
+ function isBrowser() {
1998
+ try {
1999
+ return typeof globalThis.DOMParser !== "undefined";
2000
+ } catch {
2001
+ return false;
2002
+ }
2003
+ }
1857
2004
  function extractRecipe(html) {
2005
+ if (isBrowser()) {
2006
+ return extractRecipeBrowser(html);
2007
+ }
1858
2008
  const jsonLdRecipe = extractJsonLd(html);
2009
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2010
+ try {
2011
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2012
+ if (globalFetch) {
2013
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
2014
+ });
2015
+ }
2016
+ } catch {
2017
+ }
2018
+ }
1859
2019
  if (jsonLdRecipe) {
1860
2020
  return { recipe: jsonLdRecipe, source: "jsonld" };
1861
2021
  }
1862
2022
  const microdataRecipe = extractMicrodata(html);
2023
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2024
+ try {
2025
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2026
+ if (globalFetch) {
2027
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
2028
+ });
2029
+ }
2030
+ } catch {
2031
+ }
2032
+ }
1863
2033
  if (microdataRecipe) {
1864
2034
  return { recipe: microdataRecipe, source: "microdata" };
1865
2035
  }
@@ -1868,12 +2038,52 @@ function extractRecipe(html) {
1868
2038
 
1869
2039
  // src/scraper/index.ts
1870
2040
  async function scrapeRecipe(url, options = {}) {
2041
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2042
+ try {
2043
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2044
+ if (globalFetch) {
2045
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
2046
+ });
2047
+ }
2048
+ } catch {
2049
+ }
2050
+ }
1871
2051
  const html = await fetchPage(url, options);
2052
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2053
+ try {
2054
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2055
+ if (globalFetch) {
2056
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
2057
+ });
2058
+ }
2059
+ } catch {
2060
+ }
2061
+ }
1872
2062
  const { recipe } = extractRecipe(html);
2063
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2064
+ try {
2065
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2066
+ if (globalFetch) {
2067
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
2068
+ });
2069
+ }
2070
+ } catch {
2071
+ }
2072
+ }
1873
2073
  if (!recipe) {
1874
2074
  throw new Error("No Schema.org recipe data found in page");
1875
2075
  }
1876
2076
  const soustackRecipe = fromSchemaOrg(recipe);
2077
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2078
+ try {
2079
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2080
+ if (globalFetch) {
2081
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
2082
+ });
2083
+ }
2084
+ } catch {
2085
+ }
2086
+ }
1877
2087
  if (!soustackRecipe) {
1878
2088
  throw new Error("Schema.org data did not include a valid recipe");
1879
2089
  }