soustack 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,26 @@
1
1
  /**
2
- * Soustack Recipe Schema v0.2.1
2
+ * Soustack Recipe Schema v0.3.0
3
3
  * A portable, scalable, interoperable recipe format.
4
4
  */
5
5
  interface SoustackRecipe {
6
+ /** Document marker for Soustack recipes */
7
+ '@type'?: 'Recipe';
6
8
  /** Optional $schema pointer for profile-aware validation */
7
9
  $schema?: string;
10
+ /** Optional declared validation profile */
11
+ profile?: string;
12
+ /** Recipe level: "lite" or "base" */
13
+ level?: "lite" | "base";
14
+ /** Stack declarations as a map: Record<stackName, versionNumber> */
15
+ stacks?: Record<string, number>;
16
+ /** Attribution stack payload */
17
+ attribution?: AttributionModule;
18
+ /** Taxonomy stack payload */
19
+ taxonomy?: TaxonomyModule;
20
+ /** Media stack payload */
21
+ media?: MediaModule;
22
+ /** Times stack payload */
23
+ times?: TimesModule;
8
24
  /** Unique identifier (slug or UUID) */
9
25
  id?: string;
10
26
  /** Optional display title */
@@ -190,15 +206,27 @@ interface Alternative {
190
206
  dietary?: string[];
191
207
  }
192
208
  interface NutritionFacts {
193
- calories?: string;
194
- fatContent?: string;
195
- carbohydrateContent?: string;
196
- proteinContent?: string;
197
- fiberContent?: string;
198
- sugarContent?: string;
199
- sodiumContent?: string;
200
- servingSize?: string;
201
- [key: string]: string | number | null | string[] | undefined;
209
+ calories?: number;
210
+ protein_g?: number;
211
+ }
212
+ interface AttributionModule {
213
+ url?: string;
214
+ author?: string;
215
+ datePublished?: string;
216
+ }
217
+ interface TaxonomyModule {
218
+ keywords?: string[];
219
+ category?: string;
220
+ cuisine?: string;
221
+ }
222
+ interface MediaModule {
223
+ images?: string[];
224
+ videos?: string[];
225
+ }
226
+ interface TimesModule {
227
+ prepMinutes?: number;
228
+ cookMinutes?: number;
229
+ totalMinutes?: number;
202
230
  }
203
231
 
204
232
  interface HowToStep {
@@ -1,10 +1,26 @@
1
1
  /**
2
- * Soustack Recipe Schema v0.2.1
2
+ * Soustack Recipe Schema v0.3.0
3
3
  * A portable, scalable, interoperable recipe format.
4
4
  */
5
5
  interface SoustackRecipe {
6
+ /** Document marker for Soustack recipes */
7
+ '@type'?: 'Recipe';
6
8
  /** Optional $schema pointer for profile-aware validation */
7
9
  $schema?: string;
10
+ /** Optional declared validation profile */
11
+ profile?: string;
12
+ /** Recipe level: "lite" or "base" */
13
+ level?: "lite" | "base";
14
+ /** Stack declarations as a map: Record<stackName, versionNumber> */
15
+ stacks?: Record<string, number>;
16
+ /** Attribution stack payload */
17
+ attribution?: AttributionModule;
18
+ /** Taxonomy stack payload */
19
+ taxonomy?: TaxonomyModule;
20
+ /** Media stack payload */
21
+ media?: MediaModule;
22
+ /** Times stack payload */
23
+ times?: TimesModule;
8
24
  /** Unique identifier (slug or UUID) */
9
25
  id?: string;
10
26
  /** Optional display title */
@@ -190,15 +206,27 @@ interface Alternative {
190
206
  dietary?: string[];
191
207
  }
192
208
  interface NutritionFacts {
193
- calories?: string;
194
- fatContent?: string;
195
- carbohydrateContent?: string;
196
- proteinContent?: string;
197
- fiberContent?: string;
198
- sugarContent?: string;
199
- sodiumContent?: string;
200
- servingSize?: string;
201
- [key: string]: string | number | null | string[] | undefined;
209
+ calories?: number;
210
+ protein_g?: number;
211
+ }
212
+ interface AttributionModule {
213
+ url?: string;
214
+ author?: string;
215
+ datePublished?: string;
216
+ }
217
+ interface TaxonomyModule {
218
+ keywords?: string[];
219
+ category?: string;
220
+ cuisine?: string;
221
+ }
222
+ interface MediaModule {
223
+ images?: string[];
224
+ videos?: string[];
225
+ }
226
+ interface TimesModule {
227
+ prepMinutes?: number;
228
+ cookMinutes?: number;
229
+ totalMinutes?: number;
202
230
  }
203
231
 
204
232
  interface HowToStep {
@@ -130,6 +130,92 @@ function extractUrl(value) {
130
130
  return trimmed || void 0;
131
131
  }
132
132
 
133
+ // src/normalize.ts
134
+ function normalizeRecipe(input) {
135
+ if (!input || typeof input !== "object") {
136
+ throw new Error("Recipe input must be an object");
137
+ }
138
+ const recipe = JSON.parse(JSON.stringify(input));
139
+ const warnings = [];
140
+ const legacyField = ["mod", "ules"].join("");
141
+ if (legacyField in recipe) {
142
+ throw new Error("The legacy field is no longer supported. Use `stacks` instead.");
143
+ }
144
+ normalizeStacks(recipe, warnings);
145
+ if (!recipe.stacks) {
146
+ recipe.stacks = {};
147
+ }
148
+ if (recipe && typeof recipe === "object" && "version" in recipe && !recipe.recipeVersion && typeof recipe.version === "string") {
149
+ recipe.recipeVersion = recipe.version;
150
+ warnings.push("'version' is deprecated; mapped to 'recipeVersion'.");
151
+ }
152
+ normalizeTime(recipe);
153
+ return {
154
+ recipe,
155
+ warnings
156
+ };
157
+ }
158
+ function normalizeStacks(recipe, warnings) {
159
+ let stacks = {};
160
+ if (recipe.stacks && typeof recipe.stacks === "object" && !Array.isArray(recipe.stacks)) {
161
+ for (const [key, value] of Object.entries(recipe.stacks)) {
162
+ if (typeof value === "number" && Number.isInteger(value) && value >= 1) {
163
+ stacks[key] = value;
164
+ } else {
165
+ warnings.push(`Invalid stack version for '${key}': expected positive integer, got ${value}`);
166
+ }
167
+ }
168
+ }
169
+ if (Array.isArray(recipe.stacks)) {
170
+ const stackIdentifiers = recipe.stacks.filter((s) => typeof s === "string");
171
+ for (const identifier of stackIdentifiers) {
172
+ const parsed = parseStackIdentifier(identifier);
173
+ if (parsed) {
174
+ const { name, version } = parsed;
175
+ if (!stacks[name] || stacks[name] < version) {
176
+ stacks[name] = version;
177
+ }
178
+ } else {
179
+ warnings.push(`Invalid stack identifier '${identifier}': expected format 'name@version' (e.g., 'scaling@1')`);
180
+ }
181
+ }
182
+ }
183
+ recipe.stacks = stacks;
184
+ }
185
+ function parseStackIdentifier(identifier) {
186
+ if (typeof identifier !== "string" || !identifier.trim()) {
187
+ return null;
188
+ }
189
+ const match = identifier.trim().match(/^([a-z0-9_-]+)@(\d+)$/i);
190
+ if (!match) {
191
+ return null;
192
+ }
193
+ const [, name, versionStr] = match;
194
+ const version = parseInt(versionStr, 10);
195
+ if (isNaN(version) || version < 1) {
196
+ return null;
197
+ }
198
+ return { name, version };
199
+ }
200
+ function normalizeTime(recipe) {
201
+ const time = recipe?.time;
202
+ if (!time || typeof time !== "object" || Array.isArray(time)) return;
203
+ const structuredKeys = [
204
+ "prep",
205
+ "active",
206
+ "passive",
207
+ "total"
208
+ ];
209
+ structuredKeys.forEach((key) => {
210
+ const value = time[key];
211
+ if (typeof value === "number") return;
212
+ const parsed = parseDuration(value);
213
+ if (parsed !== null) {
214
+ time[key] = parsed;
215
+ }
216
+ });
217
+ }
218
+
133
219
  // src/fromSchemaOrg.ts
134
220
  function fromSchemaOrg(input) {
135
221
  const recipeNode = extractRecipeNode(input);
@@ -143,8 +229,22 @@ function fromSchemaOrg(input) {
143
229
  const tags = collectTags(recipeNode.recipeCuisine, recipeNode.keywords);
144
230
  const category = extractFirst(recipeNode.recipeCategory);
145
231
  const source = convertSource(recipeNode);
146
- const nutrition = recipeNode.nutrition && typeof recipeNode.nutrition === "object" ? recipeNode.nutrition : void 0;
147
- return {
232
+ const dateModified = recipeNode.dateModified || void 0;
233
+ const nutrition = convertNutrition(recipeNode.nutrition);
234
+ const attribution = convertAttribution(recipeNode);
235
+ const taxonomy = convertTaxonomy(tags, category, extractFirst(recipeNode.recipeCuisine));
236
+ const media = convertMedia(recipeNode.image, recipeNode.video);
237
+ const times = convertTimes(time);
238
+ const stacks = {};
239
+ if (attribution) stacks.attribution = 1;
240
+ if (taxonomy) stacks.taxonomy = 1;
241
+ if (media) stacks.media = 1;
242
+ if (nutrition) stacks.nutrition = 1;
243
+ if (times) stacks.times = 1;
244
+ const rawRecipe = {
245
+ "@type": "Recipe",
246
+ profile: "minimal",
247
+ stacks,
148
248
  name: recipeNode.name.trim(),
149
249
  description: recipeNode.description?.trim() || void 0,
150
250
  image: normalizeImage(recipeNode.image),
@@ -152,13 +252,19 @@ function fromSchemaOrg(input) {
152
252
  tags: tags.length ? tags : void 0,
153
253
  source,
154
254
  dateAdded: recipeNode.datePublished || void 0,
155
- dateModified: recipeNode.dateModified || void 0,
156
255
  yield: recipeYield,
157
256
  time,
158
257
  ingredients,
159
258
  instructions,
160
- nutrition
259
+ ...dateModified ? { dateModified } : {},
260
+ ...nutrition ? { nutrition } : {},
261
+ ...attribution ? { attribution } : {},
262
+ ...taxonomy ? { taxonomy } : {},
263
+ ...media ? { media } : {},
264
+ ...times ? { times } : {}
161
265
  };
266
+ const { recipe } = normalizeRecipe(rawRecipe);
267
+ return recipe;
162
268
  }
163
269
  function extractRecipeNode(input) {
164
270
  if (!input) return null;
@@ -370,6 +476,90 @@ function extractEntityName(value) {
370
476
  }
371
477
  return void 0;
372
478
  }
479
+ function convertAttribution(recipe) {
480
+ const attribution = {};
481
+ const url = (recipe.url || recipe.mainEntityOfPage)?.trim();
482
+ const author = extractEntityName(recipe.author);
483
+ const datePublished = recipe.datePublished?.trim();
484
+ if (url) attribution.url = url;
485
+ if (author) attribution.author = author;
486
+ if (datePublished) attribution.datePublished = datePublished;
487
+ return Object.keys(attribution).length ? attribution : void 0;
488
+ }
489
+ function convertTaxonomy(keywords, category, cuisine) {
490
+ const taxonomy = {};
491
+ if (keywords.length) taxonomy.keywords = keywords;
492
+ if (category) taxonomy.category = category;
493
+ if (cuisine) taxonomy.cuisine = cuisine;
494
+ return Object.keys(taxonomy).length ? taxonomy : void 0;
495
+ }
496
+ function normalizeMediaList(value) {
497
+ if (!value) return [];
498
+ if (typeof value === "string") return [value.trim()].filter(Boolean);
499
+ if (Array.isArray(value)) {
500
+ return value.map((item) => typeof item === "string" ? item.trim() : extractMediaUrl(item)).filter((entry) => Boolean(entry?.length));
501
+ }
502
+ const url = extractMediaUrl(value);
503
+ return url ? [url] : [];
504
+ }
505
+ function extractMediaUrl(value) {
506
+ if (value && typeof value === "object" && "url" in value && typeof value.url === "string") {
507
+ const trimmed = value.url.trim();
508
+ return trimmed || void 0;
509
+ }
510
+ return void 0;
511
+ }
512
+ function convertMedia(image, video) {
513
+ const normalizedImage = normalizeImage(image);
514
+ const images = normalizedImage ? Array.isArray(normalizedImage) ? normalizedImage : [normalizedImage] : [];
515
+ const videos = normalizeMediaList(video);
516
+ const media = {};
517
+ if (images.length) media.images = images;
518
+ if (videos.length) media.videos = videos;
519
+ return Object.keys(media).length ? media : void 0;
520
+ }
521
+ function convertTimes(time) {
522
+ if (!time) return void 0;
523
+ const times = {};
524
+ if (typeof time.prep === "number") times.prepMinutes = time.prep;
525
+ if (typeof time.active === "number") times.cookMinutes = time.active;
526
+ if (typeof time.total === "number") times.totalMinutes = time.total;
527
+ return Object.keys(times).length ? times : void 0;
528
+ }
529
+ function convertNutrition(nutrition) {
530
+ if (!nutrition || typeof nutrition !== "object") {
531
+ return void 0;
532
+ }
533
+ const result = {};
534
+ let hasData = false;
535
+ if ("calories" in nutrition) {
536
+ const calories = nutrition.calories;
537
+ if (typeof calories === "number") {
538
+ result.calories = calories;
539
+ hasData = true;
540
+ } else if (typeof calories === "string") {
541
+ const parsed = parseFloat(calories.replace(/[^\d.-]/g, ""));
542
+ if (!isNaN(parsed)) {
543
+ result.calories = parsed;
544
+ hasData = true;
545
+ }
546
+ }
547
+ }
548
+ if ("proteinContent" in nutrition || "protein_g" in nutrition) {
549
+ const protein = nutrition.proteinContent || nutrition.protein_g;
550
+ if (typeof protein === "number") {
551
+ result.protein_g = protein;
552
+ hasData = true;
553
+ } else if (typeof protein === "string") {
554
+ const parsed = parseFloat(protein.replace(/[^\d.-]/g, ""));
555
+ if (!isNaN(parsed)) {
556
+ result.protein_g = parsed;
557
+ hasData = true;
558
+ }
559
+ }
560
+ }
561
+ return hasData ? result : void 0;
562
+ }
373
563
 
374
564
  // src/scraper/fetch.ts
375
565
  var DEFAULT_USER_AGENTS = [
@@ -435,13 +625,16 @@ async function fetchPage(url, options = {}) {
435
625
  const response = await resolvedFetch(url, requestInit);
436
626
  clearTimeout(timeoutId);
437
627
  if (response && typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
438
- try {
439
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
440
- if (globalFetch) {
441
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
442
- });
628
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
629
+ if (ingestUrl) {
630
+ try {
631
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
632
+ if (globalFetch) {
633
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:63", message: "fetch response", data: { url, status: response.status, statusText: response.statusText, ok: response.ok, isNYTimes: url.includes("nytimes.com") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
634
+ });
635
+ }
636
+ } catch {
443
637
  }
444
- } catch {
445
638
  }
446
639
  }
447
640
  if (!response.ok) {
@@ -453,13 +646,16 @@ async function fetchPage(url, options = {}) {
453
646
  }
454
647
  const html = await response.text();
455
648
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
456
- try {
457
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
458
- if (globalFetch) {
459
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
460
- });
649
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
650
+ if (ingestUrl) {
651
+ try {
652
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
653
+ if (globalFetch) {
654
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/fetch.ts:75", message: "HTML received", data: { htmlLength: html.length, hasLoginPage: html.toLowerCase().includes("login") || html.toLowerCase().includes("sign in"), hasRecipeData: html.includes("application/ld+json") || html.includes("schema.org/Recipe") }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B,D" }) }).catch(() => {
655
+ });
656
+ }
657
+ } catch {
461
658
  }
462
- } catch {
463
659
  }
464
660
  }
465
661
  return html;
@@ -489,8 +685,6 @@ function isRecipeNode(value) {
489
685
  return false;
490
686
  }
491
687
  const type = value["@type"];
492
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/utils.ts:14", message: "isRecipeNode check", data: { type, typeLower: typeof type === "string" ? type.toLowerCase() : Array.isArray(type) ? type.map((t) => typeof t === "string" ? t.toLowerCase() : t) : void 0, isMatch: typeof type === "string" ? RECIPE_TYPES.has(type.toLowerCase()) : Array.isArray(type) ? type.some((e) => typeof e === "string" && RECIPE_TYPES.has(e.toLowerCase())) : false }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
493
- });
494
688
  if (typeof type === "string") {
495
689
  return RECIPE_TYPES.has(type.toLowerCase());
496
690
  }
@@ -518,20 +712,14 @@ function normalizeText(value) {
518
712
  function extractJsonLd(html) {
519
713
  const $ = cheerio.load(html);
520
714
  const scripts = $('script[type="application/ld+json"]');
521
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:8", message: "JSON-LD scripts found", data: { scriptCount: scripts.length }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
522
- });
523
715
  const candidates = [];
524
716
  scripts.each((_, element) => {
525
717
  const content = $(element).html();
526
718
  if (!content) return;
527
719
  const parsed = safeJsonParse(content);
528
720
  if (!parsed) return;
529
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:18", message: "JSON-LD parsed", data: { hasGraph: !!(parsed && typeof parsed === "object" && "@graph" in parsed), type: parsed && typeof parsed === "object" && "@type" in parsed ? parsed["@type"] : void 0 }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
530
- });
531
721
  collectCandidates(parsed, candidates);
532
722
  });
533
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/jsonld.ts:22", message: "JSON-LD candidates", data: { candidateCount: candidates.length, candidateTypes: candidates.map((c) => c["@type"]) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C" }) }).catch(() => {
534
- });
535
723
  return candidates[0] ?? null;
536
724
  }
537
725
  function collectCandidates(payload, bucket) {
@@ -713,13 +901,16 @@ function extractRecipe(html) {
713
901
  }
714
902
  const jsonLdRecipe = extractJsonLd(html);
715
903
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
716
- try {
717
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
718
- if (globalFetch) {
719
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
720
- });
904
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
905
+ if (ingestUrl) {
906
+ try {
907
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
908
+ if (globalFetch) {
909
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
910
+ });
911
+ }
912
+ } catch {
721
913
  }
722
- } catch {
723
914
  }
724
915
  }
725
916
  if (jsonLdRecipe) {
@@ -727,13 +918,16 @@ function extractRecipe(html) {
727
918
  }
728
919
  const microdataRecipe = extractMicrodata(html);
729
920
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
730
- try {
731
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
732
- if (globalFetch) {
733
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
734
- });
921
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
922
+ if (ingestUrl) {
923
+ try {
924
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
925
+ if (globalFetch) {
926
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
927
+ });
928
+ }
929
+ } catch {
735
930
  }
736
- } catch {
737
931
  }
738
932
  }
739
933
  if (microdataRecipe) {
@@ -745,35 +939,44 @@ function extractRecipe(html) {
745
939
  // src/scraper/index.ts
746
940
  async function scrapeRecipe(url, options = {}) {
747
941
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
748
- try {
749
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
750
- if (globalFetch) {
751
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
752
- });
942
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
943
+ if (ingestUrl) {
944
+ try {
945
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
946
+ if (globalFetch) {
947
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
948
+ });
949
+ }
950
+ } catch {
753
951
  }
754
- } catch {
755
952
  }
756
953
  }
757
954
  const html = await fetchPage(url, options);
758
955
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
759
- try {
760
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
761
- if (globalFetch) {
762
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
763
- });
956
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
957
+ if (ingestUrl) {
958
+ try {
959
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
960
+ if (globalFetch) {
961
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
962
+ });
963
+ }
964
+ } catch {
764
965
  }
765
- } catch {
766
966
  }
767
967
  }
768
968
  const { recipe } = extractRecipe(html);
769
969
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
770
- try {
771
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
772
- if (globalFetch) {
773
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
774
- });
970
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
971
+ if (ingestUrl) {
972
+ try {
973
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
974
+ if (globalFetch) {
975
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
976
+ });
977
+ }
978
+ } catch {
775
979
  }
776
- } catch {
777
980
  }
778
981
  }
779
982
  if (!recipe) {
@@ -781,13 +984,16 @@ async function scrapeRecipe(url, options = {}) {
781
984
  }
782
985
  const soustackRecipe = fromSchemaOrg(recipe);
783
986
  if (typeof process !== "undefined" && process.env.NODE_ENV !== "test") {
784
- try {
785
- const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
786
- if (globalFetch) {
787
- globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
788
- });
987
+ const ingestUrl = process.env.SOUSTACK_DEBUG_INGEST_URL;
988
+ if (ingestUrl) {
989
+ try {
990
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
991
+ if (globalFetch) {
992
+ globalFetch(ingestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
993
+ });
994
+ }
995
+ } catch {
789
996
  }
790
- } catch {
791
997
  }
792
998
  }
793
999
  if (!soustackRecipe) {
@@ -815,5 +1021,5 @@ exports.extractRecipeFromHTML = extractRecipeFromHTML;
815
1021
  exports.extractSchemaOrgRecipeFromHTML = extractSchemaOrgRecipeFromHTML;
816
1022
  exports.fetchPage = fetchPage;
817
1023
  exports.scrapeRecipe = scrapeRecipe;
818
- //# sourceMappingURL=scrape.js.map
819
- //# sourceMappingURL=scrape.js.map
1024
+ //# sourceMappingURL=index.js.map
1025
+ //# sourceMappingURL=index.js.map