soustack 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -245,7 +245,7 @@ declare function validateRecipe(data: any): data is Recipe;
245
245
 
246
246
  declare function fromSchemaOrg(input: unknown): Recipe | null;
247
247
 
248
- interface SchemaOrgRecipe {
248
+ interface SchemaOrgRecipe$1 {
249
249
  '@context'?: string;
250
250
  '@type'?: string | string[];
251
251
  name: string;
@@ -270,7 +270,7 @@ interface SchemaOrgRecipe {
270
270
  '@graph'?: unknown;
271
271
  }
272
272
  type SchemaOrgIngredientList = string | string[];
273
- type SchemaOrgInstructionList = string | HowToStep | HowToSection | Array<string | HowToStep | HowToSection>;
273
+ type SchemaOrgInstructionList = string | HowToStep$1 | HowToSection | Array<string | HowToStep$1 | HowToSection>;
274
274
  interface SchemaOrgImageObject {
275
275
  '@type'?: string;
276
276
  url?: string;
@@ -282,7 +282,7 @@ interface SchemaOrgYield {
282
282
  unit?: string;
283
283
  description?: string;
284
284
  }
285
- interface HowToStep {
285
+ interface HowToStep$1 {
286
286
  '@type': 'HowToStep';
287
287
  text?: string;
288
288
  name?: string;
@@ -292,7 +292,7 @@ interface HowToStep {
292
292
  interface HowToSection {
293
293
  '@type': 'HowToSection';
294
294
  name: string;
295
- itemListElement: Array<string | HowToStep | HowToSection>;
295
+ itemListElement: Array<string | HowToStep$1 | HowToSection>;
296
296
  }
297
297
  interface SchemaOrgPersonOrOrganization {
298
298
  '@type'?: 'Person' | 'Organization';
@@ -303,8 +303,30 @@ interface NutritionInformation {
303
303
  [key: string]: string | number | null | undefined;
304
304
  }
305
305
 
306
- declare function toSchemaOrg(recipe: Recipe): SchemaOrgRecipe;
306
+ declare function toSchemaOrg(recipe: Recipe): SchemaOrgRecipe$1;
307
307
 
308
+ interface HowToStep {
309
+ '@type'?: 'HowToStep' | 'HowToSection' | string;
310
+ name?: string;
311
+ text?: string;
312
+ itemListElement?: Array<string | HowToStep>;
313
+ }
314
+ interface SchemaOrgRecipe {
315
+ '@type': string | string[];
316
+ name?: string;
317
+ description?: string;
318
+ image?: string | string[];
319
+ recipeIngredient?: string[];
320
+ recipeInstructions?: Array<string | HowToStep>;
321
+ recipeYield?: string | number;
322
+ prepTime?: string;
323
+ cookTime?: string;
324
+ totalTime?: string;
325
+ author?: unknown;
326
+ datePublished?: string;
327
+ aggregateRating?: unknown;
328
+ [key: string]: unknown;
329
+ }
308
330
  interface FetchRequestInit {
309
331
  headers?: Record<string, string>;
310
332
  signal?: AbortSignal;
@@ -357,6 +379,33 @@ declare function scrapeRecipe(url: string, options?: ScrapeRecipeOptions): Promi
357
379
  * @throws Error if no recipe is found
358
380
  */
359
381
  declare function extractRecipeFromHTML(html: string): Recipe;
382
+ /**
383
+ * Extract Schema.org recipe data from HTML string (browser-compatible).
384
+ *
385
+ * Returns the raw Schema.org recipe object, which can then be converted
386
+ * to Soustack format using fromSchemaOrg(). This gives you access to the
387
+ * original Schema.org data for inspection, debugging, or custom transformations.
388
+ *
389
+ * @param html - HTML string containing Schema.org recipe data
390
+ * @returns Schema.org recipe object, or null if not found
391
+ *
392
+ * @example
393
+ * ```ts
394
+ * // In browser:
395
+ * const response = await fetch('https://example.com/recipe');
396
+ * const html = await response.text();
397
+ * const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
398
+ *
399
+ * if (schemaOrgRecipe) {
400
+ * // Inspect or modify Schema.org data before converting
401
+ * console.log('Found recipe:', schemaOrgRecipe.name);
402
+ *
403
+ * // Convert to Soustack format
404
+ * const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
405
+ * }
406
+ * ```
407
+ */
408
+ declare function extractSchemaOrgRecipeFromHTML(html: string): SchemaOrgRecipe | null;
360
409
 
361
410
  declare function normalizeIngredientInput(input: string): string;
362
411
  declare function parseIngredient(text: string): ParsedIngredient;
@@ -376,4 +425,4 @@ declare function normalizeYield(text: string): string;
376
425
  declare function parseYield(text: string): ParsedYield | null;
377
426
  declare function formatYield(value: ParsedYield): string;
378
427
 
379
- export { type Alternative, type ComputedIngredient, type ComputedInstruction, type ComputedRecipe, type Equipment, type FrozenStorageMethod, type Ingredient, type IngredientItem, type IngredientSubsection, type Instruction, type InstructionItem, type InstructionSubsection, type MakeAheadComponent, type NutritionFacts, type ParsedIngredient, type ParsedYield, type Quantity, type Recipe, type Scaling, type ScalingBakersPercentage, type ScalingBase, type ScalingDiscrete, type ScalingFixed, type ScalingLinear, type ScalingProportional, type SimpleTime, type Source, type StepTiming, type Storage, type StorageMethod, type StructuredTime, type Substitution, type Time, type Yield, extractRecipeFromHTML, formatDuration, formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
428
+ export { type Alternative, type ComputedIngredient, type ComputedInstruction, type ComputedRecipe, type Equipment, type FrozenStorageMethod, type Ingredient, type IngredientItem, type IngredientSubsection, type Instruction, type InstructionItem, type InstructionSubsection, type MakeAheadComponent, type NutritionFacts, type ParsedIngredient, type ParsedYield, type Quantity, type Recipe, type Scaling, type ScalingBakersPercentage, type ScalingBase, type ScalingDiscrete, type ScalingFixed, type ScalingLinear, type ScalingProportional, type SchemaOrgRecipe, type SimpleTime, type Source, type StepTiming, type Storage, type StorageMethod, type StructuredTime, type Substitution, type Time, type Yield, extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, formatDuration, formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
package/dist/index.d.ts CHANGED
@@ -245,7 +245,7 @@ declare function validateRecipe(data: any): data is Recipe;
245
245
 
246
246
  declare function fromSchemaOrg(input: unknown): Recipe | null;
247
247
 
248
- interface SchemaOrgRecipe {
248
+ interface SchemaOrgRecipe$1 {
249
249
  '@context'?: string;
250
250
  '@type'?: string | string[];
251
251
  name: string;
@@ -270,7 +270,7 @@ interface SchemaOrgRecipe {
270
270
  '@graph'?: unknown;
271
271
  }
272
272
  type SchemaOrgIngredientList = string | string[];
273
- type SchemaOrgInstructionList = string | HowToStep | HowToSection | Array<string | HowToStep | HowToSection>;
273
+ type SchemaOrgInstructionList = string | HowToStep$1 | HowToSection | Array<string | HowToStep$1 | HowToSection>;
274
274
  interface SchemaOrgImageObject {
275
275
  '@type'?: string;
276
276
  url?: string;
@@ -282,7 +282,7 @@ interface SchemaOrgYield {
282
282
  unit?: string;
283
283
  description?: string;
284
284
  }
285
- interface HowToStep {
285
+ interface HowToStep$1 {
286
286
  '@type': 'HowToStep';
287
287
  text?: string;
288
288
  name?: string;
@@ -292,7 +292,7 @@ interface HowToStep {
292
292
  interface HowToSection {
293
293
  '@type': 'HowToSection';
294
294
  name: string;
295
- itemListElement: Array<string | HowToStep | HowToSection>;
295
+ itemListElement: Array<string | HowToStep$1 | HowToSection>;
296
296
  }
297
297
  interface SchemaOrgPersonOrOrganization {
298
298
  '@type'?: 'Person' | 'Organization';
@@ -303,8 +303,30 @@ interface NutritionInformation {
303
303
  [key: string]: string | number | null | undefined;
304
304
  }
305
305
 
306
- declare function toSchemaOrg(recipe: Recipe): SchemaOrgRecipe;
306
+ declare function toSchemaOrg(recipe: Recipe): SchemaOrgRecipe$1;
307
307
 
308
+ interface HowToStep {
309
+ '@type'?: 'HowToStep' | 'HowToSection' | string;
310
+ name?: string;
311
+ text?: string;
312
+ itemListElement?: Array<string | HowToStep>;
313
+ }
314
+ interface SchemaOrgRecipe {
315
+ '@type': string | string[];
316
+ name?: string;
317
+ description?: string;
318
+ image?: string | string[];
319
+ recipeIngredient?: string[];
320
+ recipeInstructions?: Array<string | HowToStep>;
321
+ recipeYield?: string | number;
322
+ prepTime?: string;
323
+ cookTime?: string;
324
+ totalTime?: string;
325
+ author?: unknown;
326
+ datePublished?: string;
327
+ aggregateRating?: unknown;
328
+ [key: string]: unknown;
329
+ }
308
330
  interface FetchRequestInit {
309
331
  headers?: Record<string, string>;
310
332
  signal?: AbortSignal;
@@ -357,6 +379,33 @@ declare function scrapeRecipe(url: string, options?: ScrapeRecipeOptions): Promi
357
379
  * @throws Error if no recipe is found
358
380
  */
359
381
  declare function extractRecipeFromHTML(html: string): Recipe;
382
+ /**
383
+ * Extract Schema.org recipe data from HTML string (browser-compatible).
384
+ *
385
+ * Returns the raw Schema.org recipe object, which can then be converted
386
+ * to Soustack format using fromSchemaOrg(). This gives you access to the
387
+ * original Schema.org data for inspection, debugging, or custom transformations.
388
+ *
389
+ * @param html - HTML string containing Schema.org recipe data
390
+ * @returns Schema.org recipe object, or null if not found
391
+ *
392
+ * @example
393
+ * ```ts
394
+ * // In browser:
395
+ * const response = await fetch('https://example.com/recipe');
396
+ * const html = await response.text();
397
+ * const schemaOrgRecipe = extractSchemaOrgRecipeFromHTML(html);
398
+ *
399
+ * if (schemaOrgRecipe) {
400
+ * // Inspect or modify Schema.org data before converting
401
+ * console.log('Found recipe:', schemaOrgRecipe.name);
402
+ *
403
+ * // Convert to Soustack format
404
+ * const soustackRecipe = fromSchemaOrg(schemaOrgRecipe);
405
+ * }
406
+ * ```
407
+ */
408
+ declare function extractSchemaOrgRecipeFromHTML(html: string): SchemaOrgRecipe | null;
360
409
 
361
410
  declare function normalizeIngredientInput(input: string): string;
362
411
  declare function parseIngredient(text: string): ParsedIngredient;
@@ -376,4 +425,4 @@ declare function normalizeYield(text: string): string;
376
425
  declare function parseYield(text: string): ParsedYield | null;
377
426
  declare function formatYield(value: ParsedYield): string;
378
427
 
379
- export { type Alternative, type ComputedIngredient, type ComputedInstruction, type ComputedRecipe, type Equipment, type FrozenStorageMethod, type Ingredient, type IngredientItem, type IngredientSubsection, type Instruction, type InstructionItem, type InstructionSubsection, type MakeAheadComponent, type NutritionFacts, type ParsedIngredient, type ParsedYield, type Quantity, type Recipe, type Scaling, type ScalingBakersPercentage, type ScalingBase, type ScalingDiscrete, type ScalingFixed, type ScalingLinear, type ScalingProportional, type SimpleTime, type Source, type StepTiming, type Storage, type StorageMethod, type StructuredTime, type Substitution, type Time, type Yield, extractRecipeFromHTML, formatDuration, formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
428
+ export { type Alternative, type ComputedIngredient, type ComputedInstruction, type ComputedRecipe, type Equipment, type FrozenStorageMethod, type Ingredient, type IngredientItem, type IngredientSubsection, type Instruction, type InstructionItem, type InstructionSubsection, type MakeAheadComponent, type NutritionFacts, type ParsedIngredient, type ParsedYield, type Quantity, type Recipe, type Scaling, type ScalingBakersPercentage, type ScalingBase, type ScalingDiscrete, type ScalingFixed, type ScalingLinear, type ScalingProportional, type SchemaOrgRecipe, type SimpleTime, type Source, type StepTiming, type Storage, type StorageMethod, type StructuredTime, type Substitution, type Time, type Yield, extractRecipeFromHTML, extractSchemaOrgRecipeFromHTML, formatDuration, formatYield, fromSchemaOrg, normalizeIngredientInput, normalizeYield, parseDuration, parseHumanDuration, parseIngredient, parseIngredientLine, parseIngredients, parseYield, scaleRecipe, scrapeRecipe, smartParseDuration, toSchemaOrg, validateRecipe };
package/dist/index.js CHANGED
@@ -1990,14 +1990,30 @@ function extractRecipe(html) {
1990
1990
  return extractRecipeBrowser(html);
1991
1991
  }
1992
1992
  const jsonLdRecipe = extractJsonLd(html);
1993
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
1994
- });
1993
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
1994
+ try {
1995
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
1996
+ if (globalFetch) {
1997
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:6", message: "JSON-LD extraction result", data: { hasJsonLd: !!jsonLdRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "C,D" }) }).catch(() => {
1998
+ });
1999
+ }
2000
+ } catch {
2001
+ }
2002
+ }
1995
2003
  if (jsonLdRecipe) {
1996
2004
  return { recipe: jsonLdRecipe, source: "jsonld" };
1997
2005
  }
1998
2006
  const microdataRecipe = extractMicrodata(html);
1999
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
2000
- });
2007
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2008
+ try {
2009
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2010
+ if (globalFetch) {
2011
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/extractors/index.ts:12", message: "Microdata extraction result", data: { hasMicrodata: !!microdataRecipe }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "D" }) }).catch(() => {
2012
+ });
2013
+ }
2014
+ } catch {
2015
+ }
2016
+ }
2001
2017
  if (microdataRecipe) {
2002
2018
  return { recipe: microdataRecipe, source: "microdata" };
2003
2019
  }
@@ -2006,20 +2022,52 @@ function extractRecipe(html) {
2006
2022
 
2007
2023
  // src/scraper/index.ts
2008
2024
  async function scrapeRecipe(url, options = {}) {
2009
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
2010
- });
2025
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2026
+ try {
2027
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2028
+ if (globalFetch) {
2029
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:7", message: "scrapeRecipe entry", data: { url, hasOptions: !!options }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,B,C,D,E" }) }).catch(() => {
2030
+ });
2031
+ }
2032
+ } catch {
2033
+ }
2034
+ }
2011
2035
  const html = await fetchPage(url, options);
2012
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
2013
- });
2036
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2037
+ try {
2038
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2039
+ if (globalFetch) {
2040
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:9", message: "HTML fetched", data: { htmlLength: html?.length, htmlPreview: html?.substring(0, 200) }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "B" }) }).catch(() => {
2041
+ });
2042
+ }
2043
+ } catch {
2044
+ }
2045
+ }
2014
2046
  const { recipe } = extractRecipe(html);
2015
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
2016
- });
2047
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2048
+ try {
2049
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2050
+ if (globalFetch) {
2051
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:11", message: "extractRecipe result", data: { hasRecipe: !!recipe, recipeType: recipe?.["@type"], recipeName: recipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A,C,D" }) }).catch(() => {
2052
+ });
2053
+ }
2054
+ } catch {
2055
+ }
2056
+ }
2017
2057
  if (!recipe) {
2018
2058
  throw new Error("No Schema.org recipe data found in page");
2019
2059
  }
2020
2060
  const soustackRecipe = fromSchemaOrg(recipe);
2021
- fetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
2022
- });
2061
+ if (typeof process === "undefined" || process.env.NODE_ENV !== "test") {
2062
+ try {
2063
+ const globalFetch = typeof globalThis !== "undefined" && typeof globalThis.fetch !== "undefined" ? globalThis.fetch : null;
2064
+ if (globalFetch) {
2065
+ globalFetch("http://127.0.0.1:7243/ingest/7225c3b5-9ac2-4c94-b561-807ca9003b66", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ location: "scraper/index.ts:17", message: "fromSchemaOrg result", data: { hasSoustackRecipe: !!soustackRecipe, soustackRecipeName: soustackRecipe?.name }, timestamp: Date.now(), sessionId: "debug-session", runId: "run1", hypothesisId: "A" }) }).catch(() => {
2066
+ });
2067
+ }
2068
+ } catch {
2069
+ }
2070
+ }
2023
2071
  if (!soustackRecipe) {
2024
2072
  throw new Error("Schema.org data did not include a valid recipe");
2025
2073
  }
@@ -2036,6 +2084,10 @@ function extractRecipeFromHTML(html) {
2036
2084
  }
2037
2085
  return soustackRecipe;
2038
2086
  }
2087
+ function extractSchemaOrgRecipeFromHTML(html) {
2088
+ const { recipe } = extractRecipe(html);
2089
+ return recipe;
2090
+ }
2039
2091
 
2040
2092
  // src/parsers/yield.ts
2041
2093
  var RANGE_PATTERN = /^(\d+)(?:\s*(?:[-–—]|to)\s*)(\d+)\s+(.+)$/i;
@@ -2280,6 +2332,7 @@ function wordToNumber(word) {
2280
2332
  }
2281
2333
 
2282
2334
  exports.extractRecipeFromHTML = extractRecipeFromHTML;
2335
+ exports.extractSchemaOrgRecipeFromHTML = extractSchemaOrgRecipeFromHTML;
2283
2336
  exports.formatDuration = formatDuration;
2284
2337
  exports.formatYield = formatYield2;
2285
2338
  exports.fromSchemaOrg = fromSchemaOrg;