@kreuzberg/html-to-markdown-wasm 3.6.0-rc.7 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kreuzberg/html-to-markdown-wasm",
3
- "version": "3.6.0-rc.7",
3
+ "version": "3.6.0",
4
4
  "private": false,
5
5
  "description": "High-performance HTML to Markdown converter",
6
6
  "license": "MIT",
@@ -225,7 +225,6 @@ export class WasmConversionResult {
225
225
  set content(value: string | null | undefined);
226
226
  get document(): WasmDocumentStructure | undefined;
227
227
  set document(value: WasmDocumentStructure | null | undefined);
228
- images: string[];
229
228
  metadata: WasmHtmlMetadata;
230
229
  tables: WasmTableData[];
231
230
  warnings: WasmProcessingWarning[];
@@ -387,6 +386,23 @@ export class WasmHtmlMetadata {
387
386
  structuredData: WasmStructuredData[];
388
387
  }
389
388
 
389
+ /**
390
+ * Image dimensions in pixels.
391
+ *
392
+ * Binding-safe replacement for `(u32, u32)` tuples, which degrade to
393
+ * `Vec<Vec<String>>` when sanitized for cross-language binding generation.
394
+ * Used by both `ImageMetadata` and
395
+ * `InlineImage`.
396
+ */
397
+ export class WasmImageDimensions {
398
+ free(): void;
399
+ [Symbol.dispose](): void;
400
+ static default(): WasmImageDimensions;
401
+ constructor(width: number, height: number);
402
+ height: number;
403
+ width: number;
404
+ }
405
+
390
406
  /**
391
407
  * Image metadata with source and dimensions.
392
408
  *
@@ -399,12 +415,12 @@ export class WasmImageMetadata {
399
415
  free(): void;
400
416
  [Symbol.dispose](): void;
401
417
  static default(): WasmImageMetadata;
402
- constructor(src: string, imageType: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: Uint32Array | null);
418
+ constructor(src: string, imageType: WasmImageType, attributes: any, alt?: string | null, title?: string | null, dimensions?: WasmImageDimensions | null);
403
419
  get alt(): string | undefined;
404
420
  set alt(value: string | null | undefined);
405
421
  attributes: any;
406
- get dimensions(): Uint32Array | undefined;
407
- set dimensions(value: Uint32Array | null | undefined);
422
+ get dimensions(): WasmImageDimensions | undefined;
423
+ set dimensions(value: WasmImageDimensions | null | undefined);
408
424
  get imageType(): string;
409
425
  set imageType(value: WasmImageType);
410
426
  src: string;
@@ -481,6 +497,22 @@ export enum WasmListIndentType {
481
497
  Tabs = 1,
482
498
  }
483
499
 
500
+ /**
501
+ * A single key-value metadata entry from `<head>` meta tags.
502
+ *
503
+ * Binding-safe replacement for `(String, String)` tuples used in
504
+ * `NodeContent.MetadataBlock`. Tuple pairs cannot be represented
505
+ * across language boundaries without lossy degradation.
506
+ */
507
+ export class WasmMetadataEntry {
508
+ free(): void;
509
+ [Symbol.dispose](): void;
510
+ static default(): WasmMetadataEntry;
511
+ constructor(key: string, value: string);
512
+ key: string;
513
+ value: string;
514
+ }
515
+
484
516
  /**
485
517
  * Line break syntax in Markdown output.
486
518
  *
@@ -507,8 +539,8 @@ export class WasmNodeContent {
507
539
  set definition(value: string | null | undefined);
508
540
  get description(): string | undefined;
509
541
  set description(value: string | null | undefined);
510
- get entries(): any | undefined;
511
- set entries(value: any | null | undefined);
542
+ get entries(): WasmMetadataEntry[] | undefined;
543
+ set entries(value: WasmMetadataEntry[] | null | undefined);
512
544
  get format(): string | undefined;
513
545
  set format(value: string | null | undefined);
514
546
  get grid(): WasmTableGrid | undefined;
@@ -540,14 +572,46 @@ export class WasmNodeContent {
540
572
  * Context information passed to all visitor methods.
541
573
  *
542
574
  * Provides comprehensive metadata about the current node being visited,
543
- * including its type, attributes, position in the DOM tree, and parent context.
575
+ * including its type, tag name, position in the DOM tree, and parent context.
576
+ *
577
+ * ## Attributes
578
+ *
579
+ * Access attributes via `NodeContext.attributes`, which returns
580
+ * `&BTreeMap<String, String>`. When the context was built with
581
+ * `NodeContext.with_lazy_attributes` (the hot path inside the converter),
582
+ * the map is only materialized on the first call — if the visitor never reads
583
+ * attributes, the allocation is skipped.
584
+ *
585
+ * ## Lifetimes
586
+ *
587
+ * String fields use `Cow<'_, str>` so the converter can pass slices directly
588
+ * out of the parsed DOM without allocating. Visitor implementations that need
589
+ * to outlive the callback should call `NodeContext.into_owned`.
544
590
  */
545
591
  export class WasmNodeContext {
546
592
  free(): void;
547
593
  [Symbol.dispose](): void;
594
+ /**
595
+ * Return a reference to the attribute map.
596
+ *
597
+ * If the context was built with `NodeContext.with_lazy_attributes`, the
598
+ * map is materialized on the first call and cached for subsequent calls.
599
+ * If this method is never called, no allocation occurs for attributes.
600
+ */
601
+ attributes(): any;
548
602
  static default(): WasmNodeContext;
549
- constructor(nodeType: WasmNodeType, tagName: string, attributes: any, depth: number, indexInParent: number, isInline: boolean, parentTag?: string | null);
550
- attributes: any;
603
+ /**
604
+ * Promote any borrowed fields into owned storage so the context can outlive `'a`.
605
+ */
606
+ intoOwned(): WasmNodeContext;
607
+ constructor(nodeType: WasmNodeType, tagName: string, depth: number, indexInParent: number, isInline: boolean, parentTag?: string | null);
608
+ /**
609
+ * Construct a `NodeContext` with an owned attribute map.
610
+ *
611
+ * Prefer `NodeContext.with_lazy_attributes` (pub(crate)) inside the
612
+ * converter to avoid the eager `collect_tag_attributes` allocation.
613
+ */
614
+ static withOwnedAttributes(node_type: WasmNodeType, tag_name: string, attributes: any, depth: number, index_in_parent: number, parent_tag: string | null | undefined, is_inline: boolean): WasmNodeContext;
551
615
  depth: number;
552
616
  indexInParent: number;
553
617
  isInline: boolean;
@@ -742,7 +806,7 @@ export class WasmProcessingWarning {
742
806
  }
743
807
 
744
808
  /**
745
- * Structured data block (JSON-LD, Microdata, or RDFa).
809
+ * Structured data block (JSON-LD, Microdata, or `RDFa`).
746
810
  *
747
811
  * Represents machine-readable structured data found in the document.
748
812
  * JSON-LD blocks are collected as raw JSON strings for flexibility.
@@ -5,5 +5,5 @@ import { __wbg_set_wasm } from "./html_to_markdown_wasm_bg.js";
5
5
  __wbg_set_wasm(wasm);
6
6
 
7
7
  export {
8
- WasmAnnotationKind, WasmCodeBlockStyle, WasmConversionOptions, WasmConversionOptionsUpdate, WasmConversionResult, WasmDocumentMetadata, WasmDocumentNode, WasmDocumentStructure, WasmGridCell, WasmHeaderMetadata, WasmHeadingStyle, WasmHighlightStyle, WasmHtmlMetadata, WasmImageMetadata, WasmImageType, WasmLinkMetadata, WasmLinkStyle, WasmLinkType, WasmListIndentType, WasmNewlineStyle, WasmNodeContent, WasmNodeContext, WasmNodeType, WasmOutputFormat, WasmPreprocessingOptions, WasmPreprocessingOptionsUpdate, WasmPreprocessingPreset, WasmProcessingWarning, WasmStructuredData, WasmStructuredDataType, WasmTableData, WasmTableGrid, WasmTextAnnotation, WasmTextDirection, WasmTierStrategy, WasmUrlEscapeStyle, WasmVisitResult, WasmVisitorHandle, WasmWarningKind, WasmWhitespaceMode, convert
8
+ WasmAnnotationKind, WasmCodeBlockStyle, WasmConversionOptions, WasmConversionOptionsUpdate, WasmConversionResult, WasmDocumentMetadata, WasmDocumentNode, WasmDocumentStructure, WasmGridCell, WasmHeaderMetadata, WasmHeadingStyle, WasmHighlightStyle, WasmHtmlMetadata, WasmImageDimensions, WasmImageMetadata, WasmImageType, WasmLinkMetadata, WasmLinkStyle, WasmLinkType, WasmListIndentType, WasmMetadataEntry, WasmNewlineStyle, WasmNodeContent, WasmNodeContext, WasmNodeType, WasmOutputFormat, WasmPreprocessingOptions, WasmPreprocessingOptionsUpdate, WasmPreprocessingPreset, WasmProcessingWarning, WasmStructuredData, WasmStructuredDataType, WasmTableData, WasmTableGrid, WasmTextAnnotation, WasmTextDirection, WasmTierStrategy, WasmUrlEscapeStyle, WasmVisitResult, WasmVisitorHandle, WasmWarningKind, WasmWhitespaceMode, convert
9
9
  } from "./html_to_markdown_wasm_bg.js";