@vakra-dev/reader 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -572,6 +572,36 @@ Reader uses [Ulixee Hero](https://ulixee.org/), a headless browser with advanced
572
572
  - **Health Monitoring** - Background health checks every 5 minutes
573
573
  - **Request Queuing** - Queues requests when pool is full (max 100)
574
574
 
575
+ ### HTML to Markdown: supermarkdown
576
+
577
+ Reader uses [**supermarkdown**](https://github.com/vakra-dev/supermarkdown) for HTML to Markdown conversion - a sister project we built from scratch specifically for web scraping and LLM pipelines.
578
+
579
+ **Why we built it:**
580
+
581
+ When you're scraping the web, you encounter messy, malformed HTML that breaks most converters. And when you're feeding content to LLMs, you need clean output without artifacts or noise. We needed a converter that handles real-world HTML reliably while producing high-quality Markdown.
582
+
583
+ **What supermarkdown offers:**
584
+
585
+ | Feature | Benefit |
586
+ |---------|---------|
587
+ | **Written in Rust** | Native performance with Node.js bindings via napi-rs |
588
+ | **Full GFM support** | Tables, task lists, strikethrough, autolinks |
589
+ | **LLM-optimized** | Clean output designed for AI consumption |
590
+ | **Battle-tested** | Handles malformed HTML from real web pages |
591
+ | **CSS selectors** | Include/exclude elements during conversion |
592
+
593
+ supermarkdown is open source and available as both a Rust crate and an npm package:
594
+
595
+ ```bash
596
+ # npm
597
+ npm install @vakra-dev/supermarkdown
598
+
599
+ # Rust
600
+ cargo add supermarkdown
601
+ ```
602
+
603
+ Check out the [supermarkdown repository](https://github.com/vakra-dev/supermarkdown) for examples and documentation.
604
+
575
605
  ## Documentation
576
606
 
577
607
  | Guide | Description |
package/dist/cli/index.js CHANGED
@@ -18,21 +18,15 @@ import { ConnectionToHeroCore } from "@ulixee/hero";
18
18
  import pLimit from "p-limit";
19
19
 
20
20
  // src/formatters/markdown.ts
21
- import TurndownService from "turndown";
22
- var turndownService = new TurndownService({
23
- headingStyle: "atx",
24
- hr: "---",
25
- bulletListMarker: "-",
26
- codeBlockStyle: "fenced",
27
- fence: "```",
28
- emDelimiter: "*",
29
- strongDelimiter: "**",
30
- linkStyle: "inlined",
31
- linkReferenceStyle: "full"
32
- });
21
+ import { convert } from "@vakra-dev/supermarkdown";
33
22
  function htmlToMarkdown(html) {
34
23
  try {
35
- return turndownService.turndown(html);
24
+ return convert(html, {
25
+ headingStyle: "atx",
26
+ bulletMarker: "-",
27
+ codeFence: "`",
28
+ linkStyle: "inline"
29
+ });
36
30
  } catch (error) {
37
31
  console.warn("Error converting HTML to Markdown:", error);
38
32
  return html.replace(/<[^>]*>/g, "").trim();
@@ -1723,7 +1717,7 @@ var EngineOrchestrator = class {
1723
1717
  return true;
1724
1718
  }
1725
1719
  if (error instanceof HttpError) {
1726
- return error.statusCode === 403 || error.statusCode === 429 || error.statusCode >= 500;
1720
+ return error.statusCode === 403 || error.statusCode === 404 || error.statusCode === 429 || error.statusCode >= 500;
1727
1721
  }
1728
1722
  if (error instanceof EngineUnavailableError) {
1729
1723
  return true;