html2llm 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -2
- package/dist/server.js +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -18,8 +18,6 @@ Raw HTML is token-expensive: every `<div class="wrapper">` pays the cost of angl
|
|
|
18
18
|
npm install html2llm
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
-
*(Not yet published to npm — clone and build from source: `git clone && npm install && npm run build`)*
|
|
22
|
-
|
|
23
21
|
## Usage
|
|
24
22
|
|
|
25
23
|
### As a module
|
|
@@ -116,6 +114,38 @@ The server:
|
|
|
116
114
|
- Returns CSX as `text/plain; charset=utf-8`
|
|
117
115
|
- Sets `X-Original-URL` response header
|
|
118
116
|
|
|
117
|
+
### Public instance
|
|
118
|
+
|
|
119
|
+
A hosted instance is available at **html2llm.cyncyn.xyz**. Use it the same way as `r.jina.ai`: prefix the target URL with the instance's address.
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
# Fetch any webpage as CSX
|
|
123
|
+
curl "https://html2llm.cyncyn.xyz/https://example.com"
|
|
124
|
+
|
|
125
|
+
# Pretty-printed for readability
|
|
126
|
+
curl "https://html2llm.cyncyn.xyz/https://example.com?pretty"
|
|
127
|
+
|
|
128
|
+
# Feed to an LLM in one pipeline
|
|
129
|
+
curl -s "https://html2llm.cyncyn.xyz/https://example.com" | llm "summarize this page"
|
|
130
|
+
|
|
131
|
+
# Bypass JS anti-bot walls with headless Chromium
|
|
132
|
+
curl "https://html2llm.cyncyn.xyz/https://www.zhihu.com/question/...?headless"
|
|
133
|
+
|
|
134
|
+
# The https:// scheme can be omitted — it is prepended automatically
|
|
135
|
+
curl "https://html2llm.cyncyn.xyz/example.com"
|
|
136
|
+
|
|
137
|
+
# Health check
|
|
138
|
+
curl "https://html2llm.cyncyn.xyz/health"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Query params:
|
|
142
|
+
|
|
143
|
+
| Param | Effect |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `?pretty` | Indented, human-readable output |
|
|
146
|
+
| `?headless` | Use headless Chromium to bypass JS challenges (slower) |
|
|
147
|
+
| `?pretty&headless` | Combine both options |
|
|
148
|
+
|
|
119
149
|
### Docker
|
|
120
150
|
|
|
121
151
|
```bash
|
package/dist/server.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { Hono } from "hono";
|
|
2
|
+
import { logger } from "hono/logger";
|
|
2
3
|
import { serve } from "@hono/node-server";
|
|
3
4
|
import { urlToCSX } from "./html2llm.js";
|
|
4
5
|
export const app = new Hono();
|
|
6
|
+
app.use("*", logger());
|
|
5
7
|
// Health check
|
|
6
8
|
app.get("/health", (c) => {
|
|
7
9
|
return c.json({ status: "ok" });
|