mdrip 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -93
- package/dist/api.d.ts +12 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +109 -0
- package/dist/api.js.map +1 -0
- package/dist/api.test.d.ts +2 -0
- package/dist/api.test.d.ts.map +1 -0
- package/dist/api.test.js +89 -0
- package/dist/api.test.js.map +1 -0
- package/dist/index.js +1 -1
- package/dist/lib/gitignore.d.ts.map +1 -1
- package/dist/lib/gitignore.js +5 -2
- package/dist/lib/gitignore.js.map +1 -1
- package/dist/web.d.ts +5 -0
- package/dist/web.d.ts.map +1 -0
- package/dist/web.js +11 -0
- package/dist/web.js.map +1 -0
- package/package.json +18 -1
package/README.md
CHANGED
|
@@ -1,50 +1,19 @@
|
|
|
1
1
|
# mdrip
|
|
2
2
|
|
|
3
|
-
Fetch markdown snapshots of web
|
|
3
|
+
Fetch clean markdown snapshots of any web page — optimized for AI agents, RAG pipelines, and context-aware workflows.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
This repo also includes an AI-consumable skills catalog in `skills/`, following the [agentskills](https://agentskills.io) format.
|
|
8
|
-
|
|
9
|
-
- Skill index: `skills/README.md`
|
|
10
|
-
- mdrip skill: `skills/mdrip/SKILL.md`
|
|
11
|
-
|
|
12
|
-
### Install skills from this repo
|
|
13
|
-
|
|
14
|
-
If you use a Skills-compatible agent setup, you can add these skills directly:
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
# install skills from this repo
|
|
18
|
-
npx skills add charl-kruger/mdrip
|
|
19
|
-
```
|
|
5
|
+
Reduces token overhead by ~90% compared to raw HTML while preserving the content structure LLMs need.
|
|
20
6
|
|
|
21
7
|
## Why
|
|
22
8
|
|
|
23
|
-
|
|
24
|
-
- cleaner structure
|
|
25
|
-
- lower token overhead
|
|
26
|
-
- easier chunking and context management
|
|
27
|
-
|
|
28
|
-
`mdrip` requests pages with `Accept: text/markdown`, stores the markdown locally, and tracks fetched pages in an index.
|
|
29
|
-
|
|
30
|
-
If a site does not return `text/markdown`, `mdrip` can automatically fall back to converting `text/html` into markdown.
|
|
31
|
-
The fallback uses an in-project converter optimized for common documentation/blog content (headings, links, lists, code blocks, tables, blockquotes).
|
|
32
|
-
|
|
33
|
-
## Why Cloudflare Markdown for Agents matters
|
|
9
|
+
AI agents and LLMs work better with markdown than HTML. Feeding raw HTML into a context window wastes tokens on tags, scripts, styles, and boilerplate. mdrip solves this by fetching any URL and returning clean, structured markdown.
|
|
34
10
|
|
|
35
|
-
|
|
36
|
-
-
|
|
37
|
-
-
|
|
38
|
-
-
|
|
11
|
+
- **~90% fewer tokens** than raw HTML
|
|
12
|
+
- **Automatic HTML-to-markdown fallback** when native markdown isn't available
|
|
13
|
+
- **Works everywhere** — CLI, Node.js, Cloudflare Workers, or via remote MCP
|
|
14
|
+
- **Token-aware** — reports estimated token counts so you can manage context budgets
|
|
39
15
|
|
|
40
|
-
For
|
|
41
|
-
- better structure for LLM parsing than raw HTML
|
|
42
|
-
- less token waste in context windows
|
|
43
|
-
- predictable markdown snapshots you can store and reuse in your repo
|
|
44
|
-
|
|
45
|
-
References:
|
|
46
|
-
- [Cloudflare blog: Markdown for Agents](https://blog.cloudflare.com/markdown-for-agents/)
|
|
47
|
-
- [Cloudflare docs: Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)
|
|
16
|
+
Sites that support [Cloudflare's Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/) return markdown natively at the edge. For all other sites, mdrip's built-in converter handles headings, links, lists, code blocks, tables, blockquotes, and more.
|
|
48
17
|
|
|
49
18
|
## Installation
|
|
50
19
|
|
|
@@ -52,43 +21,31 @@ References:
|
|
|
52
21
|
npm install -g mdrip
|
|
53
22
|
```
|
|
54
23
|
|
|
55
|
-
Or use with `npx`:
|
|
24
|
+
Or use directly with `npx`:
|
|
56
25
|
|
|
57
26
|
```bash
|
|
58
27
|
npx mdrip <url>
|
|
59
28
|
```
|
|
60
29
|
|
|
61
|
-
## Usage
|
|
30
|
+
## CLI Usage
|
|
62
31
|
|
|
63
32
|
### Fetch pages
|
|
64
33
|
|
|
65
34
|
```bash
|
|
66
35
|
# Fetch one page
|
|
67
|
-
mdrip https://
|
|
36
|
+
mdrip https://example.com/docs/getting-started
|
|
68
37
|
|
|
69
38
|
# Fetch multiple pages
|
|
70
|
-
mdrip https://
|
|
39
|
+
mdrip https://example.com/docs https://example.com/api
|
|
71
40
|
|
|
72
|
-
#
|
|
41
|
+
# Custom timeout (ms)
|
|
73
42
|
mdrip https://example.com --timeout 45000
|
|
74
43
|
|
|
75
|
-
#
|
|
44
|
+
# Strict mode — only accept native markdown, no HTML fallback
|
|
76
45
|
mdrip https://example.com --no-html-fallback
|
|
77
46
|
|
|
78
|
-
#
|
|
79
|
-
mdrip https://
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
### Raw mode for agents (OpenClaw, etc.)
|
|
83
|
-
|
|
84
|
-
`--raw` is designed for agent runtimes that only need in-memory content.
|
|
85
|
-
It prints markdown to stdout and skips settings prompts and all file writes.
|
|
86
|
-
|
|
87
|
-
This is useful for flows with OpenClaw and similar AI tools where you want to pipe page content directly into your agent loop.
|
|
88
|
-
|
|
89
|
-
```bash
|
|
90
|
-
# stream markdown directly to another process
|
|
91
|
-
mdrip https://blog.cloudflare.com/markdown-for-agents/ --raw
|
|
47
|
+
# Raw mode — print markdown to stdout, no file writes
|
|
48
|
+
mdrip https://example.com --raw
|
|
92
49
|
```
|
|
93
50
|
|
|
94
51
|
### List fetched pages
|
|
@@ -101,7 +58,7 @@ mdrip list --json
|
|
|
101
58
|
### Remove pages
|
|
102
59
|
|
|
103
60
|
```bash
|
|
104
|
-
mdrip remove https://
|
|
61
|
+
mdrip remove https://example.com/docs/getting-started
|
|
105
62
|
```
|
|
106
63
|
|
|
107
64
|
### Clean snapshots
|
|
@@ -111,64 +68,138 @@ mdrip remove https://developers.cloudflare.com/fundamentals/reference/markdown-f
|
|
|
111
68
|
mdrip clean
|
|
112
69
|
|
|
113
70
|
# Remove only one domain
|
|
114
|
-
mdrip clean --domain
|
|
71
|
+
mdrip clean --domain example.com
|
|
115
72
|
```
|
|
116
73
|
|
|
117
|
-
|
|
74
|
+
### Raw mode for agent runtimes
|
|
118
75
|
|
|
119
|
-
|
|
120
|
-
- `.gitignore` (adds `mdrip/`)
|
|
121
|
-
- `tsconfig.json` (excludes `mdrip`)
|
|
122
|
-
- `AGENTS.md` (adds a section pointing agents to snapshots)
|
|
76
|
+
`--raw` prints markdown to stdout and skips all file writes and prompts. Useful for piping content directly into agent loops.
|
|
123
77
|
|
|
124
|
-
|
|
78
|
+
```bash
|
|
79
|
+
mdrip https://example.com --raw | your-agent-cli
|
|
80
|
+
```
|
|
125
81
|
|
|
126
|
-
|
|
82
|
+
## Programmatic API
|
|
127
83
|
|
|
128
84
|
```bash
|
|
129
|
-
|
|
130
|
-
|
|
85
|
+
npm install mdrip
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Workers / Edge / In-memory
|
|
89
|
+
|
|
90
|
+
```ts
|
|
91
|
+
import { fetchMarkdown } from "mdrip";
|
|
92
|
+
|
|
93
|
+
const page = await fetchMarkdown("https://example.com/docs");
|
|
94
|
+
|
|
95
|
+
console.log(page.markdown); // clean markdown content
|
|
96
|
+
console.log(page.markdownTokens); // estimated token count
|
|
97
|
+
console.log(page.source); // "cloudflare-markdown" or "html-fallback"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Node.js (fetch and store to disk)
|
|
101
|
+
|
|
102
|
+
```ts
|
|
103
|
+
import { fetchToStore, listStoredPages } from "mdrip/node";
|
|
131
104
|
|
|
132
|
-
|
|
133
|
-
|
|
105
|
+
const result = await fetchToStore("https://example.com/docs", {
|
|
106
|
+
cwd: process.cwd(),
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
if (result.success) {
|
|
110
|
+
console.log(`Saved to ${result.path}`);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
const pages = await listStoredPages(process.cwd());
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Available exports
|
|
117
|
+
|
|
118
|
+
| Import | Environment | Functions |
|
|
119
|
+
|--------|-------------|-----------|
|
|
120
|
+
| `mdrip` | Workers, edge, browser | `fetchMarkdown()`, `fetchRawMarkdown()` |
|
|
121
|
+
| `mdrip/node` | Node.js | `fetchToStore()`, `fetchManyToStore()`, `listStoredPages()` |
|
|
122
|
+
|
|
123
|
+
## Remote MCP Server
|
|
124
|
+
|
|
125
|
+
mdrip is available as a remote MCP server at **`mdrip.createmcp.dev`** — no install required. Any MCP-compatible client can connect and use the `fetch_markdown` and `batch_fetch_markdown` tools.
|
|
126
|
+
|
|
127
|
+
### Claude Desktop
|
|
128
|
+
|
|
129
|
+
Add to `claude_desktop_config.json`:
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"mcpServers": {
|
|
134
|
+
"mdrip": {
|
|
135
|
+
"command": "npx",
|
|
136
|
+
"args": ["mcp-remote", "https://mdrip.createmcp.dev/mcp"]
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Claude Code
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
claude mcp add mdrip-remote --transport sse https://mdrip.createmcp.dev/sse
|
|
134
146
|
```
|
|
135
147
|
|
|
136
|
-
|
|
148
|
+
### Cloudflare AI Playground
|
|
137
149
|
|
|
138
|
-
|
|
150
|
+
Enter `mdrip.createmcp.dev/sse` at [playground.ai.cloudflare.com](https://playground.ai.cloudflare.com/).
|
|
151
|
+
|
|
152
|
+
## File modifications
|
|
139
153
|
|
|
140
|
-
|
|
154
|
+
On first run, mdrip can optionally update:
|
|
155
|
+
- `.gitignore` — adds `/mdrip/`
|
|
156
|
+
- `tsconfig.json` — excludes `mdrip/`
|
|
157
|
+
- `AGENTS.md` — adds a section pointing agents to your snapshots
|
|
158
|
+
|
|
159
|
+
Your choice is stored in `mdrip/settings.json`. Pass `--modify` or `--modify=false` to answer up front and skip the prompt.
|
|
160
|
+
|
|
161
|
+
`--raw` mode bypasses this entirely.
|
|
162
|
+
|
|
163
|
+
## Output structure
|
|
164
|
+
|
|
165
|
+
```
|
|
141
166
|
mdrip/
|
|
142
167
|
├── settings.json
|
|
143
168
|
├── sources.json
|
|
144
169
|
└── pages/
|
|
145
|
-
└──
|
|
146
|
-
└──
|
|
147
|
-
└──
|
|
148
|
-
└──
|
|
149
|
-
└── index.md
|
|
170
|
+
└── example.com/
|
|
171
|
+
└── docs/
|
|
172
|
+
└── getting-started/
|
|
173
|
+
└── index.md
|
|
150
174
|
```
|
|
151
175
|
|
|
152
|
-
##
|
|
176
|
+
## Benchmark
|
|
153
177
|
|
|
154
|
-
|
|
155
|
-
- The target site must return markdown for `Accept: text/markdown` (Cloudflare Markdown for Agents enabled).
|
|
156
|
-
- If a page does not return `text/markdown`, mdrip can convert `text/html` into markdown fallback unless `--no-html-fallback` is used.
|
|
178
|
+
Measured across popular pages (values vary as pages change):
|
|
157
179
|
|
|
158
|
-
|
|
180
|
+
| Page | Mode | Chars saved | Tokens saved |
|
|
181
|
+
|------|------|------------:|-------------:|
|
|
182
|
+
| blog.cloudflare.com/markdown-for-agents | cloudflare-markdown | 94.9% | 94.9% |
|
|
183
|
+
| developers.cloudflare.com/.../markdown-for-agents | cloudflare-markdown | 95.7% | 95.7% |
|
|
184
|
+
| en.wikipedia.org/wiki/Markdown | html-fallback | 72.7% | 72.7% |
|
|
185
|
+
| github.com/cloudflare/skills | html-fallback | 96.3% | 96.3% |
|
|
186
|
+
| **Average** | | **89.9%** | **89.9%** |
|
|
159
187
|
|
|
160
188
|
```bash
|
|
161
|
-
|
|
162
|
-
|
|
189
|
+
pnpm build && pnpm benchmark
|
|
190
|
+
```
|
|
163
191
|
|
|
164
|
-
|
|
165
|
-
|
|
192
|
+
## AI Skills
|
|
193
|
+
|
|
194
|
+
This repo includes an AI-consumable skills catalog in `skills/`, following the [agentskills](https://agentskills.io) format.
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
npx skills add charl-kruger/mdrip
|
|
166
198
|
```
|
|
167
199
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
-
|
|
171
|
-
- `pnpm build`
|
|
200
|
+
## Requirements
|
|
201
|
+
|
|
202
|
+
- Node.js 18+
|
|
172
203
|
|
|
173
204
|
## Author
|
|
174
205
|
|
package/dist/api.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { type FetchMarkdownOptions, type MarkdownResponse } from "./lib/cloudflare.js";
|
|
2
|
+
import type { FetchResult, PageEntry, SourcesIndex } from "./types.js";
|
|
3
|
+
export type { FetchMarkdownOptions, MarkdownResponse, FetchResult, PageEntry, SourcesIndex, };
|
|
4
|
+
export interface StoreFetchOptions extends FetchMarkdownOptions {
|
|
5
|
+
cwd?: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function fetchMarkdown(url: string, options?: FetchMarkdownOptions): Promise<MarkdownResponse>;
|
|
8
|
+
export declare function fetchRawMarkdown(url: string, options?: FetchMarkdownOptions): Promise<string>;
|
|
9
|
+
export declare function fetchToStore(url: string, options?: StoreFetchOptions): Promise<FetchResult>;
|
|
10
|
+
export declare function fetchManyToStore(urls: string[], options?: StoreFetchOptions): Promise<FetchResult[]>;
|
|
11
|
+
export declare function listStoredPages(cwd?: string): Promise<PageEntry[]>;
|
|
12
|
+
//# sourceMappingURL=api.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACtB,MAAM,qBAAqB,CAAC;AAO7B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEvE,YAAY,EACV,oBAAoB,EACpB,gBAAgB,EAChB,WAAW,EACX,SAAS,EACT,YAAY,GACb,CAAC;AAEF,MAAM,WAAW,iBAAkB,SAAQ,oBAAoB;IAC7D,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AA8ED,wBAAsB,aAAa,CACjC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,gBAAgB,CAAC,CAG3B;AAED,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,MAAM,CAAC,CAGjB;AAED,wBAAsB,YAAY,CAChC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,WAAW,CAAC,CAatB;AAED,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,WAAW,EAAE,CAAC,CAiBxB;AAED,wBAAsB,eAAe,CACnC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,SAAS,EAAE,CAAC,CAGtB"}
|
package/dist/api.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { fetchMarkdownPage, } from "./lib/cloudflare.js";
|
|
2
|
+
import { listSources, savePageMarkdown, } from "./lib/storage.js";
|
|
3
|
+
import { updatePageIndex } from "./lib/agents.js";
|
|
4
|
+
import { normalizeUrl } from "./lib/url.js";
|
|
5
|
+
/**
 * Merge freshly fetched results into an existing list of page entries.
 *
 * Results that failed (`success` falsy) or have no `path` are skipped. A result
 * whose `url` matches an existing entry replaces that entry in place; any other
 * result is appended. The input array is never mutated.
 *
 * @param {Array<object>} existing - Page entries already recorded in the index.
 * @param {Array<object>} results - Fetch results to fold in.
 * @returns {Array<object>} A new array containing the merged page entries.
 */
function mergeResults(existing, results) {
    // One timestamp for the whole batch so every entry written together agrees.
    const now = new Date().toISOString();
    const merged = [...existing];
    // url -> position in `merged`. Built once so each result is an O(1) lookup
    // instead of a linear scan. The first occurrence wins, which matches what a
    // left-to-right search over `merged` would have found.
    const positionByUrl = new Map();
    merged.forEach((page, position) => {
        if (!positionByUrl.has(page.url)) {
            positionByUrl.set(page.url, position);
        }
    });
    for (const result of results) {
        if (!result.success || !result.path) {
            continue;
        }
        const entry = {
            url: result.url,
            resolvedUrl: result.resolvedUrl,
            path: result.path,
            fetchedAt: now,
            status: result.status || 200,
            contentType: result.contentType || "text/markdown",
            markdownTokens: result.markdownTokens,
            contentSignal: result.contentSignal,
            source: result.source,
        };
        const position = positionByUrl.get(result.url);
        if (position !== undefined) {
            merged[position] = entry;
        }
        else {
            // Register the new slot so a later duplicate in the same batch
            // overwrites it rather than being appended a second time.
            positionByUrl.set(result.url, merged.length);
            merged.push(entry);
        }
    }
    return merged;
}
|
|
33
|
+
/**
 * Fetch a single URL and persist its markdown under `cwd`.
 *
 * Failures are reported through the returned object rather than thrown: an
 * invalid `spec` yields a failed result keyed by the raw input, and a failed
 * fetch or save yields a failed result keyed by the normalized URL.
 *
 * @param {string} spec - URL as supplied by the caller.
 * @param {string} cwd - Working directory passed through to storage.
 * @param {object} options - Options forwarded to `fetchMarkdownPage`.
 * @returns {Promise<object>} Result with `success`, `url`, `path` and, on success, response metadata.
 */
async function fetchOneForStore(spec, cwd, options) {
    const failed = (url, cause) => ({
        url,
        path: "",
        success: false,
        error: cause instanceof Error ? cause.message : String(cause),
    });
    let target;
    try {
        target = normalizeUrl(spec);
    }
    catch (cause) {
        return failed(spec, cause);
    }
    try {
        const page = await fetchMarkdownPage(target, options);
        // Store under the post-redirect URL when the response reports one.
        const savedPath = await savePageMarkdown(normalizeUrl(page.resolvedUrl || target), page.markdown, cwd);
        return {
            url: target,
            resolvedUrl: page.resolvedUrl,
            path: savedPath,
            success: true,
            status: page.status,
            contentType: page.contentType,
            markdownTokens: page.markdownTokens,
            contentSignal: page.contentSignal,
            source: page.source,
        };
    }
    catch (cause) {
        return failed(target, cause);
    }
}
|
|
71
|
+
/**
 * Normalize `url` and fetch it as markdown without touching the filesystem.
 *
 * @param {string} url - URL to fetch; passed through `normalizeUrl` first.
 * @param {object} [options] - Options forwarded to `fetchMarkdownPage`.
 * @returns {Promise<object>} The response produced by `fetchMarkdownPage`.
 */
export async function fetchMarkdown(url, options = {}) {
    return fetchMarkdownPage(normalizeUrl(url), options);
}
|
|
75
|
+
/**
 * Fetch `url` and return only the markdown text of the response.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options] - Options forwarded to `fetchMarkdown`.
 * @returns {Promise<string>} The `markdown` field of the fetched response.
 */
export async function fetchRawMarkdown(url, options = {}) {
    const { markdown } = await fetchMarkdown(url, options);
    return markdown;
}
|
|
79
|
+
/**
 * Fetch one URL, save its markdown, and record it in the page index.
 *
 * The index is only read and rewritten when the fetch succeeded; a failed
 * result is returned as-is.
 *
 * @param {string} url - URL to fetch.
 * @param {object} [options] - Fetch options; `options.cwd` selects the working
 *   directory and falls back to `process.cwd()`.
 * @returns {Promise<object>} The result of the fetch-and-save attempt.
 */
export async function fetchToStore(url, options = {}) {
    const cwd = options.cwd || process.cwd();
    const outcome = await fetchOneForStore(url, cwd, options);
    if (outcome.success) {
        const known = await listSources(cwd);
        await updatePageIndex({ pages: mergeResults(known.pages, [outcome]) }, cwd);
    }
    return outcome;
}
|
|
90
|
+
/**
 * Fetch several URLs one after another, saving each and updating the page
 * index once at the end.
 *
 * Every input gets a result entry, in input order, whether it succeeded or
 * not. The index is rewritten only if at least one fetch succeeded.
 *
 * @param {string[]} urls - URLs to fetch.
 * @param {object} [options] - Fetch options; `options.cwd` selects the working
 *   directory and falls back to `process.cwd()`.
 * @returns {Promise<object[]>} One result per input URL.
 */
export async function fetchManyToStore(urls, options = {}) {
    const cwd = options.cwd || process.cwd();
    const outcomes = [];
    for (const spec of urls) {
        outcomes.push(await fetchOneForStore(spec, cwd, options));
    }
    const saved = outcomes.filter((outcome) => outcome.success);
    if (saved.length === 0) {
        return outcomes;
    }
    const known = await listSources(cwd);
    await updatePageIndex({ pages: mergeResults(known.pages, saved) }, cwd);
    return outcomes;
}
|
|
105
|
+
/**
 * Return the page entries recorded in the sources index for `cwd`.
 *
 * @param {string} [cwd] - Working directory; defaults to `process.cwd()`.
 * @returns {Promise<object[]>} The `pages` array from `listSources(cwd)`.
 */
export async function listStoredPages(cwd = process.cwd()) {
    const { pages } = await listSources(cwd);
    return pages;
}
|
|
109
|
+
//# sourceMappingURL=api.js.map
|
package/dist/api.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,GAGlB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EACL,WAAW,EACX,gBAAgB,GACjB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAe5C,SAAS,YAAY,CAAC,QAAqB,EAAE,OAAsB;IACjE,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;IAE7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAc;YACvB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,GAAG;YACd,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,GAAG;YAC5B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,eAAe;YAClD,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,aAAa,EAAE,MAAM,CAAC,aAAa;YACnC,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAC7B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,IAAI,aAAqB,CAAC;IAE1B,IAAI,CAAC;QACH,aAAa,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,GAAG,EAAE,IAAI;YACT,IAAI,EAAE,EAAE;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACjE,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,CAAC,WAAW,IAAI,aAAa,CAAC,CAAC;QACvE,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,UAAU,EAAE,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAE9E,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,IAAI,EAAE,UAAU;YAChB,OAAO,EAAE,IAAI;YACb,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,cAAc,EAAE,QAAQ,CAAC,cAAc;YACvC,aAAa,EAAE,QAAQ,CAAC,aAAa;YACrC,MAAM,EAAE,QAAQ,CAAC,M
AAM;SACxB,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,IAAI,EAAE,EAAE;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,UAAgC,EAAE;IAElC,MAAM,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACxC,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAW,EACX,UAAgC,EAAE;IAElC,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnD,OAAO,QAAQ,CAAC,QAAQ,CAAC;AAC3B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,GAAW,EACX,UAA6B,EAAE;IAE/B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IAEzD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IACxC,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACrD,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;IAEtC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAc,EACd,UAA6B,EAAE;IAE/B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QACvD,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,MAAc,OAAO,CAAC,GAAG,EAAE;IAE3B,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,OAAO,OAAO,CAAC,KAAK,CAAC;AACvB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.test.d.ts","sourceRoot":"","sources":["../src/api.test.ts"],"names":[],"mappings":""}
|
package/dist/api.test.js
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { mkdtemp, readFile, rm } from "fs/promises";
|
|
3
|
+
import { join } from "path";
|
|
4
|
+
import { tmpdir } from "os";
|
|
5
|
+
import { fetchToStore, listStoredPages, fetchManyToStore, } from "./api.js";
|
|
6
|
+
import { fetchMarkdown, fetchRawMarkdown } from "./web.js";
|
|
7
|
+
describe("api", () => {
    // Build a fresh 200 markdown Response for every mocked request.
    const markdownReply = (body, extraHeaders = {}) => new Response(body, {
        status: 200,
        headers: {
            "content-type": "text/markdown; charset=utf-8",
            ...extraHeaders,
        },
    });
    // Run `body` against a throwaway working directory and always remove it.
    const inScratchDir = async (body) => {
        const cwd = await mkdtemp(join(tmpdir(), "mdrip-api-"));
        try {
            await body(cwd);
        }
        finally {
            await rm(cwd, { recursive: true, force: true });
        }
    };
    it("fetchMarkdown normalizes URL before requesting", async () => {
        const seen = [];
        const fetchImpl = async (input) => {
            seen.push(String(input));
            return markdownReply("# Hello");
        };
        const result = await fetchMarkdown("example.com/docs", { fetchImpl });
        // Compare the last requested URL, or "" when nothing was requested.
        expect(seen.length > 0 ? seen[seen.length - 1] : "").toBe("https://example.com/docs");
        expect(result.markdown).toBe("# Hello");
    });
    it("fetchRawMarkdown returns markdown content only", async () => {
        const fetchImpl = async () => markdownReply("# Raw");
        const markdown = await fetchRawMarkdown("example.com", { fetchImpl });
        expect(markdown).toBe("# Raw");
    });
    it("fetchToStore writes snapshot and updates sources index", async () => {
        await inScratchDir(async (cwd) => {
            const fetchImpl = async () => markdownReply("# Stored", { "x-markdown-tokens": "2" });
            const result = await fetchToStore("https://example.com/docs/page", { cwd, fetchImpl });
            expect(result.success).toBe(true);
            expect(result.path).toBe("pages/example.com/docs/page.md");
            const pages = await listStoredPages(cwd);
            expect(pages).toHaveLength(1);
            expect(pages[0].url).toBe("https://example.com/docs/page");
            // The returned path is relative to the mdrip/ directory inside cwd.
            const content = await readFile(join(cwd, "mdrip", result.path), "utf-8");
            expect(content).toBe("# Stored");
        });
    });
    it("fetchManyToStore records mixed success results", async () => {
        await inScratchDir(async (cwd) => {
            const fetchImpl = async () => markdownReply("# Multi");
            const targets = ["https://example.com/a", "ftp://example.com/a"];
            const results = await fetchManyToStore(targets, { cwd, fetchImpl });
            expect(results).toHaveLength(2);
            const [first, second] = results;
            expect(first.success).toBe(true);
            expect(second.success).toBe(false);
            const pages = await listStoredPages(cwd);
            expect(pages).toHaveLength(1);
            expect(pages[0].url).toBe("https://example.com/a");
        });
    });
});
|
|
89
|
+
//# sourceMappingURL=api.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.test.js","sourceRoot":"","sources":["../src/api.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC5B,OAAO,EACL,YAAY,EACZ,eAAe,EACf,gBAAgB,GACjB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAE3D,QAAQ,CAAC,KAAK,EAAE,GAAG,EAAE;IACnB,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,MAAM,SAAS,GAAiB,KAAK,EAAE,KAAK,EAAE,EAAE;YAC9C,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YAC7B,OAAO,IAAI,QAAQ,CAAC,SAAS,EAAE;gBAC7B,MAAM,EAAE,GAAG;gBACX,OAAO,EAAE;oBACP,cAAc,EAAE,8BAA8B;iBAC/C;aACF,CAAC,CAAC;QACL,CAAC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,kBAAkB,EAAE;YACrD,SAAS,EAAE,SAAS;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,SAAS,GAAiB,KAAK,IAAI,EAAE,CACzC,IAAI,QAAQ,CAAC,OAAO,EAAE;YACpB,MAAM,EAAE,GAAG;YACX,OAAO,EAAE;gBACP,cAAc,EAAE,8BAA8B;aAC/C;SACF,CAAC,CAAC;QAEL,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,aAAa,EAAE;YACrD,SAAS,EAAE,SAAS;SACrB,CAAC,CAAC;QAEH,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,YAAY,CAAC,CAAC,CAAC;QAExD,IAAI,CAAC;YACH,MAAM,SAAS,GAAiB,KAAK,IAAI,EAAE,CACzC,IAAI,QAAQ,CAAC,UAAU,EAAE;gBACvB,MAAM,EAAE,GAAG;gBACX,OAAO,EAAE;oBACP,cAAc,EAAE,8BAA8B;oBAC9C,mBAAmB,EAAE,GAAG;iBACzB;aACF,CAAC,CAAC;YAEL,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,+BAA+B,EAAE;gBACjE,GAAG;gBACH,SAAS,EAAE,SAAS;aACrB,CAAC,CAAC;YAEH,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;YAE3D,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,GAAG,CAAC,CAAC;YACzC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,CAAC
,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YAE3D,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;YACjD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnC,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,YAAY,CAAC,CAAC,CAAC;QAExD,IAAI,CAAC;YACH,MAAM,SAAS,GAAiB,KAAK,IAAI,EAAE,CACzC,IAAI,QAAQ,CAAC,SAAS,EAAE;gBACtB,MAAM,EAAE,GAAG;gBACX,OAAO,EAAE;oBACP,cAAc,EAAE,8BAA8B;iBAC/C;aACF,CAAC,CAAC;YAEL,MAAM,OAAO,GAAG,MAAM,gBAAgB,CACpC,CAAC,uBAAuB,EAAE,qBAAqB,CAAC,EAChD;gBACE,GAAG;gBACH,SAAS,EAAE,SAAS;aACrB,CACF,CAAC;YAEF,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAEvC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,GAAG,CAAC,CAAC;YACzC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;QACrD,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ const program = new Command();
|
|
|
8
8
|
program
|
|
9
9
|
.name("mdrip")
|
|
10
10
|
.description("Fetch markdown snapshots for URLs using Cloudflare Markdown for Agents")
|
|
11
|
-
.version("0.1.
|
|
11
|
+
.version("0.1.4")
|
|
12
12
|
.option("--cwd <path>", "working directory (default: current directory)");
|
|
13
13
|
program
|
|
14
14
|
.argument("[urls...]", "URLs to fetch as markdown")
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitignore.d.ts","sourceRoot":"","sources":["../../src/lib/gitignore.ts"],"names":[],"mappings":"AAOA,wBAAsB,aAAa,CACjC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"gitignore.d.ts","sourceRoot":"","sources":["../../src/lib/gitignore.ts"],"names":[],"mappings":"AAOA,wBAAsB,aAAa,CACjC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,CAuBlB;AAED,wBAAsB,eAAe,CACnC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,CAuBlB"}
|
package/dist/lib/gitignore.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { readFile, writeFile } from "fs/promises";
|
|
2
2
|
import { join } from "path";
|
|
3
3
|
import { existsSync } from "fs";
|
|
4
|
-
const MDRIP_ENTRY = "mdrip/";
|
|
4
|
+
const MDRIP_ENTRY = "/mdrip/";
|
|
5
5
|
const MARKER_COMMENT = "# mdrip - markdown snapshots for agents";
|
|
6
6
|
export async function hasMdripEntry(cwd = process.cwd()) {
|
|
7
7
|
const gitignorePath = join(cwd, ".gitignore");
|
|
@@ -13,7 +13,10 @@ export async function hasMdripEntry(cwd = process.cwd()) {
|
|
|
13
13
|
const lines = content.split("\n");
|
|
14
14
|
return lines.some((line) => {
|
|
15
15
|
const trimmed = line.trim();
|
|
16
|
-
return trimmed === MDRIP_ENTRY ||
|
|
16
|
+
return (trimmed === MDRIP_ENTRY ||
|
|
17
|
+
trimmed === "mdrip/" ||
|
|
18
|
+
trimmed === "/mdrip" ||
|
|
19
|
+
trimmed === "mdrip");
|
|
17
20
|
});
|
|
18
21
|
}
|
|
19
22
|
catch {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitignore.js","sourceRoot":"","sources":["../../src/lib/gitignore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAEhC,MAAM,WAAW,GAAG,
|
|
1
|
+
{"version":3,"file":"gitignore.js","sourceRoot":"","sources":["../../src/lib/gitignore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAEhC,MAAM,WAAW,GAAG,SAAS,CAAC;AAC9B,MAAM,cAAc,GAAG,yCAAyC,CAAC;AAEjE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,MAAc,OAAO,CAAC,GAAG,EAAE;IAE3B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAE9C,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAElC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,OAAO,CACL,OAAO,KAAK,WAAW;gBACvB,OAAO,KAAK,QAAQ;gBACpB,OAAO,KAAK,QAAQ;gBACpB,OAAO,KAAK,OAAO,CACpB,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,MAAc,OAAO,CAAC,GAAG,EAAE;IAE3B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IAE9C,IAAI,MAAM,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,IAAI,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC9B,OAAO,GAAG,MAAM,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QACjD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAClD,OAAO,IAAI,IAAI,CAAC;QAClB,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,IAAI,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,GAAG,cAAc,KAAK,WAAW,IAAI,CAAC;IAEjD,MAAM,SAAS,CAAC,aAAa,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACjD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
package/dist/web.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { type FetchMarkdownOptions, type MarkdownResponse } from "./lib/cloudflare.js";
|
|
2
|
+
export type { FetchMarkdownOptions, MarkdownResponse };
|
|
3
|
+
export declare function fetchMarkdown(url: string, options?: FetchMarkdownOptions): Promise<MarkdownResponse>;
|
|
4
|
+
export declare function fetchRawMarkdown(url: string, options?: FetchMarkdownOptions): Promise<string>;
|
|
5
|
+
//# sourceMappingURL=web.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web.d.ts","sourceRoot":"","sources":["../src/web.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACtB,MAAM,qBAAqB,CAAC;AAG7B,YAAY,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,CAAC;AAEvD,wBAAsB,aAAa,CACjC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,gBAAgB,CAAC,CAG3B;AAED,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,MAAM,CAAC,CAGjB"}
|
package/dist/web.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { fetchMarkdownPage, } from "./lib/cloudflare.js";
|
|
2
|
+
import { normalizeUrl } from "./lib/url.js";
|
|
3
|
+
export async function fetchMarkdown(url, options = {}) {
|
|
4
|
+
const normalizedUrl = normalizeUrl(url);
|
|
5
|
+
return fetchMarkdownPage(normalizedUrl, options);
|
|
6
|
+
}
|
|
7
|
+
export async function fetchRawMarkdown(url, options = {}) {
|
|
8
|
+
const response = await fetchMarkdown(url, options);
|
|
9
|
+
return response.markdown;
|
|
10
|
+
}
|
|
11
|
+
//# sourceMappingURL=web.js.map
|
package/dist/web.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web.js","sourceRoot":"","sources":["../src/web.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,GAGlB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAI5C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,UAAgC,EAAE;IAElC,MAAM,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACxC,OAAO,iBAAiB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,GAAW,EACX,UAAgC,EAAE;IAElC,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACnD,OAAO,QAAQ,CAAC,QAAQ,CAAC;AAC3B,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,8 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdrip",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Fetch markdown snapshots of web pages using Cloudflare Markdown for Agents",
|
|
5
5
|
"type": "module",
|
|
6
|
+
"main": "./dist/web.js",
|
|
7
|
+
"types": "./dist/web.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/web.d.ts",
|
|
11
|
+
"import": "./dist/web.js"
|
|
12
|
+
},
|
|
13
|
+
"./node": {
|
|
14
|
+
"types": "./dist/api.d.ts",
|
|
15
|
+
"import": "./dist/api.js"
|
|
16
|
+
},
|
|
17
|
+
"./cli": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
6
22
|
"bin": {
|
|
7
23
|
"mdrip": "./dist/index.js"
|
|
8
24
|
},
|
|
@@ -22,6 +38,7 @@
|
|
|
22
38
|
"build": "tsc",
|
|
23
39
|
"dev": "tsc --watch",
|
|
24
40
|
"start": "node dist/index.js",
|
|
41
|
+
"benchmark": "node scripts/benchmark.mjs",
|
|
25
42
|
"test": "vitest run",
|
|
26
43
|
"test:watch": "vitest",
|
|
27
44
|
"test:coverage": "vitest run --coverage",
|