mdrip 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +137 -0
- package/dist/commands/clean.d.ts +6 -0
- package/dist/commands/clean.d.ts.map +1 -0
- package/dist/commands/clean.js +49 -0
- package/dist/commands/clean.js.map +1 -0
- package/dist/commands/fetch.d.ts +9 -0
- package/dist/commands/fetch.d.ts.map +1 -0
- package/dist/commands/fetch.js +163 -0
- package/dist/commands/fetch.js.map +1 -0
- package/dist/commands/list.d.ts +6 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +55 -0
- package/dist/commands/list.js.map +1 -0
- package/dist/commands/remove.d.ts +5 -0
- package/dist/commands/remove.d.ts.map +1 -0
- package/dist/commands/remove.js +53 -0
- package/dist/commands/remove.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +74 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/agents.d.ts +10 -0
- package/dist/lib/agents.d.ts.map +1 -0
- package/dist/lib/agents.js +129 -0
- package/dist/lib/agents.js.map +1 -0
- package/dist/lib/cloudflare.d.ts +17 -0
- package/dist/lib/cloudflare.d.ts.map +1 -0
- package/dist/lib/cloudflare.js +74 -0
- package/dist/lib/cloudflare.js.map +1 -0
- package/dist/lib/cloudflare.test.d.ts +2 -0
- package/dist/lib/cloudflare.test.d.ts.map +1 -0
- package/dist/lib/cloudflare.test.js +59 -0
- package/dist/lib/cloudflare.test.js.map +1 -0
- package/dist/lib/gitignore.d.ts +3 -0
- package/dist/lib/gitignore.d.ts.map +1 -0
- package/dist/lib/gitignore.js +42 -0
- package/dist/lib/gitignore.js.map +1 -0
- package/dist/lib/html-to-markdown.d.ts +3 -0
- package/dist/lib/html-to-markdown.d.ts.map +1 -0
- package/dist/lib/html-to-markdown.js +312 -0
- package/dist/lib/html-to-markdown.js.map +1 -0
- package/dist/lib/html-to-markdown.test.d.ts +2 -0
- package/dist/lib/html-to-markdown.test.d.ts.map +1 -0
- package/dist/lib/html-to-markdown.test.js +54 -0
- package/dist/lib/html-to-markdown.test.js.map +1 -0
- package/dist/lib/prompt.d.ts +5 -0
- package/dist/lib/prompt.d.ts.map +1 -0
- package/dist/lib/prompt.js +47 -0
- package/dist/lib/prompt.js.map +1 -0
- package/dist/lib/settings.d.ts +9 -0
- package/dist/lib/settings.d.ts.map +1 -0
- package/dist/lib/settings.js +42 -0
- package/dist/lib/settings.js.map +1 -0
- package/dist/lib/storage.d.ts +11 -0
- package/dist/lib/storage.d.ts.map +1 -0
- package/dist/lib/storage.js +87 -0
- package/dist/lib/storage.js.map +1 -0
- package/dist/lib/tsconfig.d.ts +4 -0
- package/dist/lib/tsconfig.d.ts.map +1 -0
- package/dist/lib/tsconfig.js +49 -0
- package/dist/lib/tsconfig.js.map +1 -0
- package/dist/lib/url.d.ts +5 -0
- package/dist/lib/url.d.ts.map +1 -0
- package/dist/lib/url.js +73 -0
- package/dist/lib/url.js.map +1 -0
- package/dist/lib/url.test.d.ts +2 -0
- package/dist/lib/url.test.d.ts.map +1 -0
- package/dist/lib/url.test.js +41 -0
- package/dist/lib/url.test.js.map +1 -0
- package/dist/types.d.ts +28 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
package/README.md
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# mdrip
|
|
2
|
+
|
|
3
|
+
Fetch markdown snapshots of web pages using Cloudflare's Markdown for Agents feature, so coding agents can consume clean structured content instead of HTML.
|
|
4
|
+
|
|
5
|
+
## AI Skills
|
|
6
|
+
|
|
7
|
+
This repo also includes an AI-consumable skills catalog in `skills/`, following the [agentskills](https://agentskills.io) format.
|
|
8
|
+
|
|
9
|
+
- Skill index: `skills/README.md`
|
|
10
|
+
- mdrip skill: `skills/mdrip/SKILL.md`
|
|
11
|
+
|
|
12
|
+
## Why
|
|
13
|
+
|
|
14
|
+
For agent workflows, markdown is often better than HTML:
|
|
15
|
+
- cleaner structure
|
|
16
|
+
- lower token overhead
|
|
17
|
+
- easier chunking and context management
|
|
18
|
+
|
|
19
|
+
`mdrip` requests pages with `Accept: text/markdown`, stores the markdown locally, and tracks fetched pages in an index.
|
|
20
|
+
|
|
21
|
+
If a site does not return `text/markdown`, `mdrip` can automatically fall back to converting `text/html` into markdown.
|
|
22
|
+
The fallback uses an in-project converter optimized for common documentation/blog content (headings, links, lists, code blocks, tables, blockquotes).
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npm install -g mdrip
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Or use with `npx`:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npx mdrip <url>
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
37
|
+
|
|
38
|
+
### Fetch pages
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Fetch one page
|
|
42
|
+
mdrip https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/
|
|
43
|
+
|
|
44
|
+
# Fetch multiple pages
|
|
45
|
+
mdrip https://blog.cloudflare.com/markdown-for-agents/ https://developers.cloudflare.com/
|
|
46
|
+
|
|
47
|
+
# Optional timeout override (ms)
|
|
48
|
+
mdrip https://example.com --timeout 45000
|
|
49
|
+
|
|
50
|
+
# Disable HTML fallback (strict Cloudflare markdown only)
|
|
51
|
+
mdrip https://example.com --no-html-fallback
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### List fetched pages
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
mdrip list
|
|
58
|
+
mdrip list --json
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Remove pages
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
mdrip remove https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Clean snapshots
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# Remove all
|
|
71
|
+
mdrip clean
|
|
72
|
+
|
|
73
|
+
# Remove only one domain
|
|
74
|
+
mdrip clean --domain developers.cloudflare.com
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## File modifications
|
|
78
|
+
|
|
79
|
+
On first run, mdrip can optionally update:
|
|
80
|
+
- `.gitignore` (adds `mdrip/`)
|
|
81
|
+
- `tsconfig.json` (excludes `mdrip`)
|
|
82
|
+
- `AGENTS.md` (adds a section pointing agents to snapshots)
|
|
83
|
+
|
|
84
|
+
Choice is stored in `mdrip/settings.json`.
|
|
85
|
+
|
|
86
|
+
Use flags to skip the prompt:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# allow updates
|
|
90
|
+
mdrip https://example.com --modify
|
|
91
|
+
|
|
92
|
+
# deny updates
|
|
93
|
+
mdrip https://example.com --modify=false
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Output
|
|
97
|
+
|
|
98
|
+
```text
|
|
99
|
+
mdrip/
|
|
100
|
+
├── settings.json
|
|
101
|
+
├── sources.json
|
|
102
|
+
└── pages/
|
|
103
|
+
└── developers.cloudflare.com/
|
|
104
|
+
└── fundamentals/
|
|
105
|
+
└── reference/
|
|
106
|
+
└── markdown-for-agents/
|
|
107
|
+
└── index.md
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Requirements and notes
|
|
111
|
+
|
|
112
|
+
- Node.js 18+
|
|
113
|
+
- For native markdown, the target site must return markdown for `Accept: text/markdown` (i.e. have Cloudflare Markdown for Agents enabled).
|
|
114
|
+
- If a page does not return `text/markdown`, mdrip falls back to converting the `text/html` response into markdown, unless `--no-html-fallback` is used.
|
|
115
|
+
|
|
116
|
+
## Publishing to npm
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# optional package check
|
|
120
|
+
pnpm publish:dry-run
|
|
121
|
+
|
|
122
|
+
# publish to npm
|
|
123
|
+
pnpm publish:npm
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
`prepublishOnly` runs automatically before publish and executes:
|
|
127
|
+
- `pnpm type-check`
|
|
128
|
+
- `pnpm test`
|
|
129
|
+
- `pnpm build`
|
|
130
|
+
|
|
131
|
+
## Author
|
|
132
|
+
|
|
133
|
+
Charl Kruger
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
Apache-2.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clean.d.ts","sourceRoot":"","sources":["../../src/commands/clean.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,YAAY;IAC3B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,YAAY,CAAC,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,IAAI,CAAC,CAsD5E"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { listSources, removeStoredPage, cleanupEmptyPageDirs } from "../lib/storage.js";
|
|
2
|
+
import { updateAgentsMd, updatePageIndex } from "../lib/agents.js";
|
|
3
|
+
import { getFileModificationPermission } from "../lib/settings.js";
|
|
4
|
+
import { getUrlHost } from "../lib/url.js";
|
|
5
|
+
/**
 * Remove stored markdown snapshots and refresh the generated index files.
 *
 * Without `options.domain` every indexed page is removed. With it, only pages
 * whose host (taken from `resolvedUrl`, falling back to `url`) equals the
 * trimmed, lower-cased domain are removed.
 *
 * @param {{ cwd?: string, domain?: string }} [options]
 *   `cwd` is the project directory (defaults to `process.cwd()`);
 *   `domain` restricts the clean-up to a single host.
 * @returns {Promise<void>}
 */
export async function cleanCommand(options = {}) {
    const cwd = options.cwd || process.cwd();
    const sources = await listSources(cwd);
    if (sources.pages.length === 0) {
        console.log("No pages to clean.");
        return;
    }
    const targetDomain = options.domain?.trim().toLowerCase();
    // A domain that was supplied but is blank (e.g. `--domain "$UNSET_VAR"`)
    // must not silently widen the operation to "remove everything".
    if (options.domain != null && !targetDomain) {
        console.log("No domain given. Pass a host name to --domain, or omit the flag to remove all pages.");
        return;
    }
    const pagesToRemove = targetDomain
        ? sources.pages.filter((page) => getUrlHost(page.resolvedUrl || page.url) === targetDomain)
        : sources.pages;
    if (pagesToRemove.length === 0) {
        console.log(`No pages found for domain: ${targetDomain}`);
        return;
    }
    // Index entries are identified by their requested URL.
    const removalSet = new Set(pagesToRemove.map((page) => page.url));
    for (const page of pagesToRemove) {
        await removeStoredPage(page.path, cwd);
    }
    await cleanupEmptyPageDirs(cwd);
    const remainingPages = sources.pages.filter((page) => !removalSet.has(page.url));
    // Only touch AGENTS.md when the stored setting allows file modifications;
    // otherwise just refresh the page index.
    const canModifyFiles = await getFileModificationPermission(cwd);
    if (canModifyFiles) {
        const agentsUpdated = await updateAgentsMd({ pages: remainingPages }, cwd);
        if (agentsUpdated) {
            if (remainingPages.length === 0) {
                console.log("✓ Removed mdrip section from AGENTS.md");
            }
            else {
                console.log("✓ Updated AGENTS.md");
            }
        }
    }
    else {
        await updatePageIndex({ pages: remainingPages }, cwd);
    }
    if (targetDomain) {
        console.log(`✓ Removed ${pagesToRemove.length} page(s) for ${targetDomain}`);
    }
    else {
        console.log(`✓ Removed ${pagesToRemove.length} page(s)`);
    }
    console.log(`\nCleaned ${pagesToRemove.length} page(s)`);
}
|
|
49
|
+
//# sourceMappingURL=clean.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clean.js","sourceRoot":"","sources":["../../src/commands/clean.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACxF,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnE,OAAO,EAAE,6BAA6B,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAQ3C,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,UAAwB,EAAE;IAC3D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IAEvC,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO;IACT,CAAC;IAED,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE1D,MAAM,aAAa,GAAG,YAAY;QAChC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,YAAY,CAAC;QAC3F,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAElB,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,8BAA8B,YAAY,EAAE,CAAC,CAAC;QAC1D,OAAO;IACT,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAElE,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;QACjC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;IAEhC,MAAM,cAAc,GAAgB,OAAO,CAAC,KAAK,CAAC,MAAM,CACtD,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CACpC,CAAC;IAEF,MAAM,cAAc,GAAG,MAAM,6BAA6B,CAAC,GAAG,CAAC,CAAC;IAEhE,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,GAAG,CAAC,CAAC;QAC3E,IAAI,aAAa,EAAE,CAAC;YAClB,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAChC,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;YACxD,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,GAAG,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,CAAC,GAAG,CAAC,aAAa,aAAa,CAAC,MAAM,gBAAgB,YAAY,EAAE,CAAC,CAAC;IAC/E,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,aAAa,aAAa,CAAC,MAAM
,UAAU,CAAC,CAAC;IAC3D,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,aAAa,aAAa,CAAC,MAAM,UAAU,CAAC,CAAC;AAC3D,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { FetchResult } from "../types.js";
|
|
2
|
+
/** Options accepted by `fetchCommand`. */
export interface FetchOptions {
|
|
3
|
+
/** Project directory; `fetchCommand` falls back to `process.cwd()` when this is omitted. */
cwd?: string;
|
|
4
|
+
/** Explicit allow/deny for updating project files; when undefined, the stored setting or an interactive prompt decides. */
allowModifications?: boolean;
|
|
5
|
+
/** Passed through to the page fetch as its timeout; the README documents the CLI value as milliseconds. */
timeoutMs?: number;
|
|
6
|
+
/** Passed through to the page fetch; presumably toggles the HTML-to-markdown fallback described in the README (confirm in lib/cloudflare). */
htmlFallback?: boolean;
|
|
7
|
+
}
|
|
8
|
+
/** Fetches every entry of `urls` and resolves to one `FetchResult` per input, in input order. */
export declare function fetchCommand(urls: string[], options?: FetchOptions): Promise<FetchResult[]>;
|
|
9
|
+
//# sourceMappingURL=fetch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../src/commands/fetch.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAa,MAAM,aAAa,CAAC;AAE1D,MAAM,WAAW,YAAY;IAC3B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAmJD,wBAAsB,YAAY,CAChC,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CAmDxB"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { fetchMarkdownPage } from "../lib/cloudflare.js";
|
|
2
|
+
import { ensureGitignore } from "../lib/gitignore.js";
|
|
3
|
+
import { ensureTsconfigExclude } from "../lib/tsconfig.js";
|
|
4
|
+
import { updateAgentsMd, updatePageIndex, } from "../lib/agents.js";
|
|
5
|
+
import { getFileModificationPermission, setFileModificationPermission } from "../lib/settings.js";
|
|
6
|
+
import { confirm } from "../lib/prompt.js";
|
|
7
|
+
import { listSources, getPageInfo, savePageMarkdown } from "../lib/storage.js";
|
|
8
|
+
import { normalizeUrl } from "../lib/url.js";
|
|
9
|
+
/**
 * Decide whether mdrip may modify project files.
 *
 * Precedence: an explicit CLI override (which is also persisted), then the
 * previously stored setting, then an interactive confirmation whose answer
 * is persisted.
 *
 * @param {string} cwd Project directory the setting is stored for.
 * @param {boolean | undefined} cliOverride Value supplied on the command line, if any.
 * @returns {Promise<boolean>} The effective permission.
 */
async function checkFileModificationPermission(cwd, cliOverride) {
    const hasCliOverride = cliOverride !== undefined;
    if (hasCliOverride) {
        await setFileModificationPermission(cliOverride, cwd);
        console.log(cliOverride
            ? "✓ File modifications enabled (--modify)"
            : "✗ File modifications disabled (--modify=false)");
        return cliOverride;
    }
    const remembered = await getFileModificationPermission(cwd);
    if (remembered !== undefined) {
        return remembered;
    }
    // Nothing decided yet: explain what would be touched, then ask.
    const explanation = [
        "\nmdrip can update the following files for better integration:",
        " • .gitignore - add mdrip/ to ignore list",
        " • tsconfig.json - exclude mdrip/ from compilation",
        " • AGENTS.md - add markdown snapshot reference section\n",
    ];
    for (const line of explanation) {
        console.log(line);
    }
    const answer = await confirm("Allow mdrip to modify these files?");
    await setFileModificationPermission(answer, cwd);
    console.log(answer
        ? "✓ Permission granted - saved to mdrip/settings.json\n"
        : "✗ Permission denied - saved to mdrip/settings.json\n");
    return answer;
}
|
|
38
|
+
/**
 * Fetch a single URL input and store its markdown snapshot.
 *
 * Never throws for per-URL problems: an invalid URL or a failed fetch/save is
 * reported on the console and returned as a result with `success: false`.
 *
 * @param {string} spec Raw URL as given by the user.
 * @param {string} cwd Project directory snapshots are stored under.
 * @param {number | undefined} timeoutMs Forwarded to `fetchMarkdownPage`.
 * @param {boolean | undefined} htmlFallback Forwarded to `fetchMarkdownPage`.
 * @returns {Promise<object>} A result with `url`, `path` and `success`, plus
 *   either `error` or the response metadata.
 */
async function fetchUrlInput(spec, cwd, timeoutMs, htmlFallback) {
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    let normalizedUrl;
    try {
        normalizedUrl = normalizeUrl(spec);
    }
    catch (err) {
        const errorMessage = describeError(err);
        // Report the rejection so the "failed" count in the summary is explained,
        // matching how fetch failures below are reported.
        console.log(`\n ✗ Invalid URL: ${spec} (${errorMessage})`);
        return {
            url: spec,
            path: "",
            success: false,
            error: errorMessage,
        };
    }
    console.log(`\nFetching ${normalizedUrl}...`);
    try {
        const existing = await getPageInfo(normalizedUrl, cwd);
        if (existing) {
            console.log(` → Updating existing snapshot at mdrip/${existing.path}`);
        }
        const response = await fetchMarkdownPage(normalizedUrl, {
            timeoutMs,
            htmlFallback,
        });
        // Store under the resolved URL when the response reports one, else under the requested URL.
        const storageUrl = normalizeUrl(response.resolvedUrl || normalizedUrl);
        const outputPath = await savePageMarkdown(storageUrl, response.markdown, cwd);
        console.log(` ✓ Saved to mdrip/${outputPath}`);
        if (response.markdownTokens !== undefined) {
            console.log(` → x-markdown-tokens: ${response.markdownTokens}`);
        }
        if (response.source === "html-fallback") {
            console.log(" → Converted from HTML (fallback mode)");
        }
        if (response.contentSignal) {
            console.log(` → Content-Signal: ${response.contentSignal}`);
        }
        if (response.resolvedUrl && response.resolvedUrl !== normalizedUrl) {
            console.log(` → Resolved URL: ${response.resolvedUrl}`);
        }
        return {
            url: normalizedUrl,
            resolvedUrl: response.resolvedUrl,
            path: outputPath,
            success: true,
            status: response.status,
            contentType: response.contentType,
            markdownTokens: response.markdownTokens,
            contentSignal: response.contentSignal,
            source: response.source,
        };
    }
    catch (err) {
        const errorMessage = describeError(err);
        console.log(` ✗ Failed: ${errorMessage}`);
        return {
            url: normalizedUrl,
            path: "",
            success: false,
            error: errorMessage,
        };
    }
}
|
|
99
|
+
/**
 * Fold successful fetch results into a copy of the existing page index.
 *
 * Failed results are ignored. A result whose `url` already appears in the
 * index replaces that entry in place; otherwise it is appended. All entries
 * written by one call share the same `fetchedAt` timestamp.
 *
 * @param {Array<object>} existing Current index entries (not mutated).
 * @param {Array<object>} results Results of the current fetch run.
 * @returns {Array<object>} The merged list of index entries.
 */
function mergeResults(existing, results) {
    const fetchedAt = new Date().toISOString();
    const combined = Array.from(existing);
    results
        .filter((result) => result.success)
        .forEach((result) => {
            const { url, resolvedUrl, path, markdownTokens, contentSignal, source } = result;
            const record = {
                url,
                resolvedUrl,
                path,
                fetchedAt,
                status: result.status || 200,
                contentType: result.contentType || "text/markdown",
                markdownTokens,
                contentSignal,
                source,
            };
            const position = combined.findIndex((page) => page.url === url);
            if (position < 0) {
                combined.push(record);
            }
            else {
                combined[position] = record;
            }
        });
    return combined;
}
|
|
127
|
+
/**
 * Fetch markdown snapshots for the given URLs, one after another.
 *
 * Resolves the file-modification permission first and, when granted, makes
 * sure `.gitignore` and `tsconfig.json` are set up. After fetching, prints a
 * summary and, if anything succeeded, merges the new entries into the index
 * and refreshes AGENTS.md (when permitted) or just the page index.
 *
 * @param {string[]} urls Raw URL inputs.
 * @param {{ cwd?: string, allowModifications?: boolean, timeoutMs?: number, htmlFallback?: boolean }} [options]
 * @returns {Promise<Array<object>>} One result per input, in input order.
 */
export async function fetchCommand(urls, options = {}) {
    const cwd = options.cwd || process.cwd();
    const canModifyFiles = await checkFileModificationPermission(cwd, options.allowModifications);
    if (canModifyFiles) {
        if (await ensureGitignore(cwd)) {
            console.log("✓ Added mdrip/ to .gitignore");
        }
        if (await ensureTsconfigExclude(cwd)) {
            console.log("✓ Added mdrip/ to tsconfig.json exclude");
        }
    }
    // Fetch sequentially so per-URL console output stays grouped.
    const results = [];
    for (const spec of urls) {
        results.push(await fetchUrlInput(spec, cwd, options.timeoutMs, options.htmlFallback));
    }
    const successful = results.reduce((count, result) => (result.success ? count + 1 : count), 0);
    const failed = results.length - successful;
    console.log(`\nDone: ${successful} succeeded, ${failed} failed`);
    if (successful === 0) {
        return results;
    }
    const stored = await listSources(cwd);
    const pages = mergeResults(stored.pages, results);
    if (!canModifyFiles) {
        await updatePageIndex({ pages }, cwd);
        return results;
    }
    if (await updateAgentsMd({ pages }, cwd)) {
        console.log("✓ Updated AGENTS.md");
    }
    return results;
}
|
|
163
|
+
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch.js","sourceRoot":"","sources":["../../src/commands/fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3D,OAAO,EACL,cAAc,EACd,eAAe,GAChB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,6BAA6B,EAAE,6BAA6B,EAAE,MAAM,oBAAoB,CAAC;AAClG,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAU7C,KAAK,UAAU,+BAA+B,CAC5C,GAAW,EACX,WAAqB;IAErB,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;QAC9B,MAAM,6BAA6B,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;QACtD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QACzD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;QAChE,CAAC;QACD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,MAAM,gBAAgB,GAAG,MAAM,6BAA6B,CAAC,GAAG,CAAC,CAAC;IAClE,IAAI,gBAAgB,KAAK,SAAS,EAAE,CAAC;QACnC,OAAO,gBAAgB,CAAC;IAC1B,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,gEAAgE,CAAC,CAAC;IAC9E,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,2DAA2D,CAAC,CAAC;IAEzE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,oCAAoC,CAAC,CAAC;IAEpE,MAAM,6BAA6B,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAElD,IAAI,OAAO,EAAE,CAAC;QACZ,OAAO,CAAC,GAAG,CAAC,uDAAuD,CAAC,CAAC;IACvE,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,sDAAsD,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,IAAY,EACZ,GAAW,EACX,SAAkB,EAClB,YAAsB;IAEtB,IAAI,aAAqB,CAAC;IAE1B,IAAI,CAAC;QACH,aAAa,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,GAAG,EAAE,IAAI;YACT,IAAI,EAAE,EAAE;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,cAAc,aAAa,KAAK,CAAC,CAAC;IAE9C,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;QACvD,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,2CAA2C,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1E,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,iBAAiB,C
AAC,aAAa,EAAE;YACtD,SAAS;YACT,YAAY;SACb,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,CAAC,WAAW,IAAI,aAAa,CAAC,CAAC;QACvE,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,UAAU,EAAE,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAE9E,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC;QAEhD,IAAI,QAAQ,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;YAC1C,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,CAAC,cAAc,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,eAAe,EAAE,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,QAAQ,CAAC,aAAa,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,uBAAuB,QAAQ,CAAC,aAAa,EAAE,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,QAAQ,CAAC,WAAW,IAAI,QAAQ,CAAC,WAAW,KAAK,aAAa,EAAE,CAAC;YACnE,OAAO,CAAC,GAAG,CAAC,qBAAqB,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,IAAI,EAAE,UAAU;YAChB,OAAO,EAAE,IAAI;YACb,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,cAAc,EAAE,QAAQ,CAAC,cAAc;YACvC,aAAa,EAAE,QAAQ,CAAC,aAAa;YACrC,MAAM,EAAE,QAAQ,CAAC,MAAM;SACxB,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,YAAY,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACtE,OAAO,CAAC,GAAG,CAAC,eAAe,YAAY,EAAE,CAAC,CAAC;QAC3C,OAAO;YACL,GAAG,EAAE,aAAa;YAClB,IAAI,EAAE,EAAE;YACR,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,YAAY;SACpB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,QAAqB,EAAE,OAAsB;IACjE,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;IAE7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAc;YACvB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,GAAG;YACd,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,GAAG;YAC5B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,eAAe;YAClD,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,aAAa,EAAE,MAAM,CAAC,aAAa;YACnC,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,IAAI,KAAK,IAAI,CAAC,EAAE
,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAc,EACd,UAAwB,EAAE;IAE1B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,MAAM,cAAc,GAAG,MAAM,+BAA+B,CAC1D,GAAG,EACH,OAAO,CAAC,kBAAkB,CAC3B,CAAC;IAEF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,gBAAgB,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC9C,CAAC;QAED,MAAM,eAAe,GAAG,MAAM,qBAAqB,CAAC,GAAG,CAAC,CAAC;QACzD,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,IAAI,EACJ,GAAG,EACH,OAAO,CAAC,SAAS,EACjB,OAAO,CAAC,YAAY,CACrB,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC;IAE3C,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,eAAe,MAAM,SAAS,CAAC,CAAC;IAEjE,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAEpD,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;YAC3D,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list.d.ts","sourceRoot":"","sources":["../../src/commands/list.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,WAAW;IAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,wBAAsB,WAAW,CAAC,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CAiE1E"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { listSources } from "../lib/storage.js";
|
|
2
|
+
import { getUrlHost } from "../lib/url.js";
|
|
3
|
+
/**
 * Print the indexed markdown snapshots.
 *
 * With `options.json` the raw index is printed as pretty-printed JSON, even
 * when it contains no pages, so the output is always machine-parseable.
 * Otherwise pages are grouped by host (hosts sorted alphabetically, pages
 * newest first) followed by a totals line.
 *
 * @param {{ cwd?: string, json?: boolean }} [options]
 *   `cwd` is the project directory (defaults to `process.cwd()`).
 * @returns {Promise<void>}
 */
export async function listCommand(options = {}) {
    const cwd = options.cwd || process.cwd();
    const sources = await listSources(cwd);
    // Handle JSON mode before the empty-index hint: consumers piping the output
    // into a JSON parser must not receive human-readable text.
    if (options.json) {
        console.log(JSON.stringify(sources, null, 2));
        return;
    }
    if (sources.pages.length === 0) {
        console.log("No markdown pages fetched yet.");
        console.log("\nUse `mdrip <url>` to fetch markdown for a page.");
        return;
    }
    // Group pages by the host of their resolved URL (or requested URL).
    const byHost = new Map();
    for (const page of sources.pages) {
        const host = getUrlHost(page.resolvedUrl || page.url);
        const list = byHost.get(host) || [];
        list.push(page);
        byHost.set(host, list);
    }
    const hosts = [...byHost.keys()].sort((a, b) => a.localeCompare(b));
    for (const host of hosts) {
        console.log(`${host}:\n`);
        // Copy before sorting so the grouped list is not reordered in place;
        // ISO timestamps compare correctly as strings, newest first.
        const pages = byHost
            .get(host)
            .slice()
            .sort((a, b) => b.fetchedAt.localeCompare(a.fetchedAt));
        for (const page of pages) {
            const date = new Date(page.fetchedAt);
            const formattedDate = date.toLocaleDateString("en-US", {
                year: "numeric",
                month: "short",
                day: "numeric",
            });
            console.log(` ${page.url}`);
            console.log(` Path: mdrip/${page.path}`);
            console.log(` Fetched: ${formattedDate}`);
            if (page.source) {
                const sourceLabel = page.source === "cloudflare-markdown"
                    ? "Cloudflare Markdown for Agents"
                    : "HTML fallback conversion";
                console.log(` Source: ${sourceLabel}`);
            }
            if (page.markdownTokens !== undefined) {
                console.log(` Tokens: ${page.markdownTokens}`);
            }
            console.log("");
        }
    }
    // Pages without a token count contribute 0 to the total.
    const totalTokens = sources.pages.reduce((acc, page) => acc + (page.markdownTokens || 0), 0);
    const tokenSummary = totalTokens > 0 ? `, ~${totalTokens} tokens` : "";
    console.log(`Total: ${sources.pages.length} page(s), ${hosts.length} domain(s)${tokenSummary}`);
}
|
|
55
|
+
//# sourceMappingURL=list.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list.js","sourceRoot":"","sources":["../../src/commands/list.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAO3C,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAuB,EAAE;IACzD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IAEvC,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,mDAAmD,CAAC,CAAC;QACjE,OAAO;IACT,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC9C,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAgC,CAAC;IAEvD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChB,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACzB,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,KAAK,CAAC,CAAC;QAC1B,MAAM,KAAK,GAAG,MAAM;aACjB,GAAG,CAAC,IAAI,CAAE;aACV,KAAK,EAAE;aACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QAE1D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACtC,MAAM,aAAa,GAAG,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE;gBACrD,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,OAAO;gBACd,GAAG,EAAE,SAAS;aACf,CAAC,CAAC;YAEH,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,MAAM,WAAW,GACf,IAAI,CAAC,MAAM,KAAK,qBAAqB;oBACnC,CAAC,CAAC,gCAAgC;oBAClC,CAAC,CAAC,0BAA0B,CAAC;gBACjC,OAAO,CAAC,GAAG,CAAC,eAAe,WAAW,EA
AE,CAAC,CAAC;YAC5C,CAAC;YACD,IAAI,IAAI,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;gBACtC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC;YACpD,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CACtC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,CAAC,EAC/C,CAAC,CACF,CAAC;IAEF,MAAM,YAAY,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,WAAW,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,KAAK,CAAC,MAAM,aAAa,KAAK,CAAC,MAAM,aAAa,YAAY,EAAE,CAAC,CAAC;AAClG,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"remove.d.ts","sourceRoot":"","sources":["../../src/commands/remove.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,aAAa;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,aAAkB,GAC1B,OAAO,CAAC,IAAI,CAAC,CA2Df"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { listSources, removeStoredPage, cleanupEmptyPageDirs } from "../lib/storage.js";
|
|
2
|
+
import { normalizeUrl } from "../lib/url.js";
|
|
3
|
+
import { updateAgentsMd, updatePageIndex } from "../lib/agents.js";
|
|
4
|
+
import { getFileModificationPermission } from "../lib/settings.js";
|
|
5
|
+
/**
 * Remove the snapshots for the given URLs and refresh the generated indexes.
 *
 * Each input is normalized and matched against both the requested and the
 * resolved URL of the indexed pages. Inputs that cannot be normalized or are
 * not indexed are reported and counted as "not found".
 *
 * @param {string[]} urls Raw URL inputs.
 * @param {{ cwd?: string }} [options] `cwd` defaults to `process.cwd()`.
 * @returns {Promise<void>}
 */
export async function removeCommand(urls, options = {}) {
    const cwd = options.cwd || process.cwd();
    const stored = await listSources(cwd);
    // Work on a private copy so the loaded index object is left untouched.
    const pages = [...stored.pages];
    let removed = 0;
    let notFound = 0;
    for (const spec of urls) {
        let normalized;
        try {
            normalized = normalizeUrl(spec);
        }
        catch {
            console.log(` ⚠ Invalid URL: ${spec}`);
            notFound += 1;
            continue;
        }
        const position = pages.findIndex((page) => page.url === normalized || page.resolvedUrl === normalized);
        if (position < 0) {
            console.log(` ⚠ ${normalized} not found`);
            notFound += 1;
            continue;
        }
        await removeStoredPage(pages[position].path, cwd);
        pages.splice(position, 1);
        console.log(` ✓ Removed ${normalized}`);
        removed += 1;
    }
    if (removed > 0) {
        await cleanupEmptyPageDirs(cwd);
        const canModifyFiles = await getFileModificationPermission(cwd);
        if (!canModifyFiles) {
            await updatePageIndex({ pages }, cwd);
        }
        else if (await updateAgentsMd({ pages }, cwd)) {
            console.log(pages.length === 0
                ? "✓ Removed mdrip section from AGENTS.md"
                : "✓ Updated AGENTS.md");
        }
    }
    const missingSuffix = notFound > 0 ? `, ${notFound} not found` : "";
    console.log(`\nRemoved ${removed} page(s)${missingSuffix}`);
}
|
|
53
|
+
//# sourceMappingURL=remove.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"remove.js","sourceRoot":"","sources":["../../src/commands/remove.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACxF,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnE,OAAO,EAAE,6BAA6B,EAAE,MAAM,oBAAoB,CAAC;AAOnE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAc,EACd,UAAyB,EAAE;IAE3B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,KAAK,GAAgB,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC;IAE5C,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,UAAkB,CAAC;QAEvB,IAAI,CAAC;YACH,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,EAAE,CAAC,CAAC;YACxC,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAC3B,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,UAAU,IAAI,IAAI,CAAC,WAAW,KAAK,UAAU,CACrE,CAAC;QAEF,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,OAAO,UAAU,YAAY,CAAC,CAAC;YAC3C,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAC3B,MAAM,gBAAgB,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACxC,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,KAAK,KAAK,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,CAAC,eAAe,UAAU,EAAE,CAAC,CAAC;QACzC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;QAEhC,MAAM,cAAc,GAAG,MAAM,6BAA6B,CAAC,GAAG,CAAC,CAAC;QAEhE,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,aAAa,GAAG,MAAM,cAAc,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;YAC3D,IAAI,aAAa,EAAE,CAAC;gBAClB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACvB,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;gBACxD,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,EAAE,GAAG,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CACT,aAAa,OAAO,WAAW,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,QAAQ,
YAAY,CAAC,CAAC,CAAC,EAAE,EAAE,CAC/E,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { fetchCommand } from "./commands/fetch.js";
|
|
4
|
+
import { listCommand } from "./commands/list.js";
|
|
5
|
+
import { removeCommand } from "./commands/remove.js";
|
|
6
|
+
import { cleanCommand } from "./commands/clean.js";
|
|
7
|
+
// Root CLI definition. `--cwd` is declared here so that every subcommand can
// read it through `command.optsWithGlobals()`.
const program = new Command();
program
    .name("mdrip")
    .description("Fetch markdown snapshots for URLs using Cloudflare Markdown for Agents")
    .version("0.1.0")
    .option("--cwd <path>", "working directory (default: current directory)");

// Default command: fetch one or more URLs as markdown.
program
    .argument("[urls...]", "URLs to fetch as markdown")
    // Only the literal string "false" denies modifications; a missing, empty
    // or any other value allows them.
    .option("--modify [value]", "allow/deny modifying .gitignore, tsconfig.json, AGENTS.md", (val) => val !== "false")
    .option("--timeout <ms>", "request timeout in milliseconds", (value) => {
        // Number.parseInt alone accepts trailing junk ("10abc" -> 10, "1e3" -> 1),
        // so require a digits-only value before converting it.
        const text = value.trim();
        const parsed = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
        if (!Number.isFinite(parsed) || parsed <= 0) {
            throw new Error("--timeout must be a positive integer");
        }
        return parsed;
    })
    .option("--no-html-fallback", "disable HTML->Markdown fallback when text/markdown is unavailable")
    .action(async (urls, options, command) => {
        const globalOptions = command.optsWithGlobals();
        // With no URLs there is nothing to fetch; show usage instead.
        if (urls.length === 0) {
            program.help();
            return;
        }
        await fetchCommand(urls, {
            cwd: globalOptions.cwd,
            allowModifications: options.modify,
            timeoutMs: options.timeout,
            htmlFallback: options.htmlFallback,
        });
    });

// `mdrip list [--json]`
program
    .command("list")
    .description("List all fetched markdown pages")
    .option("--json", "output as JSON")
    .action(async (options, command) => {
        const globalOptions = command.optsWithGlobals();
        await listCommand({
            json: options.json,
            cwd: globalOptions.cwd,
        });
    });

// `mdrip remove <urls...>` (alias: `rm`)
program
    .command("remove <urls...>")
    .alias("rm")
    .description("Remove fetched markdown snapshots for one or more URLs")
    .action(async (urls, _options, command) => {
        const globalOptions = command.optsWithGlobals();
        await removeCommand(urls, { cwd: globalOptions.cwd });
    });

// `mdrip clean [--domain <host>]`
program
    .command("clean")
    .description("Remove all fetched markdown snapshots (or only one domain)")
    .option("--domain <host>", "only clean snapshots for a specific domain")
    .action(async (options, command) => {
        const globalOptions = command.optsWithGlobals();
        await cleanCommand({
            domain: options.domain,
            cwd: globalOptions.cwd,
        });
    });

// Every action handler above is async, so parseAsync() is required for
// Commander to await them. Failures are reported here and the process exits
// with a non-zero code instead of relying on an unhandled rejection.
program.parseAsync().catch((error) => {
    console.error(error);
    process.exit(1);
});
|
|
74
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAEnD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,OAAO,CAAC;KACb,WAAW,CACV,wEAAwE,CACzE;KACA,OAAO,CAAC,OAAO,CAAC;KAChB,MAAM,CAAC,cAAc,EAAE,gDAAgD,CAAC,CAAC;AAE5E,OAAO;KACJ,QAAQ,CAAC,WAAW,EAAE,2BAA2B,CAAC;KAClD,MAAM,CACL,kBAAkB,EAClB,2DAA2D,EAC3D,CAAC,GAAG,EAAE,EAAE;IACN,IAAI,GAAG,KAAK,SAAS,IAAI,GAAG,KAAK,EAAE,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IACnE,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IAClC,OAAO,IAAI,CAAC;AACd,CAAC,CACF;KACA,MAAM,CAAC,gBAAgB,EAAE,iCAAiC,EAAE,CAAC,KAAK,EAAE,EAAE;IACrE,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;KACD,MAAM,CACL,oBAAoB,EACpB,mEAAmE,CACpE;KACA,MAAM,CACL,KAAK,EACH,IAAc,EACd,OAAuE,EACvE,OAAgB,EAChB,EAAE;IACF,MAAM,aAAa,GAAG,OAAO,CAAC,eAAe,EAAoB,CAAC;IAElE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IAED,MAAM,YAAY,CAAC,IAAI,EAAE;QACvB,GAAG,EAAE,aAAa,CAAC,GAAG;QACtB,kBAAkB,EAAE,OAAO,CAAC,MAAM;QAClC,SAAS,EAAE,OAAO,CAAC,OAAO;QAC1B,YAAY,EAAE,OAAO,CAAC,YAAY;KACnC,CAAC,CAAC;AACL,CAAC,CACF,CAAC;AAEJ,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,iCAAiC,CAAC;KAC9C,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,OAA2B,EAAE,OAAgB,EAAE,EAAE;IAC9D,MAAM,aAAa,GAAG,OAAO,CAAC,eAAe,EAAoB,CAAC;IAClE,MAAM,WAAW,CAAC;QAChB,IAAI,EAAE,OAAO,CAAC,IAAI;QAClB,GAAG,EAAE,aAAa,CAAC,GAAG;KACvB,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,kBAAkB,CAAC;KAC3B,KAAK,CAAC,IAAI,CAAC;KACX,WAAW,CAAC,wDAAwD,CAAC;KACrE,MAAM,CAAC,KAAK,EAAE,IAAc,EAAE,QAAiB,EAAE,OAAgB,EAAE,EAAE;IACpE,MAAM,aAAa,GAAG,OAAO,CAAC,eAA
e,EAAoB,CAAC;IAClE,MAAM,aAAa,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,aAAa,CAAC,GAAG,EAAE,CAAC,CAAC;AACxD,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,4DAA4D,CAAC;KACzE,MAAM,CAAC,iBAAiB,EAAE,4CAA4C,CAAC;KACvE,MAAM,CAAC,KAAK,EAAE,OAA4B,EAAE,OAAgB,EAAE,EAAE;IAC/D,MAAM,aAAa,GAAG,OAAO,CAAC,eAAe,EAAoB,CAAC;IAClE,MAAM,YAAY,CAAC;QACjB,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,GAAG,EAAE,aAAa,CAAC,GAAG;KACvB,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { PageEntry } from "../types.js";
|
|
2
|
+
/**
 * Ambient declaration; the implementation is in src/lib/agents.ts.
 *
 * @param sources Object carrying the current list of page entries.
 * @param cwd     Optional working directory.
 * @returns A promise that resolves with no value.
 *
 * NOTE(review): presumably rewrites the on-disk page index from `sources.pages` —
 * confirm against the implementation.
 */
export declare function updatePageIndex(sources: {
|
|
3
|
+
pages: PageEntry[];
|
|
4
|
+
}, cwd?: string): Promise<void>;
|
|
5
|
+
/**
 * Ambient declaration; the implementation is in src/lib/agents.ts.
 *
 * @param cwd Optional working directory.
 * @returns A promise resolving to a boolean.
 *
 * NOTE(review): presumably `true` means AGENTS.md was created or changed —
 * confirm against the implementation.
 */
export declare function ensureAgentsMd(cwd?: string): Promise<boolean>;
|
|
6
|
+
/**
 * Ambient declaration; the implementation is in src/lib/agents.ts.
 *
 * @param cwd Optional working directory.
 * @returns A promise resolving to a boolean.
 *
 * NOTE(review): presumably `true` means an mdrip section was found and removed
 * from AGENTS.md — confirm against the implementation.
 */
export declare function removeMdripSection(cwd?: string): Promise<boolean>;
|
|
7
|
+
/**
 * Ambient declaration; the implementation is in src/lib/agents.ts.
 *
 * @param sources Object carrying the current list of page entries.
 * @param cwd     Optional working directory.
 * @returns A promise resolving to a boolean; the remove command treats a
 *          truthy result as "AGENTS.md was changed" and logs accordingly.
 *
 * NOTE(review): how an empty `sources.pages` is handled is not visible here —
 * confirm against the implementation.
 */
export declare function updateAgentsMd(sources: {
|
|
8
|
+
pages: PageEntry[];
|
|
9
|
+
}, cwd?: string): Promise<boolean>;
|
|
10
|
+
//# sourceMappingURL=agents.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agents.d.ts","sourceRoot":"","sources":["../../src/lib/agents.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,SAAS,EAAgB,MAAM,aAAa,CAAC;AAgD3D,wBAAsB,eAAe,CACnC,OAAO,EAAE;IAAE,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,EAC/B,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,IAAI,CAAC,CAiBf;AAED,wBAAsB,cAAc,CAClC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,CAwClB;AAED,wBAAsB,kBAAkB,CACtC,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,CAoClB;AAED,wBAAsB,cAAc,CAClC,OAAO,EAAE;IAAE,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,EAC/B,GAAG,GAAE,MAAsB,GAC1B,OAAO,CAAC,OAAO,CAAC,CAQlB"}
|