@jenslys/curldown 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +3 -1
  2. package/dist/cli.js +35 -1
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -7,6 +7,7 @@ Fetch a webpage and return clean Markdown for AI workflows.
7
7
  - Static mode: `fetch` HTML -> Cheerio cleanup -> Turndown markdown.
8
8
  - Dynamic mode: headless Chromium (Playwright) -> HTML -> markdown.
9
9
  - `--auto` tries static first and falls back to dynamic when static output is thin.
10
+ - Direct markdown responses are passed through (including `.md` URLs served as `text/plain`).
10
11
  - `--format json` emits markdown plus metadata for agent pipelines.
11
12
 
12
13
  ## Install
@@ -82,7 +83,8 @@ Paste this into your `AGENTS.md` if you want agents to always use `curldown` for
82
83
  ```md
83
84
  ## Website Content Retrieval
84
85
 
85
- - Always use `curldown` to fetch web pages for agent workflows.
86
+ - Use `curldown` for website/article page retrieval in agent workflows.
87
+ - Do not use `curldown` for raw code files or repository file blobs (for those, fetch the file directly).
86
88
  - Default command: `curldown <url>`.
87
89
  - Prefer `curldown <url> --auto` when page rendering might be uncertain.
88
90
  - Use `curldown <url> --format json` when downstream steps need structured metadata.
package/dist/cli.js CHANGED
@@ -15,6 +15,16 @@ const MARKDOWN_CONTENT_TYPES = new Set([
15
15
  "application/markdown",
16
16
  "application/x-markdown"
17
17
  ]);
18
+ const PLAINTEXT_CONTENT_TYPE = "text/plain";
19
+ const MARKDOWN_FILE_EXTENSIONS = [
20
+ ".md",
21
+ ".markdown",
22
+ ".mdown",
23
+ ".mkd",
24
+ ".mkdn",
25
+ ".mdtxt",
26
+ ".mdx"
27
+ ];
18
28
  const defaultDependencies = {
19
29
  fetchStatic: fetchStaticHtml,
20
30
  fetchDynamic: fetchDynamicHtml,
@@ -108,6 +118,30 @@ function isMarkdownContentType(contentType) {
108
118
  const normalized = contentType.toLowerCase().split(";")[0]?.trim() ?? "";
109
119
  return MARKDOWN_CONTENT_TYPES.has(normalized);
110
120
  }
121
+ function isPlainTextContentType(contentType) {
122
+ if (!contentType) {
123
+ return false;
124
+ }
125
+ const normalized = contentType.toLowerCase().split(";")[0]?.trim() ?? "";
126
+ return normalized === PLAINTEXT_CONTENT_TYPE;
127
+ }
128
+ function hasMarkdownFileExtension(urlValue) {
129
+ let pathname;
130
+ try {
131
+ pathname = new URL(urlValue).pathname;
132
+ }
133
+ catch {
134
+ return false;
135
+ }
136
+ const normalizedPath = pathname.toLowerCase();
137
+ return MARKDOWN_FILE_EXTENSIONS.some((extension) => normalizedPath.endsWith(extension));
138
+ }
139
+ function shouldTreatAsMarkdownPassthrough(result) {
140
+ if (isMarkdownContentType(result.contentType)) {
141
+ return true;
142
+ }
143
+ return isPlainTextContentType(result.contentType) && hasMarkdownFileExtension(result.finalUrl);
144
+ }
111
145
  function countWords(value) {
112
146
  const trimmed = value.trim();
113
147
  if (!trimmed) {
@@ -165,7 +199,7 @@ function normalizeArgs(urlInput, options) {
165
199
  };
166
200
  }
167
201
  function prepareContentFromFetchResult(result, deps) {
168
- if (isMarkdownContentType(result.contentType)) {
202
+ if (shouldTreatAsMarkdownPassthrough(result)) {
169
203
  const markdown = normalizeMarkdown(result.body);
170
204
  return {
171
205
  markdown,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jenslys/curldown",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "Fetch URL content and convert it to markdown.",
5
5
  "repository": {
6
6
  "type": "git",