@jenslys/curldown 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +3 -1
  2. package/dist/cli.js +58 -3
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -7,6 +7,7 @@ Fetch a webpage and return clean Markdown for AI workflows.
7
7
  - Static mode: `fetch` HTML -> Cheerio cleanup -> Turndown markdown.
8
8
  - Dynamic mode: headless Chromium (Playwright) -> HTML -> markdown.
9
9
  - `--auto` tries static first and falls back to dynamic when static output is thin.
10
+ - Direct markdown responses are passed through (including `.md` URLs served as `text/plain`).
10
11
  - `--format json` emits markdown plus metadata for agent pipelines.
11
12
 
12
13
  ## Install
@@ -82,7 +83,8 @@ Paste this into your `AGENTS.md` if you want agents to always use `curldown` for
82
83
  ```md
83
84
  ## Website Content Retrieval
84
85
 
85
- - Always use `curldown` to fetch web pages for agent workflows.
86
+ - Use `curldown` for website/article page retrieval in agent workflows.
87
+ - Do not use `curldown` for raw code files or repository file blobs (for those, fetch the file directly).
86
88
  - Default command: `curldown <url>`.
87
89
  - Prefer `curldown <url> --auto` when page rendering might be uncertain.
88
90
  - Use `curldown <url> --format json` when downstream steps need structured metadata.
package/dist/cli.js CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { createHash } from "node:crypto";
3
+ import { realpathSync } from "node:fs";
3
4
  import { Command, CommanderError } from "commander";
4
- import { pathToFileURL } from "node:url";
5
+ import { fileURLToPath, pathToFileURL } from "node:url";
5
6
  import { DEFAULT_DYNAMIC_TIMEOUT_MS, DEFAULT_STATIC_TIMEOUT_MS, VERSION } from "./constants.js";
6
7
  import { asCurldownError, ConversionError, InputError } from "./errors.js";
7
8
  import { fetchDynamicHtml } from "./fetch-dynamic.js";
@@ -14,6 +15,16 @@ const MARKDOWN_CONTENT_TYPES = new Set([
14
15
  "application/markdown",
15
16
  "application/x-markdown"
16
17
  ]);
18
/** Media type (lower-case, without parameters) that identifies a plain-text response. */
const PLAINTEXT_CONTENT_TYPE = "text/plain";
/**
 * Lower-case URL path suffixes that mark a resource as a Markdown document.
 * Frozen because the list is shared module-level state: an accidental
 * push/splice elsewhere would silently change passthrough detection.
 */
const MARKDOWN_FILE_EXTENSIONS = Object.freeze([
    ".md",
    ".markdown",
    ".mdown",
    ".mkd",
    ".mkdn",
    ".mdtxt",
    ".mdx"
]);
17
28
  const defaultDependencies = {
18
29
  fetchStatic: fetchStaticHtml,
19
30
  fetchDynamic: fetchDynamicHtml,
@@ -107,6 +118,30 @@ function isMarkdownContentType(contentType) {
107
118
  const normalized = contentType.toLowerCase().split(";")[0]?.trim() ?? "";
108
119
  return MARKDOWN_CONTENT_TYPES.has(normalized);
109
120
  }
121
/**
 * Report whether a Content-Type header value denotes plain text.
 * Letter case and anything after the first `;` (such as a charset
 * parameter) are ignored.
 *
 * @param {string | null | undefined} contentType - Raw header value.
 * @returns {boolean} `true` when the media type equals `PLAINTEXT_CONTENT_TYPE`;
 *   `false` otherwise, including for a missing or empty value.
 */
function isPlainTextContentType(contentType) {
    if (contentType) {
        // Keep only the media type; drop parameters that follow the first ";".
        const [mediaType = ""] = contentType.toLowerCase().split(";");
        return mediaType.trim() === PLAINTEXT_CONTENT_TYPE;
    }
    return false;
}
128
/**
 * Report whether a URL's path ends with one of `MARKDOWN_FILE_EXTENSIONS`.
 * Only the pathname is inspected, so query strings and fragments do not
 * affect the result; the comparison is case-insensitive.
 *
 * @param {string} urlValue - Absolute URL to inspect.
 * @returns {boolean} `true` on a suffix match; `false` when nothing matches
 *   or when `urlValue` cannot be parsed as a URL.
 */
function hasMarkdownFileExtension(urlValue) {
    let parsedUrl;
    try {
        parsedUrl = new URL(urlValue);
    }
    catch {
        // An unparseable URL has no usable path to check.
        return false;
    }
    const lowerCasedPath = parsedUrl.pathname.toLowerCase();
    for (const suffix of MARKDOWN_FILE_EXTENSIONS) {
        if (lowerCasedPath.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
139
/**
 * Decide whether a fetch result should be handled as Markdown passthrough.
 * That is the case when the response declares a Markdown content type, or
 * when it is plain text and the final URL carries a Markdown file extension.
 *
 * @param {{ contentType: unknown, finalUrl: unknown }} result - Fetch result;
 *   only `contentType` and `finalUrl` are read here.
 * @returns {boolean} `true` when the result qualifies for passthrough.
 */
function shouldTreatAsMarkdownPassthrough(result) {
    const declaredType = result.contentType;
    return isMarkdownContentType(declaredType)
        ? true
        : isPlainTextContentType(declaredType) && hasMarkdownFileExtension(result.finalUrl);
}
110
145
  function countWords(value) {
111
146
  const trimmed = value.trim();
112
147
  if (!trimmed) {
@@ -164,7 +199,7 @@ function normalizeArgs(urlInput, options) {
164
199
  };
165
200
  }
166
201
  function prepareContentFromFetchResult(result, deps) {
167
- if (isMarkdownContentType(result.contentType)) {
202
+ if (shouldTreatAsMarkdownPassthrough(result)) {
168
203
  const markdown = normalizeMarkdown(result.body);
169
204
  return {
170
205
  markdown,
@@ -275,7 +310,27 @@ export async function run(argv, deps = defaultDependencies) {
275
310
  return curldownError.exitCode;
276
311
  }
277
312
  }
278
- const isMain = process.argv[1] !== undefined && pathToFileURL(process.argv[1]).href === import.meta.url;
313
/**
 * Resolve a filesystem path to its canonical form with `realpathSync`.
 * Nothing is caught here: any error thrown by `realpathSync` (for example
 * for a path that does not exist) propagates to the caller.
 *
 * @param {string} pathInput - Path to resolve.
 * @returns {string} The resolved path.
 */
function resolvePathStrict(pathInput) {
    const canonicalPath = realpathSync(pathInput);
    return canonicalPath;
}
316
/**
 * Tell whether this module is the script Node was asked to run.
 *
 * Both the invoked path and this module's own path are passed through
 * `resolvePathStrict` before comparison, so a symlinked bin (as created by
 * global installs) still counts as the entrypoint. If either resolution
 * throws, the check falls back to comparing the invoked path's file URL
 * with `import.meta.url`.
 *
 * @param {string | undefined} [argvPath=process.argv[1]] - Path of the invoked script.
 * @returns {boolean} `true` when the invoked path refers to this module;
 *   `false` otherwise, including when no path is available.
 */
export function isMainModule(argvPath = process.argv[1]) {
    if (argvPath === undefined) {
        return false;
    }
    let isEntrypoint;
    try {
        isEntrypoint = resolvePathStrict(argvPath) === resolvePathStrict(fileURLToPath(import.meta.url));
    }
    catch {
        // Resolution failed; compare the unresolved file URL instead.
        isEntrypoint = pathToFileURL(argvPath).href === import.meta.url;
    }
    return isEntrypoint;
}
333
+ const isMain = isMainModule();
279
334
  if (isMain) {
280
335
  void run(process.argv.slice(2)).then((exitCode) => {
281
336
  process.exitCode = exitCode;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jenslys/curldown",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "Fetch URL content and convert it to markdown.",
5
5
  "repository": {
6
6
  "type": "git",