npm - @jenslys/curldown - Versions diffs - 1.0.2 → 1.0.3 - Mend

@jenslys/curldown 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -7,6 +7,7 @@ Fetch a webpage and return clean Markdown for AI workflows.
 - Static mode: `fetch` HTML -> Cheerio cleanup -> Turndown markdown.
 - Dynamic mode: headless Chromium (Playwright) -> HTML -> markdown.
 - `--auto` tries static first and falls back to dynamic when static output is thin.
+- Direct markdown responses are passed through (including `.md` URLs served as `text/plain`).
 - `--format json` emits markdown plus metadata for agent pipelines.
 ## Install
@@ -82,7 +83,8 @@ Paste this into your `AGENTS.md` if you want agents to always use `curldown` for
 ```md
 ## Website Content Retrieval
-- Always use `curldown` to fetch web pages for agent workflows.
+- Use `curldown` for website/article page retrieval in agent workflows.
+- Do not use `curldown` for raw code files or repository file blobs (for those, fetch the file directly).
 - Default command: `curldown <url>`.
 - Prefer `curldown <url> --auto` when page rendering might be uncertain.
 - Use `curldown <url> --format json` when downstream steps need structured metadata.

package/dist/cli.js CHANGED Viewed

@@ -15,6 +15,16 @@ const MARKDOWN_CONTENT_TYPES = new Set([
     "application/markdown",
     "application/x-markdown"
 ]);
+const PLAINTEXT_CONTENT_TYPE = "text/plain";
+const MARKDOWN_FILE_EXTENSIONS = [
+    ".md",
+    ".markdown",
+    ".mdown",
+    ".mkd",
+    ".mkdn",
+    ".mdtxt",
+    ".mdx"
+];
 const defaultDependencies = {
     fetchStatic: fetchStaticHtml,
     fetchDynamic: fetchDynamicHtml,
@@ -108,6 +118,30 @@ function isMarkdownContentType(contentType) {
     const normalized = contentType.toLowerCase().split(";")[0]?.trim() ?? "";
     return MARKDOWN_CONTENT_TYPES.has(normalized);
 }
+function isPlainTextContentType(contentType) {
+    if (!contentType) {
+        return false;
+    }
+    const normalized = contentType.toLowerCase().split(";")[0]?.trim() ?? "";
+    return normalized === PLAINTEXT_CONTENT_TYPE;
+}
+function hasMarkdownFileExtension(urlValue) {
+    let pathname;
+    try {
+        pathname = new URL(urlValue).pathname;
+    }
+    catch {
+        return false;
+    }
+    const normalizedPath = pathname.toLowerCase();
+    return MARKDOWN_FILE_EXTENSIONS.some((extension) => normalizedPath.endsWith(extension));
+}
+function shouldTreatAsMarkdownPassthrough(result) {
+    if (isMarkdownContentType(result.contentType)) {
+        return true;
+    }
+    return isPlainTextContentType(result.contentType) && hasMarkdownFileExtension(result.finalUrl);
+}
 function countWords(value) {
     const trimmed = value.trim();
     if (!trimmed) {
@@ -165,7 +199,7 @@ function normalizeArgs(urlInput, options) {
     };
 }
 function prepareContentFromFetchResult(result, deps) {
-    if (isMarkdownContentType(result.contentType)) {
+    if (shouldTreatAsMarkdownPassthrough(result)) {
         const markdown = normalizeMarkdown(result.body);
         return {
             markdown,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jenslys/curldown",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "Fetch URL content and convert it to markdown.",
   "repository": {
     "type": "git",