@ariesfish/feedloom 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -68,6 +68,22 @@ npm link
68
68
  feedloom --help
69
69
  ```
70
70
 
71
+ ## Agent Skill
72
+
73
+ Feedloom ships an Agent Skill in `skills/feedloom`, so agents that support the `skills` CLI can install the clipping workflow directly from the package or repository:
74
+
75
+ ```bash
76
+ npx skills add @ariesfish/feedloom --skill feedloom
77
+ ```
78
+
79
+ For a global install across supported agents:
80
+
81
+ ```bash
82
+ npx skills add @ariesfish/feedloom --skill feedloom --global
83
+ ```
84
+
85
+ After installing the skill, ask your agent to save article URLs, URL lists, or RSS feeds as Markdown. The skill runs the CLI through `npx -y @ariesfish/feedloom` by default.
86
+
71
87
  ## Quick Start
72
88
 
73
89
  Archive a single article to the default `clippings/` directory:
@@ -242,6 +258,7 @@ Only use this on your own device and accounts. Always respect the target site's
242
258
  --prefer-browser-state Try local Chrome user state first
243
259
  --chrome-user-data-dir <path> Chrome User Data directory
244
260
  --chrome-profile <name> Chrome profile name. Default: Default
261
+ --site-rules-dir <dir> Optional directory of private TOML site rules
245
262
  ```
246
263
 
247
264
  For the full option list, run:
@@ -264,7 +281,7 @@ npm test
264
281
  - Respect robots.txt, website terms of service, copyright, and rate limits.
265
282
  - For dynamic pages, try `--fetch-mode browser` first.
266
283
  - For static blogs and news sites, `--fetch-mode static` is usually faster.
267
- - If article extraction is poor for a specific site, add or adjust a site rule in `src/site-rules/`.
284
+ - If article extraction is poor for a specific site, keep private TOML site rules outside the package and pass them with `--site-rules-dir <dir>`.
268
285
  - For large batches, test with `--limit` before running the full job.
269
286
 
270
287
  ## Acknowledgements
package/dist/cli.js CHANGED
@@ -2,8 +2,7 @@
2
2
 
3
3
  // src/cli.ts
4
4
  import { readdir as readdir2 } from "fs/promises";
5
- import { dirname, join as join7, resolve as resolve2 } from "path";
6
- import { fileURLToPath } from "url";
5
+ import { join as join7, resolve as resolve2 } from "path";
7
6
  import { Command } from "commander";
8
7
 
9
8
  // src/cleaning/profiles.ts
@@ -1618,27 +1617,9 @@ var ProgressTracker = class {
1618
1617
 
1619
1618
  // src/cli.ts
1620
1619
  var program = new Command();
1621
- async function standardSiteRulePaths() {
1622
- const here = dirname(fileURLToPath(import.meta.url));
1623
- const candidates = [
1624
- resolve2(here, "../src/site-rules"),
1625
- resolve2(here, "../../src/site-rules"),
1626
- resolve2(process.cwd(), "src/site-rules"),
1627
- resolve2(here, "../../src/feedloom/site_rules"),
1628
- resolve2(process.cwd(), "../src/feedloom/site_rules"),
1629
- resolve2(process.cwd(), "src/feedloom/site_rules")
1630
- ];
1631
- for (const dir of candidates) {
1632
- try {
1633
- const names = await readdir2(dir);
1634
- return names.filter((name) => name.endsWith(".toml")).map((name) => join7(dir, name));
1635
- } catch (error) {
1636
- if (error.code !== "ENOENT") {
1637
- throw error;
1638
- }
1639
- }
1640
- }
1641
- return [];
1620
+ async function siteRulePathsFromDir(dir) {
1621
+ const names = await readdir2(dir);
1622
+ return names.filter((name) => name.endsWith(".toml")).map((name) => join7(dir, name));
1642
1623
  }
1643
1624
  function positiveIntOption(value, fallback) {
1644
1625
  const parsed = Number(value ?? fallback);
@@ -1647,7 +1628,7 @@ function positiveIntOption(value, fallback) {
1647
1628
  }
1648
1629
  return parsed;
1649
1630
  }
1650
- program.name("feedloom").description("Archive long-form web content as clean Markdown with local assets").version("0.1.0").option("--output-dir <dir>", "Output directory for markdown notes", "clippings").option("--source-kind <kind>", "auto, html-page, or rss-feed", "auto").option("--since <date>", "Only keep feed entries on or after YYYY-MM-DD", "").option("--limit <n>", "Process only first N deduplicated URLs", "0").option("--start <n>", "Start from 1-based index after deduplication", "1").option("--end <n>", "End at 1-based index after deduplication", "0").option("--prefer-browser-state", "Try copied local Chrome profile before regular browser fallback", false).option("--chrome-user-data-dir <path>", "Chrome user data directory used with --prefer-browser-state", "").option("--chrome-profile <name>", "Chrome profile directory name", "Default").option("--fetch-mode <mode>", "auto, static, browser, or stealth", "auto").option("--no-network-idle", "Do not wait for browser networkidle before reading HTML").option("--wait-ms <ms>", "Extra browser wait after load", "2500").option("--solve-cloudflare", "In stealth mode, attempt Cloudflare Turnstile/interstitial challenge handling", false).option("--disable-resources", "In stealth mode, block images/media/fonts/stylesheets for speed", false).option("--proxy <server>", "Proxy server for browser/stealth fetch, e.g. 
http://127.0.0.1:8080", "").option("--dns-over-https", "Use Chromium Cloudflare DNS-over-HTTPS flag for browser/stealth fetch", false).option("--wait-selector <selector>", "Wait for a CSS selector after page load", "").option("--wait-selector-state <state>", "attached, detached, visible, or hidden", "attached").option("--click-selector <selector...>", "Click one or more selectors after page load", []).option("--scroll-to-bottom", "Scroll to the bottom before reading HTML", false).option("--headful", "Run browser/browser-state fetches with a visible Chrome window", false).option("--no-real-chrome-defaults", "Disable Scrapling-inspired real Chrome context defaults").option("--no-reuse-browser", "Disable batch browser/stealth context reuse").argument("[inputs...]", "URLs or files containing URLs").action(async (inputs, options) => {
1631
+ program.name("feedloom").description("Archive long-form web content as clean Markdown with local assets").version("0.1.0").option("--output-dir <dir>", "Output directory for markdown notes", "clippings").option("--source-kind <kind>", "auto, html-page, or rss-feed", "auto").option("--since <date>", "Only keep feed entries on or after YYYY-MM-DD", "").option("--limit <n>", "Process only first N deduplicated URLs", "0").option("--start <n>", "Start from 1-based index after deduplication", "1").option("--end <n>", "End at 1-based index after deduplication", "0").option("--prefer-browser-state", "Try copied local Chrome profile before regular browser fallback", false).option("--chrome-user-data-dir <path>", "Chrome user data directory used with --prefer-browser-state", "").option("--chrome-profile <name>", "Chrome profile directory name", "Default").option("--fetch-mode <mode>", "auto, static, browser, or stealth", "auto").option("--no-network-idle", "Do not wait for browser networkidle before reading HTML").option("--wait-ms <ms>", "Extra browser wait after load", "2500").option("--solve-cloudflare", "In stealth mode, attempt Cloudflare Turnstile/interstitial challenge handling", false).option("--disable-resources", "In stealth mode, block images/media/fonts/stylesheets for speed", false).option("--proxy <server>", "Proxy server for browser/stealth fetch, e.g. 
http://127.0.0.1:8080", "").option("--dns-over-https", "Use Chromium Cloudflare DNS-over-HTTPS flag for browser/stealth fetch", false).option("--wait-selector <selector>", "Wait for a CSS selector after page load", "").option("--wait-selector-state <state>", "attached, detached, visible, or hidden", "attached").option("--click-selector <selector...>", "Click one or more selectors after page load", []).option("--scroll-to-bottom", "Scroll to the bottom before reading HTML", false).option("--headful", "Run browser/browser-state fetches with a visible Chrome window", false).option("--site-rules-dir <dir>", "Optional directory of private TOML site extraction/cleaning rules", "").option("--no-real-chrome-defaults", "Disable Scrapling-inspired real Chrome context defaults").option("--no-reuse-browser", "Disable batch browser/stealth context reuse").argument("[inputs...]", "URLs or files containing URLs").action(async (inputs, options) => {
1651
1632
  if (inputs.length === 0) {
1652
1633
  program.help({ error: true });
1653
1634
  }
@@ -1674,7 +1655,8 @@ program.name("feedloom").description("Archive long-form web content as clean Mar
1674
1655
  positiveIntOption(options.end, 0),
1675
1656
  positiveIntOption(options.limit, 0)
1676
1657
  );
1677
- const profiles = await loadSiteProfiles(await standardSiteRulePaths());
1658
+ const siteRulesDir = String(options.siteRulesDir || "");
1659
+ const profiles = siteRulesDir ? await loadSiteProfiles(await siteRulePathsFromDir(resolve2(siteRulesDir))) : [];
1678
1660
  const outputDir = String(options.outputDir ?? "clippings");
1679
1661
  let failures = 0;
1680
1662
  const tracker = new ProgressTracker(selected, outputDir);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ariesfish/feedloom",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "type": "module",
5
5
  "author": "ariesfish",
6
6
  "license": "MIT",
@@ -18,6 +18,7 @@
18
18
  },
19
19
  "files": [
20
20
  "dist",
21
+ "skills",
21
22
  "README.md",
22
23
  "LICENSE"
23
24
  ],
@@ -0,0 +1,78 @@
1
+ ---
2
+ name: feedloom
3
+ description: Capture long-form web content, article URLs, URL list files, or RSS/Atom feeds into clean Markdown with local assets using the Feedloom CLI. Use for web clipping, saving articles as Markdown, archiving URL batches, clipping Zhihu/WeChat/Kaggle/blog posts, 抓取网页文章, 保存为 Markdown, URL 列表转归档, RSS 归档, and 网页长文归档.
4
+ ---
5
+
6
+ # Feedloom
7
+
8
+ Use Feedloom for article clipping instead of writing ad-hoc scrapers.
9
+
10
+ ## Command
11
+
12
+ ```bash
13
+ npx -y @ariesfish/feedloom <inputs...> [options]
14
+ ```
15
+
16
+ ## Inputs
17
+
18
+ - Direct article URLs.
19
+ - Files containing URLs, one per line.
20
+ - Markdown checklist files with lines like `- [ ] <url>` or `- [x] <url>`.
21
+ - RSS/Atom feeds with `--source-kind rss-feed`.
22
+
23
+ ## Common usage
24
+
25
+ ```bash
26
+ npx -y @ariesfish/feedloom "https://example.com/article"
27
+ npx -y @ariesfish/feedloom urls.txt
28
+ npx -y @ariesfish/feedloom urls.txt --limit 10
29
+ npx -y @ariesfish/feedloom urls.txt --start 11 --end 20
30
+ npx -y @ariesfish/feedloom urls.txt --output-dir clippings
31
+ npx -y @ariesfish/feedloom "https://example.com/feed.xml" --source-kind rss-feed
32
+ npx -y @ariesfish/feedloom "https://example.com/feed.xml" --source-kind rss-feed --since 2026-01-01
33
+ npx -y @ariesfish/feedloom "https://example.com/article" --fetch-mode browser --wait-ms 4000 --scroll-to-bottom
34
+ npx -y @ariesfish/feedloom "https://example.com/article" --prefer-browser-state
35
+ ```
36
+
37
+ ## Fetch workflow
38
+
39
+ Use the least expensive mode that works:
40
+
41
+ 1. Start with the default `auto` mode. It tries fetch layers in order until meaningful content is extracted: `static` → `browser-state` when `--prefer-browser-state` is set → `browser` → `stealth`.
42
+ 2. Use `--fetch-mode static` only for simple pages when speed matters and JavaScript/login state is unnecessary.
43
+ 3. Use `--fetch-mode browser` for JavaScript-rendered pages; add `--wait-ms`, `--wait-selector`, `--click-selector`, or `--scroll-to-bottom` only when needed.
44
+ 4. Use `--prefer-browser-state` with `--chrome-user-data-dir` / `--chrome-profile` for pages that need local login state.
45
+ 5. Use `--fetch-mode stealth` only after static/browser fails or for anti-bot pages; add `--solve-cloudflare`, `--proxy`, or `--dns-over-https` only when required.
46
+ 6. For batches, test one URL first, then run the list with the working options plus `--limit`, `--start`, or `--end` as needed.
47
+
48
+ ## Useful options
49
+
50
+ - `--output-dir <dir>`: write notes and assets somewhere other than `clippings/`.
51
+ - `--source-kind rss-feed`: treat input as an RSS/Atom feed and archive feed entries.
52
+ - `--since <YYYY-MM-DD>`: limit RSS/Atom entries by date.
53
+ - `--limit <n>`, `--start <n>`, `--end <n>`: process URL lists in small batches or resume partway through a list.
54
+ - `--fetch-mode <static|browser|stealth>`: force a specific fetch layer when `auto` is too broad or too slow.
55
+ - `--prefer-browser-state`: try a copied local Chrome profile before regular browser fallback.
56
+ - `--wait-ms <ms>`, `--wait-selector <selector>`, `--scroll-to-bottom`: give dynamic pages time or actions to reveal article content.
57
+ - `--click-selector <selector...>`: click dismiss/expand selectors before extracting HTML.
58
+ - `--headful`: show the browser window for debugging login, popups, or dynamic loading.
59
+ - `--site-rules-dir <dir>`: load optional private TOML extraction/cleaning rules from a local directory, for example the `skills/feedloom/site-rules/` reference folder.
60
+ - `--solve-cloudflare`, `--proxy <server>`, `--dns-over-https`: use only when stealth fetching needs them.
61
+
62
+ Run `npx -y @ariesfish/feedloom --help` for the complete option list. Do not invent unsupported options.
63
+
64
+ ## Private site rules
65
+
66
+ Site-specific TOML rules are intentionally optional and should not be assumed to be bundled with the package. If the user keeps private rules next to this skill, pass that directory explicitly:
67
+
68
+ ```bash
69
+ npx -y @ariesfish/feedloom "https://example.com/article" --site-rules-dir skills/feedloom/site-rules
70
+ ```
71
+
72
+ Treat rule files in `skills/feedloom/site-rules/` as local reference material: use them only when present and relevant.
73
+
74
+ ## Output
75
+
76
+ - Markdown files are written to `clippings/` by default, or to `--output-dir`.
77
+ - Assets are written under an `assets/` subdirectory.
78
+ - Successful Markdown checklist items are marked done.