@ariesfish/feedloom 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Feedloom
2
2
 
3
+ <div align="center">
4
+ <img src="assets/logo.png" alt="Feedloom logo" width="160">
5
+ <p><strong>Archive long-form web content as clean Markdown with local assets.</strong></p>
6
+ <p>
7
+ <a href="https://www.npmjs.com/package/@ariesfish/feedloom"><img alt="npm" src="https://img.shields.io/npm/v/@ariesfish/feedloom"></a>
8
+ <img alt="Node.js >= 24" src="https://img.shields.io/badge/node-%3E%3D24-339933">
9
+ <img alt="License MIT" src="https://img.shields.io/badge/license-MIT-blue">
10
+ </p>
11
+ </div>
12
+
3
13
  Feedloom is a command-line tool for archiving long-form web content. It takes article URLs, URL list files, or RSS/Atom feeds, extracts readable article content, converts it to Markdown with YAML frontmatter, and saves page images as local assets. It is designed for personal knowledge bases, notebook vaults, and offline reading archives.
4
14
 
5
15
  ## Features
@@ -43,6 +53,14 @@ If you plan to use `browser`, `stealth`, or the browser fallback in `auto` mode,
43
53
  npx patchright install chromium
44
54
  ```
45
55
 
56
+ You can verify or repair the runtime later with:
57
+
58
+ ```bash
59
+ npm run dev -- doctor
60
+ ```
61
+
62
+ If the Patchright Chromium executable is missing, `doctor` runs `npx patchright install chromium` automatically.
63
+
46
64
  ### 4. Build the CLI
47
65
 
48
66
  ```bash
@@ -261,6 +279,12 @@ Only use this on your own device and accounts. Always respect the target site's
261
279
  --site-rules-dir <dir> Optional directory of private TOML site rules
262
280
  ```
263
281
 
282
+ Run environment checks:
283
+
284
+ ```bash
285
+ npm run dev -- doctor
286
+ ```
287
+
264
288
  For the full option list, run:
265
289
 
266
290
  ```bash
Binary file
package/dist/cli.js CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  // src/cli.ts
4
4
  import { readdir as readdir2 } from "fs/promises";
5
+ import { createRequire } from "module";
5
6
  import { dirname, join as join7, resolve as resolve2 } from "path";
6
7
  import { fileURLToPath } from "url";
7
8
  import { Command } from "commander";
@@ -112,11 +113,172 @@ function firstContentSelector(profiles) {
112
113
  return void 0;
113
114
  }
114
115
 
116
+ // src/doctor.ts
117
+ import { spawn } from "child_process";
118
+ import { access } from "fs/promises";
119
+ import { chromium } from "patchright";
120
+ var INSTALL_CHROMIUM_COMMAND = "npx patchright install chromium";
121
+ var INSTALL_CHROMIUM_HINT = `Run: ${INSTALL_CHROMIUM_COMMAND}`;
122
+ function errorMessage(error) {
123
+ return error instanceof Error ? error.message : String(error);
124
+ }
125
+ function chromiumExecutablePath() {
126
+ const browserType = chromium;
127
+ return browserType.executablePath();
128
+ }
129
+ function appendCheck(checks, check) {
130
+ checks.push(check);
131
+ }
132
+ async function installPatchrightChromium(events = {}) {
133
+ return new Promise((resolve3, reject) => {
134
+ const child = spawn("npx", ["patchright", "install", "chromium"], {
135
+ stdio: ["ignore", "pipe", "pipe"]
136
+ });
137
+ child.stdout.setEncoding("utf8");
138
+ child.stderr.setEncoding("utf8");
139
+ child.stdout.on("data", (chunk) => events.onStdout?.(chunk));
140
+ child.stderr.on("data", (chunk) => events.onStderr?.(chunk));
141
+ child.on("error", reject);
142
+ child.on("close", (code, signal) => resolve3({ code, signal }));
143
+ });
144
+ }
145
+ async function executableExists(path) {
146
+ try {
147
+ await access(path);
148
+ return { ok: true };
149
+ } catch (error) {
150
+ return { ok: false, error };
151
+ }
152
+ }
153
+ async function ensureChromiumInstalled(checks, executablePath, options) {
154
+ const firstCheck = await executableExists(executablePath);
155
+ if (firstCheck.ok) {
156
+ appendCheck(checks, {
157
+ name: "Patchright Chromium installation",
158
+ ok: true,
159
+ message: "Chromium executable exists.",
160
+ detail: executablePath
161
+ });
162
+ return true;
163
+ }
164
+ appendCheck(checks, {
165
+ name: "Patchright Chromium installation",
166
+ ok: false,
167
+ message: "Chromium executable was not found on disk. Installing Patchright Chromium...",
168
+ detail: `${executablePath}
169
+ ${errorMessage(firstCheck.error)}`
170
+ });
171
+ let output = "";
172
+ const appendOutput = (chunk) => {
173
+ output += chunk;
174
+ options.stderr.write(chunk);
175
+ };
176
+ try {
177
+ const result = await options.installChromium({ onStdout: appendOutput, onStderr: appendOutput });
178
+ if (result.code !== 0) {
179
+ appendCheck(checks, {
180
+ name: "Patchright Chromium auto-install",
181
+ ok: false,
182
+ message: `Installation command failed with exit code ${result.code ?? "null"}${result.signal ? ` and signal ${result.signal}` : ""}.`,
183
+ detail: output.trim() || void 0,
184
+ hint: INSTALL_CHROMIUM_HINT
185
+ });
186
+ return false;
187
+ }
188
+ } catch (error) {
189
+ appendCheck(checks, {
190
+ name: "Patchright Chromium auto-install",
191
+ ok: false,
192
+ message: "Installation command failed to start or crashed.",
193
+ detail: errorMessage(error),
194
+ hint: INSTALL_CHROMIUM_HINT
195
+ });
196
+ return false;
197
+ }
198
+ const secondCheck = await executableExists(executablePath);
199
+ appendCheck(checks, secondCheck.ok ? {
200
+ name: "Patchright Chromium auto-install",
201
+ ok: true,
202
+ message: "Chromium installed successfully.",
203
+ detail: executablePath
204
+ } : {
205
+ name: "Patchright Chromium auto-install",
206
+ ok: false,
207
+ message: "Installation finished, but Chromium executable is still missing.",
208
+ detail: `${executablePath}
209
+ ${errorMessage(secondCheck.error)}`,
210
+ hint: INSTALL_CHROMIUM_HINT
211
+ });
212
+ return secondCheck.ok;
213
+ }
214
+ async function runDoctor(options = {}) {
215
+ const checks = [];
216
+ const resolvedOptions = {
217
+ installChromium: options.installChromium ?? installPatchrightChromium,
218
+ stderr: options.stderr ?? process.stderr
219
+ };
220
+ let executablePath = "";
221
+ try {
222
+ executablePath = chromiumExecutablePath();
223
+ checks.push({
224
+ name: "Patchright Chromium executable path",
225
+ ok: true,
226
+ message: executablePath
227
+ });
228
+ } catch (error) {
229
+ checks.push({
230
+ name: "Patchright Chromium executable path",
231
+ ok: false,
232
+ message: "Patchright does not report a Chromium executable for this platform.",
233
+ detail: errorMessage(error),
234
+ hint: INSTALL_CHROMIUM_HINT
235
+ });
236
+ }
237
+ const installed = executablePath ? await ensureChromiumInstalled(checks, executablePath, resolvedOptions) : false;
238
+ if (installed) {
239
+ try {
240
+ const browser = await chromium.launch({ headless: true });
241
+ await browser.close();
242
+ checks.push({
243
+ name: "Patchright Chromium launch",
244
+ ok: true,
245
+ message: "Chromium launched successfully in headless mode."
246
+ });
247
+ } catch (error) {
248
+ checks.push({
249
+ name: "Patchright Chromium launch",
250
+ ok: false,
251
+ message: "Chromium executable exists but failed to launch.",
252
+ detail: errorMessage(error),
253
+ hint: INSTALL_CHROMIUM_HINT
254
+ });
255
+ }
256
+ }
257
+ return {
258
+ ok: checks.at(-1)?.ok === true,
259
+ checks
260
+ };
261
+ }
262
+ function formatDoctorResult(result) {
263
+ const lines = ["Feedloom doctor"];
264
+ for (const check of result.checks) {
265
+ lines.push(`${check.ok ? "\u2713" : "\u2717"} ${check.name}: ${check.message}`);
266
+ if (check.detail) {
267
+ lines.push(...check.detail.split("\n").map((line) => ` ${line}`));
268
+ }
269
+ if (check.hint) {
270
+ lines.push(` ${check.hint}`);
271
+ }
272
+ }
273
+ lines.push(result.ok ? "OK" : "FAILED");
274
+ return lines.join("\n");
275
+ }
276
+
115
277
  // src/fetch/browser.ts
116
278
  import { mkdtemp, rm } from "fs/promises";
117
279
  import { tmpdir } from "os";
118
280
  import { join } from "path";
119
- import { chromium } from "patchright";
281
+ import { chromium as chromium2 } from "patchright";
120
282
  var SCRAPLING_DEFAULT_ARGS = [
121
283
  "--no-pings",
122
284
  "--no-first-run",
@@ -165,7 +327,7 @@ async function launchBrowserContext(options) {
165
327
  if (options.dnsOverHttps) {
166
328
  extraArgs.push("--dns-over-https-templates=https://cloudflare-dns.com/dns-query");
167
329
  }
168
- const context = await chromium.launchPersistentContext(userDataDir, {
330
+ const context = await chromium2.launchPersistentContext(userDataDir, {
169
331
  channel: options.channel,
170
332
  headless: options.headless ?? true,
171
333
  args: extraArgs,
@@ -242,7 +404,7 @@ async function fetchBrowserHtml(url, options = {}) {
242
404
  import { mkdtemp as mkdtemp2, rm as rm2 } from "fs/promises";
243
405
  import { tmpdir as tmpdir2 } from "os";
244
406
  import { join as join2 } from "path";
245
- import { chromium as chromium2 } from "patchright";
407
+ import { chromium as chromium3 } from "patchright";
246
408
  var DEFAULT_ARGS = [
247
409
  "--no-pings",
248
410
  "--no-first-run",
@@ -385,7 +547,7 @@ async function solveCloudflare(page) {
385
547
  async function launchStealthContext(options) {
386
548
  const userDataDir = options.userDataDir ?? await mkdtemp2(join2(tmpdir2(), "feedloom-stealth-"));
387
549
  const ownsUserDataDir = options.userDataDir === void 0;
388
- const context = await chromium2.launchPersistentContext(userDataDir, {
550
+ const context = await chromium3.launchPersistentContext(userDataDir, {
389
551
  channel: "chromium",
390
552
  headless: options.headless ?? true,
391
553
  args: stealthArgs(options),
@@ -1953,6 +2115,8 @@ var ProgressTracker = class {
1953
2115
  };
1954
2116
 
1955
2117
  // src/cli.ts
2118
+ var require2 = createRequire(import.meta.url);
2119
+ var packageJson = require2("../package.json");
1956
2120
  var program = new Command();
1957
2121
  async function siteRulePathsFromDir(dir) {
1958
2122
  const names = await readdir2(dir);
@@ -1968,7 +2132,13 @@ function positiveIntOption(value, fallback) {
1968
2132
  }
1969
2133
  return parsed;
1970
2134
  }
1971
- program.name("feedloom").description("Archive long-form web content as clean Markdown with local assets").version("0.1.0").option("--output-dir <dir>", "Output directory for markdown notes", "clippings").option("--source-kind <kind>", "auto, html-page, or rss-feed", "auto").option("--since <date>", "Only keep feed entries on or after YYYY-MM-DD", "").option("--limit <n>", "Process only first N deduplicated URLs", "0").option("--start <n>", "Start from 1-based index after deduplication", "1").option("--end <n>", "End at 1-based index after deduplication", "0").option("--prefer-browser-state", "Try copied local Chrome profile before regular browser fallback", false).option("--chrome-user-data-dir <path>", "Chrome user data directory used with --prefer-browser-state", "").option("--chrome-profile <name>", "Chrome profile directory name", "Default").option("--fetch-mode <mode>", "auto, static, browser, or stealth", "auto").option("--no-network-idle", "Do not wait for browser networkidle before reading HTML").option("--wait-ms <ms>", "Extra browser wait after load", "2500").option("--solve-cloudflare", "In stealth mode, attempt Cloudflare Turnstile/interstitial challenge handling", false).option("--disable-resources", "In stealth mode, block images/media/fonts/stylesheets for speed", false).option("--proxy <server>", "Proxy server for browser/stealth fetch, e.g. http://127.0.0.1:8080", "").option("--dns-over-https", "Use Chromium Cloudflare DNS-over-HTTPS flag for browser/stealth fetch", false).option("--wait-selector <selector>", "Wait for a CSS selector after page load", "").option("--wait-selector-state <state>", "attached, detached, visible, or hidden", "attached").option("--click-selector <selector...>", "Click one or more selectors after page load", []).option("--scroll-to-bottom", "Scroll to the bottom before reading HTML", false).option("--headful", "Run browser/browser-state fetches with a visible Chrome window", false).option("--site-rules-dir <dir>", "Optional directory of private TOML site extraction/cleaning rules", "").option("--no-real-chrome-defaults", "Disable Scrapling-inspired real Chrome context defaults").option("--no-reuse-browser", "Disable batch browser/stealth context reuse").argument("[inputs...]", "URLs or files containing URLs").action(async (inputs, options) => {
2135
+ program.name("feedloom").description("Archive long-form web content as clean Markdown with local assets").version(packageJson.version ?? "0.0.0");
2136
+ program.command("doctor").description("Check Feedloom runtime dependencies").action(async () => {
2137
+ const result = await runDoctor();
2138
+ console.error(formatDoctorResult(result));
2139
+ process.exitCode = result.ok ? 0 : 1;
2140
+ });
2141
+ program.option("--output-dir <dir>", "Output directory for markdown notes", "clippings").option("--source-kind <kind>", "auto, html-page, or rss-feed", "auto").option("--since <date>", "Only keep feed entries on or after YYYY-MM-DD", "").option("--limit <n>", "Process only first N deduplicated URLs", "0").option("--start <n>", "Start from 1-based index after deduplication", "1").option("--end <n>", "End at 1-based index after deduplication", "0").option("--prefer-browser-state", "Try copied local Chrome profile before regular browser fallback", false).option("--chrome-user-data-dir <path>", "Chrome user data directory used with --prefer-browser-state", "").option("--chrome-profile <name>", "Chrome profile directory name", "Default").option("--fetch-mode <mode>", "auto, static, browser, or stealth", "auto").option("--no-network-idle", "Do not wait for browser networkidle before reading HTML").option("--wait-ms <ms>", "Extra browser wait after load", "2500").option("--solve-cloudflare", "In stealth mode, attempt Cloudflare Turnstile/interstitial challenge handling", false).option("--disable-resources", "In stealth mode, block images/media/fonts/stylesheets for speed", false).option("--proxy <server>", "Proxy server for browser/stealth fetch, e.g. http://127.0.0.1:8080", "").option("--dns-over-https", "Use Chromium Cloudflare DNS-over-HTTPS flag for browser/stealth fetch", false).option("--wait-selector <selector>", "Wait for a CSS selector after page load", "").option("--wait-selector-state <state>", "attached, detached, visible, or hidden", "attached").option("--click-selector <selector...>", "Click one or more selectors after page load", []).option("--scroll-to-bottom", "Scroll to the bottom before reading HTML", false).option("--headful", "Run browser/browser-state fetches with a visible Chrome window", false).option("--site-rules-dir <dir>", "Optional directory of private TOML site extraction/cleaning rules", "").option("--no-real-chrome-defaults", "Disable Scrapling-inspired real Chrome context defaults").option("--no-reuse-browser", "Disable batch browser/stealth context reuse").argument("[inputs...]", "URLs or files containing URLs").action(async (inputs, options) => {
1972
2142
  if (inputs.length === 0) {
1973
2143
  program.help({ error: true });
1974
2144
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ariesfish/feedloom",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "type": "module",
5
5
  "author": "ariesfish",
6
6
  "license": "MIT",
@@ -17,6 +17,7 @@
17
17
  "feedloom": "dist/cli.js"
18
18
  },
19
19
  "files": [
20
+ "assets",
20
21
  "dist",
21
22
  "skills",
22
23
  "README.md",
@@ -22,6 +22,12 @@ npx -y @ariesfish/feedloom <inputs...> [options]
22
22
 
23
23
  ## Common usage
24
24
 
25
+ Before clipping with browser-based fetch modes, run `doctor` once to verify and repair the Patchright Chromium runtime. If Chromium is missing, `doctor` automatically runs `npx patchright install chromium`.
26
+
27
+ ```bash
28
+ npx -y @ariesfish/feedloom doctor
29
+ ```
30
+
25
31
  Before running Feedloom, check whether this skill directory has a `site-rules/` directory. If it exists, always pass it with `--site-rules-dir $HOME/.agents/skills/feedloom/site-rules`; do not omit available site rules.
26
32
 
27
33
  ```bash
@@ -61,7 +67,7 @@ Use the least expensive mode that works:
61
67
  - `--site-rules-dir <dir>`: load optional private TOML extraction/cleaning rules from a local directory, for example the `$HOME/.agents/skills/feedloom/site-rules/` reference folder.
62
68
  - `--solve-cloudflare`, `--proxy <server>`, `--dns-over-https`: use only when stealth fetching needs them.
63
69
 
64
- Run `npx -y @ariesfish/feedloom --help` for the complete option list. Do not invent unsupported options.
70
+ Run `npx -y @ariesfish/feedloom doctor` when browser, stealth, or auto fallback fails because Chromium is missing or cannot launch. Run `npx -y @ariesfish/feedloom --help` for the complete option list. Do not invent unsupported options.
65
71
 
66
72
  ## Site rules
67
73