@ariesfish/feedloom 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/assets/logo.png +0 -0
- package/dist/cli.js +175 -5
- package/package.json +2 -1
- package/skills/feedloom/SKILL.md +7 -1
package/README.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Feedloom
|
|
2
2
|
|
|
3
|
+
<div align="center">
|
|
4
|
+
<img src="assets/logo.png" alt="Feedloom logo" width="160">
|
|
5
|
+
<p><strong>Archive long-form web content as clean Markdown with local assets.</strong></p>
|
|
6
|
+
<p>
|
|
7
|
+
<a href="https://www.npmjs.com/package/@ariesfish/feedloom"><img alt="npm" src="https://img.shields.io/npm/v/@ariesfish/feedloom"></a>
|
|
8
|
+
<img alt="Node.js >= 24" src="https://img.shields.io/badge/node-%3E%3D24-339933">
|
|
9
|
+
<img alt="License MIT" src="https://img.shields.io/badge/license-MIT-blue">
|
|
10
|
+
</p>
|
|
11
|
+
</div>
|
|
12
|
+
|
|
3
13
|
Feedloom is a command-line tool for archiving long-form web content. It takes article URLs, URL list files, or RSS/Atom feeds, extracts readable article content, converts it to Markdown with YAML frontmatter, and saves page images as local assets. It is designed for personal knowledge bases, notebook vaults, and offline reading archives.
|
|
4
14
|
|
|
5
15
|
## Features
|
|
@@ -43,6 +53,14 @@ If you plan to use `browser`, `stealth`, or the browser fallback in `auto` mode,
|
|
|
43
53
|
npx patchright install chromium
|
|
44
54
|
```
|
|
45
55
|
|
|
56
|
+
You can verify or repair the runtime later with:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
npm run dev -- doctor
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
If the Patchright Chromium executable is missing, `doctor` runs `npx patchright install chromium` automatically.
|
|
63
|
+
|
|
46
64
|
### 4. Build the CLI
|
|
47
65
|
|
|
48
66
|
```bash
|
|
@@ -261,6 +279,12 @@ Only use this on your own device and accounts. Always respect the target site's
|
|
|
261
279
|
--site-rules-dir <dir> Optional directory of private TOML site rules
|
|
262
280
|
```
|
|
263
281
|
|
|
282
|
+
Run environment checks:
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
npm run dev -- doctor
|
|
286
|
+
```
|
|
287
|
+
|
|
264
288
|
For the full option list, run:
|
|
265
289
|
|
|
266
290
|
```bash
|
package/assets/logo.png
ADDED
|
Binary file
|
package/dist/cli.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import { readdir as readdir2 } from "fs/promises";
|
|
5
|
+
import { createRequire } from "module";
|
|
5
6
|
import { dirname, join as join7, resolve as resolve2 } from "path";
|
|
6
7
|
import { fileURLToPath } from "url";
|
|
7
8
|
import { Command } from "commander";
|
|
@@ -112,11 +113,172 @@ function firstContentSelector(profiles) {
|
|
|
112
113
|
return void 0;
|
|
113
114
|
}
|
|
114
115
|
|
|
116
|
+
// src/doctor.ts
|
|
117
|
+
import { spawn } from "child_process";
|
|
118
|
+
import { access } from "fs/promises";
|
|
119
|
+
import { chromium } from "patchright";
|
|
120
|
+
// Shell command users can run by hand to install the Chromium build used by Patchright.
var INSTALL_CHROMIUM_COMMAND = "npx patchright install chromium";
// Remediation hint attached to failing doctor checks.
var INSTALL_CHROMIUM_HINT = "Run: ".concat(INSTALL_CHROMIUM_COMMAND);
|
|
122
|
+
/**
 * Turns any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value; not necessarily an Error instance.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function errorMessage(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
125
|
+
/**
 * Asks Patchright where its Chromium executable is expected to live.
 *
 * @returns {string} The path reported by `chromium.executablePath()`.
 *   Any error raised by that call propagates to the caller.
 */
function chromiumExecutablePath() {
  return chromium.executablePath();
}
|
|
129
|
+
/**
 * Records one doctor check result at the end of the list (mutates `checks`).
 *
 * @param {Array<object>} checks - Accumulated check records.
 * @param {object} check - The record to append.
 * @returns {void}
 */
function appendCheck(checks, check) {
  checks[checks.length] = check;
}
|
|
132
|
+
/**
 * Runs `npx patchright install chromium` as a child process and streams its output.
 *
 * @param {{ onStdout?: (chunk: string) => void, onStderr?: (chunk: string) => void }} [events]
 *   Optional callbacks invoked with each UTF-8 chunk the installer writes.
 * @returns {Promise<{ code: number | null, signal: string | null }>} Resolves when the
 *   child's stdio closes, with its exit code and terminating signal. Rejects if the
 *   process emits an `error` event (for example, when it cannot be spawned).
 */
async function installPatchrightChromium(events = {}) {
  return new Promise((resolve3, reject) => {
    const child = spawn("npx", ["patchright", "install", "chromium"], {
      stdio: ["ignore", "pipe", "pipe"],
      // On Windows `npx` is a `.cmd` shim, which spawn() cannot execute without a
      // shell. The command and arguments are fixed literals, so enabling the shell
      // there does not expose any caller-supplied input to it.
      shell: process.platform === "win32"
    });
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => events.onStdout?.(chunk));
    child.stderr.on("data", (chunk) => events.onStderr?.(chunk));
    child.on("error", reject);
    child.on("close", (code, signal) => resolve3({ code, signal }));
  });
}
|
|
145
|
+
/**
 * Probes whether `path` is accessible via `fs.promises.access`.
 *
 * Never rejects for an access failure; the failure is returned as data instead.
 *
 * @param {string} path - Filesystem path to probe.
 * @returns {Promise<{ ok: true } | { ok: false, error: unknown }>} Probe outcome;
 *   `error` carries whatever `access` rejected with.
 */
async function executableExists(path) {
  try {
    await access(path);
  } catch (error) {
    return { ok: false, error };
  }
  return { ok: true };
}
|
|
153
|
+
/**
 * Verifies that the Chromium executable exists on disk, running the installer
 * once when it does not, and records every step in `checks`.
 *
 * Installer output is echoed to `options.stderr` as it arrives and is also
 * captured so it can be attached to a failure record.
 *
 * @param {Array<object>} checks - Mutable list of check records to append to.
 * @param {string} executablePath - Path probed before and after installation.
 * @param {{ installChromium: Function, stderr: { write: Function } }} options -
 *   Installer to invoke and the sink that receives its live output.
 * @returns {Promise<boolean>} `true` when the executable exists, either up front
 *   or after a successful install; `false` otherwise.
 */
async function ensureChromiumInstalled(checks, executablePath, options) {
  const installationCheck = "Patchright Chromium installation";
  const autoInstallCheck = "Patchright Chromium auto-install";

  const before = await executableExists(executablePath);
  if (before.ok) {
    appendCheck(checks, {
      name: installationCheck,
      ok: true,
      message: "Chromium executable exists.",
      detail: executablePath
    });
    return true;
  }
  appendCheck(checks, {
    name: installationCheck,
    ok: false,
    message: "Chromium executable was not found on disk. Installing Patchright Chromium...",
    detail: `${executablePath}\n${errorMessage(before.error)}`
  });

  // stdout and stderr of the installer share one buffer and one live sink.
  let captured = "";
  const relay = (chunk) => {
    captured += chunk;
    options.stderr.write(chunk);
  };

  try {
    const outcome = await options.installChromium({ onStdout: relay, onStderr: relay });
    if (outcome.code !== 0) {
      const exitCode = outcome.code ?? "null";
      const signalSuffix = outcome.signal ? ` and signal ${outcome.signal}` : "";
      appendCheck(checks, {
        name: autoInstallCheck,
        ok: false,
        message: `Installation command failed with exit code ${exitCode}${signalSuffix}.`,
        detail: captured.trim() || void 0,
        hint: INSTALL_CHROMIUM_HINT
      });
      return false;
    }
  } catch (error) {
    appendCheck(checks, {
      name: autoInstallCheck,
      ok: false,
      message: "Installation command failed to start or crashed.",
      detail: errorMessage(error),
      hint: INSTALL_CHROMIUM_HINT
    });
    return false;
  }

  // A zero exit code is not trusted on its own: probe the path again.
  const after = await executableExists(executablePath);
  if (after.ok) {
    appendCheck(checks, {
      name: autoInstallCheck,
      ok: true,
      message: "Chromium installed successfully.",
      detail: executablePath
    });
    return true;
  }
  appendCheck(checks, {
    name: autoInstallCheck,
    ok: false,
    message: "Installation finished, but Chromium executable is still missing.",
    detail: `${executablePath}\n${errorMessage(after.error)}`,
    hint: INSTALL_CHROMIUM_HINT
  });
  return false;
}
|
|
214
|
+
/**
 * Runs the Feedloom runtime checks in order: resolve the Chromium executable
 * path, make sure the executable is installed, then try a headless launch.
 *
 * Later steps are skipped when an earlier one fails. The overall `ok` flag
 * reflects only the LAST recorded check, so a run that had to auto-install
 * Chromium still reports success when the final launch check passes.
 *
 * @param {{ installChromium?: Function, stderr?: { write: Function } }} [options]
 *   Overrides for the installer and the stream that receives installer output;
 *   they default to `installPatchrightChromium` and `process.stderr`.
 * @returns {Promise<{ ok: boolean, checks: Array<object> }>} Verdict plus every recorded check.
 */
async function runDoctor(options = {}) {
  const pathCheck = "Patchright Chromium executable path";
  const launchCheck = "Patchright Chromium launch";
  const checks = [];
  const resolvedOptions = {
    installChromium: options.installChromium ?? installPatchrightChromium,
    stderr: options.stderr ?? process.stderr
  };

  let executablePath = "";
  try {
    executablePath = chromiumExecutablePath();
    checks.push({
      name: pathCheck,
      ok: true,
      message: executablePath
    });
  } catch (error) {
    checks.push({
      name: pathCheck,
      ok: false,
      message: "Patchright does not report a Chromium executable for this platform.",
      detail: errorMessage(error),
      hint: INSTALL_CHROMIUM_HINT
    });
  }

  // Without a path there is nothing to probe or install.
  let installed = false;
  if (executablePath) {
    installed = await ensureChromiumInstalled(checks, executablePath, resolvedOptions);
  }

  if (installed) {
    try {
      const browser = await chromium.launch({ headless: true });
      await browser.close();
      checks.push({
        name: launchCheck,
        ok: true,
        message: "Chromium launched successfully in headless mode."
      });
    } catch (error) {
      checks.push({
        name: launchCheck,
        ok: false,
        message: "Chromium executable exists but failed to launch.",
        detail: errorMessage(error),
        hint: INSTALL_CHROMIUM_HINT
      });
    }
  }

  const finalCheck = checks.at(-1);
  return {
    ok: finalCheck?.ok === true,
    checks
  };
}
|
|
262
|
+
/**
 * Renders a doctor result as a plain-text report.
 *
 * The report starts with a "Feedloom doctor" header, then one status line per
 * check (check mark or cross, name, message) followed by its indented detail
 * lines and hint when present, and ends with "OK" or "FAILED".
 *
 * @param {{ ok: boolean, checks: Array<{ name: string, ok: boolean, message: string, detail?: string, hint?: string }> }} result
 *   The value returned by `runDoctor`.
 * @returns {string} The report, with lines joined by "\n".
 */
function formatDoctorResult(result) {
  const lines = ["Feedloom doctor"];
  for (const { ok, name, message, detail, hint } of result.checks) {
    lines.push(`${ok ? "\u2713" : "\u2717"} ${name}: ${message}`);
    if (detail) {
      // Detail may hold captured installer output, which uses CRLF on Windows;
      // accept both line endings so no stray "\r" ends up in the report.
      for (const line of detail.split(/\r?\n/)) {
        lines.push(` ${line}`);
      }
    }
    if (hint) {
      lines.push(` ${hint}`);
    }
  }
  lines.push(result.ok ? "OK" : "FAILED");
  return lines.join("\n");
}
|
|
276
|
+
|
|
115
277
|
// src/fetch/browser.ts
|
|
116
278
|
import { mkdtemp, rm } from "fs/promises";
|
|
117
279
|
import { tmpdir } from "os";
|
|
118
280
|
import { join } from "path";
|
|
119
|
-
import { chromium } from "patchright";
|
|
281
|
+
import { chromium as chromium2 } from "patchright";
|
|
120
282
|
var SCRAPLING_DEFAULT_ARGS = [
|
|
121
283
|
"--no-pings",
|
|
122
284
|
"--no-first-run",
|
|
@@ -165,7 +327,7 @@ async function launchBrowserContext(options) {
|
|
|
165
327
|
if (options.dnsOverHttps) {
|
|
166
328
|
extraArgs.push("--dns-over-https-templates=https://cloudflare-dns.com/dns-query");
|
|
167
329
|
}
|
|
168
|
-
const context = await
|
|
330
|
+
const context = await chromium2.launchPersistentContext(userDataDir, {
|
|
169
331
|
channel: options.channel,
|
|
170
332
|
headless: options.headless ?? true,
|
|
171
333
|
args: extraArgs,
|
|
@@ -242,7 +404,7 @@ async function fetchBrowserHtml(url, options = {}) {
|
|
|
242
404
|
import { mkdtemp as mkdtemp2, rm as rm2 } from "fs/promises";
|
|
243
405
|
import { tmpdir as tmpdir2 } from "os";
|
|
244
406
|
import { join as join2 } from "path";
|
|
245
|
-
import { chromium as
|
|
407
|
+
import { chromium as chromium3 } from "patchright";
|
|
246
408
|
var DEFAULT_ARGS = [
|
|
247
409
|
"--no-pings",
|
|
248
410
|
"--no-first-run",
|
|
@@ -385,7 +547,7 @@ async function solveCloudflare(page) {
|
|
|
385
547
|
async function launchStealthContext(options) {
|
|
386
548
|
const userDataDir = options.userDataDir ?? await mkdtemp2(join2(tmpdir2(), "feedloom-stealth-"));
|
|
387
549
|
const ownsUserDataDir = options.userDataDir === void 0;
|
|
388
|
-
const context = await
|
|
550
|
+
const context = await chromium3.launchPersistentContext(userDataDir, {
|
|
389
551
|
channel: "chromium",
|
|
390
552
|
headless: options.headless ?? true,
|
|
391
553
|
args: stealthArgs(options),
|
|
@@ -1953,6 +2115,8 @@ var ProgressTracker = class {
|
|
|
1953
2115
|
};
|
|
1954
2116
|
|
|
1955
2117
|
// src/cli.ts
|
|
2118
|
+
// Build a CommonJS-style `require` anchored at this module's URL and use it to load
// the package manifest one directory above this file.
var require2 = createRequire(import.meta.url);
|
|
2119
|
+
var packageJson = require2("../package.json");
|
|
1956
2120
|
var program = new Command();
|
|
1957
2121
|
async function siteRulePathsFromDir(dir) {
|
|
1958
2122
|
const names = await readdir2(dir);
|
|
@@ -1968,7 +2132,13 @@ function positiveIntOption(value, fallback) {
|
|
|
1968
2132
|
}
|
|
1969
2133
|
return parsed;
|
|
1970
2134
|
}
|
|
1971
|
-
program.name("feedloom").description("Archive long-form web content as clean Markdown with local assets").version(
|
|
2135
|
+
// Root command metadata; the version comes from package.json, with "0.0.0" as a
// fallback when the manifest has no version field.
program
  .name("feedloom")
  .description("Archive long-form web content as clean Markdown with local assets")
  .version(packageJson.version ?? "0.0.0");

// `feedloom doctor`: run the runtime checks, print the report to stderr, and report
// failure through `process.exitCode` rather than by calling `process.exit`.
program
  .command("doctor")
  .description("Check Feedloom runtime dependencies")
  .action(async () => {
    const report = await runDoctor();
    console.error(formatDoctorResult(report));
    process.exitCode = report.ok ? 0 : 1;
  });
|
|
2141
|
+
program.option("--output-dir <dir>", "Output directory for markdown notes", "clippings").option("--source-kind <kind>", "auto, html-page, or rss-feed", "auto").option("--since <date>", "Only keep feed entries on or after YYYY-MM-DD", "").option("--limit <n>", "Process only first N deduplicated URLs", "0").option("--start <n>", "Start from 1-based index after deduplication", "1").option("--end <n>", "End at 1-based index after deduplication", "0").option("--prefer-browser-state", "Try copied local Chrome profile before regular browser fallback", false).option("--chrome-user-data-dir <path>", "Chrome user data directory used with --prefer-browser-state", "").option("--chrome-profile <name>", "Chrome profile directory name", "Default").option("--fetch-mode <mode>", "auto, static, browser, or stealth", "auto").option("--no-network-idle", "Do not wait for browser networkidle before reading HTML").option("--wait-ms <ms>", "Extra browser wait after load", "2500").option("--solve-cloudflare", "In stealth mode, attempt Cloudflare Turnstile/interstitial challenge handling", false).option("--disable-resources", "In stealth mode, block images/media/fonts/stylesheets for speed", false).option("--proxy <server>", "Proxy server for browser/stealth fetch, e.g. 
http://127.0.0.1:8080", "").option("--dns-over-https", "Use Chromium Cloudflare DNS-over-HTTPS flag for browser/stealth fetch", false).option("--wait-selector <selector>", "Wait for a CSS selector after page load", "").option("--wait-selector-state <state>", "attached, detached, visible, or hidden", "attached").option("--click-selector <selector...>", "Click one or more selectors after page load", []).option("--scroll-to-bottom", "Scroll to the bottom before reading HTML", false).option("--headful", "Run browser/browser-state fetches with a visible Chrome window", false).option("--site-rules-dir <dir>", "Optional directory of private TOML site extraction/cleaning rules", "").option("--no-real-chrome-defaults", "Disable Scrapling-inspired real Chrome context defaults").option("--no-reuse-browser", "Disable batch browser/stealth context reuse").argument("[inputs...]", "URLs or files containing URLs").action(async (inputs, options) => {
|
|
1972
2142
|
if (inputs.length === 0) {
|
|
1973
2143
|
program.help({ error: true });
|
|
1974
2144
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ariesfish/feedloom",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"author": "ariesfish",
|
|
6
6
|
"license": "MIT",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"feedloom": "dist/cli.js"
|
|
18
18
|
},
|
|
19
19
|
"files": [
|
|
20
|
+
"assets",
|
|
20
21
|
"dist",
|
|
21
22
|
"skills",
|
|
22
23
|
"README.md",
|
package/skills/feedloom/SKILL.md
CHANGED
|
@@ -22,6 +22,12 @@ npx -y @ariesfish/feedloom <inputs...> [options]
|
|
|
22
22
|
|
|
23
23
|
## Common usage
|
|
24
24
|
|
|
25
|
+
Before clipping with browser-based fetch modes, run `doctor` once to verify and repair the Patchright Chromium runtime. If Chromium is missing, `doctor` automatically runs `npx patchright install chromium`.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npx -y @ariesfish/feedloom doctor
|
|
29
|
+
```
|
|
30
|
+
|
|
25
31
|
Before running Feedloom, check whether this skill directory has a `site-rules/` directory. If it exists, always pass it with `--site-rules-dir $HOME/.agents/skills/feedloom/site-rules`; do not omit available site rules.
|
|
26
32
|
|
|
27
33
|
```bash
|
|
@@ -61,7 +67,7 @@ Use the least expensive mode that works:
|
|
|
61
67
|
- `--site-rules-dir <dir>`: load optional private TOML extraction/cleaning rules from a local directory, for example the `$HOME/.agents/skills/feedloom/site-rules/` reference folder.
|
|
62
68
|
- `--solve-cloudflare`, `--proxy <server>`, `--dns-over-https`: use only when stealth fetching needs them.
|
|
63
69
|
|
|
64
|
-
Run `npx -y @ariesfish/feedloom --help` for the complete option list. Do not invent unsupported options.
|
|
70
|
+
Run `npx -y @ariesfish/feedloom doctor` when browser, stealth, or auto fallback fails because Chromium is missing or cannot launch. Run `npx -y @ariesfish/feedloom --help` for the complete option list. Do not invent unsupported options.
|
|
65
71
|
|
|
66
72
|
## Site rules
|
|
67
73
|
|