@marcfargas/skills 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/LICENSE-CC0 +118 -0
- package/README.md +43 -13
- package/package.json +11 -3
- package/process/pm2/SKILL.md +240 -0
- package/search/web-search/SKILL.md +107 -0
- package/search/web-search/content.js +86 -0
- package/search/web-search/package-lock.json +617 -0
- package/search/web-search/package.json +13 -0
- package/search/web-search/search.js +215 -0
- package/terminal/vhs/SKILL.md +89 -1
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Readability } from "@mozilla/readability";
|
|
4
|
+
import { JSDOM } from "jsdom";
|
|
5
|
+
import TurndownService from "turndown";
|
|
6
|
+
import { gfm } from "turndown-plugin-gfm";
|
|
7
|
+
|
|
8
|
+
// The page to fetch is the first command-line argument after the script name.
const url = process.argv[2];

// Without a URL there is nothing to do: print the usage help and exit non-zero.
if (!url) {
  const usageLines = [
    "Usage: content.js <url>",
    "\nExtracts readable content from a webpage as markdown.",
    "\nExamples:",
    " content.js https://example.com/article",
    " content.js https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html",
  ];
  for (const line of usageLines) {
    console.log(line);
  }
  process.exit(1);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Converts an HTML fragment to tidied GitHub-flavoured Markdown.
 *
 * Turndown is configured for ATX headings and fenced code blocks, with the
 * GFM plugin enabled and anchors that have no visible text dropped.
 * The resulting Markdown is then cleaned up (empty link remnants removed,
 * runs of spaces collapsed, whitespace before `,` / `.` removed, 3+ newlines
 * squeezed to a blank line).
 *
 * The cleanup is applied only OUTSIDE fenced code blocks: running it over
 * code would destroy indentation, join lines that start with `.` or `,`
 * (e.g. method chains) onto the previous line, and drop blank lines.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} Trimmed Markdown text.
 */
function htmlToMarkdown(html) {
  const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  turndown.use(gfm);
  turndown.addRule("removeEmptyLinks", {
    filter: (node) => node.nodeName === "A" && !node.textContent?.trim(),
    replacement: () => "",
  });

  // Whitespace/link cleanup that is only safe for prose, never for code.
  const tidyProse = (text) =>
    text
      .replace(/\[\\?\[\s*\\?\]\]\([^)]*\)/g, "")
      .replace(/ +/g, " ")
      .replace(/\s+,/g, ",")
      .replace(/\s+\./g, ".")
      .replace(/\n{3,}/g, "\n\n");

  // A fenced block: an opening fence line, then everything up to the first
  // line consisting of the same fence. An unterminated fence does not match
  // and is therefore treated as prose.
  const fencedBlock = /^(`{3,}|~{3,})[^\n]*\n[\s\S]*?^\1[ \t]*$/gm;

  const markdown = turndown.turndown(html);
  let result = "";
  let cursor = 0;
  for (const match of markdown.matchAll(fencedBlock)) {
    // Tidy the prose preceding this code block, then copy the block verbatim.
    result += tidyProse(markdown.slice(cursor, match.index)) + match[0];
    cursor = match.index + match[0].length;
  }
  result += tidyProse(markdown.slice(cursor));

  return result.trim();
}
|
|
35
|
+
|
|
36
|
+
// Main script: fetch the page, extract its readable content and print it to
// stdout as Markdown. Failures are reported on stderr with exit status 1.
try {
  // Browser-like headers; the request (including the body read) is aborted
  // after 15 seconds.
  const response = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.9",
    },
    signal: AbortSignal.timeout(15000),
  });

  if (!response.ok) {
    console.error(`HTTP ${response.status}: ${response.statusText}`);
    // Exit immediately; the response body is intentionally left unread.
    process.exit(1);
  }

  const html = await response.text();
  const dom = new JSDOM(html, { url });
  const reader = new Readability(dom.window.document);
  const article = reader.parse();

  if (article && article.content) {
    if (article.title) {
      console.log(`# ${article.title}\n`);
    }
    console.log(htmlToMarkdown(article.content));
    // No process.exit() here: the output can be large, and forcing an exit
    // may cut off stdout writes that are still pending. Falling off the end
    // of the script exits with status 0 once the output has been flushed.
  } else {
    // Fallback: try to extract main content.
    // The HTML is parsed again so the fallback works on a fresh document
    // rather than the one already handed to Readability.
    const fallbackDoc = new JSDOM(html, { url });
    const body = fallbackDoc.window.document;
    body.querySelectorAll("script, style, noscript, nav, header, footer, aside").forEach((el) => el.remove());

    const title = body.querySelector("title")?.textContent?.trim();
    const main = body.querySelector("main, article, [role='main'], .content, #content") || body.body;

    const text = main?.innerHTML || "";
    // Require more than 100 characters of markup before treating it as content.
    if (text.trim().length <= 100) {
      console.error("Could not extract readable content from this page.");
      process.exit(1);
    }

    // Print the title only once extraction is known to have succeeded, so a
    // failed run leaves nothing on stdout.
    if (title) {
      console.log(`# ${title}\n`);
    }
    console.log(htmlToMarkdown(text));
  }
} catch (e) {
  console.error(`Error: ${e.message}`);
  process.exit(1);
}
|