@docyrus/docyrus 0.0.37 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/main.js +45148 -60640
- package/main.js.map +4 -4
- package/package.json +6 -8
- package/resources/chrome-tools/browser-hn-scraper.js +26 -35
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docyrus/docyrus",
|
|
3
|
-
"version": "0.0.37",
|
|
3
|
+
"version": "0.0.38",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Docyrus API CLI",
|
|
6
6
|
"main": "./main.js",
|
|
@@ -10,24 +10,22 @@
|
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@clack/prompts": "^0.11.0",
|
|
12
12
|
"@hono/node-server": "^1.14.1",
|
|
13
|
-
"@libsql/client": "^0.17.2",
|
|
14
13
|
"@mariozechner/pi-ai": "0.64.0",
|
|
15
14
|
"@mariozechner/pi-coding-agent": "0.64.0",
|
|
16
15
|
"@modelcontextprotocol/ext-apps": "^1.2.2",
|
|
17
16
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
18
17
|
"@mozilla/readability": "^0.6.0",
|
|
19
|
-
"@opentui/core": "
|
|
20
|
-
"@opentui/react": "
|
|
18
|
+
"@opentui/core": "0.1.96",
|
|
19
|
+
"@opentui/react": "0.1.96",
|
|
21
20
|
"@repomix/tree-sitter-wasms": "^0.1.16",
|
|
22
21
|
"@sinclair/typebox": "^0.34.48",
|
|
23
22
|
"@xterm/headless": "^5.5.0",
|
|
24
|
-
"cheerio": "^1.1.2",
|
|
25
23
|
"diff": "^8.0.2",
|
|
26
24
|
"hono": "^4.7.10",
|
|
27
25
|
"ignore-walk": "^8.0.0",
|
|
28
26
|
"incur": "^0.1.6",
|
|
29
|
-
"jsdom": "^
|
|
30
|
-
"libsql": "^0.5.
|
|
27
|
+
"jsdom": "^29.0.1",
|
|
28
|
+
"libsql": "^0.5.29",
|
|
31
29
|
"marked": "^15.0.12",
|
|
32
30
|
"marked-terminal": "^7.3.0",
|
|
33
31
|
"node-pty": "^1.0.0",
|
|
@@ -42,7 +40,7 @@
|
|
|
42
40
|
"undici": "^7.16.0",
|
|
43
41
|
"unified": "^11.0.5",
|
|
44
42
|
"unist-util-visit": "^5.1.0",
|
|
45
|
-
"web-tree-sitter": "
|
|
43
|
+
"web-tree-sitter": "0.25.10",
|
|
46
44
|
"zod": "^4.3.6"
|
|
47
45
|
},
|
|
48
46
|
"devDependencies": {
|
|
package/resources/chrome-tools/browser-hn-scraper.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* Usage: node browser-hn-scraper.js [--limit <number>]
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import
|
|
10
|
+
import { JSDOM } from "jsdom";
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* Scrapes Hacker News front page
|
|
@@ -24,57 +24,48 @@ async function scrapeHackerNews(limit = 30) {
|
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
const html = await response.text();
|
|
27
|
-
const
|
|
27
|
+
const dom = new JSDOM(html, { url });
|
|
28
|
+
const document = dom.window.document;
|
|
28
29
|
const submissions = [];
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const $element = $(element);
|
|
35
|
-
const id = $element.attr('id');
|
|
36
|
-
|
|
37
|
-
// Get title and URL from titleline
|
|
38
|
-
const $titleLine = $element.find('.titleline > a').first();
|
|
39
|
-
const title = $titleLine.text().trim();
|
|
40
|
-
const url = $titleLine.attr('href');
|
|
41
|
-
|
|
42
|
-
// Get the next row which contains metadata (points, author, comments)
|
|
43
|
-
const $metadataRow = $element.next();
|
|
44
|
-
const $subtext = $metadataRow.find('.subtext');
|
|
45
|
-
|
|
46
|
-
// Get points
|
|
47
|
-
const $score = $subtext.find(`#score_${id}`);
|
|
48
|
-
const pointsText = $score.text();
|
|
49
|
-
const points = pointsText ? parseInt(pointsText.match(/\d+/)?.[0] || '0') : 0;
|
|
50
|
-
|
|
51
|
-
// Get author
|
|
52
|
-
const author = $subtext.find('.hnuser').text().trim();
|
|
53
|
-
|
|
54
|
-
// Get time
|
|
55
|
-
const time = $subtext.find('.age').attr('title') || $subtext.find('.age').text().trim();
|
|
31
|
+
for (const element of document.querySelectorAll(".athing")) {
|
|
32
|
+
if (submissions.length >= limit) {
|
|
33
|
+
break;
|
|
34
|
+
}
|
|
56
35
|
|
|
57
|
-
|
|
58
|
-
const
|
|
59
|
-
const
|
|
36
|
+
const id = element.getAttribute("id");
|
|
37
|
+
const titleLink = element.querySelector(".titleline > a");
|
|
38
|
+
const title = titleLink?.textContent?.trim() || "";
|
|
39
|
+
const submissionUrl = titleLink?.getAttribute("href") || "";
|
|
40
|
+
|
|
41
|
+
const metadataRow = element.nextElementSibling;
|
|
42
|
+
const subtext = metadataRow?.querySelector(".subtext");
|
|
43
|
+
const score = id ? subtext?.querySelector(`#score_${id}`) : null;
|
|
44
|
+
const pointsText = score?.textContent || "";
|
|
45
|
+
const points = pointsText ? parseInt(pointsText.match(/\d+/)?.[0] || "0", 10) : 0;
|
|
46
|
+
const author = subtext?.querySelector(".hnuser")?.textContent?.trim() || "";
|
|
47
|
+
const age = subtext?.querySelector(".age");
|
|
48
|
+
const time = age?.getAttribute("title") || age?.textContent?.trim() || "";
|
|
49
|
+
const commentLinks = subtext ? Array.from(subtext.querySelectorAll("a")) : [];
|
|
50
|
+
const commentsText = commentLinks.at(-1)?.textContent || "";
|
|
60
51
|
let commentsCount = 0;
|
|
61
52
|
|
|
62
|
-
if (commentsText.includes(
|
|
53
|
+
if (commentsText.includes("comment")) {
|
|
63
54
|
const match = commentsText.match(/(\d+)/);
|
|
64
|
-
commentsCount = match ? parseInt(match[0]) : 0;
|
|
55
|
+
commentsCount = match ? parseInt(match[0], 10) : 0;
|
|
65
56
|
}
|
|
66
57
|
|
|
67
58
|
submissions.push({
|
|
68
59
|
id,
|
|
69
60
|
title,
|
|
70
|
-
url,
|
|
61
|
+
url: submissionUrl,
|
|
71
62
|
points,
|
|
72
63
|
author,
|
|
73
64
|
time,
|
|
74
65
|
comments: commentsCount,
|
|
75
66
|
hnUrl: `https://news.ycombinator.com/item?id=${id}`
|
|
76
67
|
});
|
|
77
|
-
}
|
|
68
|
+
}
|
|
78
69
|
|
|
79
70
|
return submissions;
|
|
80
71
|
} catch (error) {
|