@apmantza/greedysearch-pi 1.8.3 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/reddit.mjs CHANGED
@@ -1,210 +1,210 @@
1
- // src/reddit.mjs - Reddit content fetching via public JSON API
2
- // Reddit exposes structured data by appending .json to any URL
3
-
4
- const REDDIT_HEADERS = {
5
- "user-agent": "GreedySearch/1.0 (Research Bot)",
6
- accept: "application/json",
7
- };
8
-
9
- /**
10
- * Parse a Reddit URL to check if it's a post, comment, or user profile
11
- * @param {string} url
12
- * @returns {{type: 'post'|'user'|'other', cleanUrl: string} | null}
13
- */
14
- export function parseRedditUrl(url) {
15
- try {
16
- const parsed = new URL(url);
17
- const hostname = parsed.hostname.toLowerCase();
18
-
19
- // Support reddit.com, old.reddit.com, www.reddit.com
20
- if (!hostname.endsWith("reddit.com")) {
21
- return null;
22
- }
23
-
24
- const pathname = parsed.pathname;
25
-
26
- // User profile: /u/username or /user/username
27
- if (pathname.match(/^\/(u|user)\/[^/]+\/?$/i)) {
28
- return { type: "user", cleanUrl: normalizeRedditUrl(url) };
29
- }
30
-
31
- // Post: /r/subreddit/comments/xxxx/...
32
- if (pathname.match(/^\/r\/[^/]+\/comments\/[^/]+/i)) {
33
- return { type: "post", cleanUrl: normalizeRedditUrl(url) };
34
- }
35
-
36
- return null;
37
- } catch {
38
- return null;
39
- }
40
- }
41
-
42
- /**
43
- * Normalize Reddit URL (remove query params, fragments)
44
- * @param {string} url
45
- * @returns {string}
46
- */
47
- function normalizeRedditUrl(url) {
48
- try {
49
- const parsed = new URL(url);
50
- // Reconstruct without query/fragment
51
- return `${parsed.protocol}//${parsed.hostname}${parsed.pathname}`;
52
- } catch {
53
- return url;
54
- }
55
- }
56
-
57
- /**
58
- * Fetch Reddit content via the .json API
59
- * @param {string} url - Reddit URL (will have .json appended)
60
- * @param {number} maxChars - Max characters for content
61
- * @returns {Promise<FetchResult>}
62
- */
63
- export async function fetchRedditContent(url, maxChars = 8000) {
64
- const start = Date.now();
65
-
66
- try {
67
- // Append .json to get API response
68
- const jsonUrl = url.replace(/\/?$/, ".json");
69
-
70
- const controller = new AbortController();
71
- const timeoutId = setTimeout(() => controller.abort(), 15000);
72
-
73
- const response = await fetch(jsonUrl, {
74
- headers: REDDIT_HEADERS,
75
- signal: controller.signal,
76
- });
77
-
78
- clearTimeout(timeoutId);
79
-
80
- if (!response.ok) {
81
- throw new Error(`Reddit API ${response.status}`);
82
- }
83
-
84
- const data = await response.json();
85
-
86
- // data[0] = post listing, data[1] = comments listing
87
- if (!Array.isArray(data) || data.length < 1) {
88
- throw new Error("Invalid Reddit API response structure");
89
- }
90
-
91
- const postListing = data[0];
92
- const commentsListing = data[1];
93
-
94
- // Extract post data
95
- const post = postListing?.data?.children?.[0]?.data;
96
- if (!post) {
97
- throw new Error("No post data in Reddit response");
98
- }
99
-
100
- // Format as markdown
101
- const markdown = formatRedditPost(post, commentsListing, maxChars);
102
-
103
- return {
104
- ok: true,
105
- url,
106
- finalUrl: url,
107
- status: 200,
108
- contentType: "text/markdown",
109
- lastModified: "",
110
- title: post.title || "Reddit Post",
111
- byline: `u/${post.author}`,
112
- siteName: `r/${post.subreddit}`,
113
- lang: "en",
114
- publishedTime: new Date(post.created_utc * 1000).toISOString(),
115
- excerpt: post.selftext?.slice(0, 300).replace(/\n/g, " ") || "",
116
- markdown,
117
- contentLength: markdown.length,
118
- needsBrowser: false,
119
- duration: Date.now() - start,
120
- };
121
- } catch (error) {
122
- return {
123
- ok: false,
124
- url,
125
- finalUrl: url,
126
- status: 0,
127
- error: `Reddit fetch failed: ${error.message}`,
128
- needsBrowser: false,
129
- duration: Date.now() - start,
130
- };
131
- }
132
- }
133
-
134
- /**
135
- * Format Reddit post and comments as clean markdown
136
- * @param {object} post - Reddit post data
137
- * @param {object|null} commentsListing - Comments listing data
138
- * @param {number} maxChars - Max characters
139
- * @returns {string}
140
- */
141
- function formatRedditPost(post, commentsListing, maxChars) {
142
- let md = "";
143
-
144
- // Post header
145
- md += `# ${post.title}\n\n`;
146
- md += `**Subreddit:** r/${post.subreddit} | **Author:** u/${post.author} | **Score:** ${post.score}\n\n`;
147
-
148
- // Post body (selftext) or link
149
- if (post.selftext) {
150
- md += post.selftext;
151
- md += "\n\n";
152
- } else if (post.url && !post.url.includes("reddit.com")) {
153
- // External link post
154
- md += `**Link:** ${post.url}\n\n`;
155
- }
156
-
157
- // Comments section
158
- if (commentsListing?.data?.children?.length > 0) {
159
- md += "---\n\n## Comments\n\n";
160
- const comments = commentsListing.data.children
161
- .filter((c) => c.kind === "t1") // t1 = comment
162
- .slice(0, 10); // Top 10 comments
163
-
164
- for (const comment of comments) {
165
- md += formatComment(comment.data, 0);
166
- md += "\n";
167
- }
168
- }
169
-
170
- // Trim to maxChars while keeping structure
171
- if (md.length > maxChars) {
172
- md = md.slice(0, maxChars).trim() + "\n\n... (truncated)";
173
- }
174
-
175
- return md;
176
- }
177
-
178
- /**
179
- * Format a single comment with nesting
180
- * @param {object} comment - Reddit comment data
181
- * @param {number} depth - Nesting depth
182
- * @returns {string}
183
- */
184
- function formatComment(comment, depth) {
185
- if (
186
- !comment ||
187
- comment.body === "[deleted]" ||
188
- comment.body === "[removed]"
189
- ) {
190
- return "";
191
- }
192
-
193
- const indent = "> ".repeat(depth);
194
- let md = "";
195
-
196
- md += `${indent}**u/${comment.author}** (${comment.score} pts)\n`;
197
- md += `${indent}${comment.body.replace(/\n/g, "\n" + indent)}\n`;
198
-
199
- // Handle nested replies (limit depth to 3)
200
- if (depth < 3 && comment.replies?.data?.children) {
201
- const replies = comment.replies.data.children.filter(
202
- (r) => r.kind === "t1",
203
- );
204
- for (const reply of replies.slice(0, 5)) {
205
- md += "\n" + formatComment(reply.data, depth + 1);
206
- }
207
- }
208
-
209
- return md;
210
- }
1
+ // src/reddit.mjs - Reddit content fetching via public JSON API
2
+ // Reddit exposes structured data by appending .json to any URL
3
+
4
// Request headers sent with every call to Reddit's JSON endpoints.
// Frozen because the object is shared by all requests made from this module;
// an accidental mutation would otherwise leak into every later request.
const REDDIT_HEADERS = Object.freeze({
  "user-agent": "GreedySearch/1.0 (Research Bot)",
  accept: "application/json",
});
8
+
9
// /u/<name> or /user/<name>, with an optional trailing slash and nothing after it.
const REDDIT_USER_PATH = /^\/(u|user)\/[^/]+\/?$/i;
// /r/<subreddit>/comments/<id>, optionally followed by a slug or comment id.
const REDDIT_POST_PATH = /^\/r\/[^/]+\/comments\/[^/]+/i;

/**
 * Classify a Reddit URL as a post (or comment permalink) or a user profile.
 *
 * Only `reddit.com` itself and its subdomains (`www.`, `old.`, ...) are
 * accepted. The host is compared on a label boundary, so look-alike domains
 * that merely end in the same characters (e.g. `notreddit.com`) are rejected.
 *
 * @param {string} url
 * @returns {{type: 'post'|'user', cleanUrl: string} | null} The URL type and
 *   the URL stripped of query/fragment, or `null` when the input is not a
 *   parseable URL, is not hosted on Reddit, or is any other kind of page.
 */
export function parseRedditUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not a valid absolute URL.
    return null;
  }

  const hostname = parsed.hostname.toLowerCase();
  if (hostname !== "reddit.com" && !hostname.endsWith(".reddit.com")) {
    return null;
  }

  const { pathname } = parsed;

  if (REDDIT_USER_PATH.test(pathname)) {
    return { type: "user", cleanUrl: normalizeRedditUrl(url) };
  }

  if (REDDIT_POST_PATH.test(pathname)) {
    return { type: "post", cleanUrl: normalizeRedditUrl(url) };
  }

  return null;
}

/**
 * Normalize a Reddit URL by dropping its query string and fragment.
 *
 * The result is rebuilt from `host` rather than `hostname` so that an
 * explicit port, if any, is preserved.
 *
 * @param {string} url
 * @returns {string} The normalized URL, or the input unchanged when it
 *   cannot be parsed.
 */
function normalizeRedditUrl(url) {
  try {
    const parsed = new URL(url);
    return `${parsed.protocol}//${parsed.host}${parsed.pathname}`;
  } catch {
    return url;
  }
}
56
+
57
/**
 * Fetch a Reddit post through the public `.json` endpoint and convert it to
 * markdown.
 *
 * Never rejects: an invalid URL, network failure, timeout, non-2xx response
 * or unexpected payload is reported as a result object with `ok: false`.
 *
 * @param {string} url - Reddit post URL; `.json` is appended to its path
 * @param {number} maxChars - Max characters for content
 * @returns {Promise<FetchResult>}
 */
export async function fetchRedditContent(url, maxChars = 8000) {
  const start = Date.now();
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 15000);

  try {
    const jsonUrl = toRedditJsonUrl(url);

    const response = await fetch(jsonUrl, {
      headers: REDDIT_HEADERS,
      signal: controller.signal,
    });

    if (!response.ok) {
      throw new Error(`Reddit API ${response.status}`);
    }

    // Reading the body is still covered by the timeout: the timer is only
    // cleared in `finally`, after parsing has finished.
    const data = await response.json();

    // data[0] = post listing, data[1] = comments listing
    if (!Array.isArray(data) || data.length < 1) {
      throw new Error("Invalid Reddit API response structure");
    }

    const [postListing, commentsListing] = data;

    const post = postListing?.data?.children?.[0]?.data;
    if (!post) {
      throw new Error("No post data in Reddit response");
    }

    const markdown = formatRedditPost(post, commentsListing, maxChars);

    // A missing or invalid timestamp must not fail an otherwise good fetch;
    // `toISOString()` throws on an invalid Date, so check first.
    const published = new Date(post.created_utc * 1000);
    const publishedTime = Number.isNaN(published.getTime())
      ? ""
      : published.toISOString();

    return {
      ok: true,
      url,
      finalUrl: url,
      status: 200,
      contentType: "text/markdown",
      lastModified: "",
      title: post.title || "Reddit Post",
      byline: `u/${post.author}`,
      siteName: `r/${post.subreddit}`,
      lang: "en",
      publishedTime,
      excerpt: post.selftext?.slice(0, 300).replace(/\n/g, " ") || "",
      markdown,
      contentLength: markdown.length,
      needsBrowser: false,
      duration: Date.now() - start,
    };
  } catch (error) {
    return {
      ok: false,
      url,
      finalUrl: url,
      status: 0,
      error: `Reddit fetch failed: ${error?.message ?? String(error)}`,
      needsBrowser: false,
      duration: Date.now() - start,
    };
  } finally {
    // Always release the timer, including on failure, so a rejected fetch
    // does not leave a pending 15s timeout behind.
    clearTimeout(timeoutId);
  }
}

/**
 * Build the `.json` endpoint URL for a Reddit page.
 *
 * The suffix is added to the path (not to the end of the raw string), so a
 * query string stays intact; the fragment is dropped. A path that already
 * ends in `.json` is left as is.
 *
 * @param {string} url - Absolute Reddit URL
 * @returns {string}
 * @throws {TypeError} If `url` is not a valid absolute URL
 */
function toRedditJsonUrl(url) {
  const target = new URL(url);
  const path = target.pathname.replace(/\/+$/, "");
  target.pathname = path.endsWith(".json") ? path : `${path}.json`;
  target.hash = "";
  return target.href;
}
133
+
134
/**
 * Format a Reddit post and its top-level comments as markdown.
 *
 * Layout: title heading, a subreddit/author/score line, then the self-text
 * (or the external link for link posts), then up to ten top-level comments
 * under a "Comments" heading. The heading is only emitted when at least one
 * comment actually renders.
 *
 * @param {object} post - Reddit post data
 * @param {object|null} commentsListing - Comments listing data
 * @param {number} maxChars - Max characters; the result, including the
 *   truncation marker, does not exceed this length
 * @returns {string}
 */
function formatRedditPost(post, commentsListing, maxChars) {
  // Same fallback title as the fetch result uses, instead of "# undefined".
  let md = `# ${post.title || "Reddit Post"}\n\n`;
  md += `**Subreddit:** r/${post.subreddit} | **Author:** u/${post.author} | **Score:** ${post.score}\n\n`;

  // Post body (selftext) or, for link posts, the external target.
  if (post.selftext) {
    md += `${post.selftext}\n\n`;
  } else if (post.url && !post.url.includes("reddit.com")) {
    md += `**Link:** ${post.url}\n\n`;
  }

  const children = commentsListing?.data?.children;
  const renderedComments = (Array.isArray(children) ? children : [])
    .filter((child) => child?.kind === "t1") // t1 = comment
    .slice(0, 10) // first 10 top-level comments
    .map((child) => formatComment(child.data, 0))
    // Entries that render to nothing must not leave blank lines or an
    // otherwise empty "Comments" section behind.
    .filter(Boolean);

  if (renderedComments.length > 0) {
    md += "---\n\n## Comments\n\n";
    md += renderedComments.map((text) => `${text}\n`).join("");
  }

  return truncateMarkdown(md, maxChars);
}

/**
 * Cut markdown down to `maxChars`, appending a truncation marker when there
 * is room for it.
 *
 * @param {string} md
 * @param {number} maxChars
 * @returns {string}
 */
function truncateMarkdown(md, maxChars) {
  if (!(md.length > maxChars)) {
    return md;
  }

  const marker = "\n\n... (truncated)";
  const limit = Math.max(0, maxChars);
  if (limit <= marker.length) {
    // No room for the marker: a hard cut is the only way to honor the limit.
    return md.slice(0, limit);
  }

  let kept = md.slice(0, limit - marker.length);
  // Do not leave half of a surrogate pair at the cut point.
  const lastUnit = kept.charCodeAt(kept.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    kept = kept.slice(0, -1);
  }
  return kept.trim() + marker;
}
177
+
178
/**
 * Format a single comment, and its replies, as markdown.
 *
 * Each nesting level is rendered one blockquote level deeper (`"> "` per
 * level). Replies are followed while `depth < 3`, and at most five replies
 * are taken per comment. Comments whose body is `[deleted]`, `[removed]` or
 * not a string render as an empty string, together with their replies.
 *
 * @param {object} comment - Reddit comment data
 * @param {number} depth - Nesting depth (0 for a top-level comment)
 * @returns {string} Markdown for the comment, or `""` when it is skipped
 */
function formatComment(comment, depth) {
  if (
    !comment ||
    // A missing body would otherwise throw on `.replace` below.
    typeof comment.body !== "string" ||
    comment.body === "[deleted]" ||
    comment.body === "[removed]"
  ) {
    return "";
  }

  const indent = "> ".repeat(depth);
  // Re-apply the quote prefix after every line break so multi-line bodies
  // stay inside the blockquote.
  const body = comment.body.replace(/\n/g, `\n${indent}`);
  let md = `${indent}**u/${comment.author}** (${comment.score} pts)\n${indent}${body}\n`;

  const children = comment.replies?.data?.children;
  if (depth < 3 && Array.isArray(children)) {
    const replies = children.filter((reply) => reply?.kind === "t1").slice(0, 5);
    for (const reply of replies) {
      const rendered = formatComment(reply.data, depth + 1);
      // Skipped replies must not leave a stray blank line behind.
      if (rendered) {
        md += `\n${rendered}`;
      }
    }
  }

  return md;
}