@apitap/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236):
  1. package/LICENSE +60 -0
  2. package/README.md +362 -0
  3. package/SKILL.md +270 -0
  4. package/dist/auth/crypto.d.ts +31 -0
  5. package/dist/auth/crypto.js +66 -0
  6. package/dist/auth/crypto.js.map +1 -0
  7. package/dist/auth/handoff.d.ts +29 -0
  8. package/dist/auth/handoff.js +180 -0
  9. package/dist/auth/handoff.js.map +1 -0
  10. package/dist/auth/manager.d.ts +46 -0
  11. package/dist/auth/manager.js +127 -0
  12. package/dist/auth/manager.js.map +1 -0
  13. package/dist/auth/oauth-refresh.d.ts +16 -0
  14. package/dist/auth/oauth-refresh.js +91 -0
  15. package/dist/auth/oauth-refresh.js.map +1 -0
  16. package/dist/auth/refresh.d.ts +43 -0
  17. package/dist/auth/refresh.js +217 -0
  18. package/dist/auth/refresh.js.map +1 -0
  19. package/dist/capture/anti-bot.d.ts +15 -0
  20. package/dist/capture/anti-bot.js +43 -0
  21. package/dist/capture/anti-bot.js.map +1 -0
  22. package/dist/capture/blocklist.d.ts +6 -0
  23. package/dist/capture/blocklist.js +70 -0
  24. package/dist/capture/blocklist.js.map +1 -0
  25. package/dist/capture/body-diff.d.ts +8 -0
  26. package/dist/capture/body-diff.js +102 -0
  27. package/dist/capture/body-diff.js.map +1 -0
  28. package/dist/capture/body-variables.d.ts +13 -0
  29. package/dist/capture/body-variables.js +142 -0
  30. package/dist/capture/body-variables.js.map +1 -0
  31. package/dist/capture/domain.d.ts +8 -0
  32. package/dist/capture/domain.js +34 -0
  33. package/dist/capture/domain.js.map +1 -0
  34. package/dist/capture/entropy.d.ts +33 -0
  35. package/dist/capture/entropy.js +100 -0
  36. package/dist/capture/entropy.js.map +1 -0
  37. package/dist/capture/filter.d.ts +11 -0
  38. package/dist/capture/filter.js +49 -0
  39. package/dist/capture/filter.js.map +1 -0
  40. package/dist/capture/graphql.d.ts +21 -0
  41. package/dist/capture/graphql.js +99 -0
  42. package/dist/capture/graphql.js.map +1 -0
  43. package/dist/capture/idle.d.ts +23 -0
  44. package/dist/capture/idle.js +44 -0
  45. package/dist/capture/idle.js.map +1 -0
  46. package/dist/capture/monitor.d.ts +26 -0
  47. package/dist/capture/monitor.js +183 -0
  48. package/dist/capture/monitor.js.map +1 -0
  49. package/dist/capture/oauth-detector.d.ts +18 -0
  50. package/dist/capture/oauth-detector.js +96 -0
  51. package/dist/capture/oauth-detector.js.map +1 -0
  52. package/dist/capture/pagination.d.ts +9 -0
  53. package/dist/capture/pagination.js +40 -0
  54. package/dist/capture/pagination.js.map +1 -0
  55. package/dist/capture/parameterize.d.ts +17 -0
  56. package/dist/capture/parameterize.js +63 -0
  57. package/dist/capture/parameterize.js.map +1 -0
  58. package/dist/capture/scrubber.d.ts +5 -0
  59. package/dist/capture/scrubber.js +38 -0
  60. package/dist/capture/scrubber.js.map +1 -0
  61. package/dist/capture/session.d.ts +46 -0
  62. package/dist/capture/session.js +445 -0
  63. package/dist/capture/session.js.map +1 -0
  64. package/dist/capture/token-detector.d.ts +16 -0
  65. package/dist/capture/token-detector.js +62 -0
  66. package/dist/capture/token-detector.js.map +1 -0
  67. package/dist/capture/verifier.d.ts +17 -0
  68. package/dist/capture/verifier.js +147 -0
  69. package/dist/capture/verifier.js.map +1 -0
  70. package/dist/cli.d.ts +2 -0
  71. package/dist/cli.js +930 -0
  72. package/dist/cli.js.map +1 -0
  73. package/dist/discovery/auth.d.ts +17 -0
  74. package/dist/discovery/auth.js +81 -0
  75. package/dist/discovery/auth.js.map +1 -0
  76. package/dist/discovery/fetch.d.ts +17 -0
  77. package/dist/discovery/fetch.js +59 -0
  78. package/dist/discovery/fetch.js.map +1 -0
  79. package/dist/discovery/frameworks.d.ts +11 -0
  80. package/dist/discovery/frameworks.js +249 -0
  81. package/dist/discovery/frameworks.js.map +1 -0
  82. package/dist/discovery/index.d.ts +21 -0
  83. package/dist/discovery/index.js +219 -0
  84. package/dist/discovery/index.js.map +1 -0
  85. package/dist/discovery/openapi.d.ts +13 -0
  86. package/dist/discovery/openapi.js +175 -0
  87. package/dist/discovery/openapi.js.map +1 -0
  88. package/dist/discovery/probes.d.ts +9 -0
  89. package/dist/discovery/probes.js +70 -0
  90. package/dist/discovery/probes.js.map +1 -0
  91. package/dist/index.d.ts +25 -0
  92. package/dist/index.js +25 -0
  93. package/dist/index.js.map +1 -0
  94. package/dist/inspect/report.d.ts +52 -0
  95. package/dist/inspect/report.js +191 -0
  96. package/dist/inspect/report.js.map +1 -0
  97. package/dist/mcp.d.ts +8 -0
  98. package/dist/mcp.js +526 -0
  99. package/dist/mcp.js.map +1 -0
  100. package/dist/orchestration/browse.d.ts +38 -0
  101. package/dist/orchestration/browse.js +198 -0
  102. package/dist/orchestration/browse.js.map +1 -0
  103. package/dist/orchestration/cache.d.ts +15 -0
  104. package/dist/orchestration/cache.js +24 -0
  105. package/dist/orchestration/cache.js.map +1 -0
  106. package/dist/plugin.d.ts +17 -0
  107. package/dist/plugin.js +158 -0
  108. package/dist/plugin.js.map +1 -0
  109. package/dist/read/decoders/deepwiki.d.ts +2 -0
  110. package/dist/read/decoders/deepwiki.js +148 -0
  111. package/dist/read/decoders/deepwiki.js.map +1 -0
  112. package/dist/read/decoders/grokipedia.d.ts +2 -0
  113. package/dist/read/decoders/grokipedia.js +210 -0
  114. package/dist/read/decoders/grokipedia.js.map +1 -0
  115. package/dist/read/decoders/hackernews.d.ts +2 -0
  116. package/dist/read/decoders/hackernews.js +168 -0
  117. package/dist/read/decoders/hackernews.js.map +1 -0
  118. package/dist/read/decoders/index.d.ts +2 -0
  119. package/dist/read/decoders/index.js +12 -0
  120. package/dist/read/decoders/index.js.map +1 -0
  121. package/dist/read/decoders/reddit.d.ts +2 -0
  122. package/dist/read/decoders/reddit.js +142 -0
  123. package/dist/read/decoders/reddit.js.map +1 -0
  124. package/dist/read/decoders/twitter.d.ts +12 -0
  125. package/dist/read/decoders/twitter.js +187 -0
  126. package/dist/read/decoders/twitter.js.map +1 -0
  127. package/dist/read/decoders/wikipedia.d.ts +2 -0
  128. package/dist/read/decoders/wikipedia.js +66 -0
  129. package/dist/read/decoders/wikipedia.js.map +1 -0
  130. package/dist/read/decoders/youtube.d.ts +2 -0
  131. package/dist/read/decoders/youtube.js +69 -0
  132. package/dist/read/decoders/youtube.js.map +1 -0
  133. package/dist/read/extract.d.ts +25 -0
  134. package/dist/read/extract.js +320 -0
  135. package/dist/read/extract.js.map +1 -0
  136. package/dist/read/index.d.ts +14 -0
  137. package/dist/read/index.js +66 -0
  138. package/dist/read/index.js.map +1 -0
  139. package/dist/read/peek.d.ts +9 -0
  140. package/dist/read/peek.js +137 -0
  141. package/dist/read/peek.js.map +1 -0
  142. package/dist/read/types.d.ts +44 -0
  143. package/dist/read/types.js +3 -0
  144. package/dist/read/types.js.map +1 -0
  145. package/dist/replay/engine.d.ts +53 -0
  146. package/dist/replay/engine.js +441 -0
  147. package/dist/replay/engine.js.map +1 -0
  148. package/dist/replay/truncate.d.ts +16 -0
  149. package/dist/replay/truncate.js +92 -0
  150. package/dist/replay/truncate.js.map +1 -0
  151. package/dist/serve.d.ts +31 -0
  152. package/dist/serve.js +149 -0
  153. package/dist/serve.js.map +1 -0
  154. package/dist/skill/generator.d.ts +44 -0
  155. package/dist/skill/generator.js +419 -0
  156. package/dist/skill/generator.js.map +1 -0
  157. package/dist/skill/importer.d.ts +26 -0
  158. package/dist/skill/importer.js +80 -0
  159. package/dist/skill/importer.js.map +1 -0
  160. package/dist/skill/search.d.ts +19 -0
  161. package/dist/skill/search.js +51 -0
  162. package/dist/skill/search.js.map +1 -0
  163. package/dist/skill/signing.d.ts +16 -0
  164. package/dist/skill/signing.js +34 -0
  165. package/dist/skill/signing.js.map +1 -0
  166. package/dist/skill/ssrf.d.ts +27 -0
  167. package/dist/skill/ssrf.js +210 -0
  168. package/dist/skill/ssrf.js.map +1 -0
  169. package/dist/skill/store.d.ts +7 -0
  170. package/dist/skill/store.js +93 -0
  171. package/dist/skill/store.js.map +1 -0
  172. package/dist/stats/report.d.ts +26 -0
  173. package/dist/stats/report.js +157 -0
  174. package/dist/stats/report.js.map +1 -0
  175. package/dist/types.d.ts +214 -0
  176. package/dist/types.js +3 -0
  177. package/dist/types.js.map +1 -0
  178. package/package.json +58 -0
  179. package/src/auth/crypto.ts +92 -0
  180. package/src/auth/handoff.ts +229 -0
  181. package/src/auth/manager.ts +140 -0
  182. package/src/auth/oauth-refresh.ts +120 -0
  183. package/src/auth/refresh.ts +300 -0
  184. package/src/capture/anti-bot.ts +63 -0
  185. package/src/capture/blocklist.ts +75 -0
  186. package/src/capture/body-diff.ts +109 -0
  187. package/src/capture/body-variables.ts +156 -0
  188. package/src/capture/domain.ts +34 -0
  189. package/src/capture/entropy.ts +121 -0
  190. package/src/capture/filter.ts +56 -0
  191. package/src/capture/graphql.ts +124 -0
  192. package/src/capture/idle.ts +45 -0
  193. package/src/capture/monitor.ts +224 -0
  194. package/src/capture/oauth-detector.ts +106 -0
  195. package/src/capture/pagination.ts +49 -0
  196. package/src/capture/parameterize.ts +68 -0
  197. package/src/capture/scrubber.ts +49 -0
  198. package/src/capture/session.ts +502 -0
  199. package/src/capture/token-detector.ts +76 -0
  200. package/src/capture/verifier.ts +171 -0
  201. package/src/cli.ts +1031 -0
  202. package/src/discovery/auth.ts +99 -0
  203. package/src/discovery/fetch.ts +85 -0
  204. package/src/discovery/frameworks.ts +231 -0
  205. package/src/discovery/index.ts +256 -0
  206. package/src/discovery/openapi.ts +230 -0
  207. package/src/discovery/probes.ts +76 -0
  208. package/src/index.ts +26 -0
  209. package/src/inspect/report.ts +247 -0
  210. package/src/mcp.ts +618 -0
  211. package/src/orchestration/browse.ts +250 -0
  212. package/src/orchestration/cache.ts +37 -0
  213. package/src/plugin.ts +188 -0
  214. package/src/read/decoders/deepwiki.ts +180 -0
  215. package/src/read/decoders/grokipedia.ts +246 -0
  216. package/src/read/decoders/hackernews.ts +198 -0
  217. package/src/read/decoders/index.ts +15 -0
  218. package/src/read/decoders/reddit.ts +158 -0
  219. package/src/read/decoders/twitter.ts +211 -0
  220. package/src/read/decoders/wikipedia.ts +75 -0
  221. package/src/read/decoders/youtube.ts +75 -0
  222. package/src/read/extract.ts +396 -0
  223. package/src/read/index.ts +78 -0
  224. package/src/read/peek.ts +175 -0
  225. package/src/read/types.ts +37 -0
  226. package/src/replay/engine.ts +559 -0
  227. package/src/replay/truncate.ts +116 -0
  228. package/src/serve.ts +189 -0
  229. package/src/skill/generator.ts +473 -0
  230. package/src/skill/importer.ts +107 -0
  231. package/src/skill/search.ts +76 -0
  232. package/src/skill/signing.ts +36 -0
  233. package/src/skill/ssrf.ts +238 -0
  234. package/src/skill/store.ts +107 -0
  235. package/src/stats/report.ts +208 -0
  236. package/src/types.ts +233 -0
@@ -0,0 +1,246 @@
1
+ // src/read/decoders/grokipedia.ts
2
+ import type { Decoder, ReadResult } from '../types.js';
3
+ import { safeFetch } from '../../discovery/fetch.js';
4
+
5
/** Approximates the token count of `text` as one token per four characters, rounded up. */
function estimateTokens(text: string): number {
  const charCount = text.length;
  return Math.ceil(charCount / 4);
}
8
+
9
/**
 * Grokipedia decoder — xAI's open knowledge base (6M+ articles)
 *
 * API endpoints (all public, no auth):
 *   /api/page?slug=X&includeContent=true  — Full article with citations
 *   /api/full-text-search?query=X&limit=N — Search with relevance scoring
 *   /api/stats                            — Site-wide stats
 *   /api/typeahead?query=X                — Autocomplete
 *   /api/list-pages?limit=N               — Browse articles
 *   /api/top-contributors?limit=N         — Top editors
 *   /api/list-edit-requests?limit=N       — Recent edits
 */

/** Default API root; `options._apiBaseUrl` passed to `decode` takes precedence over it. */
const GROKIPEDIA_API = 'https://grokipedia.com/api';

export const grokipediaDecoder: Decoder = {
  name: 'grokipedia',
  patterns: [
    /grokipedia\.com\/wiki\/([^#?]+)/,
    /grokipedia\.com\/article\/([^#?]+)/,
    /grokipedia\.com\/search\?/,
    /grokipedia\.com\/?$/,
    /grokipedia\.com\/?(?:\?|#|$)/,
  ],

  /**
   * Routes a Grokipedia URL to the search, article, or homepage decoder.
   *
   * @param url     Page URL to decode.
   * @param options `_apiBaseUrl` overrides the API root; the whole object is forwarded
   *                to the page-specific decoder.
   * @returns The decoded page, or null when the URL shape is not recognised or decoding
   *          fails for any reason.
   */
  async decode(url: string, options: { skipSsrf?: boolean; maxBytes?: number; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const apiBase = options._apiBaseUrl || GROKIPEDIA_API;

      // Search URL: /search?q=query
      // `q` must be a whole parameter name (directly after `?` or `&`), so parameters
      // such as `faq=` or `seq=` are not mistaken for the search query.
      const searchMatch = url.match(/grokipedia\.com\/search\?(?:[^#]*&)?q=([^&#]+)/);
      if (searchMatch) {
        // In a query string `+` stands for a space; a literal plus arrives as %2B.
        const query = decodeURIComponent(searchMatch[1].replace(/\+/g, ' '));
        // `await` is required: returning the bare promise would let a rejection bypass
        // the catch below and surface to the caller instead of becoming null.
        return await decodeSearch(apiBase, query, url, options);
      }

      // Article URL: /wiki/Slug or /article/Slug
      const articleMatch = url.match(/grokipedia\.com\/(?:wiki|article)\/([^#?]+)/);
      if (articleMatch) {
        return await decodeArticle(apiBase, articleMatch[1], url, options);
      }

      // Homepage: return stats + recent edits
      if (/grokipedia\.com\/?(?:\?|#|$)/.test(url)) {
        return await decodeHomepage(apiBase, url, options);
      }

      return null;
    } catch {
      // Best effort: any failure (network, malformed escape, unexpected payload) yields null.
      return null;
    }
  },
};
61
+
62
/**
 * Fetches one article through the `/page` endpoint and maps it to a ReadResult.
 *
 * @param apiBase Root of the Grokipedia API, without a trailing slash.
 * @param slug    Article slug exactly as captured from the page URL; it may be
 *                percent-encoded.
 * @param url     URL originally requested; echoed back in the result.
 * @param options `skipSsrf` is forwarded to `safeFetch`; `maxBytes` caps the article
 *                text, measured in characters (default 20000).
 * @returns The decoded article, or null when the request does not return 200, the body
 *          is not JSON, or the payload has no `page` object.
 */
async function decodeArticle(
  apiBase: string,
  slug: string,
  url: string,
  options: { skipSsrf?: boolean; maxBytes?: number; [key: string]: any },
): Promise<ReadResult | null> {
  // The slug comes straight from the URL path, so it can already be percent-encoded.
  // Decode it first; otherwise encodeURIComponent below would double-encode (%2B -> %252B).
  let plainSlug = slug;
  try {
    plainSlug = decodeURIComponent(slug);
  } catch {
    // Malformed escape sequence: keep the slug as given.
  }

  const apiUrl = `${apiBase}/page?slug=${encodeURIComponent(plainSlug)}&includeContent=true`;
  // Grokipedia articles can be very large (743KB+ for Elon Musk) — raise body limit to 2MB
  const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf, maxBodySize: 2 * 1024 * 1024 });
  if (!result || result.status !== 200) return null;

  let data: any;
  try {
    data = JSON.parse(result.body);
  } catch {
    return null;
  }

  const page = data?.page;
  if (!page) return null;

  const title = page.title || plainSlug.replace(/_/g, ' ');
  const rawContent = page.content || page.description || '';
  const content = typeof rawContent === 'string' ? rawContent : String(rawContent);
  const citations: any[] = Array.isArray(page.citations) ? page.citations : [];
  const images: any[] = Array.isArray(page.images) ? page.images : [];
  const metadata = page.metadata || {};
  const stats = page.stats || {};

  // Truncate the article text; a missing or non-positive maxBytes falls back to the default.
  const maxChars = typeof options.maxBytes === 'number' && options.maxBytes > 0 ? options.maxBytes : 20000;
  const truncatedContent = content.length > maxChars
    ? content.slice(0, maxChars) + `\n\n[Truncated — full article is ${content.length} chars. ${citations.length} citations available.]`
    : content;

  // Sources section: first 10 citations that actually carry a URL, so the rendered
  // list never contains "(undefined)" and stays in step with the links array.
  const topCitations = citations.filter((c: any) => c && c.url).slice(0, 10);
  const citationBlock = topCitations.length > 0
    ? '\n\n## Sources\n' + topCitations.map((c: any, i: number) =>
        `${i + 1}. [${c.title || 'Source'}](${c.url})`
      ).join('\n')
    : '';

  const statsLine = stats.totalViews
    ? `\n\nViews: ${Number(stats.totalViews).toLocaleString()} | Quality: ${stats.qualityScore || 'N/A'} | Language: ${metadata.language || 'en'}`
    : '';

  const resultImages = images.slice(0, 5).map((img: any) => ({
    alt: img?.caption || title,
    src: img?.url || '',
  }));

  const resultLinks: Array<{ text: string; href: string }> = [
    { text: 'Full article', href: `https://grokipedia.com/wiki/${slug}` },
  ];
  for (const c of topCitations) {
    resultLinks.push({ text: c.title || 'Source', href: c.url });
  }

  // lastModified is scaled by 1000 before conversion, i.e. it is treated as seconds —
  // NOTE(review): confirm the unit against the API. An unparsable value yields null
  // instead of letting toISOString() throw a RangeError.
  const modifiedAt = metadata.lastModified ? new Date(Number(metadata.lastModified) * 1000) : null;
  const publishedAt = modifiedAt && !Number.isNaN(modifiedAt.getTime()) ? modifiedAt.toISOString() : null;

  const body = truncatedContent + citationBlock + statsLine;

  return {
    url,
    title,
    author: metadata.lastEditor || null,
    description: page.description || null,
    content: body,
    links: resultLinks,
    images: resultImages,
    metadata: {
      type: 'article',
      publishedAt,
      source: 'grokipedia-api',
      canonical: `https://grokipedia.com/wiki/${slug}`,
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(body) },
  };
}
143
+
144
/**
 * Runs a full-text search (limit 10) and renders the hits as a numbered list.
 *
 * @param apiBase Root of the Grokipedia API, without a trailing slash.
 * @param query   Decoded search text.
 * @param url     URL originally requested; echoed back in the result.
 * @param options `skipSsrf` is forwarded to `safeFetch`.
 * @returns The search listing, or null when the request does not return 200, the body
 *          is not JSON, or no usable result (one with a slug) is present.
 */
async function decodeSearch(
  apiBase: string,
  query: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  const apiUrl = `${apiBase}/full-text-search?query=${encodeURIComponent(query)}&limit=10`;
  const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf });
  if (!result || result.status !== 200) return null;

  let data: any;
  try {
    data = JSON.parse(result.body);
  } catch {
    return null;
  }

  // Only hits with a slug can be linked; anything else would render as /wiki/undefined.
  const rawResults: any[] = Array.isArray(data?.results) ? data.results : [];
  const results = rawResults.filter((r: any) => r && r.slug);
  if (results.length === 0) return null;

  // One label/href pair per hit, shared by the rendered list and the links array so the
  // two can never disagree (the list previously printed "undefined" for untitled hits).
  const hits = results.map((r: any) => ({
    label: r.title || r.slug,
    href: `https://grokipedia.com/wiki/${r.slug}`,
    views: r.viewCount ? ` (${Number(r.viewCount).toLocaleString()} views)` : '',
    // <em> highlight tags become Markdown bold; newlines are flattened to spaces.
    snippet: String(r.snippet || '').replace(/<\/?em>/g, '**').replace(/\n/g, ' ').trim(),
  }));

  const content = hits
    .map((h, i) => `${i + 1}. **[${h.label}](${h.href})**${h.views}\n ${h.snippet}`)
    .join('\n\n');

  const links = hits.map((h) => ({ text: h.label, href: h.href }));

  return {
    url,
    title: `Grokipedia search: "${query}"`,
    author: null,
    description: `${results.length} results for "${query}"`,
    content,
    links,
    images: [],
    metadata: {
      type: 'search-results',
      publishedAt: null,
      source: 'grokipedia-api',
      canonical: null,
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(content) },
  };
}
193
+
194
/**
 * Builds a homepage summary from the `/stats` and `/list-edit-requests` endpoints.
 * Either endpoint may fail; its section is then rendered from empty data.
 *
 * @param apiBase Root of the Grokipedia API, without a trailing slash.
 * @param url     URL originally requested; echoed back in the result.
 * @param options `skipSsrf` is forwarded to `safeFetch`.
 * @returns A ReadResult describing the site.
 */
async function decodeHomepage(
  apiBase: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  // Fetches one endpoint and parses it; a non-200 status, invalid JSON, or a JSON
  // `null` body all collapse to an empty object.
  const loadJson = async (path: string): Promise<any> => {
    const response = await safeFetch(`${apiBase}${path}`, { skipSsrf: options.skipSsrf });
    if (response?.status !== 200) return {};
    try {
      return JSON.parse(response.body) ?? {};
    } catch {
      return {};
    }
  };

  // The two requests are independent, so issue them together.
  const [statsData, editsData] = await Promise.all([
    loadJson('/stats'),
    loadJson('/list-edit-requests?limit=5'),
  ]);

  const totalPages = Number(statsData.totalPages || 0).toLocaleString();
  const indexGB = (Number(statsData.indexSizeBytes || 0) / (1024 ** 3)).toFixed(1);

  let content = `# Grokipedia\n\nAn open source, comprehensive collection of all knowledge.\n\n`;
  content += `**${totalPages} articles** | **${indexGB} GB index**\n\n`;

  const edits: any[] = Array.isArray(editsData.editRequests) ? editsData.editRequests : [];
  if (edits.length > 0) {
    content += `## Recent Activity\n`;
    for (const edit of edits) {
      const article = (typeof edit?.slug === 'string' ? edit.slug.replace(/_/g, ' ') : '') || 'Unknown';
      const editor = edit?.userId || 'Anonymous';
      // e.g. "EDIT_REQUEST_TYPE_UPDATE_CONTENT" -> "update content". When the type is
      // absent the parenthetical is dropped instead of printing "(undefined)".
      const kind = typeof edit?.type === 'string'
        ? edit.type.replace('EDIT_REQUEST_TYPE_', '').toLowerCase().replace(/_/g, ' ')
        : '';
      content += `- **${article}** — edited by ${editor}${kind ? ` (${kind})` : ''}\n`;
    }
  }

  return {
    url,
    title: 'Grokipedia',
    author: null,
    description: `Open knowledge base with ${totalPages} articles`,
    content,
    links: [],
    images: [],
    metadata: {
      type: 'website',
      publishedAt: null,
      source: 'grokipedia-api',
      canonical: 'https://grokipedia.com',
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(content) },
  };
}
@@ -0,0 +1,198 @@
1
+ // src/read/decoders/hackernews.ts
2
+ import type { Decoder, ReadResult } from '../types.js';
3
+ import { safeFetch } from '../../discovery/fetch.js';
4
+
5
/** Default root of the Hacker News API; used by `decode` unless `options._apiBaseUrl` is set. */
const DEFAULT_API_BASE = 'https://hacker-news.firebaseio.com';
6
+
7
/** Rough token estimate for `text`: its length divided by four, rounded up. */
function estimateTokens(text: string): number {
  const quarters = text.length / 4;
  return Math.ceil(quarters);
}
10
+
11
/**
 * Decoder for Hacker News item pages and the front page. Item URLs are dispatched to
 * `decodeItem`; every other URL is handled by `decodeFrontPage`.
 */
export const hackernewsDecoder: Decoder = {
  name: 'hackernews',
  patterns: [
    /news\.ycombinator\.com\/item\?id=\d+/,
    /news\.ycombinator\.com\/?(?:\?|$)/,
  ],

  /**
   * @param url     Hacker News URL to decode.
   * @param options `_apiBaseUrl` overrides the API root; `skipSsrf` is passed on to the
   *                fetch helpers.
   * @returns Whatever the item or front-page decoder produces; null if dispatch throws.
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const fetchOpts = { skipSsrf: options.skipSsrf };
      const apiBase = options._apiBaseUrl || DEFAULT_API_BASE;

      // An `item?id=<digits>` fragment anywhere in the URL selects the item view.
      const itemId = /item\?id=(\d+)/.exec(url)?.[1];
      if (itemId === undefined) {
        return decodeFrontPage(url, apiBase, fetchOpts);
      }
      return decodeItem(url, itemId, apiBase, fetchOpts);
    } catch {
      return null;
    }
  },
};
36
+
37
/**
 * Fetches one Hacker News item plus up to 10 of its direct replies and maps them to a
 * ReadResult.
 *
 * @param url       URL originally requested; echoed back in the result.
 * @param id        Numeric item id taken from the URL.
 * @param apiBase   Root of the Hacker News API, without a trailing slash.
 * @param fetchOpts Options forwarded to `safeFetch` and `fetchComments`.
 * @returns The decoded item, or null when the request does not return 200, the body is
 *          not JSON, the parsed item is empty (e.g. a JSON `null` body), or anything throws.
 */
async function decodeItem(
  url: string,
  id: string,
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<ReadResult | null> {
  try {
    const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
    if (!result || result.status !== 200) return null;

    let item: any;
    try {
      item = JSON.parse(result.body);
    } catch {
      return null;
    }

    if (!item) return null;

    const title = item.title || null;
    const author = item.by || null;
    const score = item.score ?? 0;
    const itemUrl = item.url || null;
    const text = item.text || '';

    // Fetch top 10 comments; `kids` holds the ids of the direct replies.
    const kids: number[] = Array.isArray(item.kids) ? item.kids : [];
    const comments = await fetchComments(kids.slice(0, 10), apiBase, fetchOpts);

    const commentText = comments
      .map((c: any) => `${c.by || '[deleted]'}: ${c.text || '[deleted]'}`)
      .join('\n\n');

    const contentParts: string[] = [];
    if (text) contentParts.push(text);
    contentParts.push(`Score: ${score} | ${kids.length} comments`);
    if (commentText) contentParts.push(`---\n${commentText}`);
    const content = contentParts.join('\n\n');

    const links: Array<{ text: string; href: string }> = [];
    if (itemUrl) {
      links.push({ text: title || 'Link', href: itemUrl });
    }

    return {
      url,
      title,
      author,
      // Same placeholder as the comment list above, instead of printing "by null".
      description: `HN ${item.type || 'story'} by ${author ?? '[deleted]'} (${score} points)`,
      content,
      links,
      images: [],
      metadata: {
        type: item.type || 'story',
        publishedAt: item.time ? new Date(item.time * 1000).toISOString() : null,
        source: 'hackernews-firebase',
        canonical: `https://news.ycombinator.com/item?id=${id}`,
        siteName: 'Hacker News',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
103
+
104
/**
 * Renders the first 10 entries of the top-stories list as a numbered listing.
 *
 * @param url       URL originally requested; echoed back in the result.
 * @param apiBase   Root of the Hacker News API, without a trailing slash.
 * @param fetchOpts Options forwarded to `safeFetch` and `fetchStories`.
 * @returns The listing, or null when the request does not return 200, the body is not a
 *          JSON array, or anything throws.
 */
async function decodeFrontPage(
  url: string,
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<ReadResult | null> {
  try {
    const response = await safeFetch(`${apiBase}/v0/topstories.json`, fetchOpts);
    if (!response || response.status !== 200) {
      return null;
    }

    let parsedIds: number[];
    try {
      parsedIds = JSON.parse(response.body);
    } catch {
      return null;
    }
    if (!Array.isArray(parsedIds)) {
      return null;
    }

    // Only the first 10 ids are resolved to full story records.
    const stories = await fetchStories(parsedIds.slice(0, 10), apiBase, fetchOpts);

    const lines: string[] = [];
    const links: Array<{ text: any; href: any }> = [];
    stories.forEach((story: any, index: number) => {
      const heading = story.title || '[untitled]';
      const points = story.score ?? 0;
      const commentCount = story.descendants ?? 0;
      const submitter = story.by || '[deleted]';
      lines.push(`${index + 1}. ${heading} (${points} pts, ${commentCount} comments) by ${submitter}`);
      if (story.url) {
        links.push({ text: story.title || 'Link', href: story.url });
      }
    });
    const content = lines.join('\n');

    return {
      url,
      title: 'Hacker News — Top Stories',
      author: null,
      description: `Top ${stories.length} stories`,
      content,
      links,
      images: [],
      metadata: {
        type: 'listing',
        publishedAt: null,
        source: 'hackernews-firebase',
        canonical: 'https://news.ycombinator.com/',
        siteName: 'Hacker News',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
155
+
156
/**
 * Loads the given comment items concurrently and returns the usable ones in the same
 * order as `ids`.
 *
 * A comment is dropped when its request fails or throws, the status is not 200, the
 * body is not valid JSON, the parsed value is empty, or it is flagged `deleted`.
 *
 * @param ids       Item ids to load.
 * @param apiBase   Root of the Hacker News API, without a trailing slash.
 * @param fetchOpts Options forwarded to `safeFetch`.
 * @returns The comments that were loaded successfully; never rejects.
 */
async function fetchComments(
  ids: number[],
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<any[]> {
  // The requests do not depend on each other, so they are started together;
  // Promise.all keeps the results in input order.
  const loaded = await Promise.all(
    ids.map(async (id): Promise<any> => {
      try {
        const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
        if (!result || result.status !== 200) return null;
        const comment = JSON.parse(result.body);
        return comment && !comment.deleted ? comment : null;
      } catch {
        // skip failed comments
        return null;
      }
    }),
  );
  return loaded.filter((comment) => comment !== null);
}
177
+
178
/**
 * Loads the given story items concurrently and returns the usable ones in the same
 * order as `ids`.
 *
 * A story is dropped when its request fails or throws, the status is not 200, the body
 * is not valid JSON, or the parsed value is empty.
 *
 * @param ids       Item ids to load.
 * @param apiBase   Root of the Hacker News API, without a trailing slash.
 * @param fetchOpts Options forwarded to `safeFetch`.
 * @returns The stories that were loaded successfully; never rejects.
 */
async function fetchStories(
  ids: number[],
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<any[]> {
  // The requests do not depend on each other, so they are started together;
  // Promise.all keeps the results in input order.
  const loaded = await Promise.all(
    ids.map(async (id): Promise<any> => {
      try {
        const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
        if (!result || result.status !== 200) return null;
        const story = JSON.parse(result.body);
        return story ? story : null;
      } catch {
        // skip failed stories
        return null;
      }
    }),
  );
  return loaded.filter((story) => story !== null);
}
@@ -0,0 +1,15 @@
1
+ // src/read/decoders/index.ts
2
+ import type { Decoder } from '../types.js';
3
+ import { redditDecoder } from './reddit.js';
4
+ import { youtubeDecoder } from './youtube.js';
5
+ import { wikipediaDecoder } from './wikipedia.js';
6
+ import { hackernewsDecoder } from './hackernews.js';
7
+ import { grokipediaDecoder } from './grokipedia.js';
8
+ import { twitterDecoder } from './twitter.js';
9
+ import { deepwikiDecoder } from './deepwiki.js';
10
+
11
/** Registered decoders, consulted in this order. */
const decoders: Decoder[] = [
  redditDecoder,
  youtubeDecoder,
  wikipediaDecoder,
  hackernewsDecoder,
  grokipediaDecoder,
  twitterDecoder,
  deepwikiDecoder,
];

/**
 * Returns the first registered decoder that has a pattern matching `url`, or null when
 * none does.
 */
export function findDecoder(url: string): Decoder | null {
  for (const decoder of decoders) {
    for (const pattern of decoder.patterns) {
      if (pattern.test(url)) {
        return decoder;
      }
    }
  }
  return null;
}
@@ -0,0 +1,158 @@
1
+ // src/read/decoders/reddit.ts
2
+ import type { Decoder, ReadResult } from '../types.js';
3
+ import { safeFetch } from '../../discovery/fetch.js';
4
+
5
/** Estimates tokens as ceil(characters / 4). */
function estimateTokens(text: string): number {
  const { length } = text;
  return Math.ceil(length / 4);
}
8
+
9
/**
 * Decoder for Reddit post pages, subreddit listings and user pages. It requests the
 * `.json` variant of the page URL and hands the payload to the post or listing mapper.
 */
export const redditDecoder: Decoder = {
  name: 'reddit',
  patterns: [
    /reddit\.com\/r\/[^/]+\/comments\//,
    /reddit\.com\/r\/[^/]+\/?$/,
    /reddit\.com\/r\/[^/]+\/?(?:\?|$)/,
    /reddit\.com\/user\/[^/]+/,
  ],

  /**
   * @param url     Reddit page URL.
   * @param options `skipSsrf` is forwarded to `safeFetch`.
   * @returns The decoded page, or null when the request does not return 200, the body
   *          is not JSON, the payload has neither expected shape, or anything throws.
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      // Build the JSON endpoint from the path only: drop any #fragment, strip trailing
      // slashes, append `.json` unless it is already there, then re-attach the query.
      // (Appending to the raw URL put `.json` inside the fragment for `...#comment`.)
      const hashAt = url.indexOf('#');
      const withoutFragment = hashAt === -1 ? url : url.slice(0, hashAt);
      const queryAt = withoutFragment.indexOf('?');
      const path = (queryAt === -1 ? withoutFragment : withoutFragment.slice(0, queryAt)).replace(/\/+$/, '');
      const query = queryAt === -1 ? '' : withoutFragment.slice(queryAt);
      const jsonUrl = (path.endsWith('.json') ? path : `${path}.json`) + query;

      const result = await safeFetch(jsonUrl, { skipSsrf: options.skipSsrf });
      if (!result || result.status !== 200) return null;

      let data: any;
      try {
        data = JSON.parse(result.body);
      } catch {
        return null;
      }

      // Post page: response is an array [post, comments]
      if (Array.isArray(data) && data.length >= 1) {
        return decodePostPage(url, data);
      }

      // Subreddit/user listing: response has data.children
      if (data && data.data && Array.isArray(data.data.children)) {
        return decodeListingPage(url, data);
      }

      return null;
    } catch {
      return null;
    }
  },
};
49
+
50
/**
 * Maps a post-page payload (`[postListing, commentListing]`) to a ReadResult.
 *
 * @param url  URL originally requested; echoed back in the result.
 * @param data Parsed JSON array; element 0 holds the post, element 1 the comments.
 * @returns The decoded discussion with up to 25 top-level comments, or null when the
 *          post record is missing or anything throws.
 */
function decodePostPage(url: string, data: any[]): ReadResult | null {
  try {
    const postData = data[0]?.data?.children?.[0]?.data;
    if (!postData) return null;

    const title = postData.title || null;
    const author = postData.author || null;
    const selftext = postData.selftext || '';
    const score = postData.score ?? 0;
    const subreddit = postData.subreddit || '';

    // Keep only real comments (kind 't1'); other kinds are skipped.
    const commentChildren = data[1]?.data?.children || [];
    const comments = commentChildren
      .filter((c: any) => c.kind === 't1' && c.data)
      .slice(0, 25)
      .map((c: any) => ({
        author: c.data.author || '[deleted]',
        body: c.data.body || '',
        score: c.data.score ?? 0,
      }));

    const commentText = comments
      .map((c: any) => `${c.author} (${c.score} pts): ${c.body}`)
      .join('\n\n');

    const content = selftext
      ? `${selftext}\n\n---\nScore: ${score} | ${comments.length} comments\n\n${commentText}`
      : `Score: ${score} | ${comments.length} comments\n\n${commentText}`;

    // `permalink` is site-relative (it is prefixed with the host for `canonical` below)
    // while `url` is absolute, so strict equality never matched. A post whose url ends
    // with its own permalink points back at itself and gets no outbound link.
    const permalink = typeof postData.permalink === 'string' ? postData.permalink : '';
    const outbound = postData.url;
    const pointsAtItself =
      permalink !== '' && typeof outbound === 'string' && outbound.endsWith(permalink);
    const links: Array<{ text: string; href: string }> = [];
    if (outbound && !pointsAtItself) {
      links.push({ text: 'Link', href: outbound });
    }

    return {
      url,
      title,
      author,
      // Same placeholder as the comment list, instead of printing "u/null".
      description: `r/${subreddit} post by u/${author ?? '[deleted]'} (${score} points)`,
      content,
      links,
      images: [],
      metadata: {
        type: 'discussion',
        publishedAt: postData.created_utc ? new Date(postData.created_utc * 1000).toISOString() : null,
        source: 'reddit-json',
        canonical: postData.permalink ? `https://www.reddit.com${postData.permalink}` : null,
        siteName: 'Reddit',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
106
+
107
/**
 * Maps a listing payload (subreddit or user page) to a numbered ReadResult listing.
 *
 * @param url  URL originally requested; echoed back and used to derive the title.
 * @param data Parsed JSON object whose `data.children` holds the entries.
 * @returns The listing with up to 25 entries, or null when anything throws.
 */
function decodeListingPage(url: string, data: any): ReadResult | null {
  try {
    const children = data.data.children || [];
    const posts = children
      .filter((c: any) => c && c.data)
      .slice(0, 25)
      .map((c: any) => ({
        // Entries without a title fall back to `link_title`.
        title: c.data.title || c.data.link_title || '',
        author: c.data.author || '[deleted]',
        score: c.data.score ?? 0,
        numComments: c.data.num_comments ?? 0,
        permalink: c.data.permalink || '',
      }));

    const content = posts
      .map((p: any, i: number) => `${i + 1}. ${p.title} (${p.score} pts, ${p.numComments} comments) by u/${p.author}`)
      .join('\n');

    const links = posts
      .filter((p: any) => p.permalink)
      .map((p: any) => ({ text: p.title, href: `https://www.reddit.com${p.permalink}` }));

    // Derive the display name from the URL path. The name stops at `/`, `?` or `#` so a
    // query string or fragment (e.g. /r/ts?sort=new) does not leak into the title.
    const subMatch = url.match(/\/r\/([^/?#]+)/);
    const subreddit = subMatch ? subMatch[1] : null;
    const userMatch = url.match(/\/user\/([^/?#]+)/);
    const user = userMatch ? userMatch[1] : null;

    const title = subreddit ? `r/${subreddit}` : user ? `u/${user}` : 'Reddit listing';

    return {
      url,
      title,
      author: null,
      description: `${posts.length} posts`,
      content,
      links,
      images: [],
      metadata: {
        type: 'listing',
        publishedAt: null,
        source: 'reddit-json',
        canonical: null,
        siteName: 'Reddit',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}