@apitap/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +60 -0
- package/README.md +362 -0
- package/SKILL.md +270 -0
- package/dist/auth/crypto.d.ts +31 -0
- package/dist/auth/crypto.js +66 -0
- package/dist/auth/crypto.js.map +1 -0
- package/dist/auth/handoff.d.ts +29 -0
- package/dist/auth/handoff.js +180 -0
- package/dist/auth/handoff.js.map +1 -0
- package/dist/auth/manager.d.ts +46 -0
- package/dist/auth/manager.js +127 -0
- package/dist/auth/manager.js.map +1 -0
- package/dist/auth/oauth-refresh.d.ts +16 -0
- package/dist/auth/oauth-refresh.js +91 -0
- package/dist/auth/oauth-refresh.js.map +1 -0
- package/dist/auth/refresh.d.ts +43 -0
- package/dist/auth/refresh.js +217 -0
- package/dist/auth/refresh.js.map +1 -0
- package/dist/capture/anti-bot.d.ts +15 -0
- package/dist/capture/anti-bot.js +43 -0
- package/dist/capture/anti-bot.js.map +1 -0
- package/dist/capture/blocklist.d.ts +6 -0
- package/dist/capture/blocklist.js +70 -0
- package/dist/capture/blocklist.js.map +1 -0
- package/dist/capture/body-diff.d.ts +8 -0
- package/dist/capture/body-diff.js +102 -0
- package/dist/capture/body-diff.js.map +1 -0
- package/dist/capture/body-variables.d.ts +13 -0
- package/dist/capture/body-variables.js +142 -0
- package/dist/capture/body-variables.js.map +1 -0
- package/dist/capture/domain.d.ts +8 -0
- package/dist/capture/domain.js +34 -0
- package/dist/capture/domain.js.map +1 -0
- package/dist/capture/entropy.d.ts +33 -0
- package/dist/capture/entropy.js +100 -0
- package/dist/capture/entropy.js.map +1 -0
- package/dist/capture/filter.d.ts +11 -0
- package/dist/capture/filter.js +49 -0
- package/dist/capture/filter.js.map +1 -0
- package/dist/capture/graphql.d.ts +21 -0
- package/dist/capture/graphql.js +99 -0
- package/dist/capture/graphql.js.map +1 -0
- package/dist/capture/idle.d.ts +23 -0
- package/dist/capture/idle.js +44 -0
- package/dist/capture/idle.js.map +1 -0
- package/dist/capture/monitor.d.ts +26 -0
- package/dist/capture/monitor.js +183 -0
- package/dist/capture/monitor.js.map +1 -0
- package/dist/capture/oauth-detector.d.ts +18 -0
- package/dist/capture/oauth-detector.js +96 -0
- package/dist/capture/oauth-detector.js.map +1 -0
- package/dist/capture/pagination.d.ts +9 -0
- package/dist/capture/pagination.js +40 -0
- package/dist/capture/pagination.js.map +1 -0
- package/dist/capture/parameterize.d.ts +17 -0
- package/dist/capture/parameterize.js +63 -0
- package/dist/capture/parameterize.js.map +1 -0
- package/dist/capture/scrubber.d.ts +5 -0
- package/dist/capture/scrubber.js +38 -0
- package/dist/capture/scrubber.js.map +1 -0
- package/dist/capture/session.d.ts +46 -0
- package/dist/capture/session.js +445 -0
- package/dist/capture/session.js.map +1 -0
- package/dist/capture/token-detector.d.ts +16 -0
- package/dist/capture/token-detector.js +62 -0
- package/dist/capture/token-detector.js.map +1 -0
- package/dist/capture/verifier.d.ts +17 -0
- package/dist/capture/verifier.js +147 -0
- package/dist/capture/verifier.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +930 -0
- package/dist/cli.js.map +1 -0
- package/dist/discovery/auth.d.ts +17 -0
- package/dist/discovery/auth.js +81 -0
- package/dist/discovery/auth.js.map +1 -0
- package/dist/discovery/fetch.d.ts +17 -0
- package/dist/discovery/fetch.js +59 -0
- package/dist/discovery/fetch.js.map +1 -0
- package/dist/discovery/frameworks.d.ts +11 -0
- package/dist/discovery/frameworks.js +249 -0
- package/dist/discovery/frameworks.js.map +1 -0
- package/dist/discovery/index.d.ts +21 -0
- package/dist/discovery/index.js +219 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/openapi.d.ts +13 -0
- package/dist/discovery/openapi.js +175 -0
- package/dist/discovery/openapi.js.map +1 -0
- package/dist/discovery/probes.d.ts +9 -0
- package/dist/discovery/probes.js +70 -0
- package/dist/discovery/probes.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect/report.d.ts +52 -0
- package/dist/inspect/report.js +191 -0
- package/dist/inspect/report.js.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.js +526 -0
- package/dist/mcp.js.map +1 -0
- package/dist/orchestration/browse.d.ts +38 -0
- package/dist/orchestration/browse.js +198 -0
- package/dist/orchestration/browse.js.map +1 -0
- package/dist/orchestration/cache.d.ts +15 -0
- package/dist/orchestration/cache.js +24 -0
- package/dist/orchestration/cache.js.map +1 -0
- package/dist/plugin.d.ts +17 -0
- package/dist/plugin.js +158 -0
- package/dist/plugin.js.map +1 -0
- package/dist/read/decoders/deepwiki.d.ts +2 -0
- package/dist/read/decoders/deepwiki.js +148 -0
- package/dist/read/decoders/deepwiki.js.map +1 -0
- package/dist/read/decoders/grokipedia.d.ts +2 -0
- package/dist/read/decoders/grokipedia.js +210 -0
- package/dist/read/decoders/grokipedia.js.map +1 -0
- package/dist/read/decoders/hackernews.d.ts +2 -0
- package/dist/read/decoders/hackernews.js +168 -0
- package/dist/read/decoders/hackernews.js.map +1 -0
- package/dist/read/decoders/index.d.ts +2 -0
- package/dist/read/decoders/index.js +12 -0
- package/dist/read/decoders/index.js.map +1 -0
- package/dist/read/decoders/reddit.d.ts +2 -0
- package/dist/read/decoders/reddit.js +142 -0
- package/dist/read/decoders/reddit.js.map +1 -0
- package/dist/read/decoders/twitter.d.ts +12 -0
- package/dist/read/decoders/twitter.js +187 -0
- package/dist/read/decoders/twitter.js.map +1 -0
- package/dist/read/decoders/wikipedia.d.ts +2 -0
- package/dist/read/decoders/wikipedia.js +66 -0
- package/dist/read/decoders/wikipedia.js.map +1 -0
- package/dist/read/decoders/youtube.d.ts +2 -0
- package/dist/read/decoders/youtube.js +69 -0
- package/dist/read/decoders/youtube.js.map +1 -0
- package/dist/read/extract.d.ts +25 -0
- package/dist/read/extract.js +320 -0
- package/dist/read/extract.js.map +1 -0
- package/dist/read/index.d.ts +14 -0
- package/dist/read/index.js +66 -0
- package/dist/read/index.js.map +1 -0
- package/dist/read/peek.d.ts +9 -0
- package/dist/read/peek.js +137 -0
- package/dist/read/peek.js.map +1 -0
- package/dist/read/types.d.ts +44 -0
- package/dist/read/types.js +3 -0
- package/dist/read/types.js.map +1 -0
- package/dist/replay/engine.d.ts +53 -0
- package/dist/replay/engine.js +441 -0
- package/dist/replay/engine.js.map +1 -0
- package/dist/replay/truncate.d.ts +16 -0
- package/dist/replay/truncate.js +92 -0
- package/dist/replay/truncate.js.map +1 -0
- package/dist/serve.d.ts +31 -0
- package/dist/serve.js +149 -0
- package/dist/serve.js.map +1 -0
- package/dist/skill/generator.d.ts +44 -0
- package/dist/skill/generator.js +419 -0
- package/dist/skill/generator.js.map +1 -0
- package/dist/skill/importer.d.ts +26 -0
- package/dist/skill/importer.js +80 -0
- package/dist/skill/importer.js.map +1 -0
- package/dist/skill/search.d.ts +19 -0
- package/dist/skill/search.js +51 -0
- package/dist/skill/search.js.map +1 -0
- package/dist/skill/signing.d.ts +16 -0
- package/dist/skill/signing.js +34 -0
- package/dist/skill/signing.js.map +1 -0
- package/dist/skill/ssrf.d.ts +27 -0
- package/dist/skill/ssrf.js +210 -0
- package/dist/skill/ssrf.js.map +1 -0
- package/dist/skill/store.d.ts +7 -0
- package/dist/skill/store.js +93 -0
- package/dist/skill/store.js.map +1 -0
- package/dist/stats/report.d.ts +26 -0
- package/dist/stats/report.js +157 -0
- package/dist/stats/report.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
- package/src/auth/crypto.ts +92 -0
- package/src/auth/handoff.ts +229 -0
- package/src/auth/manager.ts +140 -0
- package/src/auth/oauth-refresh.ts +120 -0
- package/src/auth/refresh.ts +300 -0
- package/src/capture/anti-bot.ts +63 -0
- package/src/capture/blocklist.ts +75 -0
- package/src/capture/body-diff.ts +109 -0
- package/src/capture/body-variables.ts +156 -0
- package/src/capture/domain.ts +34 -0
- package/src/capture/entropy.ts +121 -0
- package/src/capture/filter.ts +56 -0
- package/src/capture/graphql.ts +124 -0
- package/src/capture/idle.ts +45 -0
- package/src/capture/monitor.ts +224 -0
- package/src/capture/oauth-detector.ts +106 -0
- package/src/capture/pagination.ts +49 -0
- package/src/capture/parameterize.ts +68 -0
- package/src/capture/scrubber.ts +49 -0
- package/src/capture/session.ts +502 -0
- package/src/capture/token-detector.ts +76 -0
- package/src/capture/verifier.ts +171 -0
- package/src/cli.ts +1031 -0
- package/src/discovery/auth.ts +99 -0
- package/src/discovery/fetch.ts +85 -0
- package/src/discovery/frameworks.ts +231 -0
- package/src/discovery/index.ts +256 -0
- package/src/discovery/openapi.ts +230 -0
- package/src/discovery/probes.ts +76 -0
- package/src/index.ts +26 -0
- package/src/inspect/report.ts +247 -0
- package/src/mcp.ts +618 -0
- package/src/orchestration/browse.ts +250 -0
- package/src/orchestration/cache.ts +37 -0
- package/src/plugin.ts +188 -0
- package/src/read/decoders/deepwiki.ts +180 -0
- package/src/read/decoders/grokipedia.ts +246 -0
- package/src/read/decoders/hackernews.ts +198 -0
- package/src/read/decoders/index.ts +15 -0
- package/src/read/decoders/reddit.ts +158 -0
- package/src/read/decoders/twitter.ts +211 -0
- package/src/read/decoders/wikipedia.ts +75 -0
- package/src/read/decoders/youtube.ts +75 -0
- package/src/read/extract.ts +396 -0
- package/src/read/index.ts +78 -0
- package/src/read/peek.ts +175 -0
- package/src/read/types.ts +37 -0
- package/src/replay/engine.ts +559 -0
- package/src/replay/truncate.ts +116 -0
- package/src/serve.ts +189 -0
- package/src/skill/generator.ts +473 -0
- package/src/skill/importer.ts +107 -0
- package/src/skill/search.ts +76 -0
- package/src/skill/signing.ts +36 -0
- package/src/skill/ssrf.ts +238 -0
- package/src/skill/store.ts +107 -0
- package/src/stats/report.ts +208 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
// src/read/decoders/grokipedia.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Approximate the token cost of a piece of text.
 *
 * @param text - Text whose cost should be estimated.
 * @returns The character count divided by four, rounded up.
 */
function estimateTokens(text: string): number {
  const charCount = text.length;
  const quarter = charCount / 4;
  return Math.ceil(quarter);
}
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Grokipedia decoder — xAI's open knowledge base (6M+ articles)
|
|
11
|
+
*
|
|
12
|
+
* API endpoints (all public, no auth):
|
|
13
|
+
* /api/page?slug=X&includeContent=true — Full article with citations
|
|
14
|
+
* /api/full-text-search?query=X&limit=N — Search with relevance scoring
|
|
15
|
+
* /api/stats — Site-wide stats
|
|
16
|
+
* /api/typeahead?query=X — Autocomplete
|
|
17
|
+
* /api/list-pages?limit=N — Browse articles
|
|
18
|
+
* /api/top-contributors?limit=N — Top editors
|
|
19
|
+
* /api/list-edit-requests?limit=N — Recent edits
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
// Default base URL for Grokipedia API requests; decode() uses it unless options._apiBaseUrl is set.
const GROKIPEDIA_API = 'https://grokipedia.com/api';
|
|
23
|
+
|
|
24
|
+
/**
 * Decoder for grokipedia.com URLs. Recognises article pages (/wiki/, /article/),
 * search pages (/search?q=...) and the site homepage, and delegates each to a
 * dedicated helper that talks to the JSON API.
 */
export const grokipediaDecoder: Decoder = {
  name: 'grokipedia',
  patterns: [
    /grokipedia\.com\/wiki\/([^#?]+)/,
    /grokipedia\.com\/article\/([^#?]+)/,
    /grokipedia\.com\/search\?/,
    /grokipedia\.com\/?$/,
    /grokipedia\.com\/?(?:\?|#|$)/,
  ],

  /**
   * Route a Grokipedia URL to the search, article, or homepage helper.
   *
   * @param url - Page URL to decode.
   * @param options - Passed through to the helpers; `_apiBaseUrl` overrides the default API base.
   * @returns The decoded page, or `null` when the URL is not recognised or decoding fails.
   */
  async decode(url: string, options: { skipSsrf?: boolean; maxBytes?: number; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const apiBase = options._apiBaseUrl || GROKIPEDIA_API;

      // Search URL: /search?q=query. The parameter must be named exactly `q`
      // (either first or preceded by `&`), so e.g. `seq=` is not mistaken for it.
      const searchMatch = url.match(/grokipedia\.com\/search\?(?:[^#]*&)?q=([^&#]+)/);
      if (searchMatch) {
        // `+` encodes a space in query strings; decodeURIComponent leaves it untouched.
        const query = decodeURIComponent(searchMatch[1].replace(/\+/g, ' '));
        // `await` is required: without it a rejected helper promise bypasses this try/catch.
        return await decodeSearch(apiBase, query, url, options);
      }

      // Article URL: /wiki/Slug or /article/Slug
      const articleMatch = url.match(/grokipedia\.com\/(?:wiki|article)\/([^#?]+)/);
      if (articleMatch) {
        return await decodeArticle(apiBase, articleMatch[1], url, options);
      }

      // Homepage: return stats + recent activity
      if (/grokipedia\.com\/?(?:\?|#|$)/.test(url)) {
        return await decodeHomepage(apiBase, url, options);
      }

      return null;
    } catch {
      // Best-effort decoder: any failure means "could not decode".
      return null;
    }
  },
};
|
|
61
|
+
|
|
62
|
+
/**
 * Fetch a single article from the `/page` API endpoint and convert it to a ReadResult.
 *
 * @param apiBase - Base URL of the API.
 * @param slug - Article slug as it appeared in the page URL (may be percent-encoded).
 * @param url - Original page URL, echoed back in the result.
 * @param options - `skipSsrf` is forwarded to safeFetch; `maxBytes`, when set, is used as the
 *   maximum number of content characters (default 20000).
 * @returns The decoded article, or `null` on a non-200 response, invalid JSON, or a missing `page`.
 */
async function decodeArticle(
  apiBase: string,
  slug: string,
  url: string,
  options: { skipSsrf?: boolean; maxBytes?: number; [key: string]: any },
): Promise<ReadResult | null> {
  // The slug may already be percent-encoded. Decode it first so that
  // encodeURIComponent below does not double-encode it (e.g. %C3 -> %25C3).
  let decodedSlug = slug;
  try {
    decodedSlug = decodeURIComponent(slug);
  } catch {
    // Malformed escape sequence: fall back to the raw slug.
  }

  const apiUrl = `${apiBase}/page?slug=${encodeURIComponent(decodedSlug)}&includeContent=true`;
  // Grokipedia articles can be very large (743KB+ for Elon Musk) — raise body limit to 2MB
  const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf, maxBodySize: 2 * 1024 * 1024 });
  if (!result || result.status !== 200) return null;

  let data: any;
  try {
    data = JSON.parse(result.body);
  } catch {
    return null;
  }

  const page = data?.page;
  if (!page) return null;

  const title = page.title || decodedSlug.replace(/_/g, ' ');
  const content = page.content || page.description || '';
  const citations: any[] = Array.isArray(page.citations) ? page.citations : [];
  const images: any[] = Array.isArray(page.images) ? page.images : [];
  const metadata = page.metadata || {};
  const stats = page.stats || {};

  // Truncate content to maxBytes characters when specified, otherwise to 20000
  const maxChars = options.maxBytes ? options.maxBytes : 20000;
  const truncatedContent = content.length > maxChars
    ? content.slice(0, maxChars) + `\n\n[Truncated — full article is ${content.length} chars. ${citations.length} citations available.]`
    : content;

  // Build citations section (top 10). Citations without a URL are dropped,
  // otherwise they would render as "[title](undefined)".
  const topCitations = citations.filter((c: any) => c && c.url).slice(0, 10);
  const citationBlock = topCitations.length > 0
    ? '\n\n## Sources\n' + topCitations.map((c: any, i: number) =>
        `${i + 1}. [${c.title || 'Source'}](${c.url})`
      ).join('\n')
    : '';

  // Build stats line (only when a view count is present)
  const statsLine = stats.totalViews
    ? `\n\nViews: ${Number(stats.totalViews).toLocaleString()} | Quality: ${stats.qualityScore || 'N/A'} | Language: ${metadata.language || 'en'}`
    : '';

  const resultImages = images.slice(0, 5).map((img: any) => ({
    alt: img.caption || title,
    src: img.url || '',
  }));

  // First link is the article itself, followed by the citation links
  const resultLinks: Array<{ text: string; href: string }> = [
    { text: 'Full article', href: `https://grokipedia.com/wiki/${slug}` },
    ...topCitations.map((c: any) => ({ text: c.title || 'Source', href: c.url })),
  ];

  const body = truncatedContent + citationBlock + statsLine;

  return {
    url,
    title,
    author: metadata.lastEditor || null,
    description: page.description || null,
    content: body,
    links: resultLinks,
    images: resultImages,
    metadata: {
      type: 'article',
      // lastModified is multiplied by 1000 before building the Date (presumably epoch seconds — confirm)
      publishedAt: metadata.lastModified ? new Date(metadata.lastModified * 1000).toISOString() : null,
      source: 'grokipedia-api',
      canonical: `https://grokipedia.com/wiki/${slug}`,
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(body) },
  };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Run a full-text search against the API and format the hits as a numbered Markdown list.
 *
 * @param apiBase - Base URL of the API.
 * @param query - Decoded search query.
 * @param url - Original page URL, echoed back in the result.
 * @param options - `skipSsrf` is forwarded to safeFetch.
 * @returns The search results, or `null` on a non-200 response, invalid JSON, or no usable hits.
 */
async function decodeSearch(
  apiBase: string,
  query: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  const apiUrl = `${apiBase}/full-text-search?query=${encodeURIComponent(query)}&limit=10`;
  const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf });
  if (!result || result.status !== 200) return null;

  let data: any;
  try {
    data = JSON.parse(result.body);
  } catch {
    return null;
  }

  // Tolerate an unexpected payload shape, and drop hits without a slug:
  // they cannot be linked and would otherwise yield ".../wiki/undefined".
  const rawResults: any[] = Array.isArray(data?.results) ? data.results : [];
  const results = rawResults.filter((r: any) => r && r.slug);
  if (results.length === 0) return null;

  const content = results.map((r: any, i: number) => {
    const views = r.viewCount ? ` (${Number(r.viewCount).toLocaleString()} views)` : '';
    // <em> highlight tags become Markdown bold; newlines are flattened to keep one hit per entry
    const snippet = (r.snippet || '').replace(/<\/?em>/g, '**').replace(/\n/g, ' ').trim();
    // Same title fallback as the links list below
    return `${i + 1}. **[${r.title || r.slug}](https://grokipedia.com/wiki/${r.slug})**${views}\n   ${snippet}`;
  }).join('\n\n');

  const links = results.map((r: any) => ({
    text: r.title || r.slug,
    href: `https://grokipedia.com/wiki/${r.slug}`,
  }));

  return {
    url,
    title: `Grokipedia search: "${query}"`,
    author: null,
    description: `${results.length} results for "${query}"`,
    content,
    links,
    images: [],
    metadata: {
      type: 'search-results',
      publishedAt: null,
      source: 'grokipedia-api',
      canonical: null,
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(content) },
  };
}
|
|
193
|
+
|
|
194
|
+
/**
 * Build a summary of the Grokipedia homepage from the `/stats` and
 * `/list-edit-requests` API endpoints.
 *
 * Either endpoint may fail; missing data falls back to zeroes or an omitted
 * "Recent Activity" section.
 *
 * @param apiBase - Base URL of the API.
 * @param url - Original page URL, echoed back in the result.
 * @param options - `skipSsrf` is forwarded to safeFetch.
 * @returns A ReadResult describing the site.
 */
async function decodeHomepage(
  apiBase: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  // Fetch one endpoint and parse its JSON body; anything other than a 200
  // response carrying a JSON object yields an empty object.
  const fetchJson = async (endpoint: string): Promise<any> => {
    const response = await safeFetch(`${apiBase}${endpoint}`, { skipSsrf: options.skipSsrf });
    if (response?.status !== 200) return {};
    try {
      // A body of `null` parses fine but would break the property reads below
      return JSON.parse(response.body) ?? {};
    } catch {
      return {};
    }
  };

  // The two requests are independent, so issue them concurrently
  const [statsData, editsData] = await Promise.all([
    fetchJson('/stats'),
    fetchJson('/list-edit-requests?limit=5'),
  ]);

  const totalPages = Number(statsData.totalPages || 0).toLocaleString();
  const indexGB = (Number(statsData.indexSizeBytes || 0) / (1024 ** 3)).toFixed(1);

  let content = `# Grokipedia\n\nAn open source, comprehensive collection of all knowledge.\n\n`;
  content += `**${totalPages} articles** | **${indexGB} GB index**\n\n`;

  const edits: any[] = Array.isArray(editsData.editRequests) ? editsData.editRequests : [];
  if (edits.length > 0) {
    content += `## Recent Activity\n`;
    for (const edit of edits) {
      const article = edit.slug?.replace(/_/g, ' ') || 'Unknown';
      const editor = edit.userId || 'Anonymous';
      // Strip the enum prefix and humanise; fall back to a generic label
      // instead of printing "undefined" when the type is absent.
      const kind = typeof edit.type === 'string'
        ? edit.type.replace('EDIT_REQUEST_TYPE_', '').toLowerCase().replace(/_/g, ' ')
        : 'edit';
      content += `- **${article}** — edited by ${editor} (${kind})\n`;
    }
  }

  return {
    url,
    title: 'Grokipedia',
    author: null,
    description: `Open knowledge base with ${totalPages} articles`,
    content,
    links: [],
    images: [],
    metadata: {
      type: 'website',
      publishedAt: null,
      source: 'grokipedia-api',
      canonical: 'https://grokipedia.com',
      siteName: 'Grokipedia',
    },
    cost: { tokens: estimateTokens(content) },
  };
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
// src/read/decoders/hackernews.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
// Default Hacker News API host; decode() uses it unless options._apiBaseUrl is set.
const DEFAULT_API_BASE = 'https://hacker-news.firebaseio.com';
|
|
6
|
+
|
|
7
|
+
/**
 * Approximate the token cost of a piece of text.
 *
 * @param text - Text whose cost should be estimated.
 * @returns The character count divided by four, rounded up.
 */
function estimateTokens(text: string): number {
  const charCount = text.length;
  const quarter = charCount / 4;
  return Math.ceil(quarter);
}
|
|
10
|
+
|
|
11
|
+
/**
 * Decoder for news.ycombinator.com: item pages (`item?id=N`) and the front page.
 */
export const hackernewsDecoder: Decoder = {
  name: 'hackernews',
  patterns: [
    /news\.ycombinator\.com\/item\?id=\d+/,
    /news\.ycombinator\.com\/?(?:\?|$)/,
  ],

  /**
   * Decode an item page when the URL carries `item?id=<digits>`, otherwise the front page.
   *
   * @param url - Page URL to decode.
   * @param options - `skipSsrf` is forwarded to the fetch calls; `_apiBaseUrl` overrides the API host.
   * @returns The decoded page, or `null` on failure.
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const baseUrl = options._apiBaseUrl || DEFAULT_API_BASE;
      const passthrough = { skipSsrf: options.skipSsrf };

      // An item id in the URL selects the item view; anything else is the front page
      const idMatch = url.match(/item\?id=(\d+)/);
      return idMatch
        ? decodeItem(url, idMatch[1], baseUrl, passthrough)
        : decodeFrontPage(url, baseUrl, passthrough);
    } catch {
      return null;
    }
  },
};
|
|
36
|
+
|
|
37
|
+
/**
 * Fetch a single Hacker News item plus its first ten child comments and
 * convert them to a ReadResult.
 *
 * @param url - Original page URL, echoed back in the result.
 * @param id - Item id extracted from the URL.
 * @param apiBase - Base URL of the API.
 * @param fetchOpts - Options forwarded to safeFetch.
 * @returns The decoded item, or `null` on a non-200 response, invalid JSON,
 *   an empty item, or any unexpected error.
 */
async function decodeItem(
  url: string,
  id: string,
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<ReadResult | null> {
  try {
    const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
    if (!result || result.status !== 200) return null;

    let item: any;
    try {
      item = JSON.parse(result.body);
    } catch {
      return null;
    }

    // A body of `null` parses successfully but carries no item
    if (!item) return null;

    const title = item.title || null;
    const author = item.by || null;
    const score = item.score ?? 0;
    const itemUrl = item.url || null;
    const text = item.text || '';

    // Fetch top 10 comments
    const kids: number[] = Array.isArray(item.kids) ? item.kids : [];
    const commentIds = kids.slice(0, 10);
    const comments = await fetchComments(commentIds, apiBase, fetchOpts);

    const commentText = comments
      .map((c: any) => `${c.by || '[deleted]'}: ${c.text || '[deleted]'}`)
      .join('\n\n');

    // Content layout: item text (if any), score line, then comments after a rule
    const contentParts: string[] = [];
    if (text) contentParts.push(text);
    contentParts.push(`Score: ${score} | ${kids.length} comments`);
    if (commentText) contentParts.push(`---\n${commentText}`);
    const content = contentParts.join('\n\n');

    const links: Array<{ text: string; href: string }> = [];
    if (itemUrl) {
      links.push({ text: title || 'Link', href: itemUrl });
    }

    return {
      url,
      title,
      author,
      // Use the same placeholder as the comment list rather than printing "by null"
      description: `HN ${item.type || 'story'} by ${author ?? '[deleted]'} (${score} points)`,
      content,
      links,
      images: [],
      metadata: {
        type: item.type || 'story',
        // item.time is multiplied by 1000 before building the Date (presumably epoch seconds — confirm)
        publishedAt: item.time ? new Date(item.time * 1000).toISOString() : null,
        source: 'hackernews-firebase',
        canonical: `https://news.ycombinator.com/item?id=${id}`,
        siteName: 'Hacker News',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Build a listing of the first ten entries returned by `/v0/topstories.json`.
 *
 * @param url - Original page URL, echoed back in the result.
 * @param apiBase - Base URL of the API.
 * @param fetchOpts - Options forwarded to safeFetch.
 * @returns The listing, or `null` on a non-200 response, invalid JSON,
 *   a non-array payload, or any unexpected error.
 */
async function decodeFrontPage(
  url: string,
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<ReadResult | null> {
  try {
    const response = await safeFetch(`${apiBase}/v0/topstories.json`, fetchOpts);
    if (!response || response.status !== 200) {
      return null;
    }

    let parsedIds: number[];
    try {
      parsedIds = JSON.parse(response.body);
    } catch {
      return null;
    }
    if (!Array.isArray(parsedIds)) {
      return null;
    }

    // Only the first ten ids are resolved to full stories
    const stories = await fetchStories(parsedIds.slice(0, 10), apiBase, fetchOpts);

    // One numbered line per story, plus a link for every story that has a URL
    const lines: string[] = [];
    const links: Array<{ text: any; href: any }> = [];
    stories.forEach((story: any, index: number) => {
      const heading = story.title || '[untitled]';
      const points = story.score ?? 0;
      const replies = story.descendants ?? 0;
      const poster = story.by || '[deleted]';
      lines.push(`${index + 1}. ${heading} (${points} pts, ${replies} comments) by ${poster}`);
      if (story.url) {
        links.push({ text: story.title || 'Link', href: story.url });
      }
    });
    const content = lines.join('\n');

    return {
      url,
      title: 'Hacker News — Top Stories',
      author: null,
      description: `Top ${stories.length} stories`,
      content,
      links,
      images: [],
      metadata: {
        type: 'listing',
        publishedAt: null,
        source: 'hackernews-firebase',
        canonical: 'https://news.ycombinator.com/',
        siteName: 'Hacker News',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
|
|
155
|
+
|
|
156
|
+
/**
 * Fetch the given comment items concurrently.
 *
 * Comments that fail to load, fail to parse, are empty, or are flagged
 * `deleted` are skipped. The surviving comments keep the order of `ids`.
 *
 * @param ids - Comment item ids to fetch.
 * @param apiBase - Base URL of the API.
 * @param fetchOpts - Options forwarded to safeFetch.
 * @returns The successfully loaded, non-deleted comments.
 */
async function fetchComments(
  ids: number[],
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<any[]> {
  // The requests are independent, so run them in parallel instead of one by one.
  // Promise.all keeps results in input order.
  const loaded = await Promise.all(
    ids.map(async (id): Promise<any> => {
      try {
        const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
        if (!result || result.status !== 200) return null;
        const comment = JSON.parse(result.body);
        return comment && !comment.deleted ? comment : null;
      } catch {
        // skip failed comments
        return null;
      }
    }),
  );
  return loaded.filter((comment) => comment !== null);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Fetch the given story items concurrently.
 *
 * Stories that fail to load, fail to parse, or are empty are skipped.
 * The surviving stories keep the order of `ids`.
 *
 * @param ids - Story item ids to fetch.
 * @param apiBase - Base URL of the API.
 * @param fetchOpts - Options forwarded to safeFetch.
 * @returns The successfully loaded stories.
 */
async function fetchStories(
  ids: number[],
  apiBase: string,
  fetchOpts: { skipSsrf?: boolean },
): Promise<any[]> {
  // The requests are independent, so run them in parallel instead of one by one.
  // Promise.all keeps results in input order.
  const loaded = await Promise.all(
    ids.map(async (id): Promise<any> => {
      try {
        const result = await safeFetch(`${apiBase}/v0/item/${id}.json`, fetchOpts);
        if (!result || result.status !== 200) return null;
        const story = JSON.parse(result.body);
        return story ? story : null;
      } catch {
        // skip failed stories
        return null;
      }
    }),
  );
  return loaded.filter((story) => story !== null);
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// src/read/decoders/index.ts
|
|
2
|
+
import type { Decoder } from '../types.js';
|
|
3
|
+
import { redditDecoder } from './reddit.js';
|
|
4
|
+
import { youtubeDecoder } from './youtube.js';
|
|
5
|
+
import { wikipediaDecoder } from './wikipedia.js';
|
|
6
|
+
import { hackernewsDecoder } from './hackernews.js';
|
|
7
|
+
import { grokipediaDecoder } from './grokipedia.js';
|
|
8
|
+
import { twitterDecoder } from './twitter.js';
|
|
9
|
+
import { deepwikiDecoder } from './deepwiki.js';
|
|
10
|
+
|
|
11
|
+
// Registered decoders, checked in this order; the first one with a matching pattern wins.
const decoders: Decoder[] = [
  redditDecoder,
  youtubeDecoder,
  wikipediaDecoder,
  hackernewsDecoder,
  grokipediaDecoder,
  twitterDecoder,
  deepwikiDecoder,
];

/**
 * Look up the decoder responsible for a URL.
 *
 * @param url - URL to test against each decoder's patterns.
 * @returns The first decoder with at least one matching pattern, or `null` if none match.
 */
export function findDecoder(url: string): Decoder | null {
  for (const candidate of decoders) {
    for (const pattern of candidate.patterns) {
      if (pattern.test(url)) {
        return candidate;
      }
    }
  }
  return null;
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// src/read/decoders/reddit.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Approximate the token cost of a piece of text.
 *
 * @param text - Text whose cost should be estimated.
 * @returns The character count divided by four, rounded up.
 */
function estimateTokens(text: string): number {
  const charCount = text.length;
  const quarter = charCount / 4;
  return Math.ceil(quarter);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Decoder for reddit.com post, subreddit, and user pages. It requests the
 * `.json` variant of the page URL and dispatches on the shape of the response.
 */
export const redditDecoder: Decoder = {
  name: 'reddit',
  patterns: [
    /reddit\.com\/r\/[^/]+\/comments\//,
    /reddit\.com\/r\/[^/]+\/?$/,
    /reddit\.com\/r\/[^/]+\/?(?:\?|$)/,
    /reddit\.com\/user\/[^/]+/,
  ],

  /**
   * Fetch the JSON form of a Reddit page and decode it.
   *
   * @param url - Page URL to decode.
   * @param options - `skipSsrf` is forwarded to safeFetch.
   * @returns The decoded post or listing, or `null` on a non-200 response,
   *   invalid JSON, an unrecognised payload shape, or any unexpected error.
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      // Drop any #fragment first: otherwise, for a URL without a query string,
      // ".json" would be appended after the fragment instead of to the path.
      const withoutFragment = url.replace(/#.*$/, '');

      // Append .json to the path (before any query string) unless it is already there
      const jsonUrl = /\.json\/?(?:\?|$)/.test(withoutFragment)
        ? withoutFragment
        : withoutFragment.replace(/\/?(\?|$)/, '.json$1');

      const result = await safeFetch(jsonUrl, { skipSsrf: options.skipSsrf });
      if (!result || result.status !== 200) return null;

      let data: any;
      try {
        data = JSON.parse(result.body);
      } catch {
        return null;
      }

      // Post page: response is an array [post, comments]
      if (Array.isArray(data) && data.length >= 1) {
        return decodePostPage(url, data);
      }

      // Subreddit/user listing: response has data.children
      if (data && data.data && Array.isArray(data.data.children)) {
        return decodeListingPage(url, data);
      }

      return null;
    } catch {
      return null;
    }
  },
};
|
|
49
|
+
|
|
50
|
+
/**
 * Convert a Reddit post-page payload (`[postListing, commentListing]`) into a ReadResult.
 *
 * Up to 25 entries of kind `t1` from the second listing are included as comments.
 *
 * @param url - Original page URL, echoed back in the result.
 * @param data - Parsed JSON array returned for the post page.
 * @returns The decoded post, or `null` when the post data is missing or an error occurs.
 */
function decodePostPage(url: string, data: any[]): ReadResult | null {
  try {
    const postData = data[0]?.data?.children?.[0]?.data;
    if (!postData) return null;

    const title = postData.title || null;
    const author = postData.author || null;
    const selftext = postData.selftext || '';
    const score = postData.score ?? 0;
    const subreddit = postData.subreddit || '';

    // Extract comments
    const commentChildren = data[1]?.data?.children || [];
    const comments = commentChildren
      .filter((c: any) => c.kind === 't1' && c.data)
      .slice(0, 25)
      .map((c: any) => ({
        author: c.data.author || '[deleted]',
        body: c.data.body || '',
        score: c.data.score ?? 0,
      }));

    const commentText = comments
      .map((c: any) => `${c.author} (${c.score} pts): ${c.body}`)
      .join('\n\n');

    const content = selftext
      ? `${selftext}\n\n---\nScore: ${score} | ${comments.length} comments\n\n${commentText}`
      : `Score: ${score} | ${comments.length} comments\n\n${commentText}`;

    // Only emit a link when the post points somewhere other than itself.
    // The canonical field below treats permalink as a site-relative path, so a strict
    // equality check against an absolute url never matches; compare by suffix instead.
    const links: Array<{ text: string; href: string }> = [];
    const permalink = typeof postData.permalink === 'string' ? postData.permalink : '';
    const pointsAtItself =
      permalink !== '' && typeof postData.url === 'string' && postData.url.endsWith(permalink);
    if (postData.url && !pointsAtItself) {
      links.push({ text: 'Link', href: postData.url });
    }

    return {
      url,
      title,
      author,
      // Use the same placeholder as the comment list rather than printing "u/null"
      description: `r/${subreddit} post by u/${author ?? '[deleted]'} (${score} points)`,
      content,
      links,
      images: [],
      metadata: {
        type: 'discussion',
        // created_utc is multiplied by 1000 before building the Date (presumably epoch seconds — confirm)
        publishedAt: postData.created_utc ? new Date(postData.created_utc * 1000).toISOString() : null,
        source: 'reddit-json',
        canonical: postData.permalink ? `https://www.reddit.com${postData.permalink}` : null,
        siteName: 'Reddit',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Convert a Reddit listing payload (subreddit or user page) into a ReadResult
 * containing up to 25 numbered entries.
 *
 * @param url - Original page URL; also used to derive the `r/<name>` or `u/<name>` title.
 * @param data - Parsed listing JSON with entries under `data.children`.
 * @returns The decoded listing, or `null` if an error occurs while building it.
 */
function decodeListingPage(url: string, data: any): ReadResult | null {
  try {
    const hasData = (child: any) => child.data;
    const toPost = (child: any) => {
      const d = child.data;
      return {
        title: d.title || d.link_title || '',
        author: d.author || '[deleted]',
        score: d.score ?? 0,
        numComments: d.num_comments ?? 0,
        permalink: d.permalink || '',
        subreddit: d.subreddit || '',
      };
    };

    const rawChildren = data.data.children || [];
    const withData = rawChildren.filter(hasData);
    const posts = withData.slice(0, 25).map(toPost);

    // One numbered line per post
    const lines = posts.map(
      (post: any, index: number) =>
        `${index + 1}. ${post.title} (${post.score} pts, ${post.numComments} comments) by u/${post.author}`,
    );
    const content = lines.join('\n');

    // Link every post that has a permalink
    const linkable = posts.filter((post: any) => post.permalink);
    const links = linkable.map((post: any) => ({
      text: post.title,
      href: `https://www.reddit.com${post.permalink}`,
    }));

    // Title comes from the URL: subreddit name first, then user name, else a generic label
    const subredditMatch = url.match(/\/r\/([^/]+)/);
    const userMatch = url.match(/\/user\/([^/]+)/);
    const subredditName = subredditMatch ? subredditMatch[1] : null;
    const userName = userMatch ? userMatch[1] : null;

    let title: string;
    if (subredditName) {
      title = `r/${subredditName}`;
    } else if (userName) {
      title = `u/${userName}`;
    } else {
      title = 'Reddit listing';
    }

    return {
      url,
      title,
      author: null,
      description: `${posts.length} posts`,
      content,
      links,
      images: [],
      metadata: {
        type: 'listing',
        publishedAt: null,
        source: 'reddit-json',
        canonical: null,
        siteName: 'Reddit',
      },
      cost: { tokens: estimateTokens(content) },
    };
  } catch {
    return null;
  }
}
|