awess 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -2
- package/src/diff.ts +42 -0
- package/src/enricher.ts +190 -8
- package/src/index.ts +50 -6
- package/src/types.ts +7 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "awess",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "MCP server for searching curated awesome lists and their contents",
   "type": "module",
   "main": "src/index.ts",
@@ -34,7 +34,6 @@
     "@modelcontextprotocol/sdk": "^1.0.0",
     "mdast-util-from-markdown": "^2.0.2",
     "minisearch": "^7.2.0",
-    "octokit": "^5.0.5",
     "zod": "^4.3.5"
   }
 }
package/src/diff.ts
ADDED
@@ -0,0 +1,42 @@
+import type { Item, DiffResult } from "./types";
+
+export function diffItems(oldItems: Item[], newItems: Item[]): DiffResult {
+  const oldByUrl = new Map(oldItems.map(i => [i.url, i]));
+  const newByUrl = new Map(newItems.map(i => [i.url, i]));
+
+  const added: Item[] = [];
+  const removed: Item[] = [];
+  const unchanged: Item[] = [];
+  const updated: Item[] = [];
+
+  // Check new items against old
+  for (const newItem of newItems) {
+    const oldItem = oldByUrl.get(newItem.url);
+    if (!oldItem) {
+      added.push(newItem);
+    } else if (oldItem.name === newItem.name && oldItem.description === newItem.description) {
+      // Unchanged - preserve enrichment data from old item
+      unchanged.push({
+        ...newItem,
+        github: oldItem.github,
+        lastEnriched: oldItem.lastEnriched,
+      });
+    } else {
+      // Updated - preserve enrichment data, use new metadata
+      updated.push({
+        ...newItem,
+        github: oldItem.github,
+        lastEnriched: oldItem.lastEnriched,
+      });
+    }
+  }
+
+  // Find removed items
+  for (const oldItem of oldItems) {
+    if (!newByUrl.has(oldItem.url)) {
+      removed.push(oldItem);
+    }
+  }
+
+  return { added, removed, unchanged, updated };
+}
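For orientation, a minimal usage sketch of the new module. It assumes Item requires only the name, url, and description fields that diffItems actually reads (plus the optional enrichment fields); the sample data is invented:

import { diffItems } from "./diff";
import type { Item } from "./types";

const oldItems: Item[] = [
  { name: "foo", url: "https://github.com/a/foo", description: "Foo", lastEnriched: "2024-01-01" },
];
const newItems: Item[] = [
  { name: "foo", url: "https://github.com/a/foo", description: "Foo, now faster" }, // same URL, new description
  { name: "bar", url: "https://github.com/a/bar", description: "Bar" },             // URL not in the old list
];

const { added, removed, unchanged, updated } = diffItems(oldItems, newItems);
// added     -> [bar]  (URL only in the new list)
// updated   -> [foo]  (same URL, changed description; lastEnriched carried over)
// removed   -> []     (every old URL is still present)
// unchanged -> []

Keying on URL rather than name means renames and description edits never discard enrichment data, and a caller only needs to re-enrich the added bucket.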
package/src/enricher.ts
CHANGED
@@ -1,10 +1,29 @@
 // src/enricher.ts
-import { Octokit } from "octokit";
 import type { Item } from "./types";
 
 export function extractGitHubRepo(url: string): string | null {
   const match = url.match(/github\.com\/([^\/]+\/[^\/]+)/);
-
+  if (!match) return null;
+
+  // Clean up repo name: remove .git, #readme, query params, etc.
+  let repo = match[1]
+    .replace(/\.git$/, "")
+    .replace(/#.*$/, "")
+    .replace(/\?.*$/, "");
+
+  // Skip non-repo paths like "topics/awesome", "sponsors/foo"
+  const invalidPrefixes = ["topics", "sponsors", "orgs", "settings", "marketplace"];
+  if (invalidPrefixes.some(p => repo.startsWith(p + "/"))) {
+    return null;
+  }
+
+  return repo;
+}
+
+// Sleep helper with jitter
+function sleep(ms: number, jitter = 0.2): Promise<void> {
+  const jitterMs = ms * jitter * (Math.random() - 0.5) * 2;
+  return new Promise(resolve => setTimeout(resolve, ms + jitterMs));
 }
 
 export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
@@ -14,8 +33,6 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
     return items;
   }
 
-  const octokit = new Octokit({ auth: token });
-
   // Extract unique GitHub repos
   const repoMap = new Map<string, Item[]>();
   for (const item of items) {
@@ -29,13 +46,26 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
   const repos = Array.from(repoMap.keys());
   console.log(`Enriching ${repos.length} unique repos...`);
 
-  //
-  const batchSize =
+  // Tuned for GitHub's secondary rate limits
+  const batchSize = 50; // Smaller batches = lower query cost
+  const baseDelayMs = 500; // Base delay between batches
+  let currentDelay = baseDelayMs;
+  let consecutiveErrors = 0;
+
   for (let i = 0; i < repos.length; i += batchSize) {
     const batch = repos.slice(i, i + batchSize);
+    const batchNum = Math.floor(i / batchSize) + 1;
+    const totalBatches = Math.ceil(repos.length / batchSize);
+
+    // Add delay between batches (except first)
+    if (i > 0) {
+      await sleep(currentDelay);
+    }
 
+    // Include rateLimit in query to monitor usage
     const query = `
       query {
+        rateLimit { cost remaining resetAt }
         ${batch.map((repo, idx) => {
           const [owner, name] = repo.split("/");
           return `repo${idx}: repository(owner: "${owner}", name: "${name}") {
@@ -48,8 +78,55 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
     `;
 
     try {
-
+      // Use fetch instead of octokit.graphql to handle partial results
+      const response = await fetch("https://api.github.com/graphql", {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${token}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({ query }),
+      });
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}`);
+      }
+
+      const json: any = await response.json();
+
+      // Check for complete failure (errors but no data)
+      if (json.errors && !json.data) {
+        throw new Error(json.errors[0]?.message || "GraphQL query failed");
+      }
+
+      // Log any partial errors (repos that don't exist, etc.)
+      if (json.errors) {
+        const failedRepos = json.errors
+          .filter((e: any) => e.path?.[0]?.startsWith("repo"))
+          .map((e: any) => batch[parseInt(e.path[0].slice(4))])
+          .filter(Boolean);
+        if (failedRepos.length > 0) {
+          console.log(`  [${batchNum}/${totalBatches}] ${failedRepos.length} repos not found`);
+        }
+      }
+
+      const result = json.data;
+
+      // Log rate limit status periodically
+      const rl = result.rateLimit;
+      if (batchNum % 10 === 0 || rl?.remaining < 100) {
+        console.log(`  [${batchNum}/${totalBatches}] Rate limit: ${rl?.remaining} remaining, cost: ${rl?.cost}`);
+      } else {
+        process.stdout.write(`  [${batchNum}/${totalBatches}]\r`);
+      }
 
+      // If running low on points, slow down
+      if (rl?.remaining < 500) {
+        currentDelay = Math.min(currentDelay * 1.5, 5000);
+        console.log(`  ⚠️ Low rate limit (${rl.remaining}), increasing delay to ${currentDelay}ms`);
+      }
+
+      // Process results - now handles partial data correctly
      for (let j = 0; j < batch.length; j++) {
        const data = result[`repo${j}`];
        if (data) {
@@ -64,10 +141,115 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
          }
        }
      }
+
+      // Reset on success
+      consecutiveErrors = 0;
+      currentDelay = Math.max(currentDelay * 0.9, baseDelayMs); // Gradually speed up
+
     } catch (error: any) {
-
+      consecutiveErrors++;
+
+      // Check for secondary rate limit
+      if (error.message?.includes("SecondaryRateLimit") || error.message?.includes("403")) {
+        // Exponential backoff with jitter
+        const backoffMs = Math.min(baseDelayMs * Math.pow(2, consecutiveErrors), 60000);
+        console.log(`  ⚠️ Secondary rate limit hit, backing off for ${backoffMs}ms...`);
+        await sleep(backoffMs, 0.3);
+        currentDelay = backoffMs; // Keep the higher delay
+        i -= batchSize; // Retry this batch
+        continue;
+      }
+
+      // Log other errors but continue
+      console.error(`  Error batch ${batchNum}: ${error.message?.slice(0, 100)}`);
+
+      // If too many consecutive errors, slow down
+      if (consecutiveErrors >= 3) {
+        currentDelay = Math.min(currentDelay * 2, 10000);
+        console.log(`  Multiple errors, slowing to ${currentDelay}ms`);
+      }
     }
   }
 
+  console.log(""); // Clear the \r line
   return items;
 }
+
+export async function batchQueryListRepos(
+  repos: string[]
+): Promise<Map<string, { pushedAt: string } | null>> {
+  const results = new Map<string, { pushedAt: string } | null>();
+
+  const token = process.env.GITHUB_TOKEN;
+  if (!token) {
+    console.warn("No GITHUB_TOKEN - skipping list repo query");
+    return results;
+  }
+
+  const BATCH_SIZE = 50;
+
+  for (let i = 0; i < repos.length; i += BATCH_SIZE) {
+    const batch = repos.slice(i, i + BATCH_SIZE);
+    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
+    const totalBatches = Math.ceil(repos.length / BATCH_SIZE);
+    process.stdout.write(`  [${batchNum}/${totalBatches}]\r`);
+
+    // Build GraphQL query for this batch
+    const repoQueries = batch.map((repo, idx) => {
+      const [owner, name] = repo.split("/");
+      return `repo${idx}: repository(owner: "${owner}", name: "${name}") {
+        pushedAt
+      }`;
+    });
+
+    const query = `query { ${repoQueries.join("\n")} }`;
+
+    try {
+      const response = await fetch("https://api.github.com/graphql", {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${token}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({ query }),
+      });
+
+      if (!response.ok) {
+        console.error(`  Error batch ${batchNum}: HTTP ${response.status}`);
+        for (const repo of batch) {
+          results.set(repo, null);
+        }
+        continue;
+      }
+
+      const json = await response.json();
+
+      if (json.errors && !json.data) {
+        console.error(`  Error batch ${batchNum}:`, json.errors[0]?.message);
+        // Mark all repos in batch as null
+        for (const repo of batch) {
+          results.set(repo, null);
+        }
+        continue;
+      }
+
+      // Extract results
+      batch.forEach((repo, idx) => {
+        const data = json.data?.[`repo${idx}`];
+        if (data?.pushedAt) {
+          results.set(repo, { pushedAt: data.pushedAt });
+        } else {
+          results.set(repo, null);
+        }
+      });
+    } catch (error: any) {
+      console.error(`  Error batch ${batchNum}:`, error.message);
+      for (const repo of batch) {
+        results.set(repo, null);
+      }
    }
  }
+
+  console.log(); // newline after progress
+  return results;
+}
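Because the GraphQL request is assembled by string interpolation, it may help to see the shape of one batch's query. A sketch for a two-repo batch, with invented repo names and the per-repo field selection abbreviated to pushedAt (the real selection sits in the elided middle of the hunk above):

const batch = ["sindresorhus/awesome", "vinta/awesome-python"];
const query = `
  query {
    rateLimit { cost remaining resetAt }
    ${batch.map((repo, idx) => {
      const [owner, name] = repo.split("/");
      return `repo${idx}: repository(owner: "${owner}", name: "${name}") { pushedAt }`;
    }).join("\n")}
  }`;
// Each alias comes back as a top-level key of json.data (repo0, repo1, ...).
// A repo that no longer exists yields data.repoN === null plus an entry in
// json.errors whose path is ["repoN"] - partial data, not a failed request.
// That is why this release swaps octokit.graphql, which throws on any
// GraphQL error, for a raw fetch that can keep the successful aliases.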
package/src/index.ts
CHANGED
@@ -14,14 +14,25 @@ interface AwesomeList {
   source: string;
 }
 
-//
-const
+// CDN URLs - jsDelivr primary (faster, global CDN), GitHub raw fallback
+const JSDELIVR_URL = "https://cdn.jsdelivr.net/gh/arimxyer/ass@main/data";
+const GITHUB_RAW_URL = "https://raw.githubusercontent.com/arimxyer/ass/main/data";
 
-// Load data from
+// Load data from CDN, fallback to local for development
 async function loadData<T>(filename: string): Promise<T> {
-  // Try
+  // Try jsDelivr first (faster global CDN)
   try {
-    const res = await fetch(`${
+    const res = await fetch(`${JSDELIVR_URL}/${filename}`);
+    if (res.ok) {
+      return res.json();
+    }
+  } catch {
+    // jsDelivr failed, try GitHub raw
+  }
+
+  // Try GitHub raw as fallback
+  try {
+    const res = await fetch(`${GITHUB_RAW_URL}/${filename}`);
     if (res.ok) {
       return res.json();
     }
@@ -34,6 +45,39 @@ async function loadData<T>(filename: string): Promise<T> {
   return Bun.file(localPath).json();
 }
 
+// Load gzipped data from CDN, fallback to local
+async function loadGzippedData<T>(filename: string): Promise<T> {
+  // Try jsDelivr first
+  try {
+    const res = await fetch(`${JSDELIVR_URL}/${filename}`);
+    if (res.ok) {
+      const compressed = new Uint8Array(await res.arrayBuffer());
+      const decompressed = Bun.gunzipSync(compressed);
+      return JSON.parse(new TextDecoder().decode(decompressed));
+    }
+  } catch {
+    // jsDelivr failed, try GitHub raw
+  }
+
+  // Try GitHub raw as fallback
+  try {
+    const res = await fetch(`${GITHUB_RAW_URL}/${filename}`);
+    if (res.ok) {
+      const compressed = new Uint8Array(await res.arrayBuffer());
+      const decompressed = Bun.gunzipSync(compressed);
+      return JSON.parse(new TextDecoder().decode(decompressed));
+    }
+  } catch {
+    // Remote failed, try local
+  }
+
+  // Fallback to local file
+  const localPath = new URL(`../data/${filename}`, import.meta.url);
+  const compressed = new Uint8Array(await Bun.file(localPath).arrayBuffer());
+  const decompressed = Bun.gunzipSync(compressed);
+  return JSON.parse(new TextDecoder().decode(decompressed));
+}
+
 // Load curated data
 const lists: AwesomeList[] = await loadData("lists.json");
 
@@ -225,7 +269,7 @@ server.tool(
   let itemsIndex: ItemsIndex | null = null;
 
   try {
-    itemsIndex = await
+    itemsIndex = await loadGzippedData<ItemsIndex>("items.json.gz");
     console.error(`Loaded ${itemsIndex?.itemCount} items from ${itemsIndex?.listCount} lists`);
   } catch {
     console.error("No items.json found - get_items will be unavailable");