awess 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "awess",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "MCP server for searching curated awesome lists and their contents",
   "type": "module",
   "main": "src/index.ts",
@@ -34,7 +34,6 @@
     "@modelcontextprotocol/sdk": "^1.0.0",
     "mdast-util-from-markdown": "^2.0.2",
     "minisearch": "^7.2.0",
-    "octokit": "^5.0.5",
     "zod": "^4.3.5"
   }
 }
package/src/diff.ts ADDED
@@ -0,0 +1,42 @@
+import type { Item, DiffResult } from "./types";
+
+export function diffItems(oldItems: Item[], newItems: Item[]): DiffResult {
+  const oldByUrl = new Map(oldItems.map(i => [i.url, i]));
+  const newByUrl = new Map(newItems.map(i => [i.url, i]));
+
+  const added: Item[] = [];
+  const removed: Item[] = [];
+  const unchanged: Item[] = [];
+  const updated: Item[] = [];
+
+  // Check new items against old
+  for (const newItem of newItems) {
+    const oldItem = oldByUrl.get(newItem.url);
+    if (!oldItem) {
+      added.push(newItem);
+    } else if (oldItem.name === newItem.name && oldItem.description === newItem.description) {
+      // Unchanged - preserve enrichment data from old item
+      unchanged.push({
+        ...newItem,
+        github: oldItem.github,
+        lastEnriched: oldItem.lastEnriched,
+      });
+    } else {
+      // Updated - preserve enrichment data, use new metadata
+      updated.push({
+        ...newItem,
+        github: oldItem.github,
+        lastEnriched: oldItem.lastEnriched,
+      });
+    }
+  }
+
+  // Find removed items
+  for (const oldItem of oldItems) {
+    if (!newByUrl.has(oldItem.url)) {
+      removed.push(oldItem);
+    }
+  }
+
+  return { added, removed, unchanged, updated };
+}
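
For orientation, a minimal usage sketch of the new diffItems helper. The item literals below are hypothetical (the full Item shape is not shown in this diff); matching is by url, and name/description decide unchanged vs. updated:

import { diffItems } from "./diff";
import type { Item } from "./types";

// Hypothetical items - only url, name, and description drive the diff
const oldItems = [
  { name: "foo", url: "https://github.com/a/foo", description: "old", lastEnriched: "2025-01-01" },
  { name: "bar", url: "https://github.com/a/bar", description: "gone" },
] as Item[];
const newItems = [
  { name: "foo", url: "https://github.com/a/foo", description: "new" }, // same url, changed description
  { name: "baz", url: "https://github.com/a/baz", description: "fresh" }, // url not seen before
] as Item[];

const diff = diffItems(oldItems, newItems);
// diff.added   -> [baz]
// diff.updated -> [foo], carrying github/lastEnriched over from the old item
// diff.removed -> [bar]
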
package/src/enricher.ts CHANGED
@@ -1,10 +1,29 @@
 // src/enricher.ts
-import { Octokit } from "octokit";
 import type { Item } from "./types";
 
 export function extractGitHubRepo(url: string): string | null {
   const match = url.match(/github\.com\/([^\/]+\/[^\/]+)/);
-  return match ? match[1].replace(/\.git$/, "") : null;
+  if (!match) return null;
+
+  // Clean up repo name: remove .git, #readme, query params, etc.
+  let repo = match[1]
+    .replace(/\.git$/, "")
+    .replace(/#.*$/, "")
+    .replace(/\?.*$/, "");
+
+  // Skip non-repo paths like "topics/awesome", "sponsors/foo"
+  const invalidPrefixes = ["topics", "sponsors", "orgs", "settings", "marketplace"];
+  if (invalidPrefixes.some(p => repo.startsWith(p + "/"))) {
+    return null;
+  }
+
+  return repo;
+}
+
+// Sleep helper with jitter
+function sleep(ms: number, jitter = 0.2): Promise<void> {
+  const jitterMs = ms * jitter * (Math.random() - 0.5) * 2;
+  return new Promise(resolve => setTimeout(resolve, ms + jitterMs));
 }
 
 export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
@@ -14,8 +33,6 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
     return items;
   }
 
-  const octokit = new Octokit({ auth: token });
-
   // Extract unique GitHub repos
   const repoMap = new Map<string, Item[]>();
   for (const item of items) {
@@ -29,13 +46,26 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
   const repos = Array.from(repoMap.keys());
   console.log(`Enriching ${repos.length} unique repos...`);
 
-  // Batch query using GraphQL (100 at a time)
-  const batchSize = 100;
+  // Tuned for GitHub's secondary rate limits
+  const batchSize = 50; // Smaller batches = lower query cost
+  const baseDelayMs = 500; // Base delay between batches
+  let currentDelay = baseDelayMs;
+  let consecutiveErrors = 0;
+
   for (let i = 0; i < repos.length; i += batchSize) {
     const batch = repos.slice(i, i + batchSize);
+    const batchNum = Math.floor(i / batchSize) + 1;
+    const totalBatches = Math.ceil(repos.length / batchSize);
+
+    // Add delay between batches (except first)
+    if (i > 0) {
+      await sleep(currentDelay);
+    }
 
+    // Include rateLimit in query to monitor usage
     const query = `
       query {
+        rateLimit { cost remaining resetAt }
        ${batch.map((repo, idx) => {
          const [owner, name] = repo.split("/");
          return `repo${idx}: repository(owner: "${owner}", name: "${name}") {
@@ -48,8 +78,55 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
     `;
 
     try {
-      const result: any = await octokit.graphql(query);
+      // Use fetch instead of octokit.graphql to handle partial results
+      const response = await fetch("https://api.github.com/graphql", {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${token}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({ query }),
+      });
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}`);
+      }
+
+      const json: any = await response.json();
+
+      // Check for complete failure (errors but no data)
+      if (json.errors && !json.data) {
+        throw new Error(json.errors[0]?.message || "GraphQL query failed");
+      }
+
+      // Log any partial errors (repos that don't exist, etc.)
+      if (json.errors) {
+        const failedRepos = json.errors
+          .filter((e: any) => e.path?.[0]?.startsWith("repo"))
+          .map((e: any) => batch[parseInt(e.path[0].slice(4))])
+          .filter(Boolean);
+        if (failedRepos.length > 0) {
+          console.log(`  [${batchNum}/${totalBatches}] ${failedRepos.length} repos not found`);
+        }
+      }
+
+      const result = json.data;
+
+      // Log rate limit status periodically
+      const rl = result.rateLimit;
+      if (batchNum % 10 === 0 || rl?.remaining < 100) {
+        console.log(`  [${batchNum}/${totalBatches}] Rate limit: ${rl?.remaining} remaining, cost: ${rl?.cost}`);
+      } else {
+        process.stdout.write(`  [${batchNum}/${totalBatches}]\r`);
+      }
 
+      // If running low on points, slow down
+      if (rl?.remaining < 500) {
+        currentDelay = Math.min(currentDelay * 1.5, 5000);
+        console.log(`  ⚠️ Low rate limit (${rl.remaining}), increasing delay to ${currentDelay}ms`);
+      }
+
+      // Process results - now handles partial data correctly
      for (let j = 0; j < batch.length; j++) {
        const data = result[`repo${j}`];
        if (data) {
@@ -64,10 +141,115 @@ export async function batchEnrichItems(items: Item[]): Promise<Item[]> {
          }
        }
      }
+
+      // Reset on success
+      consecutiveErrors = 0;
+      currentDelay = Math.max(currentDelay * 0.9, baseDelayMs); // Gradually speed up
+
     } catch (error: any) {
-      console.error(`Error enriching batch ${i}-${i + batchSize}:`, error.message);
+      consecutiveErrors++;
+
+      // Check for secondary rate limit
+      if (error.message?.includes("SecondaryRateLimit") || error.message?.includes("403")) {
+        // Exponential backoff with jitter
+        const backoffMs = Math.min(baseDelayMs * Math.pow(2, consecutiveErrors), 60000);
+        console.log(`  ⚠️ Secondary rate limit hit, backing off for ${backoffMs}ms...`);
+        await sleep(backoffMs, 0.3);
+        currentDelay = backoffMs; // Keep the higher delay
+        i -= batchSize; // Retry this batch
+        continue;
+      }
+
+      // Log other errors but continue
+      console.error(`  Error batch ${batchNum}: ${error.message?.slice(0, 100)}`);
+
+      // If too many consecutive errors, slow down
+      if (consecutiveErrors >= 3) {
+        currentDelay = Math.min(currentDelay * 2, 10000);
+        console.log(`  Multiple errors, slowing to ${currentDelay}ms`);
+      }
     }
   }
 
+  console.log(""); // Clear the \r line
   return items;
 }
+
+export async function batchQueryListRepos(
+  repos: string[]
+): Promise<Map<string, { pushedAt: string } | null>> {
+  const results = new Map<string, { pushedAt: string } | null>();
+
+  const token = process.env.GITHUB_TOKEN;
+  if (!token) {
+    console.warn("No GITHUB_TOKEN - skipping list repo query");
+    return results;
+  }
+
+  const BATCH_SIZE = 50;
+
+  for (let i = 0; i < repos.length; i += BATCH_SIZE) {
+    const batch = repos.slice(i, i + BATCH_SIZE);
+    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
+    const totalBatches = Math.ceil(repos.length / BATCH_SIZE);
+    process.stdout.write(`  [${batchNum}/${totalBatches}]\r`);
+
+    // Build GraphQL query for this batch
+    const repoQueries = batch.map((repo, idx) => {
+      const [owner, name] = repo.split("/");
+      return `repo${idx}: repository(owner: "${owner}", name: "${name}") {
+        pushedAt
+      }`;
+    });
+
+    const query = `query { ${repoQueries.join("\n")} }`;
+
+    try {
+      const response = await fetch("https://api.github.com/graphql", {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${token}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({ query }),
+      });
+
+      if (!response.ok) {
+        console.error(`  Error batch ${batchNum}: HTTP ${response.status}`);
+        for (const repo of batch) {
+          results.set(repo, null);
+        }
+        continue;
+      }
+
+      const json = await response.json();
+
+      if (json.errors && !json.data) {
+        console.error(`  Error batch ${batchNum}:`, json.errors[0]?.message);
+        // Mark all repos in batch as null
+        for (const repo of batch) {
+          results.set(repo, null);
+        }
+        continue;
+      }
+
+      // Extract results
+      batch.forEach((repo, idx) => {
+        const data = json.data?.[`repo${idx}`];
+        if (data?.pushedAt) {
+          results.set(repo, { pushedAt: data.pushedAt });
+        } else {
+          results.set(repo, null);
+        }
+      });
+    } catch (error: any) {
+      console.error(`  Error batch ${batchNum}:`, error.message);
+      for (const repo of batch) {
+        results.set(repo, null);
+      }
+    }
+  }
+
+  console.log(); // newline after progress
+  return results;
+}
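
As a sanity check on the tightened extractGitHubRepo, here is how it should behave on a few representative URLs (expected values derived from the regex and prefix list in the diff above; the URLs themselves are made up):

extractGitHubRepo("https://github.com/owner/repo.git");       // "owner/repo" (.git stripped)
extractGitHubRepo("https://github.com/owner/repo#readme");    // "owner/repo" (fragment stripped)
extractGitHubRepo("https://github.com/owner/repo?tab=stars"); // "owner/repo" (query stripped)
extractGitHubRepo("https://github.com/topics/awesome");       // null (non-repo path)
extractGitHubRepo("https://example.com/owner/repo");          // null (not a github.com URL)
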
package/src/index.ts CHANGED
@@ -14,14 +14,25 @@ interface AwesomeList {
   source: string;
 }
 
-// Data URL - GitHub raw (no file size limits, 5 min cache)
-const DATA_URL = "https://raw.githubusercontent.com/arimxyer/ass/main/data";
+// CDN URLs - jsDelivr primary (faster, global CDN), GitHub raw fallback
+const JSDELIVR_URL = "https://cdn.jsdelivr.net/gh/arimxyer/ass@main/data";
+const GITHUB_RAW_URL = "https://raw.githubusercontent.com/arimxyer/ass/main/data";
 
-// Load data from GitHub, fallback to local for development
+// Load data from CDN, fallback to local for development
 async function loadData<T>(filename: string): Promise<T> {
-  // Try remote first
+  // Try jsDelivr first (faster global CDN)
   try {
-    const res = await fetch(`${DATA_URL}/${filename}`);
+    const res = await fetch(`${JSDELIVR_URL}/${filename}`);
+    if (res.ok) {
+      return res.json();
+    }
+  } catch {
+    // jsDelivr failed, try GitHub raw
+  }
+
+  // Try GitHub raw as fallback
+  try {
+    const res = await fetch(`${GITHUB_RAW_URL}/${filename}`);
     if (res.ok) {
       return res.json();
     }
@@ -34,6 +45,39 @@ async function loadData<T>(filename: string): Promise<T> {
   return Bun.file(localPath).json();
 }
 
+// Load gzipped data from CDN, fallback to local
+async function loadGzippedData<T>(filename: string): Promise<T> {
+  // Try jsDelivr first
+  try {
+    const res = await fetch(`${JSDELIVR_URL}/${filename}`);
+    if (res.ok) {
+      const compressed = new Uint8Array(await res.arrayBuffer());
+      const decompressed = Bun.gunzipSync(compressed);
+      return JSON.parse(new TextDecoder().decode(decompressed));
+    }
+  } catch {
+    // jsDelivr failed, try GitHub raw
+  }
+
+  // Try GitHub raw as fallback
+  try {
+    const res = await fetch(`${GITHUB_RAW_URL}/${filename}`);
+    if (res.ok) {
+      const compressed = new Uint8Array(await res.arrayBuffer());
+      const decompressed = Bun.gunzipSync(compressed);
+      return JSON.parse(new TextDecoder().decode(decompressed));
+    }
+  } catch {
+    // Remote failed, try local
+  }
+
+  // Fallback to local file
+  const localPath = new URL(`../data/${filename}`, import.meta.url);
+  const compressed = new Uint8Array(await Bun.file(localPath).arrayBuffer());
+  const decompressed = Bun.gunzipSync(compressed);
+  return JSON.parse(new TextDecoder().decode(decompressed));
+}
+
 // Load curated data
 const lists: AwesomeList[] = await loadData("lists.json");
 
@@ -225,7 +269,7 @@ server.tool(
 let itemsIndex: ItemsIndex | null = null;
 
 try {
-  itemsIndex = await loadData<ItemsIndex>("items.json");
+  itemsIndex = await loadGzippedData<ItemsIndex>("items.json.gz");
   console.error(`Loaded ${itemsIndex?.itemCount} items from ${itemsIndex?.listCount} lists`);
 } catch {
   console.error("No items.json found - get_items will be unavailable");
package/src/types.ts CHANGED
@@ -26,3 +26,10 @@ export interface ItemsIndex {
   itemCount: number;
   lists: Record<string, ListEntry>;
 }
+
+export interface DiffResult {
+  added: Item[];
+  removed: Item[];
+  unchanged: Item[];
+  updated: Item[];
+}