mallmaverick-store-scraper 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/mcp-server.js +65 -38
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mallmaverick-store-scraper",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "MCP server + CLI for scraping shopping mall store directories. Hours-first layered pipeline + image classification.",
|
|
5
5
|
"main": "src/main.js",
|
|
6
6
|
"type": "commonjs",
|
package/src/mcp-server.js
CHANGED
|
@@ -60,11 +60,11 @@ const TOOLS = [
|
|
|
60
60
|
'(name, hours, phone, logo, brand image, categories, etc.). Use this ' +
|
|
61
61
|
'when the user wants to capture a directory like ' +
|
|
62
62
|
'https://grasslands.ca/store-directory/.\n\n' +
|
|
63
|
-
'AFTER RUNNING THIS TOOL:
|
|
64
|
-
'
|
|
65
|
-
'
|
|
66
|
-
'
|
|
67
|
-
'
|
|
63
|
+
'AFTER RUNNING THIS TOOL: reply with ONE short sentence — the count ' +
|
|
64
|
+
'and the file path. The CSV file is attached as a resource_link in the ' +
|
|
65
|
+
'tool response; do NOT paste CSV text into your reply, do NOT print the ' +
|
|
66
|
+
'JSON, do NOT summarize each store. If the tool response includes an ' +
|
|
67
|
+
'error block, surface that error verbatim to the user.',
|
|
68
68
|
inputSchema: {
|
|
69
69
|
type: 'object',
|
|
70
70
|
properties: {
|
|
@@ -138,7 +138,7 @@ const TOOLS = [
|
|
|
138
138
|
},
|
|
139
139
|
];
|
|
140
140
|
|
|
141
|
-
const PACKAGE_VERSION = '0.1.4';
|
|
141
|
+
const PACKAGE_VERSION = '0.1.5';
|
|
142
142
|
|
|
143
143
|
const server = new Server(
|
|
144
144
|
{ name: 'mall-scraper-mcp', version: PACKAGE_VERSION },
|
|
@@ -244,46 +244,73 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
|
|
|
244
244
|
mcp_version: PACKAGE_VERSION,
|
|
245
245
|
};
|
|
246
246
|
|
|
247
|
-
//
|
|
248
|
-
//
|
|
249
|
-
//
|
|
250
|
-
//
|
|
247
|
+
// Response design:
|
|
248
|
+
// 1. Brief status line (always) — what the user sees in the chat reply
|
|
249
|
+
// 2. resource_link to the CSV — file attachment with user-priority annotations
|
|
250
|
+
// 3. ONLY on error: a loud error block so the user knows something failed
|
|
251
|
+
//
|
|
252
|
+
// No JSON dump / no inline CSV preview when things succeed — keeps the chat
|
|
253
|
+
// reply minimal.
|
|
251
254
|
const host = new URL(directory_url).hostname.replace(/^www\./, '');
|
|
252
255
|
const csvFilename = writtenPaths
|
|
253
256
|
? path.basename(writtenPaths.csv)
|
|
254
257
|
: `stores_v5_${host}.csv`;
|
|
255
258
|
const csvUri = writtenPaths
|
|
256
259
|
? `file://${writtenPaths.csv}`
|
|
257
|
-
:
|
|
260
|
+
: null;
|
|
261
|
+
|
|
262
|
+
const hasLlmFailure = usage.errors > 0;
|
|
263
|
+
const hasWriteFailure = !!writeError;
|
|
264
|
+
const anyFailure = hasLlmFailure || hasWriteFailure;
|
|
258
265
|
|
|
259
266
|
const brief =
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
{
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
267
|
+
`✅ mall-scraper-mcp v${PACKAGE_VERSION}\n` +
|
|
268
|
+
`${stores.length} store${stores.length === 1 ? '' : 's'} from ${host}\n` +
|
|
269
|
+
(writtenPaths ? `📄 ${writtenPaths.csv}` : '⚠ Disk write failed');
|
|
270
|
+
|
|
271
|
+
const content = [
|
|
272
|
+
{
|
|
273
|
+
type: 'text',
|
|
274
|
+
text: brief,
|
|
275
|
+
annotations: { audience: ['user'], priority: 1.0 },
|
|
276
|
+
},
|
|
277
|
+
];
|
|
278
|
+
|
|
279
|
+
// resource_link only if we have a real file path (file:// URI must point at
|
|
280
|
+
// an existing file for the client to do anything useful with it).
|
|
281
|
+
if (csvUri) {
|
|
282
|
+
content.push({
|
|
283
|
+
type: 'resource_link',
|
|
284
|
+
uri: csvUri,
|
|
285
|
+
name: csvFilename,
|
|
286
|
+
description: `Store directory scrape — ${stores.length} stores from ${host}`,
|
|
287
|
+
mimeType: 'text/csv',
|
|
288
|
+
annotations: { audience: ['user'], priority: 0.9 },
|
|
289
|
+
});
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Loud error block — only when something failed. The user explicitly asked
|
|
293
|
+
// for nothing other than a status rundown UNLESS something broke.
|
|
294
|
+
if (anyFailure) {
|
|
295
|
+
const errLines = [];
|
|
296
|
+
if (hasLlmFailure) {
|
|
297
|
+
errLines.push(
|
|
298
|
+
`❌ ${usage.errors} LLM call${usage.errors === 1 ? '' : 's'} failed: ${usage.lastError}`,
|
|
299
|
+
' → description / categories / location_type fields will be empty.',
|
|
300
|
+
' → Run check_status to diagnose (most likely the Worker token doesn\'t match the SHARED_SECRET).',
|
|
301
|
+
);
|
|
302
|
+
}
|
|
303
|
+
if (hasWriteFailure) {
|
|
304
|
+
errLines.push(`❌ Disk write failed: ${writeError}`);
|
|
305
|
+
}
|
|
306
|
+
content.push({
|
|
307
|
+
type: 'text',
|
|
308
|
+
text: '\n' + errLines.join('\n'),
|
|
309
|
+
annotations: { audience: ['user'], priority: 1.0 },
|
|
310
|
+
});
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
return { content };
|
|
287
314
|
} finally {
|
|
288
315
|
try { await browser.close(); } catch (_) {}
|
|
289
316
|
}
|