mallmaverick-store-scraper 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/mcp-server.js +72 -10
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mallmaverick-store-scraper",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "MCP server + CLI for scraping shopping mall store directories. Hours-first layered pipeline + image classification.",
|
|
5
5
|
"main": "src/main.js",
|
|
6
6
|
"type": "commonjs",
|
package/src/mcp-server.js
CHANGED
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
require('dotenv').config();
|
|
22
22
|
const fs = require('fs');
|
|
23
|
+
const os = require('os');
|
|
23
24
|
const path = require('path');
|
|
24
25
|
const { URL } = require('url');
|
|
25
26
|
const http = require('http');
|
|
@@ -58,7 +59,12 @@ const TOOLS = [
|
|
|
58
59
|
'Scrape a shopping-mall store directory and return per-store records ' +
|
|
59
60
|
'(name, hours, phone, logo, brand image, categories, etc.). Use this ' +
|
|
60
61
|
'when the user wants to capture a directory like ' +
|
|
61
|
-
'https://grasslands.ca/store-directory
|
|
62
|
+
'https://grasslands.ca/store-directory/.\n\n' +
|
|
63
|
+
'AFTER RUNNING THIS TOOL: paste the full CSV content (from the ' +
|
|
64
|
+
'"--- CSV ---" block of the response) into your reply inside a fenced ' +
|
|
65
|
+
'code block so the user can copy it directly into their CMS. ' +
|
|
66
|
+
'Also state the saved file path and a one-line summary of how many ' +
|
|
67
|
+
'stores were extracted. Do NOT summarize away the CSV — show it in full.',
|
|
62
68
|
inputSchema: {
|
|
63
69
|
type: 'object',
|
|
64
70
|
properties: {
|
|
@@ -123,8 +129,10 @@ const TOOLS = [
|
|
|
123
129
|
},
|
|
124
130
|
];
|
|
125
131
|
|
|
132
|
+
const PACKAGE_VERSION = '0.1.2';
|
|
133
|
+
|
|
126
134
|
const server = new Server(
|
|
127
|
-
{ name: 'mall-scraper-mcp', version:
|
|
135
|
+
{ name: 'mall-scraper-mcp', version: PACKAGE_VERSION },
|
|
128
136
|
{ capabilities: { tools: {} } }
|
|
129
137
|
);
|
|
130
138
|
|
|
@@ -190,9 +198,19 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
|
|
|
190
198
|
await Promise.all(tasks);
|
|
191
199
|
stores.sort((a, b) => a.mm_id - b.mm_id);
|
|
192
200
|
|
|
201
|
+
// Generate the CSV regardless of whether we manage to write it to disk —
|
|
202
|
+
// it's always returned inline so the user gets it back automatically.
|
|
203
|
+
const csvText = storesToCSV(stores);
|
|
204
|
+
|
|
193
205
|
let writtenPaths = null;
|
|
206
|
+
let writeError = null;
|
|
194
207
|
if (write_csv) {
|
|
195
|
-
|
|
208
|
+
try {
|
|
209
|
+
writtenPaths = writeResults(directory_url, stores, csvText);
|
|
210
|
+
} catch (err) {
|
|
211
|
+
writeError = err.message;
|
|
212
|
+
// Don't fail the tool — the CSV is still returned inline below.
|
|
213
|
+
}
|
|
196
214
|
}
|
|
197
215
|
|
|
198
216
|
const bySource = {};
|
|
@@ -208,13 +226,25 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
|
|
|
208
226
|
hours_layer_breakdown: bySource,
|
|
209
227
|
llm_usage: usage,
|
|
210
228
|
written_files: writtenPaths,
|
|
229
|
+
write_error: writeError,
|
|
211
230
|
auth_mode: creds.mode,
|
|
231
|
+
mcp_version: PACKAGE_VERSION,
|
|
212
232
|
};
|
|
213
233
|
|
|
234
|
+
// Order matters — Claude is more likely to surface the first content
|
|
235
|
+
// blocks. Lead with the CSV so it can't be summarized away.
|
|
214
236
|
return {
|
|
215
237
|
content: [
|
|
216
|
-
{
|
|
217
|
-
|
|
238
|
+
{
|
|
239
|
+
type: 'text',
|
|
240
|
+
text:
|
|
241
|
+
`mall-scraper-mcp v${PACKAGE_VERSION}\n` +
|
|
242
|
+
'CSV ready — paste the block below into your CMS. ' +
|
|
243
|
+
`Also saved to: ${writtenPaths ? writtenPaths.csv : '(disk write failed; CSV is inline only)'}.\n\n` +
|
|
244
|
+
'```csv\n' + csvText + '\n```',
|
|
245
|
+
},
|
|
246
|
+
{ type: 'text', text: '\n--- Run summary ---\n' + JSON.stringify(summary, null, 2) },
|
|
247
|
+
{ type: 'text', text: '\n--- Stores (JSON for debugging) ---\n' + JSON.stringify(stores, null, 2) },
|
|
218
248
|
],
|
|
219
249
|
};
|
|
220
250
|
} finally {
|
|
@@ -393,15 +423,47 @@ function slugToName(slug) {
|
|
|
393
423
|
return slug.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
|
394
424
|
}
|
|
395
425
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
426
|
+
/**
|
|
427
|
+
* Pick an output directory that's reliably writable when the MCP is spawned
|
|
428
|
+
* by Claude Desktop / Claude Code (process.cwd() is often unwritable).
|
|
429
|
+
*
|
|
430
|
+
* Preference order:
|
|
431
|
+
* 1. MALL_SCRAPER_OUTPUT_DIR env var (explicit user choice)
|
|
432
|
+
* 2. ~/Documents/mall-scraper-output (Mac/Win — most discoverable)
|
|
433
|
+
* 3. ~/mall-scraper-output (Linux / Documents unavailable)
|
|
434
|
+
* 4. os.tmpdir()/mall-scraper-output (last resort)
|
|
435
|
+
*/
|
|
436
|
+
function pickOutDir() {
|
|
437
|
+
const candidates = [
|
|
438
|
+
process.env.MALL_SCRAPER_OUTPUT_DIR,
|
|
439
|
+
path.join(os.homedir(), 'Documents', 'mall-scraper-output'),
|
|
440
|
+
path.join(os.homedir(), 'mall-scraper-output'),
|
|
441
|
+
path.join(os.tmpdir(), 'mall-scraper-output'),
|
|
442
|
+
].filter(Boolean);
|
|
443
|
+
|
|
444
|
+
for (const dir of candidates) {
|
|
445
|
+
try {
|
|
446
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
447
|
+
// Probe write access by touching a file
|
|
448
|
+
const probe = path.join(dir, '.write-probe');
|
|
449
|
+
fs.writeFileSync(probe, '');
|
|
450
|
+
fs.unlinkSync(probe);
|
|
451
|
+
return dir;
|
|
452
|
+
} catch (_) {
|
|
453
|
+
continue;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
throw new Error('No writable output directory found');
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
function writeResults(directoryUrl, stores, csvText) {
|
|
460
|
+
const outDir = pickOutDir();
|
|
399
461
|
const host = new URL(directoryUrl).hostname.replace(/^www\./, '');
|
|
400
462
|
const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
|
|
401
463
|
const base = path.join(outDir, `stores_v5_${host}_${ts}`);
|
|
402
464
|
fs.writeFileSync(`${base}.json`, JSON.stringify(stores, null, 2));
|
|
403
|
-
fs.writeFileSync(`${base}.csv`, storesToCSV(stores));
|
|
404
|
-
return { json: `${base}.json`, csv: `${base}.csv` };
|
|
465
|
+
fs.writeFileSync(`${base}.csv`, csvText);
|
|
466
|
+
return { json: `${base}.json`, csv: `${base}.csv`, dir: outDir };
|
|
405
467
|
}
|
|
406
468
|
|
|
407
469
|
function errorResult(message) {
|