mallmaverick-store-scraper 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/mcp-server.js +53 -8
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mallmaverick-store-scraper",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "MCP server + CLI for scraping shopping mall store directories. Hours-first layered pipeline + image classification.",
|
|
5
5
|
"main": "src/main.js",
|
|
6
6
|
"type": "commonjs",
|
package/src/mcp-server.js
CHANGED
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
|
|
21
21
|
require('dotenv').config();
|
|
22
22
|
const fs = require('fs');
|
|
23
|
+
const os = require('os');
|
|
23
24
|
const path = require('path');
|
|
24
25
|
const { URL } = require('url');
|
|
25
26
|
const http = require('http');
|
|
@@ -124,7 +125,7 @@ const TOOLS = [
|
|
|
124
125
|
];
|
|
125
126
|
|
|
126
127
|
const server = new Server(
|
|
127
|
-
{ name: 'mall-scraper-mcp', version: '0.1.0' },
|
|
128
|
+
{ name: 'mall-scraper-mcp', version: '0.1.1' },
|
|
128
129
|
{ capabilities: { tools: {} } }
|
|
129
130
|
);
|
|
130
131
|
|
|
@@ -190,9 +191,19 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
|
|
|
190
191
|
await Promise.all(tasks);
|
|
191
192
|
stores.sort((a, b) => a.mm_id - b.mm_id);
|
|
192
193
|
|
|
194
|
+
// Generate the CSV regardless of whether we manage to write it to disk —
|
|
195
|
+
// it's always returned inline so the user gets it back automatically.
|
|
196
|
+
const csvText = storesToCSV(stores);
|
|
197
|
+
|
|
193
198
|
let writtenPaths = null;
|
|
199
|
+
let writeError = null;
|
|
194
200
|
if (write_csv) {
|
|
195
|
-
|
|
201
|
+
try {
|
|
202
|
+
writtenPaths = writeResults(directory_url, stores, csvText);
|
|
203
|
+
} catch (err) {
|
|
204
|
+
writeError = err.message;
|
|
205
|
+
// Don't fail the tool — the CSV is still returned inline below.
|
|
206
|
+
}
|
|
196
207
|
}
|
|
197
208
|
|
|
198
209
|
const bySource = {};
|
|
@@ -208,13 +219,15 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
|
|
|
208
219
|
hours_layer_breakdown: bySource,
|
|
209
220
|
llm_usage: usage,
|
|
210
221
|
written_files: writtenPaths,
|
|
222
|
+
write_error: writeError,
|
|
211
223
|
auth_mode: creds.mode,
|
|
212
224
|
};
|
|
213
225
|
|
|
214
226
|
return {
|
|
215
227
|
content: [
|
|
216
228
|
{ type: 'text', text: JSON.stringify(summary, null, 2) },
|
|
217
|
-
{ type: 'text', text: '\
|
|
229
|
+
{ type: 'text', text: '\n--- CSV (paste this into your CMS) ---\n' + csvText },
|
|
230
|
+
{ type: 'text', text: '\n--- Stores (JSON) ---\n' + JSON.stringify(stores, null, 2) },
|
|
218
231
|
],
|
|
219
232
|
};
|
|
220
233
|
} finally {
|
|
@@ -393,15 +406,47 @@ function slugToName(slug) {
|
|
|
393
406
|
return slug.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
|
394
407
|
}
|
|
395
408
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
409
|
+
/**
|
|
410
|
+
* Pick an output directory that's reliably writable when the MCP is spawned
|
|
411
|
+
* by Claude Desktop / Claude Code (process.cwd() is often unwritable).
|
|
412
|
+
*
|
|
413
|
+
* Preference order:
|
|
414
|
+
* 1. MALL_SCRAPER_OUTPUT_DIR env var (explicit user choice)
|
|
415
|
+
* 2. ~/Documents/mall-scraper-output (Mac/Win — most discoverable)
|
|
416
|
+
* 3. ~/mall-scraper-output (Linux / Documents unavailable)
|
|
417
|
+
* 4. os.tmpdir()/mall-scraper-output (last resort)
|
|
418
|
+
*/
|
|
419
|
+
function pickOutDir() {
|
|
420
|
+
const candidates = [
|
|
421
|
+
process.env.MALL_SCRAPER_OUTPUT_DIR,
|
|
422
|
+
path.join(os.homedir(), 'Documents', 'mall-scraper-output'),
|
|
423
|
+
path.join(os.homedir(), 'mall-scraper-output'),
|
|
424
|
+
path.join(os.tmpdir(), 'mall-scraper-output'),
|
|
425
|
+
].filter(Boolean);
|
|
426
|
+
|
|
427
|
+
for (const dir of candidates) {
|
|
428
|
+
try {
|
|
429
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
430
|
+
// Probe write access by touching a file
|
|
431
|
+
const probe = path.join(dir, '.write-probe');
|
|
432
|
+
fs.writeFileSync(probe, '');
|
|
433
|
+
fs.unlinkSync(probe);
|
|
434
|
+
return dir;
|
|
435
|
+
} catch (_) {
|
|
436
|
+
continue;
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
throw new Error('No writable output directory found');
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
function writeResults(directoryUrl, stores, csvText) {
|
|
443
|
+
const outDir = pickOutDir();
|
|
399
444
|
const host = new URL(directoryUrl).hostname.replace(/^www\./, '');
|
|
400
445
|
const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
|
|
401
446
|
const base = path.join(outDir, `stores_v5_${host}_${ts}`);
|
|
402
447
|
fs.writeFileSync(`${base}.json`, JSON.stringify(stores, null, 2));
|
|
403
|
-
fs.writeFileSync(`${base}.csv`, storesToCSV(stores));
|
|
404
|
-
return { json: `${base}.json`, csv: `${base}.csv` };
|
|
448
|
+
fs.writeFileSync(`${base}.csv`, csvText);
|
|
449
|
+
return { json: `${base}.json`, csv: `${base}.csv`, dir: outDir };
|
|
405
450
|
}
|
|
406
451
|
|
|
407
452
|
function errorResult(message) {
|