mallmaverick-store-scraper 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/mcp-server.js +72 -10
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mallmaverick-store-scraper",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "MCP server + CLI for scraping shopping mall store directories. Hours-first layered pipeline + image classification.",
5
5
  "main": "src/main.js",
6
6
  "type": "commonjs",
package/src/mcp-server.js CHANGED
@@ -20,6 +20,7 @@
20
20
 
21
21
  require('dotenv').config();
22
22
  const fs = require('fs');
23
+ const os = require('os');
23
24
  const path = require('path');
24
25
  const { URL } = require('url');
25
26
  const http = require('http');
@@ -58,7 +59,12 @@ const TOOLS = [
58
59
  'Scrape a shopping-mall store directory and return per-store records ' +
59
60
  '(name, hours, phone, logo, brand image, categories, etc.). Use this ' +
60
61
  'when the user wants to capture a directory like ' +
61
- 'https://grasslands.ca/store-directory/.',
62
+ 'https://grasslands.ca/store-directory/.\n\n' +
63
+ 'AFTER RUNNING THIS TOOL: paste the full CSV content (from the ' +
64
+ '"--- CSV ---" block of the response) into your reply inside a fenced ' +
65
+ 'code block so the user can copy it directly into their CMS. ' +
66
+ 'Also state the saved file path and a one-line summary of how many ' +
67
+ 'stores were extracted. Do NOT summarize away the CSV — show it in full.',
62
68
  inputSchema: {
63
69
  type: 'object',
64
70
  properties: {
@@ -123,8 +129,10 @@ const TOOLS = [
123
129
  },
124
130
  ];
125
131
 
132
+ const PACKAGE_VERSION = '0.1.2';
133
+
126
134
  const server = new Server(
127
- { name: 'mall-scraper-mcp', version: '0.1.0' },
135
+ { name: 'mall-scraper-mcp', version: PACKAGE_VERSION },
128
136
  { capabilities: { tools: {} } }
129
137
  );
130
138
 
@@ -190,9 +198,19 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
190
198
  await Promise.all(tasks);
191
199
  stores.sort((a, b) => a.mm_id - b.mm_id);
192
200
 
201
+ // Generate the CSV regardless of whether we manage to write it to disk —
202
+ // it's always returned inline so the user gets it back automatically.
203
+ const csvText = storesToCSV(stores);
204
+
193
205
  let writtenPaths = null;
206
+ let writeError = null;
194
207
  if (write_csv) {
195
- writtenPaths = writeResults(directory_url, stores);
208
+ try {
209
+ writtenPaths = writeResults(directory_url, stores, csvText);
210
+ } catch (err) {
211
+ writeError = err.message;
212
+ // Don't fail the tool — the CSV is still returned inline below.
213
+ }
196
214
  }
197
215
 
198
216
  const bySource = {};
@@ -208,13 +226,25 @@ async function handleScrapeDirectory({ directory_url, max_stores = 10, concurren
208
226
  hours_layer_breakdown: bySource,
209
227
  llm_usage: usage,
210
228
  written_files: writtenPaths,
229
+ write_error: writeError,
211
230
  auth_mode: creds.mode,
231
+ mcp_version: PACKAGE_VERSION,
212
232
  };
213
233
 
234
+ // Order matters — Claude is more likely to surface the first content
235
+ // blocks. Lead with the CSV so it can't be summarized away.
214
236
  return {
215
237
  content: [
216
- { type: 'text', text: JSON.stringify(summary, null, 2) },
217
- { type: 'text', text: '\nStores:\n' + JSON.stringify(stores, null, 2) },
238
+ {
239
+ type: 'text',
240
+ text:
241
+ `mall-scraper-mcp v${PACKAGE_VERSION}\n` +
242
+ 'CSV ready — paste the block below into your CMS. ' +
243
+ `Also saved to: ${writtenPaths ? writtenPaths.csv : '(disk write failed; CSV is inline only)'}.\n\n` +
244
+ '```csv\n' + csvText + '\n```',
245
+ },
246
+ { type: 'text', text: '\n--- Run summary ---\n' + JSON.stringify(summary, null, 2) },
247
+ { type: 'text', text: '\n--- Stores (JSON for debugging) ---\n' + JSON.stringify(stores, null, 2) },
218
248
  ],
219
249
  };
220
250
  } finally {
@@ -393,15 +423,47 @@ function slugToName(slug) {
393
423
  return slug.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
394
424
  }
395
425
 
396
- function writeResults(directoryUrl, stores) {
397
- const outDir = path.join(process.cwd(), 'extracted_stores');
398
- fs.mkdirSync(outDir, { recursive: true });
426
+ /**
427
+ * Pick an output directory that's reliably writable when the MCP is spawned
428
+ * by Claude Desktop / Claude Code (process.cwd() is often unwritable).
429
+ *
430
+ * Preference order:
431
+ * 1. MALL_SCRAPER_OUTPUT_DIR env var (explicit user choice)
432
+ * 2. ~/Documents/mall-scraper-output (Mac/Win — most discoverable)
433
+ * 3. ~/mall-scraper-output (Linux / Documents unavailable)
434
+ * 4. os.tmpdir()/mall-scraper-output (last resort)
435
+ */
436
+ function pickOutDir() {
437
+ const candidates = [
438
+ process.env.MALL_SCRAPER_OUTPUT_DIR,
439
+ path.join(os.homedir(), 'Documents', 'mall-scraper-output'),
440
+ path.join(os.homedir(), 'mall-scraper-output'),
441
+ path.join(os.tmpdir(), 'mall-scraper-output'),
442
+ ].filter(Boolean);
443
+
444
+ for (const dir of candidates) {
445
+ try {
446
+ fs.mkdirSync(dir, { recursive: true });
447
+ // Probe write access by touching a file
448
+ const probe = path.join(dir, '.write-probe');
449
+ fs.writeFileSync(probe, '');
450
+ fs.unlinkSync(probe);
451
+ return dir;
452
+ } catch (_) {
453
+ continue;
454
+ }
455
+ }
456
+ throw new Error('No writable output directory found');
457
+ }
458
+
459
+ function writeResults(directoryUrl, stores, csvText) {
460
+ const outDir = pickOutDir();
399
461
  const host = new URL(directoryUrl).hostname.replace(/^www\./, '');
400
462
  const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
401
463
  const base = path.join(outDir, `stores_v5_${host}_${ts}`);
402
464
  fs.writeFileSync(`${base}.json`, JSON.stringify(stores, null, 2));
403
- fs.writeFileSync(`${base}.csv`, storesToCSV(stores));
404
- return { json: `${base}.json`, csv: `${base}.csv` };
465
+ fs.writeFileSync(`${base}.csv`, csvText);
466
+ return { json: `${base}.json`, csv: `${base}.csv`, dir: outDir };
405
467
  }
406
468
 
407
469
  function errorResult(message) {