bizgate-mcp-server 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,8 +60,8 @@ Mac の場合:
60
60
 
61
61
  ```smalltalk
62
62
  claude mcp add bizgate --scope user \
63
- -e "BIZGATE_USERNAME=digi-man_bizg1" \
64
- -e "BIZGATE_PASSWORD=digi-man_bizg1" \
63
+ -e "BIZGATE_USERNAME=digi-man_testbizg1" \
64
+ -e "BIZGATE_PASSWORD=digi-man_testbizg1" \
65
65
  -e "BIZGATE_AUTH_MODE=basic" \
66
66
  -e "BIZGATE_DAILY_LIMIT=200" \
67
67
  -e "BIZGATE_SKEY_COMPANY=/EhMJ9YMCJtJgo73.DjLuew8rnnTlb.F6/MuiESFXmZwlCKvG8bMm" \
package/dist/index.d.ts CHANGED
@@ -1,2 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import "./shared/env.js";
3
+ import { BizGateClient } from "./bizgate-client.js";
4
+ import { UsageTracker } from "./usage-tracker.js";
5
/** Usage tracker instance created once at module load (`new UsageTracker(usageFile, dailyLimit)` in index.js). */
+ export declare const usageTracker: UsageTracker;
6
/**
 * BizGate API client created once at module load from the environment-derived
 * config and the `usageTracker` above. Exported so other modules (the Papatto
 * pipeline imports it) can reuse the same instance.
 * NOTE(review): importing the entry-point module from feature modules creates
 * an import cycle — confirm load order stays safe.
 */
+ export declare const client: BizGateClient;
package/dist/index.js CHANGED
@@ -64,8 +64,8 @@ const config = {
64
64
  app: process.env.BIZGATE_APP,
65
65
  dailyLimit,
66
66
  };
67
- const usageTracker = new UsageTracker(usageFile, dailyLimit);
68
- const client = new BizGateClient(config, usageTracker);
67
+ export const usageTracker = new UsageTracker(usageFile, dailyLimit);
68
+ export const client = new BizGateClient(config, usageTracker);
69
69
  const seedCache = new SeedCache(seedCsvUrl);
70
70
  const resultCache = new BizGateResultCache(sharedCacheUrl);
71
71
  const jpxCache = new JpxCache(process.env.JPX_DATA_URL);
@@ -0,0 +1,8 @@
1
/**
 * Converts a thrown value into an end-user message plus a support code.
 *
 * @param err  Any thrown value; its message text is matched against a table of
 *             known failure patterns.
 * @param opts `lang` selects the message language (defaults to Japanese).
 *             `context` is accepted but is not read by the implementation
 *             shipped alongside this declaration.
 * @returns `message` — text to show the user (includes the code);
 *          `code` — a pattern-specific or `ERR-UNKNOWN` prefix followed by a
 *          local-time stamp; `raw` — the original error text.
 */
+ export declare function friendlyError(err: unknown, opts?: {
2
+ lang?: "ja" | "ko";
3
+ context?: string;
4
+ }): {
5
+ message: string;
6
+ code: string;
7
+ raw: string;
8
+ };
@@ -0,0 +1,103 @@
1
/**
 * Table of known failure signatures. The first entry whose `pattern` matches
 * the raw error text wins, so order matters. Each entry carries the Japanese
 * and Korean user-facing message and a stable code prefix.
 */
const FRIENDLY_ERRORS = [
    {
        pattern: /econnreset|connection.*reset|timeout|ETIMEDOUT|ENOTFOUND|EAI_AGAIN/i,
        ja: "😥 Papatto / BizGate サイトへの接続が一時的に不安定です。5 分後にもう一度同じご依頼をお願いします。",
        ko: "😥 Papatto / BizGate 사이트 접속이 잠깐 불안정해요. 5분 후에 다시 같은 요청해주세요.",
        code: "ERR-NET-CONN",
    },
    {
        pattern: /drive.*storage.*quota|storage quota.*exceeded/i,
        ja: "😥 Google Drive の保存容量が不足しています。管理者にご連絡ください。",
        ko: "😥 구글 드라이브 저장 용량이 부족해요. IT 관리자에게 알려주세요.",
        code: "ERR-DRIVE-QUOTA",
    },
    {
        // `\b403\b` instead of a bare `403`: the status code must stand alone,
        // otherwise any id, phone number or timestamp containing "403" would be
        // reported as a Sheets permission problem.
        pattern: /caller does not have permission|permission_denied|\b403\b/i,
        ja: "😥 Google Sheets へのアクセス権限が設定されていません。管理者にご確認ください。",
        ko: "😥 구글 시트 권한 설정이 되어있지 않아요. IT 관리자에게 알려주세요.",
        code: "ERR-SHEETS-PERM",
    },
    {
        pattern: /今月のダウンロード.*上限|download.*quota.*exhausted/i,
        ja: "📊 今月の Papatto ダウンロード上限に達しました。月初にリセットされます。今月は別のリストをご活用ください。",
        ko: "📊 이번 달 Papatto 다운로드 한도에 도달했어요. 다음 달 1일에 리셋됩니다. 이번 달엔 기존 리스트 활용해주세요.",
        code: "ERR-PAPATTO-DL-LIMIT",
    },
    {
        pattern: /検索回数.*上限|search.*quota.*exceeded/i,
        ja: "📊 今月の Papatto 検索回数上限に達しました。月初にリセットされます。",
        ko: "📊 이번 달 Papatto 검색 회수 한도에 도달했어요.",
        code: "ERR-PAPATTO-SEARCH-LIMIT",
    },
    {
        pattern: /no papatto credential|credential not stored|credentials? not found/i,
        ja: "🔑 Papatto のログイン情報がまだ登録されていません。先に「Papatto アカウントを登録」とお伝えください。",
        ko: "🔑 Papatto 로그인 정보가 아직 없어요. 먼저 'Papatto 계정 등록' 이라고 알려주세요.",
        code: "ERR-NO-CRED",
    },
    {
        pattern: /papatto login failed|invalid.*credentials/i,
        ja: "🔑 Papatto へのログインに失敗しました。パスワードを確認して再登録してください。",
        ko: "🔑 Papatto 로그인에 실패했어요. 비밀번호 확인 후 재등록 부탁드려요.",
        code: "ERR-LOGIN-FAIL",
    },
    {
        pattern: /master.*未生成|papatto.*master.*not.*found/i,
        ja: "📚 Papatto のカテゴリ一覧 (マスタ) がまだ準備されていません。管理者にご連絡ください (一度だけ実行が必要)。",
        ko: "📚 Papatto 카테고리 마스터가 아직 준비 안 됐어요. 관리자에게 알려주세요 (1회만 실행 필요).",
        code: "ERR-NO-MASTER",
    },
    {
        pattern: /検索ボタン.*見つかりません|search button not found/i,
        ja: "😥 Papatto サイトの構造が変わったようです。管理者にご連絡ください。",
        ko: "😥 Papatto 사이트 구조가 바뀐 것 같아요. 관리자에게 알려주세요.",
        code: "ERR-PAPATTO-UI-CHANGED",
    },
    {
        pattern: /1日のリクエスト上限|daily limit reached/i,
        ja: "📊 今日の BizGate 利用回数を使い切りました。明日リセットされます。",
        ko: "📊 오늘 BizGate 사용 회수를 다 썼어요. 내일 리셋됩니다.",
        code: "ERR-BIZGATE-DAILY",
    },
    {
        pattern: /サービスキー.*無効|invalid.*service.*key/i,
        ja: "🔑 BizGate のサービスキーが無効です。管理者にご確認ください。",
        ko: "🔑 BizGate 서비스 키가 유효하지 않아요. 관리자에게 알려주세요.",
        code: "ERR-BIZGATE-KEY",
    },
    {
        pattern: /該当する企業が見つかりません|company not found/i,
        ja: "🔍 該当する会社が見つかりませんでした。会社名を正式名称 (例: 株式会社○○) で入れてみてください。",
        ko: "🔍 해당하는 회사를 못 찾았어요. 회사명을 정식명 (예: 株式会社○○)으로 입력해주세요.",
        code: "ERR-NO-COMPANY",
    },
];
/** Message used when no entry of FRIENDLY_ERRORS matches. */
const GENERIC_FALLBACK = {
    ja: "😥 予期しないエラーが発生しました。下のエラーコードを IT 担当者にお伝えください。",
    ko: "😥 예상치 못한 오류가 발생했어요. 아래 오류 코드를 IT 담당자에게 알려주세요.",
};
/**
 * Labels for the trailing "error code" / "detail" lines. The Japanese labels
 * are unchanged; Korean messages now get Korean labels instead of Japanese ones.
 */
const DETAIL_LABELS = {
    ja: { code: "🔎 エラーコード", detail: "📝 詳細" },
    ko: { code: "🔎 오류 코드", detail: "📝 상세" },
};
/** Returns the current local time as `YYYYMMDD-HHmm` (minute resolution). */
function timestamp() {
    const d = new Date();
    const pad = (n) => String(n).padStart(2, "0");
    return `${d.getFullYear()}${pad(d.getMonth() + 1)}${pad(d.getDate())}-${pad(d.getHours())}${pad(d.getMinutes())}`;
}
/**
 * Extracts matchable text from an arbitrary thrown value.
 * Error instances and primitives behave as before; plain objects (which
 * `String()` would turn into "[object Object]") contribute their `message`
 * when it is a string, or their JSON form otherwise.
 */
function describeError(err) {
    if (err instanceof Error)
        return err.message;
    if (typeof err === "object" && err !== null) {
        if (typeof err.message === "string")
            return err.message;
        try {
            const json = JSON.stringify(err);
            if (typeof json === "string")
                return json;
        }
        catch {
            // circular structure etc. — fall through to String()
        }
    }
    return String(err);
}
/**
 * Maps a thrown value to a user-facing message and a support code.
 *
 * @param err  Any thrown value.
 * @param opts Optional settings. `lang` is "ja" (default) or "ko"; any other
 *             runtime value falls back to "ja" rather than rendering
 *             "undefined". `context` is accepted for interface compatibility
 *             but is not used.
 * @returns `{ message, code, raw }` where `code` is the matched entry's prefix
 *          (or `ERR-UNKNOWN`) followed by `-` and the local timestamp, and
 *          `raw` is the extracted error text. For unknown errors the message
 *          also carries the first 200 characters of `raw`.
 */
export function friendlyError(err, opts) {
    const requested = opts == null ? undefined : opts.lang;
    const lang = requested === "ko" ? "ko" : "ja";
    const labels = DETAIL_LABELS[lang];
    const raw = describeError(err);
    const entry = FRIENDLY_ERRORS.find((candidate) => candidate.pattern.test(raw));
    if (entry) {
        const code = `${entry.code}-${timestamp()}`;
        return {
            message: `${entry[lang]}\n\n${labels.code}: \`${code}\``,
            code,
            raw,
        };
    }
    const code = `ERR-UNKNOWN-${timestamp()}`;
    return {
        message: `${GENERIC_FALLBACK[lang]}\n\n${labels.code}: \`${code}\`\n${labels.detail}: ${raw.slice(0, 200)}`,
        code,
        raw,
    };
}
@@ -0,0 +1,55 @@
1
+ import { type PapattoConditions } from "./search.js";
2
/**
 * One company scraped from a Papatto result row.
 * Only `name` is always present; each other field is set only when the
 * extractor finds the corresponding element or text pattern in the row.
 */
+ export interface PageCompanyRow {
3
/** Text of the row's title element, falling back to the row's first link. */
+ name: string;
4
/** 13-digit number found in the row's company line (or the whole row text). */
+ corporate_number?: string;
5
/** Industry token of the form `NNNN_<label>` taken from the row's meta text. */
+ industry?: string;
6
/** Address fragment starting at a prefecture name. */
+ address?: string;
7
/** Hyphenated phone number starting with 0. */
+ phone?: string;
8
/** Capital size bucket text as shown on the page. */
+ capital_size?: string;
9
/** Employee size bucket text as shown on the page. */
+ emp_size?: string;
10
/** Fiscal month, e.g. "3月". */
+ fiscal_month?: string;
11
/** NOTE(review): not populated by the row parser shipped in this version — confirm producer. */
+ market_type?: string;
12
/** Link to the department page (an href containing `dptpapatto.php`). */
+ department_url?: string;
13
/** href of the link wrapping the `.clicksrc` element, as written in the page. */
+ clicksearch_url?: string;
14
/** Set to true when the row contains a `.financeR` element; otherwise absent. */
+ has_finance_report?: boolean;
15
/** NOTE(review): not populated by the row parser shipped in this version — confirm producer. */
+ raw_text?: string;
16
+ }
17
/**
 * Result of `runSearchOnly`. Failures are reported through `error` on a
 * normally-returned object rather than by rejecting.
 */
+ export interface PageExtractOutcome {
18
/** Record count read from the result page; null when it could not be read or the run failed. */
+ num_found: number | null;
19
/** Company count read from the result page; null when it could not be read or the run failed. */
+ company_count: number | null;
20
/** URL of the page at the time the outcome was built. */
+ page_url: string;
21
/** Number of result pages parsed. */
+ pages_visited: number;
22
/** Rows collected so far (may be partial when `error` is set). */
+ companies: PageCompanyRow[];
23
/** Path of the CSV written under the user's home directory; set on success only. */
+ csv_local_path?: string;
24
/** Search quota figures as read from the page, when present. */
+ search_quota?: {
25
+ used: number;
26
+ limit: number;
27
+ };
28
/** Download quota figures as read from the page, when present. */
+ download_quota?: {
29
+ used: number;
30
+ limit: number;
31
+ exhausted: boolean;
32
+ };
33
/** Human-readable failure description. */
+ error?: string;
34
/** NOTE(review): not set by the page extractor shipped in this version — confirm producer. */
+ error_screenshot_url?: string;
35
/** CSS selector that was used to locate result rows. */
+ row_selector_used?: string;
36
/** Per-candidate selector match counts and a text sample, for diagnosing selector problems. */
+ debug_row_candidates?: Array<{
37
+ selector: string;
38
+ matched: number;
39
+ sample_text: string;
40
+ }>;
41
/** HTML excerpt (up to 8000 characters) captured when no row selector could be determined. */
+ debug_body_snippet?: string;
42
/** Count of elements per tag name under `body`, captured when no row selector could be determined. */
+ debug_element_dist?: Record<string, number>;
43
+ }
44
/** Input for `runSearchOnly`. */
+ export interface PageExtractOptions {
45
/** User whose browser session is opened. */
+ slack_user_id: string;
46
/** Campaign identifier; used for the queue label, log entries and the CSV output directory name. */
+ campaign_id: string;
47
/** Search conditions applied to the Papatto search form. */
+ conditions: PapattoConditions;
48
/** Maximum rows to collect; defaults to 100 and is clamped to 1..2000. */
+ max_companies?: number;
49
/** Delay between result pages in milliseconds; defaults to 1500, minimum 300. */
+ throttle_ms?: number;
50
/** Whether the browser runs headless; defaults to true. */
+ headless?: boolean;
51
/** Search page URL; defaults to the master's `source_url`. */
+ search_url?: string;
52
/** Explicit CSS selector for result rows, tried before automatic detection. */
+ row_selector?: string;
53
/** When true, selector candidates are captured and returned even on success. */
+ debug?: boolean;
54
+ }
55
/**
 * Runs a Papatto search in a queued browser session and scrapes the result
 * rows from the result pages into a local CSV.
 * Rejects when the Papatto master has not been generated; other failures are
 * reported via `PageExtractOutcome.error`.
 */
+ export declare function runSearchOnly(opts: PageExtractOptions): Promise<PageExtractOutcome>;
@@ -0,0 +1,324 @@
1
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { openSession } from "./playwright-session.js";
5
+ import { applyConditions, clickSearch, readCounts, readQuotas, activatedTabs, loadMaster, } from "./search.js";
6
+ import { runQueued } from "../queue/playwright-queue.js";
7
+ import { log } from "../shared/logger.js";
8
// CSS selectors probed on the result page to find the repeating "one company"
// element. Each is counted in the page; the selector picker then chooses among
// those that matched.
+ const ROW_SELECTOR_CANDIDATES = [
9
+ ".result-document",
10
+ "table.result tbody tr",
11
+ "table.list tbody tr",
12
+ "table.companies tbody tr",
13
+ ".result-row",
14
+ ".company-row",
15
+ '[class*="result"] > [class*="row"]',
16
+ '[class*="company"] > [class*="row"]',
17
+ "table tbody tr",
18
+ "ol > li",
19
+ "ul.results > li",
20
+ ];
21
// Selectors tried in order to find the "next page" control ("次へ" / "次" = "next").
+ const NEXT_BUTTON_CANDIDATES = [
22
+ 'a:has-text("次へ")',
23
+ 'a:has-text("次")',
24
+ 'a[title*="次"]',
25
+ 'a[rel="next"]',
26
+ 'a.next',
27
+ 'button:has-text("次へ")',
28
+ ];
29
/**
 * Quotes a CSV cell when it contains a double quote, comma, CR or LF,
 * doubling any embedded double quotes. Other values are returned unchanged.
 */
function escapeCsv(value) {
    const needsQuoting = /[",\r\n]/.test(value);
    return needsQuoting ? `"${value.replace(/"/g, '""')}"` : value;
}
/**
 * Serialises scraped rows to CSV text: a header line followed by one line per
 * row, joined with "\n". Missing (null/undefined) fields become empty cells.
 */
function rowsToCsv(rows) {
    const columns = [
        "name",
        "corporate_number",
        "industry",
        "address",
        "phone",
        "capital_size",
        "emp_size",
        "fiscal_month",
        "market_type",
        "has_finance_report",
        "department_url",
        "clicksearch_url",
    ];
    const toLine = (row) => columns.map((column) => escapeCsv(row[column] ?? "")).join(",");
    return [columns.join(","), ...rows.map(toLine)].join("\n");
}
57
/**
 * Probes every selector in ROW_SELECTOR_CANDIDATES inside the page and reports
 * those that match at least one element.
 *
 * @returns Array of `{ selector, matched, sample_text }`, where `sample_text`
 *          is the first 120 characters of the first match's trimmed text.
 *          Selectors the page rejects as invalid are skipped.
 */
async function captureRowCandidates(page) {
    // This callback is executed in the browser context, so it must not
    // reference anything from the enclosing module scope.
    const probe = (selectors) => {
        const hits = [];
        selectors.forEach((selector) => {
            let nodes;
            try {
                nodes = document.querySelectorAll(selector);
            }
            catch {
                // invalid selector
                return;
            }
            if (nodes.length === 0)
                return;
            hits.push({
                selector,
                matched: nodes.length,
                sample_text: (nodes[0].textContent || "").trim().slice(0, 120),
            });
        });
        return hits;
    };
    return page.evaluate(probe, ROW_SELECTOR_CANDIDATES);
}
78
/**
 * Chooses the CSS selector used to locate result rows.
 *
 * @param page Playwright page showing the search results.
 * @param hint Optional caller-supplied selector. It is used as soon as it
 *             matches at least one element. The previous `> 1` check threw
 *             away an explicit selector whenever the search legitimately
 *             returned a single company.
 * @returns The hint, or the auto-detected candidate with the most matches
 *          among those matching 2..200 elements, or null when nothing fits.
 */
async function pickRowSelector(page, hint) {
    if (hint) {
        const hintMatches = await page.locator(hint).count();
        if (hintMatches >= 1)
            return hint;
    }
    const candidates = await captureRowCandidates(page);
    if (candidates.length === 0)
        return null;
    // Auto-detection keeps the 2..200 window: a selector matching a single
    // element is more likely a container than a row, and a huge match count is
    // more likely generic markup than a result list.
    const best = candidates
        .filter((c) => c.matched >= 2 && c.matched <= 200)
        .sort((a, b) => b.matched - a.matched)[0];
    return best?.selector ?? null;
}
92
/**
 * Extracts one record per element matching `rowSelector` on the current page.
 *
 * The extraction runs inside the browser via `page.evaluate`, so every helper
 * it needs is defined inside the callback. Rows without a name are skipped;
 * every other field is added only when its element or text pattern is found.
 *
 * @param page        Playwright page showing the search results.
 * @param rowSelector CSS selector matching one element per company.
 * @returns Array of row records (`name` plus optional fields).
 */
async function parseRowsOnPage(page, rowSelector) {
    return page.evaluate((sel) => {
        const textOf = (el) => (el ? (el.textContent || "").replace(/\s+/g, " ").trim() : "");
        const pick = (text, re) => {
            const m = text.match(re);
            return m ? m[0] : undefined;
        };
        // Resolves a non-absolute href against the page's own base URL.
        // Simple concatenation onto the origin breaks for hrefs without a
        // leading slash ("dptpapatto.php?..." → "...papatto.infodptpapatto.php").
        const toAbsolute = (href) => {
            if (href.startsWith("http"))
                return href;
            try {
                return new URL(href, document.baseURI || "https://www.papatto.info/").href;
            }
            catch {
                return `https://www.papatto.info${href}`;
            }
        };
        const out = [];
        for (const row of Array.from(document.querySelectorAll(sel))) {
            // Assumes the Papatto `.result-document` structure, with generic
            // fallbacks so other row selectors still yield a name.
            const titleEl = row.querySelector(".company_line .title") ??
                row.querySelector(".company_link .title") ??
                row.querySelector(".title");
            const name = textOf(titleEl) || textOf(row.querySelector("a"));
            if (!name)
                continue;
            const record = { name };
            // Corporate number: a 13-digit run in the company line.
            const companyLineText = textOf(row.querySelector(".company_line")) || textOf(row);
            const compno = pick(companyLineText, /\b\d{13}\b/);
            if (compno)
                record.corporate_number = compno;
            // All remaining columns are pulled out of the meta text.
            const metaText = textOf(row.querySelector(".result-meta")) || textOf(row);
            // Industry ("NNNN_<label>" form).
            const ind = pick(metaText, /\d{4}_[^\s0-9\d]+(?:[・・、]?[^\s0-9\d]+)?/);
            if (ind)
                record.industry = ind;
            // Address (starts at a prefecture name).
            const addr = pick(metaText, /(?:東京都|北海道|大阪府|京都府|[^\s]{2,3}県)[^\s0-9][^\s]*/);
            if (addr)
                record.address = addr;
            // Phone number.
            const phone = pick(metaText, /\b0\d{1,3}-\d{1,4}-\d{4}\b/);
            if (phone)
                record.phone = phone;
            // Capital size bucket (e.g. "6.50億以上" / "2.1000万-3000万未満").
            const cap = pick(metaText, /[0-9\d]+[.\.][\s]*[\d一-鿿,億万兆\-未満以上]+/);
            if (cap)
                record.capital_size = cap;
            // Employee size bucket: must contain "人". One match is enough —
            // the previous looser pre-check could never change the outcome.
            const emp = pick(metaText, /[0-9\d]+[.\.][\s]*[\d一-鿿,人未満以上\-]+人[未満以上]*/);
            if (emp)
                record.emp_size = emp;
            // Fiscal month.
            const fiscal = pick(metaText, /(?<![\d])(?:1[0-2]|[1-9])月(?![\d])/);
            if (fiscal)
                record.fiscal_month = fiscal;
            // Department page URL.
            const deptHref = row.querySelector('a[href*="dptpapatto.php"]')?.getAttribute("href");
            if (deptHref)
                record.department_url = toAbsolute(deptHref);
            // ClickSearch URL (kept exactly as written in the page).
            const csHref = row.querySelector(".clicksrc")?.closest("a")?.getAttribute("href");
            if (csHref)
                record.clicksearch_url = csHref;
            // Securities-report marker.
            if (row.querySelector(".financeR"))
                record.has_finance_report = true;
            out.push(record);
        }
        return out;
    }, rowSelector);
}
166
/**
 * Clicks the first usable "next page" control and reports whether the page
 * URL changed as a result.
 *
 * Only the first candidate that exists and is enabled is attempted: a failed
 * click returns false immediately rather than trying later candidates.
 *
 * @returns true when the URL after the click differs from the URL before it.
 */
async function gotoNextPage(page) {
    for (const selector of NEXT_BUTTON_CANDIDATES) {
        const control = page.locator(selector).first();
        const present = (await control.count()) > 0;
        if (!present)
            continue;
        const enabled = await control.isEnabled().catch(() => false);
        if (!enabled)
            continue;
        const urlBefore = page.url();
        try {
            // Start the load-state wait before clicking so both run together.
            const loaded = page.waitForLoadState("domcontentloaded").catch(() => undefined);
            const clicked = control.click({ timeout: 5000 });
            await Promise.all([loaded, clicked]);
        }
        catch {
            return false;
        }
        return page.url() !== urlBefore;
    }
    return false;
}
186
/**
 * Public entry point: loads the Papatto master and schedules the actual
 * browser work on the shared queue under the label `search_only:<campaign_id>`.
 *
 * @throws Error when the master has not been generated yet.
 */
export async function runSearchOnly(opts) {
    const master = loadMaster();
    if (master) {
        const jobLabel = `search_only:${opts.campaign_id}`;
        return runQueued(() => runSearchOnlyInner(opts, master), jobLabel);
    }
    throw new Error("Papatto マスタ未生成。先に papatto__master_refresh を実行してください。");
}
193
/**
 * Collects diagnostics for the "no row selector found" case: the selector
 * candidates, a per-tag element histogram and an HTML excerpt of the most
 * likely result area.
 */
async function collectSelectorDebugInfo(page) {
    const candidates = await captureRowCandidates(page);
    const elementDist = await page.evaluate(() => {
        const out = {};
        for (const el of Array.from(document.querySelectorAll("body *"))) {
            const tag = el.tagName.toLowerCase();
            out[tag] = (out[tag] ?? 0) + 1;
        }
        return out;
    });
    const bodySnippet = await page.evaluate(() => {
        // 1) Well-known result containers, by id or class name.
        const knownIds = ["result_panel", "search_result", "search_results", "result", "results", "search-result", "main", "content"];
        for (const id of knownIds) {
            const el = document.getElementById(id) ?? document.querySelector(`.${id}`);
            if (el && (el.innerHTML || "").length > 1000) {
                return `<!-- matched: #${id} or .${id} -->\n` + (el.innerHTML || "").slice(0, 8000);
            }
        }
        // 2) Any element of moderate size whose text contains the company-count label.
        for (const el of Array.from(document.querySelectorAll("body *"))) {
            const text = el.textContent || "";
            if (/会社数\s*[\d,]+\s*社/.test(text) && (el.innerHTML || "").length > 800 && (el.innerHTML || "").length < 200000) {
                return `<!-- matched: ${el.tagName.toLowerCase()}#${el.id}.${el.className} (件数 영역) -->\n` + (el.innerHTML || "").slice(0, 8000);
            }
        }
        // 3) Last resort: the tail of the body.
        return "<!-- fallback: body 끝 부분 -->\n" + document.body.innerHTML.slice(-8000);
    });
    return { candidates, elementDist, bodySnippet };
}
/**
 * Performs one search-and-scrape run inside an already-queued slot.
 *
 * Steps: open the user's session, load the search page, apply conditions,
 * submit, read counts and quotas, detect the row selector, then walk result
 * pages until `max_companies` rows are collected, no next page exists, or the
 * 200-page cap is exceeded. Collected rows are written to
 * `~/.papatto-bizgate/downloads/<campaign_id>/page_extract__<ms>.csv`.
 *
 * A search whose company count is exactly 0 now returns an empty, error-free
 * outcome right away. Previously it was reported as a "row selector not found"
 * failure, or unrelated list markup was scraped as companies.
 *
 * Errors raised after the session is opened are logged and returned via the
 * `error` field together with any rows collected so far. The session is
 * always closed.
 *
 * @param opts   Options as accepted by `runSearchOnly`.
 * @param master Loaded Papatto master (supplies the default search URL).
 */
async function runSearchOnlyInner(opts, master) {
    const maxCompanies = Math.max(1, Math.min(opts.max_companies ?? 100, 2000));
    const throttle = Math.max(300, opts.throttle_ms ?? 1500);
    const session = await openSession(opts.slack_user_id, { headless: opts.headless ?? true });
    let pagesVisited = 0;
    let rowSelectorUsed;
    let debugCandidates;
    const collected = [];
    try {
        const { page } = session;
        const searchUrl = opts.search_url ?? master.source_url;
        await page.goto(searchUrl, { waitUntil: "domcontentloaded" });
        await page.waitForTimeout(800);
        // Tab activation state is module-global; reset it for this fresh page.
        activatedTabs.clear();
        await applyConditions(page, opts.conditions, master);
        await clickSearch(page);
        await page.waitForLoadState("networkidle", { timeout: 15_000 }).catch(() => undefined);
        await page.waitForTimeout(500);
        const counts = await readCounts(page);
        const quotas = await readQuotas(page);
        if (counts.companies === 0) {
            // Nothing to scrape: report an empty result rather than a selector failure.
            return {
                num_found: counts.records,
                company_count: counts.companies,
                page_url: page.url(),
                pages_visited: 1,
                companies: [],
                search_quota: quotas.search,
                download_quota: quotas.download,
            };
        }
        const rowSelector = await pickRowSelector(page, opts.row_selector);
        if (!rowSelector) {
            const debugInfo = await collectSelectorDebugInfo(page);
            return {
                num_found: counts.records,
                company_count: counts.companies,
                page_url: page.url(),
                pages_visited: 1,
                companies: [],
                search_quota: quotas.search,
                download_quota: quotas.download,
                error: "行セレクタが特定できませんでした。row_selector を明示するか、debug:true で候補を確認してください。",
                debug_row_candidates: debugInfo.candidates,
                debug_element_dist: debugInfo.elementDist,
                debug_body_snippet: debugInfo.bodySnippet,
            };
        }
        rowSelectorUsed = rowSelector;
        if (opts.debug)
            debugCandidates = await captureRowCandidates(page);
        while (collected.length < maxCompanies) {
            pagesVisited++;
            const rows = await parseRowsOnPage(page, rowSelector);
            for (const r of rows) {
                if (collected.length >= maxCompanies)
                    break;
                collected.push(r);
            }
            if (collected.length >= maxCompanies)
                break;
            const movedNext = await gotoNextPage(page);
            if (!movedNext)
                break;
            await page.waitForTimeout(throttle);
            if (pagesVisited > 200) {
                log("warn", "page-extract: exceeded 200 page cap", { campaign_id: opts.campaign_id });
                break;
            }
        }
        const destDir = join(homedir(), ".papatto-bizgate", "downloads", opts.campaign_id);
        if (!existsSync(destDir))
            mkdirSync(destDir, { recursive: true });
        const csvPath = join(destDir, `page_extract__${Date.now()}.csv`);
        writeFileSync(csvPath, rowsToCsv(collected), "utf-8");
        log("info", "page-extract csv saved", {
            campaign_id: opts.campaign_id,
            companies: collected.length,
            pages: pagesVisited,
            throttle_ms: throttle,
        });
        return {
            num_found: counts.records,
            company_count: counts.companies,
            page_url: page.url(),
            pages_visited: pagesVisited,
            companies: collected,
            csv_local_path: csvPath,
            search_quota: quotas.search,
            download_quota: quotas.download,
            row_selector_used: rowSelectorUsed,
            debug_row_candidates: debugCandidates,
        };
    }
    catch (e) {
        const failure = e instanceof Error ? e : new Error(String(e));
        log("error", "page-extract failed", {
            campaign_id: opts.campaign_id,
            error: failure.message,
            pages_visited: pagesVisited,
        });
        return {
            num_found: null,
            company_count: null,
            page_url: session.page.url(),
            pages_visited: pagesVisited,
            companies: collected,
            error: failure.message,
            row_selector_used: rowSelectorUsed,
            debug_row_candidates: debugCandidates,
        };
    }
    finally {
        await session.close();
    }
}
@@ -0,0 +1,67 @@
1
+ import type { PapattoConditions } from "./search.js";
2
/**
 * A company found on Papatto, optionally enriched with BizGate data and scored
 * for sales priority. `papatto_*` fields come from the scraped result row; the
 * unprefixed fields below `bizgate_matched` are filled from BizGate.
 */
+ export interface EnrichedCompany {
3
+ name: string;
4
+ papatto_industry?: string;
5
+ papatto_address?: string;
6
+ papatto_phone?: string;
7
+ papatto_capital?: string;
8
+ papatto_emp?: string;
9
+ papatto_fiscal_month?: string;
10
/** True when the BizGate company search returned at least one document. */
+ bizgate_matched: boolean;
11
/** Match pattern reported by the BizGate company search. */
+ match_pattern?: string;
12
/** Starts as the Papatto-scraped number; replaced by the BizGate value on a match. */
+ corporate_number?: string;
13
+ ceo?: string;
14
+ revenue?: string;
15
+ industry?: string;
16
+ hp_url?: string;
17
+ email?: string;
18
+ capital?: string;
19
+ emp?: string;
20
/** Up to 10 departments from the BizGate department search (deep enrichment only). */
+ departments?: Array<{
21
+ name: string;
22
+ phone?: string;
23
+ }>;
24
/** Up to 5 entries from the BizGate keyman search (deep enrichment only). */
+ keymen?: Array<{
25
+ role: string;
26
+ }>;
27
/** Heuristic score: starts at 50 and is capped at 100. */
+ priority_score: number;
28
/** Up to three score reasons joined with " / ". */
+ priority_reason?: string;
29
+ }
30
/** Input for `runPipeline`. */
+ export interface PipelineOptions {
31
/** User whose Papatto session is used; also recorded as the campaign owner. */
+ slack_user_id: string;
32
/** Campaign identifier; generated from the current time plus a random suffix when omitted. */
+ campaign_id?: string;
33
+ conditions: PapattoConditions;
34
/** Original natural-language request, stored with the campaign record. */
+ natural_language?: string;
35
/** Maximum companies to pull from Papatto; defaults to 50, clamped to 1..200. */
+ candidates_limit?: number;
36
/**
 * How far BizGate enrichment goes; defaults to "smart".
 * "smart" and "full" add department/keyman lookups (top-ranked vs. all matched);
 * any other value performs the company search only.
 */
+ enrich_depth?: "basic" | "smart" | "full";
37
/** Number of top-scored companies considered for deep enrichment in "smart" mode; defaults to 20. */
+ smart_top_n?: number;
38
/** Echoed back as `ranking_mode`; defaults to "heuristic". NOTE(review): only heuristic scoring is visible in the implementation — confirm "llm" handling. */
+ smart_ranking?: "heuristic" | "llm";
39
+ headless?: boolean;
40
+ search_url?: string;
41
+ }
42
/** Result of `runPipeline`. */
+ export interface PipelineOutcome {
43
+ campaign_id: string;
44
/** Company count reported by the Papatto result page (0 when unknown). */
+ papatto_found: number;
45
/** Number of companies carried through enrichment (0 on failure). */
+ papatto_extracted: number;
46
/** Number of companies for which BizGate returned a match. */
+ bizgate_matched: number;
47
/** Number of companies selected for department/keyman lookups. */
+ bizgate_enriched_deep: number;
48
/** Companies sorted by descending `priority_score`; empty on failure. */
+ companies: EnrichedCompany[];
49
+ csv_local_path?: string;
50
/** URL of the uploaded CSV when R2 is configured and the upload succeeded. */
+ r2_url?: string;
51
/** Count of BizGate API calls that completed successfully during this run. */
+ api_used: {
52
+ bizgate: number;
53
+ };
54
+ duration_ms: number;
55
/** Set when the Papatto search failed or returned no companies. */
+ error?: string;
56
+ search_quota?: {
57
+ used: number;
58
+ limit: number;
59
+ };
60
+ download_quota?: {
61
+ used: number;
62
+ limit: number;
63
+ exhausted: boolean;
64
+ };
65
+ ranking_mode: "heuristic" | "llm";
66
+ }
67
/**
 * End-to-end flow: Papatto search → BizGate enrichment → heuristic scoring →
 * CSV export (local file, plus R2 upload and campaign record updates when
 * those services are configured). Search failures are reported through
 * `PipelineOutcome.error`.
 */
+ export declare function runPipeline(opts: PipelineOptions): Promise<PipelineOutcome>;
@@ -0,0 +1,295 @@
1
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { runSearchOnly } from "./page-extract.js";
5
+ import { client as bizgateClient } from "../index.js";
6
+ import { first } from "../bizgate-client.js";
7
+ import * as r2 from "../cloud/r2-client.js";
8
+ import * as neon from "../cloud/neon-client.js";
9
+ import { log } from "../shared/logger.js";
10
/**
 * Builds a campaign id of the form `YYYYMMDD_HHmm_xxxx` from the local time
 * and a 4-character base-36 random suffix.
 *
 * `Math.random().toString(36)` can produce fewer than four fractional digits
 * (e.g. 0.5 → "0.i", 0 → "0"), which used to yield a short or empty suffix.
 * The suffix is now right-padded with "0" so the id always has the same shape.
 */
function genCampaignId() {
    const d = new Date();
    const pad = (n) => String(n).padStart(2, "0");
    const dt = `${d.getFullYear()}${pad(d.getMonth() + 1)}${pad(d.getDate())}_${pad(d.getHours())}${pad(d.getMinutes())}`;
    const rand = Math.random().toString(36).slice(2, 6).padEnd(4, "0");
    return `${dt}_${rand}`;
}
17
/**
 * Scores a company for sales priority.
 *
 * Starts from 50 and adds bonuses for capital tier, headcount tier, a BizGate
 * match, three or more departments, any keyman entry, a homepage URL and a
 * published e-mail address. The total is capped at 100.
 *
 * @returns `{ score, reason }` where `reason` joins the first three bonus
 *          labels with " / ", or is "基本情報のみ" when no labelled bonus applied.
 */
function heuristicScore(c) {
    const capitalTiers = [
        { re: /1兆|5000億|1000億/, points: 25, label: "超大企業規模" },
        { re: /500億|300億/, points: 18, label: "大企業" },
        { re: /100億|50億/, points: 10, label: "中堅規模" },
    ];
    const headcountTiers = [
        { re: /5000人|1000人/, points: 12, label: "従業員 1000+" },
        { re: /500人|100人/, points: 6, label: "従業員 100~" },
    ];
    const capitalText = `${c.papatto_capital ?? ""} ${c.capital ?? ""}`;
    const headcountText = `${c.papatto_emp ?? ""} ${c.emp ?? ""}`;
    const bonuses = [];
    // Only the highest matching tier of each ladder counts.
    const capitalTier = capitalTiers.find((tier) => tier.re.test(capitalText));
    if (capitalTier)
        bonuses.push({ points: capitalTier.points, label: capitalTier.label });
    const headcountTier = headcountTiers.find((tier) => tier.re.test(headcountText));
    if (headcountTier)
        bonuses.push({ points: headcountTier.points, label: headcountTier.label });
    if (c.bizgate_matched)
        bonuses.push({ points: 5, label: "BizGate データ確保" });
    if (c.departments && c.departments.length >= 3)
        bonuses.push({ points: 5, label: `部署 ${c.departments.length} 件` });
    if (c.keymen && c.keymen.length > 0)
        bonuses.push({ points: 10, label: `キーマン情報あり` });
    // A homepage adds points but is not mentioned in the reason text.
    if (c.hp_url)
        bonuses.push({ points: 2, label: undefined });
    if (c.email)
        bonuses.push({ points: 3, label: "メール公開" });
    const total = bonuses.reduce((sum, bonus) => sum + bonus.points, 50);
    const labels = bonuses
        .filter((bonus) => bonus.label !== undefined)
        .map((bonus) => bonus.label);
    return {
        score: Math.min(100, total),
        reason: labels.slice(0, 3).join(" / ") || "基本情報のみ",
    };
}
66
/**
 * Looks each scraped row up in BizGate (sequentially) and builds one enriched
 * record per row, scored with `heuristicScore`.
 *
 * A lookup failure is logged and leaves the record unmatched; it never aborts
 * the batch.
 *
 * The Papatto-scraped corporate number is kept when the BizGate document has
 * no `compno`. Previously it was overwritten with `undefined`, which dropped
 * the identifier from the CSV and from later department/keyman lookups.
 *
 * @param rows Rows as produced by the page extractor.
 * @returns `{ items, api_calls }`, where `api_calls` counts the company
 *          searches that completed without throwing.
 */
async function enrichWithCompanySearch(rows) {
    const items = [];
    let calls = 0;
    for (const r of rows) {
        const ec = {
            name: r.name,
            papatto_industry: r.industry,
            papatto_address: r.address,
            papatto_phone: r.phone,
            papatto_capital: r.capital_size,
            papatto_emp: r.emp_size,
            papatto_fiscal_month: r.fiscal_month,
            corporate_number: r.corporate_number,
            bizgate_matched: false,
            priority_score: 0,
        };
        try {
            const params = { shogo: r.name };
            if (r.corporate_number)
                params.compno = r.corporate_number;
            const { matchPattern, docs } = await bizgateClient.searchCompany(params);
            calls++;
            if (docs.length > 0) {
                // Only the first (best) document is used.
                const d = docs[0];
                ec.bizgate_matched = true;
                ec.match_pattern = matchPattern;
                ec.corporate_number = d.compno || ec.corporate_number;
                ec.ceo = first(d.ceo);
                ec.revenue = d.revenue ? String(d.revenue) : undefined;
                ec.industry = first(d.gyoshu_facet);
                ec.hp_url = first(d.hpurl);
                ec.email = first(d.mail);
                ec.capital = d.shihon ? String(d.shihon) : undefined;
                ec.emp = d.emp ? String(d.emp) : undefined;
            }
        }
        catch (e) {
            log("warn", "bizgate company_search failed", { name: r.name, error: String(e) });
        }
        const sc = heuristicScore(ec);
        ec.priority_score = sc.score;
        ec.priority_reason = sc.reason;
        items.push(ec);
    }
    return { items, api_calls: calls };
}
112
/**
 * Adds department and keyman data to every BizGate-matched company in
 * `targets` (mutating the objects in place) and rescores each one afterwards.
 *
 * The two lookups fail independently: each failure is logged and skipped.
 *
 * @returns Number of lookups that completed without throwing.
 */
async function enrichDepartmentsAndKeymen(targets) {
    let completedCalls = 0;
    for (const company of targets) {
        if (company.bizgate_matched) {
            const lookupKey = () => ({
                compno: company.corporate_number,
                shogo: company.name,
            });
            // Departments: keep at most 10.
            try {
                const deptResult = await bizgateClient.searchDepartments(lookupKey());
                completedCalls += 1;
                company.departments = deptResult.docs.slice(0, 10).map((doc) => {
                    return { name: first(doc.bumon) || "(不明)", phone: first(doc.tel) };
                });
            }
            catch (err) {
                log("warn", "department_search failed", { name: company.name, error: String(err) });
            }
            // Keymen: keep at most 5.
            try {
                const keymanResult = await bizgateClient.searchKeyman(lookupKey());
                completedCalls += 1;
                company.keymen = keymanResult.docs.slice(0, 5).map((doc) => {
                    return { role: first(doc.bumon) || "(不明)" };
                });
            }
            catch (err) {
                log("warn", "keyman_search failed", { name: company.name, error: String(err) });
            }
            const rescored = heuristicScore(company);
            company.priority_score = rescored.score;
            company.priority_reason = rescored.reason;
        }
    }
    return completedCalls;
}
148
/**
 * Quotes a CSV cell when it contains a double quote, comma, CR or LF,
 * doubling embedded double quotes; otherwise returns the text unchanged.
 */
function csvEscape(v) {
    const mustQuote = /[",\r\n]/.test(v);
    return mustQuote ? `"${v.replace(/"/g, '""')}"` : v;
}
/**
 * Serialises enriched companies to CSV text (header + one line per company,
 * joined with "\n"). Departments are rendered as "name(phone)" and keymen as
 * their role, each list joined with "; ". Null/undefined fields become empty
 * cells.
 */
function rowsToCsv(items) {
    const columns = [
        "priority_score",
        "name",
        "corporate_number",
        "ceo",
        "industry",
        "revenue",
        "capital",
        "emp",
        "papatto_address",
        "papatto_phone",
        "hp_url",
        "email",
        "departments",
        "keymen",
        "priority_reason",
    ];
    const describeDepartment = (dept) => `${dept.name}${dept.phone ? `(${dept.phone})` : ""}`;
    const cellText = (company, column) => {
        if (column === "departments") {
            return company.departments?.map(describeDepartment).join("; ") ?? "";
        }
        if (column === "keymen") {
            return company.keymen?.map((keyman) => keyman.role).join("; ") ?? "";
        }
        const value = company[column];
        return value != null ? String(value) : "";
    };
    const output = [columns.join(",")];
    items.forEach((company) => {
        output.push(columns.map((column) => csvEscape(cellText(company, column))).join(","));
    });
    return output.join("\n");
}
189
/**
 * Runs the full sales pipeline for one campaign.
 *
 * 1. Records the campaign (when Neon is configured).
 * 2. Runs the Papatto page search for up to `candidates_limit` companies.
 * 3. Enriches every company via the BizGate company search and scores it.
 * 4. Depending on `enrich_depth`, adds department/keyman data:
 *    "smart" — the matched companies among the `smart_top_n` best-scored;
 *    "full"  — every matched company; anything else — none.
 * 5. Writes `~/.papatto-bizgate/downloads/<campaign_id>/pipeline__<ms>.csv`,
 *    uploads it to R2 and marks the campaign done (each only when configured).
 *
 * Cloud bookkeeping is best-effort: failures are logged as warnings and never
 * fail the run. That now includes the "failed" status update, which used to
 * swallow its error silently.
 *
 * @param opts Pipeline options; see the limits applied below.
 * @returns The outcome. When the search fails or yields no companies, `error`
 *          is set and `companies` is empty.
 */
export async function runPipeline(opts) {
    const t0 = Date.now();
    const campaign_id = opts.campaign_id ?? genCampaignId();
    const ranking_mode = opts.smart_ranking ?? "heuristic";
    const candidates_limit = Math.max(1, Math.min(opts.candidates_limit ?? 50, 200));
    const depth = opts.enrich_depth ?? "smart";
    const smart_top_n = Math.max(1, Math.min(opts.smart_top_n ?? 20, candidates_limit));
    if (neon.isConfigured()) {
        try {
            await neon.insertCampaign({
                campaign_id,
                owner_slack_user_id: opts.slack_user_id,
                natural_language_query: opts.natural_language,
                papatto_conditions: opts.conditions,
            });
        }
        catch (e) {
            log("warn", "neon insertCampaign (pipeline) failed", { campaign_id, error: String(e) });
        }
    }
    const search = await runSearchOnly({
        slack_user_id: opts.slack_user_id,
        campaign_id,
        conditions: opts.conditions,
        max_companies: candidates_limit,
        headless: opts.headless ?? true,
        search_url: opts.search_url,
    });
    if (search.error || search.companies.length === 0) {
        if (neon.isConfigured()) {
            try {
                await neon.updateCampaign({ campaign_id, status: "failed" });
            }
            catch (e) {
                log("warn", "neon updateCampaign (pipeline) failed", { campaign_id, error: String(e) });
            }
        }
        return {
            campaign_id,
            papatto_found: search.company_count ?? 0,
            papatto_extracted: 0,
            bizgate_matched: 0,
            bizgate_enriched_deep: 0,
            companies: [],
            api_used: { bizgate: 0 },
            duration_ms: Date.now() - t0,
            error: search.error ?? "検索結果が 0 件です",
            search_quota: search.search_quota,
            download_quota: search.download_quota,
            ranking_mode,
        };
    }
    const { items: enriched, api_calls: searchCalls } = await enrichWithCompanySearch(search.companies);
    let deepCalls = 0;
    let deeplyEnriched = 0;
    if (depth === "smart") {
        // Rank on a copy first; the final ordering is established after
        // deep enrichment has rescored the selected companies.
        const sorted = [...enriched].sort((a, b) => b.priority_score - a.priority_score);
        const top = sorted.slice(0, smart_top_n).filter((c) => c.bizgate_matched);
        deepCalls = await enrichDepartmentsAndKeymen(top);
        deeplyEnriched = top.length;
    }
    else if (depth === "full") {
        const targets = enriched.filter((c) => c.bizgate_matched);
        deepCalls = await enrichDepartmentsAndKeymen(targets);
        deeplyEnriched = targets.length;
    }
    enriched.sort((a, b) => b.priority_score - a.priority_score);
    const dir = join(homedir(), ".papatto-bizgate", "downloads", campaign_id);
    if (!existsSync(dir))
        mkdirSync(dir, { recursive: true });
    const csvPath = join(dir, `pipeline__${Date.now()}.csv`);
    writeFileSync(csvPath, rowsToCsv(enriched), "utf-8");
    let r2Url;
    if (r2.isConfigured()) {
        try {
            const key = r2.csvKey(campaign_id, "pipeline.csv");
            const up = await r2.uploadFile(key, csvPath, "text/csv; charset=utf-8");
            r2Url = up.url;
        }
        catch (e) {
            log("warn", "r2 upload (pipeline) failed", { campaign_id, error: String(e) });
        }
    }
    if (neon.isConfigured()) {
        try {
            await neon.updateCampaign({
                campaign_id,
                status: "done",
                extracted_count: enriched.length,
                r2_extracted_url: r2Url,
            });
        }
        catch (e) {
            log("warn", "neon updateCampaign (pipeline) failed", { campaign_id, error: String(e) });
        }
    }
    return {
        campaign_id,
        papatto_found: search.company_count ?? 0,
        papatto_extracted: enriched.length,
        bizgate_matched: enriched.filter((c) => c.bizgate_matched).length,
        bizgate_enriched_deep: deeplyEnriched,
        companies: enriched,
        csv_local_path: csvPath,
        r2_url: r2Url,
        api_used: { bizgate: searchCalls + deepCalls },
        duration_ms: Date.now() - t0,
        search_quota: search.search_quota,
        download_quota: search.download_quota,
        ranking_mode,
    };
}
@@ -1,3 +1,5 @@
1
+ import type { Page } from "playwright";
2
+ import { type PapattoMaster } from "./master-extractor.js";
1
3
  export interface PapattoConditions {
2
4
  industries?: string[];
3
5
  papatto_tags?: string[];
@@ -39,6 +41,25 @@ export interface ExtractOutcome extends SearchOutcome {
39
41
  error?: string;
40
42
  error_screenshot_url?: string;
41
43
  }
44
+ export declare function loadMaster(): PapattoMaster | null;
45
+ export declare const activatedTabs: Set<string>;
46
+ export declare function applyConditions(page: Page, conditions: PapattoConditions, master: PapattoMaster): Promise<SearchOutcome["applied"]>;
47
+ export declare function readCounts(page: Page): Promise<{
48
+ records: number | null;
49
+ companies: number | null;
50
+ }>;
51
+ export declare function readQuotas(page: Page): Promise<{
52
+ download?: {
53
+ used: number;
54
+ limit: number;
55
+ exhausted: boolean;
56
+ };
57
+ search?: {
58
+ used: number;
59
+ limit: number;
60
+ };
61
+ }>;
62
+ export declare function clickSearch(page: Page): Promise<void>;
42
63
  export interface ExtractOptions {
43
64
  slack_user_id: string;
44
65
  campaign_id: string;
@@ -8,7 +8,7 @@ import { runQueued } from "../queue/playwright-queue.js";
8
8
  import { log } from "../shared/logger.js";
9
9
  const __dirname = dirname(fileURLToPath(import.meta.url));
10
10
  const MASTER_PATH = join(__dirname, "..", "..", "data", "papatto_master.json");
11
- function loadMaster() {
11
+ export function loadMaster() {
12
12
  if (!existsSync(MASTER_PATH))
13
13
  return null;
14
14
  try {
@@ -26,7 +26,7 @@ function findGroup(master, patterns) {
26
26
  }
27
27
  return undefined;
28
28
  }
29
- const activatedTabs = new Set();
29
+ export const activatedTabs = new Set();
30
30
  async function activateTab(page, sectionId) {
31
31
  if (!sectionId)
32
32
  return;
@@ -208,7 +208,7 @@ async function applyFlags(page, master, flags) {
208
208
  }
209
209
  return ok;
210
210
  }
211
- async function applyConditions(page, conditions, master) {
211
+ export async function applyConditions(page, conditions, master) {
212
212
  const applied = [];
213
213
  if (conditions.industries && conditions.industries.length > 0) {
214
214
  const { matched, missed } = await applyIndustries(page, master, conditions.industries);
@@ -264,7 +264,7 @@ async function applyConditions(page, conditions, master) {
264
264
  }
265
265
  return applied;
266
266
  }
267
- async function readCounts(page) {
267
+ export async function readCounts(page) {
268
268
  const text = await page.locator("body").innerText().catch(() => "");
269
269
  const both = text.match(/([\d,]+)\s*件\s*\(\s*会社数\s*([\d,]+)\s*社\s*\)/);
270
270
  if (both) {
@@ -280,7 +280,7 @@ async function readCounts(page) {
280
280
  }
281
281
  return { records: null, companies: null };
282
282
  }
283
- async function readQuotas(page) {
283
+ export async function readQuotas(page) {
284
284
  const text = await page.locator("body").innerText().catch(() => "");
285
285
  const out = {};
286
286
  const dl = text.match(/今月のダウンロード[::]?\s*([\d,]+)\s*社\s*\/\s*([\d,]+)\s*社/);
@@ -298,7 +298,7 @@ async function readQuotas(page) {
298
298
  }
299
299
  return out;
300
300
  }
301
- async function clickSearch(page) {
301
+ export async function clickSearch(page) {
302
302
  const candidates = [
303
303
  'input[value="Papatto検索"]',
304
304
  'button:has-text("Papatto検索")',
@@ -5,7 +5,10 @@ import { fileURLToPath } from "node:url";
5
5
  import { setCredential, deleteCredential, listAccounts, getCredential } from "./credentials.js";
6
6
  import { extractMaster, classifyGroup, } from "./master-extractor.js";
7
7
  import { runExtract } from "./search.js";
8
+ import { runSearchOnly } from "./page-extract.js";
9
+ import { runPipeline } from "./pipeline.js";
8
10
  import { parseCsv, sortRowsBy, takeTopN } from "./csv-parser.js";
11
+ import { friendlyError } from "./friendly-errors.js";
9
12
  import * as r2 from "../cloud/r2-client.js";
10
13
  import * as neon from "../cloud/neon-client.js";
11
14
  import * as gsheets from "../sheets/client.js";
@@ -419,6 +422,287 @@ export function registerPapattoTools(server) {
419
422
  return fail(e instanceof Error ? e.message : String(e));
420
423
  }
421
424
  });
425
+ server.tool("papatto__pipeline", "【営業部向けメインフロー】自然言語 ICP → Papatto で会社発掘 → BizGate で精密データ補強 → 営業優先度評価 → Google Sheets まで一気通貫。" +
426
+ "Papatto ダウンロード上限 (月 4000 社) を使わず、検索回数 (月 500 回) のみ消費。" +
427
+ "enrich_depth=smart (デフォルト) で上位 N 社のみ部署 / キーマンまで深掘り (BizGate API 節約)。" +
428
+ "応答は営業部向けに親切なテキスト (上位 5 社プレビュー + 統計 + 次のアクション提案)。", {
429
+ slack_user_id: z.string().describe("Slack ユーザー ID"),
430
+ campaign_id: z.string().optional().describe("キャンペーン ID (未指定時は YYYYMMDD_HHMM_xxxx 自動生成)"),
431
+ natural_language: z.string().optional().describe("元の自然言語クエリ (DB 保存用、検索ロジック未使用)"),
432
+ conditions: z
433
+ .object({
434
+ industries: z.array(z.string()).optional(),
435
+ papatto_tags: z.array(z.string()).optional(),
436
+ intent_tags: z.array(z.string()).optional(),
437
+ flags: z
438
+ .array(z.enum(["tel", "email", "url", "updorg", "hasbusho", "isB2B", "isB2C", "financer"]))
439
+ .optional(),
440
+ keyword: z.string().optional(),
441
+ raw: z.record(z.union([z.string(), z.array(z.string())])).optional(),
442
+ })
443
+ .describe("papatto__extract と同形式の検索条件"),
444
+ candidates_limit: z.number().optional().describe("Papatto で発掘する会社数 (デフォルト 50、最大 200)"),
445
+ enrich_depth: z
446
+ .enum(["basic", "smart", "full"])
447
+ .optional()
448
+ .describe("BizGate 補強深度: basic=会社基本情報のみ / smart=上位 N 社のみ部署+キーマン (デフォルト) / full=全社の部署+キーマン"),
449
+ smart_top_n: z.number().optional().describe("smart 時の深堀対象数 (デフォルト 20)"),
450
+ smart_ranking: z
451
+ .enum(["heuristic", "llm"])
452
+ .optional()
453
+ .describe("優先度評価方式: heuristic=規模スコア (デフォルト) / llm=応答後 Claude が再評価する想定"),
454
+ headless: z.boolean().optional(),
455
+ search_url: z.string().optional(),
456
+ }, async ({ slack_user_id, campaign_id, natural_language, conditions, candidates_limit, enrich_depth, smart_top_n, smart_ranking, headless, search_url, }) => {
457
+ try {
458
+ const outcome = await runPipeline({
459
+ slack_user_id,
460
+ campaign_id,
461
+ natural_language,
462
+ conditions: conditions,
463
+ candidates_limit,
464
+ enrich_depth,
465
+ smart_top_n,
466
+ smart_ranking,
467
+ headless: headless ?? true,
468
+ search_url,
469
+ });
470
+ if (outcome.error) {
471
+ const f = friendlyError(outcome.error, { lang: "ja" });
472
+ return ok([
473
+ `# 😥 申し訳ありません、リスト作成中に問題が発生しました`,
474
+ ``,
475
+ f.message,
476
+ ``,
477
+ `## 経過`,
478
+ `- Papatto 検索ヒット数: ${outcome.papatto_found}`,
479
+ `- 抽出済みデータ: ${outcome.papatto_extracted} 件 (途中まで)`,
480
+ `- BizGate 利用: ${outcome.api_used.bizgate} 回`,
481
+ ``,
482
+ `💡 5 分後にもう一度同じご依頼をお試しください。`,
483
+ ].join("\n"));
484
+ }
485
+ const top5 = outcome.companies.slice(0, 5);
486
+ const stars = (s) => {
487
+ const n = Math.min(5, Math.max(1, Math.round(s / 20)));
488
+ return "⭐".repeat(n);
489
+ };
490
+ const lines = [
491
+ `# ✅ 営業リスト作成 完了!`,
492
+ ``,
493
+ ];
494
+ if (outcome.r2_url || outcome.csv_local_path) {
495
+ lines.push(`📄 結果 CSV (ローカル): ${outcome.csv_local_path}`);
496
+ if (outcome.r2_url)
497
+ lines.push(`☁️ クラウド バックアップ: 保存済み`);
498
+ lines.push(``);
499
+ }
500
+ lines.push(`## 🥇 営業優先 おすすめ (上位 ${top5.length} 社)`, ``);
501
+ for (let i = 0; i < top5.length; i++) {
502
+ const c = top5[i];
503
+ lines.push(`### ${i + 1}. ${c.name} ${stars(c.priority_score)}`);
504
+ if (c.priority_reason)
505
+ lines.push(` _${c.priority_reason}_`);
506
+ const meta = [];
507
+ if (c.industry || c.papatto_industry)
508
+ meta.push(`業種: ${c.industry ?? c.papatto_industry}`);
509
+ if (c.revenue)
510
+ meta.push(`売上: ${c.revenue}`);
511
+ if (c.papatto_address)
512
+ meta.push(`所在: ${c.papatto_address}`);
513
+ if (c.ceo)
514
+ meta.push(`代表: ${c.ceo}`);
515
+ if (meta.length > 0)
516
+ lines.push(` ${meta.join(" / ")}`);
517
+ if (c.departments && c.departments.length > 0) {
518
+ const deptText = c.departments.slice(0, 3).map((d) => d.name).join(" / ");
519
+ lines.push(` 📋 部署: ${deptText}${c.departments.length > 3 ? ` ほか${c.departments.length - 3}件` : ""}`);
520
+ }
521
+ if (c.keymen && c.keymen.length > 0) {
522
+ lines.push(` 👤 キーマン: ${c.keymen.slice(0, 3).map((k) => k.role).join(" / ")}`);
523
+ }
524
+ if (c.papatto_phone || c.hp_url) {
525
+ const contact = [];
526
+ if (c.papatto_phone)
527
+ contact.push(`📞 ${c.papatto_phone}`);
528
+ if (c.hp_url)
529
+ contact.push(`🌐 ${c.hp_url}`);
530
+ lines.push(` ${contact.join(" / ")}`);
531
+ }
532
+ lines.push(``);
533
+ }
534
+ lines.push(`## 📊 全体統計`, `- Papatto ヒット件数: ${outcome.papatto_found.toLocaleString()} 社`, `- 抽出: ${outcome.papatto_extracted} 社`, `- BizGate マッチ: ${outcome.bizgate_matched} 社`);
535
+ if (outcome.bizgate_enriched_deep > 0) {
536
+ lines.push(`- 部署 / キーマンまで調査: ${outcome.bizgate_enriched_deep} 社`);
537
+ }
538
+ lines.push(`- BizGate API 利用: ${outcome.api_used.bizgate} 回`, `- 所要時間: ${(outcome.duration_ms / 1000).toFixed(1)} 秒`);
539
+ if (outcome.search_quota) {
540
+ lines.push(`- Papatto 検索回数: ${outcome.search_quota.used} / ${outcome.search_quota.limit}`);
541
+ }
542
+ lines.push(``, `## 💡 次にできること`, `- "1 番の会社をもっと詳しく" — 特定の会社を深掘り`, `- "東京の会社だけに絞って" — 別条件で再検索`, `- "私の過去の営業リスト見せて" — 過去キャンペーン一覧`, ``, `📁 キャンペーン ID: \`${outcome.campaign_id}\` (後で参照する時に使えます)`);
543
+ return ok(lines.join("\n"));
544
+ }
545
+ catch (e) {
546
+ const f = friendlyError(e, { lang: "ja" });
547
+ return ok(`# 😥 エラーが発生しました\n\n${f.message}`);
548
+ }
549
+ });
550
+ server.tool("papatto__search_only", "⚠️ CSV ダウンロードクォータを使わず、検索結果ページから企業情報を直接抽出する。月間ダウンロード上限 (例 4,000 社/月) が枯渇した際の代替手段。検索回数 (月 500 回程度) のみ消費。" +
551
+ "規約グレーゾーン: 「画面に表示されている情報を自動化で読む」形態だが、明確にダウンロードの代替であるため Papatto 側ポリシー確認推奨。" +
552
+ "保守的な throttle (デフォルト 1500ms) でサイト負荷最小化。" +
553
+ "取得できる列: 会社名 / 業種 / 代表所在地 / 電話 / 資本規模 / 従業員規模 / 決算月 (ページ表示分のみ)。" +
554
+ "ダウンロード CSV と比較: 法人番号 / メール / インボイス / 部署データは含まれない (これらは papatto__extract または BizGate enrichment で補完)。", {
555
+ slack_user_id: z.string().describe("Slack ユーザー ID"),
556
+ campaign_id: z.string().describe("キャンペーン ID (R2 / Neon 保存キー)"),
557
+ conditions: z
558
+ .object({
559
+ industries: z.array(z.string()).optional(),
560
+ papatto_tags: z.array(z.string()).optional(),
561
+ intent_tags: z.array(z.string()).optional(),
562
+ flags: z
563
+ .array(z.enum(["tel", "email", "url", "updorg", "hasbusho", "isB2B", "isB2C", "financer"]))
564
+ .optional(),
565
+ keyword: z.string().optional(),
566
+ raw: z.record(z.union([z.string(), z.array(z.string())])).optional(),
567
+ })
568
+ .describe("papatto__extract と同形式の検索条件"),
569
+ max_companies: z.number().optional().describe("抽出する企業数の上限 (デフォルト 100、最大 2000 推奨)"),
570
+ throttle_ms: z.number().optional().describe("ページ間待機時間ミリ秒 (デフォルト 1500、最小 300)"),
571
+ headless: z.boolean().optional(),
572
+ search_url: z.string().optional(),
573
+ row_selector: z.string().optional().describe("ページの企業 row CSS セレクタを明示 (未指定時は自動推定)"),
574
+ debug: z.boolean().optional().describe("debug:true で row 候補セレクタ一覧を応答に含める (初回検証用)"),
575
+ }, async ({ slack_user_id, campaign_id, conditions, max_companies, throttle_ms, headless, search_url, row_selector, debug, }) => {
576
+ const dbActive = neon.isConfigured();
577
+ if (dbActive) {
578
+ try {
579
+ const cred = await getCredential(slack_user_id);
580
+ if (cred)
581
+ await neon.upsertUser({ slack_user_id, papatto_email: cred.papatto_email });
582
+ await neon.insertCampaign({
583
+ campaign_id,
584
+ owner_slack_user_id: slack_user_id,
585
+ papatto_conditions: conditions,
586
+ });
587
+ }
588
+ catch (e) {
589
+ log("warn", "neon insertCampaign (search_only) failed", { campaign_id, error: String(e) });
590
+ }
591
+ }
592
+ try {
593
+ const outcome = await runSearchOnly({
594
+ slack_user_id,
595
+ campaign_id,
596
+ conditions: conditions,
597
+ max_companies: max_companies ?? 100,
598
+ throttle_ms: throttle_ms ?? 1500,
599
+ headless: headless ?? true,
600
+ search_url,
601
+ row_selector,
602
+ debug,
603
+ });
604
+ const lines = [
605
+ `## Papatto Search-Only 抽出結果 (campaign_id=${campaign_id})`,
606
+ `- ページ URL: ${outcome.page_url}`,
607
+ `- ヒット件数 (件 / 社): ${outcome.num_found ?? "-"} / ${outcome.company_count ?? "-"}`,
608
+ `- 抽出企業数: ${outcome.companies.length}`,
609
+ `- 巡回ページ数: ${outcome.pages_visited}`,
610
+ `- 使用 row セレクタ: ${outcome.row_selector_used ?? "(未特定)"}`,
611
+ ];
612
+ if (outcome.search_quota) {
613
+ lines.push(`- 検索回数: ${outcome.search_quota.used} / ${outcome.search_quota.limit}`);
614
+ }
615
+ if (outcome.download_quota) {
616
+ const q = outcome.download_quota;
617
+ lines.push(`- 今月のダウンロード (参考): ${q.used} / ${q.limit} 社${q.exhausted ? " ⚠️ (このツールでは消費しません)" : ""}`);
618
+ }
619
+ if (outcome.error) {
620
+ lines.push(``, `## ⚠️ エラー`, `- ${outcome.error}`);
621
+ if (outcome.debug_row_candidates && outcome.debug_row_candidates.length > 0) {
622
+ lines.push(`- row 候補 セレクタ (matched 数 / sample):`);
623
+ for (const c of outcome.debug_row_candidates.slice(0, 10)) {
624
+ lines.push(` - \`${c.selector}\` → ${c.matched} 件 / sample="${c.sample_text}"`);
625
+ }
626
+ }
627
+ else {
628
+ lines.push(`- 11 種 row セレクタ全て matched 0`);
629
+ }
630
+ if (outcome.debug_element_dist) {
631
+ const top = Object.entries(outcome.debug_element_dist).sort((a, b) => b[1] - a[1]).slice(0, 15);
632
+ lines.push(``, `### element 分布 (top 15)`);
633
+ for (const [tag, n] of top)
634
+ lines.push(` - ${tag}: ${n}`);
635
+ }
636
+ if (outcome.debug_body_snippet) {
637
+ lines.push(``, `### body / main HTML snippet (先頭 4000 字)`);
638
+ lines.push("```html");
639
+ lines.push(outcome.debug_body_snippet);
640
+ lines.push("```");
641
+ }
642
+ }
643
+ let r2Url;
644
+ if (outcome.csv_local_path) {
645
+ lines.push(``, `## 抽出 CSV`, `- ローカル: ${outcome.csv_local_path}`);
646
+ if (r2.isConfigured()) {
647
+ try {
648
+ const key = r2.csvKey(campaign_id, "page_extract.csv");
649
+ const up = await r2.uploadFile(key, outcome.csv_local_path, "text/csv; charset=utf-8");
650
+ r2Url = up.url;
651
+ lines.push(`- R2: ${up.url}`);
652
+ }
653
+ catch (e) {
654
+ lines.push(`- ⚠️ R2 アップロード失敗: ${e instanceof Error ? e.message : String(e)}`);
655
+ log("warn", "r2 upload (search_only) failed", { campaign_id, error: String(e) });
656
+ }
657
+ }
658
+ if (dbActive) {
659
+ try {
660
+ await neon.updateCampaign({
661
+ campaign_id,
662
+ status: "done",
663
+ extracted_count: outcome.companies.length,
664
+ r2_extracted_url: r2Url,
665
+ });
666
+ }
667
+ catch (e) {
668
+ log("warn", "neon updateCampaign (search_only done) failed", { campaign_id, error: String(e) });
669
+ }
670
+ }
671
+ }
672
+ else if (dbActive) {
673
+ try {
674
+ await neon.updateCampaign({ campaign_id, status: "failed" });
675
+ }
676
+ catch (e) {
677
+ log("warn", "neon updateCampaign (search_only failed) failed", { campaign_id, error: String(e) });
678
+ }
679
+ }
680
+ if (outcome.companies.length > 0) {
681
+ lines.push(``, `## 先頭 5 社 サンプル`);
682
+ for (const c of outcome.companies.slice(0, 5)) {
683
+ const fields = [c.name];
684
+ if (c.industry)
685
+ fields.push(`業: ${c.industry}`);
686
+ if (c.address)
687
+ fields.push(`住: ${c.address}`);
688
+ if (c.phone)
689
+ fields.push(`電: ${c.phone}`);
690
+ if (c.capital_size)
691
+ fields.push(`資: ${c.capital_size}`);
692
+ if (c.emp_size)
693
+ fields.push(`従: ${c.emp_size}`);
694
+ if (c.fiscal_month)
695
+ fields.push(`決: ${c.fiscal_month}`);
696
+ lines.push(`- ${fields.join(" / ")}`);
697
+ }
698
+ }
699
+ lines.push(``, `> ⚠️ 規約グレー: Papatto 公式 CSV ダウンロード とは別ルート。Papatto 側ポリシー確認推奨。`, `> 補完情報 (法人番号 / メール / インボイス / 部署 など) は papatto__extract (CSV ダウンロード) または BizGate enrichment で取得可能。`);
700
+ return ok(lines.join("\n"));
701
+ }
702
+ catch (e) {
703
+ return fail(e instanceof Error ? e.message : String(e));
704
+ }
705
+ });
422
706
  server.tool("papatto__queue_status", "Playwright 실행 큐의 현재 상태를 반환한다. concurrency / active (실행 중) / pending (대기 중) / paused. 환경변수 PAPATTO_PLAYWRIGHT_CONCURRENCY 로 동시 실행 수 조정 (default 2).", {}, async () => {
423
707
  const s = getQueueStatus();
424
708
  return ok([
@@ -447,8 +731,8 @@ export function registerPapattoTools(server) {
447
731
  `- オーナー: ${c.owner_slack_user_id}`,
448
732
  `- ステータス: ${c.status}`,
449
733
  `- 抽出件数: ${c.extracted_count ?? "-"}`,
450
- `- 作成: ${new Date(c.created_at).toISOString()}`,
451
- `- 更新: ${new Date(c.updated_at).toISOString()}`,
734
+ `- 作成: ${new Date(Number(c.created_at)).toISOString()}`,
735
+ `- 更新: ${new Date(Number(c.updated_at)).toISOString()}`,
452
736
  `- R2: ${c.r2_extracted_url ?? "-"}`,
453
737
  `- Sheet: ${c.sheet_url ?? "-"}`,
454
738
  ];
@@ -468,7 +752,7 @@ export function registerPapattoTools(server) {
468
752
  return ok(`${owner_slack_user_id} の campaign は見つかりませんでした。`);
469
753
  const header = `| campaign_id | status | 抽出件数 | 作成日時 | R2 |\n|-------------|--------|----------|----------|----|`;
470
754
  const lines = rows.map((c) => {
471
- const dt = new Date(c.created_at).toISOString().replace("T", " ").slice(0, 16);
755
+ const dt = new Date(Number(c.created_at)).toISOString().replace("T", " ").slice(0, 16);
472
756
  const r2_short = c.r2_extracted_url ? "✓" : "-";
473
757
  return `| ${c.campaign_id} | ${c.status} | ${c.extracted_count ?? "-"} | ${dt} | ${r2_short} |`;
474
758
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bizgate-mcp-server",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "BizGate API + Papatto Cloud × Claude 連携 MCP サーバー (CSV → R2 → Neon → Google Sheets 一括ワークフロー)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",