@zonuexe/techbook-mcp 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.claude/settings.local.json +13 -1
  2. package/.codex/skills/techbook-mcp-release-prep/SKILL.md +105 -0
  3. package/.github/workflows/test.yml +36 -0
  4. package/.oxlintrc.json +12 -0
  5. package/AGENTS.md +29 -1
  6. package/CHANGELOG.md +34 -0
  7. package/deno.json +3 -0
  8. package/dist/adapters/html/cheerio-parser.d.ts.map +1 -1
  9. package/dist/adapters/html/cheerio-parser.js.map +1 -1
  10. package/dist/adapters/publishers/base.d.ts +22 -1
  11. package/dist/adapters/publishers/base.d.ts.map +1 -1
  12. package/dist/adapters/publishers/base.js +142 -2
  13. package/dist/adapters/publishers/base.js.map +1 -1
  14. package/dist/adapters/publishers/book-tech.d.ts +3 -0
  15. package/dist/adapters/publishers/book-tech.d.ts.map +1 -0
  16. package/dist/adapters/publishers/book-tech.js +95 -0
  17. package/dist/adapters/publishers/book-tech.js.map +1 -0
  18. package/dist/adapters/publishers/born-digital.d.ts +3 -0
  19. package/dist/adapters/publishers/born-digital.d.ts.map +1 -0
  20. package/dist/adapters/publishers/born-digital.js +122 -0
  21. package/dist/adapters/publishers/born-digital.js.map +1 -0
  22. package/dist/adapters/publishers/coronasha.d.ts +3 -0
  23. package/dist/adapters/publishers/coronasha.d.ts.map +1 -0
  24. package/dist/adapters/publishers/coronasha.js +119 -0
  25. package/dist/adapters/publishers/coronasha.js.map +1 -0
  26. package/dist/adapters/publishers/impress.d.ts +3 -0
  27. package/dist/adapters/publishers/impress.d.ts.map +1 -0
  28. package/dist/adapters/publishers/impress.js +92 -0
  29. package/dist/adapters/publishers/impress.js.map +1 -0
  30. package/dist/adapters/publishers/manatee.d.ts +3 -0
  31. package/dist/adapters/publishers/manatee.d.ts.map +1 -0
  32. package/dist/adapters/publishers/manatee.js +93 -0
  33. package/dist/adapters/publishers/manatee.js.map +1 -0
  34. package/dist/adapters/publishers/maruzen-publishing.d.ts +3 -0
  35. package/dist/adapters/publishers/maruzen-publishing.d.ts.map +1 -0
  36. package/dist/adapters/publishers/maruzen-publishing.js +108 -0
  37. package/dist/adapters/publishers/maruzen-publishing.js.map +1 -0
  38. package/dist/adapters/publishers/optronics.d.ts +3 -0
  39. package/dist/adapters/publishers/optronics.d.ts.map +1 -0
  40. package/dist/adapters/publishers/optronics.js +92 -0
  41. package/dist/adapters/publishers/optronics.js.map +1 -0
  42. package/dist/adapters/publishers/oreilly-japan.d.ts +3 -0
  43. package/dist/adapters/publishers/oreilly-japan.d.ts.map +1 -0
  44. package/dist/adapters/publishers/oreilly-japan.js +112 -0
  45. package/dist/adapters/publishers/oreilly-japan.js.map +1 -0
  46. package/dist/adapters/publishers/peaks.d.ts +3 -0
  47. package/dist/adapters/publishers/peaks.d.ts.map +1 -0
  48. package/dist/adapters/publishers/peaks.js +80 -0
  49. package/dist/adapters/publishers/peaks.js.map +1 -0
  50. package/dist/adapters/publishers/personal-media.d.ts +3 -0
  51. package/dist/adapters/publishers/personal-media.d.ts.map +1 -0
  52. package/dist/adapters/publishers/personal-media.js +144 -0
  53. package/dist/adapters/publishers/personal-media.js.map +1 -0
  54. package/dist/adapters/publishers/registry.d.ts.map +1 -1
  55. package/dist/adapters/publishers/registry.js +26 -0
  56. package/dist/adapters/publishers/registry.js.map +1 -1
  57. package/dist/adapters/publishers/rutles.d.ts +3 -0
  58. package/dist/adapters/publishers/rutles.d.ts.map +1 -0
  59. package/dist/adapters/publishers/rutles.js +128 -0
  60. package/dist/adapters/publishers/rutles.js.map +1 -0
  61. package/dist/adapters/publishers/saiensu.d.ts +3 -0
  62. package/dist/adapters/publishers/saiensu.d.ts.map +1 -0
  63. package/dist/adapters/publishers/saiensu.js +109 -0
  64. package/dist/adapters/publishers/saiensu.js.map +1 -0
  65. package/dist/adapters/publishers/seshop.d.ts +3 -0
  66. package/dist/adapters/publishers/seshop.d.ts.map +1 -0
  67. package/dist/adapters/publishers/seshop.js +98 -0
  68. package/dist/adapters/publishers/seshop.js.map +1 -0
  69. package/dist/application/get-book-detail.d.ts.map +1 -1
  70. package/dist/application/get-book-detail.js +5 -0
  71. package/dist/application/get-book-detail.js.map +1 -1
  72. package/dist/application/search-books.d.ts.map +1 -1
  73. package/dist/application/search-books.js +7 -1
  74. package/dist/application/search-books.js.map +1 -1
  75. package/dist/domain/book.d.ts +5 -4
  76. package/dist/domain/book.d.ts.map +1 -1
  77. package/dist/main.d.ts +1 -0
  78. package/dist/main.js +1 -0
  79. package/dist/main.js.map +1 -1
  80. package/dist/mcp/server.d.ts.map +1 -1
  81. package/dist/mcp/server.js +1 -0
  82. package/dist/mcp/server.js.map +1 -1
  83. package/flake.nix +1 -1
  84. package/package.json +7 -5
  85. package/src/adapters/html/cheerio-parser.ts +4 -3
  86. package/src/adapters/publishers/base.ts +150 -0
  87. package/src/adapters/publishers/born-digital.ts +2 -17
  88. package/src/adapters/publishers/impress.ts +103 -0
  89. package/src/adapters/publishers/manatee.ts +2 -1
  90. package/src/adapters/publishers/maruzen-publishing.ts +4 -16
  91. package/src/adapters/publishers/oreilly-japan.ts +5 -10
  92. package/src/adapters/publishers/registry.ts +2 -0
  93. package/src/adapters/publishers/rutles.ts +1 -13
  94. package/src/adapters/publishers/saiensu.ts +5 -18
  95. package/src/adapters/publishers/seshop.ts +1 -1
  96. package/src/adapters/publishers/tatsu-zine.ts +61 -36
  97. package/src/application/get-book-detail.ts +7 -0
  98. package/src/application/search-books.ts +6 -1
  99. package/src/main.ts +1 -0
  100. package/tests/fixtures/impress-detail-epub.html +746 -0
  101. package/tests/fixtures/impress-detail-social.html +689 -0
  102. package/tests/fixtures/tatsu-zine-search.html +29 -13
  103. package/tests/unit/adapters/base.test.ts +441 -0
  104. package/tests/unit/adapters/publishers/book-tech.test.ts +18 -15
  105. package/tests/unit/adapters/publishers/born-digital.test.ts +18 -15
  106. package/tests/unit/adapters/publishers/coronasha.test.ts +26 -20
  107. package/tests/unit/adapters/publishers/gihyo.test.ts +21 -19
  108. package/tests/unit/adapters/publishers/impress.test.ts +129 -0
  109. package/tests/unit/adapters/publishers/lambdanote.test.ts +12 -11
  110. package/tests/unit/adapters/publishers/manatee.test.ts +14 -12
  111. package/tests/unit/adapters/publishers/maruzen-publishing.test.ts +19 -17
  112. package/tests/unit/adapters/publishers/optronics.test.ts +19 -16
  113. package/tests/unit/adapters/publishers/oreilly-japan.test.ts +19 -16
  114. package/tests/unit/adapters/publishers/peaks.test.ts +17 -14
  115. package/tests/unit/adapters/publishers/personal-media.test.ts +18 -15
  116. package/tests/unit/adapters/publishers/rutles.test.ts +15 -12
  117. package/tests/unit/adapters/publishers/saiensu.test.ts +14 -12
  118. package/tests/unit/adapters/publishers/seshop.test.ts +16 -13
  119. package/tests/unit/adapters/publishers/tatsu-zine.test.ts +56 -14
  120. package/tests/unit/adapters/publishers/techbookfest.test.ts +12 -11
  121. package/tests/unit/adapters/registry.test.ts +37 -0
  122. package/tests/unit/application/get-book-detail.test.ts +102 -0
  123. package/tests/unit/application/search-books.test.ts +137 -0
  124. package/vitest.config.ts +0 -8
@@ -1,3 +1,4 @@
1
+ import iconv from "iconv-lite";
1
2
  import type { PublisherDeps } from "../../domain/publisher.js";
2
3
  import type { EbookStore, DrmType } from "../../domain/book.js";
3
4
  import type { HtmlDocument } from "../../ports/html-parser.js";
@@ -9,6 +10,126 @@ const DEFAULT_HEADERS = {
9
10
  };
10
11
 
11
12
/** General cache lifetime in seconds (1 hour). */
export const CACHE_TTL_SECONDS = 60 * 60;
/** Cache lifetime in seconds for fetched robots.txt bodies (6 hours). */
export const ROBOTS_CACHE_TTL_SECONDS = 6 * 60 * 60;
14
+
15
+ // --- robots.txt チェック ---
16
+
17
/** A single Allow/Disallow directive taken from a robots.txt group. */
interface RobotsRule {
  /** Path value written after the directive; may be the empty string. */
  path: string;
  /** Which directive produced this rule. */
  type: "allow" | "disallow";
}
22
+
23
/** One user-agent group of a robots.txt file: the agents it names and their rules. */
interface RobotsSection {
  /** Rules listed under the group, in file order. */
  rules: RobotsRule[];
  /** User-agent tokens the group applies to (the parser stores them lower-cased). */
  agents: string[];
}
28
+
29
/**
 * Parses the text of a robots.txt file into its user-agent groups.
 *
 * - Lines may end in LF, CRLF or a lone CR.
 * - Everything after `#` on a line is a comment and is discarded.
 * - Blank and comment-only lines are ignored. They do NOT end the current
 *   group (RFC 9309 allows empty lines inside a group), so rules written
 *   after a blank line still belong to the preceding `User-agent` line(s)
 *   instead of being silently dropped.
 * - Consecutive `User-agent` lines share one group; a `User-agent` line that
 *   follows an Allow/Disallow rule starts a new group.
 * - Directives other than User-agent / Allow / Disallow are skipped, as are
 *   rules that appear before any `User-agent` line.
 *
 * @param content Raw robots.txt body.
 * @returns Groups in file order, with agent tokens lower-cased.
 */
function parseRobotsTxt(content: string): RobotsSection[] {
  const sections: RobotsSection[] = [];
  let current: RobotsSection | null = null;
  // True while the current group has only seen User-agent lines so far.
  let collectingAgents = false;

  for (const rawLine of content.split(/\r\n|\r|\n/)) {
    const hashIdx = rawLine.indexOf("#");
    const line = (hashIdx === -1 ? rawLine : rawLine.slice(0, hashIdx)).trim();
    if (!line) continue;

    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const key = line.slice(0, colonIdx).trim().toLowerCase();
    const value = line.slice(colonIdx + 1).trim();

    if (key === "user-agent") {
      if (current !== null && collectingAgents) {
        // Another agent for the group that is still being declared.
        current.agents.push(value.toLowerCase());
      } else {
        current = { agents: [value.toLowerCase()], rules: [] };
        sections.push(current);
        collectingAgents = true;
      }
    } else if (current !== null && (key === "allow" || key === "disallow")) {
      collectingAgents = false;
      current.rules.push({ type: key, path: value });
    }
  }

  return sections;
}
73
+
74
/**
 * Returns the rules that apply to the given user-agent token.
 *
 * Groups naming the agent explicitly take precedence; only when no group
 * names it do the `*` groups apply. If several groups match, their rules are
 * concatenated in file order (RFC 9309 requires matching groups to be
 * combined), rather than using the first group alone.
 *
 * @param sections   Parsed robots.txt groups (agent tokens lower-cased).
 * @param agentToken Product token of this crawler; compared case-insensitively.
 * @returns The combined rule list, or an empty array when nothing matches.
 */
function getRulesForAgent(sections: RobotsSection[], agentToken: string): RobotsRule[] {
  const token = agentToken.toLowerCase();

  // null means "no group names this agent", which is different from
  // "a group names it but lists no rules" (that yields []).
  const collect = (agent: string): RobotsRule[] | null => {
    const matching = sections.filter(section => section.agents.includes(agent));
    return matching.length > 0 ? matching.flatMap(section => section.rules) : null;
  };

  return collect(token) ?? collect("*") ?? [];
}
86
+
87
/**
 * Tests a URL path against one robots.txt rule pattern.
 * `*` matches any run of characters (including none) and a trailing `$`
 * anchors the pattern to the end of the path; otherwise the pattern is a
 * plain prefix.
 */
function matchesRobotsPattern(path: string, pattern: string): boolean {
  const anchored = pattern.endsWith("$");
  const segments = (anchored ? pattern.slice(0, -1) : pattern).split("*");

  const head = segments[0] ?? "";
  if (!path.startsWith(head)) return false;
  if (segments.length === 1) return !anchored || path.length === head.length;

  // Place each middle segment at its leftmost possible position.
  let cursor = head.length;
  for (const segment of segments.slice(1, -1)) {
    const at = path.indexOf(segment, cursor);
    if (at === -1) return false;
    cursor = at + segment.length;
  }

  const tail = segments[segments.length - 1] ?? "";
  if (anchored) return path.length - tail.length >= cursor && path.endsWith(tail);
  return path.indexOf(tail, cursor) !== -1;
}

/**
 * Decides whether `path` may be fetched under the given rules.
 *
 * The matching rule with the longest pattern wins. When an Allow and a
 * Disallow pattern of equal length both match, Allow wins (RFC 9309) rather
 * than whichever came first in the file. Patterns may use `*` and a trailing
 * `$`. Rules with an empty path are skipped: an empty Disallow means
 * "allow everything", which is the same as matching nothing.
 *
 * @param path  Path plus query string of the URL being checked.
 * @param rules Rules applicable to this crawler.
 * @returns true when no rule matches or the winning rule is an Allow.
 */
function isPathAllowed(path: string, rules: RobotsRule[]): boolean {
  let bestLength = -1;
  let allowed = true;

  for (const rule of rules) {
    if (!rule.path) continue;
    if (!matchesRobotsPattern(path, rule.path)) continue;

    const isAllow = rule.type === "allow";
    const longer = rule.path.length > bestLength;
    const tieWonByAllow = rule.path.length === bestLength && isAllow;
    if (longer || tieWonByAllow) {
      bestLength = rule.path.length;
      allowed = isAllow;
    }
  }

  return allowed;
}
101
+
102
/**
 * Checks whether the robots.txt of the URL's origin permits fetching `url`
 * for the "techbook-mcp" user agent.
 *
 * The robots.txt body is read from the cache under `robots:<origin>`; on a
 * miss it is downloaded and stored for ROBOTS_CACHE_TTL_SECONDS. The policy
 * is fail-open: a non-200 response or a failed request is stored as an empty
 * body, and an empty body means access is allowed.
 *
 * @param url  Absolute URL about to be requested.
 * @param deps Provides the HTTP client and the cache.
 * @returns true when access is allowed, false when a rule forbids it.
 */
export async function checkRobotsTxt(url: string, deps: PublisherDeps): Promise<boolean> {
  const target = new URL(url);
  const origin = `${target.protocol}//${target.host}`;
  const cacheKey = `robots:${origin}`;

  // Downloads robots.txt; any failure collapses to "" (treated as allow-all).
  const download = async (): Promise<string> => {
    try {
      const response = await deps.http.get(`${origin}/robots.txt`, { headers: DEFAULT_HEADERS });
      return response.status === 200 ? await response.text() : "";
    } catch {
      return "";
    }
  };

  let body = await deps.cache.get(cacheKey);
  if (body === null) {
    body = await download();
    await deps.cache.set(cacheKey, body, ROBOTS_CACHE_TTL_SECONDS);
  }

  if (!body) return true;

  const applicableRules = getRulesForAgent(parseRobotsTxt(body), "techbook-mcp");
  return isPathAllowed(target.pathname + target.search, applicableRules);
}
12
133
 
13
134
  export async function fetchText(
14
135
  url: string,
@@ -37,6 +158,35 @@ export function stripHtmlTags(html: string): string {
37
158
  return html.replace(/<[^>]+>/g, "");
38
159
  }
39
160
 
161
/**
 * Percent-encodes a keyword using its EUC-JP byte representation.
 * Every byte is emitted as `%XX` with upper-case hex digits. Intended for
 * sites whose search only accepts EUC-JP encoded queries (born-digital,
 * rutles).
 *
 * @param text Keyword to encode.
 * @returns The `%XX…` sequence for the EUC-JP bytes of `text`.
 */
export function encodeEucJp(text: string): string {
  let encoded = "";
  for (const byte of iconv.encode(text, "euc-jp")) {
    encoded += "%" + byte.toString(16).toUpperCase().padStart(2, "0");
  }
  return encoded;
}
171
+
172
/**
 * Converts the first Japanese-style date found in `text` to ISO `YYYY-MM-DD`.
 * Example: "2026年3月25日" → "2026-03-25"
 *
 * Single-digit months and days are zero-padded. The text is NFKC-normalized
 * first so full-width digits ("２０２６年３月２５日") are recognized, and
 * whitespace around 年 / 月 / 日 is tolerated. A match whose month is outside
 * 1–12 or whose day is outside 1–31 is rejected instead of being emitted as
 * an impossible date.
 *
 * @param text Text that may contain a `YYYY年M月D日` date.
 * @returns The ISO date string, or undefined when no valid date is found.
 */
export function parseJapaneseDateToISO(text: string): string | undefined {
  const normalized = text.normalize("NFKC");
  const m = normalized.match(/(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日/);
  if (!m) return undefined;

  const [, year, month, day] = m;
  const monthNum = Number(month);
  const dayNum = Number(day);
  if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) return undefined;

  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
}
181
+
182
/**
 * Removes a trailing role word (著, 訳, 編, 監修, 監訳, 他, …) from an author
 * credit and returns only the name.
 * Example: "Dan Vanderkam 著" → "Dan Vanderkam"
 *
 * Compound roles such as 編著, 共著, 共訳, 原著, 編集 and 翻訳 are matched as
 * a whole, so "山田太郎 編著" becomes "山田太郎" rather than "山田太郎 編".
 * Only one role word at the very end of the string is removed.
 *
 * @param name Author credit, optionally followed by a role word.
 * @returns The trimmed name without the trailing role word.
 */
export function stripAuthorRole(name: string): string {
  // \s already covers the ideographic space U+3000.
  const trailingRole =
    /\s*(?:著・訳|著訳|編著|共著|共訳|原著|編集|翻訳|監修|監訳|著|訳|編|他)\s*$/;
  return name.replace(trailingRole, "").trim();
}
189
+
40
190
  /** "¥3,960" や "3,300円(税込)" などから整数値を取り出す */
41
191
  export function parseJapanesePrice(text: string): number | undefined {
42
192
  const match = text.match(/[\d,]+/);
@@ -1,21 +1,9 @@
1
- import iconv from "iconv-lite";
2
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
3
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
4
- import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
3
+ import { fetchText, parseJapanesePrice, resolveUrl, encodeEucJp, parseJapaneseDateToISO } from "./base.js";
5
4
 
6
5
  const BASE_URL = "https://wgn-obs.shop-pro.jp";
7
6
 
8
- /**
9
- * キーワードを EUC-JP でパーセントエンコードする。
10
- * wgn-obs.shop-pro.jp は EUC-JP エンコードされたクエリのみ受け付けるため。
11
- */
12
- function encodeEucJp(text: string): string {
13
- const bytes = iconv.encode(text, "euc-jp");
14
- return Array.from(bytes)
15
- .map(b => "%" + b.toString(16).toUpperCase().padStart(2, "0"))
16
- .join("");
17
- }
18
-
19
7
  /**
20
8
  * 商品説明テキストから著者・出版社・発売日を取得する。
21
9
  * 以下の2形式に対応:
@@ -45,10 +33,7 @@ function parseDescription(text: string): {
45
33
  } else if (key.startsWith("発行") || key === "発売") {
46
34
  publisher = value.replace(/^株式会社\s*/, "").replace(/\s*株式会社$/, "").trim();
47
35
  } else if (key === "発売日") {
48
- const dm = value.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
49
- if (dm) {
50
- publishedAt = `${dm[1]}-${dm[2].padStart(2, "0")}-${dm[3].padStart(2, "0")}`;
51
- }
36
+ publishedAt = parseJapaneseDateToISO(value);
52
37
  }
53
38
  }
54
39
 
@@ -0,0 +1,103 @@
1
+ import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
+ import type { BookRecord, SearchQuery, DrmType } from "../../domain/book.js";
3
+ import { fetchText, parseJapanesePrice, stripAuthorRole } from "./base.js";
4
+
5
+ const BASE_URL = "https://book.impress.co.jp";
6
+
7
/**
 * Converts the first slash-separated date in `text` to ISO form.
 * Example: "2026/1/22" → "2026-01-22"
 *
 * @returns The zero-padded ISO date, or undefined when no `Y/M/D` date is present.
 */
function parseImpressDate(text: string): string | undefined {
  const found = /(\d{4})\/(\d{1,2})\/(\d{1,2})/.exec(text.trim());
  if (found === null) return undefined;

  const [, year, month, day] = found;
  return [year, month.padStart(2, "0"), day.padStart(2, "0")].join("-");
}
13
+
14
/**
 * Splits an author credit string into individual author names.
 * Example: "山本康太 著" → ["山本康太"]
 *
 * Entries are separated by "、", "," or a newline. Each entry is trimmed,
 * passed through stripAuthorRole, and dropped if nothing remains.
 */
function parseAuthors(text: string): string[] {
  const names: string[] = [];
  for (const piece of text.split(/[、,\n]/)) {
    const name = stripAuthorRole(piece.trim());
    if (name) names.push(name);
  }
  return names;
}
25
+
26
/**
 * Determines the DRM type from the e-book purchase guide text.
 *
 * Patterns are tried in order: social DRM, DRM-free, password-protected.
 * When none matches, "social" is returned (the original author's note
 * attributes this default to Impress's official policy).
 */
function parseDrmType(text: string): DrmType {
  const explicitMarkers: ReadonlyArray<readonly [RegExp, DrmType]> = [
    [/ソーシャルDRM/i, "social"],
    [/DRM-?free|DRMフリー/i, "free"],
    [/パスワード/i, "password_pdf"],
  ];

  for (const [pattern, drm] of explicitMarkers) {
    if (pattern.test(text)) return drm;
  }
  return "social";
}
36
+
37
/**
 * Publisher adapter for Impress Books (book.impress.co.jp).
 * Only detail-page lookup is supported; search always yields nothing.
 */
export const impressBooksAdapter: PublisherAdapter = {
  id: "impress-books",
  name: "インプレスブックス",
  baseUrl: BASE_URL,

  /**
   * Always resolves to an empty list: per the original author's note, the
   * site's search page is rendered client-side by Google Custom Search
   * Engine and cannot be scraped.
   */
  async search(_query: SearchQuery, _deps: PublisherDeps): Promise<BookRecord[]> {
    return [];
  },

  /**
   * Fetches a book detail page and extracts its metadata.
   *
   * @param url  Detail page URL; also used as the record URL and the store URL.
   * @param deps Provides the HTTP/cache layer (via fetchText) and the HTML parser.
   */
  async getDetail(url: string, deps: PublisherDeps): Promise<BookRecord> {
    const doc = deps.parser.parse(await fetchText(url, deps));

    // Title: the first h2 on the page.
    const title = doc.selectOne("h2")?.text().trim() ?? "";

    // Pair the dt/dd of dl.module-book-data by position.
    const labels = doc.select("dl.module-book-data dt");
    const values = doc.select("dl.module-book-data dd");
    const bookData = new Map<string, string>();
    labels.forEach((dt, idx) => {
      const label = dt.text().trim();
      if (!label) return;
      bookData.set(label, values[idx]?.text().trim() ?? "");
    });

    const authors = parseAuthors(bookData.get("著者") ?? "");
    const isbn = bookData.get("ISBN")?.replace(/\s/g, "") || undefined;
    const publishedAt = parseImpressDate(bookData.get("発売日") ?? "");

    // Cover image: protocol-relative URLs get an explicit https: scheme.
    const rawCover = doc.selectOne(".block-book-detail-img img")?.attr("src");
    let coverImageUrl: string | undefined;
    if (rawCover) {
      coverImageUrl = rawCover.startsWith("//") ? `https:${rawCover}` : rawCover;
    }

    // E-book price and DRM come from the purchase guide, when present.
    const guide = doc.selectOne(".module-e-book-buy-guide-txt");
    const buyLink = doc.selectOne(".module-e-book-buy-guide-btn a");

    const priceText = guide?.find(".module-e-book-price")[0]?.text();
    const price = priceText ? parseJapanesePrice(priceText) : undefined;
    const drm: DrmType = guide ? parseDrmType(guide.text()) : "social";

    // A store entry is listed only when the page has a buy button.
    const ebookStores = buyLink ? [{ name: "インプレスブックス", url, drm }] : [];

    return {
      title,
      authors,
      publisher: "インプレスブックス",
      url,
      isbn,
      price,
      publishedAt,
      coverImageUrl,
      ebookStores,
    };
  },
};
@@ -1,5 +1,6 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
3
+ import type { HtmlDocument } from "../../ports/html-parser.js";
3
4
  import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
4
5
 
5
6
  const BASE_URL = "https://book.mynavi.jp/manatee";
@@ -9,7 +10,7 @@ const BOOKS_URL = `${BASE_URL}/books/`;
9
10
  * `.attribute li` 内の著者リンクを配列にする。
10
11
  * 各 <a> のテキストが著者名(役割は括弧内テキストで付記されているが名前は <a> 内)。
11
12
  */
12
- function parseAuthors(doc: ReturnType<import("../../ports/html-parser.js").HtmlParser["parse"]>): string[] {
13
+ function parseAuthors(doc: HtmlDocument): string[] {
13
14
  return doc
14
15
  .select(".attribute li a")
15
16
  .map(el => el.text().trim())
@@ -1,6 +1,6 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
3
- import { fetchText, resolveUrl, extractEbookStoresFromDoc } from "./base.js";
3
+ import { fetchText, resolveUrl, extractEbookStoresFromDoc, parseJapaneseDateToISO, stripAuthorRole } from "./base.js";
4
4
 
5
5
  const BASE_URL = "https://www.maruzen-publishing.co.jp";
6
6
  const SEARCH_URL = `${BASE_URL}/search/`;
@@ -13,20 +13,14 @@ const EXTRA_HEADERS = { Referer: `${BASE_URL}/` };
13
13
  * 役割語(著・訳・編・監訳・監修など)を除去する。
14
14
  */
15
15
  function parseAuthorsFromText(text: string): string[] {
16
- return text
17
- .split(/[ \s]+(?=\S)/)
18
- .map(part => part.replace(/[\u3000\s]*(著|訳|編|監修|監訳|他)[\u3000\s]*$/, "").trim())
19
- .filter(Boolean);
16
+ return text.split(/[ \s]+(?=\S)/).map(stripAuthorRole).filter(Boolean);
20
17
  }
21
18
 
22
19
  /**
23
20
  * div.author 内の各リンクから役割語を除去して著者名のみ返す。
24
- * リンクテキストには名前のみ含まれるため、隣接するテキストノードの役割語は無視してよい。
25
21
  */
26
22
  function parseAuthorLinks(authors: string[]): string[] {
27
- return authors
28
- .map(name => name.replace(/[\u3000\s]*(著|訳|編|監修|監訳|他)[\u3000\s]*$/, "").trim())
29
- .filter(Boolean);
23
+ return authors.map(stripAuthorRole).filter(Boolean);
30
24
  }
31
25
 
32
26
  /**
@@ -34,17 +28,11 @@ function parseAuthorLinks(authors: string[]): string[] {
34
28
  * "2020年3月31日" → "2020-03-31"
35
29
  */
36
30
  function parseDate(text: string): string | undefined {
37
- // YYYY/MM/DD
38
31
  const m1 = text.match(/(\d{4})\/(\d{1,2})\/(\d{1,2})/);
39
32
  if (m1) {
40
33
  return `${m1[1]}-${m1[2].padStart(2, "0")}-${m1[3].padStart(2, "0")}`;
41
34
  }
42
- // YYYY年M月D日
43
- const m2 = text.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
44
- if (m2) {
45
- return `${m2[1]}-${m2[2].padStart(2, "0")}-${m2[3].padStart(2, "0")}`;
46
- }
47
- return undefined;
35
+ return parseJapaneseDateToISO(text);
48
36
  }
49
37
 
50
38
  export const maruzenPublishingAdapter: PublisherAdapter = {
@@ -1,6 +1,6 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
3
- import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
3
+ import { fetchText, parseJapanesePrice, resolveUrl, parseJapaneseDateToISO, stripAuthorRole } from "./base.js";
4
4
 
5
5
  const BASE_URL = "https://www.oreilly.co.jp";
6
6
  const EBOOK_LIST_URL = `${BASE_URL}/ebook/`;
@@ -10,22 +10,17 @@ const EBOOK_LIST_URL = `${BASE_URL}/ebook/`;
10
10
  * "2025-04-08" (content属性) はそのまま返す
11
11
  */
12
12
  function parseOreillyDate(text: string): string | undefined {
13
- const jpMatch = text.match(/(\d{4})年(\d{2})月(\d{2})日/);
14
- if (jpMatch) return `${jpMatch[1]}-${jpMatch[2]}-${jpMatch[3]}`;
15
13
  const isoMatch = text.match(/\d{4}-\d{2}-\d{2}/);
16
- return isoMatch ? isoMatch[0] : undefined;
14
+ if (isoMatch) return isoMatch[0];
15
+ return parseJapaneseDateToISO(text);
17
16
  }
18
17
 
19
18
  /**
20
19
  * 著者文字列をパースして配列に変換する。
21
20
  * 例: "Dan Vanderkam 著、今村 謙士 訳" → ["Dan Vanderkam", "今村 謙士"]
22
- * 役割語(著・訳・監訳・監修・編など)を除去する。
23
21
  */
24
22
  function parseAuthors(text: string): string[] {
25
- return text
26
- .split(/[、,]/)
27
- .map(s => s.replace(/[\u3000\s]*(著|訳|監訳|監修|編|他|著訳|著・訳)[\u3000\s]*$/, "").trim())
28
- .filter(Boolean);
23
+ return text.split(/[、,]/).map(stripAuthorRole).filter(Boolean);
29
24
  }
30
25
 
31
26
  export const oreillyJapanAdapter: PublisherAdapter = {
@@ -94,7 +89,7 @@ export const oreillyJapanAdapter: PublisherAdapter = {
94
89
 
95
90
  const titleMain = doc.selectOne("h1[itemprop='name']")?.text().trim() ?? "";
96
91
  const subTitle = doc.selectOne("p.sub_title")?.text()
97
- .replace(/^[\s\u3000―\-]+/, "").trim();
92
+ .replace(/^[\s\u3000―-]+/, "").trim();
98
93
  const title = subTitle ? `${titleMain} ―${subTitle}` : titleMain;
99
94
 
100
95
  const authorText = doc.selectOne("span[itemprop='author']")?.text().trim() ?? "";
@@ -3,6 +3,7 @@ import { bookTechAdapter } from "./book-tech.js";
3
3
  import { bornDigitalAdapter } from "./born-digital.js";
4
4
  import { coronashaAdapter } from "./coronasha.js";
5
5
  import { gihyoAdapter } from "./gihyo.js";
6
+ import { impressBooksAdapter } from "./impress.js";
6
7
  import { lambdanoteAdapter } from "./lambdanote.js";
7
8
  import { manateeAdapter } from "./manatee.js";
8
9
  import { maruzenPublishingAdapter } from "./maruzen-publishing.js";
@@ -21,6 +22,7 @@ export const DEFAULT_PUBLISHERS: readonly PublisherAdapter[] = [
21
22
  bornDigitalAdapter,
22
23
  coronashaAdapter,
23
24
  gihyoAdapter,
25
+ impressBooksAdapter,
24
26
  lambdanoteAdapter,
25
27
  manateeAdapter,
26
28
  maruzenPublishingAdapter,
@@ -1,21 +1,9 @@
1
- import iconv from "iconv-lite";
2
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
3
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
4
- import { fetchText, parseJapanesePrice } from "./base.js";
3
+ import { fetchText, parseJapanesePrice, encodeEucJp } from "./base.js";
5
4
 
6
5
  const BASE_URL = "https://shop.rutles.net";
7
6
 
8
- /**
9
- * キーワードを EUC-JP でパーセントエンコードする。
10
- * shop.rutles.net は EUC-JP エンコードされたクエリのみ受け付けるため。
11
- */
12
- function encodeEucJp(text: string): string {
13
- const bytes = iconv.encode(text, "euc-jp");
14
- return Array.from(bytes)
15
- .map(b => "%" + b.toString(16).toUpperCase().padStart(2, "0"))
16
- .join("");
17
- }
18
-
19
7
  /**
20
8
  * "著者: 大槻有一郎:著 山田巧(DXライブラリ管理人):監修<br />"
21
9
  * から著者名のリストを取得する。
@@ -1,29 +1,16 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
3
- import { fetchText, parseJapanesePrice } from "./base.js";
3
+ import { fetchText, parseJapanesePrice, parseJapaneseDateToISO, stripAuthorRole } from "./base.js";
4
4
 
5
5
  const BASE_URL = "https://www.saiensu.co.jp";
6
6
  const SEARCH_URL = `${BASE_URL}/search/`;
7
7
 
8
8
  /**
9
9
  * "堀井俊佑(早稲田大学准教授) 監修" → "堀井俊佑"
10
- * 所属(括弧内)と役割語(著・編・監修など)を除去する。
10
+ * 所属(括弧内)と役割語を除去する。
11
11
  */
12
12
  function parseAuthorName(text: string): string {
13
- return text
14
- .replace(/\(.*?\)/g, "") // (所属) を除去
15
- .replace(/[\u3000\s]*(著|訳|編|監修|監訳|他)[\u3000\s]*$/, "")
16
- .trim();
17
- }
18
-
19
- /**
20
- * "発行日:2026年3月25日" → "2026-03-25"
21
- * 1桁の月・日も対応する。
22
- */
23
- function parseDate(text: string): string | undefined {
24
- const m = text.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
25
- if (!m) return undefined;
26
- return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`;
13
+ return stripAuthorRole(text.replace(/\(.*?\)/g, ""));
27
14
  }
28
15
 
29
16
  /** "ISBN:978-4-7819-9049-1" → "9784781990491" */
@@ -76,7 +63,7 @@ export const saiensuAdapter: PublisherAdapter = {
76
63
  const price = priceText ? parseJapanesePrice(priceText) : undefined;
77
64
 
78
65
  const dateText = article.find(".bookListItemData_publishDate")[0]?.text().trim();
79
- const publishedAt = dateText ? parseDate(dateText) : undefined;
66
+ const publishedAt = dateText ? parseJapaneseDateToISO(dateText) : undefined;
80
67
 
81
68
  const publisherText = article.find(".bookListItemData_publisher")[0]?.text().trim();
82
69
  const publisher = publisherText ? parsePublisher(publisherText) : "サイエンス社";
@@ -124,7 +111,7 @@ export const saiensuAdapter: PublisherAdapter = {
124
111
  const price = priceText ? parseJapanesePrice(priceText) : undefined;
125
112
 
126
113
  const dateText = doc.selectOne(".bookDetail_publishDate")?.text().trim();
127
- const publishedAt = dateText ? parseDate(dateText) : undefined;
114
+ const publishedAt = dateText ? parseJapaneseDateToISO(dateText) : undefined;
128
115
 
129
116
  const publisherText = doc.selectOne(".bookDetail_publisher")?.text().trim();
130
117
  const publisher = publisherText ? parsePublisher(publisherText) : "サイエンス社";
@@ -1,6 +1,6 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery } from "../../domain/book.js";
3
- import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
3
+ import { fetchText, resolveUrl } from "./base.js";
4
4
 
5
5
  const BASE_URL = "https://www.seshop.com";
6
6
  const SEARCH_URL = `${BASE_URL}/search`;
@@ -1,5 +1,6 @@
1
1
  import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
2
2
  import type { BookRecord, SearchQuery, EbookStore } from "../../domain/book.js";
3
+ import type { HtmlDocument } from "../../ports/html-parser.js";
3
4
  import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
4
5
 
5
6
  const BASE_URL = "https://tatsu-zine.com";
@@ -15,6 +16,20 @@ function parseAuthors(text: string): string[] {
15
16
  .filter(Boolean);
16
17
  }
17
18
 
19
/**
 * Finds the highest page number referenced by the pagination links.
 * Example link: <a class="btn-pagination" href="/books?page=11">最後へ</a>
 *
 * @param doc Parsed listing page.
 * @returns The largest `page=` value among `a.btn-pagination` hrefs, or 1
 *          when no link carries one.
 */
function detectLastPage(doc: HtmlDocument): number {
  const pageNumbers = doc
    .select("a.btn-pagination")
    .map(link => /[?&]page=(\d+)/.exec(link.attr("href") ?? ""))
    .filter((hit): hit is RegExpExecArray => hit !== null)
    .map(hit => parseInt(hit[1], 10));

  return Math.max(1, ...pageNumbers);
}
32
+
18
33
  /**
19
34
  * "3,300円 (3,000円+税)" → 3300
20
35
  * 最初の数値が税込価格。
@@ -32,49 +47,59 @@ export const tatsuZineAdapter: PublisherAdapter = {
32
47
  baseUrl: BASE_URL,
33
48
 
34
49
  async search(query: SearchQuery, deps: PublisherDeps): Promise<BookRecord[]> {
35
- const word = [query.title, query.author].filter(Boolean).join(" ");
36
- if (!word) return [];
50
+ // 検索APIがないため書籍一覧からローカルフィルタリングする
51
+ // 著者のみの検索は非対応
52
+ if (!query.title) return [];
37
53
 
38
- // 検索フォーム: <form method="get" action="/books/"><input name="search">
39
- const url = `${BASE_URL}/books/?search=${encodeURIComponent(word)}`;
40
- const html = await fetchText(url, deps);
41
- const doc = deps.parser.parse(html);
54
+ const titleKeyword = query.title.toLowerCase();
55
+ const authorKeyword = query.author?.toLowerCase();
56
+ const limit = query.limit ?? 10;
42
57
 
43
- // 書籍アイテムのHTML構造:
44
- // <a href="/books/{slug}"><img src="/images/books/{id}/cover_s.jpg" alt="Title"></a>
45
- // <h3><a href="/books/{slug}">Title</a></h3>
46
- // <p>Author(), ...</p>
47
- //
48
- // タイトルリンクと著者段落を位置で対応付ける
49
- const titleLinks = doc.select("h3 a[href]").filter(a => {
50
- const href = a.attr("href") ?? "";
51
- return href.startsWith("/books/") && !href.startsWith("/books/pub/");
52
- });
53
- const authorParagraphs = doc.select("h3 + p");
58
+ // 書籍一覧ページ: <article class="book"> が各書籍アイテム、ページネーションあり
59
+ const firstHtml = await fetchText(`${BASE_URL}/books/`, deps);
60
+ const firstDoc = deps.parser.parse(firstHtml);
61
+ const lastPage = detectLastPage(firstDoc);
54
62
 
55
63
  const results: BookRecord[] = [];
64
+ const docs = [[firstHtml, firstDoc] as const];
65
+
66
+ // ページ2以降を先行して取得しておく(キャッシュ経由)
67
+ for (let page = 2; page <= lastPage; page++) {
68
+ const html = await fetchText(`${BASE_URL}/books?page=${page}`, deps);
69
+ docs.push([html, deps.parser.parse(html)]);
70
+ }
56
71
 
57
- for (let i = 0; i < titleLinks.length; i++) {
58
- const titleLink = titleLinks[i];
59
- const title = titleLink.text().trim();
60
- const href = titleLink.attr("href");
61
- if (!title || !href) continue;
62
-
63
- const bookUrl = resolveUrl(BASE_URL, href);
64
- const authorText = authorParagraphs[i]?.text().trim() ?? "";
65
- const authors = authorText ? parseAuthors(authorText) : [];
66
-
67
- results.push({
68
- title,
69
- authors,
70
- publisher: "達人出版会",
71
- url: bookUrl,
72
- // 達人出版会は全書籍で購入者情報を各ページに印字 (ソーシャルDRM)
73
- ebookStores: [{ name: "達人出版会", url: bookUrl, drm: "social" }],
74
- });
72
+ outer: for (const [, doc] of docs) {
73
+ for (const article of doc.select("article.book")) {
74
+ const titleEl = article.find("h3[itemprop='name'] a")[0];
75
+ if (!titleEl) continue;
76
+
77
+ const title = titleEl.text().trim();
78
+ if (!title.toLowerCase().includes(titleKeyword)) continue;
79
+
80
+ const authorText = article.find("p[itemprop='author']")[0]?.text().trim() ?? "";
81
+ if (authorKeyword && !authorText.toLowerCase().includes(authorKeyword)) continue;
82
+
83
+ const href = titleEl.attr("href");
84
+ if (!href) continue;
85
+ const bookUrl = resolveUrl(BASE_URL, href);
86
+
87
+ const authors = authorText ? parseAuthors(authorText) : [];
88
+
89
+ results.push({
90
+ title,
91
+ authors,
92
+ publisher: "達人出版会",
93
+ url: bookUrl,
94
+ // 達人出版会は全書籍で購入者情報を各ページに印字 (ソーシャルDRM)
95
+ ebookStores: [{ name: "達人出版会", url: bookUrl, drm: "social" }],
96
+ });
97
+
98
+ if (results.length >= limit) break outer;
99
+ }
75
100
  }
76
101
 
77
- return results.slice(0, query.limit ?? 10);
102
+ return results;
78
103
  },
79
104
 
80
105
  async getDetail(url: string, deps: PublisherDeps): Promise<BookRecord> {
@@ -1,5 +1,6 @@
1
1
  import type { BookRecord } from "../domain/book.js";
2
2
  import type { PublisherAdapter, PublisherDeps } from "../domain/publisher.js";
3
+ import { checkRobotsTxt } from "../adapters/publishers/base.js";
3
4
 
4
5
  export async function getBookDetail(
5
6
  url: string,
@@ -13,5 +14,11 @@ export async function getBookDetail(
13
14
  `対応URL: ${publishers.map(p => p.baseUrl).join(", ")}`,
14
15
  );
15
16
  }
17
+
18
+ const allowed = await checkRobotsTxt(url, deps);
19
+ if (!allowed) {
20
+ throw new Error(`robots.txt によりアクセスが禁止されています: ${url}`);
21
+ }
22
+
16
23
  return publisher.getDetail(url, deps);
17
24
  }