@zonuexe/techbook-mcp 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -1
- package/README.md +39 -20
- package/dist/adapters/publishers/google-books.d.ts +4 -0
- package/dist/adapters/publishers/google-books.d.ts.map +1 -0
- package/dist/adapters/publishers/google-books.js +75 -0
- package/dist/adapters/publishers/google-books.js.map +1 -0
- package/dist/adapters/publishers/isbn-publisher-codes.d.ts +21 -0
- package/dist/adapters/publishers/isbn-publisher-codes.d.ts.map +1 -0
- package/dist/adapters/publishers/isbn-publisher-codes.js +49 -0
- package/dist/adapters/publishers/isbn-publisher-codes.js.map +1 -0
- package/dist/adapters/publishers/juse-p.d.ts +3 -0
- package/dist/adapters/publishers/juse-p.d.ts.map +1 -0
- package/dist/adapters/publishers/juse-p.js +110 -0
- package/dist/adapters/publishers/juse-p.js.map +1 -0
- package/dist/adapters/publishers/registry.d.ts.map +1 -1
- package/dist/adapters/publishers/registry.js +4 -0
- package/dist/adapters/publishers/registry.js.map +1 -1
- package/dist/application/get-book-by-isbn.d.ts +3 -2
- package/dist/application/get-book-by-isbn.d.ts.map +1 -1
- package/dist/application/get-book-by-isbn.js +22 -3
- package/dist/application/get-book-by-isbn.js.map +1 -1
- package/dist/config/credentials.d.ts +8 -0
- package/dist/config/credentials.d.ts.map +1 -0
- package/dist/config/credentials.js +32 -0
- package/dist/config/credentials.js.map +1 -0
- package/dist/main.js +15 -1
- package/dist/main.js.map +1 -1
- package/dist/setup.d.ts +2 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +43 -0
- package/dist/setup.js.map +1 -0
- package/flake.lock +61 -0
- package/package.json +1 -1
- package/.claude/settings.local.json +0 -38
- package/.codex/skills/techbook-mcp-release-prep/SKILL.md +0 -105
- package/.github/workflows/test.yml +0 -72
- package/.oxlintrc.json +0 -12
- package/AGENTS.md +0 -100
- package/deno.json +0 -3
- package/src/adapters/cache/memory-cache.ts +0 -31
- package/src/adapters/cache/null-cache.ts +0 -8
- package/src/adapters/calil.ts +0 -57
- package/src/adapters/html/cheerio-parser.ts +0 -50
- package/src/adapters/http/fetch-client.ts +0 -47
- package/src/adapters/http/mock-client.ts +0 -77
- package/src/adapters/openbd.ts +0 -142
- package/src/adapters/publishers/base.ts +0 -279
- package/src/adapters/publishers/book-tech.ts +0 -117
- package/src/adapters/publishers/born-digital.ts +0 -143
- package/src/adapters/publishers/coronasha.ts +0 -139
- package/src/adapters/publishers/gihyo.ts +0 -120
- package/src/adapters/publishers/impress.ts +0 -103
- package/src/adapters/publishers/lambdanote.ts +0 -146
- package/src/adapters/publishers/manatee.ts +0 -113
- package/src/adapters/publishers/maruzen-publishing.ts +0 -129
- package/src/adapters/publishers/optronics.ts +0 -113
- package/src/adapters/publishers/oreilly-japan.ts +0 -133
- package/src/adapters/publishers/peaks.ts +0 -98
- package/src/adapters/publishers/personal-media.ts +0 -168
- package/src/adapters/publishers/registry.ts +0 -38
- package/src/adapters/publishers/rutles.ts +0 -149
- package/src/adapters/publishers/saiensu.ts +0 -136
- package/src/adapters/publishers/seshop.ts +0 -121
- package/src/adapters/publishers/tatsu-zine.ts +0 -142
- package/src/adapters/publishers/techbookfest.ts +0 -179
- package/src/application/get-book-by-isbn.ts +0 -50
- package/src/application/get-book-detail.ts +0 -40
- package/src/application/search-books.ts +0 -64
- package/src/domain/book.ts +0 -35
- package/src/domain/publisher.ts +0 -18
- package/src/main.ts +0 -14
- package/src/mcp/server.ts +0 -113
- package/src/mcp/tools.ts +0 -71
- package/src/ports/cache.ts +0 -5
- package/src/ports/html-parser.ts +0 -15
- package/src/ports/http.ts +0 -17
- package/tests/fixtures/book-tech-detail.html +0 -51
- package/tests/fixtures/book-tech-search.html +0 -91
- package/tests/fixtures/born-digital-detail.html +0 -62
- package/tests/fixtures/born-digital-search.html +0 -51
- package/tests/fixtures/calil-book.html +0 -987
- package/tests/fixtures/coronasha-detail.html +0 -41
- package/tests/fixtures/coronasha-search.html +0 -61
- package/tests/fixtures/gihyo-detail.html +0 -42
- package/tests/fixtures/gihyo-search.json +0 -54
- package/tests/fixtures/impress-detail-epub.html +0 -746
- package/tests/fixtures/impress-detail-social.html +0 -689
- package/tests/fixtures/lambdanote-search.html +0 -66
- package/tests/fixtures/manatee-detail.html +0 -53
- package/tests/fixtures/manatee-search.html +0 -59
- package/tests/fixtures/maruzen-detail.html +0 -51
- package/tests/fixtures/maruzen-search.html +0 -60
- package/tests/fixtures/openbd-response.json +0 -110
- package/tests/fixtures/optronics-detail.html +0 -30
- package/tests/fixtures/optronics-search.html +0 -75
- package/tests/fixtures/oreilly-detail.html +0 -52
- package/tests/fixtures/oreilly-ebook-list.html +0 -53
- package/tests/fixtures/peaks-detail.html +0 -39
- package/tests/fixtures/peaks-top.html +0 -50
- package/tests/fixtures/personal-media-detail.html +0 -32
- package/tests/fixtures/personal-media-search.html +0 -39
- package/tests/fixtures/rutles-detail.html +0 -32
- package/tests/fixtures/rutles-search.html +0 -62
- package/tests/fixtures/saiensu-detail.html +0 -41
- package/tests/fixtures/saiensu-search.html +0 -65
- package/tests/fixtures/seshop-detail.html +0 -45
- package/tests/fixtures/seshop-search.html +0 -58
- package/tests/fixtures/tatsu-zine-detail-free.html +0 -24
- package/tests/fixtures/tatsu-zine-search.html +0 -40
- package/tests/fixtures/techbookfest-search.json +0 -73
- package/tests/unit/adapters/base.test.ts +0 -441
- package/tests/unit/adapters/calil.test.ts +0 -69
- package/tests/unit/adapters/openbd.test.ts +0 -185
- package/tests/unit/adapters/publishers/book-tech.test.ts +0 -186
- package/tests/unit/adapters/publishers/born-digital.test.ts +0 -194
- package/tests/unit/adapters/publishers/coronasha.test.ts +0 -207
- package/tests/unit/adapters/publishers/gihyo.test.ts +0 -137
- package/tests/unit/adapters/publishers/impress.test.ts +0 -129
- package/tests/unit/adapters/publishers/lambdanote.test.ts +0 -85
- package/tests/unit/adapters/publishers/manatee.test.ts +0 -165
- package/tests/unit/adapters/publishers/maruzen-publishing.test.ts +0 -179
- package/tests/unit/adapters/publishers/optronics.test.ts +0 -208
- package/tests/unit/adapters/publishers/oreilly-japan.test.ts +0 -194
- package/tests/unit/adapters/publishers/peaks.test.ts +0 -177
- package/tests/unit/adapters/publishers/personal-media.test.ts +0 -199
- package/tests/unit/adapters/publishers/rutles.test.ts +0 -173
- package/tests/unit/adapters/publishers/saiensu.test.ts +0 -169
- package/tests/unit/adapters/publishers/seshop.test.ts +0 -174
- package/tests/unit/adapters/publishers/tatsu-zine.test.ts +0 -172
- package/tests/unit/adapters/publishers/techbookfest.test.ts +0 -94
- package/tests/unit/adapters/registry.test.ts +0 -37
- package/tests/unit/application/get-book-by-isbn.test.ts +0 -176
- package/tests/unit/application/get-book-detail.test.ts +0 -102
- package/tests/unit/application/search-books.test.ts +0 -137
- package/tsconfig.json +0 -17
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import type { HttpClient, RequestOptions, HttpResponse } from "../../ports/http.js";
|
|
2
|
-
|
|
3
|
-
/** Canned response registered with the mock HTTP client. */
export interface MockResponseData {
  /** Status code reported by the mock response. */
  status: number;
  /** Body text resolved by the response's `text()` method. */
  body: string;
  /** Optional response headers, keyed by header name. */
  headers?: Record<string, string>;
}
|
|
8
|
-
|
|
9
|
-
/**
 * In-memory HttpResponse implementation backed by a MockResponseData fixture.
 * The reported URL is the URL that was requested, not anything stored in the fixture.
 */
class MockHttpResponse implements HttpResponse {
  constructor(
    private readonly data: MockResponseData,
    private readonly requestUrl: string,
  ) {}

  /** Status code taken from the fixture. */
  get status(): number { return this.data.status; }

  /** URL this response was produced for. */
  get url(): string { return this.requestUrl; }

  /** Resolves with the fixture body. */
  async text(): Promise<string> { return this.data.body; }

  /**
   * Looks up a response header case-insensitively.
   *
   * @param name Header name in any casing.
   * @returns The header value, or null when the fixture has no such header.
   */
  header(name: string): string | null {
    const headers = this.data.headers;
    if (!headers) return null;

    const wanted = name.toLowerCase();
    // An exact lowercase key wins, which keeps fixtures written for the old lookup working.
    if (Object.prototype.hasOwnProperty.call(headers, wanted)) {
      return headers[wanted] ?? null;
    }
    // Otherwise accept keys registered with any casing (e.g. "Content-Type").
    for (const [key, value] of Object.entries(headers)) {
      if (key.toLowerCase() === wanted) return value;
    }
    return null;
  }
}
|
|
22
|
-
|
|
23
|
-
/**
 * HttpClient test double that serves pre-registered responses and records
 * every requested URL.
 */
export class MockHttpClient implements HttpClient {
  private readonly handlers = new Map<string, MockResponseData>();
  private readonly postHandlers = new Map<string, MockResponseData>();
  private readonly _calls: string[] = [];

  /** Registers a GET response for URLs that start with `urlPrefix`. */
  addResponse(urlPrefix: string, data: MockResponseData): this {
    this.handlers.set(urlPrefix, data);
    return this;
  }

  /** Registers a POST response for URLs that start with `urlPrefix`. */
  addPostResponse(urlPrefix: string, data: MockResponseData): this {
    this.postHandlers.set(urlPrefix, data);
    return this;
  }

  /** URLs requested so far (GET and POST), in call order. */
  get calls(): readonly string[] {
    return this._calls;
  }

  /** Serves a registered GET response; throws when no handler matches. */
  async get(url: string, _options?: RequestOptions): Promise<HttpResponse> {
    this._calls.push(url);
    const matched = this.lookup(this.handlers, url);
    if (matched === undefined) {
      throw new Error(`MockHttpClient: no handler for GET: ${url}`);
    }
    return new MockHttpResponse(matched, url);
  }

  /** Serves a registered POST response; throws when no handler matches. */
  async post(url: string, _body: string, _options?: RequestOptions): Promise<HttpResponse> {
    this._calls.push(url);
    const matched = this.lookup(this.postHandlers, url);
    if (matched === undefined) {
      throw new Error(`MockHttpClient: no handler for POST: ${url}`);
    }
    return new MockHttpResponse(matched, url);
  }

  /**
   * Finds the fixture for `url`: an exact key match takes priority, then the
   * first registered prefix (in registration order) that the URL starts with.
   */
  private lookup(
    table: Map<string, MockResponseData>,
    url: string,
  ): MockResponseData | undefined {
    if (table.has(url)) {
      return table.get(url);
    }
    for (const [prefix, data] of table) {
      if (url.startsWith(prefix)) return data;
    }
    return undefined;
  }
}
|
package/src/adapters/openbd.ts
DELETED
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
import type { PublisherDeps } from "../domain/publisher.js";
|
|
2
|
-
import type { BookRecord } from "../domain/book.js";
|
|
3
|
-
import { fetchText } from "./publishers/base.js";
|
|
4
|
-
|
|
5
|
-
/** Endpoint of the openBD bulk lookup API; ISBNs are passed via the `isbn` query parameter. */
const OPENBD_API_URL = "https://api.openbd.jp/v1/get";
|
|
6
|
-
|
|
7
|
-
// --- 型定義 ---
|
|
8
|
-
|
|
9
|
-
/** `summary` object of an openBD entry, as consumed by this module. */
interface OpenBDSummary {
  isbn: string;
  title: string;
  publisher: string;
  /** Publication date formatted as "YYYYMMDD". */
  pubdate: string;
  /** Cover image URL, e.g. "https://cover.openbd.jp/{isbn}.jpg". */
  cover: string;
  author: string;
}
|
|
17
|
-
|
|
18
|
-
/** One ONIX `TextContent` item of an openBD entry. */
interface OpenBDTextContent {
  /** Text kind code: "02" short description, "03" description, "04" table of contents. */
  TextType: string;
  ContentAudience: string;
  Text: string;
}
|
|
23
|
-
|
|
24
|
-
/** One ONIX `Price` item of an openBD entry. */
interface OpenBDPrice {
  /** Price kind code; "03" is the tax-inclusive list price. */
  PriceType: string;
  /** Amount as a string (parsed with `parseInt` by the consumer in this module). */
  PriceAmount: string;
  CurrencyCode: string;
}
|
|
29
|
-
|
|
30
|
-
/**
 * `hanmoto` object of an openBD entry. Only `isbn` and `storelink` are typed;
 * every other key is kept as `unknown`.
 */
interface OpenBDHanmoto {
  isbn: string;
  /** Store page link; used as the book URL when present. */
  storelink?: string;
  [key: string]: unknown;
}
|
|
35
|
-
|
|
36
|
-
/**
 * A single element of the openBD API response array, limited to the parts
 * this module reads.
 */
export interface OpenBDEntry {
  summary: OpenBDSummary;
  hanmoto?: OpenBDHanmoto;
  onix: {
    /** Descriptive texts (descriptions, table of contents). */
    CollateralDetail?: {
      TextContent?: OpenBDTextContent[];
    };
    /** Supply information; holds the price list. */
    ProductSupply?: {
      SupplyDetail?: {
        Price?: OpenBDPrice[];
      };
    };
  };
}
|
|
50
|
-
|
|
51
|
-
// --- ユーティリティ ---
|
|
52
|
-
|
|
53
|
-
/**
 * Converts an openBD publication date to ISO `YYYY-MM-DD`.
 *
 * Accepts a string that starts with "YYYYMMDD"; a hyphenated "YYYY-MM-DD"
 * prefix is tolerated as well. Empty, partial (e.g. year-month only) or
 * non-numeric input yields `undefined` instead of a malformed date string.
 *
 * @param pubdate Raw `summary.pubdate` value.
 * @returns The ISO date, or undefined when no full date can be read.
 */
function parsePubDate(pubdate: string): string | undefined {
  if (!pubdate) return undefined;
  // Digits are validated explicitly; blind slicing used to turn
  // "2024-03-15" into "2024--0-3-".
  const parts = /^(\d{4})-?(\d{2})-?(\d{2})/.exec(pubdate);
  if (parts === null) return undefined;
  return `${parts[1]}-${parts[2]}-${parts[3]}`;
}
|
|
57
|
-
|
|
58
|
-
/**
 * Returns the text of the first requested TextType that has non-empty content.
 *
 * Types are tried in argument order. For each type only the first
 * TextContent item with that type is inspected.
 *
 * @param entry openBD entry to search.
 * @param types TextType codes in order of preference.
 * @returns The matching text, or undefined when nothing usable is found.
 */
function findTextByType(entry: OpenBDEntry, ...types: string[]): string | undefined {
  const contents = entry.onix.CollateralDetail?.TextContent;
  if (!contents) return undefined;

  for (const wanted of types) {
    const index = contents.findIndex(item => item.TextType === wanted);
    if (index === -1) continue;
    const text = contents[index]?.Text;
    if (text) return text;
  }
  return undefined;
}
|
|
67
|
-
|
|
68
|
-
/**
 * Reads the tax-inclusive list price (PriceType "03") from an openBD entry.
 *
 * Only the first price item with PriceType "03" is considered.
 *
 * @returns The amount as an integer, or undefined when the price is missing
 *          or its amount does not parse as a number.
 */
function getTaxIncludedPrice(entry: OpenBDEntry): number | undefined {
  const priceList = entry.onix.ProductSupply?.SupplyDetail?.Price;
  if (!priceList) return undefined;

  for (const candidate of priceList) {
    if (candidate.PriceType !== "03") continue;
    const value = parseInt(candidate.PriceAmount, 10);
    if (isNaN(value)) return undefined;
    return value;
  }
  return undefined;
}
|
|
77
|
-
|
|
78
|
-
// --- 公開API ---
|
|
79
|
-
|
|
80
|
-
/**
 * Fetches bibliographic data for several ISBNs from the openBD API in one request.
 *
 * Response elements are matched to the requested ISBNs by position.
 *
 * @param isbns ISBNs to look up; an empty list performs no request.
 * @param deps  Dependencies passed through to `fetchText`.
 * @returns A map keyed by the requested ISBN. ISBNs whose response element is
 *          null or missing are omitted; a payload that is not a JSON array
 *          yields an empty map.
 * @throws Whatever `fetchText` or `JSON.parse` throw (e.g. a non-200 response
 *         or malformed JSON); callers wanting a fallback must catch.
 */
export async function fetchOpenBDBooks(
  isbns: string[],
  deps: PublisherDeps,
): Promise<Map<string, OpenBDEntry>> {
  const result = new Map<string, OpenBDEntry>();
  if (isbns.length === 0) return result;

  // Escape each value so a stray "&" or "#" cannot alter the query string.
  // Plain ISBNs (digits, "X", hyphens) are unchanged, so the URL used as the
  // fetchText cache key stays the same for valid input.
  const isbnList = isbns.map(isbn => encodeURIComponent(isbn)).join(",");
  const text = await fetchText(`${OPENBD_API_URL}?isbn=${isbnList}`, deps);

  const payload: unknown = JSON.parse(text);
  if (!Array.isArray(payload)) return result;

  isbns.forEach((isbn, index) => {
    const entry = payload[index] as OpenBDEntry | null | undefined;
    if (entry !== null && entry !== undefined) {
      result.set(isbn, entry);
    }
  });
  return result;
}
|
|
103
|
-
|
|
104
|
-
/**
 * Builds a BookRecord from an openBD entry.
 *
 * Intended as a fallback when the publisher site yields nothing. The record
 * URL is `hanmoto.storelink` when present, otherwise the openBD API URL for
 * the entry's ISBN.
 */
export function openBDEntryToBookRecord(entry: OpenBDEntry): BookRecord {
  const info = entry.summary;

  // Split the author string on the supported separators and drop empty pieces.
  const authorNames: string[] = [];
  if (info.author) {
    for (const piece of info.author.split(/[\//、,,]/)) {
      const name = piece.trim();
      if (name) authorNames.push(name);
    }
  }

  const fallbackUrl = `https://api.openbd.jp/v1/get?isbn=${info.isbn}`;

  return {
    title: info.title,
    authors: authorNames,
    publisher: info.publisher,
    isbn: info.isbn,
    publishedAt: parsePubDate(info.pubdate),
    url: entry.hanmoto?.storelink ?? fallbackUrl,
    price: getTaxIncludedPrice(entry),
    coverImageUrl: info.cover || undefined,
    // Prefer the full description ("03"), then the short one ("02").
    description: findTextByType(entry, "03", "02"),
  };
}
|
|
129
|
-
|
|
130
|
-
/**
 * Fills missing BookRecord fields from openBD data.
 *
 * Only `publishedAt`, `price`, `coverImageUrl` and `description` are
 * considered, and only when the existing value is null or undefined;
 * values already present are never overwritten. Returns a new object.
 */
export function enrichWithOpenBD(book: BookRecord, entry: OpenBDEntry): BookRecord {
  const { publishedAt, price, coverImageUrl, description } = book;
  const openBDSummary = entry.summary;

  return {
    ...book,
    publishedAt: publishedAt ?? parsePubDate(openBDSummary.pubdate),
    price: price ?? getTaxIncludedPrice(entry),
    coverImageUrl: coverImageUrl ?? (openBDSummary.cover || undefined),
    description: description ?? findTextByType(entry, "03", "02"),
  };
}
|
|
@@ -1,279 +0,0 @@
|
|
|
1
|
-
import iconv from "iconv-lite";
|
|
2
|
-
import type { PublisherDeps } from "../../domain/publisher.js";
|
|
3
|
-
import type { EbookStore, DrmType } from "../../domain/book.js";
|
|
4
|
-
import type { HtmlDocument } from "../../ports/html-parser.js";
|
|
5
|
-
|
|
6
|
-
/**
 * Request headers sent with every outgoing fetch made by this module.
 *
 * NOTE(review): the User-Agent still advertises version 0.1.0 while this
 * package diff is 0.2.3 → 0.2.4 — confirm whether that is intentional.
 */
const DEFAULT_HEADERS = {
  "User-Agent": "techbook-mcp/0.1.0 (+https://github.com/zonuexe/techbook-mcp; bibliographic search bot)",
  "Accept": "text/html,application/xhtml+xml,application/json",
  "Accept-Language": "ja,en;q=0.9",
};
|
|
11
|
-
|
|
12
|
-
/** Cache lifetime for fetched page text: 1 hour. */
export const CACHE_TTL_SECONDS = 60 * 60;

/** Cache lifetime for fetched robots.txt content: 6 hours. */
export const ROBOTS_CACHE_TTL_SECONDS = 6 * 60 * 60;
|
|
14
|
-
|
|
15
|
-
// --- robots.txt チェック ---
|
|
16
|
-
|
|
17
|
-
/** A single Allow/Disallow rule from robots.txt. */
interface RobotsRule {
  /** Directive kind, taken from the lowercased line key. */
  type: "allow" | "disallow";
  /** Path value of the directive; may be empty. */
  path: string;
}
|
|
22
|
-
|
|
23
|
-
/** One user-agent group from robots.txt. */
interface RobotsSection {
  /** Lowercased User-agent values that share this group. */
  agents: string[];
  /** Allow/Disallow rules of the group, in file order. */
  rules: RobotsRule[];
}
|
|
28
|
-
|
|
29
|
-
/**
 * Parses robots.txt content into user-agent groups.
 *
 * Consecutive `User-agent` lines open one group; the `Allow` / `Disallow`
 * lines that follow belong to it. A `User-agent` line that appears after at
 * least one rule starts a new group. Blank lines, comments and unknown
 * directives are skipped and do NOT end a group (RFC 9309 permits empty
 * lines inside a group), so rules separated from their `User-agent` line by
 * a blank line are no longer lost.
 *
 * @param content Raw robots.txt text.
 * @returns Groups in file order; agent names are lowercased.
 */
function parseRobotsTxt(content: string): RobotsSection[] {
  const sections: RobotsSection[] = [];
  let current: RobotsSection | null = null;
  // True while still reading the User-agent lines that open the current group.
  let collectingAgents = false;

  for (const rawLine of content.split(/\r?\n/)) {
    // Drop any trailing comment; blank and comment-only lines become empty.
    const line = (rawLine.split("#")[0] ?? "").trim();
    if (!line) continue;

    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const key = line.slice(0, colonIdx).trim().toLowerCase();
    const value = line.slice(colonIdx + 1).trim();

    if (key === "user-agent") {
      if (collectingAgents && current !== null) {
        // Another agent sharing the group that is being opened.
        current.agents.push(value.toLowerCase());
      } else {
        current = { agents: [value.toLowerCase()], rules: [] };
        sections.push(current);
        collectingAgents = true;
      }
    } else if (current !== null && (key === "allow" || key === "disallow")) {
      collectingAgents = false;
      current.rules.push({ type: key, path: value });
    }
  }

  return sections;
}
|
|
73
|
-
|
|
74
|
-
/**
 * Returns the rules that apply to the given user agent.
 *
 * Groups naming the agent explicitly take priority; only when none exist are
 * the `*` groups used. When several groups match, their rules are combined
 * in file order (RFC 9309) instead of using only the first group.
 *
 * @param sections   Parsed robots.txt groups (agent names lowercased).
 * @param agentToken Product token to look for; compared case-insensitively.
 * @returns The applicable rules, or an empty array when no group matches.
 */
function getRulesForAgent(sections: RobotsSection[], agentToken: string): RobotsRule[] {
  const lower = agentToken.toLowerCase();

  const specific = sections.filter(section => section.agents.includes(lower));
  // An explicit group with no rules still wins over "*", as before.
  const applicable = specific.length > 0
    ? specific
    : sections.filter(section => section.agents.includes("*"));

  return applicable.flatMap(section => section.rules);
}
|
|
86
|
-
|
|
87
|
-
/**
 * Decides whether a path is allowed by a list of robots.txt rules.
 *
 * The matching rule with the longest pattern wins; when an allow and a
 * disallow rule of equal length both match, allow wins. Patterns may use
 * `*` (any run of characters) and a trailing `$` (end of path), as described
 * in RFC 9309. Rules with an empty path are ignored. With no matching rule
 * the path is allowed.
 *
 * @param path  Path (plus query string) to test.
 * @param rules Rules applicable to the crawler.
 */
function isPathAllowed(path: string, rules: RobotsRule[]): boolean {
  let bestLength = -1;
  let allowed = true;

  for (const rule of rules) {
    if (!rule.path) continue; // an empty Disallow means "allow everything": no match
    if (!matchesRobotsPattern(rule.path, path)) continue;

    const isAllow = rule.type === "allow";
    const longer = rule.path.length > bestLength;
    const tieForAllow = rule.path.length === bestLength && isAllow;
    if (longer || tieForAllow) {
      bestLength = rule.path.length;
      allowed = isAllow;
    }
  }

  return allowed;
}

/**
 * Tests a robots.txt path pattern against a path without building a RegExp,
 * so hostile patterns cannot cause catastrophic backtracking.
 */
function matchesRobotsPattern(pattern: string, path: string): boolean {
  const anchored = pattern.endsWith("$");
  const segments = (anchored ? pattern.slice(0, -1) : pattern).split("*");

  const head = segments[0] ?? "";
  if (!path.startsWith(head)) return false;
  if (segments.length === 1) {
    // No wildcard: plain prefix match, or exact match when anchored.
    return !anchored || path.length === head.length;
  }

  // Literal pieces between wildcards are matched left to right, each as
  // early as possible.
  let cursor = head.length;
  for (const segment of segments.slice(1, -1)) {
    const at = path.indexOf(segment, cursor);
    if (at === -1) return false;
    cursor = at + segment.length;
  }

  const tail = segments[segments.length - 1] ?? "";
  if (anchored) {
    return path.length - tail.length >= cursor && path.endsWith(tail);
  }
  return path.indexOf(tail, cursor) !== -1;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Fetches robots.txt for the origin of `url` and reports whether the
 * "techbook-mcp" agent may access that URL.
 *
 * The robots.txt body is cached per origin for ROBOTS_CACHE_TTL_SECONDS.
 * The check is fail-open: a fetch error or a non-200 response is stored as
 * empty content, and empty content means access is allowed.
 *
 * @param url  Absolute URL about to be requested.
 * @param deps Provides the cache and HTTP client.
 * @returns true when access is allowed.
 */
export async function checkRobotsTxt(url: string, deps: PublisherDeps): Promise<boolean> {
  const target = new URL(url);
  const origin = `${target.protocol}//${target.host}`;
  const cacheKey = `robots:${origin}`;

  let body = await deps.cache.get(cacheKey);
  if (body === null) {
    try {
      const response = await deps.http.get(`${origin}/robots.txt`, { headers: DEFAULT_HEADERS });
      body = response.status === 200 ? await response.text() : "";
    } catch {
      // Could not fetch robots.txt: treat as "no restrictions".
      body = "";
    }
    await deps.cache.set(cacheKey, body, ROBOTS_CACHE_TTL_SECONDS);
  }

  if (!body) return true;

  const rules = getRulesForAgent(parseRobotsTxt(body), "techbook-mcp");
  return isPathAllowed(target.pathname + target.search, rules);
}
|
|
133
|
-
|
|
134
|
-
/**
 * GETs a URL and returns its body text, using the cache when possible.
 *
 * The cache key is the URL alone (extra headers are not part of it).
 * Successful bodies are cached for CACHE_TTL_SECONDS.
 *
 * @param url          URL to fetch.
 * @param deps         Provides the cache and HTTP client.
 * @param extraHeaders Headers merged over DEFAULT_HEADERS for this request.
 * @returns The response body.
 * @throws Error `HTTP <status>: <url>` when the status is not 200.
 */
export async function fetchText(
  url: string,
  deps: PublisherDeps,
  extraHeaders?: Record<string, string>,
): Promise<string> {
  const hit = await deps.cache.get(url);
  if (hit !== null) return hit;

  const response = await deps.http.get(url, {
    headers: extraHeaders ? { ...DEFAULT_HEADERS, ...extraHeaders } : DEFAULT_HEADERS,
  });

  if (response.status === 200) {
    const body = await response.text();
    await deps.cache.set(url, body, CACHE_TTL_SECONDS);
    return body;
  }

  throw new Error(`HTTP ${response.status}: ${url}`);
}
|
|
155
|
-
|
|
156
|
-
/**
 * Removes HTML tags from a string (used to drop ruby markup from the
 * author field of the gihyo API).
 */
export function stripHtmlTags(html: string): string {
  // Splitting on every tag and re-joining leaves only the text between tags.
  const textPieces = html.split(/<[^>]+>/);
  return textPieces.join("");
}
|
|
160
|
-
|
|
161
|
-
/**
 * Percent-encodes a keyword as EUC-JP bytes.
 *
 * For sites such as born-digital and rutles that only accept EUC-JP encoded
 * queries. Every byte is escaped as `%XX` with uppercase hex, including
 * ASCII characters.
 */
export function encodeEucJp(text: string): string {
  let encoded = "";
  for (const byte of iconv.encode(text, "euc-jp")) {
    encoded += "%" + byte.toString(16).toUpperCase().padStart(2, "0");
  }
  return encoded;
}
|
|
171
|
-
|
|
172
|
-
/**
 * Converts the first Japanese-style date in the text to ISO format,
 * e.g. "2026年3月25日" → "2026-03-25". One-digit months and days are
 * zero-padded.
 *
 * @returns The ISO date, or undefined when the text contains no such date.
 */
export function parseJapaneseDateToISO(text: string): string | undefined {
  const found = /(\d{4})年(\d{1,2})月(\d{1,2})日/.exec(text);
  if (found === null) return undefined;

  const [, year, month, day] = found;
  return [year, month.padStart(2, "0"), day.padStart(2, "0")].join("-");
}
|
|
181
|
-
|
|
182
|
-
/**
 * Removes a trailing role word (著, 訳, 編, 監修, 監訳, 著訳, 著・訳, 他) from
 * an author name and returns the bare name.
 * Example: "Dan Vanderkam 著" → "Dan Vanderkam"
 */
export function stripAuthorRole(name: string): string {
  const trailingRole = /[\u3000\s]*(著|訳|編|監修|監訳|著訳|著・訳|他)[\u3000\s]*$/;
  const withoutRole = name.replace(trailingRole, "");
  return withoutRole.trim();
}
|
|
189
|
-
|
|
190
|
-
/**
 * Extracts an integer amount from price text such as "¥3,960" or
 * "3,300円(税込)".
 *
 * The first run that starts with a digit is used, with thousands separators
 * removed. Requiring a leading digit keeps stray commas in the text from
 * being picked up, which previously produced NaN.
 *
 * @returns The amount, or undefined when the text contains no digits.
 */
export function parseJapanesePrice(text: string): number | undefined {
  const match = text.match(/\d[\d,]*/);
  if (!match) return undefined;
  return parseInt(match[0].replace(/,/g, ""), 10);
}
|
|
196
|
-
|
|
197
|
-
/**
 * Resolves a possibly relative URL against a base URL and returns the
 * absolute URL string.
 */
export function resolveUrl(base: string, path: string): string {
  const absolute = new URL(path, base);
  return absolute.href;
}
|
|
201
|
-
|
|
202
|
-
/**
 * Extracts an Amazon ASIN from HTML text.
 *
 * Recognises amazon.co.jp links of the form /dp/{ASIN}, /gp/product/{ASIN}
 * and /o/ASIN/{ASIN}; the first occurrence wins.
 *
 * @returns The 10-character ASIN, or undefined when no such link is found.
 */
export function extractAsin(html: string): string | undefined {
  const found = /amazon\.co\.jp\/(?:dp|gp\/product|o\/ASIN)\/([A-Z0-9]{10})/.exec(html);
  if (found === null) return undefined;
  return found[1];
}
|
|
210
|
-
|
|
211
|
-
// --- 電子書籍ストア分類 ---
|
|
212
|
-
|
|
213
|
-
/** Maps a URL pattern to the e-book store it identifies. */
interface StorePattern {
  /** Tested against the link URL. */
  pattern: RegExp;
  /** Store name reported in the result. */
  name: string;
  /** DRM classification reported for the store. */
  drm: DrmType;
}
|
|
218
|
-
|
|
219
|
-
/**
 * Known e-book stores, checked in order; the first pattern that matches a
 * URL decides the store name and DRM classification.
 */
const EBOOK_STORE_PATTERNS: StorePattern[] = [
  // DRM-free
  { pattern: /techbookfest\.org\/product\//, name: "技術書典", drm: "free" },
  { pattern: /oreilly\.co\.jp\/books\//, name: "オライリー・ジャパン", drm: "free" },
  { pattern: /shop\.rutles\.net\//, name: "ラトルズ", drm: "free" },
  { pattern: /peaks\.cc\/books\//, name: "PEAKS", drm: "free" },
  { pattern: /optronics-ebook\.com\/products\//, name: "オプトロニクス社", drm: "free" },

  // Social DRM
  { pattern: /gihyo\.jp\/dp\/ebook\//, name: "Gihyo Digital Publishing", drm: "social" },
  { pattern: /seshop\.com\/product\//, name: "SEshop", drm: "social" },
  { pattern: /book-tech\.com\/books\//, name: "BOOK TECH", drm: "social" },
  { pattern: /wgn-obs\.shop-pro\.jp\/\?pid=/, name: "ボーンデジタル", drm: "social" },

  // Social DRM (an ID generated at purchase or purchaser details are
  // watermarked; no technical restriction)
  { pattern: /book\.mynavi\.jp\/manatee\//, name: "マナティ", drm: "social" },
  { pattern: /www\.lambdanote\.com\/products\//, name: "ラムダノート", drm: "social" },
  { pattern: /tatsu-zine\.com\/books\/(?!pub\/)/, name: "達人出版会", drm: "social" },

  // Social DRM (PDF watermarked with purchaser details; no technical restriction)
  { pattern: /book\.impress\.co\.jp\/books\//, name: "インプレスブックス", drm: "social" },

  // DRM-attached
  { pattern: /saiensu\.co\.jp/, name: "サイエンス社", drm: "password_pdf" },
  { pattern: /amazon\.co\.jp/, name: "Kindle", drm: "drm" },
  { pattern: /kinokuniya\.co\.jp\/(?:kinoppystore|f\/dsg-08)/, name: "Kinoppy", drm: "drm" },
  { pattern: /coop-ebook\.jp\/mem\//, name: "VarsityWave eBooks", drm: "drm" },
  { pattern: /books\.rakuten\.co\.jp|rakuten\.kobo\.com|kobo\.com/, name: "楽天Kobo", drm: "drm" },
  { pattern: /booklive\.jp/, name: "BookLive", drm: "drm" },
  { pattern: /honto\.jp/, name: "honto", drm: "drm" },
  { pattern: /bookwalker\.jp/, name: "BOOK☆WALKER", drm: "drm" },
  { pattern: /ebookjapan\.yahoo\.co\.jp/, name: "eBookJapan", drm: "drm" },
  { pattern: /store\.line\.me/, name: "LINEマンガ", drm: "drm" },
];
|
|
248
|
-
|
|
249
|
-
/**
 * Identifies the e-book store a URL belongs to.
 *
 * @returns Store name, the given URL and DRM type for the first matching
 *          pattern, or null for an unknown store.
 */
export function classifyEbookStore(url: string): EbookStore | null {
  const matched = EBOOK_STORE_PATTERNS.find(candidate => candidate.pattern.test(url));
  if (matched === undefined) return null;
  return { name: matched.name, url, drm: matched.drm };
}
|
|
258
|
-
|
|
259
|
-
/**
 * Scans every link in an HTML document and collects the e-book stores found.
 *
 * When several links point to the same store, only the first one is kept.
 * Results are in document order.
 */
export function extractEbookStoresFromDoc(doc: HtmlDocument): EbookStore[] {
  // Map preserves insertion order, so values() yields first occurrences in document order.
  const firstByName = new Map<string, EbookStore>();

  for (const anchor of doc.select("a[href]")) {
    const href = anchor.attr("href");
    if (!href) continue;

    const store = classifyEbookStore(href);
    if (store === null || firstByName.has(store.name)) continue;
    firstByName.set(store.name, store);
  }

  return Array.from(firstByName.values());
}
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
import type { PublisherAdapter, PublisherDeps } from "../../domain/publisher.js";
|
|
2
|
-
import type { BookRecord, SearchQuery } from "../../domain/book.js";
|
|
3
|
-
import { fetchText, parseJapanesePrice, resolveUrl } from "./base.js";
|
|
4
|
-
|
|
5
|
-
/** Site root of BOOK TECH. */
const BASE_URL = "https://book-tech.com";
/** Book listing page, which also serves search results. */
const SEARCH_URL = BASE_URL + "/books";
/**
 * Search query parameter key, already URL-encoded. Decoded it reads
 * `q[title_or_overview_or_identification_number_1_or_product_code_cont]`.
 */
const SEARCH_PARAM = "q%5Btitle_or_overview_or_identification_number_1_or_product_code_cont%5D";
|
|
9
|
-
|
|
10
|
-
/**
 * Converts the first slash-separated date in the text to ISO format,
 * e.g. "2026/2/20" → "2026-02-20".
 *
 * @returns The ISO date, or undefined when the text contains no such date.
 */
function parseDate(text: string): string | undefined {
  const found = /(\d{4})\/(\d{1,2})\/(\d{1,2})/.exec(text);
  if (found === null) return undefined;

  const [, year, month, day] = found;
  return [year, month.padStart(2, "0"), day.padStart(2, "0")].join("-");
}
|
|
16
|
-
|
|
17
|
-
/**
 * Removes a trailing parenthesised role marker such as "(著)" from a name.
 * Both ASCII and full-width parentheses are recognised.
 */
function stripRole(name: string): string {
  const trailingParenthesised = /\s*[((][^))]*[))]\s*$/;
  const bare = name.replace(trailingParenthesised, "");
  return bare.trim();
}
|
|
21
|
-
|
|
22
|
-
/**
 * Publisher adapter for BOOK TECH (https://book-tech.com).
 *
 * `search` scrapes the listing page; `getDetail` scrapes a book page. Every
 * record lists BOOK TECH itself as a social-DRM e-book store.
 */
export const bookTechAdapter: PublisherAdapter = {
  id: "book-tech",
  name: "BOOK TECH",
  baseUrl: BASE_URL,

  /**
   * Searches by title and/or author (joined with a space into one keyword).
   *
   * @returns Up to `query.limit` records (default 10). Returns an empty list
   *          without any request when no keyword is given or the limit is
   *          zero or negative.
   */
  async search(query: SearchQuery, deps: PublisherDeps): Promise<BookRecord[]> {
    const word = [query.title, query.author].filter(Boolean).join(" ");
    if (!word) return [];

    const limit = query.limit ?? 10;
    // The in-loop check runs only after a push, so without this guard a
    // non-positive limit would still return one record.
    if (limit <= 0) return [];

    const url = `${SEARCH_URL}?${SEARCH_PARAM}=${encodeURIComponent(word)}`;
    const html = await fetchText(url, deps);
    const doc = deps.parser.parse(html);

    const results: BookRecord[] = [];

    for (const item of doc.select("div.contents-index-item")) {
      const href = item.find("a.book-ribbon-link")[0]?.attr("href");
      if (!href) continue;
      const bookUrl = resolveUrl(BASE_URL, href);

      const title = item.find(".contents-index-item-detail-title")[0]?.text().trim();
      if (!title) continue;

      const publisher = item.find("a[href*='publisher_relations']")[0]?.text().trim() ?? "";

      const authors = item.find("a[href*='author_relations']")
        .map(el => stripRole(el.text().trim()))
        .filter(Boolean);

      const priceText = item.find(".contents-index-item-detail-price_include_tax")[0]?.text();
      const price = priceText ? parseJapanesePrice(priceText) : undefined;

      const dateText = item.find(".my-1")[0]?.text();
      const publishedAt = dateText ? parseDate(dateText) : undefined;

      // Resolve like the book link so a relative src becomes an absolute URL.
      const coverSrc = item.find("img.thumb")[0]?.attr("src");
      const coverImageUrl = coverSrc ? resolveUrl(BASE_URL, coverSrc) : undefined;

      results.push({
        title,
        authors,
        publisher,
        url: bookUrl,
        price,
        publishedAt,
        coverImageUrl,
        ebookStores: [{ name: "BOOK TECH", url: bookUrl, drm: "social" }],
      });

      if (results.length >= limit) break;
    }

    return results;
  },

  /**
   * Fetches and parses a book detail page.
   *
   * Missing title or publisher become empty strings; other missing fields
   * are undefined. The ISBN is the first 13-digit run in the id element.
   *
   * @param url Detail page URL; also used as the record URL.
   */
  async getDetail(url: string, deps: PublisherDeps): Promise<BookRecord> {
    const html = await fetchText(url, deps);
    const doc = deps.parser.parse(html);

    const title = doc.selectOne(".contents-book-about-title h1")?.text().trim() ?? "";

    const publisher = doc.selectOne("a[href*='publisher_relations']")?.text().trim() ?? "";

    const authors = doc.select("a[href*='author_relations']")
      .map(el => stripRole(el.text().trim()))
      .filter(Boolean);

    const priceText = doc.selectOne(".contents-book-item-detail-price_include_tax")?.text();
    const price = priceText ? parseJapanesePrice(priceText) : undefined;

    const dateText = doc.selectOne(".contents-book-about-publicationdate")?.text();
    const publishedAt = dateText ? parseDate(dateText) : undefined;

    const isbnText = doc.selectOne(".contents-book-about-id")?.text();
    const isbn = isbnText?.match(/\d{13}/)?.[0];

    // A relative src is resolved against the page it was found on.
    const coverSrc = doc.selectOne("img.thumb")?.attr("src");
    const coverImageUrl = coverSrc ? resolveUrl(url, coverSrc) : undefined;

    return {
      title,
      authors,
      publisher,
      url,
      isbn,
      price,
      publishedAt,
      coverImageUrl,
      ebookStores: [{ name: "BOOK TECH", url, drm: "social" }],
    };
  },
};
|