@nahisaho/katashiro-collector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/api-client.d.ts +70 -0
- package/dist/api/api-client.d.ts.map +1 -0
- package/dist/api/api-client.js +132 -0
- package/dist/api/api-client.js.map +1 -0
- package/dist/api/index.d.ts +5 -0
- package/dist/api/index.d.ts.map +1 -0
- package/dist/api/index.js +5 -0
- package/dist/api/index.js.map +1 -0
- package/dist/feed/feed-reader.d.ts +70 -0
- package/dist/feed/feed-reader.d.ts.map +1 -0
- package/dist/feed/feed-reader.js +272 -0
- package/dist/feed/feed-reader.js.map +1 -0
- package/dist/feed/index.d.ts +5 -0
- package/dist/feed/index.d.ts.map +1 -0
- package/dist/feed/index.js +5 -0
- package/dist/feed/index.js.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/interfaces.d.ts +53 -0
- package/dist/interfaces.d.ts.map +1 -0
- package/dist/interfaces.js +9 -0
- package/dist/interfaces.js.map +1 -0
- package/dist/media/index.d.ts +5 -0
- package/dist/media/index.d.ts.map +1 -0
- package/dist/media/index.js +5 -0
- package/dist/media/index.js.map +1 -0
- package/dist/media/media-extractor.d.ts +74 -0
- package/dist/media/media-extractor.d.ts.map +1 -0
- package/dist/media/media-extractor.js +287 -0
- package/dist/media/media-extractor.js.map +1 -0
- package/dist/scraper/index.d.ts +5 -0
- package/dist/scraper/index.d.ts.map +1 -0
- package/dist/scraper/index.js +5 -0
- package/dist/scraper/index.js.map +1 -0
- package/dist/scraper/web-scraper.d.ts +48 -0
- package/dist/scraper/web-scraper.d.ts.map +1 -0
- package/dist/scraper/web-scraper.js +144 -0
- package/dist/scraper/web-scraper.js.map +1 -0
- package/dist/types.d.ts +82 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +9 -0
- package/dist/types.js.map +1 -0
- package/dist/web-search/index.d.ts +8 -0
- package/dist/web-search/index.d.ts.map +1 -0
- package/dist/web-search/index.js +8 -0
- package/dist/web-search/index.js.map +1 -0
- package/dist/web-search/web-search-client.d.ts +44 -0
- package/dist/web-search/web-search-client.d.ts.map +1 -0
- package/dist/web-search/web-search-client.js +131 -0
- package/dist/web-search/web-search-client.js.map +1 -0
- package/dist/youtube/index.d.ts +5 -0
- package/dist/youtube/index.d.ts.map +1 -0
- package/dist/youtube/index.js +5 -0
- package/dist/youtube/index.js.map +1 -0
- package/dist/youtube/youtube-transcript.d.ts +57 -0
- package/dist/youtube/youtube-transcript.d.ts.map +1 -0
- package/dist/youtube/youtube-transcript.js +228 -0
- package/dist/youtube/youtube-transcript.js.map +1 -0
- package/package.json +44 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebScraper - web scraper
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-002
|
|
5
|
+
* @design DES-KATASHIRO-001 §2.2 Collector Container
|
|
6
|
+
* @task TSK-011
|
|
7
|
+
*/
|
|
8
|
+
import { ok, err, formatTimestamp, validateUrl, isErr, } from '@nahisaho/katashiro-core';
|
|
9
|
+
/**
 * Web scraper implementation.
 *
 * Fetches a page with `fetch` and extracts its title, plain-text content and,
 * on request, image and link URLs. Extraction is regex-based (no DOM parser).
 * Note: using Playwright is recommended for production environments.
 */
export class WebScraper {
    /**
     * Scrape the content of a URL.
     *
     * @param url - Page address; checked with `validateUrl` before any request is made.
     * @param options - Optional settings read by this class: `userAgent`,
     *   `timeout`, `extractImages`, `extractLinks` and `javascript`.
     * @returns A promise of `ok(result)` on success, or of `err(Error)` whose
     *   message is `Invalid URL: …` when validation fails and
     *   `Scraping error: …` when fetching or parsing throws.
     */
    async scrape(url, options) {
        const urlValidation = validateUrl(url);
        if (isErr(urlValidation)) {
            return err(new Error(`Invalid URL: ${urlValidation.error}`));
        }
        try {
            const html = await this.fetchPage(url, options);
            return ok(this.parseHtml(html, url, options));
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return err(new Error(`Scraping error: ${message}`));
        }
    }
    /**
     * Download the page body as text.
     *
     * @param url - Address to request.
     * @param options - `userAgent` overrides the default User-Agent header;
     *   a truthy `timeout` is handed to `AbortSignal.timeout` to abort the request.
     * @returns The response body as a string.
     * @throws Error `HTTP error: <status>` when the response is not OK.
     */
    async fetchPage(url, options) {
        const userAgent = options?.userAgent ?? 'Mozilla/5.0 (compatible; KATASHIRO/0.1.0)';
        const response = await fetch(url, {
            headers: {
                'User-Agent': userAgent,
                Accept: 'text/html,application/xhtml+xml',
            },
            // A missing or zero timeout means the request is not aborted by this class.
            signal: options?.timeout
                ? AbortSignal.timeout(options.timeout)
                : undefined,
        });
        if (!response.ok) {
            throw new Error(`HTTP error: ${response.status}`);
        }
        return response.text();
    }
    /**
     * Build the scraping result from raw HTML.
     *
     * @param html - Raw page markup.
     * @param url - Page address, also used as the base for relative URLs.
     * @param options - `extractImages` / `extractLinks` switch the respective
     *   lists on; the raw `html` is included only when `javascript` is truthy.
     * @returns Object with `url`, `title`, `content`, optional `html`,
     *   `images`, `links`, and a `fetchedAt` timestamp from `formatTimestamp()`.
     */
    parseHtml(html, url, options) {
        const title = this.extractTitle(html);
        const content = this.extractContent(html);
        const images = options?.extractImages ? this.extractImages(html, url) : undefined;
        const links = options?.extractLinks ? this.extractLinks(html, url) : undefined;
        return {
            url,
            title,
            content,
            html: options?.javascript ? html : undefined,
            images,
            links,
            fetchedAt: formatTimestamp(),
        };
    }
    /**
     * Extract the text of the first `<title>` element.
     *
     * @param html - Raw page markup.
     * @returns The trimmed title, or `'Untitled'` when there is no title or it
     *   is empty / whitespace-only.
     */
    extractTitle(html) {
        const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
        // `||` (not `??`) so that a whitespace-only title also falls back.
        return titleMatch?.[1]?.trim() || 'Untitled';
    }
    /**
     * Reduce HTML to plain text.
     *
     * Removes comments and the script, style, nav, footer and header elements
     * with their contents, replaces every remaining tag with a space, then
     * collapses all whitespace runs to single spaces.
     *
     * @param html - Raw page markup.
     * @returns The trimmed plain-text content.
     */
    extractContent(html) {
        return html
            // Comments go first so markup hidden inside them never leaks into the text.
            .replace(/<!--[\s\S]*?-->/g, '')
            .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
            .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
            .replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
            .replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '')
            .replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '')
            .replace(/<[^>]+>/g, ' ')
            .replace(/\s+/g, ' ')
            .trim();
    }
    /**
     * Collect the `src` of every `<img>` tag as an absolute URL.
     *
     * Only a real, quoted `src` attribute is read: `data-src` and similar
     * attributes are ignored, and the closing quote must match the opening one.
     *
     * @param html - Raw page markup.
     * @param baseUrl - Base used to resolve relative sources.
     * @returns Image URLs in document order (duplicates are kept).
     */
    extractImages(html, baseUrl) {
        const imgRegex = /<img(?=\s)[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;
        const images = [];
        for (const match of html.matchAll(imgRegex)) {
            const src = (match[1] ?? match[2] ?? '').trim();
            if (src) {
                images.push(this.resolveUrl(src, baseUrl));
            }
        }
        return images;
    }
    /**
     * Collect the `href` of every `<a>` tag as an absolute URL.
     *
     * Only `<a>` elements are considered (not `<abbr>`, `<area>`, …) and only
     * a real, quoted `href` attribute. Fragment-only links (`#…`) and
     * `javascript:` links (any letter case) are skipped.
     *
     * @param html - Raw page markup.
     * @param baseUrl - Base used to resolve relative links.
     * @returns Link URLs in document order (duplicates are kept).
     */
    extractLinks(html, baseUrl) {
        const linkRegex = /<a(?=\s)[^>]*?\shref\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;
        const links = [];
        for (const match of html.matchAll(linkRegex)) {
            const href = (match[1] ?? match[2] ?? '').trim();
            if (href && !href.startsWith('#') && !/^javascript:/i.test(href)) {
                links.push(this.resolveUrl(href, baseUrl));
            }
        }
        return links;
    }
    /**
     * Convert a possibly relative URL to an absolute one.
     *
     * @param url - URL as found in the markup.
     * @param baseUrl - Base to resolve against.
     * @returns `url` unchanged when it already starts with `http://` or
     *   `https://`; otherwise the resolved `href`, or `url` unchanged when
     *   resolution fails.
     */
    resolveUrl(url, baseUrl) {
        if (url.startsWith('http://') || url.startsWith('https://')) {
            return url;
        }
        try {
            return new URL(url, baseUrl).href;
        }
        catch {
            // Best effort: hand back the raw value rather than dropping the entry.
            return url;
        }
    }
}
|
|
144
|
+
//# sourceMappingURL=web-scraper.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-scraper.js","sourceRoot":"","sources":["../../src/scraper/web-scraper.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAEL,EAAE,EACF,GAAG,EACH,eAAe,EACf,WAAW,EACX,KAAK,GACN,MAAM,0BAA0B,CAAC;AAGlC;;;GAGG;AACH,MAAM,OAAO,UAAU;IACrB;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,GAAW,EACX,OAAyB;QAEzB,QAAQ;QACR,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,aAAa,CAAC,EAAE,CAAC;YACzB,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,gBAAgB,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,mBAAmB,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,OAAyB;QAC5D,MAAM,SAAS,GACb,OAAO,EAAE,SAAS,IAAI,2CAA2C,CAAC;QAEpE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,SAAS;gBACvB,MAAM,EAAE,iCAAiC;aAC1C;YACD,MAAM,EAAE,OAAO,EAAE,OAAO;gBACtB,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC;gBACtC,CAAC,CAAC,SAAS;SACd,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,SAAS,CACf,IAAY,EACZ,GAAW,EACX,OAAyB;QAEzB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAClF,MAAM,KAAK,GAAG,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAE/E,OAAO;YACL,GAAG;YACH,KAAK;YACL,OAAO;YACP,IAAI,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC5C,MAAM;YACN,KAAK;YACL,SAAS,EAAE,eAAe,EAAE;SAC7B,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,YAAY,C
AAC,IAAY;QAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAC/D,OAAO,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC;IAC/C,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,2CAA2C;QAC3C,IAAI,OAAO,GAAG,IAAI;aACf,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC;aAChD,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC;aAC9C,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;aAC1C,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC;aAChD,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;QAEpD,YAAY;QACZ,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAE3C,SAAS;QACT,OAAO,GAAG,OAAO;aACd,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC;aACzB,IAAI,EAAE,CAAC;QAEV,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,OAAe;QACjD,MAAM,QAAQ,GAAG,iCAAiC,CAAC;QACnD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACrB,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY,EAAE,OAAe;QAChD,MAAM,SAAS,GAAG,gCAAgC,CAAC;QACnD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACrE,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBACnD,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,GAAW,EAAE,OAAe;QAC7C,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5D,OAAO,GAAG,CAAC;QACb,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Collector type definitions
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
|
|
5
|
+
* @design DES-KATASHIRO-001 §2.2 Collector Container
|
|
6
|
+
* @task TSK-010 ~ TSK-015
|
|
7
|
+
*/
|
|
8
|
+
import type { Timestamp, URL } from '@nahisaho/katashiro-core';
|
|
9
|
+
/**
 * Options for a web search.
 *
 * Every field is optional and read-only.
 */
export interface WebSearchOptions {
    /** Search backend to use. */
    readonly provider?: 'google' | 'bing' | 'duckduckgo';
    /** Upper bound on the number of results. */
    readonly maxResults?: number;
    /** Language preference; the expected format is not specified here (TODO confirm). */
    readonly language?: string;
    /** Region preference; the expected format is not specified here (TODO confirm). */
    readonly region?: string;
    /** Safe-search toggle. */
    readonly safeSearch?: boolean;
    /** Request timeout; presumably milliseconds — confirm against the client. */
    readonly timeout?: number;
}
|
|
20
|
+
/**
 * Options for scraping a page.
 *
 * Every field is optional and read-only.
 */
export interface ScrapingOptions {
    /** Selector to wait for; presumably for a browser-based scraper (TODO confirm). */
    readonly waitForSelector?: string;
    /** Request timeout; presumably milliseconds — confirm against the scraper. */
    readonly timeout?: number;
    /** User-Agent string to send with the request. */
    readonly userAgent?: string;
    /** JavaScript-related flag; exact effect is defined by the scraper (TODO confirm). */
    readonly javascript?: boolean;
    /** Whether image URLs should be extracted. */
    readonly extractImages?: boolean;
    /** Whether link URLs should be extracted. */
    readonly extractLinks?: boolean;
}
|
|
31
|
+
/**
 * Result of scraping a page.
 */
export interface ScrapingResult {
    /** Address of the scraped page. */
    readonly url: URL;
    /** Page title. */
    readonly title: string;
    /** Extracted text content. */
    readonly content: string;
    /** Raw HTML, when included. */
    readonly html?: string;
    /** Image URLs, when extracted. */
    readonly images?: Array<string>;
    /** Link URLs, when extracted. */
    readonly links?: Array<string>;
    /** When the page was fetched. */
    readonly fetchedAt: Timestamp;
}
|
|
43
|
+
/**
 * A single feed item.
 */
export interface FeedItem {
    /** Item identifier. */
    readonly id: string;
    /** Item title. */
    readonly title: string;
    /** Link to the item. */
    readonly link: URL;
    /** Item description, if any. */
    readonly description?: string;
    /** Item content, if any. */
    readonly content?: string;
    /** Item author, if any. */
    readonly author?: string;
    /** Publication time, if any. */
    readonly publishedAt?: Timestamp;
    /** Category labels, if any. */
    readonly categories?: Array<string>;
}
|
|
56
|
+
/**
 * One segment of a transcript.
 */
export interface TranscriptSegment {
    /** Text of the segment. */
    readonly text: string;
    /** Start time; presumably seconds — TODO confirm against the producer. */
    readonly startTime: number;
    /** End time, in the same unit as `startTime`. */
    readonly endTime: number;
    /** Segment length, when provided. */
    readonly duration?: number;
}
|
|
65
|
+
/**
 * Metadata describing a media item.
 *
 * Only `type` is required.
 */
export interface MediaMetadata {
    /** Address of the media item, if known. */
    readonly url?: URL;
    /** Source address, if known; how it differs from `url` is not stated here (TODO confirm). */
    readonly sourceUrl?: URL;
    /** Kind of media. */
    readonly type: 'image' | 'video' | 'audio';
    /** Title, if any. */
    readonly title?: string;
    /** Description, if any. */
    readonly description?: string;
    /** Width; presumably pixels (TODO confirm). */
    readonly width?: number;
    /** Height; presumably pixels (TODO confirm). */
    readonly height?: number;
    /** Duration; unit is not stated here (TODO confirm). */
    readonly duration?: number;
    /** Format label, if any. */
    readonly format?: string;
    /** Size; presumably bytes (TODO confirm). */
    readonly size?: number;
    /** Thumbnail address, if any. */
    readonly thumbnailUrl?: string;
    /** When the metadata was fetched, if recorded. */
    readonly fetchedAt?: Timestamp;
}
|
|
82
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,0BAA0B,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,YAAY,CAAC;IACrD,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;IAC9B,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;IAC9B,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC;IACnB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,WAAW,CAAC,EAAE,SAAS,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC;IACnB,QAAQ,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;IAC3C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,
QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,CAAC,EAAE,SAAS,CAAC;CAChC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/web-search/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/web-search/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSearchClient - web search client
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-001
|
|
5
|
+
* @design DES-KATASHIRO-001 §2.2 Collector Container
|
|
6
|
+
* @task TSK-010
|
|
7
|
+
*/
|
|
8
|
+
import { type Result, type SearchResult, type SearchQuery } from '@nahisaho/katashiro-core';
|
|
9
|
+
import type { IWebSearchClient, WebSearchOptions } from '../index.js';
|
|
10
|
+
/**
 * Web search client implementation.
 */
export declare class WebSearchClient implements IWebSearchClient {
    private readonly defaultProvider;
    private readonly defaultMaxResults;
    /**
     * Run a web search.
     *
     * @param query - Search query.
     * @param options - Optional search settings.
     * @returns A promise of a `Result` holding the search results or an `Error`.
     */
    search(query: SearchQuery, options?: WebSearchOptions): Promise<Result<SearchResult[], Error>>;
    /**
     * Fetch search results from the selected provider.
     */
    private fetchFromProvider;
    /**
     * Search using the DuckDuckGo Instant Answer API.
     * Note: SerpAPI, the Brave Search API or similar is recommended for production.
     */
    private searchDuckDuckGo;
    /**
     * Search using the Google Custom Search API.
     * Note: an API key is required for the implementation.
     */
    private searchGoogle;
    /**
     * Search using the Bing Search API.
     * Note: an API key is required for the implementation.
     */
    private searchBing;
    /**
     * Extract a title from text.
     */
    private extractTitle;
}
|
|
44
|
+
//# sourceMappingURL=web-search-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-search-client.d.ts","sourceRoot":"","sources":["../../src/web-search/web-search-client.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EACX,KAAK,YAAY,EACjB,KAAK,WAAW,EAKjB,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAoBtE;;GAEG;AACH,qBAAa,eAAgB,YAAW,gBAAgB;IACtD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAgC;IAChE,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAM;IAExC;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,WAAW,EAClB,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC;IAyBzC;;OAEG;YACW,iBAAiB;IAgB/B;;;OAGG;YACW,gBAAgB;IAkD9B;;;OAGG;YACW,YAAY;IAM1B;;;OAGG;YACW,UAAU;IAMxB;;OAEG;IACH,OAAO,CAAC,YAAY;CAQrB"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebSearchClient - web search client
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-001
|
|
5
|
+
* @design DES-KATASHIRO-001 §2.2 Collector Container
|
|
6
|
+
* @task TSK-010
|
|
7
|
+
*/
|
|
8
|
+
import { ok, err, generateId, formatTimestamp, } from '@nahisaho/katashiro-core';
|
|
9
|
+
/**
 * Web search client implementation.
 *
 * DuckDuckGo is the default (and only implemented) provider; the Google and
 * Bing providers throw because they need API key configuration.
 */
export class WebSearchClient {
    defaultProvider = 'duckduckgo';
    defaultMaxResults = 10;
    /**
     * Run a web search.
     *
     * @param query - Search query; `query.query` must contain non-whitespace
     *   text. `query.maxResults` takes precedence over `options.maxResults`.
     * @param options - Optional settings: `provider`, `maxResults`, and
     *   `timeout` (applied to the DuckDuckGo request).
     * @returns A promise of `ok(results)` truncated to the effective
     *   `maxResults`, or of `err(Error)` for an empty query
     *   (`Search query cannot be empty`) or a provider failure (`Search error: …`).
     */
    async search(query, options) {
        if (!query.query || query.query.trim().length === 0) {
            return err(new Error('Search query cannot be empty'));
        }
        const provider = options?.provider ?? this.defaultProvider;
        const maxResults = query.maxResults ?? options?.maxResults ?? this.defaultMaxResults;
        try {
            const results = await this.fetchFromProvider(provider, { ...query, maxResults }, options);
            // Providers may return more than requested; enforce the limit here.
            return ok(results.slice(0, maxResults));
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return err(new Error(`Search error: ${message}`));
        }
    }
    /**
     * Dispatch the query to the selected provider.
     *
     * @param provider - `'duckduckgo'`, `'google'` or `'bing'`; any other
     *   value falls back to DuckDuckGo.
     * @param query - Search query with `maxResults` already resolved.
     * @param options - Optional settings forwarded to the DuckDuckGo provider.
     * @returns The provider's results.
     */
    async fetchFromProvider(provider, query, options) {
        switch (provider) {
            case 'google':
                return this.searchGoogle(query);
            case 'bing':
                return this.searchBing(query);
            case 'duckduckgo':
            default:
                return this.searchDuckDuckGo(query, options);
        }
    }
    /**
     * Search using the DuckDuckGo Instant Answer API.
     * Note: SerpAPI, the Brave Search API or similar is recommended for production.
     *
     * The abstract (when present) is listed first with relevance 1.0, followed
     * by the related topics with relevance 0.8. Related-topic groups that
     * carry a nested `Topics` array are flattened in place.
     *
     * @param query - Search query; only `query.query` is sent.
     * @param options - A truthy `timeout` is handed to `AbortSignal.timeout`.
     * @returns Search results in response order.
     * @throws Error `DuckDuckGo API error: <status>` when the response is not OK.
     */
    async searchDuckDuckGo(query, options) {
        const encodedQuery = encodeURIComponent(query.query);
        const url = `https://api.duckduckgo.com/?q=${encodedQuery}&format=json&no_html=1`;
        const response = await fetch(url, {
            headers: {
                'User-Agent': 'KATASHIRO/0.1.0',
            },
            // A missing or zero timeout means the request is not aborted by this class.
            signal: options?.timeout
                ? AbortSignal.timeout(options.timeout)
                : undefined,
        });
        if (!response.ok) {
            throw new Error(`DuckDuckGo API error: ${response.status}`);
        }
        const data = (await response.json()) ?? {};
        const results = [];
        if (data.AbstractText && data.AbstractURL) {
            results.push({
                id: generateId('search'),
                // `||` so that an empty heading also falls back to the query text.
                title: data.Heading || query.query,
                url: data.AbstractURL,
                snippet: data.AbstractText,
                source: 'duckduckgo',
                timestamp: formatTimestamp(),
                relevanceScore: 1.0,
            });
        }
        // Work list of topics; groups are expanded where they stand so order is kept.
        const topics = Array.isArray(data.RelatedTopics) ? [...data.RelatedTopics] : [];
        for (let index = 0; index < topics.length; index++) {
            const topic = topics[index];
            if (Array.isArray(topic?.Topics)) {
                topics.splice(index + 1, 0, ...topic.Topics);
                continue;
            }
            if (topic?.Text && topic.FirstURL) {
                results.push({
                    id: generateId('search'),
                    title: this.extractTitle(topic.Text),
                    url: topic.FirstURL,
                    snippet: topic.Text,
                    source: 'duckduckgo',
                    timestamp: formatTimestamp(),
                    relevanceScore: 0.8,
                });
            }
        }
        return results;
    }
    /**
     * Search using the Google Custom Search API.
     * Note: an API key is required; the original comments name the
     * environment variables GOOGLE_API_KEY and GOOGLE_CX.
     *
     * @throws Error always — this provider is not implemented.
     */
    async searchGoogle(_query) {
        throw new Error('Google search requires API key configuration');
    }
    /**
     * Search using the Bing Search API.
     * Note: an API key is required; the original comments name the
     * environment variable BING_API_KEY.
     *
     * @throws Error always — this provider is not implemented.
     */
    async searchBing(_query) {
        throw new Error('Bing search requires API key configuration');
    }
    /**
     * Derive a short title from a text snippet.
     *
     * Uses the first sentence when it is at most 100 characters long. A
     * sentence ends at `.`, `!` or `?` followed by whitespace or the end of
     * the text, so names such as `Node.js` are not cut. Otherwise the first
     * 50 characters are used, with `...` appended only when text was cut off.
     *
     * @param text - Snippet to take the title from.
     * @returns The derived title.
     */
    extractTitle(text) {
        const trimmed = text.trim();
        const sentenceEnd = trimmed.search(/[.!?](?=\s|$)/);
        const firstSentence = (sentenceEnd === -1 ? trimmed : trimmed.slice(0, sentenceEnd)).trim();
        if (firstSentence && firstSentence.length <= 100) {
            return firstSentence;
        }
        return trimmed.length > 50 ? trimmed.substring(0, 50).trim() + '...' : trimmed;
    }
}
|
|
131
|
+
//# sourceMappingURL=web-search-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"web-search-client.js","sourceRoot":"","sources":["../../src/web-search/web-search-client.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAIL,EAAE,EACF,GAAG,EACH,UAAU,EACV,eAAe,GAChB,MAAM,0BAA0B,CAAC;AAqBlC;;GAEG;AACH,MAAM,OAAO,eAAe;IACT,eAAe,GAAmB,YAAY,CAAC;IAC/C,iBAAiB,GAAG,EAAE,CAAC;IAExC;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,KAAkB,EAClB,OAA0B;QAE1B,UAAU;QACV,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACpD,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC;QACxD,CAAC;QAED,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,IAAI,CAAC,eAAe,CAAC;QAC3D,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,IAAI,OAAO,EAAE,UAAU,IAAI,IAAI,CAAC,iBAAiB,CAAC;QAErF,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE;gBACrD,GAAG,KAAK;gBACR,UAAU;aACX,CAAC,CAAC;YAEH,kBAAkB;YAClB,MAAM,cAAc,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YAEpD,OAAO,EAAE,CAAC,cAAc,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,iBAAiB,OAAO,EAAE,CAAC,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB,CAC7B,QAAwB,EACxB,KAAkB;QAElB,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,YAAY;gBACf,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;YACtC,KAAK,QAAQ;gBACX,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YAClC,KAAK,MAAM;gBACT,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,gBAAgB,CAAC,KAAkB;QAC/C,MAAM,YAAY,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACrD,MAAM,GAAG,GAAG,iCAAiC,YAAY,wBAAwB,CAAC;QAElF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,iBAAiB;aAChC;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAuB,CAAC;QAC3D,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,gBAAgB;QAChB,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,EAA
E,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,UAAU,CAAC,QAAQ,CAAC;gBACxB,KAAK,EAAE,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,KAAK;gBAClC,GAAG,EAAE,IAAI,CAAC,WAAW;gBACrB,OAAO,EAAE,IAAI,CAAC,YAAY;gBAC1B,MAAM,EAAE,YAAY;gBACpB,SAAS,EAAE,eAAe,EAAE;gBAC5B,cAAc,EAAE,GAAG;aACpB,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvC,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,UAAU,CAAC,QAAQ,CAAC;wBACxB,KAAK,EAAE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;wBACpC,GAAG,EAAE,KAAK,CAAC,QAAQ;wBACnB,OAAO,EAAE,KAAK,CAAC,IAAI;wBACnB,MAAM,EAAE,YAAY;wBACpB,SAAS,EAAE,eAAe,EAAE;wBAC5B,cAAc,EAAE,GAAG;qBACpB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,YAAY,CAAC,MAAmB;QAC5C,+BAA+B;QAC/B,qCAAqC;QACrC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,UAAU,CAAC,MAAmB;QAC1C,sBAAsB;QACtB,wBAAwB;QACxB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY;QAC/B,2BAA2B;QAC3B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,IAAI,aAAa,IAAI,aAAa,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YACjD,OAAO,aAAa,CAAC,IAAI,EAAE,CAAC;QAC9B,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC;IAC9C,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/youtube/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/youtube/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YouTubeTranscript - YouTube transcript and metadata retrieval
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-003
|
|
5
|
+
* @design DES-KATASHIRO-001 §2.2 Collector Container
|
|
6
|
+
* @task TSK-012
|
|
7
|
+
*/
|
|
8
|
+
import { type Result } from '@nahisaho/katashiro-core';
|
|
9
|
+
import type { IYouTubeTranscript, TranscriptSegment, MediaMetadata } from '../index.js';
|
|
10
|
+
/**
 * YouTube transcript and metadata retrieval implementation.
 */
export declare class YouTubeTranscript implements IYouTubeTranscript {
    private readonly userAgent;
    /**
     * Extract the video ID from a YouTube URL.
     *
     * @param url - YouTube URL.
     * @returns The video ID, or `null`.
     */
    extractVideoId(url: string): string | null;
    /**
     * Fetch the transcript (captions) of a video.
     *
     * @param url - YouTube URL.
     * @param language - Optional language selector.
     * @returns A promise of a `Result` holding the transcript segments or an `Error`.
     */
    getTranscript(url: string, language?: string): Promise<Result<TranscriptSegment[], Error>>;
    /**
     * Fetch the metadata of a video.
     *
     * @param url - YouTube URL.
     * @returns A promise of a `Result` holding the metadata or an `Error`.
     */
    getVideoMetadata(url: string): Promise<Result<MediaMetadata, Error>>;
    /**
     * Format transcript segments as text.
     *
     * @param segments - Segments to format.
     * @returns The formatted text.
     */
    formatTranscript(segments: TranscriptSegment[]): string;
    /**
     * Convert seconds to MM:SS format.
     */
    private formatTime;
    /**
     * Fetch the transcript from the YouTube API.
     * Note: a library such as youtube-transcript is recommended for production.
     */
    private fetchTranscript;
    /**
     * Parse the transcript XML.
     */
    private parseTranscriptXml;
    /**
     * Decode HTML entities.
     */
    private decodeHtmlEntities;
    /**
     * Fetch the video metadata.
     */
    private fetchMetadata;
    /**
     * Extract content from a meta tag.
     */
    private extractMetaContent;
}
|
|
57
|
+
//# sourceMappingURL=youtube-transcript.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"youtube-transcript.d.ts","sourceRoot":"","sources":["../../src/youtube/youtube-transcript.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EAIZ,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAExF;;GAEG;AACH,qBAAa,iBAAkB,YAAW,kBAAkB;IAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA+C;IAEzE;;OAEG;IACH,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAsB1C;;OAEG;IACG,aAAa,CACjB,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,iBAAiB,EAAE,EAAE,KAAK,CAAC,CAAC;IAe9C;;OAEG;IACG,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;IAe1E;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,EAAE,GAAG,MAAM;IAavD;;OAEG;IACH,OAAO,CAAC,UAAU;IAMlB;;;OAGG;YACW,eAAe;IAyD7B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA0B1B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAY1B;;OAEG;YACW,aAAa;IAmC3B;;OAEG;IACH,OAAO,CAAC,kBAAkB;CAwB3B"}
|