@nahisaho/katashiro-collector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/api/api-client.d.ts +70 -0
  2. package/dist/api/api-client.d.ts.map +1 -0
  3. package/dist/api/api-client.js +132 -0
  4. package/dist/api/api-client.js.map +1 -0
  5. package/dist/api/index.d.ts +5 -0
  6. package/dist/api/index.d.ts.map +1 -0
  7. package/dist/api/index.js +5 -0
  8. package/dist/api/index.js.map +1 -0
  9. package/dist/feed/feed-reader.d.ts +70 -0
  10. package/dist/feed/feed-reader.d.ts.map +1 -0
  11. package/dist/feed/feed-reader.js +272 -0
  12. package/dist/feed/feed-reader.js.map +1 -0
  13. package/dist/feed/index.d.ts +5 -0
  14. package/dist/feed/index.d.ts.map +1 -0
  15. package/dist/feed/index.js +5 -0
  16. package/dist/feed/index.js.map +1 -0
  17. package/dist/index.d.ts +17 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +16 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/interfaces.d.ts +53 -0
  22. package/dist/interfaces.d.ts.map +1 -0
  23. package/dist/interfaces.js +9 -0
  24. package/dist/interfaces.js.map +1 -0
  25. package/dist/media/index.d.ts +5 -0
  26. package/dist/media/index.d.ts.map +1 -0
  27. package/dist/media/index.js +5 -0
  28. package/dist/media/index.js.map +1 -0
  29. package/dist/media/media-extractor.d.ts +74 -0
  30. package/dist/media/media-extractor.d.ts.map +1 -0
  31. package/dist/media/media-extractor.js +287 -0
  32. package/dist/media/media-extractor.js.map +1 -0
  33. package/dist/scraper/index.d.ts +5 -0
  34. package/dist/scraper/index.d.ts.map +1 -0
  35. package/dist/scraper/index.js +5 -0
  36. package/dist/scraper/index.js.map +1 -0
  37. package/dist/scraper/web-scraper.d.ts +48 -0
  38. package/dist/scraper/web-scraper.d.ts.map +1 -0
  39. package/dist/scraper/web-scraper.js +144 -0
  40. package/dist/scraper/web-scraper.js.map +1 -0
  41. package/dist/types.d.ts +82 -0
  42. package/dist/types.d.ts.map +1 -0
  43. package/dist/types.js +9 -0
  44. package/dist/types.js.map +1 -0
  45. package/dist/web-search/index.d.ts +8 -0
  46. package/dist/web-search/index.d.ts.map +1 -0
  47. package/dist/web-search/index.js +8 -0
  48. package/dist/web-search/index.js.map +1 -0
  49. package/dist/web-search/web-search-client.d.ts +44 -0
  50. package/dist/web-search/web-search-client.d.ts.map +1 -0
  51. package/dist/web-search/web-search-client.js +131 -0
  52. package/dist/web-search/web-search-client.js.map +1 -0
  53. package/dist/youtube/index.d.ts +5 -0
  54. package/dist/youtube/index.d.ts.map +1 -0
  55. package/dist/youtube/index.js +5 -0
  56. package/dist/youtube/index.js.map +1 -0
  57. package/dist/youtube/youtube-transcript.d.ts +57 -0
  58. package/dist/youtube/youtube-transcript.d.ts.map +1 -0
  59. package/dist/youtube/youtube-transcript.js +228 -0
  60. package/dist/youtube/youtube-transcript.js.map +1 -0
  61. package/package.json +44 -0
@@ -0,0 +1,144 @@
1
+ /**
2
+ * WebScraper - Webスクレイパー
3
+ *
4
+ * @requirement REQ-COLLECT-002
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-011
7
+ */
8
+ import { ok, err, formatTimestamp, validateUrl, isErr, } from '@nahisaho/katashiro-core';
9
/**
 * WebScraper - fetches a page over HTTP and extracts its title, text content
 * and, on request, image and link URLs using regular expressions.
 *
 * Note (from the original authors): Playwright is recommended for production.
 * Nothing here executes scripts or builds a DOM.
 */
export class WebScraper {
    /**
     * Scrape the content of `url`.
     *
     * @param url - Address to fetch; checked with `validateUrl` before any I/O.
     * @param options - Optional settings; `timeout`, `userAgent`, `javascript`,
     *   `extractImages` and `extractLinks` are the fields read by this class.
     * @returns `ok(result)` on success. `err(Error)` with an "Invalid URL: ..."
     *   message when validation fails, or "Scraping error: ..." when fetching
     *   or parsing throws.
     */
    async scrape(url, options) {
        const urlValidation = validateUrl(url);
        if (isErr(urlValidation)) {
            return err(new Error(`Invalid URL: ${urlValidation.error}`));
        }
        try {
            const html = await this.fetchPage(url, options);
            return ok(this.parseHtml(html, url, options));
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return err(new Error(`Scraping error: ${message}`));
        }
    }
    /**
     * Download the page body as text.
     *
     * @param url - Address to fetch.
     * @param options - `userAgent` overrides the default User-Agent header;
     *   a truthy `timeout` is handed to `AbortSignal.timeout()`.
     * @returns The response body.
     * @throws Error("HTTP error: <status>") when the response is not OK, plus
     *   whatever `fetch` itself throws (network failure, timeout abort).
     */
    async fetchPage(url, options) {
        const userAgent = options?.userAgent ?? 'Mozilla/5.0 (compatible; KATASHIRO/0.1.0)';
        const response = await fetch(url, {
            headers: {
                'User-Agent': userAgent,
                Accept: 'text/html,application/xhtml+xml',
            },
            signal: options?.timeout
                ? AbortSignal.timeout(options.timeout)
                : undefined,
        });
        if (!response.ok) {
            throw new Error(`HTTP error: ${response.status}`);
        }
        return response.text();
    }
    /**
     * Build the scraping result from raw HTML.
     *
     * `images` / `links` are only populated when the matching option is truthy,
     * and the raw `html` is only echoed back when `options.javascript` is truthy.
     */
    parseHtml(html, url, options) {
        const title = this.extractTitle(html);
        const content = this.extractContent(html);
        const images = options?.extractImages ? this.extractImages(html, url) : undefined;
        const links = options?.extractLinks ? this.extractLinks(html, url) : undefined;
        return {
            url,
            title,
            content,
            html: options?.javascript ? html : undefined,
            images,
            links,
            fetchedAt: formatTimestamp(),
        };
    }
    /**
     * Return the trimmed text of the first `<title>` element, or 'Untitled'
     * when there is none (or it is empty).
     */
    extractTitle(html) {
        const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
        return titleMatch?.[1]?.trim() ?? 'Untitled';
    }
    /**
     * Reduce HTML to plain text.
     *
     * Drops script, style, nav, footer and header elements together with their
     * contents, removes HTML comments, replaces every remaining tag with a
     * space and collapses all whitespace runs to a single space.
     */
    extractContent(html) {
        const withoutBoilerplate = html
            .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
            .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
            // Comments must go before generic tag stripping: `<[^>]+>` would stop
            // at the first '>' inside a comment and leak the rest into the text.
            .replace(/<!--[\s\S]*?-->/g, '')
            .replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
            .replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '')
            .replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '');
        // After `\s+` is collapsed no newlines remain, so a single pass suffices.
        return withoutBoilerplate
            .replace(/<[^>]+>/g, ' ')
            .replace(/\s+/g, ' ')
            .trim();
    }
    /**
     * Collect the `src` of every `<img>` tag as an absolute URL.
     *
     * The pattern requires the tag name to be exactly `img` and `src` to be a
     * standalone attribute, so `data-src` and similar are not mistaken for it.
     */
    extractImages(html, baseUrl) {
        const imgRegex = /<img\b[^>]*?\ssrc=["']([^"']+)["']/gi;
        const images = [];
        for (const match of html.matchAll(imgRegex)) {
            const src = match[1];
            if (src) {
                images.push(this.resolveUrl(src, baseUrl));
            }
        }
        return images;
    }
    /**
     * Collect the `href` of every `<a>` tag as an absolute URL.
     *
     * Fragment-only links (`#...`) and `javascript:` pseudo-links (any letter
     * case, surrounding whitespace ignored) are skipped. The pattern requires
     * the tag name to be exactly `a` and `href` to be a standalone attribute,
     * so `<article data-href>` or `<abbr href>` do not produce links.
     */
    extractLinks(html, baseUrl) {
        const linkRegex = /<a\b[^>]*?\shref=["']([^"']+)["']/gi;
        const links = [];
        for (const match of html.matchAll(linkRegex)) {
            const href = (match[1] ?? '').trim();
            if (!href || href.startsWith('#') || /^javascript:/i.test(href)) {
                continue;
            }
            links.push(this.resolveUrl(href, baseUrl));
        }
        return links;
    }
    /**
     * Turn a possibly relative URL into an absolute one.
     *
     * Strings already starting with http:// or https:// are returned untouched;
     * anything else is resolved against `baseUrl`. If resolution fails the
     * input is returned unchanged rather than throwing.
     */
    resolveUrl(url, baseUrl) {
        if (url.startsWith('http://') || url.startsWith('https://')) {
            return url;
        }
        try {
            return new URL(url, baseUrl).href;
        }
        catch {
            return url;
        }
    }
}
144
+ //# sourceMappingURL=web-scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web-scraper.js","sourceRoot":"","sources":["../../src/scraper/web-scraper.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAEL,EAAE,EACF,GAAG,EACH,eAAe,EACf,WAAW,EACX,KAAK,GACN,MAAM,0BAA0B,CAAC;AAGlC;;;GAGG;AACH,MAAM,OAAO,UAAU;IACrB;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,GAAW,EACX,OAAyB;QAEzB,QAAQ;QACR,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,aAAa,CAAC,EAAE,CAAC;YACzB,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,gBAAgB,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,mBAAmB,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,OAAyB;QAC5D,MAAM,SAAS,GACb,OAAO,EAAE,SAAS,IAAI,2CAA2C,CAAC;QAEpE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,SAAS;gBACvB,MAAM,EAAE,iCAAiC;aAC1C;YACD,MAAM,EAAE,OAAO,EAAE,OAAO;gBACtB,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC;gBACtC,CAAC,CAAC,SAAS;SACd,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,SAAS,CACf,IAAY,EACZ,GAAW,EACX,OAAyB;QAEzB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAClF,MAAM,KAAK,GAAG,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAE/E,OAAO;YACL,GAAG;YACH,KAAK;YACL,OAAO;YACP,IAAI,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC5C,MAAM;YACN,KAAK;YACL,SAAS,EAAE,eAAe,EAAE;SAC7B,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,YAAY
,CAAC,IAAY;QAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAC/D,OAAO,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC;IAC/C,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,2CAA2C;QAC3C,IAAI,OAAO,GAAG,IAAI;aACf,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC;aAChD,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC;aAC9C,OAAO,CAAC,6BAA6B,EAAE,EAAE,CAAC;aAC1C,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC;aAChD,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;QAEpD,YAAY;QACZ,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAE3C,SAAS;QACT,OAAO,GAAG,OAAO;aACd,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC;aACzB,IAAI,EAAE,CAAC;QAEV,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,OAAe;QACjD,MAAM,QAAQ,GAAG,iCAAiC,CAAC;QACnD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACrB,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY,EAAE,OAAe;QAChD,MAAM,SAAS,GAAG,gCAAgC,CAAC;QACnD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBACrE,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBACnD,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,GAAW,EAAE,OAAe;QAC7C,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5D,OAAO,GAAG,CAAC;QACb,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Collector型定義
3
+ *
4
+ * @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010 ~ TSK-015
7
+ */
8
+ import type { Timestamp, URL } from '@nahisaho/katashiro-core';
9
+ /**
10
+ * Web search options: optional settings a caller may pass alongside a search query.
11
+ */
12
+ export interface WebSearchOptions {
13
+ readonly provider?: 'google' | 'bing' | 'duckduckgo'; // WebSearchClient falls back to 'duckduckgo' when omitted
14
+ readonly maxResults?: number; // consulted only when the query carries no maxResults of its own; WebSearchClient then defaults to 10
15
+ readonly language?: string;
16
+ readonly region?: string;
17
+ readonly safeSearch?: boolean;
18
+ readonly timeout?: number;
19
+ }
20
+ /**
21
+ * Scraping options: optional settings accepted by the scraper's scrape() call.
22
+ */
23
+ export interface ScrapingOptions {
24
+ readonly waitForSelector?: string;
25
+ readonly timeout?: number; // when truthy, WebScraper.fetchPage passes it to AbortSignal.timeout()
26
+ readonly userAgent?: string; // replaces WebScraper's default User-Agent request header
27
+ readonly javascript?: boolean; // when truthy, WebScraper.parseHtml returns the raw HTML in ScrapingResult.html
28
+ readonly extractImages?: boolean; // when truthy, ScrapingResult.images is populated
29
+ readonly extractLinks?: boolean; // when truthy, ScrapingResult.links is populated
30
+ }
31
+ /**
32
+ * Scraping result: what the scraper returns for one fetched page.
33
+ */
34
+ export interface ScrapingResult {
35
+ readonly url: URL;
36
+ readonly title: string; // WebScraper uses 'Untitled' when no <title> text is found
37
+ readonly content: string; // page text with tags removed and whitespace collapsed
38
+ readonly html?: string; // raw HTML; WebScraper sets it only when options.javascript is truthy
39
+ readonly images?: string[]; // set only when options.extractImages is truthy
40
+ readonly links?: string[]; // set only when options.extractLinks is truthy
41
+ readonly fetchedAt: Timestamp; // WebScraper fills this from formatTimestamp()
42
+ }
43
+ /**
44
+ * Feed item: a single entry of a feed.
45
+ */
46
+ export interface FeedItem {
47
+ readonly id: string;
48
+ readonly title: string;
49
+ readonly link: URL;
50
+ readonly description?: string;
51
+ readonly content?: string;
52
+ readonly author?: string;
53
+ readonly publishedAt?: Timestamp;
54
+ readonly categories?: string[];
55
+ }
56
+ /**
57
+ * Transcript segment: one piece of transcribed text with its time range.
58
+ */
59
+ export interface TranscriptSegment {
60
+ readonly text: string;
61
+ readonly startTime: number; // NOTE(review): presumably seconds (YouTubeTranscript documents a seconds-to-MM:SS formatter) - confirm
62
+ readonly endTime: number;
63
+ readonly duration?: number;
64
+ }
65
+ /**
66
+ * Media metadata: descriptive information about an image, video or audio resource.
67
+ */
68
+ export interface MediaMetadata {
69
+ readonly url?: URL;
70
+ readonly sourceUrl?: URL;
71
+ readonly type: 'image' | 'video' | 'audio'; // the only required field
72
+ readonly title?: string;
73
+ readonly description?: string;
74
+ readonly width?: number;
75
+ readonly height?: number;
76
+ readonly duration?: number;
77
+ readonly format?: string;
78
+ readonly size?: number;
79
+ readonly thumbnailUrl?: string;
80
+ readonly fetchedAt?: Timestamp;
81
+ }
82
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,0BAA0B,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,YAAY,CAAC;IACrD,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;IAC9B,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC;IAC9B,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC;IACnB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,WAAW,CAAC,EAAE,SAAS,CAAC;IACjC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC;IACnB,QAAQ,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;IAC3C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3
B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,CAAC,EAAE,SAAS,CAAC;CAChC"}
package/dist/types.js ADDED
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Collector型定義
3
+ *
4
+ * @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010 ~ TSK-015
7
+ */
8
+ export {};
9
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Web検索モジュール
3
+ *
4
+ * @requirement REQ-COLLECT-001
5
+ * @task TSK-010
6
+ */
7
+ export { WebSearchClient } from './web-search-client.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/web-search/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Web検索モジュール
3
+ *
4
+ * @requirement REQ-COLLECT-001
5
+ * @task TSK-010
6
+ */
7
+ export { WebSearchClient } from './web-search-client.js';
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/web-search/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * WebSearchClient - Web検索クライアント
3
+ *
4
+ * @requirement REQ-COLLECT-001
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010
7
+ */
8
+ import { type Result, type SearchResult, type SearchQuery } from '@nahisaho/katashiro-core';
9
+ import type { IWebSearchClient, WebSearchOptions } from '../index.js';
10
+ /**
11
+ * Web search client implementation.
12
+ */
13
+ export declare class WebSearchClient implements IWebSearchClient {
14
+ private readonly defaultProvider;
15
+ private readonly defaultMaxResults;
16
+ /**
17
+ * Run a web search; resolves to a Result holding either the SearchResult list or an Error.
18
+ */
19
+ search(query: SearchQuery, options?: WebSearchOptions): Promise<Result<SearchResult[], Error>>;
20
+ /**
21
+ * Fetch search results from the selected provider.
22
+ */
23
+ private fetchFromProvider;
24
+ /**
25
+ * Search using the DuckDuckGo Instant Answer API.
26
+ * Note: for production, SerpAPI, Brave Search API or similar is recommended.
27
+ */
28
+ private searchDuckDuckGo;
29
+ /**
30
+ * Search using the Google Custom Search API.
31
+ * Note: an API key is required for the implementation.
32
+ */
33
+ private searchGoogle;
34
+ /**
35
+ * Search using the Bing Search API.
36
+ * Note: an API key is required for the implementation.
37
+ */
38
+ private searchBing;
39
+ /**
40
+ * Derive a title from a piece of text.
41
+ */
42
+ private extractTitle;
43
+ }
44
+ //# sourceMappingURL=web-search-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web-search-client.d.ts","sourceRoot":"","sources":["../../src/web-search/web-search-client.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EACX,KAAK,YAAY,EACjB,KAAK,WAAW,EAKjB,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAoBtE;;GAEG;AACH,qBAAa,eAAgB,YAAW,gBAAgB;IACtD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAgC;IAChE,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAM;IAExC;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,WAAW,EAClB,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC;IAyBzC;;OAEG;YACW,iBAAiB;IAgB/B;;;OAGG;YACW,gBAAgB;IAkD9B;;;OAGG;YACW,YAAY;IAM1B;;;OAGG;YACW,UAAU;IAMxB;;OAEG;IACH,OAAO,CAAC,YAAY;CAQrB"}
@@ -0,0 +1,131 @@
1
+ /**
2
+ * WebSearchClient - Web検索クライアント
3
+ *
4
+ * @requirement REQ-COLLECT-001
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010
7
+ */
8
+ import { ok, err, generateId, formatTimestamp, } from '@nahisaho/katashiro-core';
9
+ /**
10
+ * Web検索クライアント実装
11
+ */
12
+ export class WebSearchClient {
13
+ defaultProvider = 'duckduckgo';
14
+ defaultMaxResults = 10;
15
+ /**
16
+ * Web検索を実行
17
+ */
18
+ async search(query, options) {
19
+ // バリデーション
20
+ if (!query.query || query.query.trim().length === 0) {
21
+ return err(new Error('Search query cannot be empty'));
22
+ }
23
+ const provider = options?.provider ?? this.defaultProvider;
24
+ const maxResults = query.maxResults ?? options?.maxResults ?? this.defaultMaxResults;
25
+ try {
26
+ const results = await this.fetchFromProvider(provider, {
27
+ ...query,
28
+ maxResults,
29
+ });
30
+ // maxResultsでスライス
31
+ const limitedResults = results.slice(0, maxResults);
32
+ return ok(limitedResults);
33
+ }
34
+ catch (error) {
35
+ const message = error instanceof Error ? error.message : 'Unknown error';
36
+ return err(new Error(`Search error: ${message}`));
37
+ }
38
+ }
39
+ /**
40
+ * プロバイダーから検索結果を取得
41
+ */
42
+ async fetchFromProvider(provider, query) {
43
+ switch (provider) {
44
+ case 'duckduckgo':
45
+ return this.searchDuckDuckGo(query);
46
+ case 'google':
47
+ return this.searchGoogle(query);
48
+ case 'bing':
49
+ return this.searchBing(query);
50
+ default:
51
+ return this.searchDuckDuckGo(query);
52
+ }
53
+ }
54
+ /**
55
+ * DuckDuckGo Instant Answer API を使用した検索
56
+ * Note: 本番環境ではSerpAPIやBraveSearchAPIなどを使用推奨
57
+ */
58
+ async searchDuckDuckGo(query) {
59
+ const encodedQuery = encodeURIComponent(query.query);
60
+ const url = `https://api.duckduckgo.com/?q=${encodedQuery}&format=json&no_html=1`;
61
+ const response = await fetch(url, {
62
+ headers: {
63
+ 'User-Agent': 'KATASHIRO/0.1.0',
64
+ },
65
+ });
66
+ if (!response.ok) {
67
+ throw new Error(`DuckDuckGo API error: ${response.status}`);
68
+ }
69
+ const data = (await response.json());
70
+ const results = [];
71
+ // Abstract結果を追加
72
+ if (data.AbstractText && data.AbstractURL) {
73
+ results.push({
74
+ id: generateId('search'),
75
+ title: data.Heading ?? query.query,
76
+ url: data.AbstractURL,
77
+ snippet: data.AbstractText,
78
+ source: 'duckduckgo',
79
+ timestamp: formatTimestamp(),
80
+ relevanceScore: 1.0,
81
+ });
82
+ }
83
+ // Related Topics を追加
84
+ if (data.RelatedTopics) {
85
+ for (const topic of data.RelatedTopics) {
86
+ if (topic.Text && topic.FirstURL) {
87
+ results.push({
88
+ id: generateId('search'),
89
+ title: this.extractTitle(topic.Text),
90
+ url: topic.FirstURL,
91
+ snippet: topic.Text,
92
+ source: 'duckduckgo',
93
+ timestamp: formatTimestamp(),
94
+ relevanceScore: 0.8,
95
+ });
96
+ }
97
+ }
98
+ }
99
+ return results;
100
+ }
101
+ /**
102
+ * Google Custom Search API を使用した検索
103
+ * Note: 実装にはAPI Keyが必要
104
+ */
105
+ async searchGoogle(_query) {
106
+ // Google Custom Search API の実装
107
+ // 環境変数 GOOGLE_API_KEY, GOOGLE_CX が必要
108
+ throw new Error('Google search requires API key configuration');
109
+ }
110
+ /**
111
+ * Bing Search API を使用した検索
112
+ * Note: 実装にはAPI Keyが必要
113
+ */
114
+ async searchBing(_query) {
115
+ // Bing Search API の実装
116
+ // 環境変数 BING_API_KEY が必要
117
+ throw new Error('Bing search requires API key configuration');
118
+ }
119
+ /**
120
+ * テキストからタイトルを抽出
121
+ */
122
+ extractTitle(text) {
123
+ // 最初の文または最初の50文字をタイトルとして使用
124
+ const firstSentence = text.split(/[.!?]/)[0];
125
+ if (firstSentence && firstSentence.length <= 100) {
126
+ return firstSentence.trim();
127
+ }
128
+ return text.substring(0, 50).trim() + '...';
129
+ }
130
+ }
131
+ //# sourceMappingURL=web-search-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web-search-client.js","sourceRoot":"","sources":["../../src/web-search/web-search-client.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAIL,EAAE,EACF,GAAG,EACH,UAAU,EACV,eAAe,GAChB,MAAM,0BAA0B,CAAC;AAqBlC;;GAEG;AACH,MAAM,OAAO,eAAe;IACT,eAAe,GAAmB,YAAY,CAAC;IAC/C,iBAAiB,GAAG,EAAE,CAAC;IAExC;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,KAAkB,EAClB,OAA0B;QAE1B,UAAU;QACV,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACpD,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC;QACxD,CAAC;QAED,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,IAAI,CAAC,eAAe,CAAC;QAC3D,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,IAAI,OAAO,EAAE,UAAU,IAAI,IAAI,CAAC,iBAAiB,CAAC;QAErF,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE;gBACrD,GAAG,KAAK;gBACR,UAAU;aACX,CAAC,CAAC;YAEH,kBAAkB;YAClB,MAAM,cAAc,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YAEpD,OAAO,EAAE,CAAC,cAAc,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,iBAAiB,OAAO,EAAE,CAAC,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB,CAC7B,QAAwB,EACxB,KAAkB;QAElB,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,YAAY;gBACf,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;YACtC,KAAK,QAAQ;gBACX,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YAClC,KAAK,MAAM;gBACT,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAChC;gBACE,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,gBAAgB,CAAC,KAAkB;QAC/C,MAAM,YAAY,GAAG,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACrD,MAAM,GAAG,GAAG,iCAAiC,YAAY,wBAAwB,CAAC;QAElF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EAAE,iBAAiB;aAChC;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAuB,CAAC;QAC3D,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,gBAAgB;QAChB,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,E
AAE,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,UAAU,CAAC,QAAQ,CAAC;gBACxB,KAAK,EAAE,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,KAAK;gBAClC,GAAG,EAAE,IAAI,CAAC,WAAW;gBACrB,OAAO,EAAE,IAAI,CAAC,YAAY;gBAC1B,MAAM,EAAE,YAAY;gBACpB,SAAS,EAAE,eAAe,EAAE;gBAC5B,cAAc,EAAE,GAAG;aACpB,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvC,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,UAAU,CAAC,QAAQ,CAAC;wBACxB,KAAK,EAAE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;wBACpC,GAAG,EAAE,KAAK,CAAC,QAAQ;wBACnB,OAAO,EAAE,KAAK,CAAC,IAAI;wBACnB,MAAM,EAAE,YAAY;wBACpB,SAAS,EAAE,eAAe,EAAE;wBAC5B,cAAc,EAAE,GAAG;qBACpB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,YAAY,CAAC,MAAmB;QAC5C,+BAA+B;QAC/B,qCAAqC;QACrC,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,UAAU,CAAC,MAAmB;QAC1C,sBAAsB;QACtB,wBAAwB;QACxB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY;QAC/B,2BAA2B;QAC3B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,IAAI,aAAa,IAAI,aAAa,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YACjD,OAAO,aAAa,CAAC,IAAI,EAAE,CAAC;QAC9B,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC;IAC9C,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * YouTube module exports
3
+ */
4
+ export { YouTubeTranscript } from './youtube-transcript.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/youtube/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * YouTube module exports
3
+ */
4
+ export { YouTubeTranscript } from './youtube-transcript.js';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/youtube/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * YouTubeTranscript - YouTube字幕・メタデータ取得
3
+ *
4
+ * @requirement REQ-COLLECT-003
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-012
7
+ */
8
+ import { type Result } from '@nahisaho/katashiro-core';
9
+ import type { IYouTubeTranscript, TranscriptSegment, MediaMetadata } from '../index.js';
10
+ /**
11
+ * YouTube transcript (captions) and metadata retrieval implementation.
12
+ */
13
+ export declare class YouTubeTranscript implements IYouTubeTranscript {
14
+ private readonly userAgent;
15
+ /**
16
+ * Extract the video ID from a YouTube URL; returns null when none is found.
17
+ */
18
+ extractVideoId(url: string): string | null;
19
+ /**
20
+ * Get the transcript (captions) for a video, optionally in a given language.
21
+ */
22
+ getTranscript(url: string, language?: string): Promise<Result<TranscriptSegment[], Error>>;
23
+ /**
24
+ * Get the video's metadata.
25
+ */
26
+ getVideoMetadata(url: string): Promise<Result<MediaMetadata, Error>>;
27
+ /**
28
+ * Format transcript segments as text.
29
+ */
30
+ formatTranscript(segments: TranscriptSegment[]): string;
31
+ /**
32
+ * Convert seconds to MM:SS format.
33
+ */
34
+ private formatTime;
35
+ /**
36
+ * Fetch the transcript from the YouTube API.
37
+ * Note: for production, a library such as youtube-transcript is recommended.
38
+ */
39
+ private fetchTranscript;
40
+ /**
41
+ * Parse the transcript XML.
42
+ */
43
+ private parseTranscriptXml;
44
+ /**
45
+ * Decode HTML entities.
46
+ */
47
+ private decodeHtmlEntities;
48
+ /**
49
+ * Fetch the video's metadata.
50
+ */
51
+ private fetchMetadata;
52
+ /**
53
+ * Extract the content of a meta tag.
54
+ */
55
+ private extractMetaContent;
56
+ }
57
+ //# sourceMappingURL=youtube-transcript.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"youtube-transcript.d.ts","sourceRoot":"","sources":["../../src/youtube/youtube-transcript.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EAIZ,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAExF;;GAEG;AACH,qBAAa,iBAAkB,YAAW,kBAAkB;IAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA+C;IAEzE;;OAEG;IACH,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAsB1C;;OAEG;IACG,aAAa,CACjB,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,iBAAiB,EAAE,EAAE,KAAK,CAAC,CAAC;IAe9C;;OAEG;IACG,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;IAe1E;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,EAAE,GAAG,MAAM;IAavD;;OAEG;IACH,OAAO,CAAC,UAAU;IAMlB;;;OAGG;YACW,eAAe;IAyD7B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA0B1B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAY1B;;OAEG;YACW,aAAa;IAmC3B;;OAEG;IACH,OAAO,CAAC,kBAAkB;CAwB3B"}