@nahisaho/katashiro-collector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/dist/api/api-client.d.ts +70 -0
  2. package/dist/api/api-client.d.ts.map +1 -0
  3. package/dist/api/api-client.js +132 -0
  4. package/dist/api/api-client.js.map +1 -0
  5. package/dist/api/index.d.ts +5 -0
  6. package/dist/api/index.d.ts.map +1 -0
  7. package/dist/api/index.js +5 -0
  8. package/dist/api/index.js.map +1 -0
  9. package/dist/feed/feed-reader.d.ts +70 -0
  10. package/dist/feed/feed-reader.d.ts.map +1 -0
  11. package/dist/feed/feed-reader.js +272 -0
  12. package/dist/feed/feed-reader.js.map +1 -0
  13. package/dist/feed/index.d.ts +5 -0
  14. package/dist/feed/index.d.ts.map +1 -0
  15. package/dist/feed/index.js +5 -0
  16. package/dist/feed/index.js.map +1 -0
  17. package/dist/index.d.ts +17 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +16 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/interfaces.d.ts +53 -0
  22. package/dist/interfaces.d.ts.map +1 -0
  23. package/dist/interfaces.js +9 -0
  24. package/dist/interfaces.js.map +1 -0
  25. package/dist/media/index.d.ts +5 -0
  26. package/dist/media/index.d.ts.map +1 -0
  27. package/dist/media/index.js +5 -0
  28. package/dist/media/index.js.map +1 -0
  29. package/dist/media/media-extractor.d.ts +74 -0
  30. package/dist/media/media-extractor.d.ts.map +1 -0
  31. package/dist/media/media-extractor.js +287 -0
  32. package/dist/media/media-extractor.js.map +1 -0
  33. package/dist/scraper/index.d.ts +5 -0
  34. package/dist/scraper/index.d.ts.map +1 -0
  35. package/dist/scraper/index.js +5 -0
  36. package/dist/scraper/index.js.map +1 -0
  37. package/dist/scraper/web-scraper.d.ts +48 -0
  38. package/dist/scraper/web-scraper.d.ts.map +1 -0
  39. package/dist/scraper/web-scraper.js +144 -0
  40. package/dist/scraper/web-scraper.js.map +1 -0
  41. package/dist/types.d.ts +82 -0
  42. package/dist/types.d.ts.map +1 -0
  43. package/dist/types.js +9 -0
  44. package/dist/types.js.map +1 -0
  45. package/dist/web-search/index.d.ts +8 -0
  46. package/dist/web-search/index.d.ts.map +1 -0
  47. package/dist/web-search/index.js +8 -0
  48. package/dist/web-search/index.js.map +1 -0
  49. package/dist/web-search/web-search-client.d.ts +44 -0
  50. package/dist/web-search/web-search-client.d.ts.map +1 -0
  51. package/dist/web-search/web-search-client.js +131 -0
  52. package/dist/web-search/web-search-client.js.map +1 -0
  53. package/dist/youtube/index.d.ts +5 -0
  54. package/dist/youtube/index.d.ts.map +1 -0
  55. package/dist/youtube/index.js +5 -0
  56. package/dist/youtube/index.js.map +1 -0
  57. package/dist/youtube/youtube-transcript.d.ts +57 -0
  58. package/dist/youtube/youtube-transcript.d.ts.map +1 -0
  59. package/dist/youtube/youtube-transcript.js +228 -0
  60. package/dist/youtube/youtube-transcript.js.map +1 -0
  61. package/package.json +44 -0
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Collectorインターフェース定義
3
+ *
4
+ * @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010 ~ TSK-015
7
+ */
8
+ import type { Result, SearchResult, SearchQuery } from '@nahisaho/katashiro-core';
9
+ import type { WebSearchOptions, ScrapingOptions, ScrapingResult, FeedItem, TranscriptSegment, MediaMetadata } from './types.js';
10
+ /**
11
+ * Web search client interface: runs a search for a query and resolves to a Result holding the search results or an Error
12
+ * @requirement REQ-COLLECT-001
13
+ */
14
+ export interface IWebSearchClient {
15
+ search(query: SearchQuery, options?: WebSearchOptions): Promise<Result<SearchResult[], Error>>;
16
+ }
17
+ /**
18
+ * Web scraper interface: scrapes a single URL and resolves to a Result holding the ScrapingResult or an Error
19
+ * @requirement REQ-COLLECT-002
20
+ */
21
+ export interface IWebScraper {
22
+ scrape(url: string, options?: ScrapingOptions): Promise<Result<ScrapingResult, Error>>;
23
+ }
24
+ /**
25
+ * Feed reader interface: fetches the feed at a URL and resolves to a Result holding its FeedItem list or an Error
26
+ * @requirement REQ-COLLECT-004
27
+ */
28
+ export interface IFeedReader {
29
+ fetch(feedUrl: string): Promise<Result<FeedItem[], Error>>;
30
+ }
31
+ /**
32
+ * API client interface: generic get/post calls against an endpoint, each resolving to a Result holding a caller-chosen type T or an Error
33
+ * @requirement REQ-COLLECT-005
34
+ */
35
+ export interface IAPIClient {
36
+ get<T>(endpoint: string, params?: Record<string, string>): Promise<Result<T, Error>>;
37
+ post<T>(endpoint: string, body: unknown): Promise<Result<T, Error>>;
38
+ }
39
+ /**
40
+ * YouTube transcript interface: resolves a video ID to a Result holding its TranscriptSegment list or an Error
41
+ * @requirement REQ-COLLECT-003
42
+ */
43
+ export interface IYouTubeTranscript {
44
+ getTranscript(videoId: string): Promise<Result<TranscriptSegment[], Error>>;
45
+ }
46
+ /**
47
+ * Media extraction interface: resolves a URL to a Result holding its MediaMetadata or an Error
48
+ * @requirement REQ-COLLECT-006
49
+ */
50
+ export interface IMediaExtractor {
51
+ extractMetadata(url: string): Promise<Result<MediaMetadata, Error>>;
52
+ }
53
+ //# sourceMappingURL=interfaces.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.d.ts","sourceRoot":"","sources":["../src/interfaces.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAClF,OAAO,KAAK,EACV,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,QAAQ,EACR,iBAAiB,EACjB,aAAa,EACd,MAAM,YAAY,CAAC;AAEpB;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,MAAM,CACJ,KAAK,EAAE,WAAW,EAClB,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC;CAC3C;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC,CAAC;CACxF;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC;CAC5D;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;IACrF,IAAI,CAAC,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;CACrE;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,iBAAiB,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC;CAC7E;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC,CAAC;CACrE"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Collectorインターフェース定義
3
+ *
4
+ * @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-010 ~ TSK-015
7
+ */
8
+ export {};
9
+ //# sourceMappingURL=interfaces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../src/interfaces.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Media module exports
3
+ */
4
+ export { MediaExtractor, type ExtractedMedia } from './media-extractor.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/media/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,cAAc,EAAE,KAAK,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Media module exports
3
+ */
4
+ export { MediaExtractor } from './media-extractor.js';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/media/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,cAAc,EAAuB,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,74 @@
1
+ /**
2
+ * MediaExtractor - メディア抽出・メタデータ取得
3
+ *
4
+ * @requirement REQ-COLLECT-006
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-015
7
+ */
8
+ import { type Result } from '@nahisaho/katashiro-core';
9
+ import type { IMediaExtractor, MediaMetadata } from '../index.js';
10
+ type MediaType = 'image' | 'video' | 'audio'; // Media categories returned by MediaExtractor.detectMediaType (module-private alias)
11
+ /**
12
+ * Media extracted from HTML: one list of URL strings per media category (the return type of MediaExtractor.extractFromHtml)
13
+ */
14
+ export interface ExtractedMedia {
15
+ readonly images: string[];
16
+ readonly videos: string[];
17
+ readonly audio: string[];
18
+ }
19
+ /**
20
+ * Media extraction and metadata retrieval implementation (implements IMediaExtractor)
21
+ */
22
+ export declare class MediaExtractor implements IMediaExtractor {
23
+ private readonly userAgent;
24
+ /**
25
+ * Extract media metadata from a URL; resolves to a Result holding MediaMetadata or an Error
26
+ */
27
+ extractMetadata(url: string): Promise<Result<MediaMetadata, Error>>;
28
+ /**
29
+ * Extract metadata with a GET request (used when the HEAD request fails)
30
+ */
31
+ private extractMetadataWithGet;
32
+ /**
33
+ * Parse metadata from the response
34
+ */
35
+ private parseMetadataFromResponse;
36
+ /**
37
+ * Detect the media type from a Content-Type value; returns null when it is not image, video or audio
38
+ */
39
+ detectMediaType(contentType: string): MediaType | null;
40
+ /**
41
+ * Detect the media type from the URL
42
+ */
43
+ private detectMediaTypeFromUrl;
44
+ /**
45
+ * Extract the format from a Content-Type value; returns null when none can be extracted
46
+ */
47
+ extractFormat(contentType: string): string | null;
48
+ /**
49
+ * Extract the file extension from a URL; returns null when none can be extracted
50
+ */
51
+ extractFormatFromUrl(url: string): string | null;
52
+ /**
53
+ * Extract media URLs from HTML, grouped into images, videos and audio
54
+ */
55
+ extractFromHtml(html: string, baseUrl: string): ExtractedMedia;
56
+ /**
57
+ * Extract image URLs from HTML
58
+ */
59
+ private extractImages;
60
+ /**
61
+ * Extract video URLs from HTML
62
+ */
63
+ private extractVideos;
64
+ /**
65
+ * Extract audio URLs from HTML
66
+ */
67
+ private extractAudio;
68
+ /**
69
+ * Convert a relative URL to an absolute URL
70
+ */
71
+ private resolveUrl;
72
+ }
73
+ export {};
74
+ //# sourceMappingURL=media-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"media-extractor.d.ts","sourceRoot":"","sources":["../../src/media/media-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EAMZ,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAElE,KAAK,SAAS,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;AAE7C;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,qBAAa,cAAe,YAAW,eAAe;IACpD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA+C;IAEzE;;OAEG;IACG,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;IA0BzE;;OAEG;YACW,sBAAsB;IAsBpC;;OAEG;IACH,OAAO,CAAC,yBAAyB;IA4BjC;;OAEG;IACH,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI;IAgBtD;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAe9B;;OAEG;IACH,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAKjD;;OAEG;IACH,oBAAoB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAUhD;;OAEG;IACH,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,cAAc;IAQ9D;;OAEG;IACH,OAAO,CAAC,aAAa;IAkErB;;OAEG;IACH,OAAO,CAAC,aAAa;IAgCrB;;OAEG;IACH,OAAO,CAAC,YAAY;IAgCpB;;OAEG;IACH,OAAO,CAAC,UAAU;CAUnB"}
@@ -0,0 +1,287 @@
1
+ /**
2
+ * MediaExtractor - メディア抽出・メタデータ取得
3
+ *
4
+ * @requirement REQ-COLLECT-006
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-015
7
+ */
8
+ import { ok, err, formatTimestamp, validateUrl, isErr, } from '@nahisaho/katashiro-core';
9
/**
 * Media extraction and metadata retrieval.
 *
 * Offers two independent capabilities:
 *  - `extractMetadata(url)` issues an HTTP request and derives the media
 *    type, format and size from the response headers (falling back to the
 *    URL's file extension where the headers are not conclusive).
 *  - `extractFromHtml(html, baseUrl)` scans an HTML string with regular
 *    expressions and returns the image, video and audio URLs it finds.
 *    This is pattern matching, not HTML parsing: markup inside comments or
 *    scripts is matched like any other text.
 */
export class MediaExtractor {
    /** User-Agent header value sent with every metadata request. */
    userAgent = 'Mozilla/5.0 (compatible; KATASHIRO/0.1.0)';

    /**
     * Extract media metadata for a URL.
     *
     * Sends a HEAD request first; when the server answers with a non-2xx
     * status the lookup is retried with a ranged GET.
     *
     * @param url Absolute URL of the media resource.
     * @returns A Result wrapping the metadata, or an Error when the URL is
     *   rejected by `validateUrl`, the request fails, or no media type can
     *   be determined. Never throws.
     */
    async extractMetadata(url) {
        const urlValidation = validateUrl(url);
        if (isErr(urlValidation)) {
            return err(new Error(`Invalid URL: ${urlValidation.error}`));
        }
        try {
            const response = await fetch(url, {
                method: 'HEAD',
                headers: {
                    'User-Agent': this.userAgent,
                },
            });
            if (!response.ok) {
                // Some servers reject HEAD; retry with GET.
                return this.extractMetadataWithGet(url);
            }
            return this.parseMetadataFromResponse(url, response);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return err(new Error(`Metadata extraction error: ${message}`));
        }
    }

    /**
     * Extract metadata with a GET request (used when HEAD fails).
     *
     * A `Range: bytes=0-0` header is sent so that servers supporting range
     * requests return a single byte instead of the whole resource.
     *
     * @param url Absolute URL of the media resource.
     * @returns A Result wrapping the metadata, or an Error on a non-2xx
     *   status or a failed request.
     */
    async extractMetadataWithGet(url) {
        let response;
        try {
            response = await fetch(url, {
                headers: {
                    'User-Agent': this.userAgent,
                    Range: 'bytes=0-0', // Minimize download
                },
            });
            // `ok` covers every 2xx status, including 206 Partial Content.
            if (!response.ok) {
                return err(new Error(`HTTP error: ${response.status}`));
            }
            return this.parseMetadataFromResponse(url, response);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return err(new Error(`Metadata extraction error: ${message}`));
        }
        finally {
            // Only the headers are needed. Discard the body (best effort) so a
            // server that ignored the Range header does not stream the whole
            // resource to us.
            void response?.body?.cancel().catch(() => undefined);
        }
    }

    /**
     * Build the metadata Result from a response's headers.
     *
     * The media type comes from Content-Type and, failing that, from the
     * URL's file extension. The format is resolved the same way.
     *
     * @param url URL the response was fetched from.
     * @param response Fetch response; only `headers` is read.
     * @returns ok({ url, type, format, size, fetchedAt }) or an Error when
     *   neither the Content-Type nor the URL identifies a media type.
     */
    parseMetadataFromResponse(url, response) {
        const contentType = response.headers.get('content-type') ?? '';
        const type = this.detectMediaType(contentType) ?? this.detectMediaTypeFromUrl(url);
        if (!type) {
            return err(new Error('Unable to determine media type'));
        }
        const format = this.extractFormat(contentType) ?? this.extractFormatFromUrl(url);
        return ok({
            url,
            type,
            format: format ?? undefined,
            size: this.resolveContentSize(response.headers),
            fetchedAt: formatTimestamp(),
        });
    }

    /**
     * Determine the full size of the resource in bytes from response headers.
     *
     * For a partial (ranged) response, Content-Length is the length of the
     * returned range, not of the resource, so the total is taken from
     * Content-Range (`bytes 0-0/12345`). When Content-Range reports an
     * unknown total (`/*`) the size is unknown.
     *
     * @param headers Object exposing `get(name)` like the Fetch `Headers` class.
     * @returns The size in bytes, or undefined when it is absent or not a
     *   valid non-negative number.
     */
    resolveContentSize(headers) {
        const contentRange = headers.get('content-range');
        if (contentRange != null) {
            const total = contentRange.match(/\/\s*(\d+)\s*$/)?.[1];
            return total === undefined ? undefined : Number.parseInt(total, 10);
        }
        const contentLength = headers.get('content-length');
        if (!contentLength) {
            return undefined;
        }
        const size = Number.parseInt(contentLength, 10);
        return Number.isNaN(size) || size < 0 ? undefined : size;
    }

    /**
     * Detect the media type from a Content-Type value.
     *
     * @param contentType Content-Type header value (may be empty).
     * @returns 'image', 'video' or 'audio' according to the top-level MIME
     *   type (case-insensitive), or null for anything else.
     */
    detectMediaType(contentType) {
        const lower = contentType.trim().toLowerCase();
        for (const type of ['image', 'video', 'audio']) {
            if (lower.startsWith(`${type}/`)) {
                return type;
            }
        }
        return null;
    }

    /**
     * Detect the media type from the URL's file extension.
     *
     * @param url URL to inspect.
     * @returns 'image', 'video' or 'audio', or null when the URL has no
     *   extension or the extension is not in the known lists.
     */
    detectMediaTypeFromUrl(url) {
        const ext = this.extractFormatFromUrl(url)?.toLowerCase();
        if (!ext) {
            return null;
        }
        const extensionsByType = {
            image: ['jpg', 'jpeg', 'png', 'gif', 'webp', 'svg', 'bmp', 'ico'],
            video: ['mp4', 'webm', 'mov', 'avi', 'mkv', 'flv', 'wmv'],
            audio: ['mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a', 'wma'],
        };
        for (const [type, extensions] of Object.entries(extensionsByType)) {
            if (extensions.includes(ext)) {
                return type;
            }
        }
        return null;
    }

    /**
     * Extract the format (MIME subtype) from a Content-Type value.
     *
     * Matching is case-insensitive, consistent with detectMediaType, and the
     * result is lower-cased with parameters and whitespace removed, so
     * `Image/PNG ; charset=binary` yields `png`.
     *
     * @param contentType Content-Type header value.
     * @returns The subtype, or null when the value is not an image, video or
     *   audio MIME type.
     */
    extractFormat(contentType) {
        const match = contentType.match(/^\s*(?:image|video|audio)\/([^;\s]+)/i);
        return match?.[1]?.toLowerCase() ?? null;
    }

    /**
     * Extract the file extension from a URL's path.
     *
     * @param url Absolute URL; query string and fragment are ignored.
     * @returns The extension without the dot, in its original case, or null
     *   when the path has no alphanumeric extension or the URL cannot be parsed.
     */
    extractFormatFromUrl(url) {
        try {
            const pathname = new URL(url).pathname;
            const match = pathname.match(/\.([a-zA-Z0-9]+)$/);
            return match?.[1] ?? null;
        }
        catch {
            return null;
        }
    }

    /**
     * Extract media URLs from an HTML string.
     *
     * @param html HTML source to scan.
     * @param baseUrl URL used to resolve relative references.
     * @returns Image, video and audio URLs; each list is de-duplicated and
     *   kept in order of first discovery.
     */
    extractFromHtml(html, baseUrl) {
        const images = this.extractImages(html, baseUrl);
        const videos = this.extractVideos(html, baseUrl);
        const audio = this.extractAudio(html, baseUrl);
        return { images, videos, audio };
    }

    /**
     * Extract image URLs from HTML.
     *
     * Sources are scanned in this order: `<img src>`, every candidate of
     * `<img srcset>`, the first candidate of `<source srcset>`, and CSS
     * `background` / `background-image` `url(...)` values.
     *
     * @param html HTML source to scan.
     * @param baseUrl URL used to resolve relative references.
     * @returns De-duplicated absolute image URLs.
     */
    extractImages(html, baseUrl) {
        const collector = this.createUrlCollector(baseUrl);
        // img src
        for (const match of html.matchAll(/<img[^>]+src=["']([^"']+)["']/gi)) {
            collector.add(match[1]);
        }
        // img srcset: each comma-separated candidate is "<url> <descriptor>"
        for (const match of html.matchAll(/<img[^>]+srcset=["']([^"']+)["']/gi)) {
            for (const candidate of match[1].split(',')) {
                collector.add(candidate.trim().split(/\s+/)[0]);
            }
        }
        // picture source: only the first srcset candidate is taken
        for (const match of html.matchAll(/<source[^>]+srcset=["']([^"']+)["']/gi)) {
            collector.add(match[1].split(',')[0]?.trim().split(/\s+/)[0]);
        }
        // background-image in style
        for (const match of html.matchAll(/background(?:-image)?:\s*url\(['"]?([^'")\s]+)['"]?\)/gi)) {
            collector.add(match[1]);
        }
        return collector.urls;
    }

    /**
     * Extract video URLs from HTML (`<video src>` and nested `<source src>`).
     *
     * @param html HTML source to scan.
     * @param baseUrl URL used to resolve relative references.
     * @returns De-duplicated absolute video URLs.
     */
    extractVideos(html, baseUrl) {
        return this.extractMediaElementUrls(html, baseUrl, 'video');
    }

    /**
     * Extract audio URLs from HTML (`<audio src>` and nested `<source src>`).
     *
     * @param html HTML source to scan.
     * @param baseUrl URL used to resolve relative references.
     * @returns De-duplicated absolute audio URLs.
     */
    extractAudio(html, baseUrl) {
        return this.extractMediaElementUrls(html, baseUrl, 'audio');
    }

    /**
     * Collect the URLs of one kind of media element.
     *
     * The element's own `src` attributes are collected first, then every
     * `<source src>` found between the element's opening tag and its closing
     * tag (or the end of input when the element is never closed). Scoping the
     * `<source>` search to the element keeps sources that belong to a later,
     * different element from being attributed to this one.
     *
     * @param html HTML source to scan.
     * @param baseUrl URL used to resolve relative references.
     * @param tag Element name; called with 'video' or 'audio' only.
     * @returns De-duplicated absolute URLs.
     */
    extractMediaElementUrls(html, baseUrl, tag) {
        const collector = this.createUrlCollector(baseUrl);
        // src attribute on the element itself
        const directSrc = new RegExp(`<${tag}[^>]+src=["']([^"']+)["']`, 'gi');
        for (const match of html.matchAll(directSrc)) {
            collector.add(match[1]);
        }
        // source children of the element
        const element = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)(?:<\\/${tag}\\s*>|$)`, 'gi');
        for (const block of html.matchAll(element)) {
            for (const match of block[1].matchAll(/<source[^>]+src=["']([^"']+)["']/gi)) {
                collector.add(match[1]);
            }
        }
        return collector.urls;
    }

    /**
     * Create an ordered, de-duplicating URL accumulator.
     *
     * @param baseUrl URL used to resolve relative references.
     * @returns `urls`, the accumulated list, and `add(raw)`, which resolves
     *   `raw` against `baseUrl` and appends it unless it is empty or already
     *   present.
     */
    createUrlCollector(baseUrl) {
        const urls = [];
        const seen = new Set();
        const add = (raw) => {
            if (!raw) {
                return;
            }
            const url = this.resolveUrl(raw, baseUrl);
            if (!seen.has(url)) {
                seen.add(url);
                urls.push(url);
            }
        };
        return { urls, add };
    }

    /**
     * Convert a relative URL to an absolute URL.
     *
     * @param url URL as written in the document.
     * @param baseUrl URL to resolve against.
     * @returns `url` unchanged when it already starts with http:// or
     *   https://, or when resolution fails; otherwise the resolved href.
     */
    resolveUrl(url, baseUrl) {
        if (url.startsWith('http://') || url.startsWith('https://')) {
            return url;
        }
        try {
            return new URL(url, baseUrl).href;
        }
        catch {
            return url;
        }
    }
}
287
+ //# sourceMappingURL=media-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"media-extractor.js","sourceRoot":"","sources":["../../src/media/media-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAEL,EAAE,EACF,GAAG,EACH,eAAe,EACf,WAAW,EACX,KAAK,GACN,MAAM,0BAA0B,CAAC;AAclC;;GAEG;AACH,MAAM,OAAO,cAAc;IACR,SAAS,GAAG,2CAA2C,CAAC;IAEzE;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,GAAW;QAC/B,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,aAAa,CAAC,EAAE,CAAC;YACzB,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,gBAAgB,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,YAAY,EAAE,IAAI,CAAC,SAAS;iBAC7B;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,wBAAwB;gBACxB,OAAO,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAC1C,CAAC;YAED,OAAO,IAAI,CAAC,yBAAyB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,sBAAsB,CAClC,GAAW;QAEX,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE;oBACP,YAAY,EAAE,IAAI,CAAC,SAAS;oBAC5B,KAAK,EAAE,WAAW,EAAE,oBAAoB;iBACzC;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC5C,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,eAAe,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAC1D,CAAC;YAED,OAAO,IAAI,CAAC,yBAAyB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED;;OAEG;IACK,yBAAyB,CAC/B,GAAW,EACX,QAAkB;QAElB,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAE7D,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,WAAW,CAAC,CAAC;QACpD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,yBAAyB;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,s
BAAsB,CAAC,GAAG,CAAC,CAAC;YACjD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GACV,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC;QAEpE,OAAO,EAAE,CAAC;YACR,GAAG;YACH,IAAI,EAAE,SAAS,IAAI,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,IAAI,OAAO;YAC9D,MAAM,EAAE,MAAM,IAAI,SAAS;YAC3B,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EAAE,eAAe,EAAE;SAC7B,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,WAAmB;QACjC,MAAM,KAAK,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;QAExC,IAAI,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC/B,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,IAAI,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC/B,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,IAAI,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC/B,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,sBAAsB,CAAC,GAAW;QACxC,MAAM,GAAG,GAAG,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,EAAE,WAAW,EAAE,CAAC;QAC1D,IAAI,CAAC,GAAG;YAAE,OAAO,IAAI,CAAC;QAEtB,MAAM,eAAe,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;QACnF,MAAM,eAAe,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3E,MAAM,eAAe,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;QAE3E,IAAI,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC;QAClD,IAAI,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC;QAClD,IAAI,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,OAAO,CAAC;QAElD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,WAAmB;QAC/B,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACnE,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,GAAW;QAC9B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAClD,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,IAAY,EAAE,OAAe
;QAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACjD,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAE/C,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACnC,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,OAAe;QACjD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,UAAU;QACV,MAAM,WAAW,GAAG,iCAAiC,CAAC;QACtD,IAAI,KAAK,CAAC;QACV,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACjD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;QAED,aAAa;QACb,MAAM,WAAW,GAAG,oCAAoC,CAAC;QACzD,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACjD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACpE,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;oBACvB,IAAI,GAAG,EAAE,CAAC;wBACR,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;wBAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;4BACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;4BACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;wBACnB,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,MAAM,WAAW,GAAG,uCAAuC,CAAC;QAC5D,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACjD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;gBAChE,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,GAAG,EAAE,CAAC;oBACR,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;oBAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EA
AE,CAAC;wBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;wBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBACnB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,MAAM,OAAO,GAAG,yDAAyD,CAAC;QAC1E,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,OAAe;QACjD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,YAAY;QACZ,MAAM,aAAa,GAAG,mCAAmC,CAAC;QAC1D,IAAI,KAAK,CAAC;QACV,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACnD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,MAAM,gBAAgB,GAAG,wDAAwD,CAAC;QAClF,OAAO,CAAC,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACtD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY,EAAE,OAAe;QAChD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,YAAY;QACZ,MAAM,aAAa,GAAG,mCAAmC,CAAC;QAC1D,IAAI,KAAK,CAAC;QACV,OAAO,CAAC,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACnD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAA
G,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAClB,CAAC;YACH,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,MAAM,gBAAgB,GAAG,wDAAwD,CAAC;QAClF,OAAO,CAAC,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACtD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACb,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAClB,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,GAAW,EAAE,OAAe;QAC7C,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5D,OAAO,GAAG,CAAC;QACb,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Scraper module exports
3
+ */
4
+ export { WebScraper } from './web-scraper.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Scraper module exports
3
+ */
4
+ export { WebScraper } from './web-scraper.js';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * WebScraper - Webスクレイパー
3
+ *
4
+ * @requirement REQ-COLLECT-002
5
+ * @design DES-KATASHIRO-001 §2.2 Collector Container
6
+ * @task TSK-011
7
+ */
8
+ import { type Result } from '@nahisaho/katashiro-core';
9
+ import type { IWebScraper, ScrapingOptions, ScrapingResult } from '../index.js';
10
+ /**
11
+ * Web scraper implementation (implements IWebScraper)
12
+ * Note: using Playwright is recommended for production environments
13
+ */
14
+ export declare class WebScraper implements IWebScraper {
15
+ /**
16
+ * Scrape content from a URL; resolves to a Result holding the ScrapingResult or an Error
17
+ */
18
+ scrape(url: string, options?: ScrapingOptions): Promise<Result<ScrapingResult, Error>>;
19
+ /**
20
+ * Fetch the page
21
+ */
22
+ private fetchPage;
23
+ /**
24
+ * Parse the HTML
25
+ */
26
+ private parseHtml;
27
+ /**
28
+ * Extract the title
29
+ */
30
+ private extractTitle;
31
+ /**
32
+ * Extract the content (removing scripts and styles)
33
+ */
34
+ private extractContent;
35
+ /**
36
+ * Extract image URLs
37
+ */
38
+ private extractImages;
39
+ /**
40
+ * Extract links
41
+ */
42
+ private extractLinks;
43
+ /**
44
+ * Convert a relative URL to an absolute URL
45
+ */
46
+ private resolveUrl;
47
+ }
48
+ //# sourceMappingURL=web-scraper.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web-scraper.d.ts","sourceRoot":"","sources":["../../src/scraper/web-scraper.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,KAAK,MAAM,EAMZ,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAEhF;;;GAGG;AACH,qBAAa,UAAW,YAAW,WAAW;IAC5C;;OAEG;IACG,MAAM,CACV,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IAiBzC;;OAEG;YACW,SAAS;IAqBvB;;OAEG;IACH,OAAO,CAAC,SAAS;IAqBjB;;OAEG;IACH,OAAO,CAAC,YAAY;IAKpB;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;OAEG;IACH,OAAO,CAAC,aAAa;IAgBrB;;OAEG;IACH,OAAO,CAAC,YAAY;IAgBpB;;OAEG;IACH,OAAO,CAAC,UAAU;CAUnB"}