@nahisaho/katashiro-collector 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/ActionExecutor.d.ts +85 -0
- package/dist/browser/ActionExecutor.d.ts.map +1 -0
- package/dist/browser/ActionExecutor.js +171 -0
- package/dist/browser/ActionExecutor.js.map +1 -0
- package/dist/browser/BrowserAutomation.d.ts +147 -0
- package/dist/browser/BrowserAutomation.d.ts.map +1 -0
- package/dist/browser/BrowserAutomation.js +463 -0
- package/dist/browser/BrowserAutomation.js.map +1 -0
- package/dist/browser/ContentExtractor.d.ts +54 -0
- package/dist/browser/ContentExtractor.d.ts.map +1 -0
- package/dist/browser/ContentExtractor.js +159 -0
- package/dist/browser/ContentExtractor.js.map +1 -0
- package/dist/browser/SessionManager.d.ts +67 -0
- package/dist/browser/SessionManager.d.ts.map +1 -0
- package/dist/browser/SessionManager.js +173 -0
- package/dist/browser/SessionManager.js.map +1 -0
- package/dist/browser/index.d.ts +17 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +17 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/types.d.ts +361 -0
- package/dist/browser/types.d.ts.map +1 -0
- package/dist/browser/types.js +23 -0
- package/dist/browser/types.js.map +1 -0
- package/dist/document/DocumentParser.d.ts +91 -0
- package/dist/document/DocumentParser.d.ts.map +1 -0
- package/dist/document/DocumentParser.js +234 -0
- package/dist/document/DocumentParser.js.map +1 -0
- package/dist/document/index.d.ts +11 -0
- package/dist/document/index.d.ts.map +1 -0
- package/dist/document/index.js +10 -0
- package/dist/document/index.js.map +1 -0
- package/dist/document/parsers/DOCXParser.d.ts +63 -0
- package/dist/document/parsers/DOCXParser.d.ts.map +1 -0
- package/dist/document/parsers/DOCXParser.js +362 -0
- package/dist/document/parsers/DOCXParser.js.map +1 -0
- package/dist/document/parsers/PDFParser.d.ts +60 -0
- package/dist/document/parsers/PDFParser.d.ts.map +1 -0
- package/dist/document/parsers/PDFParser.js +338 -0
- package/dist/document/parsers/PDFParser.js.map +1 -0
- package/dist/document/parsers/XLSXParser.d.ts +55 -0
- package/dist/document/parsers/XLSXParser.d.ts.map +1 -0
- package/dist/document/parsers/XLSXParser.js +314 -0
- package/dist/document/parsers/XLSXParser.js.map +1 -0
- package/dist/document/parsers/index.d.ts +10 -0
- package/dist/document/parsers/index.d.ts.map +1 -0
- package/dist/document/parsers/index.js +10 -0
- package/dist/document/parsers/index.js.map +1 -0
- package/dist/document/types.d.ts +251 -0
- package/dist/document/types.d.ts.map +1 -0
- package/dist/document/types.js +13 -0
- package/dist/document/types.js.map +1 -0
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -2
- package/dist/index.js.map +1 -1
- package/dist/research/CoverageAnalyzer.d.ts +50 -0
- package/dist/research/CoverageAnalyzer.d.ts.map +1 -0
- package/dist/research/CoverageAnalyzer.js +169 -0
- package/dist/research/CoverageAnalyzer.js.map +1 -0
- package/dist/research/QueryPlanner.d.ts +57 -0
- package/dist/research/QueryPlanner.d.ts.map +1 -0
- package/dist/research/QueryPlanner.js +102 -0
- package/dist/research/QueryPlanner.js.map +1 -0
- package/dist/research/ResultAggregator.d.ts +39 -0
- package/dist/research/ResultAggregator.d.ts.map +1 -0
- package/dist/research/ResultAggregator.js +85 -0
- package/dist/research/ResultAggregator.js.map +1 -0
- package/dist/research/WideResearchEngine.d.ts +110 -0
- package/dist/research/WideResearchEngine.d.ts.map +1 -0
- package/dist/research/WideResearchEngine.js +330 -0
- package/dist/research/WideResearchEngine.js.map +1 -0
- package/dist/research/agents/AcademicSearchAgent.d.ts +57 -0
- package/dist/research/agents/AcademicSearchAgent.d.ts.map +1 -0
- package/dist/research/agents/AcademicSearchAgent.js +180 -0
- package/dist/research/agents/AcademicSearchAgent.js.map +1 -0
- package/dist/research/agents/EncyclopediaAgent.d.ts +49 -0
- package/dist/research/agents/EncyclopediaAgent.d.ts.map +1 -0
- package/dist/research/agents/EncyclopediaAgent.js +153 -0
- package/dist/research/agents/EncyclopediaAgent.js.map +1 -0
- package/dist/research/agents/NewsSearchAgent.d.ts +38 -0
- package/dist/research/agents/NewsSearchAgent.d.ts.map +1 -0
- package/dist/research/agents/NewsSearchAgent.js +146 -0
- package/dist/research/agents/NewsSearchAgent.js.map +1 -0
- package/dist/research/agents/WebSearchAgent.d.ts +45 -0
- package/dist/research/agents/WebSearchAgent.d.ts.map +1 -0
- package/dist/research/agents/WebSearchAgent.js +135 -0
- package/dist/research/agents/WebSearchAgent.js.map +1 -0
- package/dist/research/agents/index.d.ts +13 -0
- package/dist/research/agents/index.d.ts.map +1 -0
- package/dist/research/agents/index.js +12 -0
- package/dist/research/agents/index.js.map +1 -0
- package/dist/research/agents/types.d.ts +60 -0
- package/dist/research/agents/types.d.ts.map +1 -0
- package/dist/research/agents/types.js +9 -0
- package/dist/research/agents/types.js.map +1 -0
- package/dist/research/index.d.ts +16 -0
- package/dist/research/index.d.ts.map +1 -0
- package/dist/research/index.js +17 -0
- package/dist/research/index.js.map +1 -0
- package/dist/research/types.d.ts +206 -0
- package/dist/research/types.d.ts.map +1 -0
- package/dist/research/types.js +33 -0
- package/dist/research/types.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Automation 型定義
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-009
|
|
5
|
+
* @design DES-COLLECT-009-BrowserAutomation
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* ビューポート設定
|
|
9
|
+
*/
|
|
10
|
+
export interface Viewport {
|
|
11
|
+
width: number;
|
|
12
|
+
height: number;
|
|
13
|
+
deviceScaleFactor?: number;
|
|
14
|
+
isMobile?: boolean;
|
|
15
|
+
hasTouch?: boolean;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* プロキシ設定
|
|
19
|
+
*/
|
|
20
|
+
export interface ProxyConfig {
|
|
21
|
+
server: string;
|
|
22
|
+
username?: string;
|
|
23
|
+
password?: string;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* リソース制限
|
|
27
|
+
*/
|
|
28
|
+
export interface ResourceLimits {
|
|
29
|
+
/** 最大メモリ使用量(MB) */
|
|
30
|
+
maxMemory?: number;
|
|
31
|
+
/** 最大同時ページ数 */
|
|
32
|
+
maxPages?: number;
|
|
33
|
+
/** 最大実行時間(ミリ秒) */
|
|
34
|
+
maxExecutionTime?: number;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* ブラウザ設定
|
|
38
|
+
*/
|
|
39
|
+
export interface BrowserConfig {
|
|
40
|
+
/** ヘッドレスモード */
|
|
41
|
+
headless?: boolean;
|
|
42
|
+
/** ビューポートサイズ */
|
|
43
|
+
viewport?: Viewport;
|
|
44
|
+
/** ユーザーエージェント */
|
|
45
|
+
userAgent?: string;
|
|
46
|
+
/** プロキシ設定 */
|
|
47
|
+
proxy?: ProxyConfig;
|
|
48
|
+
/** タイムアウト(ミリ秒) */
|
|
49
|
+
timeout?: number;
|
|
50
|
+
/** 言語設定 */
|
|
51
|
+
locale?: string;
|
|
52
|
+
/** タイムゾーン */
|
|
53
|
+
timezone?: string;
|
|
54
|
+
/** JavaScript無効化 */
|
|
55
|
+
disableJavaScript?: boolean;
|
|
56
|
+
/** 画像読み込み無効化 */
|
|
57
|
+
disableImages?: boolean;
|
|
58
|
+
/** リソース制限 */
|
|
59
|
+
resourceLimits?: ResourceLimits;
|
|
60
|
+
/** 追加の起動引数 */
|
|
61
|
+
args?: string[];
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* 待機条件
|
|
65
|
+
*/
|
|
66
|
+
export type WaitUntilOption = 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2';
|
|
67
|
+
/**
|
|
68
|
+
* ナビゲーションオプション
|
|
69
|
+
*/
|
|
70
|
+
export interface NavigationOptions {
|
|
71
|
+
/** 待機条件 */
|
|
72
|
+
waitUntil?: WaitUntilOption | WaitUntilOption[];
|
|
73
|
+
/** タイムアウト(ミリ秒) */
|
|
74
|
+
timeout?: number;
|
|
75
|
+
/** リファラー */
|
|
76
|
+
referer?: string;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* クリックオプション
|
|
80
|
+
*/
|
|
81
|
+
export interface ClickOptions {
|
|
82
|
+
button?: 'left' | 'right' | 'middle';
|
|
83
|
+
clickCount?: number;
|
|
84
|
+
delay?: number;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* タイプオプション
|
|
88
|
+
*/
|
|
89
|
+
export interface TypeOptions {
|
|
90
|
+
delay?: number;
|
|
91
|
+
clear?: boolean;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* スクロールオプション
|
|
95
|
+
*/
|
|
96
|
+
export interface ScrollOptions {
|
|
97
|
+
behavior?: 'auto' | 'smooth';
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* セレクタ待機オプション
|
|
101
|
+
*/
|
|
102
|
+
export interface WaitForSelectorOptions {
|
|
103
|
+
visible?: boolean;
|
|
104
|
+
hidden?: boolean;
|
|
105
|
+
timeout?: number;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* スクリーンショットオプション
|
|
109
|
+
*/
|
|
110
|
+
export interface ScreenshotOptions {
|
|
111
|
+
path?: string;
|
|
112
|
+
fullPage?: boolean;
|
|
113
|
+
clip?: {
|
|
114
|
+
x: number;
|
|
115
|
+
y: number;
|
|
116
|
+
width: number;
|
|
117
|
+
height: number;
|
|
118
|
+
};
|
|
119
|
+
type?: 'png' | 'jpeg';
|
|
120
|
+
quality?: number;
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* PDFオプション
|
|
124
|
+
*/
|
|
125
|
+
export interface PdfOptions {
|
|
126
|
+
path?: string;
|
|
127
|
+
format?: 'A4' | 'Letter' | 'Legal';
|
|
128
|
+
landscape?: boolean;
|
|
129
|
+
margin?: {
|
|
130
|
+
top?: string;
|
|
131
|
+
right?: string;
|
|
132
|
+
bottom?: string;
|
|
133
|
+
left?: string;
|
|
134
|
+
};
|
|
135
|
+
printBackground?: boolean;
|
|
136
|
+
}
|
|
137
|
+
export interface NavigateAction {
|
|
138
|
+
type: 'navigate';
|
|
139
|
+
url: string;
|
|
140
|
+
options?: NavigationOptions;
|
|
141
|
+
}
|
|
142
|
+
export interface ClickAction {
|
|
143
|
+
type: 'click';
|
|
144
|
+
selector: string;
|
|
145
|
+
options?: ClickOptions;
|
|
146
|
+
}
|
|
147
|
+
export interface TypeAction {
|
|
148
|
+
type: 'type';
|
|
149
|
+
selector: string;
|
|
150
|
+
text: string;
|
|
151
|
+
options?: TypeOptions;
|
|
152
|
+
}
|
|
153
|
+
export interface WaitAction {
|
|
154
|
+
type: 'wait';
|
|
155
|
+
duration: number;
|
|
156
|
+
}
|
|
157
|
+
export interface ScrollAction {
|
|
158
|
+
type: 'scroll';
|
|
159
|
+
target?: string | {
|
|
160
|
+
x: number;
|
|
161
|
+
y: number;
|
|
162
|
+
};
|
|
163
|
+
options?: ScrollOptions;
|
|
164
|
+
}
|
|
165
|
+
export interface SelectAction {
|
|
166
|
+
type: 'select';
|
|
167
|
+
selector: string;
|
|
168
|
+
values: string[];
|
|
169
|
+
}
|
|
170
|
+
export interface HoverAction {
|
|
171
|
+
type: 'hover';
|
|
172
|
+
selector: string;
|
|
173
|
+
}
|
|
174
|
+
export interface ScreenshotAction {
|
|
175
|
+
type: 'screenshot';
|
|
176
|
+
options?: ScreenshotOptions;
|
|
177
|
+
}
|
|
178
|
+
export interface PdfAction {
|
|
179
|
+
type: 'pdf';
|
|
180
|
+
options?: PdfOptions;
|
|
181
|
+
}
|
|
182
|
+
export interface EvaluateAction {
|
|
183
|
+
type: 'evaluate';
|
|
184
|
+
script: string;
|
|
185
|
+
args?: unknown[];
|
|
186
|
+
}
|
|
187
|
+
export interface WaitForSelectorAction {
|
|
188
|
+
type: 'waitForSelector';
|
|
189
|
+
selector: string;
|
|
190
|
+
options?: WaitForSelectorOptions;
|
|
191
|
+
}
|
|
192
|
+
export interface ExtractAction {
|
|
193
|
+
type: 'extract';
|
|
194
|
+
selector: string;
|
|
195
|
+
attribute?: string;
|
|
196
|
+
multiple?: boolean;
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* ブラウザアクション
|
|
200
|
+
*/
|
|
201
|
+
export type BrowserAction = NavigateAction | ClickAction | TypeAction | WaitAction | ScrollAction | SelectAction | HoverAction | ScreenshotAction | PdfAction | EvaluateAction | WaitForSelectorAction | ExtractAction;
|
|
202
|
+
/**
|
|
203
|
+
* アクション結果
|
|
204
|
+
*/
|
|
205
|
+
export interface ActionResult {
|
|
206
|
+
/** アクションタイプ */
|
|
207
|
+
actionType: BrowserAction['type'];
|
|
208
|
+
/** 成功したか */
|
|
209
|
+
success: boolean;
|
|
210
|
+
/** 結果データ */
|
|
211
|
+
data?: unknown;
|
|
212
|
+
/** スクリーンショット(バイナリ) */
|
|
213
|
+
screenshot?: Buffer;
|
|
214
|
+
/** PDF(バイナリ) */
|
|
215
|
+
pdf?: Buffer;
|
|
216
|
+
/** 抽出されたテキスト */
|
|
217
|
+
extractedText?: string | string[];
|
|
218
|
+
/** エラーメッセージ */
|
|
219
|
+
error?: string;
|
|
220
|
+
/** 実行時間(ミリ秒) */
|
|
221
|
+
duration: number;
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* ページリンク
|
|
225
|
+
*/
|
|
226
|
+
export interface PageLink {
|
|
227
|
+
href: string;
|
|
228
|
+
text: string;
|
|
229
|
+
rel?: string;
|
|
230
|
+
}
|
|
231
|
+
/**
|
|
232
|
+
* ページ画像
|
|
233
|
+
*/
|
|
234
|
+
export interface PageImage {
|
|
235
|
+
src: string;
|
|
236
|
+
alt?: string;
|
|
237
|
+
width?: number;
|
|
238
|
+
height?: number;
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* ページメタデータ
|
|
242
|
+
*/
|
|
243
|
+
export interface PageMetadata {
|
|
244
|
+
/** メタディスクリプション */
|
|
245
|
+
description?: string;
|
|
246
|
+
/** キーワード */
|
|
247
|
+
keywords?: string[];
|
|
248
|
+
/** OGP情報 */
|
|
249
|
+
ogp?: Record<string, string>;
|
|
250
|
+
/** 最終更新日 */
|
|
251
|
+
lastModified?: string;
|
|
252
|
+
/** 言語 */
|
|
253
|
+
language?: string;
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* ページスクレイプ結果
|
|
257
|
+
*/
|
|
258
|
+
export interface PageScrapeResult {
|
|
259
|
+
/** URL */
|
|
260
|
+
url: string;
|
|
261
|
+
/** ページタイトル */
|
|
262
|
+
title: string;
|
|
263
|
+
/** メインコンテンツ */
|
|
264
|
+
content: string;
|
|
265
|
+
/** HTML全体 */
|
|
266
|
+
html: string;
|
|
267
|
+
/** 抽出されたデータ */
|
|
268
|
+
extractedData?: Record<string, unknown>;
|
|
269
|
+
/** リンク一覧 */
|
|
270
|
+
links: PageLink[];
|
|
271
|
+
/** 画像一覧 */
|
|
272
|
+
images: PageImage[];
|
|
273
|
+
/** メタデータ */
|
|
274
|
+
metadata: PageMetadata;
|
|
275
|
+
/** スクリーンショット */
|
|
276
|
+
screenshot?: Buffer;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Cookie
|
|
280
|
+
*/
|
|
281
|
+
export interface Cookie {
|
|
282
|
+
name: string;
|
|
283
|
+
value: string;
|
|
284
|
+
domain: string;
|
|
285
|
+
path: string;
|
|
286
|
+
expires?: number;
|
|
287
|
+
httpOnly?: boolean;
|
|
288
|
+
secure?: boolean;
|
|
289
|
+
sameSite?: 'Strict' | 'Lax' | 'None';
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* セッション情報
|
|
293
|
+
*/
|
|
294
|
+
export interface SessionInfo {
|
|
295
|
+
/** セッションID */
|
|
296
|
+
id: string;
|
|
297
|
+
/** Cookie一覧 */
|
|
298
|
+
cookies: Cookie[];
|
|
299
|
+
/** ローカルストレージ */
|
|
300
|
+
localStorage: Record<string, string>;
|
|
301
|
+
/** セッションストレージ */
|
|
302
|
+
sessionStorage: Record<string, string>;
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* 認証情報
|
|
306
|
+
*/
|
|
307
|
+
export interface AuthCredentials {
|
|
308
|
+
/** ユーザー名 */
|
|
309
|
+
username: string;
|
|
310
|
+
/** パスワード */
|
|
311
|
+
password: string;
|
|
312
|
+
/** 追加フィールド */
|
|
313
|
+
additionalFields?: Record<string, string>;
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* ログインセレクタ
|
|
317
|
+
*/
|
|
318
|
+
export interface LoginSelectors {
|
|
319
|
+
username: string;
|
|
320
|
+
password: string;
|
|
321
|
+
submit: string;
|
|
322
|
+
successIndicator?: string;
|
|
323
|
+
}
|
|
324
|
+
/**
|
|
325
|
+
* ブラウザスクリプト
|
|
326
|
+
*/
|
|
327
|
+
export interface BrowserScript {
|
|
328
|
+
/** スクリプト名 */
|
|
329
|
+
name: string;
|
|
330
|
+
/** 説明 */
|
|
331
|
+
description?: string;
|
|
332
|
+
/** アクション一覧 */
|
|
333
|
+
actions: BrowserAction[];
|
|
334
|
+
/** 変数 */
|
|
335
|
+
variables?: Record<string, string>;
|
|
336
|
+
}
|
|
337
|
+
/**
|
|
338
|
+
* 抽出器設定
|
|
339
|
+
*/
|
|
340
|
+
export interface ExtractorConfig {
|
|
341
|
+
name: string;
|
|
342
|
+
selector: string;
|
|
343
|
+
attribute?: string;
|
|
344
|
+
multiple?: boolean;
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* 抽出結果
|
|
348
|
+
*/
|
|
349
|
+
export interface ExtractionResult {
|
|
350
|
+
content: string;
|
|
351
|
+
html: string;
|
|
352
|
+
extractedData?: Record<string, unknown>;
|
|
353
|
+
links: PageLink[];
|
|
354
|
+
images: PageImage[];
|
|
355
|
+
metadata: PageMetadata;
|
|
356
|
+
}
|
|
357
|
+
/**
|
|
358
|
+
* デフォルトブラウザ設定
|
|
359
|
+
*/
|
|
360
|
+
export declare const DEFAULT_BROWSER_CONFIG: BrowserConfig;
|
|
361
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/browser/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kBAAkB;IAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,eAAe;IACf,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,gBAAgB;IAChB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,iBAAiB;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,kBAAkB;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW;IACX,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oBAAoB;IACpB,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,gBAAgB;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,aAAa;IACb,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,cAAc;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB,MAAM,GACN,kBAAkB,GAClB,cAAc,GACd,cAAc,CAAC;AAEnB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,WAAW;IACX,SAAS,CAAC,EAAE,eAAe,GAAG,eAAe,EAAE,CAAC;IAChD,kBAAkB;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,CAAC;IACrC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,IAAI,CAAC,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/D,IAAI,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,IAAI,GAAG,QAAQ,GAAG,OAAO,CAAC;IACnC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1E,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAMD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,UAAU,CAAC;IACjB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,iBAAiB,CAAC;CAC7B;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,YAAY,CAAC;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,WAAW,CAAC;CACvB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,GAAG;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,YAAY,CAAC;IACnB,OAAO,CAAC,EAAE,iBAAiB,CAAC;CAC7B;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,KAAK,CAAC;IACZ,OAAO,CAAC,EAAE,UAAU,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,UAAU,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,iBAAiB,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,sBAAsB,CAAC;CAClC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GACrB,cAAc,GACd,WAAW,GACX,UAAU,GACV,UAAU,GACV,YAAY,GACZ,YAAY,GACZ,WAAW,GACX,gBAAgB,GAChB,SAAS,GACT,cAAc,GACd,qBAAqB,GACrB,aAAa,CAAC;AAElB;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,eAAe;IACf,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAClC,YAAY;IACZ,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY;IACZ,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,sBAAsB;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,gBAAgB;IAChB,aAAa,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAClC,eAAe;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,gBAAgB;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAMD;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,kBAAkB;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY;IACZ,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,YAAY;IACZ,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,YAAY;IACZ,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS;IACT,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,UAAU;IACV,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc;IACd,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa;IACb,IAAI,EAAE,MAAM,CAAC;IACb,eAAe;IACf,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACxC,YAAY;IACZ,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,WAAW;IACX,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,YAAY;IACZ,QAAQ,EAAE,YAAY,CAAC;IACvB,gBAAgB;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAMD;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,QAAQ,CAAC,EAAE,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,cAAc;IACd,EAAE,EAAE,MAAM,CAAC;IACX,eAAe;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,gBAAgB;IAChB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,iBAAiB;IACjB,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACxC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,YAAY;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc;IACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC3C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc;IACd,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,SAAS;IACT,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACxC,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,QAAQ,EAAE,YAAY,CAAC;CACxB;AAMD;;GAEG;AACH,eAAO,MAAM,sBAAsB,EAAE,aASpC,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Automation 型定義
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-COLLECT-009
|
|
5
|
+
* @design DES-COLLECT-009-BrowserAutomation
|
|
6
|
+
*/
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// デフォルト設定
|
|
9
|
+
// ============================================================================
|
|
10
|
+
/**
|
|
11
|
+
* デフォルトブラウザ設定
|
|
12
|
+
*/
|
|
13
|
+
export const DEFAULT_BROWSER_CONFIG = {
|
|
14
|
+
headless: true,
|
|
15
|
+
viewport: { width: 1920, height: 1080 },
|
|
16
|
+
timeout: 30000,
|
|
17
|
+
resourceLimits: {
|
|
18
|
+
maxMemory: 512,
|
|
19
|
+
maxPages: 5,
|
|
20
|
+
maxExecutionTime: 300000,
|
|
21
|
+
},
|
|
22
|
+
};
|
|
23
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/browser/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAwZH,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E;;GAEG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAkB;IACnD,QAAQ,EAAE,IAAI;IACd,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;IACvC,OAAO,EAAE,KAAK;IACd,cAAc,EAAE;QACd,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,CAAC;QACX,gBAAgB,EAAE,MAAM;KACzB;CACF,CAAC"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ドキュメントパーサー
|
|
3
|
+
*
|
|
4
|
+
* @design DES-COLLECT-003 §2.3
|
|
5
|
+
* @task TASK-001-2
|
|
6
|
+
*/
|
|
7
|
+
import { type Result } from '@nahisaho/katashiro-core';
|
|
8
|
+
import type { IDocumentParser, ParsedDocument, DocumentError, ParseOptions, SupportedFormat } from './types.js';
|
|
9
|
+
/**
|
|
10
|
+
* ドキュメントパーサーのファサード
|
|
11
|
+
*
|
|
12
|
+
* PDF、Word(DOCX)、Excel(XLSX)ファイルからテキストと構造を抽出します。
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* import { DocumentParser, isOk } from '@nahisaho/katashiro-collector';
|
|
17
|
+
*
|
|
18
|
+
* const parser = new DocumentParser();
|
|
19
|
+
*
|
|
20
|
+
* // PDFを解析
|
|
21
|
+
* const result = await parser.parse('./document.pdf');
|
|
22
|
+
* if (isOk(result)) {
|
|
23
|
+
* console.log(result.value.content);
|
|
24
|
+
* console.log(result.value.structure.headings);
|
|
25
|
+
* }
|
|
26
|
+
*
|
|
27
|
+
* // Excelを解析(特定シートのみ)
|
|
28
|
+
* const excelResult = await parser.parse('./data.xlsx', {
|
|
29
|
+
* sheetNames: ['Sheet1', 'Summary'],
|
|
30
|
+
* extractTables: true,
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // バッファから解析
|
|
34
|
+
* const buffer = await fs.readFile('./document.pdf');
|
|
35
|
+
* const bufferResult = await parser.parseBuffer(buffer, 'document.pdf');
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
export declare class DocumentParser implements IDocumentParser {
|
|
39
|
+
private parsers;
|
|
40
|
+
constructor();
|
|
41
|
+
/**
|
|
42
|
+
* デフォルトパーサーを登録
|
|
43
|
+
*/
|
|
44
|
+
private registerDefaultParsers;
|
|
45
|
+
/**
|
|
46
|
+
* カスタムパーサーを登録
|
|
47
|
+
*
|
|
48
|
+
* @param extension - ファイル拡張子(例: '.pptx')
|
|
49
|
+
* @param parser - パーサー実装
|
|
50
|
+
*/
|
|
51
|
+
registerParser(extension: string, parser: IDocumentParser): void;
|
|
52
|
+
/**
|
|
53
|
+
* ファイルパスからドキュメントを解析
|
|
54
|
+
*
|
|
55
|
+
* @param filePath - ファイルの絶対または相対パス
|
|
56
|
+
* @param options - パースオプション
|
|
57
|
+
* @returns 解析結果またはエラー
|
|
58
|
+
*/
|
|
59
|
+
parse(filePath: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
|
|
60
|
+
/**
|
|
61
|
+
* バッファからドキュメントを解析
|
|
62
|
+
*
|
|
63
|
+
* @param buffer - ファイルのバイナリデータ
|
|
64
|
+
* @param filename - ファイル名(MIME タイプ判定用)
|
|
65
|
+
* @param options - パースオプション
|
|
66
|
+
* @returns 解析結果またはエラー
|
|
67
|
+
*/
|
|
68
|
+
parseBuffer(buffer: Buffer, filename: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
|
|
69
|
+
/**
|
|
70
|
+
* ストリームからドキュメントを解析
|
|
71
|
+
*
|
|
72
|
+
* @param stream - 読み取り可能ストリーム
|
|
73
|
+
* @param filename - ファイル名
|
|
74
|
+
* @param options - パースオプション
|
|
75
|
+
* @returns 解析結果またはエラー
|
|
76
|
+
*/
|
|
77
|
+
parseStream(stream: NodeJS.ReadableStream, filename: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
|
|
78
|
+
/**
|
|
79
|
+
* サポートするファイル形式を取得
|
|
80
|
+
*/
|
|
81
|
+
getSupportedFormats(): SupportedFormat[];
|
|
82
|
+
/**
|
|
83
|
+
* ファイルがサポートされているか確認
|
|
84
|
+
*/
|
|
85
|
+
isSupported(filename: string): boolean;
|
|
86
|
+
/**
|
|
87
|
+
* タイムアウト付きで実行
|
|
88
|
+
*/
|
|
89
|
+
private withTimeout;
|
|
90
|
+
}
|
|
91
|
+
//# sourceMappingURL=DocumentParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/document/DocumentParser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,EAAO,KAAK,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAI5D,OAAO,KAAK,EACV,eAAe,EACf,cAAc,EACd,aAAa,EACb,YAAY,EACZ,eAAe,EAChB,MAAM,YAAY,CAAC;AAGpB;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,cAAe,YAAW,eAAe;IACpD,OAAO,CAAC,OAAO,CAA+B;;IAO9C;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAW9B;;;;;OAKG;IACH,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,GAAG,IAAI;IAIhE;;;;;;OAMG;IACG,KAAK,CACT,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAmDjD;;;;;;;OAOG;IACG,WAAW,CACf,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IA2BjD;;;;;;;OAOG;IACG,WAAW,CACf,MAAM,EAAE,MAAM,CAAC,cAAc,EAC7B,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IA0CjD;;OAEG;IACH,mBAAmB,IAAI,eAAe,EAAE;IAgBxC;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAKtC;;OAEG;YACW,WAAW;CAmB1B"}
|