@nahisaho/katashiro-collector 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/dist/browser/ActionExecutor.d.ts +85 -0
  2. package/dist/browser/ActionExecutor.d.ts.map +1 -0
  3. package/dist/browser/ActionExecutor.js +171 -0
  4. package/dist/browser/ActionExecutor.js.map +1 -0
  5. package/dist/browser/BrowserAutomation.d.ts +147 -0
  6. package/dist/browser/BrowserAutomation.d.ts.map +1 -0
  7. package/dist/browser/BrowserAutomation.js +463 -0
  8. package/dist/browser/BrowserAutomation.js.map +1 -0
  9. package/dist/browser/ContentExtractor.d.ts +54 -0
  10. package/dist/browser/ContentExtractor.d.ts.map +1 -0
  11. package/dist/browser/ContentExtractor.js +159 -0
  12. package/dist/browser/ContentExtractor.js.map +1 -0
  13. package/dist/browser/SessionManager.d.ts +67 -0
  14. package/dist/browser/SessionManager.d.ts.map +1 -0
  15. package/dist/browser/SessionManager.js +173 -0
  16. package/dist/browser/SessionManager.js.map +1 -0
  17. package/dist/browser/index.d.ts +17 -0
  18. package/dist/browser/index.d.ts.map +1 -0
  19. package/dist/browser/index.js +17 -0
  20. package/dist/browser/index.js.map +1 -0
  21. package/dist/browser/types.d.ts +361 -0
  22. package/dist/browser/types.d.ts.map +1 -0
  23. package/dist/browser/types.js +23 -0
  24. package/dist/browser/types.js.map +1 -0
  25. package/dist/document/DocumentParser.d.ts +91 -0
  26. package/dist/document/DocumentParser.d.ts.map +1 -0
  27. package/dist/document/DocumentParser.js +234 -0
  28. package/dist/document/DocumentParser.js.map +1 -0
  29. package/dist/document/index.d.ts +11 -0
  30. package/dist/document/index.d.ts.map +1 -0
  31. package/dist/document/index.js +10 -0
  32. package/dist/document/index.js.map +1 -0
  33. package/dist/document/parsers/DOCXParser.d.ts +63 -0
  34. package/dist/document/parsers/DOCXParser.d.ts.map +1 -0
  35. package/dist/document/parsers/DOCXParser.js +362 -0
  36. package/dist/document/parsers/DOCXParser.js.map +1 -0
  37. package/dist/document/parsers/PDFParser.d.ts +60 -0
  38. package/dist/document/parsers/PDFParser.d.ts.map +1 -0
  39. package/dist/document/parsers/PDFParser.js +338 -0
  40. package/dist/document/parsers/PDFParser.js.map +1 -0
  41. package/dist/document/parsers/XLSXParser.d.ts +55 -0
  42. package/dist/document/parsers/XLSXParser.d.ts.map +1 -0
  43. package/dist/document/parsers/XLSXParser.js +314 -0
  44. package/dist/document/parsers/XLSXParser.js.map +1 -0
  45. package/dist/document/parsers/index.d.ts +10 -0
  46. package/dist/document/parsers/index.d.ts.map +1 -0
  47. package/dist/document/parsers/index.js +10 -0
  48. package/dist/document/parsers/index.js.map +1 -0
  49. package/dist/document/types.d.ts +251 -0
  50. package/dist/document/types.d.ts.map +1 -0
  51. package/dist/document/types.js +13 -0
  52. package/dist/document/types.js.map +1 -0
  53. package/dist/index.d.ts +7 -2
  54. package/dist/index.d.ts.map +1 -1
  55. package/dist/index.js +14 -2
  56. package/dist/index.js.map +1 -1
  57. package/dist/research/CoverageAnalyzer.d.ts +50 -0
  58. package/dist/research/CoverageAnalyzer.d.ts.map +1 -0
  59. package/dist/research/CoverageAnalyzer.js +169 -0
  60. package/dist/research/CoverageAnalyzer.js.map +1 -0
  61. package/dist/research/QueryPlanner.d.ts +57 -0
  62. package/dist/research/QueryPlanner.d.ts.map +1 -0
  63. package/dist/research/QueryPlanner.js +102 -0
  64. package/dist/research/QueryPlanner.js.map +1 -0
  65. package/dist/research/ResultAggregator.d.ts +39 -0
  66. package/dist/research/ResultAggregator.d.ts.map +1 -0
  67. package/dist/research/ResultAggregator.js +85 -0
  68. package/dist/research/ResultAggregator.js.map +1 -0
  69. package/dist/research/WideResearchEngine.d.ts +110 -0
  70. package/dist/research/WideResearchEngine.d.ts.map +1 -0
  71. package/dist/research/WideResearchEngine.js +330 -0
  72. package/dist/research/WideResearchEngine.js.map +1 -0
  73. package/dist/research/agents/AcademicSearchAgent.d.ts +57 -0
  74. package/dist/research/agents/AcademicSearchAgent.d.ts.map +1 -0
  75. package/dist/research/agents/AcademicSearchAgent.js +180 -0
  76. package/dist/research/agents/AcademicSearchAgent.js.map +1 -0
  77. package/dist/research/agents/EncyclopediaAgent.d.ts +49 -0
  78. package/dist/research/agents/EncyclopediaAgent.d.ts.map +1 -0
  79. package/dist/research/agents/EncyclopediaAgent.js +153 -0
  80. package/dist/research/agents/EncyclopediaAgent.js.map +1 -0
  81. package/dist/research/agents/NewsSearchAgent.d.ts +38 -0
  82. package/dist/research/agents/NewsSearchAgent.d.ts.map +1 -0
  83. package/dist/research/agents/NewsSearchAgent.js +146 -0
  84. package/dist/research/agents/NewsSearchAgent.js.map +1 -0
  85. package/dist/research/agents/WebSearchAgent.d.ts +45 -0
  86. package/dist/research/agents/WebSearchAgent.d.ts.map +1 -0
  87. package/dist/research/agents/WebSearchAgent.js +135 -0
  88. package/dist/research/agents/WebSearchAgent.js.map +1 -0
  89. package/dist/research/agents/index.d.ts +13 -0
  90. package/dist/research/agents/index.d.ts.map +1 -0
  91. package/dist/research/agents/index.js +12 -0
  92. package/dist/research/agents/index.js.map +1 -0
  93. package/dist/research/agents/types.d.ts +60 -0
  94. package/dist/research/agents/types.d.ts.map +1 -0
  95. package/dist/research/agents/types.js +9 -0
  96. package/dist/research/agents/types.js.map +1 -0
  97. package/dist/research/index.d.ts +16 -0
  98. package/dist/research/index.d.ts.map +1 -0
  99. package/dist/research/index.js +17 -0
  100. package/dist/research/index.js.map +1 -0
  101. package/dist/research/types.d.ts +206 -0
  102. package/dist/research/types.d.ts.map +1 -0
  103. package/dist/research/types.js +33 -0
  104. package/dist/research/types.js.map +1 -0
  105. package/package.json +1 -1
@@ -0,0 +1,314 @@
1
+ /**
2
+ * XLSXパーサー
3
+ *
4
+ * @design DES-COLLECT-003 §2.3
5
+ * @task TASK-001-5
6
+ */
7
+ import { ok, err } from '@nahisaho/katashiro-core';
8
+ import { DEFAULT_PARSE_OPTIONS } from '../types.js';
9
/**
 * XLSX parser.
 *
 * Reads an Excel workbook through the optional `xlsx` dependency (loaded
 * lazily on first use) and converts it into the parsed-document result shape:
 * tab-separated text content, one heading/section per sheet, workbook
 * metadata, per-sheet dimensions and, when enabled, one table per sheet.
 *
 * Every public parse method resolves to a Result built with `ok` / `err`;
 * failures are reported as error values rather than thrown.
 *
 * @example
 * ```typescript
 * const parser = new XLSXParser();
 * const result = await parser.parse('./data.xlsx');
 * ```
 */
export class XLSXParser {
    /** Cached `xlsx` module; stays null until loadXLSX() has succeeded once. */
    xlsx = null;

    /**
     * Load the optional `xlsx` dependency on first use and cache it.
     *
     * @returns the module object exposing `read` and `utils`
     * @throws Error with an install hint when the module cannot be imported;
     *         the underlying import failure is attached as `cause`
     */
    async loadXLSX() {
        if (!this.xlsx) {
            let mod;
            try {
                mod = await import('xlsx');
            }
            catch (cause) {
                throw new Error('xlsx is not installed. Run: npm install xlsx', { cause });
            }
            // Some builds expose the API only on the default export; prefer
            // the namespace itself when it already carries `read`.
            this.xlsx = typeof mod.read === 'function' ? mod : (mod.default ?? mod);
        }
        return this.xlsx;
    }

    /**
     * Parse a workbook from a file path.
     *
     * @param filePath path of the file to read
     * @param options  parse options, forwarded to parseBuffer
     * @returns a Result; read failures map to FILE_NOT_FOUND (ENOENT),
     *          PERMISSION_DENIED (EACCES) or PARSE_ERROR (anything else)
     */
    async parse(filePath, options = {}) {
        const fs = await import('fs/promises');
        const path = await import('path');
        try {
            const buffer = await fs.readFile(filePath);
            const filename = path.basename(filePath);
            return this.parseBuffer(buffer, filename, options);
        }
        catch (error) {
            const fsCode = error?.code;
            if (fsCode === 'ENOENT') {
                return err({
                    code: 'FILE_NOT_FOUND',
                    message: `File not found: ${filePath}`,
                    filePath,
                });
            }
            if (fsCode === 'EACCES') {
                return err({
                    code: 'PERMISSION_DENIED',
                    message: `Permission denied: ${filePath}`,
                    filePath,
                });
            }
            return err({
                code: 'PARSE_ERROR',
                message: `Failed to read file: ${error?.message}`,
                filePath,
                details: error,
            });
        }
    }

    /**
     * Parse a workbook held in memory.
     *
     * @param buffer   raw file bytes
     * @param filename name recorded in the resulting metadata
     * @param options  parse options; entries whose value is `undefined` are
     *                 ignored so they cannot mask DEFAULT_PARSE_OPTIONS
     * @returns a Result; errors are FILE_TOO_LARGE, PASSWORD_PROTECTED,
     *          CORRUPTED_FILE or PARSE_ERROR
     */
    async parseBuffer(buffer, filename, options = {}) {
        // Drop explicit `undefined` values: spreading them over the defaults
        // would turn e.g. maxFileSize into undefined and disable the size check.
        const supplied = Object.fromEntries(
            Object.entries(options ?? {}).filter(([, value]) => value !== undefined));
        const mergedOptions = { ...DEFAULT_PARSE_OPTIONS, ...supplied };

        // Size check
        if (buffer.length > mergedOptions.maxFileSize) {
            return err({
                code: 'FILE_TOO_LARGE',
                message: `File size ${buffer.length} exceeds maximum ${mergedOptions.maxFileSize}`,
            });
        }

        try {
            const xlsx = await this.loadXLSX();
            const workbook = xlsx.read(buffer, { type: 'buffer' });

            // Restrict to the requested sheets, keeping workbook order.
            let sheetNames = workbook.SheetNames;
            const requested = mergedOptions.sheetNames;
            if (requested && requested.length > 0) {
                sheetNames = sheetNames.filter((name) => requested.includes(name));
            }

            const result = {
                content: this.extractContent(workbook, sheetNames, xlsx),
                structure: this.extractStructure(workbook, sheetNames),
                metadata: this.extractMetadata(workbook, filename, buffer.length, sheetNames),
                sheets: this.extractSheetInfo(workbook, sheetNames, xlsx),
            };

            // Table extraction
            if (mergedOptions.extractTables) {
                result.tables = this.extractTables(workbook, sheetNames, xlsx);
            }
            return ok(result);
        }
        catch (error) {
            // Failures are classified by substring match on the error message.
            const errorMessage = error?.message || 'Unknown error';
            if (errorMessage.includes('password') || errorMessage.includes('encrypted')) {
                return err({
                    code: 'PASSWORD_PROTECTED',
                    message: 'XLSX is password protected',
                    details: error,
                });
            }
            if (errorMessage.includes('Invalid') ||
                errorMessage.includes('corrupt') ||
                errorMessage.includes('not a valid')) {
                return err({
                    code: 'CORRUPTED_FILE',
                    message: 'XLSX file is corrupted or invalid',
                    details: error,
                });
            }
            return err({
                code: 'PARSE_ERROR',
                message: `Failed to parse XLSX: ${errorMessage}`,
                details: error,
            });
        }
    }

    /**
     * Parse a workbook from a readable stream by buffering it completely and
     * delegating to parseBuffer.
     *
     * @param stream   readable stream emitting 'data', 'error' and 'end'
     * @param filename name recorded in the resulting metadata
     * @param options  parse options, forwarded to parseBuffer
     * @returns a Result; the returned promise never rejects
     */
    async parseStream(stream, filename, options = {}) {
        const chunks = [];
        return new Promise((resolve) => {
            stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
            stream.on('error', (error) => {
                resolve(err({
                    code: 'PARSE_ERROR',
                    message: `Stream read error: ${error.message}`,
                }));
            });
            stream.on('end', () => {
                // Route an unexpected rejection into the Result as well, so the
                // caller's promise always settles.
                this.parseBuffer(Buffer.concat(chunks), filename, options).then(resolve, (error) => {
                    resolve(err({
                        code: 'PARSE_ERROR',
                        message: `Failed to parse XLSX: ${error?.message || 'Unknown error'}`,
                        details: error,
                    }));
                });
            });
        });
    }

    /**
     * List the file formats this parser handles.
     *
     * @returns a single entry describing `.xlsx`
     */
    getSupportedFormats() {
        return [
            {
                extension: '.xlsx',
                mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                description: 'Excel Spreadsheet',
            },
        ];
    }

    /**
     * Check whether a file name has the `.xlsx` extension (case-insensitive).
     */
    isSupported(filename) {
        return filename.toLowerCase().endsWith('.xlsx');
    }

    /**
     * Extract the content as plain text.
     *
     * Each sheet contributes a `=== name ===` banner, its non-blank rows with
     * cells joined by tabs, and a trailing empty line.
     */
    extractContent(workbook, sheetNames, xlsx) {
        const parts = [];
        for (const sheetName of sheetNames) {
            const sheet = workbook.Sheets[sheetName];
            if (!sheet) {
                continue;
            }
            parts.push(`=== ${sheetName} ===`);
            // header: 1 makes sheet_to_json return each row as an array of cells.
            const rows = xlsx.utils.sheet_to_json(sheet, { header: 1 });
            for (const row of rows) {
                if (!Array.isArray(row)) {
                    continue;
                }
                const line = row.map((cell) => String(cell ?? '')).join('\t');
                if (line.trim()) {
                    parts.push(line);
                }
            }
            parts.push('');
        }
        return parts.join('\n');
    }

    /**
     * Extract the structure: one level-1 heading and one section per sheet.
     * Paragraphs are always empty, and section offsets are fixed at 0.
     */
    extractStructure(_workbook, sheetNames) {
        const headings = sheetNames.map((name, index) => ({
            level: 1,
            text: name,
            position: index,
        }));
        const sections = sheetNames.map((name) => ({
            title: name,
            content: `Sheet: ${name}`,
            start: 0,
            end: 0,
        }));
        return { headings, paragraphs: [], sections };
    }

    /**
     * Extract metadata from the workbook properties (`workbook.Props`).
     *
     * `keywords` is the Keywords property split on `,` or `;` and trimmed; it
     * is undefined when the property is missing or not a string. `pageCount`
     * is the number of selected sheets.
     */
    extractMetadata(workbook, filename, fileSize, sheetNames) {
        const props = workbook.Props || {};
        const rawKeywords = props.Keywords;
        const keywords = typeof rawKeywords === 'string' && rawKeywords
            ? rawKeywords.split(/[,;]/).map((k) => k.trim())
            : undefined;
        return {
            filename,
            fileSize,
            mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            title: props.Title,
            author: props.Author,
            subject: props.Subject,
            keywords,
            createdAt: props.CreatedDate,
            modifiedAt: props.ModifiedDate,
            pageCount: sheetNames.length,
        };
    }

    /**
     * Extract per-sheet information.
     *
     * Row and column counts come from the sheet's `!ref` range (0 when the
     * sheet has none). `index` is the position within `sheetNames`.
     * `isHidden` is read from the workbook-level sheet metadata
     * (`workbook.Workbook.Sheets[].Hidden`, non-zero meaning hidden) and is
     * false when that metadata is absent.
     */
    extractSheetInfo(workbook, sheetNames, xlsx) {
        const sheetMeta = workbook.Workbook?.Sheets ?? [];
        const allNames = workbook.SheetNames ?? [];
        return sheetNames.map((name, index) => {
            const sheet = workbook.Sheets[name];
            let rowCount = 0;
            let columnCount = 0;
            if (sheet && sheet['!ref']) {
                const range = xlsx.utils.decode_range(sheet['!ref']);
                rowCount = range.e.r - range.s.r + 1;
                columnCount = range.e.c - range.s.c + 1;
            }
            // Match metadata by name first; fall back to the sheet's position
            // in the workbook because `sheetNames` may be a filtered subset.
            const workbookPos = allNames.indexOf(name);
            const meta = sheetMeta.find((entry) => entry?.name === name) ??
                (workbookPos >= 0 ? sheetMeta[workbookPos] : undefined);
            return {
                name,
                index,
                rowCount,
                columnCount,
                isHidden: Boolean(meta?.Hidden),
            };
        });
    }

    /**
     * Extract table data: one table per sheet that has a `!ref` range.
     *
     * The first row of the range supplies the headers; every later row becomes
     * a data row unless all of its cells are null or ''.
     */
    extractTables(workbook, sheetNames, xlsx) {
        const tables = [];
        for (const sheetName of sheetNames) {
            const sheet = workbook.Sheets[sheetName];
            if (!sheet || !sheet['!ref']) {
                continue;
            }
            const range = xlsx.utils.decode_range(sheet['!ref']);

            // Header row: prefer the formatted text (`w`), then the raw value.
            const headers = [];
            for (let c = range.s.c; c <= range.e.c; c++) {
                const cell = sheet[xlsx.utils.encode_cell({ r: range.s.r, c })];
                headers.push(cell?.w || cell?.v?.toString() || '');
            }

            // Data rows
            const rows = [];
            for (let r = range.s.r + 1; r <= range.e.r; r++) {
                const cells = [];
                for (let c = range.s.c; c <= range.e.c; c++) {
                    const cell = sheet[xlsx.utils.encode_cell({ r, c })];
                    cells.push(cell ? this.convertCell(cell) : { value: null, type: 'empty' });
                }
                // Keep the row only when it is not entirely empty.
                if (cells.some((entry) => entry.value !== null && entry.value !== '')) {
                    rows.push({ cells });
                }
            }

            tables.push({
                id: `sheet-${sheetName}`,
                name: sheetName,
                headers,
                rows,
                position: 0,
                sheetName,
            });
        }
        return tables;
    }

    /**
     * Convert a worksheet cell object into a table cell.
     *
     * Date values are serialised as ISO-8601 strings; a missing value becomes
     * null. The cell's formula (`f`) is passed through unchanged.
     */
    convertCell(cell) {
        const type = this.getCellType(cell.t);
        const value = cell.v ?? null;
        return {
            value: value instanceof Date ? value.toISOString() : value,
            type,
            formula: cell.f,
        };
    }

    /**
     * Map a worksheet cell type code to a table cell type.
     *
     * 'n' -> number, 'b' -> boolean, 'd' -> date; 's', 'e' (error, reported
     * as a string) and any other code -> string.
     */
    getCellType(t) {
        switch (t) {
            case 'n':
                return 'number';
            case 'b':
                return 'boolean';
            case 'd':
                return 'date';
            default:
                return 'string';
        }
    }
}
314
+ //# sourceMappingURL=XLSXParser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"XLSXParser.js","sourceRoot":"","sources":["../../../src/document/parsers/XLSXParser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,EAAE,EAAE,GAAG,EAAe,MAAM,0BAA0B,CAAC;AAiBhE,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAyCpD;;;;;;;;GAQG;AACH,MAAM,OAAO,UAAU;IACb,IAAI,GAAsB,IAAI,CAAC;IAE/B,KAAK,CAAC,QAAQ;QACpB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACf,IAAI,CAAC;gBACH,gDAAgD;gBAChD,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,CAA0B,CAAC;YAC9D,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,KAAK,CACT,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QACvC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;QAElC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACzC,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;QACrD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACvD,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,gBAAgB;oBACtB,OAAO,EAAE,mBAAmB,QAAQ,EAAE;oBACtC,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YACD,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACvD,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,mBAAmB;oBACzB,OAAO,EAAE,sBAAsB,QAAQ,EAAE;oBACzC,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YACD,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,aAAa;gBACnB,OAAO,EAAE,wBAAyB,KAAe,CAAC,OAAO,EAAE;gBAC3D,QAAQ;gBACR,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CACf,MAAc,EACd,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,aAAa,GAAG,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,EAAE,CAAC;QAE/D,UAAU;QACV,IAAI,MAAM,CAAC,MAAM,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;YAC9C,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,gBAAgB;gBACtB,OAAO,EAAE,aAAa,MAAM,CAAC,MAAM,oBAAoB,aAAa,CAAC,WAAW,EAAE;aACnF,CAAC,CAAC;QACL,CAAC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEvD,cAAc;YACd,IAAI,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;YACrC,IAAI,OAAO
,CAAC,UAAU,IAAI,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxD,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,UAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YAC/E,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;YAChE,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;YACrF,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;YAEjE,MAAM,MAAM,GAAmB;gBAC7B,OAAO;gBACP,SAAS;gBACT,QAAQ;gBACR,MAAM;aACP,CAAC;YAEF,SAAS;YACT,IAAI,aAAa,CAAC,aAAa,EAAE,CAAC;gBAChC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;YACjE,CAAC;YAED,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAI,KAAe,CAAC,OAAO,IAAI,eAAe,CAAC;YAEjE,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC5E,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,oBAAoB;oBAC1B,OAAO,EAAE,4BAA4B;oBACrC,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;YACL,CAAC;YAED,IACE,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAChC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAChC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC,EACpC,CAAC;gBACD,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,gBAAgB;oBACtB,OAAO,EAAE,mCAAmC;oBAC5C,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;YACL,CAAC;YAED,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,aAAa;gBACnB,OAAO,EAAE,yBAAyB,YAAY,EAAE;gBAChD,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CACf,MAA6B,EAC7B,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9D,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBAC3B,OAAO,CACL,GAAG,CAAC;oBACF,IAAI,EAAE,aAAa;oBACnB,OAAO,EAAE,sBAAsB,KAAK,CAAC,OAAO,EAAE;iBAC/C,CAAC,CACH,CAAC;YACJ,CAAC,CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,IAAI,EAAE;gBAC1B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBACrC,MAAM,MAAM,GAAG,MAAM,
IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACjE,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,mBAAmB;QACjB,OAAO;YACL;gBACE,SAAS,EAAE,OAAO;gBAClB,QAAQ,EAAE,mEAAmE;gBAC7E,WAAW,EAAE,mBAAmB;aACjC;SACF,CAAC;IACJ,CAAC;IAED,WAAW,CAAC,QAAgB;QAC1B,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAkB,EAAE,UAAoB,EAAE,IAAgB;QAC/E,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YACzC,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,KAAK,CAAC,IAAI,CAAC,OAAO,SAAS,MAAM,CAAC,CAAC;YAEnC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAW,KAAK,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;YACtE,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvB,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC9D,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;wBAChB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACnB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,SAAmB,EAAE,UAAoB;QAChE,MAAM,QAAQ,GAAc,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YAC3D,KAAK,EAAE,CAAC;YACR,IAAI,EAAE,IAAI;YACV,QAAQ,EAAE,KAAK;SAChB,CAAC,CAAC,CAAC;QAEJ,MAAM,UAAU,GAAgB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAc,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACpD,KAAK,EAAE,IAAI;YACX,OAAO,EAAE,UAAU,IAAI,EAAE;YACzB,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,CAAC;SACP,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,QAAQ;YACR,UAAU;YACV,QAAQ;SACT,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,QAAkB,EAClB,QAAgB,EAChB,QAAgB,EAChB,UAAoB;QAEpB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC;QAEnC,OAAO;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ,EAAE,mEAAmE;YAC7E,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CA
AC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YACxF,SAAS,EAAE,KAAK,CAAC,WAAW;YAC5B,UAAU,EAAE,KAAK,CAAC,YAAY;YAC9B,SAAS,EAAE,UAAU,CAAC,MAAM;SAC7B,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,gBAAgB,CACtB,QAAkB,EAClB,UAAoB,EACpB,IAAgB;QAEhB,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;gBACrD,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACrC,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC1C,CAAC;YAED,OAAO;gBACL,IAAI;gBACJ,KAAK;gBACL,QAAQ;gBACR,WAAW;gBACX,QAAQ,EAAE,KAAK;aAChB,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAkB,EAAE,UAAoB,EAAE,IAAgB;QAC9E,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YACzC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;gBAAE,SAAS;YAEvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;YACrD,MAAM,IAAI,GAAe,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAC;YAE7B,WAAW;YACX,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;gBAChE,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAA2B,CAAC;gBAC1D,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,UAAU;YACV,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChD,MAAM,KAAK,GAAgB,EAAE,CAAC;gBAE9B,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CA
AC,EAAE,CAAC,EAAE,CAAC,CAAC;oBACrD,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAA2B,CAAC;oBAE1D,IAAI,IAAI,EAAE,CAAC;wBACT,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;oBACrC,CAAC;yBAAM,CAAC;wBACN,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;oBAC7C,CAAC;gBACH,CAAC;gBAED,YAAY;gBACZ,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,EAAE,CAAC,EAAE,CAAC;oBAC1D,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;gBACvB,CAAC;YACH,CAAC;YAED,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,SAAS,SAAS,EAAE;gBACxB,IAAI,EAAE,SAAS;gBACf,OAAO;gBACP,IAAI;gBACJ,QAAQ,EAAE,CAAC;gBACX,SAAS;aACV,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,IAAgB;QAClC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC;QAE7B,OAAO;YACL,KAAK,EAAE,KAAK,YAAY,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK;YAC1D,IAAI;YACJ,OAAO,EAAE,IAAI,CAAC,CAAC;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,CAAU;QAC5B,QAAQ,CAAC,EAAE,CAAC;YACV,KAAK,GAAG;gBACN,OAAO,QAAQ,CAAC;YAClB,KAAK,GAAG;gBACN,OAAO,QAAQ,CAAC;YAClB,KAAK,GAAG;gBACN,OAAO,SAAS,CAAC;YACnB,KAAK,GAAG;gBACN,OAAO,MAAM,CAAC;YAChB,KAAK,GAAG;gBACN,OAAO,QAAQ,CAAC,CAAC,kBAAkB;YACrC;gBACE,OAAO,QAAQ,CAAC;QACpB,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * パーサーモジュール
3
+ *
4
+ * @design DES-COLLECT-003
5
+ * @task TASK-001
6
+ */
7
+ export { PDFParser } from './PDFParser.js';
8
+ export { DOCXParser } from './DOCXParser.js';
9
+ export { XLSXParser } from './XLSXParser.js';
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/document/parsers/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * パーサーモジュール
3
+ *
4
+ * @design DES-COLLECT-003
5
+ * @task TASK-001
6
+ */
7
+ export { PDFParser } from './PDFParser.js';
8
+ export { DOCXParser } from './DOCXParser.js';
9
+ export { XLSXParser } from './XLSXParser.js';
10
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/document/parsers/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,251 @@
1
+ /**
2
+ * ドキュメントパーサー型定義
3
+ *
4
+ * @design DES-COLLECT-003 §2.1
5
+ * @task TASK-001-1
6
+ */
7
+ import type { Result } from '@nahisaho/katashiro-core';
8
/**
 * Unified output format produced by document parsing.
 */
export interface ParsedDocument {
    /** Extracted text content (plain text). */
    content: string;

    /** Structural information about the document. */
    structure: DocumentStructure;

    /** Document metadata. */
    metadata: DocumentMetadata;

    /** Extracted table data, when present. */
    tables?: Array<TableData>;

    /** Image reference information, when present. */
    images?: Array<ImageReference>;

    /** Page information (PDF/DOCX). */
    pages?: Array<PageInfo>;

    /** Sheet information (XLSX). */
    sheets?: Array<SheetInfo>;
}
27
/**
 * Document structure.
 */
export interface DocumentStructure {
    /** Heading hierarchy. */
    headings: Array<Heading>;

    /** Paragraph information. */
    paragraphs: Array<Paragraph>;

    /** Table of contents, when present. */
    toc?: TableOfContents;

    /** Section breakdown. */
    sections: Array<Section>;
}
40
export interface Heading {
    /** Heading level (1-6). */
    level: number;

    /** Heading text. */
    text: string;

    /** Position within the document (character offset). */
    position: number;

    /** Page number (PDF/DOCX). */
    page?: number;
}
50
export interface Paragraph {
    /** Paragraph text. */
    text: string;

    /** Start position. */
    start: number;

    /** End position. */
    end: number;

    /** Style information. */
    style?: ParagraphStyle;
}
60
/** Optional style attributes attached to a paragraph. */
export interface ParagraphStyle {
    bold?: boolean;
    italic?: boolean;
    fontSize?: number;
    fontFamily?: string;
    alignment?:
        | 'left'
        | 'center'
        | 'right'
        | 'justify';
}
67
export interface Section {
    /** Section title. */
    title: string;

    /** Section content. */
    content: string;

    /** Start position. */
    start: number;

    /** End position. */
    end: number;

    /** Child sections. */
    children?: Array<Section>;
}
79
/** Table of contents: a list of entries. */
export interface TableOfContents {
    entries: Array<TocEntry>;
}
82
/** A single table-of-contents entry. */
export interface TocEntry {
    level: number;
    text: string;
    page?: number;
}
87
/**
 * Metadata.
 */
export interface DocumentMetadata {
    /** File name. */
    filename: string;

    /** File size in bytes. */
    fileSize: number;

    /** MIME type. */
    mimeType: string;

    /** Creation date/time. */
    createdAt?: Date;

    /** Last-modified date/time. */
    modifiedAt?: Date;

    /** Author. */
    author?: string;

    /** Title. */
    title?: string;

    /** Subject. */
    subject?: string;

    /** Keywords. */
    keywords?: Array<string>;

    /** Number of pages / sheets. */
    pageCount?: number;

    /** Character count. */
    characterCount?: number;

    /** Word count (estimated). */
    wordCount?: number;
}
116
/**
 * Table data.
 */
export interface TableData {
    /** Table ID. */
    id: string;

    /** Table name / caption. */
    name?: string;

    /** Header row. */
    headers?: Array<string>;

    /** Data rows. */
    rows: Array<TableRow>;

    /** Position within the document. */
    position: number;

    /** Page number. */
    page?: number;

    /** Sheet name (XLSX). */
    sheetName?: string;
}
135
/** One table row: an ordered list of cells. */
export interface TableRow {
    cells: Array<TableCell>;
}
138
/** One table cell: its value, a type tag, and optional formula and span info. */
export interface TableCell {
    value:
        | string
        | number
        | boolean
        | null;
    type:
        | 'string'
        | 'number'
        | 'boolean'
        | 'date'
        | 'formula'
        | 'empty';
    formula?: string;
    colspan?: number;
    rowspan?: number;
}
145
/**
 * Image reference.
 */
export interface ImageReference {
    /** Image ID. */
    id: string;

    /** File name. */
    filename?: string;

    /** MIME type. */
    mimeType: string;

    /** Width in pixels. */
    width?: number;

    /** Height in pixels. */
    height?: number;

    /** Alternative text. */
    altText?: string;

    /** Position within the document. */
    position: number;

    /** Base64-encoded data (optional). */
    data?: string;
}
166
/**
 * Page information.
 */
export interface PageInfo {
    pageNumber: number;
    content: string;
    startOffset: number;
    endOffset: number;
}
175
/**
 * Sheet information (Excel).
 */
export interface SheetInfo {
    name: string;
    index: number;
    rowCount: number;
    columnCount: number;
    isHidden: boolean;
}
185
/**
 * Document error.
 */
export interface DocumentError {
    code: DocumentErrorCode;
    message: string;
    details?: unknown;
    filePath?: string;
}
194
/** Error codes carried by DocumentError. */
export type DocumentErrorCode =
    | 'FILE_NOT_FOUND'
    | 'PERMISSION_DENIED'
    | 'UNSUPPORTED_FORMAT'
    | 'PARSE_ERROR'
    | 'CORRUPTED_FILE'
    | 'PASSWORD_PROTECTED'
    | 'FILE_TOO_LARGE'
    | 'TIMEOUT'
    | 'UNKNOWN_ERROR';
195
/**
 * Parse settings.
 */
export interface ParseOptions {
    /** Whether to extract image data. */
    extractImages?: boolean;

    /** Whether to extract table data. */
    extractTables?: boolean;

    /** Maximum file size in bytes. */
    maxFileSize?: number;

    /** Timeout in milliseconds. */
    timeout?: number;

    /** Password (for encrypted files). */
    password?: string;

    /** Extract only specific pages/sheets. */
    pageRange?: {
        start: number;
        end: number;
    };

    /** Sheet names to include (XLSX). */
    sheetNames?: Array<string>;
}
217
/**
 * Default parse options: every ParseOptions field except `password`,
 * `pageRange` and `sheetNames`, all required.
 */
export declare const DEFAULT_PARSE_OPTIONS: Required<
    Omit<ParseOptions, 'password' | 'pageRange' | 'sheetNames'>
>;
218
/**
 * Supported format.
 */
export interface SupportedFormat {
    extension: string;
    mimeType: string;
    description: string;
}
226
/**
 * Document parser interface.
 */
export interface IDocumentParser {
    /**
     * Parse a document from a file path.
     */
    parse(
        filePath: string,
        options?: ParseOptions,
    ): Promise<Result<ParsedDocument, DocumentError>>;

    /**
     * Parse a document from a buffer.
     */
    parseBuffer(
        buffer: Buffer,
        filename: string,
        options?: ParseOptions,
    ): Promise<Result<ParsedDocument, DocumentError>>;

    /**
     * Parse a document from a stream.
     */
    parseStream(
        stream: NodeJS.ReadableStream,
        filename: string,
        options?: ParseOptions,
    ): Promise<Result<ParsedDocument, DocumentError>>;

    /**
     * Get the file formats this parser supports.
     */
    getSupportedFormats(): Array<SupportedFormat>;

    /**
     * Check whether a file is supported.
     */
    isSupported(filename: string): boolean;
}
251
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAEvD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,+BAA+B;IAC/B,OAAO,EAAE,MAAM,CAAC;IAEhB,iBAAiB;IACjB,SAAS,EAAE,iBAAiB,CAAC;IAE7B,kBAAkB;IAClB,QAAQ,EAAE,gBAAgB,CAAC;IAE3B,2BAA2B;IAC3B,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC;IAErB,qBAAqB;IACrB,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;IAE1B,yBAAyB;IACzB,KAAK,CAAC,EAAE,QAAQ,EAAE,CAAC;IAEnB,qBAAqB;IACrB,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,YAAY;IACZ,QAAQ,EAAE,OAAO,EAAE,CAAC;IAEpB,WAAW;IACX,UAAU,EAAE,SAAS,EAAE,CAAC;IAExB,iBAAiB;IACjB,GAAG,CAAC,EAAE,eAAe,CAAC;IAEtB,cAAc;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,OAAO;IACtB,mBAAmB;IACnB,KAAK,EAAE,MAAM,CAAC;IAEd,cAAc;IACd,IAAI,EAAE,MAAM,CAAC;IAEb,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,CAAC;IAEjB,yBAAyB;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,aAAa;IACb,IAAI,EAAE,MAAM,CAAC;IAEb,WAAW;IACX,KAAK,EAAE,MAAM,CAAC;IAEd,WAAW;IACX,GAAG,EAAE,MAAM,CAAC;IAEZ,aAAa;IACb,KAAK,CAAC,EAAE,cAAc,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,SAAS,CAAC;CACrD;AAED,MAAM,WAAW,OAAO;IACtB,gBAAgB;IAChB,KAAK,EAAE,MAAM,CAAC;IAEd,cAAc;IACd,OAAO,EAAE,MAAM,CAAC;IAEhB,WAAW;IACX,KAAK,EAAE,MAAM,CAAC;IAEd,WAAW;IACX,GAAG,EAAE,MAAM,CAAC;IAEZ,aAAa;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,QAAQ,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,YAAY;IACZ,QAAQ,EAAE,MAAM,CAAC;IAEjB,mBAAmB;IACnB,QAAQ,EAAE,MAAM,CAAC;IAEjB,eAAe;IACf,QAAQ,EAAE,MAAM,CAAC;IAEjB,WAAW;IACX,SAAS,CAAC,EAAE,IAAI,CAAC;IAEjB,WAAW;IACX,UAAU,CAAC,EAAE,IAAI,CAAC;IAElB,UAAU;IACV,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,WAAW;IACX,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,aAAa;IA
Cb,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,YAAY;IACZ,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IAEpB,gBAAgB;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,UAAU;IACV,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB,cAAc;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,aAAa;IACb,EAAE,EAAE,MAAM,CAAC;IAEX,mBAAmB;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,YAAY;IACZ,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAEnB,WAAW;IACX,IAAI,EAAE,QAAQ,EAAE,CAAC;IAEjB,iBAAiB;IACjB,QAAQ,EAAE,MAAM,CAAC;IAEjB,YAAY;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,oBAAoB;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,SAAS,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAC;IACxC,IAAI,EAAE,QAAQ,GAAG,QAAQ,GAAG,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC;IACrE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,WAAW;IACX,EAAE,EAAE,MAAM,CAAC;IAEX,YAAY;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,eAAe;IACf,QAAQ,EAAE,MAAM,CAAC;IAEjB,cAAc;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,eAAe;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,aAAa;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,iBAAiB;IACjB,QAAQ,EAAE,MAAM,CAAC;IAEjB,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,iBAAiB,GACzB,gBAAgB,GAChB,mBAAmB,GACnB,oBAAoB,GACpB,aAAa,GACb,gBAAgB,GAChB,oBAAoB,GACpB,gBAAgB,GAChB,SAAS,GACT,eAAe,CAAC;AAEpB;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,kBAAkB;IAClB,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB,oBAAoB;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB,qBAAqB;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,kBAAkB;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB
,sBAAsB;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,qBAAqB;IACrB,SAAS,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IAE3C,oBAAoB;IACpB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,eAAO,MAAM,qBAAqB,EAAE,QAAQ,CAC1C,IAAI,CAAC,YAAY,EAAE,UAAU,GAAG,WAAW,GAAG,YAAY,CAAC,CAM5D,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B;;OAEG;IACH,KAAK,CACH,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC,CAAC;IAElD;;OAEG;IACH,WAAW,CACT,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC,CAAC;IAElD;;OAEG;IACH,WAAW,CACT,MAAM,EAAE,MAAM,CAAC,cAAc,EAC7B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC,CAAC;IAElD;;OAEG;IACH,mBAAmB,IAAI,eAAe,EAAE,CAAC;IAEzC;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CACxC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * ドキュメントパーサー型定義
3
+ *
4
+ * @design DES-COLLECT-003 §2.1
5
+ * @task TASK-001-1
6
+ */
7
+ export const DEFAULT_PARSE_OPTIONS = {
8
+ extractImages: false,
9
+ extractTables: true,
10
+ maxFileSize: 100 * 1024 * 1024, // 100MB
11
+ timeout: 60000, // 60秒
12
+ };
13
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA8RH,MAAM,CAAC,MAAM,qBAAqB,GAE9B;IACF,aAAa,EAAE,KAAK;IACpB,aAAa,EAAE,IAAI;IACnB,WAAW,EAAE,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,QAAQ;IACxC,OAAO,EAAE,KAAK,EAAE,MAAM;CACvB,CAAC"}
package/dist/index.d.ts CHANGED
@@ -3,8 +3,8 @@
3
3
  * 情報収集パッケージ
4
4
  *
5
5
  * @requirement REQ-COLLECT-001 ~ REQ-COLLECT-009
6
- * @design DES-KATASHIRO-001 §2.2 Collector Container
7
- * @task TSK-010 ~ TSK-015
6
+ * @design DES-KATASHIRO-001 §2.2 Collector Container, DES-COLLECT-003, DES-COLLECT-008
7
+ * @task TSK-010 ~ TSK-015, TASK-001, TASK-002
8
8
  */
9
9
  export type { IWebSearchClient, IWebScraper, IFeedReader, IAPIClient, IYouTubeTranscript, IMediaExtractor, } from './interfaces.js';
10
10
  export type { WebSearchOptions, ScrapingOptions, ScrapingResult, FeedItem, TranscriptSegment, MediaMetadata, } from './types.js';
@@ -16,4 +16,9 @@ export { APIClient, type APIClientOptions } from './api/index.js';
16
16
  export { MediaExtractor, type ExtractedMedia } from './media/index.js';
17
17
  export { APIClient as ApiClient } from './api/index.js';
18
18
  export { SourceTracker, type TrackedSource, type SourceMetadata, CredibilityScorer, type CredibilityFactors, type CredibilityScore, } from './source/index.js';
19
+ export { DocumentParser, PDFParser, DOCXParser, XLSXParser, DEFAULT_PARSE_OPTIONS, } from './document/index.js';
20
+ export type { ParsedDocument, DocumentError, DocumentErrorCode, ParseOptions, SupportedFormat, IDocumentParser, DocumentStructure, DocumentMetadata, Heading, Paragraph, ParagraphStyle, Section, TableOfContents, TocEntry, TableData, TableRow, TableCell, ImageReference, PageInfo, SheetInfo, } from './document/index.js';
21
+ export { WideResearchEngine, QueryPlanner, ResultAggregator, CoverageAnalyzer, WebSearchAgent, NewsSearchAgent, AcademicSearchAgent, EncyclopediaAgent, DEFAULT_RESEARCH_CONFIG, } from './research/index.js';
22
+ export type { WideResearchQuery, WideResearchResult, ResearchError, ResearchErrorCode, ResearchDepth, SourceType, Finding, SourceInfo, SourceStatus, CoverageReport, CoverageGap, TemporalCoverage, TimeDistribution, ResearchStatistics, CompletionStatus, AgentConfig, DateRange, DepthConfig, QueryPlan, ISearchAgent, AgentSearchQuery, AgentSearchResult, AgentExecutionResult, } from './research/index.js';
23
+ export { BrowserAutomation, BrowserAutomationError, ActionExecutor, ContentExtractor, SessionManager, DEFAULT_BROWSER_CONFIG, type BrowserConfig, type Viewport, type ProxyConfig, type ResourceLimits, type WaitUntilOption, type NavigationOptions, type ClickOptions, type TypeOptions, type ScrollOptions, type ScreenshotOptions, type PdfOptions, type NavigateAction, type ClickAction, type TypeAction, type WaitAction, type ScrollAction, type SelectAction, type HoverAction, type ScreenshotAction, type PdfAction, type EvaluateAction, type WaitForSelectorAction, type ExtractAction, type BrowserAction, type ActionResult, type PageScrapeResult, type PageLink, type PageImage, type PageMetadata, type Cookie, type SessionInfo, type AuthCredentials, type LoginSelectors, type BrowserScript, type ExtractorConfig, type ExtractionResult, type BrowserPage, } from './browser/index.js';
19
24
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,YAAY,EACV,gBAAgB,EAChB,WAAW,EACX,WAAW,EACX,UAAU,EACV,kBAAkB,EAClB,eAAe,GAChB,MAAM,iBAAiB,CAAC;AAGzB,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,QAAQ,EACR,iBAAiB,EACjB,aAAa,GACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,KAAK,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,KAAK,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGvE,OAAO,EAAE,SAAS,IAAI,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAGxD,OAAO,EACL,aAAa,EACb,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,iBAAiB,EACjB,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,GACtB,MAAM,mBAAmB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,YAAY,EACV,gBAAgB,EAChB,WAAW,EACX,WAAW,EACX,UAAU,EACV,kBAAkB,EAClB,eAAe,GAChB,MAAM,iBAAiB,CAAC;AAGzB,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,QAAQ,EACR,iBAAiB,EACjB,aAAa,GACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,KAAK,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,KAAK,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGvE,OAAO,EAAE,SAAS,IAAI,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAGxD,OAAO,EACL,aAAa,EACb,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,iBAAiB,EACjB,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,GACtB,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EACL,cAAc,EACd,SAAS,EACT,UAAU,EACV,UAAU,EACV,qBAAqB,GACtB,MAAM,qBAAqB,CAAC;AAE7B,YAAY,EACV,cAAc,EACd,aAAa,EACb,iBAAiB,EACjB,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,gBAAgB,EAChB,OAAO,EACP,SAAS,EACT,cAAc,EACd,OAAO,EACP,eAAe,EACf,QAAQ,EACR,SAAS,EACT,QAAQ,EACR,SAAS,EACT,cAAc,EACd,QAAQ,EACR,SAAS,GACV,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,kBAAkB,EAClB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,mBAAmB,EACnB,iBAAiB,EACjB,uBAAuB,GACxB,MAAM,qBAAqB,CAAC;AAE7B,YAAY,EACV,iBAAiB,EACjB,kBAAkB,EAClB,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,UAAU,EACV,OAAO,EACP,UAAU,EACV,YAAY,EACZ,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,gBAAgB,EAChB,kBAAkB,EAClB,gBAAgB,EAChB,WAAW,EACX,SAAS,EACT,WAAW,EACX,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EAEL,iBAAiB,EACjB,sBAAsB,EAEtB,cAAc,EACd,gBAAgB,EAChB,cAAc,EAEd,sBAAsB,EAEtB,KAAK,aAAa,EAClB,KAAK,QAAQ,EACb,KAAK,WAAW,EAChB,KAAK,cAAc,EACnB,KAAK,eAAe,EACpB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,iBAAiB,EACtB,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,gBAAgB,EACrB,KAAK,SAAS,EACd,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAC1B,KAAK,aAAa,
EAClB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,QAAQ,EACb,KAAK,SAAS,EACd,KAAK,YAAY,EACjB,KAAK,MAAM,EACX,KAAK,WAAW,EAChB,KAAK,eAAe,EACpB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,eAAe,EACpB,KAAK,gBAAgB,EACrB,KAAK,WAAW,GACjB,MAAM,oBAAoB,CAAC"}