@nahisaho/katashiro-collector 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/dist/browser/ActionExecutor.d.ts +85 -0
  2. package/dist/browser/ActionExecutor.d.ts.map +1 -0
  3. package/dist/browser/ActionExecutor.js +171 -0
  4. package/dist/browser/ActionExecutor.js.map +1 -0
  5. package/dist/browser/BrowserAutomation.d.ts +147 -0
  6. package/dist/browser/BrowserAutomation.d.ts.map +1 -0
  7. package/dist/browser/BrowserAutomation.js +463 -0
  8. package/dist/browser/BrowserAutomation.js.map +1 -0
  9. package/dist/browser/ContentExtractor.d.ts +54 -0
  10. package/dist/browser/ContentExtractor.d.ts.map +1 -0
  11. package/dist/browser/ContentExtractor.js +159 -0
  12. package/dist/browser/ContentExtractor.js.map +1 -0
  13. package/dist/browser/SessionManager.d.ts +67 -0
  14. package/dist/browser/SessionManager.d.ts.map +1 -0
  15. package/dist/browser/SessionManager.js +173 -0
  16. package/dist/browser/SessionManager.js.map +1 -0
  17. package/dist/browser/index.d.ts +17 -0
  18. package/dist/browser/index.d.ts.map +1 -0
  19. package/dist/browser/index.js +17 -0
  20. package/dist/browser/index.js.map +1 -0
  21. package/dist/browser/types.d.ts +361 -0
  22. package/dist/browser/types.d.ts.map +1 -0
  23. package/dist/browser/types.js +23 -0
  24. package/dist/browser/types.js.map +1 -0
  25. package/dist/document/DocumentParser.d.ts +91 -0
  26. package/dist/document/DocumentParser.d.ts.map +1 -0
  27. package/dist/document/DocumentParser.js +234 -0
  28. package/dist/document/DocumentParser.js.map +1 -0
  29. package/dist/document/index.d.ts +11 -0
  30. package/dist/document/index.d.ts.map +1 -0
  31. package/dist/document/index.js +10 -0
  32. package/dist/document/index.js.map +1 -0
  33. package/dist/document/parsers/DOCXParser.d.ts +63 -0
  34. package/dist/document/parsers/DOCXParser.d.ts.map +1 -0
  35. package/dist/document/parsers/DOCXParser.js +362 -0
  36. package/dist/document/parsers/DOCXParser.js.map +1 -0
  37. package/dist/document/parsers/PDFParser.d.ts +60 -0
  38. package/dist/document/parsers/PDFParser.d.ts.map +1 -0
  39. package/dist/document/parsers/PDFParser.js +338 -0
  40. package/dist/document/parsers/PDFParser.js.map +1 -0
  41. package/dist/document/parsers/XLSXParser.d.ts +55 -0
  42. package/dist/document/parsers/XLSXParser.d.ts.map +1 -0
  43. package/dist/document/parsers/XLSXParser.js +314 -0
  44. package/dist/document/parsers/XLSXParser.js.map +1 -0
  45. package/dist/document/parsers/index.d.ts +10 -0
  46. package/dist/document/parsers/index.d.ts.map +1 -0
  47. package/dist/document/parsers/index.js +10 -0
  48. package/dist/document/parsers/index.js.map +1 -0
  49. package/dist/document/types.d.ts +251 -0
  50. package/dist/document/types.d.ts.map +1 -0
  51. package/dist/document/types.js +13 -0
  52. package/dist/document/types.js.map +1 -0
  53. package/dist/index.d.ts +7 -2
  54. package/dist/index.d.ts.map +1 -1
  55. package/dist/index.js +14 -2
  56. package/dist/index.js.map +1 -1
  57. package/dist/research/CoverageAnalyzer.d.ts +50 -0
  58. package/dist/research/CoverageAnalyzer.d.ts.map +1 -0
  59. package/dist/research/CoverageAnalyzer.js +169 -0
  60. package/dist/research/CoverageAnalyzer.js.map +1 -0
  61. package/dist/research/QueryPlanner.d.ts +57 -0
  62. package/dist/research/QueryPlanner.d.ts.map +1 -0
  63. package/dist/research/QueryPlanner.js +102 -0
  64. package/dist/research/QueryPlanner.js.map +1 -0
  65. package/dist/research/ResultAggregator.d.ts +39 -0
  66. package/dist/research/ResultAggregator.d.ts.map +1 -0
  67. package/dist/research/ResultAggregator.js +85 -0
  68. package/dist/research/ResultAggregator.js.map +1 -0
  69. package/dist/research/WideResearchEngine.d.ts +110 -0
  70. package/dist/research/WideResearchEngine.d.ts.map +1 -0
  71. package/dist/research/WideResearchEngine.js +330 -0
  72. package/dist/research/WideResearchEngine.js.map +1 -0
  73. package/dist/research/agents/AcademicSearchAgent.d.ts +57 -0
  74. package/dist/research/agents/AcademicSearchAgent.d.ts.map +1 -0
  75. package/dist/research/agents/AcademicSearchAgent.js +180 -0
  76. package/dist/research/agents/AcademicSearchAgent.js.map +1 -0
  77. package/dist/research/agents/EncyclopediaAgent.d.ts +49 -0
  78. package/dist/research/agents/EncyclopediaAgent.d.ts.map +1 -0
  79. package/dist/research/agents/EncyclopediaAgent.js +153 -0
  80. package/dist/research/agents/EncyclopediaAgent.js.map +1 -0
  81. package/dist/research/agents/NewsSearchAgent.d.ts +38 -0
  82. package/dist/research/agents/NewsSearchAgent.d.ts.map +1 -0
  83. package/dist/research/agents/NewsSearchAgent.js +146 -0
  84. package/dist/research/agents/NewsSearchAgent.js.map +1 -0
  85. package/dist/research/agents/WebSearchAgent.d.ts +45 -0
  86. package/dist/research/agents/WebSearchAgent.d.ts.map +1 -0
  87. package/dist/research/agents/WebSearchAgent.js +135 -0
  88. package/dist/research/agents/WebSearchAgent.js.map +1 -0
  89. package/dist/research/agents/index.d.ts +13 -0
  90. package/dist/research/agents/index.d.ts.map +1 -0
  91. package/dist/research/agents/index.js +12 -0
  92. package/dist/research/agents/index.js.map +1 -0
  93. package/dist/research/agents/types.d.ts +60 -0
  94. package/dist/research/agents/types.d.ts.map +1 -0
  95. package/dist/research/agents/types.js +9 -0
  96. package/dist/research/agents/types.js.map +1 -0
  97. package/dist/research/index.d.ts +16 -0
  98. package/dist/research/index.d.ts.map +1 -0
  99. package/dist/research/index.js +17 -0
  100. package/dist/research/index.js.map +1 -0
  101. package/dist/research/types.d.ts +206 -0
  102. package/dist/research/types.d.ts.map +1 -0
  103. package/dist/research/types.js +33 -0
  104. package/dist/research/types.js.map +1 -0
  105. package/package.json +1 -1
@@ -0,0 +1,338 @@
1
+ /**
2
+ * PDFパーサー
3
+ *
4
+ * @design DES-COLLECT-003 §2.3
5
+ * @task TASK-001-3
6
+ */
7
+ import { ok, err } from '@nahisaho/katashiro-core';
8
+ import { DEFAULT_PARSE_OPTIONS } from '../types.js';
9
/**
 * PDF parser implementation.
 *
 * Reads a PDF from a file path, a Buffer or a readable stream, extracts its
 * text through the optional `pdf-parse` dependency, and derives a heuristic
 * structure (headings, paragraphs, sections, pages and, optionally, tables)
 * from that text. Every public parse method resolves to a `Result` built with
 * `ok` / `err`; failures are reported as error values rather than thrown.
 *
 * @example
 * ```typescript
 * const parser = new PDFParser();
 * const result = await parser.parse('./document.pdf');
 * ```
 */
export class PDFParser {
    /** Cached `pdf-parse` entry point; `null` until the first successful load. */
    pdfParseModule = null;

    /**
     * Lazily imports `pdf-parse` and caches its callable export.
     *
     * @returns the `pdf-parse` function (default export, or the module itself)
     * @throws Error when the dynamic import fails
     */
    async loadPdfParse() {
        if (!this.pdfParseModule) {
            try {
                // @ts-expect-error: pdf-parse is optional dependency
                const module = await import('pdf-parse');
                this.pdfParseModule = module.default || module;
            }
            catch {
                throw new Error('pdf-parse is not installed. Run: npm install pdf-parse');
            }
        }
        return this.pdfParseModule;
    }

    /**
     * Reads the file at `filePath` and parses it as a PDF.
     *
     * @param filePath path of the PDF file to read
     * @param options parse options, merged over the defaults in `parseBuffer`
     * @returns `ok(document)` or `err({ code, message, ... })`; read failures map
     *          to `FILE_NOT_FOUND`, `PERMISSION_DENIED` or `PARSE_ERROR`
     */
    async parse(filePath, options = {}) {
        const fs = await import('fs/promises');
        const path = await import('path');
        try {
            const buffer = await fs.readFile(filePath);
            const filename = path.basename(filePath);
            return this.parseBuffer(buffer, filename, options);
        }
        catch (error) {
            // `error` may be any thrown value; read its fields defensively.
            const code = error?.code;
            if (code === 'ENOENT') {
                return err({
                    code: 'FILE_NOT_FOUND',
                    message: `File not found: ${filePath}`,
                    filePath,
                });
            }
            if (code === 'EACCES') {
                return err({
                    code: 'PERMISSION_DENIED',
                    message: `Permission denied: ${filePath}`,
                    filePath,
                });
            }
            return err({
                code: 'PARSE_ERROR',
                message: `Failed to read file: ${error?.message ?? String(error)}`,
                filePath,
                details: error,
            });
        }
    }

    /**
     * Parses an in-memory PDF.
     *
     * @param buffer raw PDF bytes
     * @param filename name recorded in the resulting metadata
     * @param options parse options, merged over `DEFAULT_PARSE_OPTIONS`
     * @returns `ok(document)` or an `err` with code `FILE_TOO_LARGE`,
     *          `PASSWORD_PROTECTED`, `CORRUPTED_FILE` or `PARSE_ERROR`
     */
    async parseBuffer(buffer, filename, options = {}) {
        const mergedOptions = { ...DEFAULT_PARSE_OPTIONS, ...options };
        // Size check
        if (buffer.length > mergedOptions.maxFileSize) {
            return err({
                code: 'FILE_TOO_LARGE',
                message: `File size ${buffer.length} exceeds maximum ${mergedOptions.maxFileSize}`,
            });
        }
        try {
            const pdfParse = await this.loadPdfParse();
            const data = await pdfParse(buffer);
            const content = data.text;
            const result = {
                content,
                structure: this.extractStructure(content),
                metadata: this.extractMetadata(data, filename, buffer.length),
                pages: this.extractPages(content, data.numpages),
            };
            // Table extraction (optional)
            if (mergedOptions.extractTables) {
                result.tables = this.extractTables(content);
            }
            return ok(result);
        }
        catch (error) {
            // Thrown values are not guaranteed to be Error instances.
            const rawMessage = error?.message;
            const errorMessage = typeof rawMessage === 'string' && rawMessage.length > 0
                ? rawMessage
                : 'Unknown error';
            // The failure is classified by keywords found in the message text.
            if (errorMessage.includes('password') || errorMessage.includes('encrypted')) {
                return err({
                    code: 'PASSWORD_PROTECTED',
                    message: 'PDF is password protected',
                    details: error,
                });
            }
            if (errorMessage.includes('Invalid') || errorMessage.includes('corrupt')) {
                return err({
                    code: 'CORRUPTED_FILE',
                    message: 'PDF file is corrupted or invalid',
                    details: error,
                });
            }
            return err({
                code: 'PARSE_ERROR',
                message: `Failed to parse PDF: ${errorMessage}`,
                details: error,
            });
        }
    }

    /**
     * Buffers the whole stream in memory, then parses it with `parseBuffer`.
     *
     * @param stream readable stream yielding the PDF bytes
     * @param filename name recorded in the resulting metadata
     * @param options parse options forwarded to `parseBuffer`
     * @returns a promise that resolves (never rejects) with the parse result, or
     *          with a `PARSE_ERROR` when the stream emits `error`
     */
    async parseStream(stream, filename, options = {}) {
        const chunks = [];
        return new Promise((resolve) => {
            stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
            stream.on('error', (error) => {
                resolve(err({
                    code: 'PARSE_ERROR',
                    message: `Stream read error: ${error.message}`,
                }));
            });
            stream.on('end', async () => {
                const buffer = Buffer.concat(chunks);
                const result = await this.parseBuffer(buffer, filename, options);
                resolve(result);
            });
        });
    }

    /** Lists the formats this parser handles (PDF only). */
    getSupportedFormats() {
        return [{ extension: '.pdf', mimeType: 'application/pdf', description: 'PDF Document' }];
    }

    /** Returns true when `filename` ends with `.pdf`, ignoring case. */
    isSupported(filename) {
        return filename.toLowerCase().endsWith('.pdf');
    }

    /**
     * Derives headings, paragraphs and sections from the extracted text.
     */
    extractStructure(content) {
        const headings = this.extractHeadings(content);
        const paragraphs = this.extractParagraphs(content);
        const sections = this.buildSections(content, headings);
        return {
            headings,
            paragraphs,
            sections,
        };
    }

    /**
     * Detects headings heuristically, line by line.
     *
     * Two patterns are recognised:
     *  1. numbered headings (`1. Title`, `1.1 Subtitle`, `1.2.3. Deep`), whose
     *     level is the number of numeric components, capped at 6;
     *  2. short (< 100 chars) lines containing A-Z that are entirely upper case,
     *     reported as level 1.
     *
     * @returns `{ level, text, position }` entries, where `position` is the
     *          offset of the start of the line within `content`
     */
    extractHeadings(content) {
        const headings = [];
        // Group 1 captures the WHOLE numbering prefix. A repeated capture group
        // such as `(\d+\.)+` only keeps its last iteration, which would make
        // every heading level 1. The trailing component without a dot ("1.1 ")
        // is accepted only when followed by whitespace, so "10.55" is not read
        // as a two-level number.
        const numberedPattern = /^((?:\d+\.)+(?:\d+(?=\s))?)\s*(.+)$/;
        let position = 0;
        for (const line of content.split('\n')) {
            const trimmed = line.trim();
            const numberedMatch = trimmed.match(numberedPattern);
            if (numberedMatch) {
                const components = numberedMatch[1].match(/\d+/g) || [];
                headings.push({
                    level: Math.min(components.length, 6),
                    text: numberedMatch[2].trim(),
                    position,
                });
            }
            else if (trimmed.length > 0 &&
                trimmed.length < 100 &&
                trimmed === trimmed.toUpperCase() &&
                /[A-Z]/.test(trimmed)) {
                headings.push({
                    level: 1,
                    text: trimmed,
                    position,
                });
            }
            // +1 accounts for the '\n' removed by split.
            position += line.length + 1;
        }
        return headings;
    }

    /**
     * Splits the text into paragraphs on blank-line separators.
     *
     * @returns `{ text, start, end }` entries such that
     *          `content.slice(start, end) === text`
     */
    extractParagraphs(content) {
        const paragraphs = [];
        // Record the real extent of every separator: it may be longer than two
        // characters ("\n\n\n", "\n  \n"), so a fixed "+2" would drift.
        const boundaries = [];
        for (const separator of content.matchAll(/\n\s*\n/g)) {
            const separatorStart = separator.index ?? 0;
            boundaries.push([separatorStart, separatorStart + separator[0].length]);
        }
        boundaries.push([content.length, content.length]);
        let blockStart = 0;
        for (const [blockEnd, nextBlockStart] of boundaries) {
            const block = content.slice(blockStart, blockEnd);
            const text = block.trim();
            if (text.length > 0) {
                // Skip the leading whitespace that trim() removed.
                const start = blockStart + (block.length - block.trimStart().length);
                paragraphs.push({
                    text,
                    start,
                    end: start + text.length,
                });
            }
            blockStart = nextBlockStart;
        }
        return paragraphs;
    }

    /**
     * Builds one section per heading, spanning up to the next heading (or the
     * end of the text). Without headings the whole text becomes a single
     * "Main Content" section.
     */
    buildSections(content, headings) {
        if (headings.length === 0) {
            return [
                {
                    title: 'Main Content',
                    content,
                    start: 0,
                    end: content.length,
                },
            ];
        }
        return headings.map((heading, index) => {
            const nextHeading = headings[index + 1];
            const endPosition = nextHeading ? nextHeading.position : content.length;
            return {
                title: heading.text,
                content: content.slice(heading.position, endPosition).trim(),
                start: heading.position,
                end: endPosition,
            };
        });
    }

    /**
     * Builds document metadata from the `pdf-parse` result.
     *
     * @param data parse result; `info`, `numpages` and `text` are read
     * @param filename file name to record
     * @param fileSize size in bytes to record
     */
    extractMetadata(data, filename, fileSize) {
        const info = data.info || {};
        // Split on ',' or ';' and drop entries that are empty after trimming.
        const keywords = info.Keywords
            ? info.Keywords.split(/[,;]/)
                .map((k) => k.trim())
                .filter((k) => k.length > 0)
            : undefined;
        return {
            filename,
            fileSize,
            mimeType: 'application/pdf',
            title: info.Title || undefined,
            author: info.Author || undefined,
            subject: info.Subject || undefined,
            keywords,
            createdAt: info.CreationDate ? this.parsePdfDate(info.CreationDate) : undefined,
            modifiedAt: info.ModDate ? this.parsePdfDate(info.ModDate) : undefined,
            pageCount: data.numpages,
            characterCount: data.text.length,
            wordCount: data.text.split(/\s+/).filter((w) => w.length > 0).length,
        };
    }

    /**
     * Parses a PDF date string.
     *
     * Accepts `D:YYYYMMDD[HH[mm[SS]]]` optionally followed by `Z` or a
     * `+HH'mm'` / `-HH'mm'` offset. With an offset the instant is computed in
     * UTC; without one the fields are interpreted in the local time zone.
     * Anything else is handed to the `Date` constructor.
     *
     * @returns the parsed `Date`, or `undefined` when the input is not a string
     *          or does not produce a valid date
     */
    parsePdfDate(dateStr) {
        if (typeof dateStr !== 'string') {
            return undefined;
        }
        const match = dateStr.match(/D:(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?(?:(Z)|([+-])(\d{2})(?:'?(\d{2}))?)?/);
        let parsed;
        if (match) {
            const [, year, month, day, hour = '0', min = '0', sec = '0', zulu, sign, tzHour = '0', tzMin = '0'] = match;
            const y = Number(year);
            const mo = Number(month) - 1; // Date months are zero-based
            const d = Number(day);
            const h = Number(hour);
            const mi = Number(min);
            const s = Number(sec);
            if (zulu || sign) {
                // A positive offset means local time is ahead of UT.
                const offsetMinutes = (sign === '-' ? -1 : 1) * (Number(tzHour) * 60 + Number(tzMin));
                parsed = new Date(Date.UTC(y, mo, d, h, mi, s) - offsetMinutes * 60000);
            }
            else {
                parsed = new Date(y, mo, d, h, mi, s);
            }
        }
        else {
            parsed = new Date(dateStr);
        }
        // `new Date(...)` never throws; an unparseable input yields an invalid
        // Date whose time value is NaN.
        return Number.isNaN(parsed.getTime()) ? undefined : parsed;
    }

    /**
     * Splits the text into `numPages` equally sized slices.
     *
     * Page boundaries are an estimate: per the original author's note,
     * pdf-parse returns all text concatenated, so the real boundaries are not
     * available here.
     *
     * @returns `{ pageNumber, content, startOffset, endOffset }` per page, with
     *          `startOffset <= endOffset <= content.length`
     */
    extractPages(content, numPages) {
        const avgPageLength = Math.ceil(content.length / numPages);
        const pages = [];
        for (let i = 0; i < numPages; i++) {
            // Clamp both ends: rounding up can push trailing pages past the text.
            const start = Math.min(i * avgPageLength, content.length);
            const end = Math.min((i + 1) * avgPageLength, content.length);
            pages.push({
                pageNumber: i + 1,
                content: content.slice(start, end),
                startOffset: start,
                endOffset: end,
            });
        }
        return pages;
    }

    /**
     * Extracts tables (simple implementation).
     *
     * A run of two or more consecutive lines that each contain a tab or a pipe
     * separator is treated as a table; the first line of the run is the header.
     */
    extractTables(content) {
        const tables = [];
        let tableId = 0;
        let currentTable = null;
        let tableStart = 0;
        let position = 0;
        for (const line of content.split('\n')) {
            const cells = line.split(/\t|\|/).map((c) => c.trim());
            const isTableRow = cells.length > 1 && cells.some((c) => c.length > 0);
            if (isTableRow) {
                if (!currentTable) {
                    currentTable = [];
                    tableStart = position;
                }
                currentTable.push(cells);
            }
            else if (currentTable) {
                // End of the run: keep it only if it has a header plus data.
                if (currentTable.length > 1) {
                    tables.push(this.buildTableData(currentTable, tableId++, tableStart));
                }
                // Always reset, so a discarded one-row run cannot leak into the
                // next table.
                currentTable = null;
            }
            position += line.length + 1;
        }
        // Last table
        if (currentTable && currentTable.length > 1) {
            tables.push(this.buildTableData(currentTable, tableId, tableStart));
        }
        return tables;
    }

    /**
     * Converts raw rows into a table record: the first row becomes `headers`,
     * the remaining rows become string-typed cells.
     */
    buildTableData(data, id, position) {
        const [headerRow, ...dataRows] = data;
        return {
            id: `table-${id}`,
            headers: headerRow,
            rows: dataRows.map((row) => ({
                cells: row.map((value) => ({
                    value,
                    type: 'string',
                })),
            })),
            position,
        };
    }
}
338
+ //# sourceMappingURL=PDFParser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PDFParser.js","sourceRoot":"","sources":["../../../src/document/parsers/PDFParser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,EAAE,EAAE,GAAG,EAAe,MAAM,0BAA0B,CAAC;AAehE,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAqBpD;;;;;;;;GAQG;AACH,MAAM,OAAO,SAAS;IACZ,cAAc,GAAkD,IAAI,CAAC;IAErE,KAAK,CAAC,YAAY;QACxB,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,qDAAqD;gBACrD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;gBACzC,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC,cAAe,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,KAAK,CACT,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QACvC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;QAElC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACzC,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;QACrD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACvD,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,gBAAgB;oBACtB,OAAO,EAAE,mBAAmB,QAAQ,EAAE;oBACtC,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YACD,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACvD,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,mBAAmB;oBACzB,OAAO,EAAE,sBAAsB,QAAQ,EAAE;oBACzC,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YACD,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,aAAa;gBACnB,OAAO,EAAE,wBAAyB,KAAe,CAAC,OAAO,EAAE;gBAC3D,QAAQ;gBACR,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CACf,MAAc,EACd,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,aAAa,GAAG,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,EAAE,CAAC;QAE/D,UAAU;QACV,IAAI,MAAM,CAAC,MAAM,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;YAC9C,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,gBAAgB;gBACtB,OAAO,EAAE,aAAa,MAAM,CAAC,MAAM,oBAAoB,aAAa,CAAC,WAAW,EAAE;aACnF,CAAC,CAAC;QACL,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;YAEpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;YAC1B,MAAM,SAAS,GAAG,
IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YAExD,MAAM,MAAM,GAAmB;gBAC7B,OAAO;gBACP,SAAS;gBACT,QAAQ;gBACR,KAAK;aACN,CAAC;YAEF,gBAAgB;YAChB,IAAI,aAAa,CAAC,aAAa,EAAE,CAAC;gBAChC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;YAC9C,CAAC;YAED,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAI,KAAe,CAAC,OAAO,IAAI,eAAe,CAAC;YAEjE,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC5E,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,oBAAoB;oBAC1B,OAAO,EAAE,2BAA2B;oBACpC,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;YACL,CAAC;YAED,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACzE,OAAO,GAAG,CAAC;oBACT,IAAI,EAAE,gBAAgB;oBACtB,OAAO,EAAE,kCAAkC;oBAC3C,OAAO,EAAE,KAAK;iBACf,CAAC,CAAC;YACL,CAAC;YAED,OAAO,GAAG,CAAC;gBACT,IAAI,EAAE,aAAa;gBACnB,OAAO,EAAE,wBAAwB,YAAY,EAAE;gBAC/C,OAAO,EAAE,KAAK;aACf,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CACf,MAA6B,EAC7B,QAAgB,EAChB,UAAwB,EAAE;QAE1B,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9D,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBAC3B,OAAO,CACL,GAAG,CAAC;oBACF,IAAI,EAAE,aAAa;oBACnB,OAAO,EAAE,sBAAsB,KAAK,CAAC,OAAO,EAAE;iBAC/C,CAAC,CACH,CAAC;YACJ,CAAC,CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,IAAI,EAAE;gBAC1B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBACrC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACjE,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED,mBAAmB;QACjB,OAAO,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,WAAW,EAAE,cAAc,EAAE,CAAC,CAAC;IAC3F,CAAC;IAED,WAAW,CAAC,QAAgB;QAC1B,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;
IACK,gBAAgB,CAAC,OAAe;QACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAEvD,OAAO;YACL,QAAQ;YACR,UAAU;YACV,QAAQ;SACT,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,OAAe;QACrC,MAAM,QAAQ,GAAc,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,aAAa;YACb,4CAA4C;YAC5C,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YACzD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBAC5E,QAAQ,CAAC,IAAI,CAAC;oBACZ,KAAK;oBACL,IAAI,EAAE,aAAa,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;oBAC9B,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YACD,wBAAwB;iBACnB,IACH,OAAO,CAAC,MAAM,GAAG,CAAC;gBAClB,OAAO,CAAC,MAAM,GAAG,GAAG;gBACpB,OAAO,KAAK,OAAO,CAAC,WAAW,EAAE;gBACjC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EACrB,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,KAAK,EAAE,CAAC;oBACR,IAAI,EAAE,OAAO;oBACb,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;YAED,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,OAAe;QACvC,MAAM,UAAU,GAAgB,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACxC,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;YAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,QAAQ;oBACf,GAAG,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM;iBAC/B,CAAC,CAAC;YACL,CAAC;YACD,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,cAAc;QAC9C,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,OAAe,EAAE,QAAmB;QACxD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL;oBACE,KAAK,EAAE,cAAc;oBACrB,OAAO;oBACP,KAAK,EAAE,CAAC;oBACR,GAAG,EAAE,OAAO,CAAC,MAAM;iBACpB;aACF,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GAAc,EAAE,CAAC;QAE/B,
KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;YAC7B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACpC,MAAM,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;YAExE,QAAQ,CAAC,IAAI,CAAC;gBACZ,KAAK,EAAE,OAAO,CAAC,IAAI;gBACnB,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC,IAAI,EAAE;gBAC5D,KAAK,EAAE,OAAO,CAAC,QAAQ;gBACvB,GAAG,EAAE,WAAW;aACjB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,IAAa,EACb,QAAgB,EAChB,QAAgB;QAEhB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAE7B,OAAO;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ,EAAE,iBAAiB;YAC3B,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;YAC9B,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;YAChC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,SAAS;YAClC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YACtF,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/E,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;YACtE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM;YAChC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;SACrE,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,OAAe;QAClC,IAAI,CAAC;YACH,8CAA8C;YAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAC;YAC/E,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,GAAG,GAAG,EAAE,GAAG,GAAG,GAAG,EAAE,GAAG,GAAG,GAAG,CAAC,GAAG,KAAK,CAAC;gBACrE,OAAO,IAAI,IAAI,CACb,QAAQ,CAAC,IAAK,CAAC,EACf,QAAQ,CAAC,KAAM,CAAC,GAAG,CAAC,EACpB,QAAQ,CAAC,GAAI,CAAC,EACd,QAAQ,CAAC,IAAI,CAAC,EACd,QAAQ,CAAC,GAAG,CAAC,EACb,QAAQ,CAAC,GAAG,CAAC,CACd,CAAC;YACJ,CAAC;YACD,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED
;;OAEG;IACK,YAAY,CAAC,OAAe,EAAE,QAAgB;QACpD,kCAAkC;QAClC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;QAC3D,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,CAAC,GAAG,aAAa,CAAC;YAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAE9D,KAAK,CAAC,IAAI,CAAC;gBACT,UAAU,EAAE,CAAC,GAAG,CAAC;gBACjB,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC;gBAClC,WAAW,EAAE,KAAK;gBAClB,SAAS,EAAE,GAAG;aACf,CAAC,CAAC;QACL,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,OAAe;QACnC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,sBAAsB;QACtB,IAAI,YAAY,GAAsB,IAAI,CAAC;QAC3C,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAEvD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACxD,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,YAAY,GAAG,EAAE,CAAC;oBAClB,UAAU,GAAG,QAAQ,CAAC;gBACxB,CAAC;gBACD,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnD,SAAS;gBACT,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,OAAO,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC;gBACtE,YAAY,GAAG,IAAI,CAAC;YACtB,CAAC;YAED,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,CAAC;QAED,UAAU;QACV,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;QACtE,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,cAAc,CAAC,IAAgB,EAAE,EAAU,EAAE,QAAgB;QACnE,MAAM,CAAC,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,IAAI,CAAC;QAEtC,OAAO;YACL,EAAE,EAAE,SAAS,EAAE,EAAE;YACjB,OAAO,EAAE,SAAS;YAClB,IAAI,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC3B,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CA
AC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACzB,KAAK;oBACL,IAAI,EAAE,QAAiB;iBACxB,CAAC,CAAC;aACJ,CAAC,CAAC;YACH,QAAQ;SACT,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * XLSXパーサー
3
+ *
4
+ * @design DES-COLLECT-003 §2.3
5
+ * @task TASK-001-5
6
+ */
7
+ import { type Result } from '@nahisaho/katashiro-core';
8
+ import type { IDocumentParser, ParsedDocument, DocumentError, ParseOptions, SupportedFormat } from '../types.js';
9
+ /**
10
+ * XLSX parser implementation
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * const parser = new XLSXParser();
15
+ * const result = await parser.parse('./data.xlsx');
16
+ * ```
17
+ */
18
+ export declare class XLSXParser implements IDocumentParser {
19
+ private xlsx;
20
+ private loadXLSX;
21
+ parse(filePath: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
22
+ parseBuffer(buffer: Buffer, filename: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
23
+ parseStream(stream: NodeJS.ReadableStream, filename: string, options?: ParseOptions): Promise<Result<ParsedDocument, DocumentError>>;
24
+ getSupportedFormats(): SupportedFormat[];
25
+ isSupported(filename: string): boolean;
26
+ /**
27
+ * Extract content (as plain text)
28
+ */
29
+ private extractContent;
30
+ /**
31
+ * Extract structure
32
+ */
33
+ private extractStructure;
34
+ /**
35
+ * Extract metadata
36
+ */
37
+ private extractMetadata;
38
+ /**
39
+ * Extract sheet information
40
+ */
41
+ private extractSheetInfo;
42
+ /**
43
+ * Extract table data
44
+ */
45
+ private extractTables;
46
+ /**
47
+ * Convert a cell
48
+ */
49
+ private convertCell;
50
+ /**
51
+ * Convert a cell type
52
+ */
53
+ private getCellType;
54
+ }
55
+ //# sourceMappingURL=XLSXParser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"XLSXParser.d.ts","sourceRoot":"","sources":["../../../src/document/parsers/XLSXParser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAChE,OAAO,KAAK,EACV,eAAe,EACf,cAAc,EACd,aAAa,EACb,YAAY,EACZ,eAAe,EAUhB,MAAM,aAAa,CAAC;AA0CrB;;;;;;;;GAQG;AACH,qBAAa,UAAW,YAAW,eAAe;IAChD,OAAO,CAAC,IAAI,CAA2B;YAEzB,QAAQ;IAYhB,KAAK,CACT,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAgC3C,WAAW,CACf,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAsE3C,WAAW,CACf,MAAM,EAAE,MAAM,CAAC,cAAc,EAC7B,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAqBjD,mBAAmB,IAAI,eAAe,EAAE;IAUxC,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAItC;;OAEG;IACH,OAAO,CAAC,cAAc;IAyBtB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAsBxB;;OAEG;IACH,OAAO,CAAC,eAAe;IAsBvB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IA0BxB;;OAEG;IACH,OAAO,CAAC,aAAa;IAoDrB;;OAEG;IACH,OAAO,CAAC,WAAW;IAWnB;;OAEG;IACH,OAAO,CAAC,WAAW;CAgBpB"}