@nahisaho/katashiro-collector 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/browser/ActionExecutor.d.ts +85 -0
  2. package/dist/browser/ActionExecutor.d.ts.map +1 -0
  3. package/dist/browser/ActionExecutor.js +171 -0
  4. package/dist/browser/ActionExecutor.js.map +1 -0
  5. package/dist/browser/BrowserAutomation.d.ts +147 -0
  6. package/dist/browser/BrowserAutomation.d.ts.map +1 -0
  7. package/dist/browser/BrowserAutomation.js +463 -0
  8. package/dist/browser/BrowserAutomation.js.map +1 -0
  9. package/dist/browser/ContentExtractor.d.ts +54 -0
  10. package/dist/browser/ContentExtractor.d.ts.map +1 -0
  11. package/dist/browser/ContentExtractor.js +159 -0
  12. package/dist/browser/ContentExtractor.js.map +1 -0
  13. package/dist/browser/SessionManager.d.ts +67 -0
  14. package/dist/browser/SessionManager.d.ts.map +1 -0
  15. package/dist/browser/SessionManager.js +173 -0
  16. package/dist/browser/SessionManager.js.map +1 -0
  17. package/dist/browser/index.d.ts +17 -0
  18. package/dist/browser/index.d.ts.map +1 -0
  19. package/dist/browser/index.js +17 -0
  20. package/dist/browser/index.js.map +1 -0
  21. package/dist/browser/types.d.ts +361 -0
  22. package/dist/browser/types.d.ts.map +1 -0
  23. package/dist/browser/types.js +23 -0
  24. package/dist/browser/types.js.map +1 -0
  25. package/dist/document/DocumentParser.d.ts +91 -0
  26. package/dist/document/DocumentParser.d.ts.map +1 -0
  27. package/dist/document/DocumentParser.js +234 -0
  28. package/dist/document/DocumentParser.js.map +1 -0
  29. package/dist/document/index.d.ts +11 -0
  30. package/dist/document/index.d.ts.map +1 -0
  31. package/dist/document/index.js +10 -0
  32. package/dist/document/index.js.map +1 -0
  33. package/dist/document/parsers/DOCXParser.d.ts +63 -0
  34. package/dist/document/parsers/DOCXParser.d.ts.map +1 -0
  35. package/dist/document/parsers/DOCXParser.js +362 -0
  36. package/dist/document/parsers/DOCXParser.js.map +1 -0
  37. package/dist/document/parsers/PDFParser.d.ts +60 -0
  38. package/dist/document/parsers/PDFParser.d.ts.map +1 -0
  39. package/dist/document/parsers/PDFParser.js +338 -0
  40. package/dist/document/parsers/PDFParser.js.map +1 -0
  41. package/dist/document/parsers/XLSXParser.d.ts +55 -0
  42. package/dist/document/parsers/XLSXParser.d.ts.map +1 -0
  43. package/dist/document/parsers/XLSXParser.js +314 -0
  44. package/dist/document/parsers/XLSXParser.js.map +1 -0
  45. package/dist/document/parsers/index.d.ts +10 -0
  46. package/dist/document/parsers/index.d.ts.map +1 -0
  47. package/dist/document/parsers/index.js +10 -0
  48. package/dist/document/parsers/index.js.map +1 -0
  49. package/dist/document/types.d.ts +251 -0
  50. package/dist/document/types.d.ts.map +1 -0
  51. package/dist/document/types.js +13 -0
  52. package/dist/document/types.js.map +1 -0
  53. package/dist/index.d.ts +7 -2
  54. package/dist/index.d.ts.map +1 -1
  55. package/dist/index.js +14 -2
  56. package/dist/index.js.map +1 -1
  57. package/dist/research/CoverageAnalyzer.d.ts +50 -0
  58. package/dist/research/CoverageAnalyzer.d.ts.map +1 -0
  59. package/dist/research/CoverageAnalyzer.js +169 -0
  60. package/dist/research/CoverageAnalyzer.js.map +1 -0
  61. package/dist/research/QueryPlanner.d.ts +57 -0
  62. package/dist/research/QueryPlanner.d.ts.map +1 -0
  63. package/dist/research/QueryPlanner.js +102 -0
  64. package/dist/research/QueryPlanner.js.map +1 -0
  65. package/dist/research/ResultAggregator.d.ts +39 -0
  66. package/dist/research/ResultAggregator.d.ts.map +1 -0
  67. package/dist/research/ResultAggregator.js +85 -0
  68. package/dist/research/ResultAggregator.js.map +1 -0
  69. package/dist/research/WideResearchEngine.d.ts +110 -0
  70. package/dist/research/WideResearchEngine.d.ts.map +1 -0
  71. package/dist/research/WideResearchEngine.js +330 -0
  72. package/dist/research/WideResearchEngine.js.map +1 -0
  73. package/dist/research/agents/AcademicSearchAgent.d.ts +57 -0
  74. package/dist/research/agents/AcademicSearchAgent.d.ts.map +1 -0
  75. package/dist/research/agents/AcademicSearchAgent.js +180 -0
  76. package/dist/research/agents/AcademicSearchAgent.js.map +1 -0
  77. package/dist/research/agents/EncyclopediaAgent.d.ts +49 -0
  78. package/dist/research/agents/EncyclopediaAgent.d.ts.map +1 -0
  79. package/dist/research/agents/EncyclopediaAgent.js +153 -0
  80. package/dist/research/agents/EncyclopediaAgent.js.map +1 -0
  81. package/dist/research/agents/NewsSearchAgent.d.ts +38 -0
  82. package/dist/research/agents/NewsSearchAgent.d.ts.map +1 -0
  83. package/dist/research/agents/NewsSearchAgent.js +146 -0
  84. package/dist/research/agents/NewsSearchAgent.js.map +1 -0
  85. package/dist/research/agents/WebSearchAgent.d.ts +45 -0
  86. package/dist/research/agents/WebSearchAgent.d.ts.map +1 -0
  87. package/dist/research/agents/WebSearchAgent.js +135 -0
  88. package/dist/research/agents/WebSearchAgent.js.map +1 -0
  89. package/dist/research/agents/index.d.ts +13 -0
  90. package/dist/research/agents/index.d.ts.map +1 -0
  91. package/dist/research/agents/index.js +12 -0
  92. package/dist/research/agents/index.js.map +1 -0
  93. package/dist/research/agents/types.d.ts +60 -0
  94. package/dist/research/agents/types.d.ts.map +1 -0
  95. package/dist/research/agents/types.js +9 -0
  96. package/dist/research/agents/types.js.map +1 -0
  97. package/dist/research/index.d.ts +16 -0
  98. package/dist/research/index.d.ts.map +1 -0
  99. package/dist/research/index.js +17 -0
  100. package/dist/research/index.js.map +1 -0
  101. package/dist/research/types.d.ts +206 -0
  102. package/dist/research/types.d.ts.map +1 -0
  103. package/dist/research/types.js +33 -0
  104. package/dist/research/types.js.map +1 -0
  105. package/package.json +1 -1
@@ -0,0 +1,159 @@
1
+ /**
2
+ * ContentExtractor - ページからコンテンツを抽出
3
+ *
4
+ * @requirement REQ-COLLECT-009
5
+ * @design DES-COLLECT-009-BrowserAutomation
6
+ */
7
/**
 * ContentExtractor - extracts content from a browser page.
 *
 * The only page methods this class calls are `evaluate`, `$eval` and
 * `$$eval`, so any page-like object providing those three works.
 */
export class ContentExtractor {
    /**
     * Extract main text, full HTML, links, images, metadata and (optionally)
     * custom selector-based data from a page.
     *
     * @param page - Page-like object exposing `evaluate`, `$eval` and `$$eval`.
     * @param [extractors] - Custom extractor configs; each is read for
     *   `name`, `selector`, `multiple` and `attribute`.
     * @returns Object with `content`, `html`, `extractedData`, `links`,
     *   `images` and `metadata`. `extractedData` is `undefined` when no
     *   extractors were given.
     */
    async extract(page, extractors) {
        // Main text content.
        const content = await this.extractMainContent(page);
        // Full document HTML.
        const html = await this.extractHtml(page);
        // Links.
        const links = await this.extractLinks(page);
        // Images.
        const images = await this.extractImages(page);
        // Metadata.
        const metadata = await this.extractMetadata(page);
        // Custom extraction, only when extractors were supplied.
        let extractedData;
        if (extractors && extractors.length > 0) {
            extractedData = await this.extractCustomData(page, extractors);
        }
        return {
            content,
            html,
            extractedData,
            links,
            images,
            metadata,
        };
    }
    /**
     * Extract the page's main text: the body text with script, style, nav,
     * footer, header, aside and noscript elements removed and whitespace
     * collapsed to single spaces.
     *
     * @returns The cleaned text, or `''` when the document has no body.
     */
    async extractMainContent(page) {
        return page.evaluate(`
      (() => {
        // document.body is null for documents without a <body> (frameset,
        // XML, SVG); report empty content instead of throwing.
        const body = document.body;
        if (!body) {
          return '';
        }
        const clone = body.cloneNode(true);
        const removeSelectors = ['script', 'style', 'nav', 'footer', 'header', 'aside', 'noscript'];
        removeSelectors.forEach(sel => {
          clone.querySelectorAll(sel).forEach(el => el.remove());
        });
        return clone.textContent?.replace(/\\s+/g, ' ').trim() || '';
      })()
    `);
    }
    /**
     * Get the serialized HTML of the whole document.
     */
    async extractHtml(page) {
        return page.evaluate('document.documentElement.outerHTML');
    }
    /**
     * Extract every anchor that has an `href`.
     *
     * @returns Array of `{ href, text, rel }`; `rel` is `undefined` when empty.
     */
    async extractLinks(page) {
        return page.evaluate(`
      (() => {
        const anchors = document.querySelectorAll('a[href]');
        return Array.from(anchors).map(a => ({
          href: a.href,
          text: a.textContent?.trim() || '',
          rel: a.rel || undefined,
        }));
      })()
    `);
    }
    /**
     * Extract every image that has a `src`.
     *
     * @returns Array of `{ src, alt, width, height }`; falsy values (empty
     *   alt, zero natural size) are reported as `undefined`.
     */
    async extractImages(page) {
        return page.evaluate(`
      (() => {
        const imgs = document.querySelectorAll('img[src]');
        return Array.from(imgs).map(img => ({
          src: img.src,
          alt: img.alt || undefined,
          width: img.naturalWidth || undefined,
          height: img.naturalHeight || undefined,
        }));
      })()
    `);
    }
    /**
     * Extract page metadata: description, keywords (comma-split and trimmed),
     * Open Graph properties (keyed without the `og:` prefix) and the
     * document language.
     *
     * @returns `{ description, keywords, ogp, language }`; each field is
     *   `undefined` when the page does not provide it.
     */
    async extractMetadata(page) {
        return page.evaluate(`
      (() => {
        const getMeta = (name) => {
          const el = document.querySelector('meta[name="' + name + '"], meta[property="' + name + '"]');
          return el?.getAttribute('content') || undefined;
        };

        const ogp = {};
        document.querySelectorAll('meta[property^="og:"]').forEach(el => {
          const property = el.getAttribute('property');
          const content = el.getAttribute('content');
          if (property && content) {
            ogp[property.replace('og:', '')] = content;
          }
        });

        return {
          description: getMeta('description'),
          keywords: getMeta('keywords')?.split(',').map(k => k.trim()),
          ogp: Object.keys(ogp).length > 0 ? ogp : undefined,
          language: document.documentElement.lang || undefined,
        };
      })()
    `);
    }
    /**
     * Run caller-supplied extractors against the page.
     *
     * For each extractor the result is stored under `extractor.name`: an
     * array of values when `extractor.multiple` is truthy, otherwise a single
     * value. The value is the named attribute when `extractor.attribute` is
     * set, else the element's trimmed text content.
     *
     * @returns Map of extractor name to extracted value(s).
     */
    async extractCustomData(page, extractors) {
        const extractedData = {};
        for (const extractor of extractors) {
            try {
                if (extractor.multiple) {
                    extractedData[extractor.name] = await page.$$eval(extractor.selector, (els, attr) => els.map((el) => attr ? el.getAttribute(attr) : el.textContent?.trim()), extractor.attribute);
                }
                else {
                    extractedData[extractor.name] = await page.$eval(extractor.selector, (el, attr) => attr ? el.getAttribute(attr) : el.textContent?.trim(), extractor.attribute);
                }
            }
            catch {
                // Best effort: one failing extractor (e.g. selector not found)
                // yields null rather than aborting the others.
                extractedData[extractor.name] = null;
            }
        }
        return extractedData;
    }
    /**
     * Extract the trimmed text content of the first element matching
     * `selector` (simple variant). Errors from `page.$eval` propagate.
     */
    async extractText(page, selector) {
        return page.$eval(selector, (el) => el.textContent?.trim() || '');
    }
    /**
     * Extract the value of `attribute` from the first element matching
     * `selector`. Errors from `page.$eval` propagate.
     */
    async extractAttribute(page, selector, attribute) {
        return page.$eval(selector, (el, attr) => el.getAttribute(attr), attribute);
    }
    /**
     * Extract the trimmed text content of every element matching `selector`.
     */
    async extractAllText(page, selector) {
        return page.$$eval(selector, (els) => els.map((el) => el.textContent?.trim() || ''));
    }
}
159
+ //# sourceMappingURL=ContentExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ContentExtractor.js","sourceRoot":"","sources":["../../src/browser/ContentExtractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAWH;;GAEG;AACH,MAAM,OAAO,gBAAgB;IAC3B;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,IAAiB,EACjB,UAA8B;QAE9B,cAAc;QACd,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAEpD,YAAY;QACZ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAE1C,SAAS;QACT,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QAE5C,QAAQ;QACR,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAE9C,WAAW;QACX,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAElD,SAAS;QACT,IAAI,aAAkD,CAAC;QACvD,IAAI,UAAU,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxC,aAAa,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QACjE,CAAC;QAED,OAAO;YACL,OAAO;YACP,IAAI;YACJ,aAAa;YACb,KAAK;YACL,MAAM;YACN,QAAQ;SACT,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAAC,IAAiB;QAChD,OAAO,IAAI,CAAC,QAAQ,CAAC;;;;;;;;;KASpB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,WAAW,CAAC,IAAiB;QACzC,OAAO,IAAI,CAAC,QAAQ,CAAC,oCAAoC,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CAAC,IAAiB;QAC1C,OAAO,IAAI,CAAC,QAAQ,CAAC;;;;;;;;;KASpB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CAAC,IAAiB;QAC3C,OAAO,IAAI,CAAC,QAAQ,CAAC;;;;;;;;;;KAUpB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAC,IAAiB;QAC7C,OAAO,IAAI,CAAC,QAAQ,CAAC;;;;;;;;;;;;;;;;;;;;;;;KAuBpB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB,CAC7B,IAAiB,EACjB,UAA6B;QAE7B,MAAM,aAAa,GAA4B,EAAE,CAAC;QAElD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,CAAC;gBACH,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;oBACvB,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,IAAI,CAAC,MAAM,CAC/C,SAAS,CAAC,QAAQ,EAClB,CAAC,GAAU,EAAE,IAAS,EAAE,EAAE,CACxB,GAAG,CAAC,GAAG,CAAC,CAAC,EAAO,EAAE,EAAE,CAClB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CACtD,EACH,SAAS,CAAC,SAAS,CACpB,CAAC;gBACJ,CAAC;qBAAM,CAAC;oBACN,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,IAAI,CAAC,KAAK,CAC9C,SAAS,CAAC,QAAQ,EAClB,CAAC,EA
AO,EAAE,IAAS,EAAE,EAAE,CACrB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,EACvD,SAAS,CAAC,SAAS,CACpB,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACvC,CAAC;QACH,CAAC;QAED,OAAO,aAAa,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,IAAiB,EAAE,QAAgB;QACnD,OAAO,IAAI,CAAC,KAAK,CACf,QAAQ,EACR,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAC1C,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CACpB,IAAiB,EACjB,QAAgB,EAChB,SAAiB;QAEjB,OAAO,IAAI,CAAC,KAAK,CACf,QAAQ,EACR,CAAC,EAAO,EAAE,IAAS,EAAE,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,EAC7C,SAAS,CACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,IAAiB,EAAE,QAAgB;QACtD,OAAO,IAAI,CAAC,MAAM,CAChB,QAAQ,EACR,CAAC,GAAU,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CACnE,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * SessionManager - ブラウザセッションを管理
3
+ *
4
+ * @requirement REQ-COLLECT-009
5
+ * @design DES-COLLECT-009-BrowserAutomation
6
+ */
7
+ import type { SessionInfo } from './types.js';
8
+ import type { BrowserPage } from './ActionExecutor.js';
9
+ /**
10
+ * Manages named browser sessions (cookies plus localStorage/sessionStorage) held in memory.
11
+ */
12
+ export declare class SessionManager {
13
+ private sessions;
14
+ /**
15
+ * Save a session: capture the page's cookies and storage, store them under `name`, and return the stored SessionInfo.
16
+ */
17
+ save(page: BrowserPage, name: string): Promise<SessionInfo>;
18
+ /**
19
+ * Restore a session: apply the cookies and storage saved under `name` to the page; rejects with an Error when no such session exists.
20
+ */
21
+ load(page: BrowserPage, name: string): Promise<void>;
22
+ /**
23
+ * Delete the session stored under `name`; returns true when a session was removed.
24
+ */
25
+ delete(name: string): boolean;
26
+ /**
27
+ * Get the names of all stored sessions.
28
+ */
29
+ list(): string[];
30
+ /**
31
+ * Check whether a session is stored under `name`.
32
+ */
33
+ has(name: string): boolean;
34
+ /**
35
+ * Remove all stored sessions.
36
+ */
37
+ clear(): void;
38
+ /**
39
+ * Get the session stored under `name`, or undefined when there is none.
40
+ */
41
+ get(name: string): SessionInfo | undefined;
42
+ /**
43
+ * Export the session stored under `name` as a JSON string; throws an Error when no such session exists.
44
+ */
45
+ export(name: string): string;
46
+ /**
47
+ * Import a session from a JSON string and store it under `name`.
48
+ */
49
+ import(name: string, data: string): void;
50
+ /**
51
+ * Read the page's cookies.
52
+ */
53
+ private getCookies;
54
+ /**
55
+ * Set cookies on the page.
56
+ */
57
+ private setCookies;
58
+ /**
59
+ * Read the page's localStorage and sessionStorage.
60
+ */
61
+ private getStorage;
62
+ /**
63
+ * Write localStorage and sessionStorage entries to the page.
64
+ */
65
+ private setStorage;
66
+ }
67
+ //# sourceMappingURL=SessionManager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SessionManager.d.ts","sourceRoot":"","sources":["../../src/browser/SessionManager.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAU,MAAM,YAAY,CAAC;AACtD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAUvD;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAuC;IAEvD;;OAEG;IACG,IAAI,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAoBjE;;OAEG;IACG,IAAI,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAe1D;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI7B;;OAEG;IACH,IAAI,IAAI,MAAM,EAAE;IAIhB;;OAEG;IACH,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI1B;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAI1C;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAQ5B;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI;IAKxC;;OAEG;YACW,UAAU;IAWxB;;OAEG;YACW,UAAU;IAUxB;;OAEG;YACW,UAAU;IA4BxB;;OAEG;YACW,UAAU;CA2BzB"}
@@ -0,0 +1,173 @@
1
+ /**
2
+ * SessionManager - ブラウザセッションを管理
3
+ *
4
+ * @requirement REQ-COLLECT-009
5
+ * @design DES-COLLECT-009-BrowserAutomation
6
+ */
7
/**
 * SessionManager - keeps named snapshots of a browser session in memory.
 *
 * A snapshot holds the page's cookies plus its localStorage and
 * sessionStorage entries, and can be re-applied to a page later or moved
 * between processes as a JSON string.
 */
export class SessionManager {
    /** Saved sessions, keyed by the caller-supplied name. */
    sessions = new Map();
    /**
     * Capture the page's cookies and storage and save them under `name`,
     * replacing any session already stored under that name.
     *
     * @param page - Page-like object; `evaluate` is used for storage and
     *   `cookies()` (when present) for cookies.
     * @param name - Name to store the session under.
     * @returns The stored session (`id`, `cookies`, `localStorage`,
     *   `sessionStorage`).
     */
    async save(page, name) {
        const cookies = await this.getCookies(page);
        const { localStorage, sessionStorage } = await this.getStorage(page);
        const session = {
            // The timestamp makes the id differ between saves of one name.
            id: `${name}-${Date.now()}`,
            cookies,
            localStorage,
            sessionStorage,
        };
        this.sessions.set(name, session);
        return session;
    }
    /**
     * Apply the session stored under `name` to the page.
     *
     * @throws {Error} When no session is stored under `name`.
     */
    async load(page, name) {
        const session = this.sessions.get(name);
        if (!session) {
            throw new Error(`Session not found: ${name}`);
        }
        await this.setCookies(page, session.cookies);
        await this.setStorage(page, session.localStorage, session.sessionStorage);
    }
    /**
     * Delete the session stored under `name`.
     *
     * @returns `true` when a session was removed, `false` otherwise.
     */
    delete(name) {
        return this.sessions.delete(name);
    }
    /**
     * List the names of all stored sessions.
     */
    list() {
        return [...this.sessions.keys()];
    }
    /**
     * Check whether a session is stored under `name`.
     */
    has(name) {
        return this.sessions.has(name);
    }
    /**
     * Remove all stored sessions.
     */
    clear() {
        this.sessions.clear();
    }
    /**
     * Get the session stored under `name`, or `undefined` when there is none.
     */
    get(name) {
        return this.sessions.get(name);
    }
    /**
     * Serialize the session stored under `name` to a JSON string.
     *
     * @throws {Error} When no session is stored under `name`.
     */
    export(name) {
        const session = this.sessions.get(name);
        if (!session) {
            throw new Error(`Session not found: ${name}`);
        }
        return JSON.stringify(session);
    }
    /**
     * Parse a JSON-serialized session and store it under `name`.
     *
     * The payload must be a JSON object. Missing or malformed `cookies`,
     * `localStorage` and `sessionStorage` fields are replaced with empty
     * defaults so the stored session can always be loaded.
     *
     * @throws {SyntaxError} When `data` is not valid JSON.
     * @throws {TypeError} When `data` does not describe a JSON object.
     */
    import(name, data) {
        const isRecord = (value) => typeof value === 'object' && value !== null && !Array.isArray(value);
        const parsed = JSON.parse(data);
        if (!isRecord(parsed)) {
            // Storing e.g. null would make has() report a session that
            // export() and load() then claim does not exist.
            throw new TypeError(`Invalid session data for "${name}": expected a JSON object`);
        }
        const session = {
            ...parsed,
            cookies: Array.isArray(parsed.cookies) ? parsed.cookies : [],
            localStorage: isRecord(parsed.localStorage) ? parsed.localStorage : {},
            sessionStorage: isRecord(parsed.sessionStorage) ? parsed.sessionStorage : {},
        };
        this.sessions.set(name, session);
    }
    /**
     * Read the page's cookies.
     *
     * @returns The result of `page.cookies()`, or `[]` when the page has no
     *   `cookies` function or the call fails.
     */
    async getCookies(page) {
        try {
            if (typeof page.cookies === 'function') {
                return await page.cookies();
            }
            return [];
        }
        catch {
            // Best effort: a page that cannot report cookies yields none.
            return [];
        }
    }
    /**
     * Set cookies on the page via `page.setCookie(...cookies)`.
     *
     * Does nothing when the page has no `setCookie` function or the list is
     * empty.
     */
    async setCookies(page, cookies) {
        try {
            if (typeof page.setCookie === 'function' && cookies.length > 0) {
                await page.setCookie(...cookies);
            }
        }
        catch {
            // Best effort: failure to set cookies is deliberately ignored.
        }
    }
    /**
     * Read all localStorage and sessionStorage entries from the page.
     *
     * @returns `{ localStorage, sessionStorage }` as plain key/value maps;
     *   both are empty when the page script fails.
     */
    async getStorage(page) {
        try {
            return await page.evaluate(`
        (() => {
          const getStorageData = (storage) => {
            const data = {};
            for (let i = 0; i < storage.length; i++) {
              const key = storage.key(i);
              // key() returns null only past the end; '' is a legal key.
              if (key !== null) {
                data[key] = storage.getItem(key) || '';
              }
            }
            return data;
          };
          return {
            localStorage: getStorageData(localStorage),
            sessionStorage: getStorageData(sessionStorage),
          };
        })()
      `);
        }
        catch {
            // Best effort: storage that cannot be read is reported as empty.
            return { localStorage: {}, sessionStorage: {} };
        }
    }
    /**
     * Write the given entries into the page's localStorage and
     * sessionStorage. Existing keys not present in the maps are left alone.
     */
    async setStorage(page, localStorage, sessionStorage) {
        try {
            // Default to {} so a missing map cannot splice `undefined` into
            // the script and abort restoring the other store as well.
            const localStorageJson = JSON.stringify(localStorage ?? {});
            const sessionStorageJson = JSON.stringify(sessionStorage ?? {});
            await page.evaluate(`
        (() => {
          const localData = ${localStorageJson};
          const sessionData = ${sessionStorageJson};

          Object.entries(localData).forEach(([key, value]) => {
            localStorage.setItem(key, value);
          });

          Object.entries(sessionData).forEach(([key, value]) => {
            sessionStorage.setItem(key, value);
          });
        })()
      `);
        }
        catch {
            // Best effort: failure to restore storage is deliberately ignored.
        }
    }
}
173
+ //# sourceMappingURL=SessionManager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SessionManager.js","sourceRoot":"","sources":["../../src/browser/SessionManager.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAaH;;GAEG;AACH,MAAM,OAAO,cAAc;IACjB,QAAQ,GAA6B,IAAI,GAAG,EAAE,CAAC;IAEvD;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,IAAiB,EAAE,IAAY;QACxC,MAAM,eAAe,GAAG,IAAuB,CAAC;QAEhD,YAAY;QACZ,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QAEvD,WAAW;QACX,MAAM,EAAE,YAAY,EAAE,cAAc,EAAE,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAErE,MAAM,OAAO,GAAgB;YAC3B,EAAE,EAAE,GAAG,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;YAC3B,OAAO;YACP,YAAY;YACZ,cAAc;SACf,CAAC;QAEF,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACjC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,IAAiB,EAAE,IAAY;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;QAED,MAAM,eAAe,GAAG,IAAuB,CAAC;QAEhD,YAAY;QACZ,MAAM,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAExD,WAAW;QACX,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACH,IAAI;QACF,OAAO,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY;QACjB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,IAAY,EAAE,IAAY;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAgB,CAAC;QAChD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU
,CAAC,IAAqB;QAC5C,IAAI,CAAC;YACH,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;gBACvC,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAC9B,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,IAAqB,EAAE,OAAiB;QAC/D,IAAI,CAAC;YACH,IAAI,OAAO,IAAI,CAAC,SAAS,KAAK,UAAU,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/D,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,gBAAgB;QAClB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,IAAiB;QAIxC,IAAI,CAAC;YACH,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC;;;;;;;;;;;;;;;;;OAiB1B,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC;QAClD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CACtB,IAAiB,EACjB,YAAoC,EACpC,cAAsC;QAEtC,IAAI,CAAC;YACH,MAAM,gBAAgB,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;YACtD,MAAM,kBAAkB,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;YAE1D,MAAM,IAAI,CAAC,QAAQ,CAAC;;8BAEI,gBAAgB;gCACd,kBAAkB;;;;;;;;;;OAU3C,CAAC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Browser Automation モジュール
3
+ *
4
+ * @module @nahisaho/katashiro-collector/browser
5
+ * @requirement REQ-COLLECT-009
6
+ * @design DES-COLLECT-009-BrowserAutomation
7
+ */
8
+ export type { BrowserConfig, Viewport, ProxyConfig, ResourceLimits, NavigationOptions, WaitUntilOption, ClickOptions, TypeOptions, ScrollOptions, WaitForSelectorOptions, ScreenshotOptions, PdfOptions, BrowserAction, NavigateAction, ClickAction, TypeAction, WaitAction, ScrollAction, SelectAction, HoverAction, ScreenshotAction, PdfAction, EvaluateAction, WaitForSelectorAction, ExtractAction, ActionResult, PageScrapeResult, PageLink, PageImage, PageMetadata, Cookie, SessionInfo, AuthCredentials, LoginSelectors, BrowserScript, ExtractorConfig, ExtractionResult, } from './types.js';
9
+ export { DEFAULT_BROWSER_CONFIG } from './types.js';
10
+ export { ActionExecutor } from './ActionExecutor.js';
11
+ export type { BrowserPage } from './ActionExecutor.js';
12
+ export { ContentExtractor } from './ContentExtractor.js';
13
+ export { SessionManager } from './SessionManager.js';
14
+ export { BrowserAutomation, BrowserAutomationError, } from './BrowserAutomation.js';
15
+ export type { Browser, PuppeteerLauncher, BrowserAutomationErrorCode, } from './BrowserAutomation.js';
16
+ export { BrowserAutomation as default } from './BrowserAutomation.js';
17
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/browser/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,YAAY,EACV,aAAa,EACb,QAAQ,EACR,WAAW,EACX,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,WAAW,EACX,aAAa,EACb,sBAAsB,EACtB,iBAAiB,EACjB,UAAU,EACV,aAAa,EACb,cAAc,EACd,WAAW,EACX,UAAU,EACV,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,WAAW,EACX,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,qBAAqB,EACrB,aAAa,EACb,YAAY,EACZ,gBAAgB,EAChB,QAAQ,EACR,SAAS,EACT,YAAY,EACZ,MAAM,EACN,WAAW,EACX,eAAe,EACf,cAAc,EACd,aAAa,EACb,eAAe,EACf,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAGpD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,YAAY,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAEvD,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAErD,OAAO,EACL,iBAAiB,EACjB,sBAAsB,GACvB,MAAM,wBAAwB,CAAC;AAChC,YAAY,EACV,OAAO,EACP,iBAAiB,EACjB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EAAE,iBAAiB,IAAI,OAAO,EAAE,MAAM,wBAAwB,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Browser Automation モジュール
3
+ *
4
+ * @module @nahisaho/katashiro-collector/browser
5
+ * @requirement REQ-COLLECT-009
6
+ * @design DES-COLLECT-009-BrowserAutomation
7
+ */
8
+ // 定数のエクスポート
9
+ export { DEFAULT_BROWSER_CONFIG } from './types.js';
10
+ // クラスのエクスポート
11
+ export { ActionExecutor } from './ActionExecutor.js';
12
+ export { ContentExtractor } from './ContentExtractor.js';
13
+ export { SessionManager } from './SessionManager.js';
14
+ export { BrowserAutomation, BrowserAutomationError, } from './BrowserAutomation.js';
15
+ // デフォルトエクスポート
16
+ export { BrowserAutomation as default } from './BrowserAutomation.js';
17
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/browser/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AA2CH,YAAY;AACZ,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAEpD,aAAa;AACb,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAGrD,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAErD,OAAO,EACL,iBAAiB,EACjB,sBAAsB,GACvB,MAAM,wBAAwB,CAAC;AAOhC,cAAc;AACd,OAAO,EAAE,iBAAiB,IAAI,OAAO,EAAE,MAAM,wBAAwB,CAAC"}