@nahisaho/shikigami-mcp-server 1.7.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +29 -0
  2. package/dist/cache/__tests__/global.test.d.ts +6 -0
  3. package/dist/cache/__tests__/global.test.d.ts.map +1 -0
  4. package/dist/cache/__tests__/global.test.js +269 -0
  5. package/dist/cache/__tests__/global.test.js.map +1 -0
  6. package/dist/cache/__tests__/manager.test.d.ts +6 -0
  7. package/dist/cache/__tests__/manager.test.d.ts.map +1 -0
  8. package/dist/cache/__tests__/manager.test.js +286 -0
  9. package/dist/cache/__tests__/manager.test.js.map +1 -0
  10. package/dist/cache/__tests__/semantic.test.d.ts +6 -0
  11. package/dist/cache/__tests__/semantic.test.d.ts.map +1 -0
  12. package/dist/cache/__tests__/semantic.test.js +271 -0
  13. package/dist/cache/__tests__/semantic.test.js.map +1 -0
  14. package/dist/cache/__tests__/store.test.d.ts +6 -0
  15. package/dist/cache/__tests__/store.test.d.ts.map +1 -0
  16. package/dist/cache/__tests__/store.test.js +289 -0
  17. package/dist/cache/__tests__/store.test.js.map +1 -0
  18. package/dist/cache/global.d.ts +140 -0
  19. package/dist/cache/global.d.ts.map +1 -0
  20. package/dist/cache/global.js +260 -0
  21. package/dist/cache/global.js.map +1 -0
  22. package/dist/cache/index.d.ts +10 -0
  23. package/dist/cache/index.d.ts.map +1 -0
  24. package/dist/cache/index.js +10 -0
  25. package/dist/cache/index.js.map +1 -0
  26. package/dist/cache/manager.d.ts +146 -0
  27. package/dist/cache/manager.d.ts.map +1 -0
  28. package/dist/cache/manager.js +229 -0
  29. package/dist/cache/manager.js.map +1 -0
  30. package/dist/cache/semantic.d.ts +164 -0
  31. package/dist/cache/semantic.d.ts.map +1 -0
  32. package/dist/cache/semantic.js +241 -0
  33. package/dist/cache/semantic.js.map +1 -0
  34. package/dist/cache/store.d.ts +98 -0
  35. package/dist/cache/store.d.ts.map +1 -0
  36. package/dist/cache/store.js +469 -0
  37. package/dist/cache/store.js.map +1 -0
  38. package/dist/cache/types.d.ts +171 -0
  39. package/dist/cache/types.d.ts.map +1 -0
  40. package/dist/cache/types.js +8 -0
  41. package/dist/cache/types.js.map +1 -0
  42. package/dist/config/types.d.ts +67 -0
  43. package/dist/config/types.d.ts.map +1 -1
  44. package/dist/config/types.js +30 -0
  45. package/dist/config/types.js.map +1 -1
  46. package/dist/tools/__tests__/multilingual-search.test.d.ts +7 -0
  47. package/dist/tools/__tests__/multilingual-search.test.d.ts.map +1 -0
  48. package/dist/tools/__tests__/multilingual-search.test.js +71 -0
  49. package/dist/tools/__tests__/multilingual-search.test.js.map +1 -0
  50. package/dist/tools/search/recovery/__tests__/logger.test.d.ts +8 -0
  51. package/dist/tools/search/recovery/__tests__/logger.test.d.ts.map +1 -0
  52. package/dist/tools/search/recovery/__tests__/logger.test.js +249 -0
  53. package/dist/tools/search/recovery/__tests__/logger.test.js.map +1 -0
  54. package/dist/tools/search/recovery/__tests__/manager-logger.test.d.ts +8 -0
  55. package/dist/tools/search/recovery/__tests__/manager-logger.test.d.ts.map +1 -0
  56. package/dist/tools/search/recovery/__tests__/manager-logger.test.js +158 -0
  57. package/dist/tools/search/recovery/__tests__/manager-logger.test.js.map +1 -0
  58. package/dist/tools/search/recovery/index.d.ts +31 -2
  59. package/dist/tools/search/recovery/index.d.ts.map +1 -1
  60. package/dist/tools/search/recovery/index.js +51 -7
  61. package/dist/tools/search/recovery/index.js.map +1 -1
  62. package/dist/tools/search/recovery/logger.d.ts +149 -0
  63. package/dist/tools/search/recovery/logger.d.ts.map +1 -0
  64. package/dist/tools/search/recovery/logger.js +218 -0
  65. package/dist/tools/search/recovery/logger.js.map +1 -0
  66. package/dist/tools/search.d.ts +48 -0
  67. package/dist/tools/search.d.ts.map +1 -1
  68. package/dist/tools/search.js +152 -0
  69. package/dist/tools/search.js.map +1 -1
  70. package/dist/tools/visit/recovery/__tests__/index.test.d.ts +10 -0
  71. package/dist/tools/visit/recovery/__tests__/index.test.d.ts.map +1 -0
  72. package/dist/tools/visit/recovery/__tests__/index.test.js +239 -0
  73. package/dist/tools/visit/recovery/__tests__/index.test.js.map +1 -0
  74. package/dist/tools/visit/recovery/__tests__/wayback.test.d.ts +8 -0
  75. package/dist/tools/visit/recovery/__tests__/wayback.test.d.ts.map +1 -0
  76. package/dist/tools/visit/recovery/__tests__/wayback.test.js +271 -0
  77. package/dist/tools/visit/recovery/__tests__/wayback.test.js.map +1 -0
  78. package/dist/tools/visit/recovery/index.d.ts +126 -0
  79. package/dist/tools/visit/recovery/index.d.ts.map +1 -0
  80. package/dist/tools/visit/recovery/index.js +203 -0
  81. package/dist/tools/visit/recovery/index.js.map +1 -0
  82. package/dist/tools/visit/recovery/wayback.d.ts +101 -0
  83. package/dist/tools/visit/recovery/wayback.d.ts.map +1 -0
  84. package/dist/tools/visit/recovery/wayback.js +140 -0
  85. package/dist/tools/visit/recovery/wayback.js.map +1 -0
  86. package/dist/tools/visit.d.ts +33 -0
  87. package/dist/tools/visit.d.ts.map +1 -1
  88. package/dist/tools/visit.js +127 -1
  89. package/dist/tools/visit.js.map +1 -1
  90. package/package.json +7 -3
  91. package/shikigami.config.example.yaml +9 -0
  92. package/src/cache/__tests__/global.test.ts +340 -0
  93. package/src/cache/__tests__/manager.test.ts +353 -0
  94. package/src/cache/__tests__/semantic.test.ts +331 -0
  95. package/src/cache/__tests__/store.test.ts +369 -0
  96. package/src/cache/global.ts +351 -0
  97. package/src/cache/index.ts +10 -0
  98. package/src/cache/manager.ts +325 -0
  99. package/src/cache/semantic.ts +368 -0
  100. package/src/cache/store.ts +555 -0
  101. package/src/cache/types.ts +189 -0
  102. package/src/config/types.ts +108 -0
  103. package/src/tools/__tests__/multilingual-search.test.ts +88 -0
  104. package/src/tools/search/recovery/__tests__/logger.test.ts +334 -0
  105. package/src/tools/search/recovery/__tests__/manager-logger.test.ts +199 -0
  106. package/src/tools/search/recovery/index.ts +67 -9
  107. package/src/tools/search/recovery/logger.ts +351 -0
  108. package/src/tools/search.ts +212 -0
  109. package/src/tools/visit/recovery/__tests__/index.test.ts +297 -0
  110. package/src/tools/visit/recovery/__tests__/wayback.test.ts +344 -0
  111. package/src/tools/visit/recovery/index.ts +312 -0
  112. package/src/tools/visit/recovery/wayback.ts +210 -0
  113. package/src/tools/visit.ts +159 -2
  114. package/vitest.config.ts +22 -0
@@ -0,0 +1,312 @@
1
+ /**
2
+ * VisitRecoveryManager - ページ訪問リカバリーマネージャー
3
+ *
4
+ * TSK-1-004: VisitRecoveryManager実装
5
+ * REQ-SRCH-004-01: visit失敗時フォールバック
6
+ * REQ-SRCH-004-02: 自動リトライ
7
+ * REQ-SRCH-004-03: 結果マージ
8
+ * DES-SRCH-004: VisitRecoveryManager設計
9
+ */
10
+
11
+ import { WaybackClient, type WaybackSnapshot, type WaybackClientConfig } from './wayback.js';
12
+ import { RecoveryLogger, type RecoveryLoggerConfig, type ExtendedLogEntry } from '../../search/recovery/logger.js';
13
+
14
+ /**
15
+ * Signature of a page-fetch callback: takes a URL and resolves to a PageFetchResult.
16
+ */
17
+ export type FetchFunction = (url: string) => Promise<PageFetchResult>;
18
+
19
+ /**
20
+ * Result of a single page fetch.
21
+ */
22
+ export interface PageFetchResult {
23
+ /** Whether the fetch succeeded */
24
+ success: boolean;
25
+ /** Page content (on success) */
26
+ content?: string;
27
+ /** Page title (on success) */
28
+ title?: string;
29
+ /** Error message (on failure) */
30
+ error?: string;
31
+ /** HTTP status code */
32
+ statusCode?: number;
33
+ }
34
+
35
+ /**
36
+ * Outcome of a recovery run.
37
+ */
38
+ export interface VisitRecoveryResult {
39
+ /** Whether the run ultimately succeeded */
40
+ success: boolean;
41
+ /** The URL that was originally requested */
42
+ originalUrl: string;
43
+ /** The URL that was actually fetched (may be a Wayback URL) */
44
+ usedUrl: string;
45
+ /** Page content (on success) */
46
+ content?: string;
47
+ /** Page title (on success) */
48
+ title?: string;
49
+ /** Error message (on failure) */
50
+ error?: string;
51
+ /** Whether the Wayback Machine was used */
52
+ usedWayback: boolean;
53
+ /** Wayback snapshot information */
54
+ waybackSnapshot?: WaybackSnapshot;
55
+ /** Number of attempts made */
56
+ attempts: number;
57
+ /** Processing time (ms) */
58
+ durationMs: number;
59
+ }
60
+
61
+ /**
62
+ * VisitRecoveryManager configuration.
63
+ */
64
+ export interface VisitRecoveryConfig {
65
+ /** Maximum number of retries (default: 2) */
66
+ maxRetries?: number;
67
+ /** Delay between retries (ms, default: 1000) */
68
+ retryDelayMs?: number;
69
+ /** Timeout (ms, default: 30000) */
70
+ timeoutMs?: number;
71
+ /** Whether to use the Wayback Machine (default: true) */
72
+ enableWayback?: boolean;
73
+ /** WaybackClient configuration */
74
+ waybackConfig?: Partial<WaybackClientConfig>;
75
+ /** RecoveryLogger configuration */
76
+ loggerConfig?: Partial<RecoveryLoggerConfig>;
77
+ /** RecoveryLogger instance (a new one is created when omitted) */
78
+ logger?: RecoveryLogger;
79
+ }
80
+
81
+ /**
82
+ * Default values for the scalar VisitRecoveryConfig options
83
+ * (waybackConfig, loggerConfig and logger have no default here).
+ */
84
+ export const DEFAULT_VISIT_RECOVERY_CONFIG: Required<Omit<VisitRecoveryConfig, 'waybackConfig' | 'loggerConfig' | 'logger'>> = {
85
+ maxRetries: 2,
86
+ retryDelayMs: 1000,
87
+ timeoutMs: 30000,
88
+ enableWayback: true,
89
+ };
90
+
91
/**
 * VisitRecoveryManager - automatic recovery for failed page visits.
 *
 * Recovery order:
 *  1. Fetch the original URL up to `maxRetries + 1` times, waiting `retryDelayMs`
 *     between attempts.
 *  2. If every direct attempt failed and Wayback is enabled, look up the closest
 *     archived snapshot and fetch it once.
 *
 * Every attempt is recorded through the RecoveryLogger and echoed to stderr.
 */
export class VisitRecoveryManager {
  private readonly config: Required<Omit<VisitRecoveryConfig, 'waybackConfig' | 'loggerConfig' | 'logger'>>;
  private readonly waybackClient: WaybackClient | null;
  private readonly logger: RecoveryLogger;

  /**
   * @param config Optional overrides. Options that are omitted or explicitly
   *               `undefined` fall back to DEFAULT_VISIT_RECOVERY_CONFIG.
   */
  constructor(config?: VisitRecoveryConfig) {
    const { waybackConfig, loggerConfig, logger, ...restConfig } = config ?? {};
    // A plain spread would let `{ maxRetries: undefined }` overwrite the default,
    // which makes the retry loop run zero times and the timeout fire immediately.
    this.config = {
      ...DEFAULT_VISIT_RECOVERY_CONFIG,
      ...VisitRecoveryManager.dropUndefined(restConfig),
    };
    this.waybackClient = this.config.enableWayback ? new WaybackClient(waybackConfig) : null;
    this.logger = logger ?? new RecoveryLogger(loggerConfig);
  }

  /**
   * Return a shallow copy of `options` without the keys whose value is `undefined`.
   */
  private static dropUndefined<T extends object>(options: T): Partial<T> {
    const defined = Object.entries(options).filter(([, value]) => value !== undefined);
    return Object.fromEntries(defined) as Partial<T>;
  }

  /**
   * Fetch a page with retry and Wayback fallback.
   *
   * @param url     URL to fetch
   * @param fetchFn Page-fetch callback; it may resolve with `success: false`, reject, or throw
   * @returns The recovery outcome; this method reports fetch failures in the result
   *          instead of rejecting. On total failure `attempts` counts the direct
   *          attempts only, and `error` is the last direct-fetch error.
   */
  async recover(url: string, fetchFn: FetchFunction): Promise<VisitRecoveryResult> {
    const startTime = Date.now();
    let attempts = 0;
    let lastError: string | undefined;

    // 1. Retry against the original URL
    for (let i = 0; i <= this.config.maxRetries; i++) {
      attempts++;
      const attemptStart = Date.now();

      try {
        const result = await this.fetchWithTimeout(fetchFn, url);
        const durationMs = Date.now() - attemptStart;

        if (result.success) {
          this.logAttempt(url, url, 'direct', true, durationMs);

          return {
            success: true,
            originalUrl: url,
            usedUrl: url,
            content: result.content,
            title: result.title,
            usedWayback: false,
            attempts,
            durationMs: Date.now() - startTime,
          };
        }

        lastError = result.error ?? 'Unknown error';
        this.logAttempt(url, url, 'direct', false, durationMs, lastError);
      } catch (error) {
        const durationMs = Date.now() - attemptStart;
        lastError = error instanceof Error ? error.message : String(error);
        this.logAttempt(url, url, 'direct', false, durationMs, lastError);
      }

      // Wait before the next retry (no wait after the final attempt)
      if (i < this.config.maxRetries) {
        await this.delay(this.config.retryDelayMs);
      }
    }

    // 2. Fall back to the Wayback Machine
    if (this.waybackClient) {
      const waybackResult = await this.tryWayback(url, fetchFn, startTime, attempts);
      if (waybackResult) {
        return waybackResult;
      }
    }

    // 3. Every attempt failed
    return {
      success: false,
      originalUrl: url,
      usedUrl: url,
      error: lastError ?? 'All recovery attempts failed',
      usedWayback: false,
      attempts,
      durationMs: Date.now() - startTime,
    };
  }

  /**
   * Try to recover the page through the Wayback Machine.
   *
   * @param originalUrl     URL that failed to load directly
   * @param fetchFn         Page-fetch callback, reused for the snapshot URL
   * @param startTime       Start of the whole recovery run (ms since epoch)
   * @param currentAttempts Attempts already made against the original URL
   * @returns A successful result, or `null` when Wayback is disabled, no snapshot
   *          is available, or fetching the snapshot failed
   */
  private async tryWayback(
    originalUrl: string,
    fetchFn: FetchFunction,
    startTime: number,
    currentAttempts: number
  ): Promise<VisitRecoveryResult | null> {
    if (!this.waybackClient) {
      return null;
    }

    const waybackAttemptStart = Date.now();
    const snapshot = await this.waybackClient.getSnapshot(originalUrl);

    if (!snapshot?.available) {
      this.logAttempt(originalUrl, originalUrl, 'wayback-check', false, Date.now() - waybackAttemptStart, 'No archive available');
      return null;
    }

    const attemptStart = Date.now();
    try {
      const result = await this.fetchWithTimeout(fetchFn, snapshot.url);
      const durationMs = Date.now() - attemptStart;

      if (result.success) {
        this.logAttempt(originalUrl, snapshot.url, 'wayback', true, durationMs);

        return {
          success: true,
          originalUrl,
          usedUrl: snapshot.url,
          content: result.content,
          title: result.title,
          usedWayback: true,
          waybackSnapshot: snapshot,
          attempts: currentAttempts + 1,
          durationMs: Date.now() - startTime,
        };
      }

      this.logAttempt(originalUrl, snapshot.url, 'wayback', false, durationMs, result.error);
    } catch (error) {
      const durationMs = Date.now() - attemptStart;
      const errorMsg = error instanceof Error ? error.message : String(error);
      this.logAttempt(originalUrl, snapshot.url, 'wayback', false, durationMs, errorMsg);
    }

    return null;
  }

  /**
   * Run `fetchFn(url)` and reject if it has not settled within `timeoutMs`.
   *
   * The underlying fetch is not cancelled on timeout; only its result is ignored.
   */
  private async fetchWithTimeout(fetchFn: FetchFunction, url: string): Promise<PageFetchResult> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => {
        reject(new Error(`Request timed out after ${this.config.timeoutMs}ms`));
      }, this.config.timeoutMs);
    });

    try {
      // fetchFn is called inside the try so that a synchronous throw also reaches
      // the finally block and does not leave the timer armed.
      return await Promise.race([fetchFn(url), timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Record one recovery attempt in the logger and echo a one-line summary to stderr.
   */
  private logAttempt(
    originalUrl: string,
    usedUrl: string,
    strategy: 'direct' | 'wayback' | 'wayback-check',
    success: boolean,
    durationMs: number,
    error?: string
  ): void {
    const entry: Omit<ExtendedLogEntry, 'id'> = {
      originalQuery: originalUrl,
      alternativeQuery: usedUrl,
      strategy,
      resultCount: success ? 1 : 0,
      success,
      timestamp: new Date(),
      durationMs,
      type: 'visit',
      error,
    };

    this.logger.log(entry);

    // Written to stderr via console.error
    console.error(
      `[VisitRecovery] ${success ? '✓' : '✗'} "${originalUrl}" ${usedUrl !== originalUrl ? `→ "${usedUrl}"` : ''} (${strategy}) ${durationMs}ms`
    );
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get the RecoveryLogger instance used by this manager.
   */
  getLogger(): RecoveryLogger {
    return this.logger;
  }

  /**
   * Get the statistics reported by the logger.
   */
  getStats(): ReturnType<RecoveryLogger['getStats']> {
    return this.logger.getStats();
  }

  /**
   * Get the WaybackClient instance, or `null` when Wayback is disabled.
   */
  getWaybackClient(): WaybackClient | null {
    return this.waybackClient;
  }
}
306
+
307
+ /**
308
+ * Factory function: returns a new VisitRecoveryManager built from the given config.
309
+ */
310
+ export function createVisitRecoveryManager(config?: VisitRecoveryConfig): VisitRecoveryManager {
311
+ return new VisitRecoveryManager(config);
312
+ }
@@ -0,0 +1,210 @@
1
+ /**
2
+ * WaybackClient - Wayback Machine API クライアント
3
+ *
4
+ * TSK-1-003: WaybackClient実装
5
+ * REQ-SRCH-004-01: visit失敗時フォールバック
6
+ * DES-SRCH-004: VisitRecoveryManager設計
7
+ */
8
+
9
+ /**
10
+ * Wayback Machine snapshot information.
11
+ */
12
+ export interface WaybackSnapshot {
13
+ /** Snapshot URL */
14
+ url: string;
15
+ /** Original URL */
16
+ originalUrl: string;
17
+ /** Archive date/time (ISO 8601 format) */
18
+ timestamp: string;
19
+ /** Whether the snapshot is available */
20
+ available: boolean;
21
+ /** HTTP status code */
22
+ status?: number;
23
+ }
24
+
25
+ /**
26
+ * Wayback Machine API response (raw shape, before conversion to WaybackSnapshot).
27
+ */
28
+ export interface WaybackApiResponse {
29
+ archived_snapshots: {
30
+ closest?: {
31
+ url: string;
32
+ timestamp: string;
33
+ available: boolean;
34
+ status?: string;
35
+ };
36
+ };
37
+ url: string;
38
+ }
39
+
40
+ /**
41
+ * WaybackClient configuration.
42
+ */
43
+ export interface WaybackClientConfig {
44
+ /** API base URL (default: https://archive.org/wayback/available) */
45
+ apiBaseUrl?: string;
46
+ /** Timeout (ms, default: 10000) */
47
+ timeoutMs?: number;
48
+ /** Maximum number of retries (default: 2) */
49
+ maxRetries?: number;
50
+ /** Delay between retries (ms, default: 1000) */
51
+ retryDelayMs?: number;
52
+ /** User-Agent (default: SHIKIGAMI/1.10.0) */
53
+ userAgent?: string;
54
+ }
55
+
56
+ /**
57
+ * Default WaybackClient configuration.
58
+ */
59
+ export const DEFAULT_WAYBACK_CONFIG: Required<WaybackClientConfig> = {
60
+ apiBaseUrl: 'https://archive.org/wayback/available',
61
+ timeoutMs: 10000,
62
+ maxRetries: 2,
63
+ retryDelayMs: 1000,
64
+ userAgent: 'SHIKIGAMI/1.10.0',
65
+ };
66
+
67
/**
 * WaybackClient - looks up archived page snapshots through the Wayback Machine
 * availability API.
 */
export class WaybackClient {
  private readonly config: Required<WaybackClientConfig>;

  /**
   * @param config Optional overrides. Options that are omitted or explicitly
   *               `undefined` fall back to DEFAULT_WAYBACK_CONFIG.
   */
  constructor(config?: Partial<WaybackClientConfig>) {
    // A plain spread would let `{ maxRetries: undefined }` overwrite the default,
    // which makes getSnapshot() perform zero attempts.
    const overrides = Object.fromEntries(
      Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
    ) as Partial<WaybackClientConfig>;
    this.config = { ...DEFAULT_WAYBACK_CONFIG, ...overrides };
  }

  /**
   * Get the closest archived snapshot for a URL, retrying on request errors.
   *
   * @param url URL to look up
   * @returns Snapshot information, or `null` when none is found or when all
   *          `maxRetries + 1` requests failed (the last error is written to stderr)
   */
  async getSnapshot(url: string): Promise<WaybackSnapshot | null> {
    let lastError: Error | null = null;

    for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
      try {
        return await this.fetchSnapshot(url);
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));

        if (attempt < this.config.maxRetries) {
          await this.delay(this.config.retryDelayMs);
        }
      }
    }

    console.error(
      `[WaybackClient] Failed to get snapshot for "${url}" after ${this.config.maxRetries + 1} attempts: ${lastError?.message}`
    );
    return null;
  }

  /**
   * Check whether an available archive exists for a URL.
   *
   * @param url URL to check
   * @returns `true` when a snapshot was found and is flagged available
   */
  async isArchived(url: string): Promise<boolean> {
    const snapshot = await this.getSnapshot(url);
    return snapshot?.available ?? false;
  }

  /**
   * Get the URL of the archived content.
   *
   * @param url Original URL
   * @returns Archive URL, or `null` when no available snapshot was found
   */
  async getArchiveUrl(url: string): Promise<string | null> {
    const snapshot = await this.getSnapshot(url);
    return snapshot?.available ? snapshot.url : null;
  }

  /**
   * Call the availability API once and convert the response.
   *
   * @throws Error on a non-2xx response, on timeout, or on a network/JSON failure
   */
  private async fetchSnapshot(url: string): Promise<WaybackSnapshot | null> {
    const apiUrl = `${this.config.apiBaseUrl}?url=${encodeURIComponent(url)}`;

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.config.timeoutMs);

    try {
      const response = await fetch(apiUrl, {
        method: 'GET',
        headers: {
          'User-Agent': this.config.userAgent,
          Accept: 'application/json',
        },
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      // The timer stays armed while the body is read so a stalled body is aborted too.
      const data = (await response.json()) as WaybackApiResponse;
      return this.parseApiResponse(data, url);
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        throw new Error(`Request timed out after ${this.config.timeoutMs}ms`);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Convert the raw API response into a WaybackSnapshot.
   *
   * @returns `null` when the response carries no usable `closest` snapshot
   */
  private parseApiResponse(data: WaybackApiResponse, originalUrl: string): WaybackSnapshot | null {
    // `data` comes from an unchecked JSON cast, so tolerate a null/shapeless payload
    // instead of throwing (which would only trigger pointless retries).
    const closest = data?.archived_snapshots?.closest;

    if (!closest || typeof closest.url !== 'string' || closest.url === '') {
      return null;
    }

    // The API reports the status as a string; keep it only when it parses to a number.
    const parsedStatus = closest.status ? parseInt(closest.status, 10) : undefined;

    return {
      url: closest.url,
      originalUrl,
      timestamp: this.formatTimestamp(closest.timestamp),
      available: closest.available,
      status: parsedStatus !== undefined && Number.isNaN(parsedStatus) ? undefined : parsedStatus,
    };
  }

  /**
   * Convert a Wayback Machine timestamp (YYYYMMDDHHmmss) to ISO 8601.
   * Anything that is not exactly 14 digits is returned unchanged.
   */
  private formatTimestamp(timestamp: string): string {
    if (!/^\d{14}$/.test(timestamp)) {
      return timestamp;
    }

    const year = timestamp.substring(0, 4);
    const month = timestamp.substring(4, 6);
    const day = timestamp.substring(6, 8);
    const hour = timestamp.substring(8, 10);
    const minute = timestamp.substring(10, 12);
    const second = timestamp.substring(12, 14);

    return `${year}-${month}-${day}T${hour}:${minute}:${second}Z`;
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
204
+
205
+ /**
206
+ * Factory function: returns a new WaybackClient on every call (no instance is cached).
207
+ */
208
+ export function createWaybackClient(config?: Partial<WaybackClientConfig>): WaybackClient {
209
+ return new WaybackClient(config);
210
+ }
@@ -4,13 +4,19 @@
4
4
  * Implements REQ-DR-002: Web検索 (page fetching)
5
5
  * Implements REQ-NF-007: プロバイダー設定ファイル対応
6
6
  * Implements REQ-PARSE-001: PDFコンテンツ抽出 (v1.7.0)
7
+ * Implements REQ-SRCH-004: ページ訪問リカバリー (v1.10.0)
7
8
  * Uses Jina AI Reader for LLM-optimized text extraction
8
9
  */
9
10
 
10
11
  import { getConfig } from '../config/loader.js';
11
12
  import { PdfParser } from './pdf-parser/index.js';
12
- import type { PdfParsingConfig } from '../config/types.js';
13
- import { DEFAULT_PDF_PARSING_CONFIG } from '../config/types.js';
13
+ import type { PdfParsingConfig, VisitRecoveryConfig } from '../config/types.js';
14
+ import { DEFAULT_PDF_PARSING_CONFIG, DEFAULT_VISIT_RECOVERY_CONFIG } from '../config/types.js';
15
+ import {
16
+ VisitRecoveryManager,
17
+ type PageFetchResult,
18
+ type VisitRecoveryResult,
19
+ } from './visit/recovery/index.js';
14
20
 
15
21
  export interface PageContent {
16
22
  url: string;
@@ -26,6 +32,17 @@ export interface PageContent {
26
32
  author?: string;
27
33
  creationDate?: string;
28
34
  };
35
+ /** v1.10.0: リカバリー情報 */
36
+ recovery?: {
37
+ /** Wayback Machine を使用したかどうか */
38
+ usedWayback: boolean;
39
+ /** 実際に取得したURL */
40
+ usedUrl: string;
41
+ /** 試行回数 */
42
+ attempts: number;
43
+ /** Waybackスナップショット日時(使用時) */
44
+ waybackTimestamp?: string;
45
+ };
29
46
  }
30
47
 
31
48
  const USER_AGENT =
@@ -58,6 +75,14 @@ function getPdfConfig(): PdfParsingConfig {
58
75
  return config.pdfParsing ?? DEFAULT_PDF_PARSING_CONFIG;
59
76
  }
60
77
 
78
/**
 * v1.10.0: Resolve the visit-recovery settings, falling back to
 * DEFAULT_VISIT_RECOVERY_CONFIG when the loaded config has no `visitRecovery` section.
 */
function getVisitRecoveryConfig(): VisitRecoveryConfig {
  const { visitRecovery } = getConfig();
  return visitRecovery ?? DEFAULT_VISIT_RECOVERY_CONFIG;
}
85
+
61
86
  // v1.7.0: PDF parser instance (lazy initialized)
62
87
  let pdfParser: PdfParser | null = null;
63
88
 
@@ -68,6 +93,22 @@ function getPdfParser(): PdfParser {
68
93
  return pdfParser;
69
94
  }
70
95
 
96
// v1.10.0: Module-level VisitRecoveryManager, created on first use
let visitRecoveryManager: VisitRecoveryManager | null = null;

/**
 * Return the shared VisitRecoveryManager, building it from the current
 * visit-recovery config the first time it is requested.
 */
function getVisitRecoveryManager(): VisitRecoveryManager {
  if (visitRecoveryManager) {
    return visitRecoveryManager;
  }

  const settings = getVisitRecoveryConfig();
  visitRecoveryManager = new VisitRecoveryManager({
    maxRetries: settings.maxRetries,
    retryDelayMs: settings.retryDelayMs,
    timeoutMs: settings.timeoutMs,
    enableWayback: settings.enableWayback,
  });
  return visitRecoveryManager;
}
111
+
71
112
  async function rateLimit(): Promise<void> {
72
113
  const now = Date.now();
73
114
  const minInterval = getMinRequestInterval();
@@ -331,3 +372,119 @@ function extractTitleFromUrl(url: string): string {
331
372
  return 'PDF Document';
332
373
  }
333
374
  }
375
+
376
+ // ============================================================
377
+ // v1.10.0: リカバリー付きページ訪問 (REQ-SRCH-004)
378
+ // ============================================================
379
+
380
/**
 * v1.10.0: Adapt visitPage() to the PageFetchResult shape used by VisitRecoveryManager.
 *
 * @param url  URL to fetch
 * @param goal Optional visit goal, forwarded to visitPage()
 * @returns A success result carrying content/title, or a failure result carrying
 *          the error message; errors thrown by visitPage() are converted, not rethrown
 */
async function fetchPageAsResult(url: string, goal?: string): Promise<PageFetchResult> {
  try {
    const result = await visitPage(url, goal);

    if (result.error) {
      return {
        success: false,
        error: result.error,
      };
    }

    return {
      success: true,
      content: result.content,
      title: result.title,
    };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}

/**
 * v1.10.0: Visit a page with automatic recovery.
 *
 * REQ-SRCH-004-01: fallback on visit failure (Wayback Machine)
 * REQ-SRCH-004-02: automatic retry
 * REQ-SRCH-004-03: result merging
 *
 * Falls through to plain visitPage() when recovery is disabled or when the URL is
 * detected as a PDF (PDFs are not subject to recovery).
 *
 * @param url  URL to visit
 * @param goal Purpose of the visit (optional); forwarded to every underlying visitPage() call
 * @returns Page content; on total failure `error` is set and `content` is empty
 */
export async function visitPageWithRecovery(url: string, goal?: string): Promise<PageContent> {
  const fetchedAt = new Date().toISOString();
  const config = getVisitRecoveryConfig();

  // Recovery disabled: use the regular path
  if (!config.enabled) {
    return await visitPage(url, goal);
  }

  const manager = getVisitRecoveryManager();

  // v1.7.0: PDF detection runs first (before recovery)
  const pdfConfig = getPdfConfig();
  if (pdfConfig.enabled) {
    const parser = getPdfParser();
    const detection = parser.isPdfUrl(url);

    if (detection.isPdf) {
      console.error(`[SHIKIGAMI] PDF detected (before recovery): ${url}`);
      // PDFs go through the regular path (not subject to recovery)
      return await visitPage(url, goal);
    }
  }

  // Run the fetch with recovery. The goal is forwarded so that the recovery path
  // calls visitPage() the same way the non-recovery paths above do.
  const result = await manager.recover(url, (targetUrl) => fetchPageAsResult(targetUrl, goal));

  if (result.success) {
    return {
      url: result.originalUrl,
      title: result.title ?? '',
      content: result.content ?? '',
      fetchedAt,
      recovery: {
        usedWayback: result.usedWayback,
        usedUrl: result.usedUrl,
        attempts: result.attempts,
        waybackTimestamp: result.waybackSnapshot?.timestamp,
      },
    };
  }

  // Every recovery attempt failed
  return {
    url,
    title: '',
    content: '',
    fetchedAt,
    error: result.error ?? 'Page fetch failed after all recovery attempts',
    recovery: {
      usedWayback: result.usedWayback,
      usedUrl: result.usedUrl,
      attempts: result.attempts,
    },
  };
}
474
+
475
/**
 * v1.10.0: Report recovery statistics from the shared manager.
 *
 * @returns The manager's stats, or `null` when no manager has been created yet
 */
export function getVisitRecoveryStats(): ReturnType<VisitRecoveryManager['getStats']> | null {
  return visitRecoveryManager ? visitRecoveryManager.getStats() : null;
}
484
+
485
+ /**
486
+ * v1.10.0: Reset the recovery manager (for tests). Clears the cached instance.
487
+ */
488
+ export function resetVisitRecoveryManager(): void {
489
+ visitRecoveryManager = null;
490
+ }