botrun-crawler-2 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +126 -0
- package/dist/cli.d.ts +10 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +155 -0
- package/dist/cli.js.map +1 -0
- package/dist/crawler/cli.d.ts +19 -0
- package/dist/crawler/cli.d.ts.map +1 -0
- package/dist/crawler/cli.js +179 -0
- package/dist/crawler/cli.js.map +1 -0
- package/dist/crawler/index.d.ts +146 -0
- package/dist/crawler/index.d.ts.map +1 -0
- package/dist/crawler/index.js +670 -0
- package/dist/crawler/index.js.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/agent.d.ts +34 -0
- package/dist/lib/agent.d.ts.map +1 -0
- package/dist/lib/agent.js +73 -0
- package/dist/lib/agent.js.map +1 -0
- package/dist/lib/cache.d.ts +49 -0
- package/dist/lib/cache.d.ts.map +1 -0
- package/dist/lib/cache.js +141 -0
- package/dist/lib/cache.js.map +1 -0
- package/dist/lib/filename-decoder.d.ts +62 -0
- package/dist/lib/filename-decoder.d.ts.map +1 -0
- package/dist/lib/filename-decoder.js +229 -0
- package/dist/lib/filename-decoder.js.map +1 -0
- package/dist/lib/http-client.d.ts +86 -0
- package/dist/lib/http-client.d.ts.map +1 -0
- package/dist/lib/http-client.js +373 -0
- package/dist/lib/http-client.js.map +1 -0
- package/dist/lib/index.d.ts +15 -0
- package/dist/lib/index.d.ts.map +1 -0
- package/dist/lib/index.js +19 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/logger.d.ts +41 -0
- package/dist/lib/logger.d.ts.map +1 -0
- package/dist/lib/logger.js +122 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/scene-detector.d.ts +92 -0
- package/dist/lib/scene-detector.d.ts.map +1 -0
- package/dist/lib/scene-detector.js +297 -0
- package/dist/lib/scene-detector.js.map +1 -0
- package/dist/processors/audio.d.ts +20 -0
- package/dist/processors/audio.d.ts.map +1 -0
- package/dist/processors/audio.js +110 -0
- package/dist/processors/audio.js.map +1 -0
- package/dist/processors/base.d.ts +53 -0
- package/dist/processors/base.d.ts.map +1 -0
- package/dist/processors/base.js +194 -0
- package/dist/processors/base.js.map +1 -0
- package/dist/processors/data.d.ts +48 -0
- package/dist/processors/data.d.ts.map +1 -0
- package/dist/processors/data.js +206 -0
- package/dist/processors/data.js.map +1 -0
- package/dist/processors/document.d.ts +20 -0
- package/dist/processors/document.d.ts.map +1 -0
- package/dist/processors/document.js +137 -0
- package/dist/processors/document.js.map +1 -0
- package/dist/processors/image.d.ts +20 -0
- package/dist/processors/image.d.ts.map +1 -0
- package/dist/processors/image.js +92 -0
- package/dist/processors/image.js.map +1 -0
- package/dist/processors/index.d.ts +53 -0
- package/dist/processors/index.d.ts.map +1 -0
- package/dist/processors/index.js +177 -0
- package/dist/processors/index.js.map +1 -0
- package/dist/processors/text.d.ts +44 -0
- package/dist/processors/text.d.ts.map +1 -0
- package/dist/processors/text.js +262 -0
- package/dist/processors/text.js.map +1 -0
- package/dist/processors/video.d.ts +20 -0
- package/dist/processors/video.d.ts.map +1 -0
- package/dist/processors/video.js +93 -0
- package/dist/processors/video.js.map +1 -0
- package/dist/scraper/cli.d.ts +23 -0
- package/dist/scraper/cli.d.ts.map +1 -0
- package/dist/scraper/cli.js +118 -0
- package/dist/scraper/cli.js.map +1 -0
- package/dist/scraper/index.d.ts +120 -0
- package/dist/scraper/index.d.ts.map +1 -0
- package/dist/scraper/index.js +372 -0
- package/dist/scraper/index.js.map +1 -0
- package/dist/types/index.d.ts +123 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +40 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +108 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 檔案名稱編碼修正模組
|
|
3
|
+
*
|
|
4
|
+
* 解決問題:許多伺服器錯誤地將 UTF-8 檔名當作 Latin-1 發送
|
|
5
|
+
* 例如:「申請」→ UTF-8 bytes [0xE7, 0x94, 0xB3, 0xE8, 0xAB, 0x8B]
|
|
6
|
+
* 被當作 Latin-1 解讀 → "ç³è«"(亂碼)
|
|
7
|
+
*
|
|
8
|
+
* 遵循原則:
|
|
9
|
+
* - KISS: 簡單的字節層級轉換
|
|
10
|
+
* - SOLID/SRP: 專注於編碼修正
|
|
11
|
+
* - DRY: 集中處理編碼問題
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Heuristically decides whether a string looks like UTF-8 bytes that were
 * mistakenly decoded as Latin-1 (mojibake).
 *
 * Two independent signals are checked, in order:
 * 1. More than half of the non-ASCII characters lie in the Latin-1
 *    supplement range (0x80-0xFF).
 * 2. At least two characters belong to a fixed set of accented letters
 *    (matched case-insensitively) that correspond to common UTF-8 lead
 *    bytes such as 0xE4-0xE9.
 *
 * @param str - Candidate string, typically a file name.
 * @returns `true` when either signal fires; `false` otherwise, including
 *          for empty or missing input.
 */
export function isGarbledUtf8(str) {
    if (!str) {
        return false;
    }
    // Signal 1: share of non-ASCII characters that sit in 0x80-0xFF.
    let nonAsciiTotal = 0;
    let latin1Supplement = 0;
    for (const symbol of str) {
        const unit = symbol.charCodeAt(0);
        if (unit <= 127) {
            continue;
        }
        nonAsciiTotal += 1;
        // unit is already above 127 here, so only the upper bound matters.
        if (unit <= 0xFF) {
            latin1Supplement += 1;
        }
    }
    if (nonAsciiTotal > 0 && latin1Supplement / nonAsciiTotal > 0.5) {
        return true;
    }
    // Signal 2: two or more letters from the typical mojibake set.
    const suspicious = str.match(/[äåæçèéêëìíîïðñòóôõöøùúûüýþ]/gi);
    return suspicious !== null && suspicious.length >= 2;
}
|
|
52
|
+
/**
 * Attempts to repair a string whose UTF-8 bytes were decoded as Latin-1.
 *
 * Every UTF-16 code unit (expected to be 0-255) is treated as one raw byte
 * and the resulting byte sequence is decoded again as strict UTF-8.
 *
 * @example
 * fixGarbledUtf8("\u00e7\u0094\u00b3\u00e8\u00ab\u008b") // "申請"
 *
 * @param garbled - The suspected mojibake string.
 * @returns The re-decoded string, or `null` when the input is empty,
 *          contains a code unit above 255, is not valid UTF-8 once mapped
 *          back to bytes, or decodes to text without any CJK ideograph or
 *          printable ASCII character.
 */
export function fixGarbledUtf8(garbled) {
    if (!garbled) {
        return null;
    }
    try {
        const rawBytes = new Uint8Array(garbled.length);
        let position = 0;
        while (position < garbled.length) {
            const unit = garbled.charCodeAt(position);
            // Anything above 0xFF cannot have come from a Latin-1 decode.
            if (unit > 255) {
                return null;
            }
            rawBytes[position] = unit;
            position += 1;
        }
        // fatal: true makes invalid byte sequences throw instead of
        // silently producing replacement characters.
        const text = new TextDecoder("utf-8", { fatal: true }).decode(rawBytes);
        // An empty result can never satisfy this test, so it is rejected too.
        const looksReadable = /[\u4e00-\u9fff\u3400-\u4dbf\u0020-\u007e]/.test(text);
        return looksReadable ? text : null;
    }
    catch {
        // Not valid UTF-8: the input was probably not this kind of mojibake.
        return null;
    }
}
|
|
95
|
+
/**
 * Decodes a percent-encoded file name (for example URL-encoded Chinese).
 *
 * @param filename - File name that may contain `%XX` escapes.
 * @returns The decoded name, or `null` when the input is empty, contains
 *          no `%XX` escape, decodes to the same text, or is malformed.
 */
export function decodeUrlEncodedFilename(filename) {
    if (!filename) {
        return null;
    }
    try {
        const hasEscape = /%[0-9A-Fa-f]{2}/.test(filename);
        if (!hasEscape) {
            return null;
        }
        const plain = decodeURIComponent(filename);
        // Only report a result when decoding actually changed something.
        return plain && plain !== filename ? plain : null;
    }
    catch {
        // decodeURIComponent throws on malformed escape sequences.
        return null;
    }
}
|
|
117
|
+
/**
 * Derives a file name from link text by cleaning it up so it is usable as
 * a file name.
 *
 * Steps: strip parenthesised date / "更新" / "新增" annotations, collapse
 * all whitespace runs to single spaces, replace characters that are not
 * allowed in file names with `_`, cap the length at 100 UTF-16 code units
 * and append the extension when it is not already present.
 *
 * @param text - Link text to turn into a file name.
 * @param extension - Extension without the leading dot (e.g. `"pdf"`);
 *        when falsy no extension is appended.
 * @returns The cleaned file name, or `null` when the text is empty, cleans
 *          down to nothing, or equals the `"[無文字]"` placeholder.
 */
export function deriveFilenameFromText(text, extension) {
    if (!text) {
        return null;
    }
    // Annotations are removed BEFORE whitespace is collapsed; otherwise
    // removing one from the middle of the text leaves a double space.
    let cleaned = text
        .replace(/\(\d{4}\/\d{2}\/\d{2}[^)]*\)/g, "") // e.g. "(2025/04/22更新)"
        .replace(/\([^)]*更新\)/g, "") // "(... 更新)"
        .replace(/\([^)]*新增\)/g, "") // "(... 新增)"
        .replace(/\s+/g, " ") // newlines, tabs and space runs -> one space
        .trim();
    if (!cleaned || cleaned === "[無文字]") {
        return null;
    }
    // Replace characters that are not allowed in file names.
    cleaned = cleaned.replace(/[<>:"/\\|?*]/g, "_");
    // Cap the length of the base name.
    const maxLength = 100;
    if (cleaned.length > maxLength) {
        let end = maxLength;
        const lastUnit = cleaned.charCodeAt(end - 1);
        // Do not cut a surrogate pair in half: a lone high surrogate is
        // not a valid character in a file name.
        if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) {
            end -= 1;
        }
        // Avoid "name .ext" when the cut lands right after a space.
        cleaned = cleaned.substring(0, end).trimEnd();
    }
    // Append the extension unless the name already ends with it.
    if (extension && !cleaned.toLowerCase().endsWith(`.${extension.toLowerCase()}`)) {
        cleaned = `${cleaned}.${extension}`;
    }
    return cleaned;
}
|
|
148
|
+
/**
 * Known MIME types and the file extension (without dot) used for each.
 * A Map is used so inherited object members such as "constructor" can
 * never be returned as an extension.
 */
const MIME_TYPE_EXTENSIONS = new Map([
    ["application/pdf", "pdf"],
    ["application/msword", "doc"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
    ["application/vnd.ms-excel", "xls"],
    ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"],
    ["application/vnd.ms-powerpoint", "ppt"],
    ["application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"],
    ["application/vnd.oasis.opendocument.text", "odt"],
    ["application/vnd.oasis.opendocument.spreadsheet", "ods"],
    ["application/vnd.oasis.opendocument.presentation", "odp"],
    ["application/zip", "zip"],
    ["application/x-rar-compressed", "rar"],
    ["image/jpeg", "jpg"],
    ["image/png", "png"],
    ["text/plain", "txt"],
    ["text/csv", "csv"],
]);
/**
 * Infers a file extension from a MIME type.
 *
 * The lookup ignores letter case, surrounding whitespace and any
 * parameters after `;` (e.g. `"Application/PDF; charset=binary"`).
 *
 * @param mimeType - MIME type, optionally with parameters.
 * @returns The extension without a leading dot, or `null` when the type is
 *          unknown or the input is not a string.
 */
export function getExtensionFromMimeType(mimeType) {
    if (typeof mimeType !== "string") {
        return null;
    }
    // Keep only the "type/subtype" part and normalize it.
    const essence = mimeType.split(";", 1)[0].trim().toLowerCase();
    const extension = MIME_TYPE_EXTENSIONS.get(essence);
    return extension === undefined ? null : extension;
}
|
|
172
|
+
/**
 * Extracts the extension from a file name.
 *
 * @param filename - File name to inspect.
 * @returns The lower-cased run of letters/digits after the final dot, or
 *          `null` when the input is empty or does not end in such a suffix.
 */
export function getExtensionFromFilename(filename) {
    if (!filename) {
        return null;
    }
    const suffix = filename.match(/\.([a-z0-9]+)$/i);
    if (suffix === null) {
        return null;
    }
    return suffix[1].toLowerCase();
}
|
|
181
|
+
/**
 * Picks the best available file name using several strategies.
 *
 * 1. If the name looks like UTF-8 misread as Latin-1, try to repair it;
 *    when the repair fails, fall back to a name derived from the link text.
 * 2. Otherwise (or when step 1 produced nothing), try percent-decoding.
 * 3. Otherwise return the name unchanged.
 *
 * When no file name is given at all, a name is derived from the link text
 * and the extension implied by the MIME type, if both are available.
 *
 * NOTE(review): the returned name is not sanitized here; a percent-decoded
 * name can contain path separators. Presumably callers sanitize before
 * writing to disk — confirm.
 *
 * @param filename - Original file name (possibly garbled or missing).
 * @param linkText - Link text used as a fallback source.
 * @param mimeType - MIME type used to infer the extension.
 * @returns The chosen file name, or `undefined` when there is no file name
 *          and none can be derived.
 */
export function smartFilenameDecoder(filename, linkText, mimeType) {
    if (!filename) {
        // No name at all: the link text plus a MIME-derived extension is
        // the only possible source.
        const mimeExtension = linkText && mimeType ? getExtensionFromMimeType(mimeType) : null;
        if (!mimeExtension) {
            return undefined;
        }
        return deriveFilenameFromText(linkText, mimeExtension) || undefined;
    }
    // Prefer the extension already on the name, then the MIME type's.
    const fallbackExtension = getExtensionFromFilename(filename)
        || (mimeType ? getExtensionFromMimeType(mimeType) : null);
    // Strategy 1: repair UTF-8 that was decoded as Latin-1.
    if (isGarbledUtf8(filename)) {
        const repaired = fixGarbledUtf8(filename);
        if (repaired) {
            return repaired;
        }
        // Repair failed: use the link text when an extension is known.
        const fromLinkText = linkText && fallbackExtension
            ? deriveFilenameFromText(linkText, fallbackExtension)
            : null;
        if (fromLinkText) {
            return fromLinkText;
        }
    }
    // Strategy 2: percent-decoding. Strategy 3: the name as given.
    return decodeUrlEncodedFilename(filename) || filename;
}
|
|
229
|
+
//# sourceMappingURL=filename-decoder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filename-decoder.js","sourceRoot":"","sources":["../../src/lib/filename-decoder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC,GAAG;QAAE,OAAO,KAAK,CAAC;IAEvB,qBAAqB;IACrB,0BAA0B;IAC1B,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChC,gBAAgB;QAChB,IAAI,IAAI,GAAG,GAAG,EAAE,CAAC;YACf,UAAU,EAAE,CAAC;YACb,0BAA0B;YAC1B,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE,CAAC;gBACjC,cAAc,EAAE,CAAC;YACnB,CAAC;QACH,CAAC;IACH,CAAC;IAED,4CAA4C;IAC5C,IAAI,UAAU,GAAG,CAAC,IAAI,cAAc,GAAG,UAAU,GAAG,GAAG,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qCAAqC;IACrC,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,+BAA+B,CAAC;IAC3D,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC;IACvE,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,IAAI,CAAC;QACH,yBAAyB;QACzB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACnC,0CAA0C;YAC1C,IAAI,IAAI,GAAG,GAAG,EAAE,CAAC;gBACf,OAAO,IAAI,CAAC;YACd,CAAC;YACD,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAClB,CAAC;QAED,aAAa;QACb,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAEtC,SAAS;QACT,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qBAAqB;QACrB,yBAAyB;QACzB,MAAM,gBAAgB,GAAG,2CAA2C,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnF,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,6BAA6B;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,QAAgB;IACvD,IAAI,CAAC
,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,IAAI,CAAC;QACH,gBAAgB;QAChB,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YACtC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,OAAO,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAC7C,eAAe;QACf,IAAI,OAAO,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY,EAAE,SAAiB;IACpE,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,iBAAiB;IACjB,IAAI,OAAO,GAAG,IAAI;SACf,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAW,QAAQ;SACzC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAc,UAAU;SAC5C,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAc,SAAS;SAC3C,OAAO,CAAC,+BAA+B,EAAE,EAAE,CAAC,CAAC,sBAAsB;SACnE,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAK,cAAc;SAC9C,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAK,cAAc;SAC9C,IAAI,EAAE,CAAC;IAEV,IAAI,CAAC,OAAO,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,aAAa;IACb,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC;IAEhD,SAAS;IACT,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACzB,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACtC,CAAC;IAED,QAAQ;IACR,IAAI,SAAS,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,WAAW,EAAE,EAAE,CAAC,EAAE,CAAC;QAChF,OAAO,GAAG,GAAG,OAAO,IAAI,SAAS,EAAE,CAAC;IACtC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,QAAgB;IACvD,MAAM,SAAS,GAA2B;QACxC,iBAAiB,EAAE,KAAK;QACxB,oBAAoB,EAAE,KAAK;QAC3B,yEAAyE,EAAE,MAAM;QACjF,0BAA0B,EAAE,KAAK;QACjC,mEAAmE,EAAE,MAAM;QAC3E,+BAA+B,EAAE,KAAK;QACtC,2EAA2E,EAAE,MAAM;QACnF,yCAAyC,EAAE,KAAK;QAChD,gDAAgD,EAAE,KAAK;QACvD,iDAAiD,EAAE,KAAK;QACxD,iBAAiB,EAAE,KAAK;QACxB,8BAA8B,EAAE,KAAK;QACrC,YAAY,EAAE,KAAK;QACnB,WAAW,EAAE,KAAK;QAClB,YAAY,EAAE,KAAK;QACnB,UAAU,EAAE,KAAK;KAClB,CAAC;IAEF,OAAO,SAAS,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,QAAgB;IACvD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChD,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAA
I,CAAC;AAC/C,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,oBAAoB,CAClC,QAA4B,EAC5B,QAAiB,EACjB,QAAiB;IAEjB,iBAAiB;IACjB,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,QAAQ,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;YAC/C,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,sBAAsB,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,SAAS,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,UAAU;IACV,MAAM,WAAW,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IACvD,MAAM,GAAG,GAAG,WAAW,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElF,8BAA8B;IAC9B,IAAI,aAAa,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5B,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;QACvC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,KAAK,CAAC;QACf,CAAC;QAED,gBAAgB;QAChB,IAAI,QAAQ,IAAI,GAAG,EAAE,CAAC;YACpB,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;YACtD,IAAI,OAAO,EAAE,CAAC;gBACZ,OAAO,OAAO,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,eAAe;IACf,MAAM,UAAU,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IACtD,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,qBAAqB;IACrB,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTTP Client 模組 - 支援 HTTPS Proxy
|
|
3
|
+
*
|
|
4
|
+
* 相容環境變數:
|
|
5
|
+
* - HTTPS_PROXY / https_proxy
|
|
6
|
+
* - HTTP_PROXY / http_proxy
|
|
7
|
+
* - NO_PROXY / no_proxy
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Optional per-request settings accepted by the fetch helpers declared in
 * this file.
 */
export interface FetchOptions {
|
|
10
|
+
/** Request timeout. NOTE(review): the unit is not visible in this declaration file — presumably milliseconds; confirm against the implementation. */
timeout?: number;
|
|
11
|
+
/** User-Agent string — presumably sent as the `User-Agent` request header; confirm. */
userAgent?: string;
|
|
12
|
+
/** Header name/value pairs — presumably added to the outgoing request; confirm. */
headers?: Record<string, string>;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Result shape that `fetchWithProxy` resolves to.
 */
export interface FetchResult {
|
|
15
|
+
/** Success flag — presumably mirrors the HTTP response's `ok`; confirm. */
ok: boolean;
|
|
16
|
+
/** Status code — presumably the HTTP status of the response; confirm. */
status: number;
|
|
17
|
+
/** Text payload — presumably the response body; confirm. */
text: string;
|
|
18
|
+
/** Optional error description — presumably set only when the request failed; confirm. */
error?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
 * Returns the proxy URL taken from environment variables.
 * Parameterless variant that reads the environment directly.
 *
 * @returns The proxy URL, or `undefined`.
 */
|
|
24
|
+
export declare function getProxyUrl(): string | undefined;
|
|
25
|
+
/**
 * Checks whether the proxy should be bypassed.
 *
 * @param targetUrl - URL of the request being considered.
 * @returns `true` when the proxy should be bypassed.
 */
|
|
28
|
+
export declare function shouldBypassProxy(targetUrl: string): boolean;
|
|
29
|
+
/**
 * Returns the proxy URL that applies to the given target URL.
 *
 * @param targetUrl - URL of the request being considered.
 * @returns The applicable proxy URL, or `undefined`.
 */
|
|
32
|
+
export declare function getProxyUrlForTarget(targetUrl: string): string | undefined;
|
|
33
|
+
/**
 * Sends an HTTP request through a proxy.
 *
 * Node.js native fetch does not support proxies; that would require the
 * ProxyAgent from undici. To avoid the extra dependency, this relies on
 * environment variables so that Node.js handles it automatically.
 *
 * When a proxy is required, using global-agent or configuring the proxy
 * at startup is recommended.
 *
 * @param url - URL to request.
 * @param options - Optional request settings.
 * @returns A promise resolving to a `FetchResult`.
 */
|
|
41
|
+
export declare function fetchWithProxy(url: string, options?: FetchOptions): Promise<FetchResult>;
|
|
42
|
+
/**
 * Inspects the proxy configuration.
 *
 * @returns An object with optional `httpsProxy`, `httpProxy` and `noProxy`
 *          strings — presumably taken from the HTTPS_PROXY / HTTP_PROXY /
 *          NO_PROXY environment variables named in the file header; confirm.
 */
|
|
45
|
+
export declare function getProxyConfig(): {
|
|
46
|
+
httpsProxy?: string;
|
|
47
|
+
httpProxy?: string;
|
|
48
|
+
noProxy?: string;
|
|
49
|
+
};
|
|
50
|
+
/**
 * Displays the proxy status (for debugging).
 */
|
|
53
|
+
export declare function logProxyStatus(): void;
|
|
54
|
+
/**
 * MIME information about a resource, as returned by `parseContentType`,
 * `fetchMimeType` and `fetchMimeTypes`.
 */
export interface MimeTypeInfo {
|
|
55
|
+
/** MIME type, e.g. "text/html". */
mimeType: string;
|
|
56
|
+
/** Charset parameter from the Content-Type header, e.g. "utf-8". */
charset?: string;
|
|
57
|
+
/** Content length — presumably the Content-Length header value in bytes; confirm. */
contentLength?: number;
|
|
58
|
+
/** File name — presumably the one parsed from Content-Disposition; confirm. */
filename?: string;
|
|
59
|
+
}
|
|
60
|
+
/**
 * Parses a Content-Type header.
 * @example "text/html; charset=utf-8" -> { mimeType: "text/html", charset: "utf-8" }
 *
 * @param contentType - Raw header value, or `null`.
 * @returns The parsed MIME information.
 */
|
|
64
|
+
export declare function parseContentType(contentType: string | null): MimeTypeInfo;
|
|
65
|
+
/**
 * Parses a Content-Disposition header to obtain the file name.
 * @example "attachment; filename=\"report.pdf\"" -> "report.pdf"
 * @example "attachment; filename*=UTF-8''%E5%A0%B1%E5%91%8A.pdf" -> "報告.pdf"
 *
 * @param header - Raw header value, or `null`.
 * @returns The file name, or `undefined`.
 */
|
|
70
|
+
export declare function parseContentDisposition(header: string | null): string | undefined;
|
|
71
|
+
/**
 * Infers a MIME type from a file name.
 *
 * @param filename - File name to inspect.
 * @returns The inferred MIME type, or `undefined`.
 */
|
|
74
|
+
export declare function inferMimeTypeFromFilename(filename: string): string | undefined;
|
|
75
|
+
/**
 * Uses a HEAD request to obtain a resource's MIME type without downloading
 * its content. When the type is octet-stream, the real type is inferred
 * from the Content-Disposition file name.
 *
 * @param url - URL of the resource.
 * @param options - Optional request settings.
 * @returns A promise resolving to the resource's MIME information.
 */
|
|
79
|
+
export declare function fetchMimeType(url: string, options?: FetchOptions): Promise<MimeTypeInfo>;
|
|
80
|
+
/**
 * Fetches the MIME types of multiple URLs in a batch (processed in
 * parallel).
 *
 * @param urls - URLs to inspect.
 * @param options - Optional request settings plus `concurrency` —
 *        presumably the maximum number of requests in flight; confirm.
 * @returns A promise resolving to a map of `MimeTypeInfo` values with
 *          string keys — presumably keyed by URL; confirm.
 */
|
|
83
|
+
export declare function fetchMimeTypes(urls: string[], options?: FetchOptions & {
|
|
84
|
+
concurrency?: number;
|
|
85
|
+
}): Promise<Map<string, MimeTypeInfo>>;
|
|
86
|
+
//# sourceMappingURL=http-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http-client.d.ts","sourceRoot":"","sources":["../../src/lib/http-client.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,OAAO,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;GAGG;AACH,wBAAgB,WAAW,IAAI,MAAM,GAAG,SAAS,CAOhD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CA2B5D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAG1E;AAED;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAClC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,WAAW,CAAC,CAgDtB;AA4FD;;GAEG;AACH,wBAAgB,cAAc,IAAI;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAMA;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,IAAI,CASrC;AAMD,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,GAAG,YAAY,CAkBzE;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,SAAS,CA8BjF;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CA4C9E;AAED;;;GAGG;AACH,wBAAsB,aAAa,CACjC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,YAAY,CAAC,CA6DvB;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,YAAY,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACpD,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,CAoBpC"}
|