excel-csv-handler 1.0.11 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +30 -30
- package/src/excel-csv-handler.d.ts +71 -0
- package/src/index.js +176 -12
package/package.json
CHANGED
|
@@ -1,31 +1,31 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
}
|
|
2
|
+
"name": "excel-csv-handler",
|
|
3
|
+
"version": "1.1.1",
|
|
4
|
+
"description": "A Node.js utility to read/write Excel and CSV files with GBK encoding support",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"types": "src/excel-csv-handler.d.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"files": [
|
|
9
|
+
"src/",
|
|
10
|
+
"README.md"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"excel",
|
|
17
|
+
"csv",
|
|
18
|
+
"xlsx",
|
|
19
|
+
"gbk",
|
|
20
|
+
"node",
|
|
21
|
+
"file"
|
|
22
|
+
],
|
|
23
|
+
"author": "Chao_bei",
|
|
24
|
+
"license": "MIT",
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"chardet": "^2.1.1",
|
|
27
|
+
"fast-csv": "^5.0.5",
|
|
28
|
+
"iconv-lite": "^0.7.0",
|
|
29
|
+
"xlsx": "^0.18.5"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -44,4 +44,75 @@ export default class ExcelCsvHandler {
|
|
|
44
44
|
headers?: string[] | null,
|
|
45
45
|
encoding?: string
|
|
46
46
|
): Promise<void>;
|
|
47
|
+
|
|
48
|
+
/**
 * Turns a path written relative to the calling file into an absolute path.
 *
 * @param importMetaUrl - The calling file's `import.meta.url`.
 * @param relativePath - Path relative to the calling file, with or without a
 *   leading `./` (e.g. `'./dataset/file.docx'` or `'dataset/file.docx'`).
 * @returns The absolute path.
 *
 * @example
 * ```ts
 * import ExcelCsvHandler from 'excel-csv-handler';
 * const docxPath = ExcelCsvHandler.getAbsolutePath(import.meta.url, './dataset/sci-high-question.docx');
 * const csvPath = ExcelCsvHandler.getAbsolutePath(import.meta.url, 'data/output.csv');
 * ```
 */
static getAbsolutePath(importMetaUrl: string, relativePath: string): string;
|
|
62
|
+
|
|
63
|
+
/**
 * Requests every page of a paginated source and returns the combined list.
 *
 * @param requestFn - Request function; it receives a single parameter object
 *   such as `{ pageNum: 1, pageSize: 50 }`.
 * @param config - Pagination settings.
 * @returns The complete list of items.
 *
 * @example
 * ```ts
 * // Basic usage (default pageNum / pageSize parameter names)
 * const data = await ExcelCsvHandler.getPagination(fetchFn, {
 *   listPath: 'data.list',
 *   totalPath: 'data.total',
 *   pageSize: 50
 * });
 *
 * // Custom parameter names
 * const data = await ExcelCsvHandler.getPagination(fetchFn, {
 *   listPath: 'result.items',
 *   totalPath: 'result.count',
 *   pageSize: 100,
 *   pageNumKey: 'page',  // send `page` instead of `pageNum`
 *   pageSizeKey: 'size'  // send `size` instead of `pageSize`
 * });
 * ```
 */
static getPagination<T = any>(
  requestFn: (params: Record<string, any>) => Promise<any>,
  config: PaginationConfig
): Promise<T[]>;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
 * Settings for a paginated request.
 */
export interface PaginationConfig {
  /** Property path of the list inside a response, e.g. `'data.pageData.list'`. */
  listPath: string;

  /** Property path of the total item count inside a response, e.g. `'data.pageData.count'`. */
  totalPath: string;

  /** Number of items per page. */
  pageSize: number;

  /** Name of the page-number field in the request parameters; defaults to `'pageNum'`. */
  pageNumKey?: string;

  /** Name of the page-size field in the request parameters; defaults to `'pageSize'`. */
  pageSizeKey?: string;
}
|
package/src/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// excel-csv-handler.js
|
|
2
2
|
import { createRequire } from 'module';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
3
4
|
|
|
4
5
|
import * as fs from 'fs';
|
|
5
6
|
import * as path from 'path';
|
|
@@ -18,33 +19,33 @@ class ExcelCsvHandler {
|
|
|
18
19
|
detectEncoding(filePath) {
|
|
19
20
|
// 读取文件的前几个字节来检测编码
|
|
20
21
|
const buffer = fs.readFileSync(filePath);
|
|
21
|
-
|
|
22
|
+
|
|
22
23
|
// 检查 UTF-8 BOM
|
|
23
|
-
if (buffer.length >= 3 &&
|
|
24
|
-
buffer[0] === 0xEF &&
|
|
25
|
-
buffer[1] === 0xBB &&
|
|
24
|
+
if (buffer.length >= 3 &&
|
|
25
|
+
buffer[0] === 0xEF &&
|
|
26
|
+
buffer[1] === 0xBB &&
|
|
26
27
|
buffer[2] === 0xBF) {
|
|
27
28
|
return 'utf8';
|
|
28
29
|
}
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
// 使用 chardet 库检测编码
|
|
31
32
|
const detected = chardet.detect(buffer);
|
|
32
|
-
|
|
33
|
+
|
|
33
34
|
// 将检测结果映射到 iconv-lite 支持的编码名称
|
|
34
35
|
if (detected) {
|
|
35
36
|
const encoding = detected.toLowerCase();
|
|
36
|
-
|
|
37
|
+
|
|
37
38
|
// GB2312, GBK, GB18030 都映射为 gbk
|
|
38
39
|
if (encoding.includes('gb') || encoding.includes('gbk')) {
|
|
39
40
|
return 'gbk';
|
|
40
41
|
}
|
|
41
|
-
|
|
42
|
+
|
|
42
43
|
// UTF-8 相关
|
|
43
44
|
if (encoding.includes('utf-8') || encoding.includes('utf8')) {
|
|
44
45
|
return 'utf8';
|
|
45
46
|
}
|
|
46
47
|
}
|
|
47
|
-
|
|
48
|
+
|
|
48
49
|
// 默认返回 gbk(考虑到中国用户常用 GBK)
|
|
49
50
|
return 'gbk';
|
|
50
51
|
}
|
|
@@ -138,7 +139,7 @@ class ExcelCsvHandler {
|
|
|
138
139
|
async #readCsvFile(filePath, headerRow) {
|
|
139
140
|
// 自动检测编码
|
|
140
141
|
const encoding = this.detectEncoding(filePath);
|
|
141
|
-
|
|
142
|
+
|
|
142
143
|
return new Promise((resolve, reject) => {
|
|
143
144
|
const allRows = [];
|
|
144
145
|
const stream = fs.createReadStream(filePath)
|
|
@@ -195,7 +196,7 @@ class ExcelCsvHandler {
|
|
|
195
196
|
});
|
|
196
197
|
|
|
197
198
|
const buffer = iconv.encode(csvString, encoding);
|
|
198
|
-
|
|
199
|
+
|
|
199
200
|
// 如果是 UTF-8,添加 BOM 标记以确保 Excel 正确识别
|
|
200
201
|
if (encoding === 'utf8') {
|
|
201
202
|
const bom = Buffer.from([0xEF, 0xBB, 0xBF]);
|
|
@@ -227,7 +228,7 @@ class ExcelCsvHandler {
|
|
|
227
228
|
await this.#writeCsvFile(filePath, data, headers, encoding);
|
|
228
229
|
return;
|
|
229
230
|
}
|
|
230
|
-
|
|
231
|
+
|
|
231
232
|
// 文件存在时,检测现有文件的编码
|
|
232
233
|
const detectedEncoding = this.detectEncoding(filePath);
|
|
233
234
|
|
|
@@ -256,6 +257,169 @@ class ExcelCsvHandler {
|
|
|
256
257
|
}
|
|
257
258
|
}
|
|
258
259
|
|
|
260
|
+
/**
|
|
261
|
+
* 从调用文件的相对路径获取绝对路径
|
|
262
|
+
* @param {string} importMetaUrl - 调用文件的 import.meta.url
|
|
263
|
+
* @param {string} relativePath - 相对路径(如 './dataset/file.docx' 或 'dataset/file.docx')
|
|
264
|
+
* @returns {string} 绝对路径
|
|
265
|
+
*
|
|
266
|
+
* @example
|
|
267
|
+
* // 在你的文件中使用
|
|
268
|
+
* import ExcelCsvHandler from 'excel-csv-handler';
|
|
269
|
+
* const docxPath = ExcelCsvHandler.getAbsolutePath(import.meta.url, './dataset/sci-high-question.docx');
|
|
270
|
+
* const csvPath = ExcelCsvHandler.getAbsolutePath(import.meta.url, 'data/output.csv');
|
|
271
|
+
*/
|
|
272
|
+
ExcelCsvHandler.getAbsolutePath = function (importMetaUrl, relativePath) {
|
|
273
|
+
const __filename = fileURLToPath(importMetaUrl);
|
|
274
|
+
const __dirname = path.dirname(__filename);
|
|
275
|
+
// 去除开头的 './' 如果存在
|
|
276
|
+
const cleanPath = relativePath.startsWith('./') ? relativePath.slice(2) : relativePath;
|
|
277
|
+
return path.join(__dirname, cleanPath);
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* 自动分页请求并汇总所有数据
|
|
282
|
+
* @param {Function} requestFn - 请求函数,接收一个参数对象,如 { pageNum: 1, pageSize: 50 }
|
|
283
|
+
* @param {Object} config - 配置对象
|
|
284
|
+
* @param {string} config.listPath - 列表数据的属性路径,如 'data.pageData.list'
|
|
285
|
+
* @param {string} config.totalPath - 总数量的属性路径,如 'data.pageData.count'
|
|
286
|
+
* @param {number} config.pageSize - 每页大小
|
|
287
|
+
* @param {string} [config.pageNumKey='pageNum'] - 请求参数中页码的字段名,默认 'pageNum'
|
|
288
|
+
* @param {string} [config.pageSizeKey='pageSize'] - 请求参数中每页大小的字段名,默认 'pageSize'
|
|
289
|
+
* @returns {Promise<Array>} 完整的列表数据
|
|
290
|
+
*
|
|
291
|
+
* @example
|
|
292
|
+
* // 基本用法(使用默认的 pageNum 和 pageSize)
|
|
293
|
+
* const data = await ExcelCsvHandler.getPagination(fetchFn, {
|
|
294
|
+
* listPath: 'data.list',
|
|
295
|
+
* totalPath: 'data.total',
|
|
296
|
+
* pageSize: 50
|
|
297
|
+
* });
|
|
298
|
+
*
|
|
299
|
+
* @example
|
|
300
|
+
* // 自定义页码字段名
|
|
301
|
+
* const data = await ExcelCsvHandler.getPagination(fetchFn, {
|
|
302
|
+
* listPath: 'result.items',
|
|
303
|
+
* totalPath: 'result.count',
|
|
304
|
+
* pageSize: 100,
|
|
305
|
+
* pageNumKey: 'page', // 使用 page 而不是 pageNum
|
|
306
|
+
* pageSizeKey: 'size' // 使用 size 而不是 pageSize
|
|
307
|
+
* });
|
|
308
|
+
*/
|
|
309
|
+
ExcelCsvHandler.getPagination = async function (requestFn, config) {
|
|
310
|
+
// 参数校验
|
|
311
|
+
if (typeof requestFn !== 'function') {
|
|
312
|
+
throw new Error('请求函数必须是一个函数');
|
|
313
|
+
}
|
|
314
|
+
if (!config || typeof config !== 'object') {
|
|
315
|
+
throw new Error('配置参数必须是一个对象');
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
const {
|
|
319
|
+
listPath,
|
|
320
|
+
totalPath,
|
|
321
|
+
pageSize,
|
|
322
|
+
pageNumKey = 'pageNum',
|
|
323
|
+
pageSizeKey = 'pageSize'
|
|
324
|
+
} = config;
|
|
325
|
+
|
|
326
|
+
if (!listPath || typeof listPath !== 'string') {
|
|
327
|
+
throw new Error('列表路径(listPath)必须是一个字符串');
|
|
328
|
+
}
|
|
329
|
+
if (!totalPath || typeof totalPath !== 'string') {
|
|
330
|
+
throw new Error('总数量路径(totalPath)必须是一个字符串');
|
|
331
|
+
}
|
|
332
|
+
if (!pageSize || pageSize <= 0) {
|
|
333
|
+
throw new Error('每页大小(pageSize)必须大于0');
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* 根据路径获取对象属性值
|
|
338
|
+
* @param {Object} obj - 源对象
|
|
339
|
+
* @param {string} path - 属性路径,如 'data.pageData.list'
|
|
340
|
+
*/
|
|
341
|
+
function getValueByPath(obj, path) {
|
|
342
|
+
return path.split('.').reduce((current, key) => current?.[key], obj);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
try {
|
|
346
|
+
// 第一次请求,获取总数量和第一页数据
|
|
347
|
+
console.log(`开始分页请求,每页 ${pageSize} 条...`);
|
|
348
|
+
console.log(`使用参数名: ${pageNumKey}(页码), ${pageSizeKey}(每页大小)`);
|
|
349
|
+
|
|
350
|
+
const firstPageParams = {
|
|
351
|
+
[pageNumKey]: 1,
|
|
352
|
+
[pageSizeKey]: pageSize
|
|
353
|
+
};
|
|
354
|
+
const firstResponse = await requestFn(firstPageParams);
|
|
355
|
+
|
|
356
|
+
// 获取第一页数据和总数量
|
|
357
|
+
const firstPageList = getValueByPath(firstResponse, listPath);
|
|
358
|
+
const total = getValueByPath(firstResponse, totalPath);
|
|
359
|
+
|
|
360
|
+
if (!Array.isArray(firstPageList)) {
|
|
361
|
+
throw new Error(`无法从响应中获取列表数据,路径: ${listPath}`);
|
|
362
|
+
}
|
|
363
|
+
if (typeof total !== 'number') {
|
|
364
|
+
throw new Error(`无法从响应中获取总数量,路径: ${totalPath}`);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
console.log(`总计 ${total} 条数据`);
|
|
368
|
+
|
|
369
|
+
// 如果总数量为 0 或第一页已经包含所有数据,直接返回
|
|
370
|
+
if (total === 0) {
|
|
371
|
+
console.log('没有数据');
|
|
372
|
+
return [];
|
|
373
|
+
}
|
|
374
|
+
if (total <= pageSize) {
|
|
375
|
+
console.log('所有数据已在第一页');
|
|
376
|
+
return firstPageList;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// 计算总页数
|
|
380
|
+
const totalPages = Math.ceil(total / pageSize);
|
|
381
|
+
console.log(`需要请求 ${totalPages} 页`);
|
|
382
|
+
|
|
383
|
+
// 初始化结果数组,先放入第一页数据
|
|
384
|
+
const allData = [...firstPageList];
|
|
385
|
+
|
|
386
|
+
// 请求剩余的页面
|
|
387
|
+
const requests = [];
|
|
388
|
+
for (let page = 2; page <= totalPages; page++) {
|
|
389
|
+
const pageParams = {
|
|
390
|
+
[pageNumKey]: page,
|
|
391
|
+
[pageSizeKey]: pageSize
|
|
392
|
+
};
|
|
393
|
+
requests.push(
|
|
394
|
+
requestFn(pageParams).then(response => {
|
|
395
|
+
const pageList = getValueByPath(response, listPath);
|
|
396
|
+
if (!Array.isArray(pageList)) {
|
|
397
|
+
console.warn(`第 ${page} 页数据格式错误`);
|
|
398
|
+
return [];
|
|
399
|
+
}
|
|
400
|
+
console.log(`第 ${page}/${totalPages} 页请求完成,获取 ${pageList.length} 条数据`);
|
|
401
|
+
return pageList;
|
|
402
|
+
})
|
|
403
|
+
);
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// 并发请求所有剩余页面
|
|
407
|
+
const remainingPages = await Promise.all(requests);
|
|
408
|
+
|
|
409
|
+
// 合并所有数据
|
|
410
|
+
remainingPages.forEach(pageData => {
|
|
411
|
+
allData.push(...pageData);
|
|
412
|
+
});
|
|
413
|
+
|
|
414
|
+
console.log(`分页请求完成,共获取 ${allData.length} 条数据`);
|
|
415
|
+
return allData;
|
|
416
|
+
|
|
417
|
+
} catch (error) {
|
|
418
|
+
console.error('分页请求失败:', error.message);
|
|
419
|
+
throw error;
|
|
420
|
+
}
|
|
421
|
+
};
|
|
422
|
+
|
|
259
423
|
export default ExcelCsvHandler;
|
|
260
424
|
|
|
261
425
|
// 示例用法(取消注释可测试)
|