@raphaellcs/data-cleaner 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -5
- package/package.json +2 -2
- package/src/grouper.js +360 -0
- package/src/index.js +260 -11
- package/src/validator.js +298 -0
- package/validation-rules.json +16 -0
package/README.md
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
# @
|
|
1
|
+
# @raphaellcs/data-cleaner
|
|
2
|
+
[](https://www.npmjs.com/package/@raphaellcs/data-cleaner)
|
|
3
|
+
[](https://www.npmjs.com/package/@raphaellcs/data-cleaner)
|
|
4
|
+
[](https://www.npmjs.com/package/@raphaellcs/data-cleaner)
|
|
2
5
|
|
|
3
6
|
> 数据清洗工具 - 快速清洗和转换数据文件
|
|
4
7
|
|
|
@@ -13,6 +16,9 @@
|
|
|
13
16
|
- **排序**:按列排序
|
|
14
17
|
- **格式转换**:JSON ↔ CSV
|
|
15
18
|
- **统计信息**:查看数据概况
|
|
19
|
+
- **数据验证**:内置验证规则和自定义规则(新)
|
|
20
|
+
- **分组聚合**:字段分组和时间分组(新)
|
|
21
|
+
- **透视表**:创建数据透视表(新)
|
|
16
22
|
|
|
17
23
|
## 📦 安装
|
|
18
24
|
|
|
@@ -296,11 +302,134 @@ done
|
|
|
296
302
|
## 🚧 待实现
|
|
297
303
|
|
|
298
304
|
- [ ] 支持更多文件格式(Excel、SQL)
|
|
299
|
-
- [ ] 自定义转换函数
|
|
300
|
-
- [ ] 正则表达式替换
|
|
301
|
-
- [ ] 数据验证规则
|
|
302
305
|
- [ ] 合并多个文件
|
|
303
|
-
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## ✨ 新功能(v2.0.0)
|
|
310
|
+
|
|
311
|
+
### 数据验证
|
|
312
|
+
|
|
313
|
+
验证数据是否符合规则:
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
data-cleaner validate data.csv --config rules.json
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
创建验证规则配置 `rules.json`:
|
|
320
|
+
|
|
321
|
+
```json
|
|
322
|
+
{
|
|
323
|
+
"email": ["required", "email"],
|
|
324
|
+
"age": [
|
|
325
|
+
"required",
|
|
326
|
+
{"name": "number", "message": "年龄必须是数字"},
|
|
327
|
+
{"name": "min", "value": 0, "message": "年龄不能为负数"},
|
|
328
|
+
{"name": "max", "value": 120, "message": "年龄不能超过120"}
|
|
329
|
+
],
|
|
330
|
+
"phone": [
|
|
331
|
+
{"name": "pattern", "value": "^\\d{11}$", "message": "手机号必须是11位数字"}
|
|
332
|
+
],
|
|
333
|
+
"status": [
|
|
334
|
+
{"name": "enum", "value": ["active", "inactive", "pending"], "message": "状态值不合法"}
|
|
335
|
+
]
|
|
336
|
+
}
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
**内置验证规则:**
|
|
340
|
+
- `required` - 必填
|
|
341
|
+
- `email` - 邮箱格式
|
|
342
|
+
- `url` - URL 格式
|
|
343
|
+
- `number` - 数字
|
|
344
|
+
- `integer` - 整数
|
|
345
|
+
- `positive` - 正数
|
|
346
|
+
- `negative` - 负数
|
|
347
|
+
- `min:<value>` - 最小值
|
|
348
|
+
- `max:<value>` - 最大值
|
|
349
|
+
- `minLength:<length>` - 最小长度
|
|
350
|
+
- `maxLength:<length>` - 最大长度
|
|
351
|
+
- `pattern:<regex>` - 正则匹配
|
|
352
|
+
- `enum:[values]` - 枚举值
|
|
353
|
+
- `date` - 日期
|
|
354
|
+
- `future` - 未来日期
|
|
355
|
+
- `past` - 过去日期
|
|
356
|
+
- `phone` - 电话号码
|
|
357
|
+
|
|
358
|
+
输出错误报告:
|
|
359
|
+
|
|
360
|
+
```bash
|
|
361
|
+
data-cleaner validate data.csv --config rules.json --output errors.csv --format csv
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
### 分组聚合
|
|
365
|
+
|
|
366
|
+
按字段分组并聚合:
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
# 按部门分组,计算平均工资
|
|
370
|
+
data-cleaner group employees.csv --group-by department --aggregate "salary:avg" --stats
|
|
371
|
+
|
|
372
|
+
# 多字段分组
|
|
373
|
+
data-cleaner group sales.csv --group-by "region,category" --aggregate "revenue:sum,order_id:count" --output grouped.json
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
时间分组:
|
|
377
|
+
|
|
378
|
+
```bash
|
|
379
|
+
# 按天分组
|
|
380
|
+
data-cleaner group orders.csv --time-field created_at --interval day --aggregate "amount:sum" --stats
|
|
381
|
+
|
|
382
|
+
# 按月分组
|
|
383
|
+
data-cleaner group orders.csv --time-field created_at --interval month --aggregate "amount:sum,order_id:count" --stats
|
|
384
|
+
|
|
385
|
+
# 按小时分组
|
|
386
|
+
data-cleaner group logs.csv --time-field timestamp --interval hour --aggregate "errors:sum" --stats
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
**聚合类型:**
|
|
390
|
+
- `sum` - 求和
|
|
391
|
+
- `avg` - 平均值
|
|
392
|
+
- `min` - 最小值
|
|
393
|
+
- `max` - 最大值
|
|
394
|
+
- `count` - 计数
|
|
395
|
+
- `count_distinct` - 去重计数
|
|
396
|
+
- `first` - 第一个值
|
|
397
|
+
- `last` - 最后一个值
|
|
398
|
+
- `concat` - 拼接
|
|
399
|
+
- `array` - 数组
|
|
400
|
+
- `percentile:XX` - 百分位数(如 percentile:95)
|
|
401
|
+
|
|
402
|
+
### 透视表
|
|
403
|
+
|
|
404
|
+
创建数据透视表:
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
data-cleaner pivot sales.csv \
|
|
408
|
+
--rows region \
|
|
409
|
+
--columns product \
|
|
410
|
+
--values revenue \
|
|
411
|
+
--agg sum
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
示例输出:
|
|
415
|
+
|
|
416
|
+
```
|
|
417
|
+
productA productB productC
|
|
418
|
+
region1 15000.00 23000.00 18000.00
|
|
419
|
+
region2 12000.00 25000.00 21000.00
|
|
420
|
+
region3 18000.00 20000.00 22000.00
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
保存透视表:
|
|
424
|
+
|
|
425
|
+
```bash
|
|
426
|
+
data-cleaner pivot sales.csv \
|
|
427
|
+
--rows region \
|
|
428
|
+
--columns product \
|
|
429
|
+
--values revenue \
|
|
430
|
+
--agg sum \
|
|
431
|
+
--output pivot.json
|
|
432
|
+
```
|
|
304
433
|
|
|
305
434
|
## 🤝 贡献
|
|
306
435
|
|
package/package.json
CHANGED
package/src/grouper.js
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
// 分组统计模块
|
|
2
|
+
|
|
3
|
+
/**
 * Group records by the value of a single field.
 *
 * Keys are the string form of each record's field value, the same coercion a
 * plain-object property key applies (a missing field becomes "undefined").
 * Buckets are accumulated in a Map so that values which collide with
 * Object.prototype members ("constructor", "toString", "__proto__", ...) are
 * grouped like any other value instead of breaking the lookup.
 *
 * @param {Array<Object>} data - Records to group.
 * @param {string} groupByField - Name of the field whose value selects the group.
 * @returns {Object<string, Array<Object>>} Group key mapped to the records in that group.
 */
function groupBy(data, groupByField) {
  const buckets = new Map();

  for (const item of data) {
    const key = String(item[groupByField]);
    const bucket = buckets.get(key);
    if (bucket === undefined) {
      buckets.set(key, [item]);
    } else {
      bucket.push(item);
    }
  }

  // Object.fromEntries defines own data properties, so even "__proto__" ends up as a regular key.
  return Object.fromEntries(buckets);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Group records by the combined value of several fields.
 *
 * The group key is the string form of each field value joined with "|";
 * null and undefined values are encoded as "__null__". Buckets are collected
 * in a Map so that a key equal to an Object.prototype member name (for
 * example a single field whose value is "constructor") is handled like any
 * other key.
 *
 * Note: values that themselves contain "|" are not escaped, so distinct
 * value combinations can still share a key.
 *
 * @param {Array<Object>} data - Records to group.
 * @param {Array<string>} groupByFields - Field names that make up the group key.
 * @returns {Object<string, Array<Object>>} Composite key mapped to the records in that group.
 */
function groupByMultiple(data, groupByFields) {
  const buckets = new Map();

  for (const item of data) {
    const key = groupByFields
      .map((field) => {
        const value = item[field];
        return value !== undefined && value !== null ? String(value) : '__null__';
      })
      .join('|');

    const bucket = buckets.get(key);
    if (bucket === undefined) {
      buckets.set(key, [item]);
    } else {
      bucket.push(item);
    }
  }

  // Own data properties are created for every key, including prototype member names.
  return Object.fromEntries(buckets);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Compute one aggregate per field for the records of a single group.
 *
 * Blank cells (null, undefined, '') are dropped before any aggregation.
 * Supported aggregation types:
 *   sum, avg        - non-numeric values count as 0 (avg divides by the number of non-blank values)
 *   min, max        - computed over the values that convert to a number; null when there are none
 *   count           - number of non-blank values
 *   count_distinct  - number of distinct non-blank values
 *   first, last     - first / last non-blank value (undefined when there are none)
 *   concat          - non-blank values joined with ", "
 *   array           - the non-blank values themselves
 *   percentile:P    - P-th percentile of the numeric values (P may be fractional)
 * Unknown or missing aggregation types are ignored: the field is simply absent
 * from the result.
 *
 * @param {Array<Object>} group - Records belonging to one group.
 * @param {Object<string, string>} aggregations - Field name mapped to aggregation type.
 * @returns {Object} Field name mapped to its aggregated value.
 */
function aggregateGroup(group, aggregations) {
  const result = {};
  // Lenient coercion used by sum/avg: anything that is not a number counts as 0.
  const numberOrZero = (v) => Number(v) || 0;
  // Strict coercion used by min/max/percentile: values that are not numbers are dropped,
  // so a genuine 0 is kept (the previous `Number(v) || Infinity` discarded zeros).
  const numericOnly = (vals) => vals.map((v) => Number(v)).filter((n) => !Number.isNaN(n));

  for (const [fieldName, aggType] of Object.entries(aggregations)) {
    const values = group
      .map((item) => item[fieldName])
      .filter((v) => v !== null && v !== undefined && v !== '');

    switch (aggType) {
      case 'sum':
        result[fieldName] = values.reduce((sum, v) => sum + numberOrZero(v), 0);
        break;

      case 'avg':
        result[fieldName] = values.length > 0
          ? values.reduce((sum, v) => sum + numberOrZero(v), 0) / values.length
          : 0;
        break;

      case 'min': {
        const numbers = numericOnly(values);
        // reduce instead of Math.min(...numbers): spreading a very large group can exceed
        // the engine's argument limit.
        result[fieldName] = numbers.length > 0
          ? numbers.reduce((lowest, n) => (n < lowest ? n : lowest))
          : null;
        break;
      }

      case 'max': {
        const numbers = numericOnly(values);
        result[fieldName] = numbers.length > 0
          ? numbers.reduce((highest, n) => (n > highest ? n : highest))
          : null;
        break;
      }

      case 'count':
        result[fieldName] = values.length;
        break;

      case 'count_distinct':
        result[fieldName] = new Set(values).size;
        break;

      case 'first':
        result[fieldName] = values[0];
        break;

      case 'last':
        result[fieldName] = values[values.length - 1];
        break;

      case 'concat':
        result[fieldName] = values.join(', ');
        break;

      case 'array':
        result[fieldName] = values;
        break;

      default:
        // aggType may be undefined when the caller parsed an expression without a type.
        if (typeof aggType === 'string' && aggType.startsWith('percentile:')) {
          const p = Number.parseFloat(aggType.slice('percentile:'.length));
          result[fieldName] = calculatePercentile(numericOnly(values), p);
        }
    }
  }

  return result;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Compute a percentile using linear interpolation between the two closest ranks.
 *
 * Values are converted with Number(); anything that is not a finite number is
 * ignored so a stray NaN cannot corrupt the sort. The percentile is clamped to
 * the 0-100 range, so out-of-range requests return the minimum or maximum
 * instead of NaN. The input array is not modified.
 *
 * @param {Array<number>} values - Sample values.
 * @param {number} percentile - Requested percentile (0-100).
 * @returns {number} The interpolated percentile, or 0 when there are no usable values.
 */
function calculatePercentile(values, percentile) {
  const sorted = values
    .map((v) => Number(v))
    .filter((n) => Number.isFinite(n))
    .sort((a, b) => a - b);

  if (sorted.length === 0) {
    return 0;
  }

  const clamped = Math.min(100, Math.max(0, percentile));
  // Fractional position of the percentile within the sorted sample.
  const index = (clamped / 100) * (sorted.length - 1);
  const lower = Math.floor(index);
  const upper = Math.ceil(index);
  const weight = index - lower;

  return sorted[lower] * (1 - weight) + sorted[upper] * weight;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Group records by one or more fields and compute aggregates for each group.
 *
 * Each result row contains:
 *   _group  - the composite group key produced by groupByMultiple
 *   _count  - number of records in the group
 *   one property per grouping field (string form of the value, or null when
 *   the value is null/undefined)
 *   one property per aggregated field, as returned by aggregateGroup
 *
 * The grouping field values are read from the first record of each group
 * rather than by splitting the composite key, so values that contain the "|"
 * separator are reported intact.
 *
 * @param {Array<Object>} data - Records to group.
 * @param {string|Array<string>} groupBy - Grouping field name, or a list of names.
 * @param {Object<string, string>} aggregations - Field name mapped to aggregation type.
 * @returns {Array<Object>} One result row per group.
 */
function groupAndAggregate(data, groupBy, aggregations) {
  const groupByFields = Array.isArray(groupBy) ? groupBy : [groupBy];
  const groups = groupByMultiple(data, groupByFields);

  return Object.entries(groups).map(([key, group]) => {
    const groupResult = {
      _group: key,
      _count: group.length,
    };

    // Every record in a group shares the same grouping values, so the first one is representative.
    const representative = group[0];
    for (const field of groupByFields) {
      const value = representative[field];
      groupResult[field] = value !== undefined && value !== null ? String(value) : null;
    }

    return Object.assign(groupResult, aggregateGroup(group, aggregations));
  });
}
|
|
174
|
+
|
|
175
|
+
/**
 * Group records into time buckets based on a date field.
 *
 * Bucket keys are prefixes of the UTC ISO-8601 timestamp:
 *   minute -> YYYY-MM-DDTHH:MM, hour -> YYYY-MM-DDTHH, day -> YYYY-MM-DD,
 *   month -> YYYY-MM, year -> YYYY, week -> YYYY-MM-DD of the Sunday that
 *   starts the week.
 * An unrecognised interval falls back to day buckets. Records whose date
 * field is blank or cannot be parsed are skipped.
 *
 * All calendar arithmetic is done in UTC so the week bucket agrees with the
 * UTC keys used by every other interval regardless of the machine's time zone.
 *
 * @param {Array<Object>} data - Records to group.
 * @param {string} dateField - Name of the field holding the date.
 * @param {string} [interval='day'] - minute, hour, day, week, month or year.
 * @returns {Object<string, Array<Object>>} Bucket key mapped to the records in that bucket.
 */
function groupByTime(data, dateField, interval = 'day') {
  // Number of leading characters of the ISO string that identify each bucket.
  const isoPrefixLength = new Map([
    ['minute', 16],
    ['hour', 13],
    ['day', 10],
    ['week', 10],
    ['month', 7],
    ['year', 4],
  ]);
  const prefixLength = isoPrefixLength.has(interval) ? isoPrefixLength.get(interval) : 10;
  const groups = {};

  for (const item of data) {
    const raw = item[dateField];
    // new Date(null) is the Unix epoch, so blank cells must be rejected before parsing.
    if (raw === null || raw === undefined || raw === '') {
      continue;
    }

    const date = new Date(raw);
    if (Number.isNaN(date.getTime())) {
      continue;
    }

    let bucketStart = date;
    if (interval === 'week') {
      // Rewind to the Sunday of the same UTC week.
      bucketStart = new Date(date);
      bucketStart.setUTCDate(date.getUTCDate() - date.getUTCDay());
    }

    const key = bucketStart.toISOString().substring(0, prefixLength);
    if (!groups[key]) {
      groups[key] = [];
    }
    groups[key].push(item);
  }

  return groups;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Compute count / sum / avg / min / max of one numeric field for every group.
 *
 * Blank cells (null, undefined, '') and values that do not convert to a
 * number are ignored; they are not counted as zeros. A group without any
 * usable value reports 0 for every statistic. The result is sorted by group
 * key using localeCompare.
 *
 * @param {Object<string, Array<Object>>} groups - Group key mapped to its records.
 * @param {string} statField - Name of the field to summarise.
 * @returns {Array<{group: string, count: number, sum: number, avg: number, min: number, max: number}>}
 *   One summary per group.
 */
function getGroupStats(groups, statField) {
  const stats = Object.entries(groups).map(([key, group]) => {
    const summary = { group: key, count: 0, sum: 0, avg: 0, min: 0, max: 0 };

    // Single pass: avoids spreading large arrays into Math.min/Math.max.
    for (const item of group) {
      const raw = item[statField];
      if (raw === null || raw === undefined || raw === '') {
        continue;
      }
      const value = Number(raw);
      if (Number.isNaN(value)) {
        continue;
      }

      if (summary.count === 0 || value < summary.min) {
        summary.min = value;
      }
      if (summary.count === 0 || value > summary.max) {
        summary.max = value;
      }
      summary.count += 1;
      summary.sum += value;
    }

    if (summary.count > 0) {
      summary.avg = summary.sum / summary.count;
    }
    return summary;
  });

  return stats.sort((a, b) => a.group.localeCompare(b.group));
}
|
|
262
|
+
|
|
263
|
+
/**
 * Build a pivot table: one row per distinct rowField value, one column per
 * distinct columnField value, each cell aggregating valueField.
 *
 * Cell values are converted with Number(); anything that is not a number
 * counts as 0. Cells without any record aggregate to 0. An unrecognised
 * aggregation function falls back to sum.
 *
 * Values are collected in a nested Map (row label -> column label) instead of
 * a single "row::col" string key, so labels that contain "::" cannot be
 * confused with one another.
 *
 * @param {Array<Object>} data - Records to pivot.
 * @param {string} rowField - Field providing the row labels.
 * @param {string} columnField - Field providing the column labels.
 * @param {string} valueField - Field providing the cell values.
 * @param {string} [aggFunction='sum'] - sum, avg, count, min or max.
 * @returns {{rows: Array, columns: Array, data: Object}} Sorted row labels,
 *   sorted column labels, and data[row][column] holding the aggregated cell.
 */
function pivotTable(data, rowField, columnField, valueField, aggFunction = 'sum') {
  const rows = new Set();
  const columns = new Set();
  // String(row label) -> String(column label) -> collected numeric values.
  const cells = new Map();

  for (const item of data) {
    const rowKey = item[rowField];
    const colKey = item[columnField];
    rows.add(rowKey);
    columns.add(colKey);

    const rowLabel = String(rowKey);
    const colLabel = String(colKey);
    if (!cells.has(rowLabel)) {
      cells.set(rowLabel, new Map());
    }
    const rowCells = cells.get(rowLabel);
    if (!rowCells.has(colLabel)) {
      rowCells.set(colLabel, []);
    }
    rowCells.get(colLabel).push(Number(item[valueField]) || 0);
  }

  const total = (vals) => vals.reduce((sum, v) => sum + v, 0);
  const aggregate = (vals) => {
    switch (aggFunction) {
      case 'avg':
        return vals.length > 0 ? total(vals) / vals.length : 0;
      case 'count':
        return vals.length;
      case 'min':
        return vals.length > 0 ? vals.reduce((lowest, v) => (v < lowest ? v : lowest)) : 0;
      case 'max':
        return vals.length > 0 ? vals.reduce((highest, v) => (v > highest ? v : highest)) : 0;
      case 'sum':
      default:
        return total(vals);
    }
  };

  const pivot = {};
  for (const row of rows) {
    const rowCells = cells.get(String(row));
    const line = {};
    for (const col of columns) {
      line[col] = aggregate(rowCells.get(String(col)) || []);
    }
    pivot[row] = line;
  }

  return {
    rows: Array.from(rows).sort(),
    columns: Array.from(columns).sort(),
    data: pivot,
  };
}
|
|
333
|
+
|
|
334
|
+
/**
 * Print per-group statistics to the console.
 *
 * Writes a heading, then for every entry the group name followed by its
 * count and its sum, average, minimum and maximum formatted with two
 * decimals, and a blank line after each group.
 *
 * @param {Array<{group: string, count: number, sum: number, avg: number, min: number, max: number}>} stats
 *   Summaries to print.
 */
function printGroupStats(stats) {
  console.log('\n📊 分组统计\n');

  for (const { group, count, sum, avg, min, max } of stats) {
    console.log(`${group}:`);
    console.log(` 数量: ${count}`);
    console.log(` 总和: ${sum.toFixed(2)}`);
    console.log(` 平均: ${avg.toFixed(2)}`);
    console.log(` 最小: ${min.toFixed(2)}`);
    console.log(` 最大: ${max.toFixed(2)}`);
    console.log();
  }
}
|
|
351
|
+
|
|
352
|
+
module.exports = {
|
|
353
|
+
groupBy,
|
|
354
|
+
groupByMultiple,
|
|
355
|
+
groupAndAggregate,
|
|
356
|
+
groupByTime,
|
|
357
|
+
getGroupStats,
|
|
358
|
+
pivotTable,
|
|
359
|
+
printGroupStats
|
|
360
|
+
};
|
package/src/index.js
CHANGED
|
@@ -6,6 +6,17 @@ const { program } = require('commander');
|
|
|
6
6
|
const chalk = require('chalk');
|
|
7
7
|
const { parse } = require('csv-parse');
|
|
8
8
|
const { stringify } = require('csv-stringify');
|
|
9
|
+
const {
|
|
10
|
+
DataValidator,
|
|
11
|
+
createValidatorFromConfig
|
|
12
|
+
} = require('./validator.js');
|
|
13
|
+
const {
|
|
14
|
+
groupAndAggregate,
|
|
15
|
+
groupByTime,
|
|
16
|
+
getGroupStats,
|
|
17
|
+
pivotTable,
|
|
18
|
+
printGroupStats
|
|
19
|
+
} = require('./grouper.js');
|
|
9
20
|
|
|
10
21
|
// 读取文件
|
|
11
22
|
function readFile(filePath) {
|
|
@@ -347,23 +358,23 @@ program
|
|
|
347
358
|
console.log(chalk.red(`文件不存在: ${input}`));
|
|
348
359
|
process.exit(1);
|
|
349
360
|
}
|
|
350
|
-
|
|
361
|
+
|
|
351
362
|
const ext = path.extname(input).toLowerCase();
|
|
352
363
|
const outputFormat = options.format || (ext === '.json' ? 'json' : 'csv');
|
|
353
364
|
const outputFile = output || input.replace(/\.[^.]+$/, `.cleaned.${outputFormat}`);
|
|
354
|
-
|
|
365
|
+
|
|
355
366
|
console.log(chalk.cyan(`\n🔧 清洗数据\n`));
|
|
356
367
|
console.log(chalk.gray(`输入: ${input}`));
|
|
357
368
|
console.log(chalk.gray(`输出: ${outputFile}\n`));
|
|
358
|
-
|
|
369
|
+
|
|
359
370
|
const data = await readFile(input);
|
|
360
|
-
|
|
371
|
+
|
|
361
372
|
// 显示原始统计
|
|
362
373
|
if (options.stats) {
|
|
363
374
|
console.log(chalk.cyan('原始数据:'));
|
|
364
375
|
printStats(getStats(data));
|
|
365
376
|
}
|
|
366
|
-
|
|
377
|
+
|
|
367
378
|
// 解析过滤表达式
|
|
368
379
|
if (options.filter) {
|
|
369
380
|
const parts = options.filter.split(':');
|
|
@@ -391,29 +402,267 @@ program
|
|
|
391
402
|
if (options.columns) {
|
|
392
403
|
options.columns = options.columns.split(',');
|
|
393
404
|
}
|
|
394
|
-
|
|
405
|
+
|
|
395
406
|
// 清洗数据
|
|
396
407
|
const cleaned = cleanData(data, options);
|
|
397
|
-
|
|
408
|
+
|
|
398
409
|
// 显示清洗后统计
|
|
399
410
|
if (options.stats) {
|
|
400
411
|
console.log(chalk.cyan('清洗后数据:'));
|
|
401
412
|
printStats(getStats(cleaned));
|
|
402
413
|
}
|
|
403
|
-
|
|
414
|
+
|
|
404
415
|
// 写入文件
|
|
405
416
|
await writeFile(outputFile, cleaned, outputFormat);
|
|
406
|
-
|
|
417
|
+
|
|
407
418
|
console.log(chalk.green(`✅ 已保存到: ${outputFile}`));
|
|
408
|
-
|
|
419
|
+
|
|
409
420
|
// 显示差异
|
|
410
421
|
const originalCount = Array.isArray(data) ? data.length : 1;
|
|
411
422
|
const cleanedCount = Array.isArray(cleaned) ? cleaned.length : 1;
|
|
412
423
|
if (originalCount !== cleanedCount) {
|
|
413
424
|
console.log(chalk.yellow(` 从 ${originalCount} 行减少到 ${cleanedCount} 行`));
|
|
414
425
|
}
|
|
415
|
-
|
|
426
|
+
|
|
427
|
+
console.log();
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
// Validate command: check every row of the input against rules loaded from a
// JSON config file, print the first 20 errors, optionally write a full error
// report (JSON or CSV), and exit with status 1 when any error was found.
program
  .command('validate <input>')
  .option('-c, --config <path>', '验证规则配置文件(JSON)')
  .option('-o, --output <path>', '输出错误报告到文件')
  .option('--format <type>', '输出格式(json/csv)', 'json')
  .description('验证数据')
  .action(async (input, options) => {
    if (!fs.existsSync(input)) {
      console.log(chalk.red(`文件不存在: ${input}`));
      process.exit(1);
    }

    const data = await readFile(input);

    if (!Array.isArray(data)) {
      console.log(chalk.red('数据必须是数组格式'));
      process.exit(1);
    }

    console.log(chalk.cyan(`\n✅ 验证数据\n`));

    // Without a rules file there is nothing to validate against.
    if (!options.config) {
      console.log(chalk.yellow('未提供验证规则配置,跳过验证'));
      console.log(chalk.gray('使用 --config 指定验证规则文件\n'));
      process.exit(0);
    }

    if (!fs.existsSync(options.config)) {
      console.log(chalk.red(`配置文件不存在: ${options.config}`));
      process.exit(1);
    }

    // Report a malformed config as a normal CLI error instead of an unhandled rejection.
    let config;
    try {
      config = JSON.parse(fs.readFileSync(options.config, 'utf-8'));
    } catch (error) {
      console.log(chalk.red(`配置文件解析失败: ${error.message}`));
      process.exit(1);
    }

    const validator = createValidatorFromConfig(config);
    console.log(chalk.gray(`从配置文件加载规则: ${options.config}`));

    console.log(chalk.gray(`规则数量: ${validator.getRuleCount()}`));
    console.log();

    const errors = validator.getErrors(data);

    if (errors.length === 0) {
      console.log(chalk.green('✓ 所有数据验证通过!\n'));
    } else {
      console.log(chalk.red(`✗ 发现 ${errors.length} 个验证错误:\n`));

      // Only the first 20 errors are shown on screen; the report file holds all of them.
      for (const error of errors.slice(0, 20)) {
        console.log(chalk.red(` [行 ${error.row}] ${error.field}`));
        console.log(chalk.gray(` 规则: ${error.rule}`));
        console.log(chalk.gray(` 值: ${error.value}`));
        console.log(chalk.gray(` 消息: ${error.message}\n`));
      }

      if (errors.length > 20) {
        console.log(chalk.yellow(`... 还有 ${errors.length - 20} 个错误\n`));
      }
    }

    if (options.output && errors.length > 0) {
      if (options.format === 'csv') {
        // Quote every cell and double embedded quotes so field names, rule names,
        // values and messages containing commas, quotes or newlines stay in their column.
        const csvCell = (cell) => `"${String(cell).replace(/"/g, '""')}"`;
        const headers = ['row', 'field', 'rule', 'value', 'message'];
        const lines = errors.map((e) =>
          [e.row, e.field, e.rule, e.value, e.message].map(csvCell).join(','));
        fs.writeFileSync(options.output, [headers.join(','), ...lines].join('\n'), 'utf-8');
      } else {
        fs.writeFileSync(options.output, JSON.stringify(errors, null, 2), 'utf-8');
      }
      console.log(chalk.green(`✓ 错误报告已保存到: ${options.output}\n`));
    }

    process.exit(errors.length === 0 ? 0 : 1);
  });
|
|
515
|
+
|
|
516
|
+
// Group command: aggregate rows either per time bucket (--time-field) or per
// combination of field values (--group-by), print a summary when --stats is
// given, and optionally write the result to a file.
program
  .command('group <input>')
  .option('-g, --group-by <field>', '分组字段(支持多个,逗号分隔)')
  .option('-a, --aggregate <expr>', '聚合表达式(field:aggType,逗号分隔)')
  .option('-t, --time-field <field>', '时间字段(用于时间分组)')
  .option('-i, --interval <type>', '时间间隔(minute/hour/day/week/month/year)', 'day')
  .option('-o, --output <path>', '输出文件')
  .option('-f, --format <type>', '输出格式(json/csv)', 'json')
  .option('--stats', '显示统计信息')
  .description('分组和聚合数据')
  .action(async (input, options) => {
    // Parse "field:aggType[,field:aggType...]" into { field: aggType }.
    // Only the FIRST colon separates field from type, so "latency:percentile:95"
    // keeps its argument. A bare "count" is accepted and skipped because every
    // result row already carries the row count in `_count`; any other token
    // without a field or type is reported and ignored.
    const parseAggregations = (expr) => {
      const aggregations = {};
      if (!expr) {
        return aggregations;
      }
      for (const rawPart of expr.split(',')) {
        const part = rawPart.trim();
        if (part === '') {
          continue;
        }
        const separator = part.indexOf(':');
        if (separator <= 0 || separator === part.length - 1) {
          if (part !== 'count') {
            console.log(chalk.yellow(`忽略无效的聚合表达式: ${part}`));
          }
          continue;
        }
        aggregations[part.slice(0, separator)] = part.slice(separator + 1);
      }
      return aggregations;
    };

    if (!fs.existsSync(input)) {
      console.log(chalk.red(`文件不存在: ${input}`));
      process.exit(1);
    }

    const data = await readFile(input);

    if (!Array.isArray(data)) {
      console.log(chalk.red('数据必须是数组格式'));
      process.exit(1);
    }

    console.log(chalk.cyan(`\n📊 分组和聚合\n`));

    let result;

    if (options.timeField) {
      // Time grouping
      const groups = groupByTime(data, options.timeField, options.interval);
      console.log(chalk.gray(`时间字段: ${options.timeField}`));
      console.log(chalk.gray(`时间间隔: ${options.interval}`));
      console.log(chalk.gray(`分组数量: ${Object.keys(groups).length}\n`));

      if (options.aggregate) {
        const aggregations = parseAggregations(options.aggregate);
        const statField = Object.keys(aggregations)[0];

        if (options.stats && statField !== undefined) {
          printGroupStats(getGroupStats(groups, statField));
        }

        // Replace each row's timestamp with its bucket key before aggregating, so the
        // output has one row per interval rather than one row per distinct raw timestamp.
        const bucketed = Object.entries(groups).flatMap(([bucket, rows]) =>
          rows.map((row) => ({ ...row, [options.timeField]: bucket })));
        result = groupAndAggregate(bucketed, options.timeField, aggregations);
      } else {
        result = groups;
      }
    } else if (options.groupBy) {
      // Field grouping
      const groupByFields = options.groupBy.split(',');
      const aggregations = parseAggregations(options.aggregate);

      console.log(chalk.gray(`分组字段: ${groupByFields.join(', ')}`));
      console.log(chalk.gray(`聚合规则: ${Object.keys(aggregations).join(', ') || '无'}\n`));

      result = groupAndAggregate(data, groupByFields, aggregations);

      if (options.stats) {
        for (const item of result) {
          console.log(chalk.cyan(` ${item._group}`));
          console.log(chalk.gray(` 数量: ${item._count}`));
          for (const [key, value] of Object.entries(item)) {
            // Properties starting with "_" are bookkeeping (_group, _count) already printed above.
            if (!key.startsWith('_')) {
              console.log(chalk.gray(` ${key}: ${typeof value === 'number' ? value.toFixed(2) : value}`));
            }
          }
          console.log();
        }
      }
    } else {
      console.log(chalk.red('必须指定 --group-by 或 --time-field'));
      process.exit(1);
    }

    // Write the result file
    if (options.output) {
      if (options.format === 'csv') {
        await writeFile(options.output, result, 'csv');
      } else {
        fs.writeFileSync(options.output, JSON.stringify(result, null, 2), 'utf-8');
      }
      console.log(chalk.green(`✓ 已保存到: ${options.output}\n`));
    }
  });
|
|
613
|
+
|
|
614
|
+
// Pivot command: build a pivot table from the input rows, print it, and
// optionally save it as JSON.
program
  .command('pivot <input>')
  .option('-r, --rows <field>', '行字段')
  .option('-c, --columns <field>', '列字段')
  .option('-v, --values <field>', '值字段')
  .option('-a, --agg <func>', '聚合函数(sum/avg/count/min/max)', 'sum')
  .option('-o, --output <path>', '输出文件')
  .description('创建数据透视表')
  .action(async (input, options) => {
    const inputMissing = !fs.existsSync(input);
    if (inputMissing) {
      console.log(chalk.red(`文件不存在: ${input}`));
      process.exit(1);
    }

    const hasAllFields = Boolean(options.rows && options.columns && options.values);
    if (!hasAllFields) {
      console.log(chalk.red('必须指定 --rows, --columns 和 --values'));
      process.exit(1);
    }

    const data = await readFile(input);

    if (!Array.isArray(data)) {
      console.log(chalk.red('数据必须是数组格式'));
      process.exit(1);
    }

    console.log(chalk.cyan(`\n📊 数据透视表\n`));
    console.log(chalk.gray(`行: ${options.rows}`));
    console.log(chalk.gray(`列: ${options.columns}`));
    console.log(chalk.gray(`值: ${options.values}`));
    console.log(chalk.gray(`聚合: ${options.agg}\n`));

    const pivot = pivotTable(data, options.rows, options.columns, options.values, options.agg);

    // Header line with the column labels, then one line per row label.
    console.log(chalk.cyan(` ${pivot.columns.join(' ')}`));
    for (const row of pivot.rows) {
      const cellTexts = pivot.columns.map((col) => {
        const value = pivot.data[row][col];
        const text = typeof value === 'number' ? value.toFixed(2) : value;
        return text.padStart(12);
      });
      console.log(chalk.cyan([row, ...cellTexts].join(' ')));
    }
    console.log();

    // Save the pivot table when an output path was given.
    if (options.output) {
      fs.writeFileSync(options.output, JSON.stringify(pivot, null, 2), 'utf-8');
      console.log(chalk.green(`✓ 已保存到: ${options.output}\n`));
    }
  });
|
|
418
667
|
|
|
419
668
|
program.parse();
|
package/src/validator.js
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
// 数据验证模块
|
|
2
|
+
|
|
3
|
+
/**
 * A single named validation check.
 *
 * Holds the rule's name, the function that performs the check, and the
 * message to report when the check fails.
 */
class ValidationRule {
  /**
   * @param {string} name - Rule name.
   * @param {Function} validator - Function that checks a value.
   * @param {string} errorMessage - Message reported on failure.
   */
  constructor(name, validator, errorMessage) {
    Object.assign(this, { name, validator, errorMessage });
  }
}
|
|
13
|
+
|
|
14
|
+
/**
 * Built-in validation rules, keyed by rule name.
 *
 * Every rule is a predicate `(value, arg) => boolean` that returns true when
 * the value passes. `arg` is only used by the parameterised rules (min, max,
 * minLength, maxLength, pattern, enum).
 */
const BUILT_IN_RULES = {
  // Present: anything except null, undefined and the empty string.
  required: (value) => {
    return value !== null && value !== undefined && value !== '';
  },
  // Something@something.something with no whitespace or extra "@".
  email: (value) => {
    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    return emailRegex.test(value);
  },
  // Accepted when the URL constructor can parse the value.
  url: (value) => {
    try {
      new URL(value);
      return true;
    } catch {
      return false;
    }
  },
  // Finite number; rejects blanks and strings with trailing garbage such as "12abc".
  number: (value) => {
    return !isNaN(parseFloat(value)) && isFinite(value);
  },
  // Whole number. Blank input is rejected explicitly because Number('') and
  // Number(null) are both 0, which would otherwise pass as an integer.
  integer: (value) => {
    if (value === null || value === undefined || String(value).trim() === '') {
      return false;
    }
    return Number.isInteger(Number(value));
  },
  positive: (value) => {
    return Number(value) > 0;
  },
  negative: (value) => {
    return Number(value) < 0;
  },
  // Numeric lower bound (inclusive).
  min: (value, min) => {
    return Number(value) >= min;
  },
  // Numeric upper bound (inclusive).
  max: (value, max) => {
    return Number(value) <= max;
  },
  // Length bounds are measured on the string form of the value.
  minLength: (value, min) => {
    return String(value).length >= min;
  },
  maxLength: (value, max) => {
    return String(value).length <= max;
  },
  // `pattern` is a regular expression source string.
  pattern: (value, pattern) => {
    return new RegExp(pattern).test(value);
  },
  // Strict membership: the value must be identical to one of the allowed values.
  enum: (value, values) => {
    return values.includes(value);
  },
  // Accepted when Date.parse understands the value.
  date: (value) => {
    return !isNaN(Date.parse(value));
  },
  // Strictly later than the moment of validation.
  future: (value) => {
    return new Date(value) > new Date();
  },
  // Strictly earlier than the moment of validation.
  past: (value) => {
    return new Date(value) < new Date();
  },
  // Loose international phone check: optional leading "+", then digits,
  // whitespace, hyphens and parentheses, with at least 10 digits in total.
  // The value is converted to a string first so numeric cells (for example
  // phone numbers stored as JSON numbers) are checked instead of throwing.
  phone: (value) => {
    const text = String(value);
    const phoneRegex = /^\+?[\d\s()-]+$/;
    return phoneRegex.test(text) && text.replace(/\D/g, '').length >= 10;
  }
};
|
|
78
|
+
|
|
79
|
+
/**
 * Field-level data validator.
 *
 * Rules are registered per field name and evaluated in registration order.
 * A rule name is resolved when a value is validated, not when the rule is
 * added: built-in rules are consulted first, then rules registered through
 * addCustomRule(). An unresolvable rule name is reported as a validation
 * error for the field instead of being thrown to the caller.
 */
class DataValidator {
  constructor() {
    this.rules = new Map(); // fieldName -> ValidationRule[]
    this.customRules = new Map(); // ruleName -> validator function
  }

  /**
   * Attach a named rule (built-in or custom) to a field.
   * @param {string} fieldName - Field the rule applies to.
   * @param {string} ruleName - Name of a built-in rule or of a rule registered with addCustomRule().
   * @param {*} [args] - Rule argument; forwarded as the second argument unless it is null/undefined.
   * @param {string} [errorMessage] - Message reported instead of the default one when the rule fails.
   */
  addRule(fieldName, ruleName, args, errorMessage) {
    if (!this.rules.has(fieldName)) {
      this.rules.set(fieldName, []);
    }

    const validator = (value) => {
      // Own-property check: names such as "toString" or "constructor" must
      // not resolve to functions inherited from Object.prototype.
      const isBuiltIn = Object.prototype.hasOwnProperty.call(BUILT_IN_RULES, ruleName);
      // Built-ins keep priority so existing results do not change; custom
      // rules are the fallback (they were previously never consulted).
      const ruleFn = isBuiltIn ? BUILT_IN_RULES[ruleName] : this.customRules.get(ruleName);

      if (typeof ruleFn !== 'function') {
        throw new Error(`未知规则: ${ruleName}`);
      }

      if (args !== undefined && args !== null) {
        return ruleFn(value, args);
      }
      return ruleFn(value);
    };

    this.rules.get(fieldName).push(new ValidationRule(ruleName, validator, errorMessage));
  }

  /**
   * Register a custom rule that addRule() can later refer to by name.
   * @param {string} ruleName - Rule name.
   * @param {Function} validator - Called as (value, args); a truthy return value means the value is valid.
   */
  addCustomRule(ruleName, validator) {
    this.customRules.set(ruleName, validator);
  }

  /**
   * Attach a regular-expression rule to a field (shorthand for the "pattern" rule).
   * @param {string} fieldName - Field the rule applies to.
   * @param {string} pattern - Regular expression source.
   * @param {string} errorMessage - Message reported when the value does not match.
   */
  addPatternRule(fieldName, pattern, errorMessage) {
    this.addRule(fieldName, 'pattern', pattern, errorMessage);
  }

  /**
   * Run every rule registered for one field against a value.
   * @param {string} fieldName - Field name.
   * @param {*} value - Value to check.
   * @returns {{valid: boolean, errors: Array<{rule: string, message: string}>}} Validation result.
   */
  validateField(fieldName, value) {
    const errors = [];
    const fieldRules = this.rules.get(fieldName) || [];

    for (const rule of fieldRules) {
      try {
        if (!rule.validator(value)) {
          errors.push({
            rule: rule.name,
            message: rule.errorMessage || `${fieldName} 验证失败: ${rule.name}`
          });
        }
      } catch (error) {
        // A throwing rule (including an unknown rule name) is reported as a
        // failure of this field rather than aborting the whole validation.
        errors.push({
          rule: rule.name,
          message: `${fieldName} 验证错误: ${error.message}`
        });
      }
    }

    return { valid: errors.length === 0, errors };
  }

  /**
   * Validate one record against all registered field rules.
   * @param {Object} data - Record to validate; null/undefined is treated as a record with no fields.
   * @returns {{valid: boolean, fieldResults: Object}} Overall flag plus the per-field results.
   */
  validate(data) {
    // A missing row is checked like an empty record so that one bad entry
    // does not crash a batch run.
    const record = data === null || data === undefined ? {} : data;
    const fieldResults = {};
    let isValid = true;

    for (const fieldName of this.rules.keys()) {
      const result = this.validateField(fieldName, record[fieldName]);
      fieldResults[fieldName] = result;
      if (!result.valid) {
        isValid = false;
      }
    }

    return { valid: isValid, fieldResults };
  }

  /**
   * Validate every record of an array.
   * @param {Array} dataArray - Records to validate.
   * @returns {Array} One entry per record: its index merged with the validate() result.
   */
  validateArray(dataArray) {
    return dataArray.map((data, index) => ({
      index,
      ...this.validate(data)
    }));
  }

  /**
   * Validate every record and collect the failures as a flat list.
   * @param {Array} dataArray - Records to validate.
   * @returns {Array<{row: number, field: string, rule: string, message: string, value: *}>} Error records.
   */
  getErrors(dataArray) {
    const errors = [];

    dataArray.forEach((record, row) => {
      const validation = this.validate(record);
      if (validation.valid) {
        return;
      }

      for (const [field, result] of Object.entries(validation.fieldResults)) {
        for (const error of result.errors) {
          errors.push({
            row,
            field,
            rule: error.rule,
            message: error.message,
            value: record === null || record === undefined ? undefined : record[field]
          });
        }
      }
    });

    return errors;
  }

  /**
   * Remove all field rules. Custom rule registrations are kept.
   */
  clear() {
    this.rules.clear();
  }

  /**
   * Count the rules registered across all fields.
   * @returns {number} Total number of rules.
   */
  getRuleCount() {
    let count = 0;
    for (const fieldRules of this.rules.values()) {
      count += fieldRules.length;
    }
    return count;
  }
}
|
|
257
|
+
|
|
258
|
+
/**
 * Build a DataValidator from a plain configuration object.
 *
 * Each key of the configuration is a field name and each value is an array
 * of rule entries. An entry is either a rule name string, or an object that
 * names the rule via `name`, `type` or `rule`, carries its argument in
 * `value`, `arg` or `params` (first one that is defined wins), and may set
 * a custom error `message`. Field values that are not arrays, and entries
 * that are neither strings nor objects (including null), are ignored.
 *
 * @param {Object} config - Validation configuration, field name -> rule entries.
 * @returns {DataValidator} Validator with all configured rules attached.
 */
function createValidatorFromConfig(config) {
  const validator = new DataValidator();

  for (const [fieldName, fieldRules] of Object.entries(config)) {
    if (!Array.isArray(fieldRules)) {
      continue;
    }

    for (const rule of fieldRules) {
      if (typeof rule === 'string') {
        // Bare rule name, no argument.
        validator.addRule(fieldName, rule);
      } else if (rule !== null && typeof rule === 'object') {
        // `typeof null` is also 'object'; the explicit null check keeps a
        // null entry (valid JSON) from crashing on property access.
        let args = rule.params;
        if (rule.value !== undefined) {
          args = rule.value;
        } else if (rule.arg !== undefined) {
          args = rule.arg;
        }

        validator.addRule(
          fieldName,
          rule.name || rule.type || rule.rule,
          args,
          rule.message
        );
      }
    }
  }

  return validator;
}
|
|
292
|
+
|
|
293
|
+
module.exports = {
|
|
294
|
+
DataValidator,
|
|
295
|
+
ValidationRule,
|
|
296
|
+
BUILT_IN_RULES,
|
|
297
|
+
createValidatorFromConfig
|
|
298
|
+
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"email": ["required", "email"],
|
|
3
|
+
"age": [
|
|
4
|
+
"required",
|
|
5
|
+
{"name": "number"},
|
|
6
|
+
{"name": "min", "value": 0},
|
|
7
|
+
{"name": "max", "value": 120}
|
|
8
|
+
],
|
|
9
|
+
"phone": [
|
|
10
|
+
"required",
|
|
11
|
+
{"name": "pattern", "value": "^\\d{11}$", "message": "手机号必须是11位数字"}
|
|
12
|
+
],
|
|
13
|
+
"status": [
|
|
14
|
+
{"name": "enum", "value": ["active", "inactive", "pending"]}
|
|
15
|
+
]
|
|
16
|
+
}
|