datly 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1773 -2386
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +3 -3
- package/src/code.js +2466 -0
- package/src/index.js +236 -480
- package/src/plot.js +609 -0
- package/src/core/dataLoader.js +0 -407
- package/src/core/utils.js +0 -306
- package/src/core/validator.js +0 -205
- package/src/dataviz/index.js +0 -1566
- package/src/descriptive/centralTendency.js +0 -208
- package/src/descriptive/dispersion.js +0 -273
- package/src/descriptive/position.js +0 -268
- package/src/descriptive/shape.js +0 -336
- package/src/inferential/confidenceIntervals.js +0 -561
- package/src/inferential/hypothesisTesting.js +0 -527
- package/src/inferential/normalityTests.js +0 -587
- package/src/insights/autoAnalyser.js +0 -685
- package/src/insights/interpreter.js +0 -543
- package/src/insights/patternDetector.js +0 -897
- package/src/insights/reportGenerator.js +0 -1072
- package/src/ml/ClassificationMetrics.js +0 -336
- package/src/ml/DecisionTree.js +0 -412
- package/src/ml/KNearestNeighbors.js +0 -317
- package/src/ml/LinearRegression.js +0 -179
- package/src/ml/LogisticRegression.js +0 -396
- package/src/ml/MachineLearning.js +0 -490
- package/src/ml/NaiveBayes.js +0 -296
- package/src/ml/RandomForest.js +0 -323
- package/src/ml/SupportVectorMachine.js +0 -299
- package/src/ml/baseModel.js +0 -106
- package/src/multivariate/correlation.js +0 -653
- package/src/multivariate/regression.js +0 -660
@@ -1,1072 +0,0 @@
|
|
1
|
-
class ReportGenerator {
|
2
|
-
summary(dataset) {
|
3
|
-
if (!dataset || !dataset.data || !dataset.headers) {
|
4
|
-
throw new Error('Invalid dataset format');
|
5
|
-
}
|
6
|
-
|
7
|
-
const basicInfo = this.getBasicInfo(dataset);
|
8
|
-
const columnAnalysis = this.analyzeColumns(dataset);
|
9
|
-
const dataQuality = this.assessDataQuality(dataset);
|
10
|
-
const distributions = this.analyzeDistributions(dataset);
|
11
|
-
const relationships = this.analyzeRelationships(dataset);
|
12
|
-
const insights = this.generateKeyInsights(dataset, columnAnalysis, relationships);
|
13
|
-
|
14
|
-
return {
|
15
|
-
title: 'Statistical Summary Report',
|
16
|
-
generatedAt: new Date().toISOString(),
|
17
|
-
basicInfo: basicInfo,
|
18
|
-
columnAnalysis: columnAnalysis,
|
19
|
-
dataQuality: dataQuality,
|
20
|
-
distributions: distributions,
|
21
|
-
relationships: relationships,
|
22
|
-
keyInsights: insights,
|
23
|
-
recommendations: this.generateRecommendations(dataQuality, columnAnalysis, relationships)
|
24
|
-
};
|
25
|
-
}
|
26
|
-
|
27
|
-
getBasicInfo(dataset) {
|
28
|
-
return {
|
29
|
-
totalRows: dataset.length,
|
30
|
-
totalColumns: dataset.columns,
|
31
|
-
headers: dataset.headers,
|
32
|
-
memoryFootprint: this.estimateMemoryFootprint(dataset),
|
33
|
-
dataTypes: this.getDataTypes(dataset)
|
34
|
-
};
|
35
|
-
}
|
36
|
-
|
37
|
-
analyzeColumns(dataset) {
|
38
|
-
const analysis = {};
|
39
|
-
|
40
|
-
dataset.headers.forEach(header => {
|
41
|
-
const column = dataset.data.map(row => row[header]);
|
42
|
-
const columnType = this.inferColumnType(column);
|
43
|
-
|
44
|
-
analysis[header] = {
|
45
|
-
type: columnType,
|
46
|
-
totalCount: column.length,
|
47
|
-
validCount: this.getValidCount(column),
|
48
|
-
nullCount: this.getNullCount(column),
|
49
|
-
uniqueCount: this.getUniqueCount(column),
|
50
|
-
nullPercentage: this.getNullPercentage(column),
|
51
|
-
...this.getTypeSpecificAnalysis(column, columnType)
|
52
|
-
};
|
53
|
-
});
|
54
|
-
|
55
|
-
return analysis;
|
56
|
-
}
|
57
|
-
|
58
|
-
getTypeSpecificAnalysis(column, type) {
|
59
|
-
const validValues = column.filter(val => val !== null && val !== undefined);
|
60
|
-
|
61
|
-
if (type === 'numeric') {
|
62
|
-
const numericValues = validValues.filter(val =>
|
63
|
-
typeof val === 'number' && !isNaN(val) && isFinite(val)
|
64
|
-
);
|
65
|
-
|
66
|
-
if (numericValues.length === 0) return {};
|
67
|
-
|
68
|
-
return {
|
69
|
-
min: Math.min(...numericValues),
|
70
|
-
max: Math.max(...numericValues),
|
71
|
-
mean: this.calculateMean(numericValues),
|
72
|
-
median: this.calculateMedian(numericValues),
|
73
|
-
standardDeviation: this.calculateStandardDeviation(numericValues),
|
74
|
-
variance: this.calculateVariance(numericValues),
|
75
|
-
skewness: this.calculateSkewness(numericValues),
|
76
|
-
kurtosis: this.calculateKurtosis(numericValues),
|
77
|
-
quartiles: this.calculateQuartiles(numericValues),
|
78
|
-
outliers: this.detectOutliers(numericValues),
|
79
|
-
distribution: this.classifyDistribution(numericValues)
|
80
|
-
};
|
81
|
-
} else if (type === 'categorical') {
|
82
|
-
const frequencyTable = this.calculateFrequencyTable(validValues);
|
83
|
-
return {
|
84
|
-
categories: frequencyTable,
|
85
|
-
mostFrequent: this.getMostFrequent(frequencyTable),
|
86
|
-
leastFrequent: this.getLeastFrequent(frequencyTable),
|
87
|
-
entropy: this.calculateEntropy(frequencyTable),
|
88
|
-
concentration: this.calculateConcentration(frequencyTable)
|
89
|
-
};
|
90
|
-
} else if (type === 'datetime') {
|
91
|
-
const dates = validValues.filter(val => !isNaN(new Date(val).getTime()));
|
92
|
-
if (dates.length === 0) return {};
|
93
|
-
|
94
|
-
const timestamps = dates.map(date => new Date(date).getTime());
|
95
|
-
return {
|
96
|
-
earliest: new Date(Math.min(...timestamps)).toISOString(),
|
97
|
-
latest: new Date(Math.max(...timestamps)).toISOString(),
|
98
|
-
span: Math.max(...timestamps) - Math.min(...timestamps),
|
99
|
-
frequency: this.analyzeDateFrequency(dates)
|
100
|
-
};
|
101
|
-
}
|
102
|
-
|
103
|
-
return {};
|
104
|
-
}
|
105
|
-
|
106
|
-
assessDataQuality(dataset) {
|
107
|
-
const issues = [];
|
108
|
-
let overallScore = 100;
|
109
|
-
|
110
|
-
const completenessScore = this.assessCompleteness(dataset);
|
111
|
-
const consistencyScore = this.assessConsistency(dataset);
|
112
|
-
const uniquenessScore = this.assessUniqueness(dataset);
|
113
|
-
const validityScore = this.assessValidity(dataset);
|
114
|
-
|
115
|
-
overallScore = (completenessScore + consistencyScore + uniquenessScore + validityScore) / 4;
|
116
|
-
|
117
|
-
if (completenessScore < 80) {
|
118
|
-
issues.push({
|
119
|
-
type: 'completeness',
|
120
|
-
severity: completenessScore < 50 ? 'high' : 'medium',
|
121
|
-
description: `${(100 - completenessScore).toFixed(1)}% of data is missing`
|
122
|
-
});
|
123
|
-
}
|
124
|
-
|
125
|
-
if (consistencyScore < 80) {
|
126
|
-
issues.push({
|
127
|
-
type: 'consistency',
|
128
|
-
severity: consistencyScore < 50 ? 'high' : 'medium',
|
129
|
-
description: 'Data consistency issues detected'
|
130
|
-
});
|
131
|
-
}
|
132
|
-
|
133
|
-
return {
|
134
|
-
overallScore: overallScore,
|
135
|
-
completenessScore: completenessScore,
|
136
|
-
consistencyScore: consistencyScore,
|
137
|
-
uniquenessScore: uniquenessScore,
|
138
|
-
validityScore: validityScore,
|
139
|
-
issues: issues,
|
140
|
-
recommendation: this.getQualityRecommendation(overallScore)
|
141
|
-
};
|
142
|
-
}
|
143
|
-
|
144
|
-
analyzeDistributions(dataset) {
|
145
|
-
const distributions = {};
|
146
|
-
|
147
|
-
dataset.headers.forEach(header => {
|
148
|
-
const column = dataset.data.map(row => row[header]);
|
149
|
-
const validValues = column.filter(val =>
|
150
|
-
typeof val === 'number' && !isNaN(val) && isFinite(val)
|
151
|
-
);
|
152
|
-
|
153
|
-
if (validValues.length > 5) {
|
154
|
-
distributions[header] = {
|
155
|
-
type: this.classifyDistribution(validValues),
|
156
|
-
normalityTest: this.testNormality(validValues),
|
157
|
-
histogram: this.createHistogram(validValues),
|
158
|
-
descriptiveStats: this.getDescriptiveStats(validValues)
|
159
|
-
};
|
160
|
-
}
|
161
|
-
});
|
162
|
-
|
163
|
-
return distributions;
|
164
|
-
}
|
165
|
-
|
166
|
-
analyzeRelationships(dataset) {
|
167
|
-
const numericColumns = dataset.headers.filter(header => {
|
168
|
-
const column = dataset.data.map(row => row[header]);
|
169
|
-
const numericCount = column.filter(val =>
|
170
|
-
typeof val === 'number' && !isNaN(val) && isFinite(val)
|
171
|
-
).length;
|
172
|
-
return numericCount > column.length * 0.5;
|
173
|
-
});
|
174
|
-
|
175
|
-
if (numericColumns.length < 2) {
|
176
|
-
return { correlations: {}, strongRelationships: [] };
|
177
|
-
}
|
178
|
-
|
179
|
-
const correlations = {};
|
180
|
-
const strongRelationships = [];
|
181
|
-
|
182
|
-
for (let i = 0; i < numericColumns.length; i++) {
|
183
|
-
correlations[numericColumns[i]] = {};
|
184
|
-
for (let j = 0; j < numericColumns.length; j++) {
|
185
|
-
if (i === j) {
|
186
|
-
correlations[numericColumns[i]][numericColumns[j]] = 1;
|
187
|
-
} else {
|
188
|
-
const col1 = dataset.data.map(row => row[numericColumns[i]]);
|
189
|
-
const col2 = dataset.data.map(row => row[numericColumns[j]]);
|
190
|
-
const correlation = this.calculatePearsonCorrelation(col1, col2);
|
191
|
-
correlations[numericColumns[i]][numericColumns[j]] = correlation;
|
192
|
-
|
193
|
-
if (Math.abs(correlation) > 0.7 && i < j) {
|
194
|
-
strongRelationships.push({
|
195
|
-
variable1: numericColumns[i],
|
196
|
-
variable2: numericColumns[j],
|
197
|
-
correlation: correlation,
|
198
|
-
strength: this.getCorrelationStrength(Math.abs(correlation)),
|
199
|
-
direction: correlation > 0 ? 'positive' : 'negative'
|
200
|
-
});
|
201
|
-
}
|
202
|
-
}
|
203
|
-
}
|
204
|
-
}
|
205
|
-
|
206
|
-
return {
|
207
|
-
correlations: correlations,
|
208
|
-
strongRelationships: strongRelationships,
|
209
|
-
averageCorrelation: this.calculateAverageCorrelation(correlations, numericColumns)
|
210
|
-
};
|
211
|
-
}
|
212
|
-
|
213
|
-
generateKeyInsights(dataset, columnAnalysis, relationships) {
|
214
|
-
const insights = [];
|
215
|
-
|
216
|
-
insights.push(...this.generateDataVolumeInsights(dataset));
|
217
|
-
insights.push(...this.generateColumnInsights(columnAnalysis));
|
218
|
-
insights.push(...this.generateRelationshipInsights(relationships));
|
219
|
-
insights.push(...this.generateDistributionInsights(columnAnalysis));
|
220
|
-
insights.push(...this.generateQualityInsights(dataset, columnAnalysis));
|
221
|
-
|
222
|
-
return insights.sort((a, b) => b.importance - a.importance).slice(0, 10);
|
223
|
-
}
|
224
|
-
|
225
|
-
generateDataVolumeInsights(dataset) {
|
226
|
-
const insights = [];
|
227
|
-
|
228
|
-
if (dataset.length > 10000) {
|
229
|
-
insights.push({
|
230
|
-
type: 'volume',
|
231
|
-
title: 'Large Dataset Detected',
|
232
|
-
description: `Dataset contains ${dataset.length.toLocaleString()} rows, which is suitable for robust statistical analysis.`,
|
233
|
-
importance: 7,
|
234
|
-
actionable: false
|
235
|
-
});
|
236
|
-
} else if (dataset.length < 30) {
|
237
|
-
insights.push({
|
238
|
-
type: 'volume',
|
239
|
-
title: 'Small Sample Size Warning',
|
240
|
-
description: `Dataset has only ${dataset.length} rows. Statistical tests may lack power.`,
|
241
|
-
importance: 8,
|
242
|
-
actionable: true,
|
243
|
-
recommendation: 'Consider collecting more data for reliable statistical inference.'
|
244
|
-
});
|
245
|
-
}
|
246
|
-
|
247
|
-
if (dataset.columns > 50) {
|
248
|
-
insights.push({
|
249
|
-
type: 'dimensionality',
|
250
|
-
title: 'High-Dimensional Dataset',
|
251
|
-
description: `Dataset has ${dataset.columns} columns, which may benefit from dimensionality reduction.`,
|
252
|
-
importance: 6,
|
253
|
-
actionable: true,
|
254
|
-
recommendation: 'Consider feature selection or PCA to reduce dimensionality.'
|
255
|
-
});
|
256
|
-
}
|
257
|
-
|
258
|
-
return insights;
|
259
|
-
}
|
260
|
-
|
261
|
-
generateColumnInsights(columnAnalysis) {
|
262
|
-
const insights = [];
|
263
|
-
const columns = Object.keys(columnAnalysis);
|
264
|
-
|
265
|
-
const highNullColumns = columns.filter(col =>
|
266
|
-
columnAnalysis[col].nullPercentage > 25
|
267
|
-
);
|
268
|
-
|
269
|
-
if (highNullColumns.length > 0) {
|
270
|
-
insights.push({
|
271
|
-
type: 'data_quality',
|
272
|
-
title: 'High Missing Data Detected',
|
273
|
-
description: `Columns ${highNullColumns.join(', ')} have >25% missing values.`,
|
274
|
-
importance: 9,
|
275
|
-
actionable: true,
|
276
|
-
recommendation: 'Consider imputation strategies or removing these columns.'
|
277
|
-
});
|
278
|
-
}
|
279
|
-
|
280
|
-
const skewedColumns = columns.filter(col => {
|
281
|
-
const analysis = columnAnalysis[col];
|
282
|
-
return analysis.skewness && Math.abs(analysis.skewness) > 2;
|
283
|
-
});
|
284
|
-
|
285
|
-
if (skewedColumns.length > 0) {
|
286
|
-
insights.push({
|
287
|
-
type: 'distribution',
|
288
|
-
title: 'Highly Skewed Variables Found',
|
289
|
-
description: `Columns ${skewedColumns.join(', ')} show extreme skewness.`,
|
290
|
-
importance: 7,
|
291
|
-
actionable: true,
|
292
|
-
recommendation: 'Consider log transformation or other normalization techniques.'
|
293
|
-
});
|
294
|
-
}
|
295
|
-
|
296
|
-
const constantColumns = columns.filter(col =>
|
297
|
-
columnAnalysis[col].uniqueCount === 1
|
298
|
-
);
|
299
|
-
|
300
|
-
if (constantColumns.length > 0) {
|
301
|
-
insights.push({
|
302
|
-
type: 'data_quality',
|
303
|
-
title: 'Constant Variables Detected',
|
304
|
-
description: `Columns ${constantColumns.join(', ')} have no variation.`,
|
305
|
-
importance: 8,
|
306
|
-
actionable: true,
|
307
|
-
recommendation: 'Remove these columns as they provide no information.'
|
308
|
-
});
|
309
|
-
}
|
310
|
-
|
311
|
-
return insights;
|
312
|
-
}
|
313
|
-
|
314
|
-
generateRelationshipInsights(relationships) {
|
315
|
-
const insights = [];
|
316
|
-
|
317
|
-
if (relationships.strongRelationships.length > 0) {
|
318
|
-
const strongest = relationships.strongRelationships[0];
|
319
|
-
insights.push({
|
320
|
-
type: 'correlation',
|
321
|
-
title: 'Strong Correlation Found',
|
322
|
-
description: `${strongest.variable1} and ${strongest.variable2} have a ${strongest.strength.toLowerCase()} ${strongest.direction} correlation (r = ${strongest.correlation.toFixed(3)}).`,
|
323
|
-
importance: 8,
|
324
|
-
actionable: true,
|
325
|
-
recommendation: 'Investigate this relationship further with regression analysis.'
|
326
|
-
});
|
327
|
-
}
|
328
|
-
|
329
|
-
const multicollinearPairs = relationships.strongRelationships.filter(rel =>
|
330
|
-
Math.abs(rel.correlation) > 0.9
|
331
|
-
);
|
332
|
-
|
333
|
-
if (multicollinearPairs.length > 0) {
|
334
|
-
insights.push({
|
335
|
-
type: 'multicollinearity',
|
336
|
-
title: 'Potential Multicollinearity Detected',
|
337
|
-
description: `Very high correlations found between some variables.`,
|
338
|
-
importance: 7,
|
339
|
-
actionable: true,
|
340
|
-
recommendation: 'Consider removing redundant variables before modeling.'
|
341
|
-
});
|
342
|
-
}
|
343
|
-
|
344
|
-
if (relationships.averageCorrelation > 0.5) {
|
345
|
-
insights.push({
|
346
|
-
type: 'correlation',
|
347
|
-
title: 'Generally High Inter-Variable Correlations',
|
348
|
-
description: `Average correlation is ${relationships.averageCorrelation.toFixed(3)}, indicating related variables.`,
|
349
|
-
importance: 6,
|
350
|
-
actionable: false
|
351
|
-
});
|
352
|
-
}
|
353
|
-
|
354
|
-
return insights;
|
355
|
-
}
|
356
|
-
|
357
|
-
generateDistributionInsights(columnAnalysis) {
|
358
|
-
const insights = [];
|
359
|
-
const numericColumns = Object.keys(columnAnalysis).filter(col =>
|
360
|
-
columnAnalysis[col].type === 'numeric'
|
361
|
-
);
|
362
|
-
|
363
|
-
const normalColumns = numericColumns.filter(col => {
|
364
|
-
const analysis = columnAnalysis[col];
|
365
|
-
return analysis.distribution === 'normal' ||
|
366
|
-
(Math.abs(analysis.skewness || 0) < 0.5 && Math.abs(analysis.kurtosis || 0) < 0.5);
|
367
|
-
});
|
368
|
-
|
369
|
-
if (normalColumns.length > numericColumns.length * 0.7) {
|
370
|
-
insights.push({
|
371
|
-
type: 'distribution',
|
372
|
-
title: 'Most Variables Normally Distributed',
|
373
|
-
description: `${normalColumns.length} out of ${numericColumns.length} numeric variables appear normally distributed.`,
|
374
|
-
importance: 6,
|
375
|
-
actionable: false
|
376
|
-
});
|
377
|
-
}
|
378
|
-
|
379
|
-
const outliersColumns = numericColumns.filter(col => {
|
380
|
-
const analysis = columnAnalysis[col];
|
381
|
-
return analysis.outliers && analysis.outliers.count > 0;
|
382
|
-
});
|
383
|
-
|
384
|
-
if (outliersColumns.length > 0) {
|
385
|
-
const totalOutliers = outliersColumns.reduce((sum, col) =>
|
386
|
-
sum + columnAnalysis[col].outliers.count, 0
|
387
|
-
);
|
388
|
-
|
389
|
-
insights.push({
|
390
|
-
type: 'outliers',
|
391
|
-
title: 'Outliers Detected',
|
392
|
-
description: `Found ${totalOutliers} outliers across ${outliersColumns.length} variables.`,
|
393
|
-
importance: 7,
|
394
|
-
actionable: true,
|
395
|
-
recommendation: 'Investigate outliers to determine if they represent errors or genuine extreme values.'
|
396
|
-
});
|
397
|
-
}
|
398
|
-
|
399
|
-
return insights;
|
400
|
-
}
|
401
|
-
|
402
|
-
generateQualityInsights(dataset, columnAnalysis) {
|
403
|
-
const insights = [];
|
404
|
-
const columns = Object.keys(columnAnalysis);
|
405
|
-
|
406
|
-
const duplicateRows = this.countDuplicateRows(dataset);
|
407
|
-
if (duplicateRows > 0) {
|
408
|
-
insights.push({
|
409
|
-
type: 'data_quality',
|
410
|
-
title: 'Duplicate Rows Found',
|
411
|
-
description: `Dataset contains ${duplicateRows} duplicate rows.`,
|
412
|
-
importance: 8,
|
413
|
-
actionable: true,
|
414
|
-
recommendation: 'Remove duplicate rows to avoid bias in analysis.'
|
415
|
-
});
|
416
|
-
}
|
417
|
-
|
418
|
-
const totalMissingCells = columns.reduce((sum, col) =>
|
419
|
-
sum + columnAnalysis[col].nullCount, 0
|
420
|
-
);
|
421
|
-
const totalCells = dataset.length * dataset.columns;
|
422
|
-
const missingPercentage = (totalMissingCells / totalCells) * 100;
|
423
|
-
|
424
|
-
if (missingPercentage > 10) {
|
425
|
-
insights.push({
|
426
|
-
type: 'data_quality',
|
427
|
-
title: 'Significant Missing Data',
|
428
|
-
description: `${missingPercentage.toFixed(1)}% of all data points are missing.`,
|
429
|
-
importance: 9,
|
430
|
-
actionable: true,
|
431
|
-
recommendation: 'Develop a comprehensive missing data strategy.'
|
432
|
-
});
|
433
|
-
}
|
434
|
-
|
435
|
-
return insights;
|
436
|
-
}
|
437
|
-
|
438
|
-
generateRecommendations(dataQuality, columnAnalysis, relationships) {
|
439
|
-
const recommendations = [];
|
440
|
-
|
441
|
-
if (dataQuality.overallScore < 70) {
|
442
|
-
recommendations.push({
|
443
|
-
priority: 'high',
|
444
|
-
category: 'data_cleaning',
|
445
|
-
title: 'Improve Data Quality',
|
446
|
-
description: 'Address missing values, outliers, and inconsistencies before analysis.',
|
447
|
-
steps: [
|
448
|
-
'Handle missing values through imputation or removal',
|
449
|
-
'Investigate and address outliers',
|
450
|
-
'Standardize data formats and categories',
|
451
|
-
'Validate data integrity'
|
452
|
-
]
|
453
|
-
});
|
454
|
-
}
|
455
|
-
|
456
|
-
if (relationships.strongRelationships.length > 0) {
|
457
|
-
recommendations.push({
|
458
|
-
priority: 'medium',
|
459
|
-
category: 'analysis',
|
460
|
-
title: 'Explore Strong Relationships',
|
461
|
-
description: 'Investigate detected correlations with deeper analysis.',
|
462
|
-
steps: [
|
463
|
-
'Perform regression analysis on highly correlated variables',
|
464
|
-
'Create visualizations to understand relationships',
|
465
|
-
'Test for causality where appropriate'
|
466
|
-
]
|
467
|
-
});
|
468
|
-
}
|
469
|
-
|
470
|
-
const numericColumns = Object.keys(columnAnalysis).filter(col =>
|
471
|
-
columnAnalysis[col].type === 'numeric'
|
472
|
-
).length;
|
473
|
-
|
474
|
-
if (numericColumns > 2) {
|
475
|
-
recommendations.push({
|
476
|
-
priority: 'low',
|
477
|
-
category: 'modeling',
|
478
|
-
title: 'Consider Advanced Analytics',
|
479
|
-
description: 'Dataset is suitable for machine learning approaches.',
|
480
|
-
steps: [
|
481
|
-
'Perform feature selection',
|
482
|
-
'Try different modeling approaches',
|
483
|
-
'Validate models with cross-validation',
|
484
|
-
'Interpret model results'
|
485
|
-
]
|
486
|
-
});
|
487
|
-
}
|
488
|
-
|
489
|
-
return recommendations;
|
490
|
-
}
|
491
|
-
|
492
|
-
inferColumnType(column) {
|
493
|
-
const validValues = column.filter(val => val !== null && val !== undefined);
|
494
|
-
if (validValues.length === 0) return 'unknown';
|
495
|
-
|
496
|
-
const numericCount = validValues.filter(val =>
|
497
|
-
typeof val === 'number' && !isNaN(val) && isFinite(val)
|
498
|
-
).length;
|
499
|
-
|
500
|
-
const dateCount = validValues.filter(val => {
|
501
|
-
if (typeof val === 'string') {
|
502
|
-
const date = new Date(val);
|
503
|
-
return !isNaN(date.getTime());
|
504
|
-
}
|
505
|
-
return false;
|
506
|
-
}).length;
|
507
|
-
|
508
|
-
if (numericCount / validValues.length > 0.8) return 'numeric';
|
509
|
-
if (dateCount / validValues.length > 0.8) return 'datetime';
|
510
|
-
|
511
|
-
return 'categorical';
|
512
|
-
}
|
513
|
-
|
514
|
-
getValidCount(column) {
|
515
|
-
return column.filter(val => val !== null && val !== undefined).length;
|
516
|
-
}
|
517
|
-
|
518
|
-
getNullCount(column) {
|
519
|
-
return column.filter(val => val === null || val === undefined).length;
|
520
|
-
}
|
521
|
-
|
522
|
-
getUniqueCount(column) {
|
523
|
-
const validValues = column.filter(val => val !== null && val !== undefined);
|
524
|
-
return new Set(validValues).size;
|
525
|
-
}
|
526
|
-
|
527
|
-
getNullPercentage(column) {
|
528
|
-
return (this.getNullCount(column) / column.length) * 100;
|
529
|
-
}
|
530
|
-
|
531
|
-
calculateMean(values) {
|
532
|
-
return values.reduce((sum, val) => sum + val, 0) / values.length;
|
533
|
-
}
|
534
|
-
|
535
|
-
calculateMedian(values) {
|
536
|
-
const sorted = [...values].sort((a, b) => a - b);
|
537
|
-
const mid = Math.floor(sorted.length / 2);
|
538
|
-
return sorted.length % 2 === 0 ?
|
539
|
-
(sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
|
540
|
-
}
|
541
|
-
|
542
|
-
calculateStandardDeviation(values) {
|
543
|
-
const mean = this.calculateMean(values);
|
544
|
-
const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / (values.length - 1);
|
545
|
-
return Math.sqrt(variance);
|
546
|
-
}
|
547
|
-
|
548
|
-
calculateVariance(values) {
|
549
|
-
const mean = this.calculateMean(values);
|
550
|
-
return values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / (values.length - 1);
|
551
|
-
}
|
552
|
-
|
553
|
-
calculateSkewness(values) {
|
554
|
-
const n = values.length;
|
555
|
-
const mean = this.calculateMean(values);
|
556
|
-
const stdDev = this.calculateStandardDeviation(values);
|
557
|
-
|
558
|
-
if (stdDev === 0) return 0;
|
559
|
-
|
560
|
-
const skewSum = values.reduce((sum, val) => {
|
561
|
-
return sum + Math.pow((val - mean) / stdDev, 3);
|
562
|
-
}, 0);
|
563
|
-
|
564
|
-
return (n / ((n - 1) * (n - 2))) * skewSum;
|
565
|
-
}
|
566
|
-
|
567
|
-
calculateKurtosis(values) {
|
568
|
-
const n = values.length;
|
569
|
-
const mean = this.calculateMean(values);
|
570
|
-
const stdDev = this.calculateStandardDeviation(values);
|
571
|
-
|
572
|
-
if (stdDev === 0) return -3;
|
573
|
-
|
574
|
-
const kurtSum = values.reduce((sum, val) => {
|
575
|
-
return sum + Math.pow((val - mean) / stdDev, 4);
|
576
|
-
}, 0);
|
577
|
-
|
578
|
-
return ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))) * kurtSum -
|
579
|
-
(3 * Math.pow(n - 1, 2)) / ((n - 2) * (n - 3));
|
580
|
-
}
|
581
|
-
|
582
|
-
calculateQuartiles(values) {
|
583
|
-
const sorted = [...values].sort((a, b) => a - b);
|
584
|
-
const n = sorted.length;
|
585
|
-
|
586
|
-
const q1Index = Math.floor(n * 0.25);
|
587
|
-
const q2Index = Math.floor(n * 0.5);
|
588
|
-
const q3Index = Math.floor(n * 0.75);
|
589
|
-
|
590
|
-
return {
|
591
|
-
q1: sorted[q1Index],
|
592
|
-
q2: sorted[q2Index],
|
593
|
-
q3: sorted[q3Index],
|
594
|
-
iqr: sorted[q3Index] - sorted[q1Index]
|
595
|
-
};
|
596
|
-
}
|
597
|
-
|
598
|
-
detectOutliers(values) {
|
599
|
-
const quartiles = this.calculateQuartiles(values);
|
600
|
-
const lowerBound = quartiles.q1 - 1.5 * quartiles.iqr;
|
601
|
-
const upperBound = quartiles.q3 + 1.5 * quartiles.iqr;
|
602
|
-
|
603
|
-
const outliers = values.filter(val => val < lowerBound || val > upperBound);
|
604
|
-
|
605
|
-
return {
|
606
|
-
count: outliers.length,
|
607
|
-
percentage: (outliers.length / values.length) * 100,
|
608
|
-
values: outliers,
|
609
|
-
lowerBound: lowerBound,
|
610
|
-
upperBound: upperBound
|
611
|
-
};
|
612
|
-
}
|
613
|
-
|
614
|
-
classifyDistribution(values) {
|
615
|
-
const skewness = this.calculateSkewness(values);
|
616
|
-
const kurtosis = this.calculateKurtosis(values);
|
617
|
-
|
618
|
-
if (Math.abs(skewness) < 0.5 && Math.abs(kurtosis) < 0.5) return 'normal';
|
619
|
-
if (skewness > 1) return 'right_skewed';
|
620
|
-
if (skewness < -1) return 'left_skewed';
|
621
|
-
if (kurtosis > 1) return 'heavy_tailed';
|
622
|
-
if (kurtosis < -1) return 'light_tailed';
|
623
|
-
|
624
|
-
return 'unknown';
|
625
|
-
}
|
626
|
-
|
627
|
-
calculateFrequencyTable(values) {
|
628
|
-
const frequencies = {};
|
629
|
-
values.forEach(value => {
|
630
|
-
const key = String(value);
|
631
|
-
frequencies[key] = (frequencies[key] || 0) + 1;
|
632
|
-
});
|
633
|
-
|
634
|
-
const total = values.length;
|
635
|
-
return Object.entries(frequencies).map(([value, count]) => ({
|
636
|
-
value: value,
|
637
|
-
count: count,
|
638
|
-
percentage: (count / total) * 100
|
639
|
-
})).sort((a, b) => b.count - a.count);
|
640
|
-
}
|
641
|
-
|
642
|
-
getMostFrequent(frequencyTable) {
|
643
|
-
return frequencyTable[0] || null;
|
644
|
-
}
|
645
|
-
|
646
|
-
getLeastFrequent(frequencyTable) {
|
647
|
-
return frequencyTable[frequencyTable.length - 1] || null;
|
648
|
-
}
|
649
|
-
|
650
|
-
calculateEntropy(frequencyTable) {
|
651
|
-
const total = frequencyTable.reduce((sum, item) => sum + item.count, 0);
|
652
|
-
return frequencyTable.reduce((entropy, item) => {
|
653
|
-
const probability = item.count / total;
|
654
|
-
return entropy - probability * Math.log2(probability);
|
655
|
-
}, 0);
|
656
|
-
}
|
657
|
-
|
658
|
-
calculateConcentration(frequencyTable) {
|
659
|
-
const total = frequencyTable.reduce((sum, item) => sum + item.count, 0);
|
660
|
-
const topCategory = frequencyTable[0];
|
661
|
-
return topCategory ? (topCategory.count / total) * 100 : 0;
|
662
|
-
}
|
663
|
-
|
664
|
-
analyzeDateFrequency(dates) {
|
665
|
-
const timestamps = dates.map(date => new Date(date).getTime());
|
666
|
-
const sorted = timestamps.sort((a, b) => a - b);
|
667
|
-
|
668
|
-
if (sorted.length < 2) return 'insufficient_data';
|
669
|
-
|
670
|
-
const intervals = [];
|
671
|
-
for (let i = 1; i < sorted.length; i++) {
|
672
|
-
intervals.push(sorted[i] - sorted[i - 1]);
|
673
|
-
}
|
674
|
-
|
675
|
-
const avgInterval = intervals.reduce((sum, interval) => sum + interval, 0) / intervals.length;
|
676
|
-
const dayInMs = 24 * 60 * 60 * 1000;
|
677
|
-
|
678
|
-
if (avgInterval < dayInMs) return 'sub_daily';
|
679
|
-
if (avgInterval < 7 * dayInMs) return 'daily';
|
680
|
-
if (avgInterval < 30 * dayInMs) return 'weekly';
|
681
|
-
if (avgInterval < 365 * dayInMs) return 'monthly';
|
682
|
-
|
683
|
-
return 'yearly';
|
684
|
-
}
|
685
|
-
|
686
|
-
assessCompleteness(dataset) {
|
687
|
-
const totalCells = dataset.length * dataset.columns;
|
688
|
-
let completeCells = 0;
|
689
|
-
|
690
|
-
dataset.data.forEach(row => {
|
691
|
-
dataset.headers.forEach(header => {
|
692
|
-
if (row[header] !== null && row[header] !== undefined) {
|
693
|
-
completeCells++;
|
694
|
-
}
|
695
|
-
});
|
696
|
-
});
|
697
|
-
|
698
|
-
return (completeCells / totalCells) * 100;
|
699
|
-
}
|
700
|
-
|
701
|
-
assessConsistency(dataset) {
|
702
|
-
let score = 100;
|
703
|
-
|
704
|
-
dataset.headers.forEach(header => {
|
705
|
-
const column = dataset.data.map(row => row[header]);
|
706
|
-
const types = new Set(column.filter(val => val !== null && val !== undefined)
|
707
|
-
.map(val => typeof val));
|
708
|
-
|
709
|
-
if (types.size > 1) {
|
710
|
-
score -= 10;
|
711
|
-
}
|
712
|
-
});
|
713
|
-
|
714
|
-
return Math.max(0, score);
|
715
|
-
}
|
716
|
-
|
717
|
-
assessUniqueness(dataset) {
|
718
|
-
const duplicates = this.countDuplicateRows(dataset);
|
719
|
-
return Math.max(0, 100 - (duplicates / dataset.length) * 100);
|
720
|
-
}
|
721
|
-
|
722
|
-
assessValidity(dataset) {
|
723
|
-
let score = 100;
|
724
|
-
let totalValues = 0;
|
725
|
-
let invalidValues = 0;
|
726
|
-
|
727
|
-
dataset.data.forEach(row => {
|
728
|
-
dataset.headers.forEach(header => {
|
729
|
-
const value = row[header];
|
730
|
-
if (value !== null && value !== undefined) {
|
731
|
-
totalValues++;
|
732
|
-
if (typeof value === 'number' && !isFinite(value)) {
|
733
|
-
invalidValues++;
|
734
|
-
}
|
735
|
-
}
|
736
|
-
});
|
737
|
-
});
|
738
|
-
|
739
|
-
if (totalValues > 0) {
|
740
|
-
score = Math.max(0, 100 - (invalidValues / totalValues) * 100);
|
741
|
-
}
|
742
|
-
|
743
|
-
return score;
|
744
|
-
}
|
745
|
-
|
746
|
-
getQualityRecommendation(score) {
|
747
|
-
if (score >= 90) return 'Excellent data quality - ready for analysis';
|
748
|
-
if (score >= 80) return 'Good data quality - minor cleaning recommended';
|
749
|
-
if (score >= 70) return 'Fair data quality - significant cleaning needed';
|
750
|
-
if (score >= 60) return 'Poor data quality - extensive preprocessing required';
|
751
|
-
return 'Very poor data quality - major data work needed before analysis';
|
752
|
-
}
|
753
|
-
|
754
|
-
testNormality(values) {
|
755
|
-
if (values.length < 8) return { test: 'insufficient_data' };
|
756
|
-
|
757
|
-
const mean = this.calculateMean(values);
|
758
|
-
const stdDev = this.calculateStandardDeviation(values);
|
759
|
-
const skewness = this.calculateSkewness(values);
|
760
|
-
const kurtosis = this.calculateKurtosis(values);
|
761
|
-
|
762
|
-
const jarqueBera = (values.length / 6) * (Math.pow(skewness, 2) + Math.pow(kurtosis, 2) / 4);
|
763
|
-
const pValue = 1 - this.chiSquareCDF(jarqueBera, 2);
|
764
|
-
|
765
|
-
return {
|
766
|
-
test: 'jarque_bera',
|
767
|
-
statistic: jarqueBera,
|
768
|
-
pValue: pValue,
|
769
|
-
isNormal: pValue > 0.05,
|
770
|
-
skewness: skewness,
|
771
|
-
kurtosis: kurtosis
|
772
|
-
};
|
773
|
-
}
|
774
|
-
|
775
|
-
createHistogram(values, bins = 10) {
|
776
|
-
const min = Math.min(...values);
|
777
|
-
const max = Math.max(...values);
|
778
|
-
const binWidth = (max - min) / bins;
|
779
|
-
|
780
|
-
const histogram = Array(bins).fill(0);
|
781
|
-
|
782
|
-
values.forEach(value => {
|
783
|
-
let binIndex = Math.floor((value - min) / binWidth);
|
784
|
-
if (binIndex === bins) binIndex = bins - 1;
|
785
|
-
histogram[binIndex]++;
|
786
|
-
});
|
787
|
-
|
788
|
-
return histogram.map((count, index) => ({
|
789
|
-
binStart: min + index * binWidth,
|
790
|
-
binEnd: min + (index + 1) * binWidth,
|
791
|
-
count: count,
|
792
|
-
percentage: (count / values.length) * 100
|
793
|
-
}));
|
794
|
-
}
|
795
|
-
|
796
|
-
getDescriptiveStats(values) {
|
797
|
-
return {
|
798
|
-
count: values.length,
|
799
|
-
mean: this.calculateMean(values),
|
800
|
-
median: this.calculateMedian(values),
|
801
|
-
min: Math.min(...values),
|
802
|
-
max: Math.max(...values),
|
803
|
-
std: this.calculateStandardDeviation(values),
|
804
|
-
var: this.calculateVariance(values),
|
805
|
-
skewness: this.calculateSkewness(values),
|
806
|
-
kurtosis: this.calculateKurtosis(values)
|
807
|
-
};
|
808
|
-
}
|
809
|
-
|
810
|
-
calculatePearsonCorrelation(x, y) {
|
811
|
-
const validPairs = [];
|
812
|
-
for (let i = 0; i < x.length; i++) {
|
813
|
-
if (typeof x[i] === 'number' && typeof y[i] === 'number' &&
|
814
|
-
!isNaN(x[i]) && !isNaN(y[i]) && isFinite(x[i]) && isFinite(y[i])) {
|
815
|
-
validPairs.push({ x: x[i], y: y[i] });
|
816
|
-
}
|
817
|
-
}
|
818
|
-
|
819
|
-
if (validPairs.length < 3) return 0;
|
820
|
-
|
821
|
-
const n = validPairs.length;
|
822
|
-
const xValues = validPairs.map(pair => pair.x);
|
823
|
-
const yValues = validPairs.map(pair => pair.y);
|
824
|
-
|
825
|
-
const meanX = xValues.reduce((sum, val) => sum + val, 0) / n;
|
826
|
-
const meanY = yValues.reduce((sum, val) => sum + val, 0) / n;
|
827
|
-
|
828
|
-
let numerator = 0;
|
829
|
-
let sumXSquared = 0;
|
830
|
-
let sumYSquared = 0;
|
831
|
-
|
832
|
-
for (let i = 0; i < n; i++) {
|
833
|
-
const xDiff = xValues[i] - meanX;
|
834
|
-
const yDiff = yValues[i] - meanY;
|
835
|
-
numerator += xDiff * yDiff;
|
836
|
-
sumXSquared += xDiff * xDiff;
|
837
|
-
sumYSquared += yDiff * yDiff;
|
838
|
-
}
|
839
|
-
|
840
|
-
const denominator = Math.sqrt(sumXSquared * sumYSquared);
|
841
|
-
return denominator === 0 ? 0 : numerator / denominator;
|
842
|
-
}
|
843
|
-
|
844
|
-
getCorrelationStrength(absCorrelation) {
|
845
|
-
if (absCorrelation >= 0.9) return 'Very Strong';
|
846
|
-
if (absCorrelation >= 0.7) return 'Strong';
|
847
|
-
if (absCorrelation >= 0.5) return 'Moderate';
|
848
|
-
if (absCorrelation >= 0.3) return 'Weak';
|
849
|
-
return 'Very Weak';
|
850
|
-
}
|
851
|
-
|
852
|
-
calculateAverageCorrelation(correlations, columns) {
|
853
|
-
let sum = 0;
|
854
|
-
let count = 0;
|
855
|
-
|
856
|
-
for (let i = 0; i < columns.length; i++) {
|
857
|
-
for (let j = i + 1; j < columns.length; j++) {
|
858
|
-
const correlation = correlations[columns[i]][columns[j]];
|
859
|
-
if (!isNaN(correlation)) {
|
860
|
-
sum += Math.abs(correlation);
|
861
|
-
count++;
|
862
|
-
}
|
863
|
-
}
|
864
|
-
}
|
865
|
-
|
866
|
-
return count > 0 ? sum / count : 0;
|
867
|
-
}
|
868
|
-
|
869
|
-
countDuplicateRows(dataset) {
|
870
|
-
const seen = new Set();
|
871
|
-
let duplicates = 0;
|
872
|
-
|
873
|
-
dataset.data.forEach(row => {
|
874
|
-
const rowString = JSON.stringify(row);
|
875
|
-
if (seen.has(rowString)) {
|
876
|
-
duplicates++;
|
877
|
-
} else {
|
878
|
-
seen.add(rowString);
|
879
|
-
}
|
880
|
-
});
|
881
|
-
|
882
|
-
return duplicates;
|
883
|
-
}
|
884
|
-
|
885
|
-
getDataTypes(dataset) {
|
886
|
-
const types = {};
|
887
|
-
|
888
|
-
dataset.headers.forEach(header => {
|
889
|
-
const column = dataset.data.map(row => row[header]);
|
890
|
-
const validValues = column.filter(val => val !== null && val !== undefined);
|
891
|
-
|
892
|
-
if (validValues.length === 0) {
|
893
|
-
types[header] = 'empty';
|
894
|
-
return;
|
895
|
-
}
|
896
|
-
|
897
|
-
const typeSet = new Set(validValues.map(val => typeof val));
|
898
|
-
if (typeSet.size === 1) {
|
899
|
-
types[header] = Array.from(typeSet)[0];
|
900
|
-
} else {
|
901
|
-
types[header] = 'mixed';
|
902
|
-
}
|
903
|
-
});
|
904
|
-
|
905
|
-
return types;
|
906
|
-
}
|
907
|
-
|
908
|
-
estimateMemoryFootprint(dataset) {
|
909
|
-
let totalBytes = 0;
|
910
|
-
|
911
|
-
dataset.data.forEach(row => {
|
912
|
-
dataset.headers.forEach(header => {
|
913
|
-
const value = row[header];
|
914
|
-
if (typeof value === 'string') {
|
915
|
-
totalBytes += value.length * 2;
|
916
|
-
} else if (typeof value === 'number') {
|
917
|
-
totalBytes += 8;
|
918
|
-
} else if (typeof value === 'boolean') {
|
919
|
-
totalBytes += 1;
|
920
|
-
} else {
|
921
|
-
totalBytes += 8;
|
922
|
-
}
|
923
|
-
});
|
924
|
-
});
|
925
|
-
|
926
|
-
const sizeInKB = totalBytes / 1024;
|
927
|
-
const sizeInMB = sizeInKB / 1024;
|
928
|
-
|
929
|
-
if (sizeInMB >= 1) {
|
930
|
-
return `${sizeInMB.toFixed(2)} MB`;
|
931
|
-
} else if (sizeInKB >= 1) {
|
932
|
-
return `${sizeInKB.toFixed(2)} KB`;
|
933
|
-
} else {
|
934
|
-
return `${totalBytes} bytes`;
|
935
|
-
}
|
936
|
-
}
|
937
|
-
|
938
|
-
chiSquareCDF(x, df) {
|
939
|
-
if (x <= 0) return 0;
|
940
|
-
return this.incompleteGamma(df / 2, x / 2) / this.gamma(df / 2);
|
941
|
-
}
|
942
|
-
|
943
|
-
incompleteGamma(a, x) {
|
944
|
-
if (x <= 0) return 0;
|
945
|
-
|
946
|
-
let sum = 1;
|
947
|
-
let term = 1;
|
948
|
-
|
949
|
-
for (let n = 1; n < 100; n++) {
|
950
|
-
term *= x / (a + n - 1);
|
951
|
-
sum += term;
|
952
|
-
if (Math.abs(term) < 1e-12) break;
|
953
|
-
}
|
954
|
-
|
955
|
-
return Math.pow(x, a) * Math.exp(-x) * sum;
|
956
|
-
}
|
957
|
-
|
958
|
-
gamma(x) {
|
959
|
-
const coefficients = [
|
960
|
-
0.99999999999980993, 676.5203681218851, -1259.1392167224028,
|
961
|
-
771.32342877765313, -176.61502916214059, 12.507343278686905,
|
962
|
-
-0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7
|
963
|
-
];
|
964
|
-
|
965
|
-
if (x < 0.5) {
|
966
|
-
return Math.PI / (Math.sin(Math.PI * x) * this.gamma(1 - x));
|
967
|
-
}
|
968
|
-
|
969
|
-
x -= 1;
|
970
|
-
let result = coefficients[0];
|
971
|
-
for (let i = 1; i < coefficients.length; i++) {
|
972
|
-
result += coefficients[i] / (x + i);
|
973
|
-
}
|
974
|
-
|
975
|
-
const t = x + coefficients.length - 1.5;
|
976
|
-
return Math.sqrt(2 * Math.PI) * Math.pow(t, x + 0.5) * Math.exp(-t) * result;
|
977
|
-
}
|
978
|
-
|
979
|
-
generateTextReport(summaryData) {
|
980
|
-
let report = '';
|
981
|
-
|
982
|
-
report += `STATISTICAL SUMMARY REPORT\n`;
|
983
|
-
report += `Generated: ${new Date(summaryData.generatedAt).toLocaleString()}\n`;
|
984
|
-
report += `${'='.repeat(50)}\n\n`;
|
985
|
-
|
986
|
-
report += `BASIC INFORMATION\n`;
|
987
|
-
report += `-`.repeat(20) + '\n';
|
988
|
-
report += `Rows: ${summaryData.basicInfo.totalRows.toLocaleString()}\n`;
|
989
|
-
report += `Columns: ${summaryData.basicInfo.totalColumns}\n`;
|
990
|
-
report += `Memory: ${summaryData.basicInfo.memoryFootprint}\n\n`;
|
991
|
-
|
992
|
-
report += `DATA QUALITY\n`;
|
993
|
-
report += `-`.repeat(20) + '\n';
|
994
|
-
report += `Overall Score: ${summaryData.dataQuality.overallScore.toFixed(1)}/100\n`;
|
995
|
-
report += `Completeness: ${summaryData.dataQuality.completenessScore.toFixed(1)}%\n`;
|
996
|
-
report += `Consistency: ${summaryData.dataQuality.consistencyScore.toFixed(1)}%\n`;
|
997
|
-
report += `${summaryData.dataQuality.recommendation}\n\n`;
|
998
|
-
|
999
|
-
if (summaryData.keyInsights.length > 0) {
|
1000
|
-
report += `KEY INSIGHTS\n`;
|
1001
|
-
report += `-`.repeat(20) + '\n';
|
1002
|
-
summaryData.keyInsights.slice(0, 5).forEach((insight, index) => {
|
1003
|
-
report += `${index + 1}. ${insight.title}\n`;
|
1004
|
-
report += ` ${insight.description}\n`;
|
1005
|
-
if (insight.recommendation) {
|
1006
|
-
report += ` → ${insight.recommendation}\n`;
|
1007
|
-
}
|
1008
|
-
report += '\n';
|
1009
|
-
});
|
1010
|
-
}
|
1011
|
-
|
1012
|
-
if (summaryData.relationships.strongRelationships.length > 0) {
|
1013
|
-
report += `STRONG RELATIONSHIPS\n`;
|
1014
|
-
report += `-`.repeat(20) + '\n';
|
1015
|
-
summaryData.relationships.strongRelationships.slice(0, 3).forEach(rel => {
|
1016
|
-
report += `${rel.variable1} ↔ ${rel.variable2}: ${rel.correlation.toFixed(3)} (${rel.strength})\n`;
|
1017
|
-
});
|
1018
|
-
report += '\n';
|
1019
|
-
}
|
1020
|
-
|
1021
|
-
if (summaryData.recommendations.length > 0) {
|
1022
|
-
report += `RECOMMENDATIONS\n`;
|
1023
|
-
report += `-`.repeat(20) + '\n';
|
1024
|
-
summaryData.recommendations.forEach((rec, index) => {
|
1025
|
-
report += `${index + 1}. [${rec.priority.toUpperCase()}] ${rec.title}\n`;
|
1026
|
-
report += ` ${rec.description}\n`;
|
1027
|
-
rec.steps.forEach(step => {
|
1028
|
-
report += ` • ${step}\n`;
|
1029
|
-
});
|
1030
|
-
report += '\n';
|
1031
|
-
});
|
1032
|
-
}
|
1033
|
-
|
1034
|
-
return report;
|
1035
|
-
}
|
1036
|
-
|
1037
|
-
exportSummary(summaryData, format = 'json') {
|
1038
|
-
switch (format) {
|
1039
|
-
case 'json':
|
1040
|
-
return JSON.stringify(summaryData, null, 2);
|
1041
|
-
case 'text':
|
1042
|
-
return this.generateTextReport(summaryData);
|
1043
|
-
case 'csv':
|
1044
|
-
return this.generateCSVReport(summaryData);
|
1045
|
-
default:
|
1046
|
-
throw new Error(`Unsupported export format: ${format}`);
|
1047
|
-
}
|
1048
|
-
}
|
1049
|
-
|
1050
|
-
generateCSVReport(summaryData) {
|
1051
|
-
let csv = 'Metric,Value\n';
|
1052
|
-
|
1053
|
-
csv += `Total Rows,${summaryData.basicInfo.totalRows}\n`;
|
1054
|
-
csv += `Total Columns,${summaryData.basicInfo.totalColumns}\n`;
|
1055
|
-
csv += `Overall Quality Score,${summaryData.dataQuality.overallScore.toFixed(1)}\n`;
|
1056
|
-
csv += `Completeness Score,${summaryData.dataQuality.completenessScore.toFixed(1)}\n`;
|
1057
|
-
csv += `Consistency Score,${summaryData.dataQuality.consistencyScore.toFixed(1)}\n`;
|
1058
|
-
csv += `Strong Relationships,${summaryData.relationships.strongRelationships.length}\n`;
|
1059
|
-
csv += `Key Insights,${summaryData.keyInsights.length}\n`;
|
1060
|
-
|
1061
|
-
if (summaryData.relationships.strongRelationships.length > 0) {
|
1062
|
-
csv += '\nVariable 1,Variable 2,Correlation,Strength\n';
|
1063
|
-
summaryData.relationships.strongRelationships.forEach(rel => {
|
1064
|
-
csv += `${rel.variable1},${rel.variable2},${rel.correlation.toFixed(4)},${rel.strength}\n`;
|
1065
|
-
});
|
1066
|
-
}
|
1067
|
-
|
1068
|
-
return csv;
|
1069
|
-
}
|
1070
|
-
}
|
1071
|
-
|
1072
|
-
export default ReportGenerator;
|