mintwaterfall 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +223 -0
- package/CONTRIBUTING.md +199 -0
- package/README.md +363 -0
- package/dist/index.d.ts +149 -0
- package/dist/mintwaterfall.cjs.js +7978 -0
- package/dist/mintwaterfall.esm.js +7907 -0
- package/dist/mintwaterfall.min.js +7 -0
- package/dist/mintwaterfall.umd.js +7978 -0
- package/index.d.ts +149 -0
- package/package.json +126 -0
- package/src/enterprise/enterprise-core.js +0 -0
- package/src/enterprise/enterprise-feature-template.js +0 -0
- package/src/enterprise/feature-registry.js +0 -0
- package/src/enterprise/features/breakdown.js +0 -0
- package/src/features/breakdown.js +0 -0
- package/src/features/conditional-formatting.js +0 -0
- package/src/index.js +111 -0
- package/src/mintwaterfall-accessibility.ts +680 -0
- package/src/mintwaterfall-advanced-data.ts +1034 -0
- package/src/mintwaterfall-advanced-interactions.ts +649 -0
- package/src/mintwaterfall-advanced-performance.ts +582 -0
- package/src/mintwaterfall-animations.ts +595 -0
- package/src/mintwaterfall-brush.ts +471 -0
- package/src/mintwaterfall-chart-core.ts +296 -0
- package/src/mintwaterfall-chart.ts +1915 -0
- package/src/mintwaterfall-data.ts +1100 -0
- package/src/mintwaterfall-export.ts +475 -0
- package/src/mintwaterfall-hierarchical-layouts.ts +724 -0
- package/src/mintwaterfall-layouts.ts +647 -0
- package/src/mintwaterfall-performance.ts +573 -0
- package/src/mintwaterfall-scales.ts +437 -0
- package/src/mintwaterfall-shapes.ts +385 -0
- package/src/mintwaterfall-statistics.ts +821 -0
- package/src/mintwaterfall-themes.ts +391 -0
- package/src/mintwaterfall-tooltip.ts +450 -0
- package/src/mintwaterfall-zoom.ts +399 -0
- package/src/types/js-modules.d.ts +25 -0
- package/src/utils/compatibility-layer.js +0 -0
|
@@ -0,0 +1,821 @@
|
|
|
1
|
+
// MintWaterfall Advanced Statistical Analysis - TypeScript Version
|
|
2
|
+
// Provides comprehensive statistical analysis features for waterfall chart data
|
|
3
|
+
|
|
4
|
+
import * as d3 from 'd3';
|
|
5
|
+
import { median, variance, deviation, quantile, bisector, ascending } from 'd3-array';
|
|
6
|
+
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// TYPE DEFINITIONS
|
|
9
|
+
// ============================================================================
|
|
10
|
+
|
|
11
|
+
/**
 * Descriptive statistics for a numeric sample, as returned by
 * `calculateSummary`. When the sample has no usable values every numeric
 * field is 0 and `mode` is empty.
 */
export interface StatisticalSummary {
    /** Number of values left after null/NaN entries are discarded. */
    count: number;
    /** Sum of the usable values. */
    sum: number;
    /** Arithmetic mean of the usable values. */
    mean: number;
    /** Median (second quartile). */
    median: number;
    /** Every value tied for the highest frequency, in ascending order. */
    mode: Array<number>;
    /** Variance as computed by d3-array's `variance`. */
    variance: number;
    /** Standard deviation as computed by d3-array's `deviation`. */
    standardDeviation: number;
    /** Smallest usable value. */
    min: number;
    /** Largest usable value. */
    max: number;
    /** `max - min`. */
    range: number;
    /** `[q1, median, q3]`. */
    quartiles: Array<number>;
    /** Selected percentiles; `p25` and `p75` repeat the first and third quartile. */
    percentiles: Record<'p5' | 'p10' | 'p25' | 'p75' | 'p90' | 'p95', number>;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Result of an outlier scan: the flagged points, the remaining points, the
 * thresholds that were applied and a few headline counts.
 */
export interface OutlierAnalysis {
    /** Points outside the inner bounds, with their position in the input. */
    outliers: {
        value: number;
        /** Index of the point in the array that was analysed. */
        index: number;
        label?: string;
        /** 'extreme' when the point is also outside the outer bounds. */
        severity: 'mild' | 'extreme';
        /** Which side of the bounds the point fell on. */
        type: 'lower' | 'upper';
    }[];
    /** Points that were not flagged. */
    cleanData: {
        value: number;
        index: number;
        label?: string;
    }[];
    /** Name of the detection method that produced this result. */
    method: string;
    /** Method-specific thresholds; shape depends on `method`. */
    threshold: any;
    /** Location/spread statistics the thresholds were derived from. */
    statistics: Record<'mean' | 'median' | 'q1' | 'q3' | 'iqr', number>;
    /** Headline counts for the scan. */
    summary: {
        totalOutliers: number;
        mildOutliers: number;
        extremeOutliers: number;
        /** Outliers as a percentage (0-100) of the analysed input length. */
        outlierPercentage: number;
    };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Data-quality report produced by `assessDataQuality`.
 *
 * Note on units: `completeness`, `accuracy` and `validity` are fractions of
 * the input length (0-1), whereas `consistency` is a score on a 0-100 scale.
 */
export interface DataQualityAssessment {
    /** Fraction (0-1) of items that are not null/missing. */
    completeness: number;
    /** Score computed as `100 - CV * 100`, floored at 0 (CV = coefficient of variation). */
    consistency: number;
    /** Fraction (0-1) of items inside the expected range (all non-missing items when no range is given). */
    accuracy: number;
    /** Fraction (0-1) of items whose type is one of the allowed types. */
    validity: number;
    /** Number of distinct values that occur more than once. */
    duplicates: number;
    /** Human-readable descriptions of every problem found. */
    issues: Array<string>;
    /** Outlier scan of the numeric values. */
    anomalies: OutlierAnalysis;
    /** Suggested follow-up actions for metrics that crossed their tolerance. */
    recommendations: Array<string>;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Variance breakdown of labelled values, including an ANOVA-style
 * between/within group comparison.
 */
export interface VarianceAnalysis {
    /** Variance of all values. */
    totalVariance: number;
    /** Variance of the strictly positive values only. */
    positiveVariance: number;
    /** Variance of the strictly negative values only. */
    negativeVariance: number;
    /** Pooled variance inside the groups. */
    withinGroupVariance: number;
    /** Variance of the group means around the overall mean. */
    betweenGroupVariance: number;
    /** Ratio of between-group to within-group variance (0 when either is 0). */
    fStatistic: number;
    /** Coarse textual reading of `fStatistic`. */
    significance: string;
    /** One entry per input item, in input order. */
    varianceContributions: {
        label: string;
        value: number;
        /** Squared deviation of `value` from the overall mean. */
        variance: number;
        /** Share of the total variance, as a percentage. */
        contribution: number;
    }[];
    /** The largest contributors, strongest first. */
    significantFactors: {
        label: string;
        impact: 'high' | 'medium' | 'low';
        variance: number;
    }[];
}
|
|
94
|
+
|
|
95
|
+
/**
 * Linear-trend fit of (x, y) points plus a short extrapolation.
 */
export interface TrendAnalysis {
    /** Slope of the fitted line. */
    slope: number;
    /** Value of the fitted line at x = 0. */
    intercept: number;
    /** Correlation coefficient between x and y. */
    correlation: number;
    /** Square of `correlation`. */
    rSquared: number;
    /** Sign of the slope, with a small dead band reported as 'stable'. */
    direction: 'increasing' | 'decreasing' | 'stable';
    /** Bucketed magnitude of `correlation`. */
    strength: 'strong' | 'moderate' | 'weak' | 'none';
    /** Absolute correlation on a 0-100 scale. */
    confidence: number;
    /** Same value as `direction`; kept for backward compatibility. */
    trend: 'increasing' | 'decreasing' | 'stable';
    /** Extrapolated points beyond the largest observed x. */
    projectedValues: {
        period: number;
        value: number;
        /** Same as `period`; kept for backward compatibility. */
        x: number;
        /** Same as `value`; kept for backward compatibility. */
        y: number;
        /** Interval around `value`. */
        confidence: { lower: number; upper: number };
    }[];
    /** Same content as `projectedValues`; kept for backward compatibility. */
    forecast: TrendAnalysis['projectedValues'];
}
|
|
119
|
+
|
|
120
|
+
/**
 * Public surface of the object returned by `createStatisticalSystem`.
 */
export interface StatisticalSystem {
    // ---- Core statistical functions ----

    /** Descriptive statistics for a list of numbers. */
    calculateSummary(data: number[]): StatisticalSummary;
    /** Flag outliers; `labels[i]`, when given, is attached to `data[i]`. */
    detectOutliers(data: number[], labels?: string[]): OutlierAnalysis;
    /** Score completeness, validity, accuracy and consistency of raw input. */
    assessDataQuality(data: any[], options?: DataQualityOptions): DataQualityAssessment;

    // ---- Advanced analysis ----

    /** Break the variance of labelled values down by item and by group. */
    analyzeVariance(data: { label: string; value: number }[]): VarianceAnalysis;
    /** Fit a straight line through the points and extrapolate it. */
    analyzeTrend(data: { x: number; y: number }[]): TrendAnalysis;

    // ---- Data search and optimization ----

    /** Build a d3 bisector keyed on `accessor`. */
    createBisector<T>(accessor: (d: T) => number): d3.Bisector<T, number>;
    /** Build a lookup that returns the item whose key is closest to a value. */
    createSearch<T>(data: T[], accessor: (d: T) => number): (value: number) => T | undefined;

    // ---- Utility functions ----

    /** Simple moving average over a sliding window of `window` items. */
    calculateMovingAverage(data: number[], window: number): number[];
    /** Exponential smoothing with smoothing factor `alpha`. */
    calculateExponentialSmoothing(data: number[], alpha: number): number[];
    /** Whether the series shows autocorrelation at lag `period`. */
    detectSeasonality(data: number[], period: number): boolean;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Tuning knobs for `assessDataQuality`. Every field is optional.
 */
export interface DataQualityOptions {
    /** Inclusive `[min, max]` that numeric values are expected to fall in. */
    expectedRange?: [number, number];
    /** Accepted `typeof` results; defaults to `['number']`. */
    allowedTypes?: Array<string>;
    /** Tolerated fraction of missing values; defaults to 0.05. */
    nullTolerance?: number;
    /** Tolerated fraction of duplicated values; defaults to 0.1. */
    duplicateTolerance?: number;
}
|
|
146
|
+
|
|
147
|
+
// ============================================================================
|
|
148
|
+
// STATISTICAL SYSTEM IMPLEMENTATION
|
|
149
|
+
// ============================================================================
|
|
150
|
+
|
|
151
|
+
export function createStatisticalSystem(): StatisticalSystem {
|
|
152
|
+
|
|
153
|
+
// ========================================================================
|
|
154
|
+
// CORE STATISTICAL FUNCTIONS
|
|
155
|
+
// ========================================================================
|
|
156
|
+
|
|
157
|
+
/**
 * Build a descriptive-statistics summary for a list of numbers.
 *
 * Null, undefined and NaN entries are ignored. If nothing usable remains an
 * all-zero summary (with an empty `mode`) is returned rather than throwing.
 *
 * @param data Raw values; the array itself is not modified.
 * @returns Count, sum, central tendency, spread, quartiles and percentiles.
 */
function calculateSummary(data: number[]): StatisticalSummary {
    // `filter` yields a fresh array, so the in-place sort never touches `data`.
    const sorted = data.filter(v => !(v == null || isNaN(v)));
    sorted.sort(d3.ascending);

    const count = sorted.length;
    if (count === 0) {
        return {
            count: 0,
            sum: 0,
            mean: 0,
            median: 0,
            mode: [],
            variance: 0,
            standardDeviation: 0,
            min: 0,
            max: 0,
            range: 0,
            quartiles: [0, 0, 0],
            percentiles: { p5: 0, p10: 0, p25: 0, p75: 0, p90: 0, p95: 0 }
        };
    }

    // d3 helpers return undefined when they cannot produce a value; map that to 0.
    const quantileAt = (p: number): number => quantile(sorted, p) || 0;

    const sum = d3.sum(sorted);
    const mean = d3.mean(sorted) || 0;
    const medianValue = median(sorted) || 0;
    const varianceValue = variance(sorted) || 0;
    const standardDeviation = deviation(sorted) || 0;
    const min = d3.min(sorted) || 0;
    const max = d3.max(sorted) || 0;

    const q1 = quantileAt(0.25);
    const q3 = quantileAt(0.75);

    // Tally occurrences; Map iteration follows insertion order, i.e. ascending value.
    const frequencies = new Map<number, number>();
    for (const v of sorted) {
        frequencies.set(v, (frequencies.get(v) || 0) + 1);
    }

    let topFrequency = 0;
    for (const f of frequencies.values()) {
        if (f > topFrequency) {
            topFrequency = f;
        }
    }

    // Every value that reaches the top frequency is reported as a mode.
    const modes: number[] = [];
    for (const [v, f] of frequencies) {
        if (f === topFrequency) {
            modes.push(v);
        }
    }

    return {
        count,
        sum,
        mean,
        median: medianValue,
        mode: modes,
        variance: varianceValue,
        standardDeviation,
        min,
        max,
        range: max - min,
        quartiles: [q1, medianValue, q3],
        percentiles: {
            p5: quantileAt(0.05),
            p10: quantileAt(0.10),
            p25: q1,
            p75: q3,
            p90: quantileAt(0.90),
            p95: quantileAt(0.95)
        }
    };
}
|
|
242
|
+
|
|
243
|
+
/**
 * Flag outliers with the interquartile-range (IQR) rule.
 *
 * A value is an outlier when it lies more than 1.5 x IQR outside the
 * quartiles, and an 'extreme' one when it lies more than 3 x IQR outside.
 * Null and NaN entries are skipped: they appear in neither result list.
 *
 * @param data   Values to scan.
 * @param labels Optional labels; `labels[i]` is attached to `data[i]`.
 * @returns Flagged and clean points, the bounds used and summary counts.
 */
function detectOutliers(data: number[], labels: string[] = []): OutlierAnalysis {
    const summary = calculateSummary(data);
    const q1 = summary.quartiles[0];
    const q3 = summary.quartiles[2];
    const iqr = q3 - q1;

    // Inner fences (1.5 x IQR) mark outliers, outer fences (3 x IQR) mark extremes.
    const lowerBound = q1 - 1.5 * iqr;
    const upperBound = q3 + 1.5 * iqr;
    const extremeLowerBound = q1 - 3 * iqr;
    const extremeUpperBound = q3 + 3 * iqr;

    const outliers: OutlierAnalysis['outliers'] = [];
    const cleanData: OutlierAnalysis['cleanData'] = [];
    let mildOutliers = 0;
    let extremeOutliers = 0;

    for (let index = 0; index < data.length; index++) {
        const value = data[index];
        if (value == null || isNaN(value)) {
            continue;
        }

        const belowLower = value < lowerBound;
        if (!belowLower && !(value > upperBound)) {
            cleanData.push({ value, index, label: labels[index] });
            continue;
        }

        const extreme = value < extremeLowerBound || value > extremeUpperBound;
        if (extreme) {
            extremeOutliers += 1;
        } else {
            mildOutliers += 1;
        }

        outliers.push({
            value,
            index,
            label: labels[index],
            severity: extreme ? 'extreme' : 'mild',
            type: belowLower ? 'lower' : 'upper'
        });
    }

    // The percentage is relative to the full input length, skipped entries included.
    const outlierPercentage = data.length > 0 ? (outliers.length / data.length) * 100 : 0;

    return {
        outliers,
        cleanData,
        method: 'iqr',
        threshold: { lowerBound, upperBound, extremeLowerBound, extremeUpperBound },
        statistics: {
            mean: summary.mean,
            median: summary.median,
            q1,
            q3,
            iqr
        },
        summary: {
            totalOutliers: outliers.length,
            mildOutliers,
            extremeOutliers,
            outlierPercentage
        }
    };
}
|
|
307
|
+
|
|
308
|
+
/**
 * Assess the quality of a raw data array and suggest improvements.
 *
 * Items may be primitives or records; for a record the `value` property is
 * what gets assessed. An item counts as missing when it (or its `value`) is
 * null or undefined.
 *
 * `completeness`, `validity` and `accuracy` are fractions of the input
 * length (0-1); `consistency` is a 0-100 score derived from the coefficient
 * of variation of the numeric values. For empty input the fractions are 0.
 *
 * @param data    Items to assess.
 * @param options Expected range, allowed `typeof` results and tolerances.
 * @returns Quality metrics, detected issues, an outlier scan and recommendations.
 */
function assessDataQuality(
    data: any[],
    options: DataQualityOptions = {}
): DataQualityAssessment {
    const {
        expectedRange,
        allowedTypes = ['number'],
        nullTolerance = 0.05,     // 5% null tolerance
        duplicateTolerance = 0.1  // 10% duplicate tolerance
    } = options;

    // Records are assessed through their `value`; primitives are assessed as-is.
    // (Testing `item.value` on a primitive would wrongly mark every number as missing.)
    const unwrap = (item: any): unknown =>
        item !== null && typeof item === 'object' ? item.value : item;

    // JSON.stringify throws on bigint, so give bigint its own key format.
    const duplicateKey = (value: unknown): string =>
        typeof value === 'bigint' ? `${value}n` : JSON.stringify(value);

    const totalCount = data.length;
    let nullCount = 0;
    let typeValidCount = 0;
    let rangeValidCount = 0;
    const seen = new Set<string>();
    const duplicates = new Set<string>();
    const numericData: number[] = [];

    data.forEach(item => {
        const value = unwrap(item);

        if (value == null) {
            nullCount++;
            return;
        }

        if (allowedTypes.includes(typeof value)) {
            typeValidCount++;
        }

        if (typeof value === 'number') {
            if (!isNaN(value)) {
                numericData.push(value);
            }
            if (!expectedRange || (value >= expectedRange[0] && value <= expectedRange[1])) {
                rangeValidCount++;
            }
        } else if (!expectedRange) {
            rangeValidCount++; // No range constraint
        }

        const key = duplicateKey(value);
        if (seen.has(key)) {
            duplicates.add(key);
        } else {
            seen.add(key);
        }
    });

    // Guard the ratios so empty input yields 0 instead of NaN.
    const fractionOfTotal = (count: number): number =>
        totalCount > 0 ? count / totalCount : 0;

    const completeness = fractionOfTotal(totalCount - nullCount);
    const validity = fractionOfTotal(typeValidCount);
    const accuracy = fractionOfTotal(rangeValidCount);

    // Coefficient of variation uses |mean| so a negative mean cannot push the
    // consistency score above 100; a zero mean falls back to a divisor of 1.
    const meanMagnitude = Math.abs(d3.mean(numericData) || 0) || 1;
    const cv = numericData.length > 0 ? (deviation(numericData) || 0) / meanMagnitude : 0;
    const consistency = Math.max(0, 100 - (cv * 100));

    // Outlier analysis for numeric data
    const anomalies: OutlierAnalysis = numericData.length > 0 ?
        detectOutliers(numericData) :
        {
            outliers: [],
            cleanData: [],
            method: 'None - No numeric data',
            threshold: {},
            statistics: { mean: 0, median: 0, q1: 0, q3: 0, iqr: 0 },
            summary: { totalOutliers: 0, mildOutliers: 0, extremeOutliers: 0, outlierPercentage: 0 }
        };

    const invalidTypeCount = totalCount - typeValidCount;
    const outOfRangeCount = totalCount - rangeValidCount;

    // Recommendations only make sense when there is data to improve.
    const recommendations: string[] = [];
    if (totalCount > 0) {
        if (completeness < (1 - nullTolerance)) {
            recommendations.push(`Improve data completeness: ${nullCount} missing values detected`);
            recommendations.push('Remove or impute missing values');
        }
        if (validity < 0.95) {
            recommendations.push(`Validate data types: ${invalidTypeCount} invalid types found`);
        }
        if (accuracy < 0.90 && expectedRange) {
            recommendations.push(`Check data accuracy: ${outOfRangeCount} values outside expected range`);
        }
        if (duplicates.size > duplicateTolerance * totalCount) {
            recommendations.push(`Remove duplicates: ${duplicates.size} duplicate values detected`);
        }
        if (anomalies.summary.outlierPercentage > 5) {
            recommendations.push(`Investigate outliers: ${anomalies.summary.totalOutliers} outliers detected (${anomalies.summary.outlierPercentage.toFixed(1)}%)`);
        }
    }

    const issues: string[] = [];
    if (totalCount === 0) {
        issues.push('No data provided');
    }
    if (nullCount > 0) {
        issues.push(`${nullCount} null or missing values found`);
    }
    if (invalidTypeCount > 0) {
        issues.push(`${invalidTypeCount} invalid data types found`);
    }
    if (expectedRange && outOfRangeCount > 0) {
        issues.push(`${outOfRangeCount} values outside expected range`);
    }
    if (duplicates.size > 0) {
        issues.push(`${duplicates.size} duplicate values found`);
    }
    if (anomalies.summary.totalOutliers > 0) {
        issues.push(`${anomalies.summary.totalOutliers} outliers detected`);
    }

    return {
        completeness,
        consistency,
        accuracy,
        validity,
        duplicates: duplicates.size,
        issues,
        anomalies,
        recommendations
    };
}
|
|
437
|
+
|
|
438
|
+
// ========================================================================
|
|
439
|
+
// ADVANCED ANALYSIS FUNCTIONS
|
|
440
|
+
// ========================================================================
|
|
441
|
+
|
|
442
|
+
/**
 * Analyze variance contributions in waterfall data and identify the key
 * drivers of variability.
 *
 * Each item's `contribution` is its squared deviation from the overall mean
 * as a percentage of the total sum of squared deviations, so contributions
 * add up to 100 (or are all 0 when every value is identical).
 *
 * Items are also grouped for an ANOVA-style comparison: by the leading
 * alphabetic prefix of the label when there is one, otherwise by sign
 * (zero counts as 'negative').
 *
 * @param data Labelled values.
 * @returns Total/positive/negative variance, group variances, an F statistic
 *          with a coarse significance label, per-item contributions and the
 *          top (at most five) contributors.
 */
function analyzeVariance(data: Array<{label: string, value: number}>): VarianceAnalysis {
    const values = data.map(d => d.value);
    const totalVariance = variance(values) || 0;
    const grandMean = d3.mean(values) || 0;

    // d3's variance yields undefined for fewer than two values; treat that as 0.
    const positiveVariance = variance(values.filter(v => v > 0)) || 0;
    const negativeVariance = variance(values.filter(v => v < 0)) || 0;

    // Normalise by the sum of squared deviations, not the sample variance,
    // otherwise the percentages would add up to (n - 1) * 100.
    const squaredDeviations = values.map(v => Math.pow(v - grandMean, 2));
    const sumOfSquares = d3.sum(squaredDeviations);

    const varianceContributions = data.map((item, i) => ({
        label: item.label,
        value: item.value,
        variance: squaredDeviations[i],
        contribution: sumOfSquares > 0 ? (squaredDeviations[i] / sumOfSquares) * 100 : 0
    }));

    // Identify significant factors (top contributors)
    const significantFactors: VarianceAnalysis['significantFactors'] = [...varianceContributions]
        .sort((a, b) => b.contribution - a.contribution)
        .slice(0, 5)
        .map(item => ({
            label: item.label,
            impact: item.contribution > 20 ? 'high' : item.contribution > 10 ? 'medium' : 'low',
            variance: item.variance
        }));

    // Group values by category, e.g. "A1" -> "A", "Category1" -> "Category".
    const categoryPrefix = /^([A-Za-z]+)/;
    const categoryGroups = new Map<string, number[]>();
    data.forEach(item => {
        const prefix = typeof item.label === 'string' ? item.label.match(categoryPrefix)?.[1] : undefined;
        const category = prefix || (item.value > 0 ? 'positive' : 'negative');

        const members = categoryGroups.get(category);
        if (members) {
            members.push(item.value);
        } else {
            categoryGroups.set(category, [item.value]);
        }
    });

    const groups = Array.from(categoryGroups.values());

    // Between-group mean square: size-weighted squared distance of each group
    // mean from the grand mean, divided by (number of groups - 1).
    let betweenGroupVariance = 0;
    if (groups.length > 1) {
        const betweenSumOfSquares = groups.reduce((sum, group) => {
            const groupMean = d3.mean(group) || 0;
            return sum + (group.length * Math.pow(groupMean - grandMean, 2));
        }, 0);
        betweenGroupVariance = betweenSumOfSquares / (groups.length - 1);
    }

    // Within-group mean square: pooled group variances over (N - number of groups).
    const withinGroupVariance = groups.length > 0 ?
        groups.reduce((sum, group) => {
            const groupVariance = variance(group) || 0;
            return sum + (groupVariance * (group.length - 1));
        }, 0) / Math.max(1, values.length - groups.length) : totalVariance;

    // F-statistic for variance analysis
    const fStatistic = betweenGroupVariance > 0 && withinGroupVariance > 0 ?
        betweenGroupVariance / withinGroupVariance : 0;

    // Coarse reading of F with fixed cut-offs; no p-value is computed.
    const significance = fStatistic > 4 ? 'significant' :
        fStatistic > 2 ? 'moderate' : 'not significant';

    return {
        totalVariance,
        positiveVariance,
        negativeVariance,
        withinGroupVariance,
        betweenGroupVariance,
        fStatistic,
        significance,
        varianceContributions,
        significantFactors
    };
}
|
|
540
|
+
|
|
541
|
+
/**
 * Fit a least-squares line through (x, y) points and extrapolate it.
 *
 * With fewer than two points a neutral result is returned (zero slope,
 * 'stable' direction, 'none' strength, no projections) instead of throwing.
 *
 * @param data Points to fit.
 * @returns Slope, intercept, correlation, R-squared, a direction/strength
 *          classification and three projected points beyond the largest x.
 *          `trend` and `forecast` are aliases of `direction` and
 *          `projectedValues`, kept for backward compatibility.
 */
function analyzeTrend(data: Array<{x: number, y: number}>): TrendAnalysis {
    if (data.length < 2) {
        return {
            slope: 0,
            intercept: 0,
            correlation: 0,
            rSquared: 0,
            direction: 'stable',
            strength: 'none',
            confidence: 0,
            trend: 'stable',
            projectedValues: [],
            forecast: []
        };
    }

    const xValues = data.map(d => d.x);
    const yValues = data.map(d => d.y);
    const xMean = d3.mean(xValues) || 0;
    const yMean = d3.mean(yValues) || 0;

    // Sums of cross-products and squared x deviations for the regression.
    let crossProductSum = 0;
    let xSquaredSum = 0;
    for (let i = 0; i < data.length; i++) {
        const xDiff = xValues[i] - xMean;
        crossProductSum += xDiff * (yValues[i] - yMean);
        xSquaredSum += xDiff * xDiff;
    }

    const slope = xSquaredSum !== 0 ? crossProductSum / xSquaredSum : 0;
    const intercept = yMean - slope * xMean;

    // Correlation = Sxy / sqrt(Sxx * Syy), where yStd * sqrt(n - 1) = sqrt(Syy).
    const xStd = deviation(xValues) || 0;
    const yStd = deviation(yValues) || 0;
    const rawCorrelation = (xStd * yStd) !== 0
        ? crossProductSum / (Math.sqrt(xSquaredSum) * yStd * Math.sqrt(data.length - 1))
        : 0;
    // Rounding can nudge the ratio just past +/-1; keep it in range.
    const correlation = Math.max(-1, Math.min(1, rawCorrelation));
    const rSquared = correlation * correlation;

    const direction: TrendAnalysis['direction'] =
        slope > 0.01 ? 'increasing' : slope < -0.01 ? 'decreasing' : 'stable';
    const magnitude = Math.abs(correlation);
    const strength: TrendAnalysis['strength'] =
        magnitude > 0.7 ? 'strong' : magnitude > 0.3 ? 'moderate' : 'weak';
    const confidence = magnitude * 100;

    // Fold instead of Math.max(...xValues): spreading a very large array can
    // exceed the engine's argument limit.
    const lastX = xValues.reduce((highest, x) => Math.max(highest, x), -Infinity);

    // Same for every projection, so compute once: std-dev of y over sqrt(n).
    const standardError = yStd / Math.sqrt(data.length);
    const margin = 1.96 * standardError;

    // Generate projections (simple linear extrapolation, one unit of x apart).
    const projectedValues = Array.from({ length: 3 }, (_, i) => {
        const period = lastX + (i + 1);
        const value = yMean + slope * (period - xMean);

        return {
            period,
            value,
            x: period, // alias for backward compatibility
            y: value,  // alias for backward compatibility
            confidence: {
                lower: value - margin,
                upper: value + margin
            }
        };
    });

    return {
        slope,
        intercept,
        correlation,
        rSquared,
        direction,
        strength,
        confidence,
        trend: direction,          // alias for backward compatibility
        projectedValues,
        forecast: projectedValues  // alias for backward compatibility
    };
}
|
|
628
|
+
|
|
629
|
+
// ========================================================================
|
|
630
|
+
// DATA SEARCH AND OPTIMIZATION
|
|
631
|
+
// ========================================================================
|
|
632
|
+
|
|
633
|
+
/**
 * Wrap d3-array's `bisector` so callers can binary-search arrays of `T`
 * by a numeric key.
 *
 * @param accessor Extracts the numeric key from an item.
 * @returns A d3 bisector keyed on `accessor`.
 */
function createBisector<T>(accessor: (d: T) => number): d3.Bisector<T, number> {
    const keyedBisector = bisector(accessor);
    return keyedBisector;
}
|
|
640
|
+
|
|
641
|
+
/**
 * Build a nearest-neighbour lookup over `data`.
 *
 * A sorted copy of `data` is taken once up front; the returned function then
 * locates the item whose key is closest to the requested value. On an exact
 * tie the item with the smaller key wins. For an empty `data` the lookup
 * returns undefined.
 *
 * @param data     Items to search; not modified.
 * @param accessor Extracts the numeric key from an item.
 * @returns Function mapping a value to its closest item.
 */
function createSearch<T>(data: T[], accessor: (d: T) => number): (value: number) => T | undefined {
    const locate = createBisector(accessor);

    const ordered = data.slice();
    ordered.sort((a, b) => ascending(accessor(a), accessor(b)));

    return (value: number): T | undefined => {
        const insertAt = locate.left(ordered, value);

        // Before the first item or past the last one: clamp to that end.
        if (insertAt === 0) {
            return ordered[0];
        }
        if (insertAt >= ordered.length) {
            return ordered[ordered.length - 1];
        }

        // Otherwise choose between the two neighbours of the insertion point.
        const before = ordered[insertAt - 1];
        const after = ordered[insertAt];
        const gapBefore = Math.abs(accessor(before) - value);
        const gapAfter = Math.abs(accessor(after) - value);

        if (gapBefore <= gapAfter) {
            return before;
        }
        return after;
    };
}
|
|
665
|
+
|
|
666
|
+
// ========================================================================
|
|
667
|
+
// UTILITY FUNCTIONS
|
|
668
|
+
// ========================================================================
|
|
669
|
+
|
|
670
|
+
/**
 * Simple moving average over a sliding window.
 *
 * @param data   Input series.
 * @param window Number of consecutive items per average.
 * @returns One average per full window (`data.length - window + 1` entries),
 *          or an empty array when `data` is empty or `window` is not in
 *          `1..data.length`.
 */
function calculateMovingAverage(data: number[], window: number): number[] {
    const usable = data.length > 0 && window > 0 && window <= data.length;
    if (!usable) {
        return []; // Invalid window or no data: empty result instead of throwing
    }

    const averages: number[] = [];
    const lastStart = data.length - window;
    let start = 0;
    while (start <= lastStart) {
        const slice = data.slice(start, start + window);
        // d3.mean gives undefined when the slice holds no usable number.
        averages.push(d3.mean(slice) || 0);
        start += 1;
    }
    return averages;
}
|
|
686
|
+
|
|
687
|
+
/**
 * Simple exponential smoothing.
 *
 * The first output equals the first input; each later output is
 * `alpha * current + (1 - alpha) * previousOutput`.
 *
 * @param data  Input series.
 * @param alpha Smoothing factor in `[0, 1]`; 1 reproduces the input, 0 holds
 *              the first value.
 * @returns The smoothed series (same length as `data`), or an empty array
 *          when `data` is empty or `alpha` is NaN or outside `[0, 1]`.
 */
function calculateExponentialSmoothing(data: number[], alpha: number): number[] {
    // Written as a negated range test so that NaN is rejected as well.
    const alphaInRange = alpha >= 0 && alpha <= 1;
    if (!alphaInRange || data.length === 0) {
        return []; // Return empty array instead of throwing
    }

    const result: number[] = [data[0]];
    for (let i = 1; i < data.length; i++) {
        result.push(alpha * data[i] + (1 - alpha) * result[i - 1]);
    }

    return result;
}
|
|
706
|
+
|
|
707
|
+
/**
 * Detect seasonality by testing the autocorrelation of the series at a
 * given lag.
 *
 * @param data   Input series.
 * @param period Lag to test; must be a positive integer.
 * @returns true when the absolute autocorrelation at `period` exceeds 0.3.
 *          Returns false when `period` is not a positive integer, when the
 *          series is shorter than two full periods, or when it has no
 *          variation.
 */
function detectSeasonality(data: number[], period: number): boolean {
    // A lag of 0 always correlates perfectly, and negative or fractional
    // lags would index outside the series, so reject them up front.
    if (!Number.isInteger(period) || period <= 0) {
        return false;
    }
    if (data.length < period * 2) {
        return false; // Need at least 2 full periods
    }

    const mean = d3.mean(data) || 0;

    // Lagged cross-products of deviations from the mean.
    let laggedProductSum = 0;
    for (let i = 0; i < data.length - period; i++) {
        laggedProductSum += (data[i] - mean) * (data[i + period] - mean);
    }

    // Total squared deviation, used to normalise the lagged sum.
    let squaredDeviationSum = 0;
    for (let i = 0; i < data.length; i++) {
        squaredDeviationSum += Math.pow(data[i] - mean, 2);
    }

    const autocorrelation = squaredDeviationSum !== 0 ? laggedProductSum / squaredDeviationSum : 0;

    // Consider seasonal if autocorrelation is above threshold
    return Math.abs(autocorrelation) > 0.3;
}
|
|
733
|
+
|
|
734
|
+
// ========================================================================
|
|
735
|
+
// RETURN API
|
|
736
|
+
// ========================================================================
|
|
737
|
+
|
|
738
|
+
return {
|
|
739
|
+
// Core statistical functions
|
|
740
|
+
calculateSummary,
|
|
741
|
+
detectOutliers,
|
|
742
|
+
assessDataQuality,
|
|
743
|
+
|
|
744
|
+
// Advanced analysis
|
|
745
|
+
analyzeVariance,
|
|
746
|
+
analyzeTrend,
|
|
747
|
+
|
|
748
|
+
// Data search and optimization
|
|
749
|
+
createBisector,
|
|
750
|
+
createSearch,
|
|
751
|
+
|
|
752
|
+
// Utility functions
|
|
753
|
+
calculateMovingAverage,
|
|
754
|
+
calculateExponentialSmoothing,
|
|
755
|
+
detectSeasonality
|
|
756
|
+
};
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
// ============================================================================
|
|
760
|
+
// WATERFALL-SPECIFIC STATISTICAL UTILITIES
|
|
761
|
+
// ============================================================================
|
|
762
|
+
|
|
763
|
+
/**
 * Run the summary, variance and data-quality analyses on waterfall data and
 * derive short, human-readable insights from the results.
 *
 * @param data    Labelled waterfall values.
 * @param options `currency`: when true, values are expected to lie in
 *                [-1000000, 1000000] for the quality check.
 *                `includeTotal` is accepted but currently has no effect here.
 * @returns The three analysis results plus the list of insights.
 */
export function analyzeWaterfallStatistics(
    data: Array<{label: string, value: number}>,
    options: { includeTotal?: boolean, currency?: boolean } = {}
): {
    summary: StatisticalSummary,
    variance: VarianceAnalysis,
    quality: DataQualityAssessment,
    insights: string[]
} {
    const stats = createStatisticalSystem();
    const values = data.map(d => d.value);

    // Calculate core statistics
    const summary = stats.calculateSummary(values);
    const varianceAnalysis = stats.analyzeVariance(data);
    const quality = stats.assessDataQuality(values, {
        expectedRange: options.currency ? [-1000000, 1000000] : undefined
    });

    // Generate business insights
    const insights: string[] = [];

    // Only name a variance driver when there is variance to explain.
    const topFactor = varianceAnalysis.significantFactors[0];
    if (topFactor && varianceAnalysis.totalVariance > 0) {
        insights.push(`${topFactor.label} is the primary driver of variance (${topFactor.impact} impact)`);
    }

    if (summary.standardDeviation > Math.abs(summary.mean)) {
        insights.push('High volatility detected - consider risk management strategies');
    }

    // Compare counts by cross-multiplication (positive : negative > 2 : 1 or
    // < 1 : 2) so that a zero count never leads to a division by zero.
    const positiveCount = values.filter(v => v > 0).length;
    const negativeCount = values.filter(v => v < 0).length;

    if (positiveCount > 2 * negativeCount) {
        insights.push('Predominantly positive contributors - strong growth pattern');
    } else if (2 * positiveCount < negativeCount) {
        insights.push('Predominantly negative contributors - potential cost management focus needed');
    }

    if (quality.anomalies.summary.outlierPercentage > 10) {
        insights.push(`${quality.anomalies.summary.totalOutliers} outliers detected - data validation recommended`);
    }

    return {
        summary,
        variance: varianceAnalysis,
        quality,
        insights
    };
}
|
|
819
|
+
|
|
820
|
+
// Default export for convenience
|
|
821
|
+
export default createStatisticalSystem;
|