datly 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,897 @@
/**
 * Detects statistical patterns (trends, seasonality, outliers, correlations,
 * distribution shape, clusters and temporal regularity) in a tabular dataset.
 *
 * A dataset is an object `{ headers, data }` where `headers` is an array of
 * column names and `data` is an array of row objects keyed by those names.
 */
class PatternDetector {
  /**
   * Runs every detector and bundles the results with a summary and insights.
   *
   * @param {{headers: string[], data: Object[]}} dataset - Table to analyze.
   * @returns {{timestamp: string, datasetSize: number, patterns: Object,
   *   summary: Object, insights: Object[]}} Full analysis report.
   * @throws {Error} "Invalid dataset format" when headers or data are missing.
   */
  analyze(dataset) {
    if (
      !dataset ||
      !Array.isArray(dataset.data) ||
      !Array.isArray(dataset.headers)
    ) {
      throw new Error("Invalid dataset format");
    }

    const patterns = {
      trends: this.detectTrends(dataset),
      seasonality: this.detectSeasonality(dataset),
      outliers: this.detectOutliers(dataset),
      correlations: this.detectCorrelationPatterns(dataset),
      distributions: this.detectDistributionPatterns(dataset),
      clustering: this.detectSimpleClusters(dataset),
      temporal: this.detectTemporalPatterns(dataset),
    };

    return {
      timestamp: new Date().toISOString(),
      // A plain { headers, data } object has no `length`; fall back to the
      // row count so the reported size is never undefined.
      datasetSize:
        typeof dataset.length === "number"
          ? dataset.length
          : dataset.data.length,
      patterns: patterns,
      summary: this.generateSummary(patterns),
      insights: this.generateInsights(patterns),
    };
  }

  /**
   * Fits a least-squares line (row index vs. value) to each numeric column
   * and reports columns with |slope| > 0.01 and R² > 0.3.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} One entry per trending column.
   */
  detectTrends(dataset) {
    const numericColumns = this.getNumericColumns(dataset);
    const trends = [];

    numericColumns.forEach((column) => {
      const values = this.getColumnValues(dataset, column);
      if (values.length < 5) return;

      const trendData = values.map((value, index) => ({ x: index, y: value }));
      const trendResult = this.calculateTrend(trendData);

      if (Math.abs(trendResult.slope) > 0.01 && trendResult.rSquared > 0.3) {
        trends.push({
          column: column,
          direction: trendResult.slope > 0 ? "increasing" : "decreasing",
          slope: trendResult.slope,
          rSquared: trendResult.rSquared,
          strength: this.classifyTrendStrength(trendResult.rSquared),
          significance: trendResult.pValue < 0.05,
        });
      }
    });

    return trends;
  }

  /**
   * Looks for periodic behavior in every (date column, numeric column) pair
   * that yields at least 12 time-ordered points.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} Pairs whose best autocorrelation exceeds 0.3.
   */
  detectSeasonality(dataset) {
    const dateColumns = this.getDateColumns(dataset);
    const numericColumns = this.getNumericColumns(dataset);
    const seasonality = [];

    if (dateColumns.length === 0) return seasonality;

    dateColumns.forEach((dateCol) => {
      numericColumns.forEach((numCol) => {
        const timeSeries = this.createTimeSeries(dataset, dateCol, numCol);
        if (timeSeries.length < 12) return;

        const seasonalPattern = this.analyzeSeasonalPattern(timeSeries);
        if (seasonalPattern.strength > 0.3) {
          seasonality.push({
            dateColumn: dateCol,
            valueColumn: numCol,
            strength: seasonalPattern.strength,
            period: seasonalPattern.period,
            peaks: seasonalPattern.peaks,
            pattern: seasonalPattern.type,
          });
        }
      });
    });

    return seasonality;
  }

  /**
   * Flags IQR outliers in every numeric column with at least 10 values.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} One entry per column that contains outliers; at most
   *   five sample outlier values are included per column.
   */
  detectOutliers(dataset) {
    const numericColumns = this.getNumericColumns(dataset);
    const outliers = [];

    numericColumns.forEach((column) => {
      const values = this.getColumnValues(dataset, column);
      if (values.length < 10) return;

      const outlierResults = this.findOutliers(values);
      if (outlierResults.count > 0) {
        outliers.push({
          column: column,
          count: outlierResults.count,
          percentage: outlierResults.percentage,
          method: "IQR",
          outlierValues: outlierResults.values.slice(0, 5),
          severity: this.classifyOutlierSeverity(outlierResults.percentage),
        });
      }
    });

    return outliers;
  }

  /**
   * Builds the pairwise correlation matrix of the numeric columns and
   * extracts strong pairs, correlated groups and the mean |r|.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {{strongCorrelations: Object[], clusters: string[][],
   *   avgCorrelation: number}} Correlation findings.
   */
  detectCorrelationPatterns(dataset) {
    const numericColumns = this.getNumericColumns(dataset);
    if (numericColumns.length < 2) {
      // Same shape as the main return so consumers can read avgCorrelation.
      return { strongCorrelations: [], clusters: [], avgCorrelation: 0 };
    }

    const correlationMatrix = this.buildCorrelationMatrix(
      dataset,
      numericColumns
    );

    return {
      strongCorrelations: this.findStrongCorrelations(correlationMatrix),
      clusters: this.findCorrelationClusters(correlationMatrix),
      avgCorrelation: this.calculateAverageCorrelation(correlationMatrix),
    };
  }

  /**
   * Describes the distribution shape of every numeric column with at least
   * 20 values.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} Shape statistics and a suggested transformation.
   */
  detectDistributionPatterns(dataset) {
    const numericColumns = this.getNumericColumns(dataset);
    const distributions = [];

    numericColumns.forEach((column) => {
      const values = this.getColumnValues(dataset, column);
      if (values.length < 20) return;

      const distInfo = this.analyzeDistribution(values);
      distributions.push({
        column: column,
        type: distInfo.type,
        skewness: distInfo.skewness,
        kurtosis: distInfo.kurtosis,
        isNormal:
          Math.abs(distInfo.skewness) < 1 && Math.abs(distInfo.kurtosis) < 1,
        transformation: this.suggestTransformation(distInfo),
      });
    });

    return distributions;
  }

  /**
   * Runs a 3-means clustering on every pair of numeric columns that has at
   * least 10 complete rows and keeps pairs whose quality exceeds 0.5.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} Cluster centers and quality per retained pair.
   */
  detectSimpleClusters(dataset) {
    const numericColumns = this.getNumericColumns(dataset);
    if (numericColumns.length < 2) return [];

    const clusters = [];

    for (let i = 0; i < numericColumns.length; i++) {
      for (let j = i + 1; j < numericColumns.length; j++) {
        const col1 = numericColumns[i];
        const col2 = numericColumns[j];
        const data = this.getTwoColumnData(dataset, col1, col2);

        if (data.length < 10) continue;

        const clusterResult = this.performSimpleKMeans(data, 3);
        if (clusterResult.quality > 0.5) {
          clusters.push({
            variables: [col1, col2],
            clusters: clusterResult.centers,
            quality: clusterResult.quality,
            interpretation: this.interpretClusterQuality(clusterResult.quality),
          });
        }
      }
    }

    return clusters;
  }

  /**
   * Characterizes the sampling rhythm of every date column that has at least
   * five parseable dates.
   *
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {Object[]} Frequency label, span and gap statistics per column
   *   (`totalSpan` and `avgInterval` are in milliseconds).
   */
  detectTemporalPatterns(dataset) {
    const dateColumns = this.getDateColumns(dataset);
    const patterns = [];

    dateColumns.forEach((dateCol) => {
      // Read the raw cells: getColumnValues keeps only numbers and would
      // discard the date strings that made this a date column.
      const dates = dataset.data
        .map((row) => this.toValidDate(row[dateCol]))
        .filter((date) => date !== null)
        .sort((a, b) => a - b);

      if (dates.length < 5) return;

      const intervals = this.calculateIntervals(dates);
      const frequency = this.determineFrequency(intervals);
      const gaps = this.findGaps(dates, frequency);

      patterns.push({
        column: dateCol,
        frequency: frequency,
        totalSpan: dates[dates.length - 1] - dates[0],
        avgInterval:
          intervals.reduce((sum, int) => sum + int, 0) / intervals.length,
        gaps: gaps.length,
        pattern: gaps.length > dates.length * 0.1 ? "irregular" : "regular",
      });
    });

    return patterns;
  }

  /**
   * Converts a cell to a valid Date.
   *
   * @param {*} value - String, Date or finite number; anything else is
   *   rejected (so `null` is not silently turned into the 1970 epoch).
   * @returns {Date|null} Parsed date, or null when the value is not a date.
   */
  toValidDate(value) {
    const isDateLike =
      typeof value === "string" ||
      value instanceof Date ||
      Number.isFinite(value);
    if (!isDateLike) return null;

    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? null : date;
  }

  /**
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {string[]} Headers where more than 70% of cells are finite
   *   numbers.
   */
  getNumericColumns(dataset) {
    return dataset.headers.filter((header) => {
      const values = dataset.data.map((row) => row[header]);
      const numericCount = values.filter((val) => Number.isFinite(val)).length;
      return numericCount > values.length * 0.7;
    });
  }

  /**
   * @param {Object} dataset - Dataset with `headers` and `data`.
   * @returns {string[]} Headers where more than 70% of cells are strings
   *   that `Date` can parse.
   */
  getDateColumns(dataset) {
    return dataset.headers.filter((header) => {
      const values = dataset.data.map((row) => row[header]);
      const dateCount = values.filter(
        (val) =>
          typeof val === "string" && !Number.isNaN(new Date(val).getTime())
      ).length;
      return dateCount > values.length * 0.7;
    });
  }

  /**
   * @param {Object} dataset - Dataset with `data`.
   * @param {string} column - Column name.
   * @returns {number[]} The column's finite numeric cells, in row order.
   */
  getColumnValues(dataset, column) {
    return dataset.data
      .map((row) => row[column])
      .filter((val) => Number.isFinite(val));
  }

  /**
   * @param {Object} dataset - Dataset with `data`.
   * @param {string} col1 - Column mapped to `x`.
   * @param {string} col2 - Column mapped to `y`.
   * @returns {{x: number, y: number}[]} Rows where both cells are finite.
   */
  getTwoColumnData(dataset, col1, col2) {
    return dataset.data
      .map((row) => ({ x: row[col1], y: row[col2] }))
      .filter((point) => Number.isFinite(point.x) && Number.isFinite(point.y));
  }

  /**
   * @param {Object} dataset - Dataset with `data`.
   * @param {string} dateCol - Column holding the timestamps.
   * @param {string} valueCol - Column holding the numeric values.
   * @returns {{date: Date, value: number}[]} Rows with a valid date and a
   *   finite value, sorted by ascending date.
   */
  createTimeSeries(dataset, dateCol, valueCol) {
    return dataset.data
      .map((row) => ({
        date: this.toValidDate(row[dateCol]),
        value: row[valueCol],
      }))
      .filter((point) => point.date !== null && Number.isFinite(point.value))
      .sort((a, b) => a.date - b.date);
  }

  /**
   * Ordinary least-squares fit of y on x with a two-sided t-test on the
   * slope.
   *
   * @param {{x: number, y: number}[]} data - Points to fit.
   * @returns {{slope: number, intercept: number, rSquared: number,
   *   pValue: number}} Fit parameters. `rSquared` is 0 when y is constant
   *   and `pValue` is 1 when the test cannot be computed (fewer than three
   *   points or no spread in x).
   */
  calculateTrend(data) {
    const n = data.length;
    if (n === 0) {
      return { slope: 0, intercept: 0, rSquared: 0, pValue: 1 };
    }

    let sumX = 0;
    let sumY = 0;
    let sumXY = 0;
    let sumXX = 0;
    data.forEach((point) => {
      sumX += point.x;
      sumY += point.y;
      sumXY += point.x * point.y;
      sumXX += point.x * point.x;
    });

    const denominator = n * sumXX - sumX * sumX;
    const slope =
      denominator !== 0 ? (n * sumXY - sumX * sumY) / denominator : 0;
    const intercept = (sumY - slope * sumX) / n;
    const meanY = sumY / n;

    let ssRes = 0;
    let ssTot = 0;
    data.forEach((point) => {
      const residual = point.y - (intercept + slope * point.x);
      ssRes += residual * residual;
      ssTot += (point.y - meanY) * (point.y - meanY);
    });
    const rSquared = ssTot > 0 ? 1 - ssRes / ssTot : 0;

    const degreesOfFreedom = n - 2;
    const sxx = denominator / n; // sum of squared deviations of x
    let pValue = 1;
    if (degreesOfFreedom > 0 && sxx > 0) {
      // Standard error of the slope: residual variance divided by Sxx.
      const slopeStdErr = Math.sqrt(ssRes / degreesOfFreedom / sxx);
      if (slopeStdErr > 0) {
        const tStat = slope / slopeStdErr;
        pValue = 2 * (1 - this.tCDF(Math.abs(tStat), degreesOfFreedom));
        pValue = Math.min(1, Math.max(0, pValue));
      } else if (slope !== 0) {
        pValue = 0; // perfect fit with a non-zero slope
      }
    }

    return { slope, intercept, rSquared, pValue };
  }

  /**
   * Picks the lag (4 up to min(n/3, 24)) with the highest autocorrelation
   * and counts local peaks and valleys.
   *
   * @param {{value: number}[]} timeSeries - Time-ordered points.
   * @returns {Object} `{ strength: 0 }` for fewer than 12 points, otherwise
   *   strength, period, peak/valley counts and a type label.
   */
  analyzeSeasonalPattern(timeSeries) {
    const values = timeSeries.map((point) => point.value);
    const n = values.length;

    if (n < 12) return { strength: 0 };

    let bestPeriod = 12;
    let maxCorrelation = 0;
    const maxPeriod = Math.min(n / 3, 24);

    for (let period = 4; period <= maxPeriod; period++) {
      const correlation = this.calculateAutoCorrelation(values, period);
      if (correlation > maxCorrelation) {
        maxCorrelation = correlation;
        bestPeriod = period;
      }
    }

    const peaks = this.findPeaks(values);
    const valleys = this.findValleys(values);

    return {
      strength: maxCorrelation,
      period: bestPeriod,
      peaks: peaks.length,
      valleys: valleys.length,
      type: this.classifySeasonalType(
        maxCorrelation,
        peaks.length,
        valleys.length
      ),
    };
  }

  /**
   * Finds values outside [Q1 - 1.5·IQR, Q3 + 1.5·IQR].
   *
   * @param {number[]} values - Sample (not modified).
   * @returns {{count: number, percentage: number, values: number[],
   *   bounds: {lower: number, upper: number}}} Outlier report; an empty
   *   sample yields zero outliers and NaN bounds.
   */
  findOutliers(values) {
    if (values.length === 0) {
      return {
        count: 0,
        percentage: 0,
        values: [],
        bounds: { lower: NaN, upper: NaN },
      };
    }

    const sorted = [...values].sort((a, b) => a - b);
    const q1 = this.calculateQuantile(sorted, 0.25);
    const q3 = this.calculateQuantile(sorted, 0.75);
    const iqr = q3 - q1;

    const lowerBound = q1 - 1.5 * iqr;
    const upperBound = q3 + 1.5 * iqr;

    const outlierValues = values.filter(
      (val) => val < lowerBound || val > upperBound
    );

    return {
      count: outlierValues.length,
      percentage: (outlierValues.length / values.length) * 100,
      values: outlierValues,
      bounds: { lower: lowerBound, upper: upperBound },
    };
  }

  /**
   * Builds a symmetric Pearson correlation matrix.
   *
   * Each pair is computed on the rows where BOTH cells are finite, so a
   * missing value in one column cannot shift the other column's values out
   * of alignment.
   *
   * @param {Object} dataset - Dataset with `data`.
   * @param {string[]} columns - Columns to correlate.
   * @returns {Object} `matrix[col1][col2]` = r, with 1 on the diagonal.
   */
  buildCorrelationMatrix(dataset, columns) {
    const matrix = {};
    columns.forEach((column) => {
      matrix[column] = {};
    });

    for (let i = 0; i < columns.length; i++) {
      const col1 = columns[i];
      matrix[col1][col1] = 1;

      for (let j = i + 1; j < columns.length; j++) {
        const col2 = columns[j];
        const pairs = this.getTwoColumnData(dataset, col1, col2);
        const correlation = this.calculateCorrelation(
          pairs.map((pair) => pair.x),
          pairs.map((pair) => pair.y)
        );
        matrix[col1][col2] = correlation;
        matrix[col2][col1] = correlation;
      }
    }

    return matrix;
  }

  /**
   * @param {Object} matrix - Correlation matrix from buildCorrelationMatrix.
   * @returns {Object[]} Column pairs with |r| > 0.7, strongest first.
   */
  findStrongCorrelations(matrix) {
    const correlations = [];
    const columns = Object.keys(matrix);

    for (let i = 0; i < columns.length; i++) {
      for (let j = i + 1; j < columns.length; j++) {
        const col1 = columns[i];
        const col2 = columns[j];
        const correlation = matrix[col1][col2];

        if (Math.abs(correlation) > 0.7) {
          correlations.push({
            variable1: col1,
            variable2: col2,
            correlation: correlation,
            strength: this.getCorrelationStrength(Math.abs(correlation)),
            direction: correlation > 0 ? "positive" : "negative",
          });
        }
      }
    }

    return correlations.sort(
      (a, b) => Math.abs(b.correlation) - Math.abs(a.correlation)
    );
  }

  /**
   * Greedily groups columns: each unvisited column seeds a group and pulls
   * in every unvisited column whose |r| with the seed exceeds 0.7.
   *
   * @param {Object} matrix - Correlation matrix.
   * @returns {string[][]} Groups containing more than one column.
   */
  findCorrelationClusters(matrix) {
    const columns = Object.keys(matrix);
    const clusters = [];
    const visited = new Set();

    columns.forEach((col) => {
      if (visited.has(col)) return;

      const cluster = [col];
      visited.add(col);

      columns.forEach((other) => {
        if (!visited.has(other) && Math.abs(matrix[col][other]) > 0.7) {
          cluster.push(other);
          visited.add(other);
        }
      });

      if (cluster.length > 1) {
        clusters.push(cluster);
      }
    });

    return clusters;
  }

  /**
   * @param {number[]} values - Sample.
   * @returns {{mean: number, stdDev: number, skewness: number,
   *   kurtosis: number, type: string}} Sample moments (stdDev uses n - 1)
   *   and a distribution label.
   */
  analyzeDistribution(values) {
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const variance =
      values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) /
      (values.length - 1);
    const stdDev = Math.sqrt(variance);

    const skewness = this.calculateSkewness(values, mean, stdDev);
    const kurtosis = this.calculateKurtosis(values, mean, stdDev);

    return {
      mean,
      stdDev,
      skewness,
      kurtosis,
      type: this.classifyDistribution(skewness, kurtosis),
    };
  }

  /**
   * Lloyd's k-means on 2-D points, capped at 50 iterations.
   *
   * @param {{x: number, y: number}[]} data - Points to cluster.
   * @param {number} k - Number of clusters.
   * @param {function(): number} [random=Math.random] - Source of numbers in
   *   [0, 1) for the initial centers; inject a seeded generator for
   *   reproducible results.
   * @returns {{centers: {x: number, y: number}[], assignments: number[],
   *   quality: number}} Final centers, per-point cluster index and the
   *   between/total sum-of-squares ratio.
   */
  performSimpleKMeans(data, k, random = Math.random) {
    const centers = this.initializeCenters(data, k, random);
    const assignments = new Array(data.length);
    let changed = true;
    let iterations = 0;

    while (changed && iterations < 50) {
      changed = false;

      data.forEach((point, pointIndex) => {
        // Squared distance has the same argmin as the distance itself.
        let nearest = -1;
        let bestDistance = Infinity;
        centers.forEach((center, centerIndex) => {
          const dx = point.x - center.x;
          const dy = point.y - center.y;
          const distance = dx * dx + dy * dy;
          if (distance < bestDistance) {
            bestDistance = distance;
            nearest = centerIndex;
          }
        });

        if (assignments[pointIndex] !== nearest) {
          changed = true;
          assignments[pointIndex] = nearest;
        }
      });

      for (let j = 0; j < k; j++) {
        const clusterPoints = data.filter((_, i) => assignments[i] === j);
        // An empty cluster keeps its previous center.
        if (clusterPoints.length > 0) {
          centers[j] = {
            x:
              clusterPoints.reduce((sum, p) => sum + p.x, 0) /
              clusterPoints.length,
            y:
              clusterPoints.reduce((sum, p) => sum + p.y, 0) /
              clusterPoints.length,
          };
        }
      }

      iterations++;
    }

    const quality = this.calculateClusterQuality(data, assignments, centers);

    return { centers, assignments, quality };
  }

  /**
   * @param {Date[]} dates - Dates in ascending order.
   * @returns {number[]} Differences between consecutive dates in ms.
   */
  calculateIntervals(dates) {
    const intervals = [];
    for (let i = 1; i < dates.length; i++) {
      intervals.push(dates[i] - dates[i - 1]);
    }
    return intervals;
  }

  /**
   * Labels the mean interval: under 1 day "sub_daily", under 2 "daily",
   * under 8 "weekly", under 35 "monthly", otherwise "yearly".
   *
   * @param {number[]} intervals - Intervals in milliseconds.
   * @returns {string} Frequency label, or "unknown" for no intervals.
   */
  determineFrequency(intervals) {
    if (intervals.length === 0) return "unknown";

    const avgInterval =
      intervals.reduce((sum, int) => sum + int, 0) / intervals.length;
    const dayInMs = 24 * 60 * 60 * 1000;

    if (avgInterval < dayInMs) return "sub_daily";
    if (avgInterval < dayInMs * 2) return "daily";
    if (avgInterval < dayInMs * 8) return "weekly";
    if (avgInterval < dayInMs * 35) return "monthly";
    return "yearly";
  }

  /**
   * @param {Date[]} dates - Dates in ascending order.
   * @param {string} expectedFrequency - Label from determineFrequency.
   * @returns {{start: Date, end: Date, duration: number}[]} Consecutive
   *   pairs more than twice the expected interval apart.
   */
  findGaps(dates, expectedFrequency) {
    const expectedInterval = this.getExpectedInterval(expectedFrequency);
    const gaps = [];

    for (let i = 1; i < dates.length; i++) {
      const actualInterval = dates[i] - dates[i - 1];
      if (actualInterval > expectedInterval * 2) {
        gaps.push({
          start: dates[i - 1],
          end: dates[i],
          duration: actualInterval,
        });
      }
    }

    return gaps;
  }

  /**
   * @param {number} rSquared - Coefficient of determination.
   * @returns {string} Label from "very_weak" to "very_strong".
   */
  classifyTrendStrength(rSquared) {
    if (rSquared > 0.8) return "very_strong";
    if (rSquared > 0.6) return "strong";
    if (rSquared > 0.4) return "moderate";
    if (rSquared > 0.2) return "weak";
    return "very_weak";
  }

  /**
   * @param {number} percentage - Share of outliers, 0-100.
   * @returns {string} "severe", "moderate", "mild" or "minimal".
   */
  classifyOutlierSeverity(percentage) {
    if (percentage > 10) return "severe";
    if (percentage > 5) return "moderate";
    if (percentage > 1) return "mild";
    return "minimal";
  }

  /**
   * @param {number} strength - Best autocorrelation.
   * @param {number} peaks - Peak count (currently unused).
   * @param {number} valleys - Valley count (currently unused).
   * @returns {string} Seasonality label based on strength alone.
   */
  classifySeasonalType(strength, peaks, valleys) {
    if (strength > 0.7) return "strong_seasonal";
    if (strength > 0.5) return "moderate_seasonal";
    if (strength > 0.3) return "weak_seasonal";
    return "no_seasonality";
  }

  /**
   * @param {number} skewness - Sample skewness.
   * @param {number} kurtosis - Sample excess kurtosis.
   * @returns {string} Distribution label.
   */
  classifyDistribution(skewness, kurtosis) {
    if (Math.abs(skewness) < 0.5 && Math.abs(kurtosis) < 0.5) return "normal";
    if (skewness > 1) return "right_skewed";
    if (skewness < -1) return "left_skewed";
    if (kurtosis > 1) return "heavy_tailed";
    return "irregular";
  }

  /**
   * @param {{type: string, skewness: number}} distInfo - Distribution info.
   * @returns {string} Suggested transformation name.
   */
  suggestTransformation(distInfo) {
    if (distInfo.type === "normal") return "none";
    if (distInfo.skewness > 1) return "log_transform";
    if (distInfo.skewness < -1) return "square_transform";
    return "standardization";
  }

  /**
   * @param {number} r - Absolute correlation coefficient.
   * @returns {string} Label from "very_weak" to "very_strong".
   */
  getCorrelationStrength(r) {
    if (r >= 0.9) return "very_strong";
    if (r >= 0.7) return "strong";
    if (r >= 0.5) return "moderate";
    if (r >= 0.3) return "weak";
    return "very_weak";
  }

  /**
   * @param {number} quality - Between/total sum-of-squares ratio.
   * @returns {string} "excellent", "good", "fair" or "poor".
   */
  interpretClusterQuality(quality) {
    if (quality > 0.7) return "excellent";
    if (quality > 0.5) return "good";
    if (quality > 0.3) return "fair";
    return "poor";
  }

  /**
   * Counts the findings per pattern category plus a grand total.
   *
   * @param {Object} patterns - Detector results keyed by category.
   * @returns {Object} Count per category and `totalPatterns`.
   */
  generateSummary(patterns) {
    const summary = {};

    Object.keys(patterns).forEach((key) => {
      const value = patterns[key];
      if (Array.isArray(value)) {
        summary[key] = value.length;
      } else if (typeof value === "object" && value !== null) {
        // The correlation result is an object; count its strong pairs, not
        // its property names, so the total reflects real findings.
        summary[key] = Array.isArray(value.strongCorrelations)
          ? value.strongCorrelations.length
          : Object.keys(value).length;
      } else {
        summary[key] = 0;
      }
    });

    summary.totalPatterns = Object.values(summary).reduce(
      (sum, count) => sum + count,
      0
    );

    return summary;
  }

  /**
   * Turns the detector results into human-readable insights.
   *
   * @param {Object} patterns - Result of the detectors (needs `trends`,
   *   `correlations.strongCorrelations`, `outliers`, `seasonality` and
   *   `clustering`).
   * @returns {Object[]} Insights sorted by descending importance.
   */
  generateInsights(patterns) {
    const insights = [];

    const strongTrends = patterns.trends.filter(
      (t) => t.strength === "strong" || t.strength === "very_strong"
    );
    if (strongTrends.length > 0) {
      insights.push({
        type: "trend",
        importance: "high",
        message: `Found ${strongTrends.length} strong trend(s) in your data`,
        details: strongTrends.map((t) => `${t.column}: ${t.direction} trend`),
      });
    }

    if (patterns.correlations.strongCorrelations.length > 0) {
      insights.push({
        type: "correlation",
        importance: "medium",
        message: `Discovered ${patterns.correlations.strongCorrelations.length} strong correlation(s)`,
        details: patterns.correlations.strongCorrelations
          .slice(0, 3)
          .map(
            (c) =>
              `${c.variable1} ↔ ${c.variable2}: ${c.strength} ${c.direction}`
          ),
      });
    }

    const severeOutliers = patterns.outliers.filter(
      (o) => o.severity === "severe"
    );
    if (severeOutliers.length > 0) {
      insights.push({
        type: "outliers",
        importance: "high",
        message: `Detected severe outliers in ${severeOutliers.length} column(s)`,
        details: severeOutliers.map(
          (o) =>
            `${o.column}: ${o.count} outliers (${o.percentage.toFixed(1)}%)`
        ),
      });
    }

    if (patterns.seasonality.length > 0) {
      insights.push({
        type: "seasonality",
        importance: "medium",
        message: `Found seasonal patterns in ${patterns.seasonality.length} time series`,
        details: patterns.seasonality.map(
          (s) => `${s.valueColumn}: ${s.pattern} (period: ${s.period})`
        ),
      });
    }

    const goodClusters = patterns.clustering.filter((c) => c.quality > 0.5);
    if (goodClusters.length > 0) {
      insights.push({
        type: "clustering",
        importance: "medium",
        message: `Identified ${goodClusters.length} natural cluster(s) in the data`,
        details: goodClusters.map(
          (c) => `${c.variables.join(" vs ")}: ${c.interpretation} clusters`
        ),
      });
    }

    const importanceRank = { high: 3, medium: 2, low: 1 };
    return insights.sort(
      (a, b) => importanceRank[b.importance] - importanceRank[a.importance]
    );
  }

  /**
   * Pearson correlation between the series and itself shifted by `lag`.
   *
   * @param {number[]} values - Series.
   * @param {number} lag - Shift in samples.
   * @returns {number} Correlation, or 0 when the lag is too large or either
   *   segment is constant.
   */
  calculateAutoCorrelation(values, lag) {
    if (lag >= values.length) return 0;

    const n = values.length - lag;
    const mean1 = values.slice(0, n).reduce((sum, val) => sum + val, 0) / n;
    const mean2 = values.slice(lag).reduce((sum, val) => sum + val, 0) / n;

    let numerator = 0;
    let sum1 = 0;
    let sum2 = 0;

    for (let i = 0; i < n; i++) {
      const diff1 = values[i] - mean1;
      const diff2 = values[i + lag] - mean2;
      numerator += diff1 * diff2;
      sum1 += diff1 * diff1;
      sum2 += diff2 * diff2;
    }

    const denominator = Math.sqrt(sum1 * sum2);
    return denominator === 0 ? 0 : numerator / denominator;
  }

  /**
   * @param {number[]} values - Series.
   * @returns {number[]} Indices strictly greater than both neighbours.
   */
  findPeaks(values) {
    const peaks = [];
    for (let i = 1; i < values.length - 1; i++) {
      if (values[i] > values[i - 1] && values[i] > values[i + 1]) {
        peaks.push(i);
      }
    }
    return peaks;
  }

  /**
   * @param {number[]} values - Series.
   * @returns {number[]} Indices strictly smaller than both neighbours.
   */
  findValleys(values) {
    const valleys = [];
    for (let i = 1; i < values.length - 1; i++) {
      if (values[i] < values[i - 1] && values[i] < values[i + 1]) {
        valleys.push(i);
      }
    }
    return valleys;
  }

  /**
   * Linearly interpolated quantile.
   *
   * @param {number[]} sortedArray - Values in ascending order.
   * @param {number} q - Quantile in [0, 1].
   * @returns {number} Quantile value, or NaN for an empty array.
   */
  calculateQuantile(sortedArray, q) {
    if (sortedArray.length === 0) return NaN;

    const index = (sortedArray.length - 1) * q;
    const lower = Math.floor(index);
    const upper = Math.ceil(index);
    const weight = index % 1;

    if (lower === upper) {
      return sortedArray[lower];
    }

    return sortedArray[lower] * (1 - weight) + sortedArray[upper] * weight;
  }

  /**
   * Pearson correlation of two equally long, index-aligned samples.
   *
   * @param {number[]} x - First sample.
   * @param {number[]} y - Second sample.
   * @returns {number} r, or 0 when lengths differ, there are fewer than
   *   three points, or either sample is constant.
   */
  calculateCorrelation(x, y) {
    if (x.length !== y.length || x.length < 3) return 0;

    const n = x.length;
    const meanX = x.reduce((sum, val) => sum + val, 0) / n;
    const meanY = y.reduce((sum, val) => sum + val, 0) / n;

    let numerator = 0;
    let sumXSquared = 0;
    let sumYSquared = 0;

    for (let i = 0; i < n; i++) {
      const xDiff = x[i] - meanX;
      const yDiff = y[i] - meanY;
      numerator += xDiff * yDiff;
      sumXSquared += xDiff * xDiff;
      sumYSquared += yDiff * yDiff;
    }

    const denominator = Math.sqrt(sumXSquared * sumYSquared);
    return denominator === 0 ? 0 : numerator / denominator;
  }

  /**
   * Bias-adjusted sample skewness.
   *
   * @param {number[]} values - Sample.
   * @param {number} mean - Sample mean.
   * @param {number} stdDev - Sample standard deviation.
   * @returns {number} Skewness, or 0 when it is undefined (constant sample
   *   or fewer than three values).
   */
  calculateSkewness(values, mean, stdDev) {
    const n = values.length;
    if (stdDev === 0 || n < 3) return 0;

    const skewSum = values.reduce(
      (sum, val) => sum + Math.pow((val - mean) / stdDev, 3),
      0
    );

    return (n / ((n - 1) * (n - 2))) * skewSum;
  }

  /**
   * Bias-adjusted sample excess kurtosis.
   *
   * @param {number[]} values - Sample.
   * @param {number} mean - Sample mean.
   * @param {number} stdDev - Sample standard deviation.
   * @returns {number} Excess kurtosis, or 0 when it is undefined (constant
   *   sample or fewer than four values).
   */
  calculateKurtosis(values, mean, stdDev) {
    const n = values.length;
    if (stdDev === 0 || n < 4) return 0;

    const kurtSum = values.reduce(
      (sum, val) => sum + Math.pow((val - mean) / stdDev, 4),
      0
    );

    return (
      ((n * (n + 1)) / ((n - 1) * (n - 2) * (n - 3))) * kurtSum -
      (3 * Math.pow(n - 1, 2)) / ((n - 2) * (n - 3))
    );
  }

  /**
   * Draws k initial centers uniformly inside the bounding box of the data.
   *
   * @param {{x: number, y: number}[]} data - Points to cluster.
   * @param {number} k - Number of centers.
   * @param {function(): number} [random=Math.random] - Source of numbers in
   *   [0, 1).
   * @returns {{x: number, y: number}[]} Initial centers.
   */
  initializeCenters(data, k, random = Math.random) {
    // A plain loop instead of Math.min(...array): spreading a very large
    // array into call arguments can exceed the engine's argument limit.
    let minX = Infinity;
    let maxX = -Infinity;
    let minY = Infinity;
    let maxY = -Infinity;
    data.forEach((point) => {
      if (point.x < minX) minX = point.x;
      if (point.x > maxX) maxX = point.x;
      if (point.y < minY) minY = point.y;
      if (point.y > maxY) maxY = point.y;
    });

    const centers = [];
    for (let i = 0; i < k; i++) {
      centers.push({
        x: minX + random() * (maxX - minX),
        y: minY + random() * (maxY - minY),
      });
    }

    return centers;
  }

  /**
   * @param {{x: number, y: number}[]} data - Clustered points.
   * @param {number[]} assignments - Cluster index per point.
   * @param {{x: number, y: number}[]} centers - Cluster centers.
   * @returns {number} Between-cluster sum of squares divided by the total
   *   (within + between); 0 when the total is 0.
   */
  calculateClusterQuality(data, assignments, centers) {
    let totalWithinSS = 0;
    let totalBetweenSS = 0;

    const overallCentroid = {
      x: data.reduce((sum, p) => sum + p.x, 0) / data.length,
      y: data.reduce((sum, p) => sum + p.y, 0) / data.length,
    };

    centers.forEach((center, clusterIndex) => {
      const clusterPoints = data.filter(
        (_, i) => assignments[i] === clusterIndex
      );

      clusterPoints.forEach((point) => {
        totalWithinSS +=
          Math.pow(point.x - center.x, 2) + Math.pow(point.y - center.y, 2);
      });

      totalBetweenSS +=
        clusterPoints.length *
        (Math.pow(center.x - overallCentroid.x, 2) +
          Math.pow(center.y - overallCentroid.y, 2));
    });

    const totalSS = totalWithinSS + totalBetweenSS;
    return totalSS > 0 ? totalBetweenSS / totalSS : 0;
  }

  /**
   * @param {Object} matrix - Correlation matrix.
   * @returns {number} Mean |r| over all distinct column pairs, or 0 when
   *   there are no pairs.
   */
  calculateAverageCorrelation(matrix) {
    const columns = Object.keys(matrix);
    let sum = 0;
    let count = 0;

    for (let i = 0; i < columns.length; i++) {
      for (let j = i + 1; j < columns.length; j++) {
        sum += Math.abs(matrix[columns[i]][columns[j]]);
        count++;
      }
    }

    return count > 0 ? sum / count : 0;
  }

  /**
   * @param {string} frequency - Label from determineFrequency.
   * @returns {number} Nominal interval in ms (one day for unknown labels).
   */
  getExpectedInterval(frequency) {
    const dayInMs = 24 * 60 * 60 * 1000;

    switch (frequency) {
      case "daily":
        return dayInMs;
      case "weekly":
        return dayInMs * 7;
      case "monthly":
        return dayInMs * 30;
      case "yearly":
        return dayInMs * 365;
      default:
        return dayInMs;
    }
  }

  /**
   * Cumulative distribution function of Student's t distribution.
   *
   * @param {number} t - Test statistic.
   * @param {number} df - Degrees of freedom.
   * @returns {number} P(T <= t); 0.5 when `df` is not positive.
   */
  tCDF(t, df) {
    if (df <= 0) return 0.5;

    // I_x(df/2, 1/2) with x = df / (t² + df) is the two-sided tail mass.
    const x = df / (t * t + df);
    const halfTail = 0.5 * this.incompleteBeta(df / 2, 0.5, x);
    return t >= 0 ? 1 - halfTail : halfTail;
  }

  /**
   * Regularized incomplete beta function I_x(a, b).
   *
   * @param {number} a - First shape parameter (> 0).
   * @param {number} b - Second shape parameter (> 0).
   * @param {number} x - Upper integration limit.
   * @returns {number} Value in [0, 1]; 0 for x <= 0 and 1 for x >= 1.
   */
  incompleteBeta(a, b, x) {
    if (x <= 0) return 0;
    if (x >= 1) return 1;

    // x^a (1-x)^b / B(a, b), evaluated in log space to avoid overflow.
    const front = Math.exp(
      this.logGamma(a + b) -
        this.logGamma(a) -
        this.logGamma(b) +
        a * Math.log(x) +
        b * Math.log(1 - x)
    );

    // The continued fraction converges fastest below this threshold; above
    // it, use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a).
    if (x < (a + 1) / (a + b + 2)) {
      return (front * this.betaContinuedFraction(a, b, x)) / a;
    }
    return 1 - (front * this.betaContinuedFraction(b, a, 1 - x)) / b;
  }

  /**
   * Continued-fraction factor of the incomplete beta function, evaluated
   * with the modified Lentz method.
   *
   * @param {number} a - First shape parameter.
   * @param {number} b - Second shape parameter.
   * @param {number} x - Argument in (0, 1).
   * @returns {number} Value of the continued fraction.
   */
  betaContinuedFraction(a, b, x) {
    const maxIterations = 200;
    const epsilon = 1e-12;
    const tiny = 1e-30; // keeps denominators away from zero
    const guard = (value) => (Math.abs(value) < tiny ? tiny : value);

    const qab = a + b;
    const qap = a + 1;
    const qam = a - 1;

    let c = 1;
    let d = 1 / guard(1 - (qab * x) / qap);
    let h = d;

    for (let m = 1; m <= maxIterations; m++) {
      const m2 = 2 * m;

      // Even step of the recurrence.
      let aa = (m * (b - m) * x) / ((qam + m2) * (a + m2));
      d = 1 / guard(1 + aa * d);
      c = guard(1 + aa / c);
      h *= d * c;

      // Odd step of the recurrence.
      aa = (-(a + m) * (qab + m) * x) / ((a + m2) * (qap + m2));
      d = 1 / guard(1 + aa * d);
      c = guard(1 + aa / c);
      const delta = d * c;
      h *= delta;

      if (Math.abs(delta - 1) < epsilon) break;
    }

    return h;
  }

  /**
   * Natural logarithm of the gamma function (Lanczos approximation).
   *
   * @param {number} z - Argument (> 0).
   * @returns {number} ln Γ(z).
   */
  logGamma(z) {
    const coefficients = [
      76.18009172947146, -86.50532032941677, 24.01409824083091,
      -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5,
    ];

    const shifted = z + 5.5;
    const offset = shifted - (z + 0.5) * Math.log(shifted);

    let series = 1.000000000190015;
    let denominator = z;
    coefficients.forEach((coefficient) => {
      denominator += 1;
      series += coefficient / denominator;
    });

    return -offset + Math.log((2.5066282746310005 * series) / z);
  }
}

export default PatternDetector;