datly 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1138 -2183
- package/dist/datly.cjs +1 -1
- package/dist/datly.cjs.map +1 -0
- package/dist/datly.mjs +1 -1
- package/dist/datly.mjs.map +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -1
- package/package.json +1 -1
- package/src/code.js +131 -36
package/README.MD
CHANGED
|
@@ -46,7 +46,7 @@ datly is a comprehensive JavaScript library that brings powerful data analysis,
|
|
|
46
46
|
<script src="https://unpkg.com/datly"></script>
|
|
47
47
|
<script>
|
|
48
48
|
const result = datly.mean([1, 2, 3, 4, 5]);
|
|
49
|
-
console.log(result);
|
|
49
|
+
console.log(result.value); // Access the mean value directly
|
|
50
50
|
</script>
|
|
51
51
|
```
|
|
52
52
|
|
|
@@ -54,28 +54,37 @@ datly is a comprehensive JavaScript library that brings powerful data analysis,
|
|
|
54
54
|
|
|
55
55
|
```javascript
|
|
56
56
|
import * as datly from 'datly';
|
|
57
|
+
|
|
58
|
+
// All functions return JavaScript objects
|
|
59
|
+
const stats = datly.describe([1, 2, 3, 4, 5]);
|
|
60
|
+
console.log(stats.mean); // Direct property access
|
|
61
|
+
console.log(stats.std); // No parsing needed
|
|
57
62
|
```
|
|
58
63
|
|
|
64
|
+
> **Note**: All datly functions return JavaScript objects (not strings or YAML). This means you can directly access properties like `result.value`, `result.mean`, `dataframe.columns`, etc.
|
|
65
|
+
|
|
59
66
|
---
|
|
60
67
|
|
|
61
68
|
## Core Concepts
|
|
62
69
|
|
|
63
70
|
### Output Format
|
|
64
71
|
|
|
65
|
-
All analysis functions return results
|
|
72
|
+
All analysis functions return results as JavaScript objects with a consistent structure:
|
|
66
73
|
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
```javascript
|
|
75
|
+
{
|
|
76
|
+
type: "statistic",
|
|
77
|
+
name: "mean",
|
|
78
|
+
value: 3,
|
|
79
|
+
n: 5
|
|
80
|
+
}
|
|
72
81
|
```
|
|
73
82
|
|
|
74
83
|
This format makes it easy to:
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
-
|
|
78
|
-
-
|
|
84
|
+
- Access results programmatically with dot notation (e.g., `result.value`)
|
|
85
|
+
- Integrate with JavaScript applications
|
|
86
|
+
- Serialize to JSON for storage or transmission
|
|
87
|
+
- Display results in web interfaces
|
|
79
88
|
|
|
80
89
|
---
|
|
81
90
|
|
|
@@ -93,21 +102,16 @@ Creates a dataframe from CSV content.
|
|
|
93
102
|
- `skipEmptyLines`: Skip empty lines (default: true)
|
|
94
103
|
|
|
95
104
|
**Returns:**
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
- name: bob
|
|
107
|
-
age: 25
|
|
108
|
-
salary: 45000
|
|
109
|
-
n_rows: 2
|
|
110
|
-
n_cols: 3
|
|
105
|
+
```javascript
|
|
106
|
+
{
|
|
107
|
+
type: "dataframe",
|
|
108
|
+
columns: ["name", "age", "salary"],
|
|
109
|
+
data: [
|
|
110
|
+
{ name: "alice", age: 30, salary: 50000 },
|
|
111
|
+
{ name: "bob", age: 25, salary: 45000 }
|
|
112
|
+
],
|
|
113
|
+
shape: [2, 3]
|
|
114
|
+
}
|
|
111
115
|
```
|
|
112
116
|
|
|
113
117
|
**Example:**
|
|
@@ -132,21 +136,16 @@ Creates a dataframe from JSON data. Accepts multiple formats:
|
|
|
132
136
|
- String (parsed as JSON)
|
|
133
137
|
|
|
134
138
|
**Returns:**
|
|
135
|
-
```
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
- name: bob
|
|
146
|
-
age: 25
|
|
147
|
-
department: sales
|
|
148
|
-
n_rows: 2
|
|
149
|
-
n_cols: 3
|
|
139
|
+
```javascript
|
|
140
|
+
{
|
|
141
|
+
type: "dataframe",
|
|
142
|
+
columns: ["name", "age", "department"],
|
|
143
|
+
data: [
|
|
144
|
+
{ name: "alice", age: 30, department: "engineering" },
|
|
145
|
+
{ name: "bob", age: 25, department: "sales" }
|
|
146
|
+
],
|
|
147
|
+
shape: [2, 3]
|
|
148
|
+
}
|
|
150
149
|
```
|
|
151
150
|
|
|
152
151
|
**Example:**
|
|
@@ -180,21 +179,16 @@ Creates a dataframe from an array of objects.
|
|
|
180
179
|
- `array`: Array of objects with consistent keys
|
|
181
180
|
|
|
182
181
|
**Returns:**
|
|
183
|
-
```
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
- product: mouse
|
|
194
|
-
price: 25
|
|
195
|
-
stock: 50
|
|
196
|
-
n_rows: 2
|
|
197
|
-
n_cols: 3
|
|
182
|
+
```javascript
|
|
183
|
+
{
|
|
184
|
+
type: "dataframe",
|
|
185
|
+
columns: ["product", "price", "stock"],
|
|
186
|
+
data: [
|
|
187
|
+
{ product: "laptop", price: 999, stock: 15 },
|
|
188
|
+
{ product: "mouse", price: 25, stock: 50 }
|
|
189
|
+
],
|
|
190
|
+
shape: [2, 3]
|
|
191
|
+
}
|
|
198
192
|
```
|
|
199
193
|
|
|
200
194
|
**Example:**
|
|
@@ -221,34 +215,27 @@ Creates a dataframe from a single object. Can flatten nested structures.
|
|
|
221
215
|
- `maxDepth`: Maximum depth for flattening (default: 10)
|
|
222
216
|
|
|
223
217
|
**Returns (flattened):**
|
|
224
|
-
```
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
- 1
|
|
246
|
-
- 2
|
|
247
|
-
orders.total:
|
|
248
|
-
- 150
|
|
249
|
-
- 200
|
|
250
|
-
n_rows: 1
|
|
251
|
-
n_cols: 7
|
|
218
|
+
```javascript
|
|
219
|
+
{
|
|
220
|
+
type: "dataframe",
|
|
221
|
+
columns: [
|
|
222
|
+
"user.name", "user.age", "user.address.city",
|
|
223
|
+
"user.address.country", "orders"
|
|
224
|
+
],
|
|
225
|
+
data: [
|
|
226
|
+
{
|
|
227
|
+
"user.name": "alice",
|
|
228
|
+
"user.age": 30,
|
|
229
|
+
"user.address.city": "new york",
|
|
230
|
+
"user.address.country": "usa",
|
|
231
|
+
"orders": [
|
|
232
|
+
{ id: 1, total: 150 },
|
|
233
|
+
{ id: 2, total: 200 }
|
|
234
|
+
]
|
|
235
|
+
}
|
|
236
|
+
],
|
|
237
|
+
shape: [1, 5]
|
|
238
|
+
}
|
|
252
239
|
```
|
|
253
240
|
|
|
254
241
|
**Example:**
|
|
@@ -351,18 +338,16 @@ console.log(subset);
|
|
|
351
338
|
Returns the first n rows.
|
|
352
339
|
|
|
353
340
|
**Returns:**
|
|
354
|
-
```
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
n_rows: 2
|
|
365
|
-
n_cols: 2
|
|
341
|
+
```javascript
|
|
342
|
+
{
|
|
343
|
+
type: "dataframe",
|
|
344
|
+
columns: ["name", "age"],
|
|
345
|
+
data: [
|
|
346
|
+
{ name: "alice", age: 30 },
|
|
347
|
+
{ name: "bob", age: 25 }
|
|
348
|
+
],
|
|
349
|
+
shape: [2, 2]
|
|
350
|
+
}
|
|
366
351
|
```
|
|
367
352
|
|
|
368
353
|
**Example:**
|
|
@@ -385,2354 +370,1180 @@ const last3 = datly.df_tail(df, 3);
|
|
|
385
370
|
|
|
386
371
|
---
|
|
387
372
|
|
|
388
|
-
|
|
373
|
+
## Descriptive Statistics
|
|
374
|
+
|
|
375
|
+
### Basic Statistical Functions
|
|
389
376
|
|
|
390
|
-
|
|
377
|
+
All statistical functions return JavaScript objects with consistent structure.
|
|
378
|
+
|
|
379
|
+
#### `mean(array)`
|
|
380
|
+
|
|
381
|
+
Calculates the arithmetic mean.
|
|
391
382
|
|
|
392
383
|
**Returns:**
|
|
393
|
-
```
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
- department
|
|
401
|
-
- active
|
|
402
|
-
types:
|
|
403
|
-
name: string
|
|
404
|
-
age: number
|
|
405
|
-
salary: number
|
|
406
|
-
department: string
|
|
407
|
-
active: boolean
|
|
408
|
-
null_counts:
|
|
409
|
-
name: 0
|
|
410
|
-
age: 2
|
|
411
|
-
salary: 1
|
|
412
|
-
unique_counts:
|
|
413
|
-
name: 95
|
|
414
|
-
age: 45
|
|
384
|
+
```javascript
|
|
385
|
+
{
|
|
386
|
+
type: "statistic",
|
|
387
|
+
name: "mean",
|
|
388
|
+
value: 3,
|
|
389
|
+
n: 5
|
|
390
|
+
}
|
|
415
391
|
```
|
|
416
392
|
|
|
417
393
|
**Example:**
|
|
418
394
|
```javascript
|
|
419
|
-
const
|
|
420
|
-
const
|
|
421
|
-
console.log(
|
|
395
|
+
const data = [1, 2, 3, 4, 5];
|
|
396
|
+
const result = datly.mean(data);
|
|
397
|
+
console.log(result.value); // 3
|
|
422
398
|
```
|
|
423
399
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
## Data Selection
|
|
400
|
+
#### `median(array)`
|
|
427
401
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
Selects specific columns.
|
|
402
|
+
Calculates the median value.
|
|
431
403
|
|
|
432
404
|
**Returns:**
|
|
433
|
-
```
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
salary: 50000
|
|
441
|
-
n_rows: 1
|
|
442
|
-
n_cols: 2
|
|
405
|
+
```javascript
|
|
406
|
+
{
|
|
407
|
+
type: "statistic",
|
|
408
|
+
name: "median",
|
|
409
|
+
value: 3,
|
|
410
|
+
n: 5
|
|
411
|
+
}
|
|
443
412
|
```
|
|
444
413
|
|
|
445
414
|
**Example:**
|
|
446
415
|
```javascript
|
|
447
|
-
const
|
|
448
|
-
const
|
|
416
|
+
const data = [1, 2, 3, 4, 5];
|
|
417
|
+
const result = datly.median(data);
|
|
418
|
+
console.log(result.value); // 3
|
|
449
419
|
```
|
|
450
420
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
### `df_filter(dataframe, predicate)`
|
|
421
|
+
#### `variance(array)`
|
|
454
422
|
|
|
455
|
-
|
|
423
|
+
Calculates the sample variance.
|
|
456
424
|
|
|
457
425
|
**Returns:**
|
|
458
|
-
```
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
- name: alice
|
|
466
|
-
age: 30
|
|
467
|
-
salary: 50000
|
|
468
|
-
- name: charlie
|
|
469
|
-
age: 35
|
|
470
|
-
salary: 60000
|
|
471
|
-
n_rows: 2
|
|
472
|
-
n_cols: 3
|
|
426
|
+
```javascript
|
|
427
|
+
{
|
|
428
|
+
type: "statistic",
|
|
429
|
+
name: "variance",
|
|
430
|
+
value: 2.5,
|
|
431
|
+
n: 5
|
|
432
|
+
}
|
|
473
433
|
```
|
|
474
434
|
|
|
475
435
|
**Example:**
|
|
476
436
|
```javascript
|
|
477
|
-
const
|
|
478
|
-
|
|
479
|
-
//
|
|
480
|
-
const filtered = datly.df_filter(df, row => row.age > 28);
|
|
481
|
-
|
|
482
|
-
// Multiple conditions
|
|
483
|
-
const highEarners = datly.df_filter(df, row =>
|
|
484
|
-
row.salary > 55000 && row.department === 'Engineering'
|
|
485
|
-
);
|
|
437
|
+
const data = [1, 2, 3, 4, 5];
|
|
438
|
+
const result = datly.variance(data);
|
|
439
|
+
console.log(result.value); // 2.5
|
|
486
440
|
```
|
|
487
441
|
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
### `df_sort(dataframe, column, order = 'asc')`
|
|
442
|
+
#### `std(array)`
|
|
491
443
|
|
|
492
|
-
|
|
444
|
+
Calculates the sample standard deviation.
|
|
493
445
|
|
|
494
|
-
**
|
|
446
|
+
**Returns:**
|
|
495
447
|
```javascript
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
const sortedDesc = datly.df_sort(df, 'salary', 'desc');
|
|
448
|
+
{
|
|
449
|
+
type: "statistic",
|
|
450
|
+
name: "standard_deviation",
|
|
451
|
+
value: 1.58,
|
|
452
|
+
n: 5
|
|
453
|
+
}
|
|
503
454
|
```
|
|
504
455
|
|
|
505
|
-
---
|
|
506
|
-
|
|
507
|
-
## Data Cleaning
|
|
508
|
-
|
|
509
|
-
### `df_dropna(dataframe, subset = null)`
|
|
510
|
-
|
|
511
|
-
Removes rows with null/undefined values.
|
|
512
|
-
|
|
513
456
|
**Example:**
|
|
514
457
|
```javascript
|
|
515
|
-
const
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
{ name: 'Charlie', age: 35, email: null }
|
|
519
|
-
]);
|
|
520
|
-
|
|
521
|
-
// Drop rows with any null values
|
|
522
|
-
const cleaned = datly.df_dropna(df);
|
|
523
|
-
|
|
524
|
-
// Drop rows with null in specific columns
|
|
525
|
-
const cleanedPartial = datly.df_dropna(df, ['age']);
|
|
458
|
+
const data = [1, 2, 3, 4, 5];
|
|
459
|
+
const result = datly.std(data);
|
|
460
|
+
console.log(result.value); // 1.58
|
|
526
461
|
```
|
|
527
462
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
### `df_fillna(dataframe, value, subset = null)`
|
|
463
|
+
#### `skewness(array)`
|
|
531
464
|
|
|
532
|
-
|
|
465
|
+
Calculates the skewness (asymmetry measure).
|
|
533
466
|
|
|
534
|
-
**
|
|
467
|
+
**Returns:**
|
|
535
468
|
```javascript
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
const filled = datly.df_fillna(df, 0);
|
|
544
|
-
|
|
545
|
-
// Fill specific columns
|
|
546
|
-
const filledPartial = datly.df_fillna(df, 0, ['score']);
|
|
469
|
+
{
|
|
470
|
+
type: "statistic",
|
|
471
|
+
name: "skewness",
|
|
472
|
+
value: 0,
|
|
473
|
+
n: 5,
|
|
474
|
+
interpretation: "symmetric"
|
|
475
|
+
}
|
|
547
476
|
```
|
|
548
477
|
|
|
549
|
-
---
|
|
550
|
-
|
|
551
|
-
### `df_drop(dataframe, columns)`
|
|
552
|
-
|
|
553
|
-
Removes specified columns.
|
|
554
|
-
|
|
555
478
|
**Example:**
|
|
556
479
|
```javascript
|
|
557
|
-
const
|
|
558
|
-
|
|
559
|
-
//
|
|
560
|
-
const dropped = datly.df_drop(df, 'email');
|
|
561
|
-
|
|
562
|
-
// Drop multiple columns
|
|
563
|
-
const droppedMultiple = datly.df_drop(df, ['email', 'phone', 'address']);
|
|
480
|
+
const data = [1, 2, 3, 4, 5];
|
|
481
|
+
const result = datly.skewness(data);
|
|
482
|
+
console.log(result.interpretation); // "symmetric"
|
|
564
483
|
```
|
|
565
484
|
|
|
566
|
-
|
|
485
|
+
#### `kurtosis(array)`
|
|
567
486
|
|
|
568
|
-
|
|
487
|
+
Calculates the kurtosis (tail heaviness measure).
|
|
569
488
|
|
|
570
|
-
|
|
489
|
+
**Returns:**
|
|
490
|
+
```javascript
|
|
491
|
+
{
|
|
492
|
+
type: "statistic",
|
|
493
|
+
name: "kurtosis",
|
|
494
|
+
value: -1.2,
|
|
495
|
+
n: 5,
|
|
496
|
+
interpretation: "platykurtic"
|
|
497
|
+
}
|
|
498
|
+
```
|
|
571
499
|
|
|
572
500
|
**Example:**
|
|
573
501
|
```javascript
|
|
574
|
-
const
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
const renamed = datly.df_rename(df, {
|
|
579
|
-
name: 'employee_name',
|
|
580
|
-
age: 'employee_age',
|
|
581
|
-
salary: 'monthly_salary'
|
|
582
|
-
});
|
|
502
|
+
const data = [1, 2, 3, 4, 5];
|
|
503
|
+
const result = datly.kurtosis(data);
|
|
504
|
+
console.log(result.interpretation); // "platykurtic"
|
|
583
505
|
```
|
|
584
506
|
|
|
585
|
-
|
|
507
|
+
#### `percentile(array, p)`
|
|
586
508
|
|
|
587
|
-
|
|
509
|
+
Calculates the p-th percentile.
|
|
588
510
|
|
|
589
|
-
|
|
511
|
+
**Parameters:**
|
|
512
|
+
- `array`: Array of numbers
|
|
513
|
+
- `p`: Percentile (0-100)
|
|
590
514
|
|
|
591
|
-
|
|
515
|
+
**Returns:**
|
|
516
|
+
```javascript
|
|
517
|
+
{
|
|
518
|
+
type: "statistic",
|
|
519
|
+
name: "percentile",
|
|
520
|
+
percentile: 75,
|
|
521
|
+
value: 4,
|
|
522
|
+
n: 5
|
|
523
|
+
}
|
|
524
|
+
```
|
|
592
525
|
|
|
593
526
|
**Example:**
|
|
594
527
|
```javascript
|
|
595
|
-
const
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
const df2 = datly.df_from_json([
|
|
600
|
-
{ name: 'Bob', age: 25 }
|
|
601
|
-
]);
|
|
602
|
-
|
|
603
|
-
const combined = datly.df_concat(df1, df2);
|
|
528
|
+
const data = [1, 2, 3, 4, 5];
|
|
529
|
+
const result = datly.percentile(data, 75);
|
|
530
|
+
console.log(result.value); // 4
|
|
604
531
|
```
|
|
605
532
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
### `df_merge(dataframe1, dataframe2, options)`
|
|
533
|
+
#### `quantile(array, q)`
|
|
609
534
|
|
|
610
|
-
|
|
535
|
+
Calculates the q-th quantile.
|
|
611
536
|
|
|
612
537
|
**Parameters:**
|
|
613
|
-
- `
|
|
614
|
-
|
|
615
|
-
- `how`: 'inner', 'left', 'right', or 'outer'
|
|
538
|
+
- `array`: Array of numbers
|
|
539
|
+
- `q`: Quantile (0-1)
|
|
616
540
|
|
|
617
541
|
**Example:**
|
|
618
542
|
```javascript
|
|
619
|
-
const
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
]);
|
|
623
|
-
|
|
624
|
-
const salaries = datly.df_from_json([
|
|
625
|
-
{ id: 1, salary: 50000 },
|
|
626
|
-
{ id: 2, salary: 45000 }
|
|
627
|
-
]);
|
|
628
|
-
|
|
629
|
-
// Inner join
|
|
630
|
-
const merged = datly.df_merge(employees, salaries, {
|
|
631
|
-
on: 'id',
|
|
632
|
-
how: 'inner'
|
|
633
|
-
});
|
|
634
|
-
|
|
635
|
-
// Multiple keys
|
|
636
|
-
const merged2 = datly.df_merge(df1, df2, {
|
|
637
|
-
on: ['id', 'year'],
|
|
638
|
-
how: 'left'
|
|
639
|
-
});
|
|
543
|
+
const data = [1, 2, 3, 4, 5];
|
|
544
|
+
const result = datly.quantile(data, 0.75);
|
|
545
|
+
console.log(result.value); // 4
|
|
640
546
|
```
|
|
641
547
|
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
### `df_groupby(dataframe, keys)`
|
|
548
|
+
#### `describe(array)`
|
|
645
549
|
|
|
646
|
-
|
|
550
|
+
Provides comprehensive descriptive statistics.
|
|
647
551
|
|
|
648
552
|
**Returns:**
|
|
649
553
|
```javascript
|
|
650
554
|
{
|
|
651
|
-
|
|
652
|
-
|
|
555
|
+
type: "descriptive_statistics",
|
|
556
|
+
n: 5,
|
|
557
|
+
mean: 3,
|
|
558
|
+
median: 3,
|
|
559
|
+
std: 1.58,
|
|
560
|
+
variance: 2.5,
|
|
561
|
+
min: 1,
|
|
562
|
+
max: 5,
|
|
563
|
+
q1: 2,
|
|
564
|
+
q3: 4,
|
|
565
|
+
iqr: 2,
|
|
566
|
+
skewness: 0,
|
|
567
|
+
kurtosis: -1.2
|
|
653
568
|
}
|
|
654
569
|
```
|
|
655
570
|
|
|
656
571
|
**Example:**
|
|
657
572
|
```javascript
|
|
658
|
-
const
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
]);
|
|
663
|
-
|
|
664
|
-
// Group by single column
|
|
665
|
-
const grouped = datly.df_groupby(df, 'department');
|
|
666
|
-
|
|
667
|
-
// Group by multiple columns
|
|
668
|
-
const multiGrouped = datly.df_groupby(df, ['department', 'level']);
|
|
573
|
+
const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
|
574
|
+
const result = datly.describe(data);
|
|
575
|
+
console.log(result.mean); // Access mean directly
|
|
576
|
+
console.log(result.std); // Access standard deviation
|
|
669
577
|
```
|
|
670
578
|
|
|
671
579
|
---
|
|
672
580
|
|
|
673
|
-
|
|
581
|
+
## Exploratory Data Analysis
|
|
674
582
|
|
|
675
|
-
|
|
583
|
+
### `eda_overview(data)`
|
|
676
584
|
|
|
677
|
-
|
|
678
|
-
```javascript
|
|
679
|
-
const df = datly.df_from_json(employeeData);
|
|
680
|
-
const grouped = datly.df_groupby(df, 'department');
|
|
585
|
+
Provides a comprehensive overview of a dataset.
|
|
681
586
|
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
salary: arr => arr.reduce((a, b) => a + b, 0) / arr.length,
|
|
685
|
-
age: arr => arr.reduce((a, b) => a + b, 0) / arr.length
|
|
686
|
-
});
|
|
587
|
+
**Parameters:**
|
|
588
|
+
- `data`: Array of objects or 2D array
|
|
687
589
|
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
590
|
+
**Returns:**
|
|
591
|
+
```javascript
|
|
592
|
+
{
|
|
593
|
+
type: "eda_overview",
|
|
594
|
+
n_observations: 100,
|
|
595
|
+
n_variables: 5,
|
|
596
|
+
variables: [
|
|
597
|
+
{
|
|
598
|
+
name: "age",
|
|
599
|
+
type: "numeric",
|
|
600
|
+
missing: 0,
|
|
601
|
+
unique: 25,
|
|
602
|
+
mean: 35.5,
|
|
603
|
+
std: 12.3
|
|
604
|
+
},
|
|
605
|
+
{
|
|
606
|
+
name: "department",
|
|
607
|
+
type: "categorical",
|
|
608
|
+
missing: 2,
|
|
609
|
+
unique: 4,
|
|
610
|
+
mode: "engineering",
|
|
611
|
+
frequency: 45
|
|
612
|
+
}
|
|
613
|
+
],
|
|
614
|
+
memory_usage: "2.1kb"
|
|
615
|
+
}
|
|
693
616
|
```
|
|
694
617
|
|
|
695
|
-
---
|
|
696
|
-
|
|
697
|
-
## Utility Functions
|
|
698
|
-
|
|
699
|
-
### `df_apply(dataframe, column, function)`
|
|
700
|
-
|
|
701
|
-
Applies a function to transform a column.
|
|
702
|
-
|
|
703
618
|
**Example:**
|
|
704
619
|
```javascript
|
|
705
|
-
const
|
|
706
|
-
{ name: 'Alice', salary: 50000 },
|
|
707
|
-
{ name: 'Bob', salary: 45000 }
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
// Increase all salaries by 10%
|
|
711
|
-
const increased = datly.df_apply(df, 'salary', val => val * 1.1);
|
|
620
|
+
const employees = [
|
|
621
|
+
{ name: 'Alice', age: 30, salary: 50000, department: 'Engineering' },
|
|
622
|
+
{ name: 'Bob', age: 25, salary: 45000, department: 'Sales' },
|
|
623
|
+
{ name: 'Charlie', age: 35, salary: 60000, department: 'Engineering' }
|
|
624
|
+
];
|
|
712
625
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
return row.name === 'Alice' ? val * 1.2 : val * 1.1;
|
|
716
|
-
});
|
|
626
|
+
const overview = datly.eda_overview(employees);
|
|
627
|
+
console.log(overview);
|
|
717
628
|
```
|
|
718
629
|
|
|
719
|
-
|
|
630
|
+
### `missing_values(data)`
|
|
720
631
|
|
|
721
|
-
|
|
632
|
+
Analyzes missing values in the dataset.
|
|
722
633
|
|
|
723
|
-
|
|
634
|
+
**Returns:**
|
|
635
|
+
```javascript
|
|
636
|
+
{
|
|
637
|
+
type: "missing_values_analysis",
|
|
638
|
+
total_missing: 15,
|
|
639
|
+
missing_percentage: 7.5,
|
|
640
|
+
variables: [
|
|
641
|
+
{ name: "age", missing: 0, percentage: 0 },
|
|
642
|
+
{ name: "salary", missing: 5, percentage: 25 },
|
|
643
|
+
{ name: "department", missing: 10, percentage: 50 }
|
|
644
|
+
]
|
|
645
|
+
}
|
|
646
|
+
```
|
|
724
647
|
|
|
725
648
|
**Example:**
|
|
726
649
|
```javascript
|
|
727
|
-
const
|
|
728
|
-
{
|
|
729
|
-
{
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
// Add total compensation
|
|
733
|
-
const withTotal = datly.df_add_column(df, 'total_comp',
|
|
734
|
-
row => row.salary + row.bonus
|
|
735
|
-
);
|
|
650
|
+
const data = [
|
|
651
|
+
{ age: 30, salary: 50000, department: 'Engineering' },
|
|
652
|
+
{ age: null, salary: 45000, department: null },
|
|
653
|
+
{ age: 35, salary: null, department: 'Engineering' }
|
|
654
|
+
];
|
|
736
655
|
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
row => row.salary * 0.25
|
|
740
|
-
);
|
|
656
|
+
const missing = datly.missing_values(data);
|
|
657
|
+
console.log(missing);
|
|
741
658
|
```
|
|
742
659
|
|
|
743
|
-
|
|
660
|
+
### `outliers_zscore(array, threshold = 3)`
|
|
744
661
|
|
|
745
|
-
|
|
662
|
+
Detects outliers using Z-score method.
|
|
746
663
|
|
|
747
|
-
|
|
664
|
+
**Parameters:**
|
|
665
|
+
- `array`: Array of numbers
|
|
666
|
+
- `threshold`: Z-score threshold (default: 3)
|
|
748
667
|
|
|
749
|
-
**
|
|
668
|
+
**Returns:**
|
|
750
669
|
```javascript
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
670
|
+
{
|
|
671
|
+
type: "outlier_detection",
|
|
672
|
+
method: "zscore",
|
|
673
|
+
threshold: 3,
|
|
674
|
+
n_outliers: 2,
|
|
675
|
+
outlier_indices: [5, 12],
|
|
676
|
+
outlier_values: [200, 30]
|
|
677
|
+
}
|
|
754
678
|
```
|
|
755
679
|
|
|
756
|
-
---
|
|
757
|
-
|
|
758
|
-
### `df_sample(dataframe, n = 5, seed = null)`
|
|
759
|
-
|
|
760
|
-
Returns a random sample of rows.
|
|
761
|
-
|
|
762
680
|
**Example:**
|
|
763
681
|
```javascript
|
|
764
|
-
const
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
const sample = datly.df_sample(df, 10);
|
|
768
|
-
|
|
769
|
-
// Reproducible with seed
|
|
770
|
-
const reproducible = datly.df_sample(df, 10, 42);
|
|
682
|
+
const data = [10, 12, 14, 15, 16, 200, 18, 19, 20, 21, 22, 23, 30];
|
|
683
|
+
const outliers = datly.outliers_zscore(data, 3);
|
|
684
|
+
console.log(outliers);
|
|
771
685
|
```
|
|
772
686
|
|
|
773
687
|
---
|
|
774
688
|
|
|
775
|
-
|
|
689
|
+
## Probability Distributions
|
|
690
|
+
|
|
691
|
+
### Normal Distribution
|
|
692
|
+
|
|
693
|
+
#### `normal_pdf(x, mean = 0, std = 1)`
|
|
776
694
|
|
|
777
|
-
|
|
695
|
+
Calculates the probability density function of the normal distribution.
|
|
778
696
|
|
|
779
697
|
**Returns:**
|
|
780
|
-
```
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
698
|
+
```javascript
|
|
699
|
+
{
|
|
700
|
+
type: "probability_density",
|
|
701
|
+
distribution: "normal",
|
|
702
|
+
x: 0,
|
|
703
|
+
mean: 0,
|
|
704
|
+
std: 1,
|
|
705
|
+
pdf: 0.399
|
|
706
|
+
}
|
|
784
707
|
```
|
|
785
708
|
|
|
786
709
|
**Example:**
|
|
787
710
|
```javascript
|
|
788
|
-
const
|
|
789
|
-
|
|
790
|
-
// Export to CSV
|
|
791
|
-
const csv = datly.df_to_csv(df);
|
|
792
|
-
|
|
793
|
-
// Custom delimiter
|
|
794
|
-
const tsv = datly.df_to_csv(df, '\t');
|
|
711
|
+
const pdf = datly.normal_pdf(0, 0, 1);
|
|
712
|
+
console.log(pdf.pdf); // 0.399
|
|
795
713
|
```
|
|
796
714
|
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
## Working with Nested Data
|
|
715
|
+
#### `normal_cdf(x, mean = 0, std = 1)`
|
|
800
716
|
|
|
801
|
-
|
|
717
|
+
Calculates the cumulative distribution function.
|
|
802
718
|
|
|
803
|
-
|
|
719
|
+
**Returns:**
|
|
720
|
+
```javascript
|
|
721
|
+
{
|
|
722
|
+
type: "cumulative_probability",
|
|
723
|
+
distribution: "normal",
|
|
724
|
+
x: 0,
|
|
725
|
+
mean: 0,
|
|
726
|
+
std: 1,
|
|
727
|
+
cdf: 0.5
|
|
728
|
+
}
|
|
729
|
+
```
|
|
804
730
|
|
|
805
731
|
**Example:**
|
|
806
732
|
```javascript
|
|
807
|
-
const
|
|
808
|
-
|
|
809
|
-
{ user: 'Bob', order_ids: [4] }
|
|
810
|
-
]);
|
|
811
|
-
|
|
812
|
-
// Explode order_ids
|
|
813
|
-
const exploded = datly.df_explode(df, 'order_ids');
|
|
814
|
-
// Alice appears 3 times (one per order)
|
|
733
|
+
const cdf = datly.normal_cdf(1.96, 0, 1);
|
|
734
|
+
console.log(cdf.cdf); // ~0.975
|
|
815
735
|
```
|
|
816
736
|
|
|
817
|
-
|
|
737
|
+
### Random Sampling
|
|
738
|
+
|
|
739
|
+
#### `random_normal(n, mean = 0, std = 1, seed = null)`
|
|
818
740
|
|
|
819
|
-
|
|
741
|
+
Generates random samples from a normal distribution.
|
|
820
742
|
|
|
821
|
-
|
|
743
|
+
**Parameters:**
|
|
744
|
+
- `n`: Number of samples
|
|
745
|
+
- `mean`: Mean of the distribution
|
|
746
|
+
- `std`: Standard deviation
|
|
747
|
+
- `seed`: Random seed for reproducibility
|
|
822
748
|
|
|
823
749
|
**Returns:**
|
|
824
|
-
```
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
750
|
+
```javascript
|
|
751
|
+
{
|
|
752
|
+
type: "random_sample",
|
|
753
|
+
distribution: "normal",
|
|
754
|
+
n: 100,
|
|
755
|
+
mean: 0,
|
|
756
|
+
std: 1,
|
|
757
|
+
seed: 42,
|
|
758
|
+
sample: [0.674, -0.423, 1.764, ...],
|
|
759
|
+
sample_mean: 0.054,
|
|
760
|
+
sample_std: 0.986
|
|
761
|
+
}
|
|
831
762
|
```
|
|
832
763
|
|
|
833
764
|
**Example:**
|
|
834
765
|
```javascript
|
|
835
|
-
const
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
street: '123 Main St',
|
|
839
|
-
city: 'NYC'
|
|
840
|
-
}
|
|
841
|
-
};
|
|
842
|
-
|
|
843
|
-
const df = datly.df_from_object(user);
|
|
844
|
-
|
|
845
|
-
// Find address columns
|
|
846
|
-
const addressCols = datly.df_find_columns(df, 'address');
|
|
766
|
+
const samples = datly.random_normal(100, 0, 1, 42);
|
|
767
|
+
console.log(samples.sample.length); // 100
|
|
768
|
+
console.log(samples.sample_mean); // ~0.054
|
|
847
769
|
```
|
|
848
770
|
|
|
849
771
|
---
|
|
850
772
|
|
|
851
|
-
##
|
|
773
|
+
## Hypothesis Testing
|
|
774
|
+
|
|
775
|
+
### T-Tests
|
|
852
776
|
|
|
853
|
-
|
|
777
|
+
#### `ttest_1samp(array, popmean)`
|
|
778
|
+
|
|
779
|
+
One-sample t-test.
|
|
854
780
|
|
|
855
|
-
|
|
781
|
+
**Parameters:**
|
|
782
|
+
- `array`: Sample data
|
|
783
|
+
- `popmean`: Population mean to test against
|
|
856
784
|
|
|
857
785
|
**Returns:**
|
|
858
|
-
```
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
786
|
+
```javascript
|
|
787
|
+
{
|
|
788
|
+
type: "hypothesis_test",
|
|
789
|
+
test: "one_sample_ttest",
|
|
790
|
+
n: 20,
|
|
791
|
+
sample_mean: 5.2,
|
|
792
|
+
population_mean: 5.0,
|
|
793
|
+
t_statistic: 1.89,
|
|
794
|
+
p_value: 0.074,
|
|
795
|
+
degrees_of_freedom: 19,
|
|
796
|
+
confidence_interval: [4.87, 5.53],
|
|
797
|
+
conclusion: "fail_to_reject_h0",
|
|
798
|
+
alpha: 0.05
|
|
799
|
+
}
|
|
863
800
|
```
|
|
864
801
|
|
|
865
802
|
**Example:**
|
|
866
803
|
```javascript
|
|
867
|
-
|
|
804
|
+
const sample = [4.8, 5.1, 5.3, 4.9, 5.2, 5.0, 5.4, 4.7, 5.1, 5.0];
|
|
805
|
+
const result = datly.ttest_1samp(sample, 5.0);
|
|
806
|
+
console.log(result.p_value); // ~0.49 for this sample
|
|
807
|
+
console.log(result.conclusion); // "fail_to_reject_h0"
|
|
868
808
|
```
|
|
869
809
|
|
|
870
|
-
|
|
810
|
+
#### `ttest_ind(array1, array2)`
|
|
871
811
|
|
|
872
|
-
|
|
812
|
+
Independent two-sample t-test.
|
|
873
813
|
|
|
874
814
|
**Returns:**
|
|
875
|
-
```
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
815
|
+
```javascript
|
|
816
|
+
{
|
|
817
|
+
type: "hypothesis_test",
|
|
818
|
+
test: "independent_ttest",
|
|
819
|
+
n1: 15,
|
|
820
|
+
n2: 18,
|
|
821
|
+
mean1: 5.2,
|
|
822
|
+
mean2: 4.8,
|
|
823
|
+
t_statistic: 2.45,
|
|
824
|
+
p_value: 0.019,
|
|
825
|
+
degrees_of_freedom: 31,
|
|
826
|
+
confidence_interval: [0.067, 0.733],
|
|
827
|
+
conclusion: "reject_h0",
|
|
828
|
+
alpha: 0.05
|
|
829
|
+
}
|
|
880
830
|
```
|
|
881
831
|
|
|
882
832
|
**Example:**
|
|
883
833
|
```javascript
|
|
884
|
-
|
|
885
|
-
|
|
834
|
+
const group1 = [5.1, 5.3, 4.9, 5.2, 5.0];
|
|
835
|
+
const group2 = [4.8, 4.6, 4.9, 4.7, 4.5];
|
|
836
|
+
const result = datly.ttest_ind(group1, group2);
|
|
837
|
+
console.log(result.p_value < 0.05); // true (significant difference)
|
|
886
838
|
```
|
|
887
839
|
|
|
888
|
-
###
|
|
840
|
+
### ANOVA
|
|
889
841
|
|
|
890
|
-
|
|
842
|
+
#### `anova_oneway(groups)`
|
|
843
|
+
|
|
844
|
+
One-way ANOVA test.
|
|
891
845
|
|
|
892
846
|
**Parameters:**
|
|
893
|
-
- `
|
|
894
|
-
- `sample`: If true, uses sample variance (n-1); if false, uses population variance (n)
|
|
847
|
+
- `groups`: Array of arrays, each representing a group
|
|
895
848
|
|
|
896
849
|
**Returns:**
|
|
897
|
-
```
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
850
|
+
```javascript
|
|
851
|
+
{
|
|
852
|
+
type: "hypothesis_test",
|
|
853
|
+
test: "one_way_anova",
|
|
854
|
+
n_groups: 3,
|
|
855
|
+
total_n: 45,
|
|
856
|
+
f_statistic: 8.76,
|
|
857
|
+
p_value: 0.001,
|
|
858
|
+
between_groups_df: 2,
|
|
859
|
+
within_groups_df: 42,
|
|
860
|
+
total_df: 44,
|
|
861
|
+
between_groups_ss: 125.4,
|
|
862
|
+
within_groups_ss: 301.2,
|
|
863
|
+
total_ss: 426.6,
|
|
864
|
+
conclusion: "reject_h0",
|
|
865
|
+
alpha: 0.05
|
|
866
|
+
}
|
|
903
867
|
```
|
|
904
868
|
|
|
905
869
|
**Example:**
|
|
906
870
|
```javascript
|
|
907
|
-
|
|
908
|
-
|
|
871
|
+
const group1 = [23, 25, 28, 30, 32];
|
|
872
|
+
const group2 = [18, 20, 22, 24, 26];
|
|
873
|
+
const group3 = [15, 17, 19, 21, 23];
|
|
874
|
+
|
|
875
|
+
const result = datly.anova_oneway([group1, group2, group3]);
|
|
876
|
+
console.log(result);
|
|
909
877
|
```
|
|
910
878
|
|
|
911
|
-
###
|
|
879
|
+
### Normality Tests
|
|
880
|
+
|
|
881
|
+
#### `shapiro_wilk(array)`
|
|
912
882
|
|
|
913
|
-
|
|
883
|
+
Shapiro-Wilk test for normality.
|
|
914
884
|
|
|
915
885
|
**Returns:**
|
|
916
886
|
```yaml
|
|
917
|
-
type:
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
887
|
+
type: hypothesis_test
|
|
888
|
+
test: shapiro_wilk
|
|
889
|
+
n: 50
|
|
890
|
+
w_statistic: 0.973
|
|
891
|
+
p_value: 0.284
|
|
892
|
+
conclusion: fail_to_reject_h0
|
|
893
|
+
interpretation: data_appears_normal
|
|
894
|
+
alpha: 0.05
|
|
922
895
|
```
|
|
923
896
|
|
|
924
897
|
**Example:**
|
|
925
898
|
```javascript
|
|
926
|
-
datly.
|
|
899
|
+
const data = datly.random_normal(50, 0, 1, 42);
|
|
900
|
+
const parsedData = data.sample; // random_normal returns an object, so no JSON.parse is needed
|
|
901
|
+
const result = datly.shapiro_wilk(parsedData);
|
|
902
|
+
console.log(result);
|
|
927
903
|
```
|
|
928
904
|
|
|
929
|
-
|
|
905
|
+
---
|
|
930
906
|
|
|
931
|
-
|
|
907
|
+
## Correlation Analysis
|
|
932
908
|
|
|
933
|
-
|
|
934
|
-
```yaml
|
|
935
|
-
type: statistic
|
|
936
|
-
name: min
|
|
937
|
-
value: 1
|
|
938
|
-
```
|
|
909
|
+
### `correlation(x, y, method = 'pearson')`
|
|
939
910
|
|
|
940
|
-
|
|
911
|
+
Calculates correlation between two variables.
|
|
941
912
|
|
|
942
|
-
|
|
913
|
+
**Parameters:**
|
|
914
|
+
- `x`: First variable array
|
|
915
|
+
- `y`: Second variable array
|
|
916
|
+
- `method`: 'pearson', 'spearman', or 'kendall'
|
|
943
917
|
|
|
944
918
|
**Returns:**
|
|
945
919
|
```yaml
|
|
946
|
-
type:
|
|
947
|
-
|
|
948
|
-
|
|
920
|
+
type: correlation
|
|
921
|
+
method: pearson
|
|
922
|
+
correlation: 0.87
|
|
923
|
+
n: 20
|
|
924
|
+
p_value: 0.001
|
|
925
|
+
confidence_interval:
|
|
926
|
+
- 0.68
|
|
927
|
+
- 0.95
|
|
928
|
+
interpretation: strong_positive
|
|
929
|
+
```
|
|
930
|
+
|
|
931
|
+
**Example:**
|
|
932
|
+
```javascript
|
|
933
|
+
const x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
|
934
|
+
const y = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20];
|
|
935
|
+
|
|
936
|
+
const result = datly.correlation(x, y, 'pearson');
|
|
937
|
+
console.log(result);
|
|
949
938
|
```
|
|
950
939
|
|
|
951
|
-
### `
|
|
940
|
+
### `df_corr(dataframe, method = 'pearson')`
|
|
952
941
|
|
|
953
|
-
Calculates
|
|
942
|
+
Calculates correlation matrix for a dataframe.
|
|
954
943
|
|
|
955
944
|
**Returns:**
|
|
956
945
|
```yaml
|
|
957
|
-
type:
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
Calculates the skewness (measure of asymmetry).
|
|
974
|
-
|
|
975
|
-
**Returns:**
|
|
976
|
-
```yaml
|
|
977
|
-
type: statistic
|
|
978
|
-
name: skewness
|
|
979
|
-
value: 0
|
|
980
|
-
```
|
|
981
|
-
|
|
982
|
-
**Example:**
|
|
983
|
-
```javascript
|
|
984
|
-
datly.skewness([1, 2, 3, 4, 5]); // ~0 for symmetric data
|
|
985
|
-
```
|
|
986
|
-
|
|
987
|
-
### `kurtosis(array)`
|
|
988
|
-
|
|
989
|
-
Calculates the kurtosis (measure of tailedness).
|
|
990
|
-
|
|
991
|
-
**Returns:**
|
|
992
|
-
```yaml
|
|
993
|
-
type: statistic
|
|
994
|
-
name: kurtosis
|
|
995
|
-
value: -1.2
|
|
996
|
-
```
|
|
997
|
-
|
|
998
|
-
**Example:**
|
|
999
|
-
```javascript
|
|
1000
|
-
datly.kurtosis([1, 2, 3, 4, 5]);
|
|
1001
|
-
```
|
|
1002
|
-
|
|
1003
|
-
---
|
|
1004
|
-
|
|
1005
|
-
## Exploratory Data Analysis
|
|
1006
|
-
|
|
1007
|
-
### `df_describe(data)`
|
|
1008
|
-
|
|
1009
|
-
Generates comprehensive descriptive statistics for a dataset.
|
|
1010
|
-
|
|
1011
|
-
**Returns:**
|
|
1012
|
-
```yaml
|
|
1013
|
-
type: describe
|
|
1014
|
-
columns:
|
|
1015
|
-
age:
|
|
1016
|
-
dtype: number
|
|
1017
|
-
count: 100
|
|
1018
|
-
missing: 0
|
|
1019
|
-
mean: 35.5
|
|
1020
|
-
std: 10.2
|
|
1021
|
-
min: 18
|
|
1022
|
-
q1: 28
|
|
1023
|
-
median: 35
|
|
1024
|
-
q3: 43
|
|
1025
|
-
max: 65
|
|
1026
|
-
skewness: 0.15
|
|
1027
|
-
kurtosis: -0.5
|
|
1028
|
-
name:
|
|
1029
|
-
dtype: string
|
|
1030
|
-
count: 100
|
|
1031
|
-
missing: 2
|
|
1032
|
-
unique: 95
|
|
1033
|
-
top:
|
|
1034
|
-
- value: john
|
|
1035
|
-
freq: 3
|
|
1036
|
-
- value: alice
|
|
1037
|
-
freq: 2
|
|
1038
|
-
```
|
|
1039
|
-
|
|
1040
|
-
**Example:**
|
|
1041
|
-
```javascript
|
|
1042
|
-
const data = [
|
|
1043
|
-
{ age: 25, salary: 50000, dept: 'IT' },
|
|
1044
|
-
{ age: 30, salary: 60000, dept: 'HR' },
|
|
1045
|
-
{ age: 35, salary: 70000, dept: 'IT' }
|
|
1046
|
-
];
|
|
1047
|
-
|
|
1048
|
-
const description = datly.df_describe(data);
|
|
1049
|
-
console.log(description);
|
|
1050
|
-
```
|
|
1051
|
-
|
|
1052
|
-
### `df_missing_report(data)`
|
|
1053
|
-
|
|
1054
|
-
Analyzes missing values in the dataset.
|
|
1055
|
-
|
|
1056
|
-
**Returns:**
|
|
1057
|
-
```yaml
|
|
1058
|
-
type: missing_report
|
|
1059
|
-
rows:
|
|
1060
|
-
- column: age
|
|
1061
|
-
missing: 5
|
|
1062
|
-
missing_rate: 0.05
|
|
1063
|
-
- column: salary
|
|
1064
|
-
missing: 0
|
|
1065
|
-
missing_rate: 0
|
|
1066
|
-
- column: name
|
|
1067
|
-
missing: 10
|
|
1068
|
-
missing_rate: 0.1
|
|
1069
|
-
```
|
|
1070
|
-
|
|
1071
|
-
**Example:**
|
|
1072
|
-
```javascript
|
|
1073
|
-
const report = datly.df_missing_report(data);
|
|
1074
|
-
```
|
|
1075
|
-
|
|
1076
|
-
### `df_corr(data, method = 'pearson')`
|
|
1077
|
-
|
|
1078
|
-
Calculates correlation matrix between numeric columns.
|
|
1079
|
-
|
|
1080
|
-
**Parameters:**
|
|
1081
|
-
- `data`: Array of objects
|
|
1082
|
-
- `method`: 'pearson' or 'spearman'
|
|
1083
|
-
|
|
1084
|
-
**Returns:**
|
|
1085
|
-
```yaml
|
|
1086
|
-
type: correlation_matrix
|
|
1087
|
-
method: pearson
|
|
1088
|
-
matrix:
|
|
1089
|
-
age:
|
|
1090
|
-
age: 1
|
|
1091
|
-
salary: 0.85
|
|
1092
|
-
experience: 0.92
|
|
1093
|
-
salary:
|
|
1094
|
-
age: 0.85
|
|
1095
|
-
salary: 1
|
|
1096
|
-
experience: 0.78
|
|
1097
|
-
experience:
|
|
1098
|
-
age: 0.92
|
|
1099
|
-
salary: 0.78
|
|
1100
|
-
experience: 1
|
|
1101
|
-
```
|
|
1102
|
-
|
|
1103
|
-
**Example:**
|
|
1104
|
-
```javascript
|
|
1105
|
-
const corr = datly.df_corr(data, 'pearson');
|
|
1106
|
-
const spearman = datly.df_corr(data, 'spearman');
|
|
1107
|
-
```
|
|
1108
|
-
|
|
1109
|
-
### `eda_overview(data)`
|
|
1110
|
-
|
|
1111
|
-
Generates a comprehensive EDA report combining describe, missing values, and correlation.
|
|
1112
|
-
|
|
1113
|
-
**Returns:**
|
|
1114
|
-
```yaml
|
|
1115
|
-
type: eda
|
|
1116
|
-
summary:
|
|
1117
|
-
age:
|
|
1118
|
-
dtype: number
|
|
1119
|
-
count: 100
|
|
1120
|
-
mean: 35.5
|
|
1121
|
-
std: 10.2
|
|
1122
|
-
...
|
|
1123
|
-
missing:
|
|
1124
|
-
- column: age
|
|
1125
|
-
missing: 5
|
|
1126
|
-
missing_rate: 0.05
|
|
1127
|
-
correlation:
|
|
1128
|
-
age:
|
|
1129
|
-
age: 1
|
|
1130
|
-
salary: 0.85
|
|
1131
|
-
```
|
|
1132
|
-
|
|
1133
|
-
**Example:**
|
|
1134
|
-
```javascript
|
|
1135
|
-
const overview = datly.eda_overview(data);
|
|
1136
|
-
```
|
|
1137
|
-
|
|
1138
|
-
---
|
|
1139
|
-
|
|
1140
|
-
## Probability Distributions
|
|
1141
|
-
|
|
1142
|
-
### Normal Distribution
|
|
1143
|
-
|
|
1144
|
-
#### `normal_pdf(x, mu = 0, sigma = 1)`
|
|
1145
|
-
|
|
1146
|
-
Probability density function of normal distribution.
|
|
1147
|
-
|
|
1148
|
-
**Returns:**
|
|
1149
|
-
```yaml
|
|
1150
|
-
type: distribution
|
|
1151
|
-
name: normal_pdf
|
|
1152
|
-
params:
|
|
1153
|
-
mu: 0
|
|
1154
|
-
sigma: 1
|
|
1155
|
-
value: 0.3989422804014327
|
|
1156
|
-
```
|
|
1157
|
-
|
|
1158
|
-
**Example:**
|
|
1159
|
-
```javascript
|
|
1160
|
-
datly.normal_pdf(0); // PDF at x=0
|
|
1161
|
-
datly.normal_pdf([0, 1, 2], 0, 1); // PDF for multiple values
|
|
1162
|
-
```
|
|
1163
|
-
|
|
1164
|
-
#### `normal_cdf(x, mu = 0, sigma = 1)`
|
|
1165
|
-
|
|
1166
|
-
Cumulative distribution function of normal distribution.
|
|
1167
|
-
|
|
1168
|
-
**Returns:**
|
|
1169
|
-
```yaml
|
|
1170
|
-
type: distribution
|
|
1171
|
-
name: normal_cdf
|
|
1172
|
-
params:
|
|
1173
|
-
mu: 0
|
|
1174
|
-
sigma: 1
|
|
1175
|
-
value: 0.5
|
|
1176
|
-
```
|
|
1177
|
-
|
|
1178
|
-
**Example:**
|
|
1179
|
-
```javascript
|
|
1180
|
-
datly.normal_cdf(0); // P(X ≤ 0)
|
|
1181
|
-
datly.normal_cdf(1.96); // P(X ≤ 1.96) ≈ 0.975
|
|
1182
|
-
```
|
|
1183
|
-
|
|
1184
|
-
#### `normal_ppf(p, mu = 0, sigma = 1)`
|
|
1185
|
-
|
|
1186
|
-
Percent point function (inverse CDF) of normal distribution.
|
|
1187
|
-
|
|
1188
|
-
**Returns:**
|
|
1189
|
-
```yaml
|
|
1190
|
-
type: distribution
|
|
1191
|
-
name: normal_ppf
|
|
1192
|
-
params:
|
|
1193
|
-
mu: 0
|
|
1194
|
-
sigma: 1
|
|
1195
|
-
value: 1.959963984540054
|
|
1196
|
-
```
|
|
1197
|
-
|
|
1198
|
-
**Example:**
|
|
1199
|
-
```javascript
|
|
1200
|
-
datly.normal_ppf(0.975); // Returns ~1.96
|
|
1201
|
-
```
|
|
1202
|
-
|
|
1203
|
-
### Binomial Distribution
|
|
1204
|
-
|
|
1205
|
-
#### `binomial_pmf(k, n, p)`
|
|
1206
|
-
|
|
1207
|
-
Probability mass function of binomial distribution.
|
|
1208
|
-
|
|
1209
|
-
**Parameters:**
|
|
1210
|
-
- `k`: Number of successes (can be array)
|
|
1211
|
-
- `n`: Number of trials
|
|
1212
|
-
- `p`: Probability of success
|
|
1213
|
-
|
|
1214
|
-
**Returns:**
|
|
1215
|
-
```yaml
|
|
1216
|
-
type: distribution
|
|
1217
|
-
name: binomial_pmf
|
|
1218
|
-
params:
|
|
1219
|
-
n: 10
|
|
1220
|
-
p: 0.5
|
|
1221
|
-
value: 0.24609375
|
|
1222
|
-
```
|
|
1223
|
-
|
|
1224
|
-
**Example:**
|
|
1225
|
-
```javascript
|
|
1226
|
-
datly.binomial_pmf(5, 10, 0.5); // P(X = 5)
|
|
1227
|
-
datly.binomial_pmf([0, 1, 2, 3], 10, 0.3); // Multiple values
|
|
1228
|
-
```
|
|
1229
|
-
|
|
1230
|
-
#### `binomial_cdf(k, n, p)`
|
|
1231
|
-
|
|
1232
|
-
Cumulative distribution function of binomial distribution.
|
|
1233
|
-
|
|
1234
|
-
**Returns:**
|
|
1235
|
-
```yaml
|
|
1236
|
-
type: distribution
|
|
1237
|
-
name: binomial_cdf
|
|
1238
|
-
params:
|
|
1239
|
-
n: 10
|
|
1240
|
-
p: 0.5
|
|
1241
|
-
value: 0.623046875
|
|
1242
|
-
```
|
|
1243
|
-
|
|
1244
|
-
### Poisson Distribution
|
|
1245
|
-
|
|
1246
|
-
#### `poisson_pmf(k, lambda)`
|
|
1247
|
-
|
|
1248
|
-
Probability mass function of Poisson distribution.
|
|
1249
|
-
|
|
1250
|
-
**Returns:**
|
|
1251
|
-
```yaml
|
|
1252
|
-
type: distribution
|
|
1253
|
-
name: poisson_pmf
|
|
1254
|
-
params:
|
|
1255
|
-
lambda: 3
|
|
1256
|
-
value: 0.22404180765538775
|
|
1257
|
-
```
|
|
1258
|
-
|
|
1259
|
-
**Example:**
|
|
1260
|
-
```javascript
|
|
1261
|
-
datly.poisson_pmf(3, 3); // P(X = 3) when λ = 3
|
|
1262
|
-
```
|
|
1263
|
-
|
|
1264
|
-
#### `poisson_cdf(k, lambda)`
|
|
1265
|
-
|
|
1266
|
-
Cumulative distribution function of Poisson distribution.
|
|
1267
|
-
|
|
1268
|
-
**Returns:**
|
|
1269
|
-
```yaml
|
|
1270
|
-
type: distribution
|
|
1271
|
-
name: poisson_cdf
|
|
1272
|
-
params:
|
|
1273
|
-
lambda: 3
|
|
1274
|
-
value: 0.6472319374260858
|
|
1275
|
-
```
|
|
1276
|
-
|
|
1277
|
-
---
|
|
1278
|
-
|
|
1279
|
-
## Hypothesis Testing
|
|
1280
|
-
|
|
1281
|
-
### `t_test_one_sample(array, hypothesized_mean)`
|
|
1282
|
-
|
|
1283
|
-
One-sample t-test.
|
|
1284
|
-
|
|
1285
|
-
**Returns:**
|
|
1286
|
-
```yaml
|
|
1287
|
-
type: hypothesis_test
|
|
1288
|
-
name: one_sample_t_test
|
|
1289
|
-
statistic: 2.345
|
|
1290
|
-
df: 99
|
|
1291
|
-
p_value: 0.021
|
|
1292
|
-
mean: 105
|
|
1293
|
-
hypothesized_mean: 100
|
|
1294
|
-
```
|
|
1295
|
-
|
|
1296
|
-
**Example:**
|
|
1297
|
-
```javascript
|
|
1298
|
-
const data = [102, 98, 105, 110, 95, 100, 108];
|
|
1299
|
-
datly.t_test_one_sample(data, 100);
|
|
1300
|
-
```
|
|
1301
|
-
|
|
1302
|
-
### `t_test_paired(array1, array2)`
|
|
1303
|
-
|
|
1304
|
-
Paired samples t-test.
|
|
1305
|
-
|
|
1306
|
-
**Returns:**
|
|
1307
|
-
```yaml
|
|
1308
|
-
type: hypothesis_test
|
|
1309
|
-
name: paired_t_test
|
|
1310
|
-
statistic: 3.456
|
|
1311
|
-
df: 29
|
|
1312
|
-
p_value: 0.0018
|
|
1313
|
-
mean_difference: 2.5
|
|
1314
|
-
```
|
|
1315
|
-
|
|
1316
|
-
**Example:**
|
|
1317
|
-
```javascript
|
|
1318
|
-
const before = [120, 115, 130, 125, 140];
|
|
1319
|
-
const after = [115, 110, 125, 120, 135];
|
|
1320
|
-
datly.t_test_paired(before, after);
|
|
1321
|
-
```
|
|
1322
|
-
|
|
1323
|
-
### `t_test_independent(array1, array2, equal_var = true)`
|
|
1324
|
-
|
|
1325
|
-
Independent samples t-test.
|
|
1326
|
-
|
|
1327
|
-
**Parameters:**
|
|
1328
|
-
- `equal_var`: If true, assumes equal variances (pooled t-test); if false, uses Welch's t-test
|
|
1329
|
-
|
|
1330
|
-
**Returns:**
|
|
1331
|
-
```yaml
|
|
1332
|
-
type: hypothesis_test
|
|
1333
|
-
name: independent_t_test
|
|
1334
|
-
statistic: 2.105
|
|
1335
|
-
df: 48
|
|
1336
|
-
p_value: 0.041
|
|
1337
|
-
means:
|
|
1338
|
-
group_a: 105.5
|
|
1339
|
-
group_b: 98.3
|
|
1340
|
-
```
|
|
1341
|
-
|
|
1342
|
-
**Example:**
|
|
1343
|
-
```javascript
|
|
1344
|
-
const group1 = [100, 105, 110, 115, 120];
|
|
1345
|
-
const group2 = [95, 98, 100, 102, 105];
|
|
1346
|
-
datly.t_test_independent(group1, group2);
|
|
1347
|
-
```
|
|
1348
|
-
|
|
1349
|
-
### `z_test_one_sample(array, mu = 0, sigma = null, alpha = 0.05)`
|
|
1350
|
-
|
|
1351
|
-
One-sample z-test with confidence interval.
|
|
1352
|
-
|
|
1353
|
-
**Returns:**
|
|
1354
|
-
```yaml
|
|
1355
|
-
type: hypothesis_test
|
|
1356
|
-
name: one_sample_z_test
|
|
1357
|
-
statistic: 2.345
|
|
1358
|
-
p_value: 0.019
|
|
1359
|
-
ci_lower: 102.5
|
|
1360
|
-
ci_upper: 107.5
|
|
1361
|
-
confidence: 0.95
|
|
1362
|
-
extra:
|
|
1363
|
-
sample_mean: 105
|
|
1364
|
-
hypothesized_mean: 100
|
|
1365
|
-
se: 2.13
|
|
1366
|
-
sigma_used: 10
|
|
1367
|
-
n: 22
|
|
1368
|
-
effect_size: 0.5
|
|
1369
|
-
```
|
|
1370
|
-
|
|
1371
|
-
**Example:**
|
|
1372
|
-
```javascript
|
|
1373
|
-
datly.z_test_one_sample([102, 98, 105, 110], 100, 5, 0.05);
|
|
1374
|
-
```
|
|
1375
|
-
|
|
1376
|
-
### `anova_oneway(groups, alpha = 0.05)`
|
|
1377
|
-
|
|
1378
|
-
One-way ANOVA test.
|
|
1379
|
-
|
|
1380
|
-
**Parameters:**
|
|
1381
|
-
- `groups`: Array of arrays, each representing a group
|
|
1382
|
-
|
|
1383
|
-
**Returns:**
|
|
1384
|
-
```yaml
|
|
1385
|
-
type: hypothesis_test
|
|
1386
|
-
name: anova_oneway
|
|
1387
|
-
statistic: 5.678
|
|
1388
|
-
df:
|
|
1389
|
-
between: 2
|
|
1390
|
-
within: 27
|
|
1391
|
-
p_value: 0.009
|
|
1392
|
-
confidence: 0.95
|
|
1393
|
-
extra:
|
|
1394
|
-
group_means:
|
|
1395
|
-
- 102.5
|
|
1396
|
-
- 108.3
|
|
1397
|
-
- 115.7
|
|
1398
|
-
grand_mean: 108.8
|
|
1399
|
-
ssb: 450.5
|
|
1400
|
-
ssw: 890.2
|
|
1401
|
-
```
|
|
1402
|
-
|
|
1403
|
-
**Example:**
|
|
1404
|
-
```javascript
|
|
1405
|
-
const group1 = [100, 105, 110];
|
|
1406
|
-
const group2 = [108, 112, 115];
|
|
1407
|
-
const group3 = [115, 120, 125];
|
|
1408
|
-
datly.anova_oneway([group1, group2, group3]);
|
|
1409
|
-
```
|
|
1410
|
-
|
|
1411
|
-
### `chi_square_independence(observed, alpha = 0.05)`
|
|
1412
|
-
|
|
1413
|
-
Chi-square test for independence (contingency table).
|
|
1414
|
-
|
|
1415
|
-
**Parameters:**
|
|
1416
|
-
- `observed`: 2D array (contingency table)
|
|
1417
|
-
|
|
1418
|
-
**Returns:**
|
|
1419
|
-
```yaml
|
|
1420
|
-
type: hypothesis_test
|
|
1421
|
-
name: chi_square_independence
|
|
1422
|
-
statistic: 8.456
|
|
1423
|
-
df: 2
|
|
1424
|
-
p_value: 0.015
|
|
1425
|
-
confidence: 0.95
|
|
1426
|
-
extra:
|
|
1427
|
-
observed:
|
|
1428
|
-
- - 10
|
|
1429
|
-
- 20
|
|
1430
|
-
- 30
|
|
1431
|
-
- - 15
|
|
1432
|
-
- 25
|
|
1433
|
-
- 35
|
|
1434
|
-
expected:
|
|
1435
|
-
- - 12.5
|
|
1436
|
-
- 22.5
|
|
1437
|
-
- 32.5
|
|
1438
|
-
- - 12.5
|
|
1439
|
-
- 22.5
|
|
1440
|
-
- 32.5
|
|
1441
|
-
dof: 2
|
|
1442
|
-
```
|
|
1443
|
-
|
|
1444
|
-
**Example:**
|
|
1445
|
-
```javascript
|
|
1446
|
-
const table = [
|
|
1447
|
-
[10, 20, 30],
|
|
1448
|
-
[15, 25, 35]
|
|
1449
|
-
];
|
|
1450
|
-
datly.chi_square_independence(table);
|
|
1451
|
-
```
|
|
1452
|
-
|
|
1453
|
-
### `chi_square_goodness(observed, expected, alpha = 0.05)`
|
|
1454
|
-
|
|
1455
|
-
Chi-square goodness of fit test.
|
|
1456
|
-
|
|
1457
|
-
**Returns:**
|
|
1458
|
-
```yaml
|
|
1459
|
-
type: hypothesis_test
|
|
1460
|
-
name: chi_square_goodness_of_fit
|
|
1461
|
-
statistic: 3.456
|
|
1462
|
-
df: 3
|
|
1463
|
-
p_value: 0.327
|
|
1464
|
-
confidence: 0.95
|
|
1465
|
-
extra:
|
|
1466
|
-
observed:
|
|
1467
|
-
- 45
|
|
1468
|
-
- 55
|
|
1469
|
-
- 48
|
|
1470
|
-
- 52
|
|
1471
|
-
expected:
|
|
1472
|
-
- 50
|
|
1473
|
-
- 50
|
|
1474
|
-
- 50
|
|
1475
|
-
- 50
|
|
1476
|
-
dof: 3
|
|
1477
|
-
```
|
|
1478
|
-
|
|
1479
|
-
**Example:**
|
|
1480
|
-
```javascript
|
|
1481
|
-
const observed = [45, 55, 48, 52];
|
|
1482
|
-
const expected = [50, 50, 50, 50];
|
|
1483
|
-
datly.chi_square_goodness(observed, expected);
|
|
1484
|
-
```
|
|
1485
|
-
|
|
1486
|
-
### `shapiro_wilk(array)`
|
|
1487
|
-
|
|
1488
|
-
Shapiro-Wilk test for normality.
|
|
1489
|
-
|
|
1490
|
-
**Returns:**
|
|
1491
|
-
```yaml
|
|
1492
|
-
type: hypothesis_test
|
|
1493
|
-
name: shapiro_wilk
|
|
1494
|
-
statistic: 0.987
|
|
1495
|
-
n: 50
|
|
1496
|
-
note: approximation; w > 0.9 suggests normality
|
|
1497
|
-
```
|
|
1498
|
-
|
|
1499
|
-
**Example:**
|
|
1500
|
-
```javascript
|
|
1501
|
-
datly.shapiro_wilk([1.2, 2.3, 1.8, 2.1, 1.9, 2.0]);
|
|
1502
|
-
```
|
|
1503
|
-
|
|
1504
|
-
### `jarque_bera(array)`
|
|
1505
|
-
|
|
1506
|
-
Jarque-Bera test for normality.
|
|
1507
|
-
|
|
1508
|
-
**Returns:**
|
|
1509
|
-
```yaml
|
|
1510
|
-
type: hypothesis_test
|
|
1511
|
-
name: jarque_bera
|
|
1512
|
-
statistic: 2.345
|
|
1513
|
-
n: 100
|
|
1514
|
-
df: 2
|
|
1515
|
-
note: tests normality; low p-value rejects normality
|
|
1516
|
-
```
|
|
1517
|
-
|
|
1518
|
-
### `levene_test(groups)`
|
|
1519
|
-
|
|
1520
|
-
Levene's test for homogeneity of variance.
|
|
1521
|
-
|
|
1522
|
-
**Returns:**
|
|
1523
|
-
```yaml
|
|
1524
|
-
type: hypothesis_test
|
|
1525
|
-
name: levene_test
|
|
1526
|
-
statistic: 1.234
|
|
1527
|
-
df_between: 2
|
|
1528
|
-
df_within: 27
|
|
1529
|
-
note: tests homogeneity of variance
|
|
1530
|
-
```
|
|
1531
|
-
|
|
1532
|
-
**Example:**
|
|
1533
|
-
```javascript
|
|
1534
|
-
const g1 = [1, 2, 3, 4, 5];
|
|
1535
|
-
const g2 = [2, 3, 4, 5, 6];
|
|
1536
|
-
const g3 = [3, 4, 5, 6, 7];
|
|
1537
|
-
datly.levene_test([g1, g2, g3]);
|
|
1538
|
-
```
|
|
1539
|
-
|
|
1540
|
-
### `kruskal_wallis(groups)`
|
|
1541
|
-
|
|
1542
|
-
Kruskal-Wallis H-test (non-parametric alternative to ANOVA).
|
|
1543
|
-
|
|
1544
|
-
**Returns:**
|
|
1545
|
-
```yaml
|
|
1546
|
-
type: hypothesis_test
|
|
1547
|
-
name: kruskal_wallis
|
|
1548
|
-
statistic: 8.765
|
|
1549
|
-
df: 2
|
|
1550
|
-
note: non-parametric alternative to anova
|
|
1551
|
-
```
|
|
1552
|
-
|
|
1553
|
-
### `mann_whitney(array1, array2)`
|
|
1554
|
-
|
|
1555
|
-
Mann-Whitney U test (non-parametric alternative to t-test).
|
|
1556
|
-
|
|
1557
|
-
**Returns:**
|
|
1558
|
-
```yaml
|
|
1559
|
-
type: hypothesis_test
|
|
1560
|
-
name: mann_whitney_u
|
|
1561
|
-
statistic: 45
|
|
1562
|
-
z_score: -1.234
|
|
1563
|
-
p_value: 0.217
|
|
1564
|
-
note: non-parametric alternative to t-test
|
|
1565
|
-
```
|
|
1566
|
-
|
|
1567
|
-
### `wilcoxon_signed_rank(array1, array2)`
|
|
1568
|
-
|
|
1569
|
-
Wilcoxon signed-rank test (non-parametric paired test).
|
|
1570
|
-
|
|
1571
|
-
**Returns:**
|
|
1572
|
-
```yaml
|
|
1573
|
-
type: hypothesis_test
|
|
1574
|
-
name: wilcoxon_signed_rank
|
|
1575
|
-
statistic: 28
|
|
1576
|
-
z_score: 1.567
|
|
1577
|
-
p_value: 0.117
|
|
1578
|
-
n: 20
|
|
1579
|
-
```
|
|
1580
|
-
|
|
1581
|
-
### Confidence Intervals
|
|
1582
|
-
|
|
1583
|
-
#### `confidence_interval_mean(array, confidence = 0.95)`
|
|
1584
|
-
|
|
1585
|
-
Confidence interval for the mean.
|
|
1586
|
-
|
|
1587
|
-
**Returns:**
|
|
1588
|
-
```yaml
|
|
1589
|
-
type: confidence_interval
|
|
1590
|
-
parameter: mean
|
|
1591
|
-
confidence: 0.95
|
|
1592
|
-
n: 50
|
|
1593
|
-
mean: 102.5
|
|
1594
|
-
lower: 98.3
|
|
1595
|
-
upper: 106.7
|
|
1596
|
-
margin: 4.2
|
|
1597
|
-
```
|
|
1598
|
-
|
|
1599
|
-
#### `confidence_interval_proportion(successes, n, confidence = 0.95)`
|
|
1600
|
-
|
|
1601
|
-
Confidence interval for a proportion.
|
|
1602
|
-
|
|
1603
|
-
**Returns:**
|
|
1604
|
-
```yaml
|
|
1605
|
-
type: confidence_interval
|
|
1606
|
-
parameter: proportion
|
|
1607
|
-
confidence: 0.95
|
|
1608
|
-
n: 100
|
|
1609
|
-
proportion: 0.65
|
|
1610
|
-
lower: 0.551
|
|
1611
|
-
upper: 0.749
|
|
1612
|
-
margin: 0.099
|
|
1613
|
-
```
|
|
1614
|
-
|
|
1615
|
-
#### `confidence_interval_variance(array, confidence = 0.95)`
|
|
1616
|
-
|
|
1617
|
-
Confidence interval for variance.
|
|
1618
|
-
|
|
1619
|
-
**Returns:**
|
|
1620
|
-
```yaml
|
|
1621
|
-
type: confidence_interval
|
|
1622
|
-
parameter: variance
|
|
1623
|
-
confidence: 0.95
|
|
1624
|
-
n: 30
|
|
1625
|
-
variance: 25.5
|
|
1626
|
-
lower: 18.2
|
|
1627
|
-
upper: 38.7
|
|
1628
|
-
```
|
|
1629
|
-
|
|
1630
|
-
#### `confidence_interval_difference(array1, array2, confidence = 0.95)`
|
|
1631
|
-
|
|
1632
|
-
Confidence interval for difference of means.
|
|
1633
|
-
|
|
1634
|
-
**Returns:**
|
|
1635
|
-
```yaml
|
|
1636
|
-
type: confidence_interval
|
|
1637
|
-
parameter: difference_of_means
|
|
1638
|
-
confidence: 0.95
|
|
1639
|
-
difference: 5.5
|
|
1640
|
-
lower: 2.3
|
|
1641
|
-
upper: 8.7
|
|
1642
|
-
margin: 3.2
|
|
1643
|
-
means:
|
|
1644
|
-
group_a: 105.5
|
|
1645
|
-
group_b: 100
|
|
1646
|
-
```
|
|
1647
|
-
|
|
1648
|
-
---
|
|
1649
|
-
|
|
1650
|
-
## Correlation Analysis
|
|
1651
|
-
|
|
1652
|
-
### `corr_pearson(array1, array2)`
|
|
1653
|
-
|
|
1654
|
-
Pearson correlation coefficient.
|
|
1655
|
-
|
|
1656
|
-
**Returns:**
|
|
1657
|
-
```yaml
|
|
1658
|
-
type: statistic
|
|
1659
|
-
name: pearson_correlation
|
|
1660
|
-
value: 0.856
|
|
1661
|
-
```
|
|
1662
|
-
|
|
1663
|
-
**Example:**
|
|
1664
|
-
```javascript
|
|
1665
|
-
const x = [1, 2, 3, 4, 5];
|
|
1666
|
-
const y = [2, 4, 5, 4, 5];
|
|
1667
|
-
datly.corr_pearson(x, y);
|
|
1668
|
-
```
|
|
1669
|
-
|
|
1670
|
-
### `corr_spearman(array1, array2)`
|
|
1671
|
-
|
|
1672
|
-
Spearman rank correlation coefficient.
|
|
1673
|
-
|
|
1674
|
-
**Returns:**
|
|
1675
|
-
```yaml
|
|
1676
|
-
type: statistic
|
|
1677
|
-
name: spearman_correlation
|
|
1678
|
-
value: 0.9
|
|
1679
|
-
```
|
|
1680
|
-
|
|
1681
|
-
### `corr_kendall(array1, array2)`
|
|
1682
|
-
|
|
1683
|
-
Kendall's tau correlation coefficient.
|
|
1684
|
-
|
|
1685
|
-
**Returns:**
|
|
1686
|
-
```yaml
|
|
1687
|
-
type: statistic
|
|
1688
|
-
name: kendall_tau
|
|
1689
|
-
value: 0.8
|
|
1690
|
-
concordant: 8
|
|
1691
|
-
discordant: 2
|
|
1692
|
-
n: 5
|
|
1693
|
-
```
|
|
1694
|
-
|
|
1695
|
-
### `corr_partial(array1, array2, array3)`
|
|
1696
|
-
|
|
1697
|
-
Partial correlation controlling for a third variable.
|
|
1698
|
-
|
|
1699
|
-
**Returns:**
|
|
1700
|
-
```yaml
|
|
1701
|
-
type: statistic
|
|
1702
|
-
name: partial_correlation
|
|
1703
|
-
value: 0.456
|
|
1704
|
-
controlling_for: third_variable
|
|
1705
|
-
```
|
|
1706
|
-
|
|
1707
|
-
### `corr_matrix_all(data)`
|
|
1708
|
-
|
|
1709
|
-
Comprehensive correlation matrix with Pearson, Spearman, and Kendall.
|
|
1710
|
-
|
|
1711
|
-
**Returns:**
|
|
1712
|
-
```yaml
|
|
1713
|
-
type: correlation_analysis
|
|
1714
|
-
pearson:
|
|
1715
|
-
age:
|
|
1716
|
-
age: 1
|
|
1717
|
-
salary: 0.85
|
|
1718
|
-
salary:
|
|
1719
|
-
age: 0.85
|
|
1720
|
-
salary: 1
|
|
1721
|
-
spearman:
|
|
1722
|
-
age:
|
|
1723
|
-
age: 1
|
|
1724
|
-
salary: 0.82
|
|
1725
|
-
salary:
|
|
1726
|
-
age: 0.82
|
|
1727
|
-
salary: 1
|
|
1728
|
-
kendall:
|
|
1729
|
-
age:
|
|
1730
|
-
age: 1
|
|
1731
|
-
salary: 0.75
|
|
1732
|
-
salary:
|
|
1733
|
-
age: 0.75
|
|
1734
|
-
salary: 1
|
|
1735
|
-
```
|
|
1736
|
-
|
|
1737
|
-
---
|
|
1738
|
-
|
|
1739
|
-
## Regression Models
|
|
1740
|
-
|
|
1741
|
-
### Linear Regression
|
|
1742
|
-
|
|
1743
|
-
#### `train_linear_regression(X, y)`
|
|
1744
|
-
|
|
1745
|
-
Trains a multiple linear regression model.
|
|
1746
|
-
|
|
1747
|
-
**Parameters:**
|
|
1748
|
-
- `X`: 2D array of features [[x1, x2, ...], ...]
|
|
1749
|
-
- `y`: Array of target values
|
|
1750
|
-
|
|
1751
|
-
**Returns:**
|
|
1752
|
-
```yaml
|
|
1753
|
-
type: linear_regression
|
|
1754
|
-
weights:
|
|
1755
|
-
- 2.5
|
|
1756
|
-
- 1.8
|
|
1757
|
-
- -0.3
|
|
1758
|
-
mse: 12.34
|
|
1759
|
-
r2: 0.856
|
|
1760
|
-
n: 100
|
|
1761
|
-
p: 2
|
|
1762
|
-
```
|
|
1763
|
-
|
|
1764
|
-
**Example:**
|
|
1765
|
-
```javascript
|
|
1766
|
-
const X = [[1, 2], [2, 3], [3, 4], [4, 5]];
|
|
1767
|
-
const y = [3, 5, 7, 9];
|
|
1768
|
-
const model = datly.train_linear_regression(X, y);
|
|
1769
|
-
```
|
|
1770
|
-
|
|
1771
|
-
#### `predict_linear(model, X)`
|
|
1772
|
-
|
|
1773
|
-
Makes predictions using a trained linear regression model.
|
|
1774
|
-
|
|
1775
|
-
**Parameters:**
|
|
1776
|
-
- `model`: Model text/object from `train_linear_regression`
|
|
1777
|
-
- `X`: 2D array of features
|
|
1778
|
-
|
|
1779
|
-
**Returns:**
|
|
1780
|
-
```yaml
|
|
1781
|
-
type: prediction
|
|
1782
|
-
name: linear_regression
|
|
1783
|
-
predictions:
|
|
1784
|
-
- 105.3
|
|
1785
|
-
- 110.7
|
|
1786
|
-
- 98.2
|
|
1787
|
-
```
|
|
1788
|
-
|
|
1789
|
-
**Example:**
|
|
1790
|
-
```javascript
|
|
1791
|
-
const predictions = datly.predict_linear(model, [[5, 6], [6, 7]]);
|
|
1792
|
-
```
|
|
1793
|
-
|
|
1794
|
-
### Logistic Regression
|
|
1795
|
-
|
|
1796
|
-
#### `train_logistic_regression(X, y, options = {})`
|
|
1797
|
-
|
|
1798
|
-
Trains a logistic regression model for binary classification.
|
|
1799
|
-
|
|
1800
|
-
**Parameters:**
|
|
1801
|
-
- `X`: 2D array of features
|
|
1802
|
-
- `y`: Array of binary labels (0 or 1)
|
|
1803
|
-
- `options`:
|
|
1804
|
-
- `learning_rate`: Learning rate (default: 0.1)
|
|
1805
|
-
- `iterations`: Number of iterations (default: 1000)
|
|
1806
|
-
- `l2`: L2 regularization parameter (default: 0)
|
|
1807
|
-
|
|
1808
|
-
**Returns:**
|
|
1809
|
-
```yaml
|
|
1810
|
-
type: logistic_regression
|
|
1811
|
-
weights:
|
|
1812
|
-
- 0.5
|
|
1813
|
-
- 1.2
|
|
1814
|
-
- -0.8
|
|
1815
|
-
accuracy: 0.92
|
|
1816
|
-
n: 100
|
|
1817
|
-
p: 2
|
|
1818
|
-
```
|
|
1819
|
-
|
|
1820
|
-
**Example:**
|
|
1821
|
-
```javascript
|
|
1822
|
-
const X = [[1, 2], [2, 3], [3, 1], [4, 2]];
|
|
1823
|
-
const y = [0, 0, 1, 1];
|
|
1824
|
-
const model = datly.train_logistic_regression(X, y, {
|
|
1825
|
-
learning_rate: 0.1,
|
|
1826
|
-
iterations: 1000,
|
|
1827
|
-
l2: 0.01
|
|
1828
|
-
});
|
|
1829
|
-
```
|
|
1830
|
-
|
|
1831
|
-
#### `predict_logistic(model, X, threshold = 0.5)`
|
|
1832
|
-
|
|
1833
|
-
Makes predictions using a trained logistic regression model.
|
|
1834
|
-
|
|
1835
|
-
**Returns:**
|
|
1836
|
-
```yaml
|
|
1837
|
-
type: prediction
|
|
1838
|
-
name: logistic_regression
|
|
1839
|
-
threshold: 0.5
|
|
1840
|
-
probabilities:
|
|
1841
|
-
- 0.234
|
|
1842
|
-
- 0.789
|
|
1843
|
-
- 0.456
|
|
1844
|
-
classes:
|
|
1845
|
-
- 0
|
|
1846
|
-
- 1
|
|
1847
|
-
- 0
|
|
1848
|
-
```
|
|
1849
|
-
|
|
1850
|
-
**Example:**
|
|
1851
|
-
```javascript
|
|
1852
|
-
const predictions = datly.predict_logistic(model, [[5, 6], [6, 7]], 0.5);
|
|
1853
|
-
```
|
|
1854
|
-
|
|
1855
|
-
---
|
|
1856
|
-
|
|
1857
|
-
## Classification Models
|
|
1858
|
-
|
|
1859
|
-
### K-Nearest Neighbors (KNN)
|
|
1860
|
-
|
|
1861
|
-
#### `train_knn_classifier(X, y, k = 5)`
|
|
1862
|
-
|
|
1863
|
-
Trains a KNN classifier.
|
|
1864
|
-
|
|
1865
|
-
**Parameters:**
|
|
1866
|
-
- `X`: 2D array of features
|
|
1867
|
-
- `y`: Array of class labels
|
|
1868
|
-
- `k`: Number of neighbors (default: 5)
|
|
1869
|
-
|
|
1870
|
-
**Returns:**
|
|
1871
|
-
```yaml
|
|
1872
|
-
type: knn_classifier
|
|
1873
|
-
k: 5
|
|
1874
|
-
x:
|
|
1875
|
-
- - 1
|
|
1876
|
-
- 2
|
|
1877
|
-
- - 2
|
|
1878
|
-
- 3
|
|
1879
|
-
y:
|
|
1880
|
-
- 0
|
|
1881
|
-
- 1
|
|
1882
|
-
n: 100
|
|
1883
|
-
p: 2
|
|
1884
|
-
```
|
|
1885
|
-
|
|
1886
|
-
**Example:**
|
|
1887
|
-
```javascript
|
|
1888
|
-
const X = [[1, 2], [2, 3], [3, 1], [4, 2]];
|
|
1889
|
-
const y = [0, 0, 1, 1];
|
|
1890
|
-
const model = datly.train_knn_classifier(X, y, 3);
|
|
1891
|
-
```
|
|
1892
|
-
|
|
1893
|
-
#### `predict_knn_classifier(model, X)`
|
|
1894
|
-
|
|
1895
|
-
Makes predictions using KNN classifier.
|
|
1896
|
-
|
|
1897
|
-
**Returns:**
|
|
1898
|
-
```yaml
|
|
1899
|
-
type: prediction
|
|
1900
|
-
name: knn_classifier
|
|
1901
|
-
k: 5
|
|
1902
|
-
predictions:
|
|
1903
|
-
- 0
|
|
1904
|
-
- 1
|
|
1905
|
-
- 1
|
|
1906
|
-
```
|
|
1907
|
-
|
|
1908
|
-
#### `train_knn_regressor(X, y, k = 5)`
|
|
1909
|
-
|
|
1910
|
-
Trains a KNN regressor.
|
|
1911
|
-
|
|
1912
|
-
**Returns:**
|
|
1913
|
-
```yaml
|
|
1914
|
-
type: knn_regressor
|
|
1915
|
-
k: 5
|
|
1916
|
-
x:
|
|
1917
|
-
- - 1
|
|
1918
|
-
- 2
|
|
1919
|
-
- - 2
|
|
1920
|
-
- 3
|
|
1921
|
-
y:
|
|
1922
|
-
- 10.5
|
|
1923
|
-
- 12.3
|
|
1924
|
-
n: 100
|
|
1925
|
-
p: 2
|
|
1926
|
-
```
|
|
1927
|
-
|
|
1928
|
-
#### `predict_knn_regressor(model, X)`
|
|
1929
|
-
|
|
1930
|
-
Makes predictions using KNN regressor.
|
|
1931
|
-
|
|
1932
|
-
**Returns:**
|
|
1933
|
-
```yaml
|
|
1934
|
-
type: prediction
|
|
1935
|
-
name: knn_regressor
|
|
1936
|
-
k: 5
|
|
1937
|
-
predictions:
|
|
1938
|
-
- 10.7
|
|
1939
|
-
- 11.8
|
|
1940
|
-
- 12.5
|
|
1941
|
-
```
|
|
1942
|
-
|
|
1943
|
-
### Decision Trees
|
|
1944
|
-
|
|
1945
|
-
#### `train_decision_tree_classifier(X, y, options = {})`
|
|
1946
|
-
|
|
1947
|
-
Trains a decision tree classifier.
|
|
1948
|
-
|
|
1949
|
-
**Parameters:**
|
|
1950
|
-
- `options`:
|
|
1951
|
-
- `max_depth`: Maximum depth of tree (default: 5)
|
|
1952
|
-
- `min_samples_split`: Minimum samples required to split (default: 2)
|
|
1953
|
-
|
|
1954
|
-
**Returns:**
|
|
1955
|
-
```yaml
|
|
1956
|
-
type: decision_tree_classifier
|
|
1957
|
-
tree:
|
|
1958
|
-
leaf: false
|
|
1959
|
-
feature: 0
|
|
1960
|
-
threshold: 2.5
|
|
1961
|
-
left:
|
|
1962
|
-
leaf: true
|
|
1963
|
-
prediction: 0
|
|
1964
|
-
n: 50
|
|
1965
|
-
right:
|
|
1966
|
-
leaf: true
|
|
1967
|
-
prediction: 1
|
|
1968
|
-
n: 50
|
|
1969
|
-
max_depth: 5
|
|
1970
|
-
min_samples: 2
|
|
1971
|
-
n: 100
|
|
1972
|
-
p: 2
|
|
1973
|
-
```
|
|
1974
|
-
|
|
1975
|
-
**Example:**
|
|
1976
|
-
```javascript
|
|
1977
|
-
const model = datly.train_decision_tree_classifier(X, y, {
|
|
1978
|
-
max_depth: 5,
|
|
1979
|
-
min_samples_split: 2
|
|
1980
|
-
});
|
|
1981
|
-
```
|
|
1982
|
-
|
|
1983
|
-
#### `train_decision_tree_regressor(X, y, options = {})`
|
|
1984
|
-
|
|
1985
|
-
Trains a decision tree regressor.
|
|
1986
|
-
|
|
1987
|
-
**Returns:**
|
|
1988
|
-
```yaml
|
|
1989
|
-
type: decision_tree_regressor
|
|
1990
|
-
tree:
|
|
1991
|
-
leaf: false
|
|
1992
|
-
feature: 0
|
|
1993
|
-
threshold: 2.5
|
|
1994
|
-
left: ...
|
|
1995
|
-
right: ...
|
|
1996
|
-
max_depth: 5
|
|
1997
|
-
min_samples: 2
|
|
1998
|
-
n: 100
|
|
1999
|
-
p: 2
|
|
2000
|
-
```
|
|
2001
|
-
|
|
2002
|
-
#### `predict_decision_tree(model, X)`
|
|
2003
|
-
|
|
2004
|
-
Makes predictions using a decision tree.
|
|
2005
|
-
|
|
2006
|
-
**Returns:**
|
|
2007
|
-
```yaml
|
|
2008
|
-
type: prediction
|
|
2009
|
-
name: decision_tree_classifier
|
|
2010
|
-
predictions:
|
|
2011
|
-
- 0
|
|
2012
|
-
- 1
|
|
2013
|
-
- 1
|
|
2014
|
-
```
|
|
2015
|
-
|
|
2016
|
-
### Random Forest
|
|
2017
|
-
|
|
2018
|
-
#### `train_random_forest_classifier(X, y, options = {})`
|
|
2019
|
-
|
|
2020
|
-
Trains a random forest classifier.
|
|
2021
|
-
|
|
2022
|
-
**Parameters:**
|
|
2023
|
-
- `options`:
|
|
2024
|
-
- `n_estimators`: Number of trees (default: 10)
|
|
2025
|
-
- `max_depth`: Maximum depth (default: 5)
|
|
2026
|
-
- `min_samples_split`: Minimum samples to split (default: 2)
|
|
2027
|
-
- `seed`: Random seed (default: 42)
|
|
2028
|
-
|
|
2029
|
-
**Returns:**
|
|
2030
|
-
```yaml
|
|
2031
|
-
type: random_forest_classifier
|
|
2032
|
-
trees:
|
|
2033
|
-
- leaf: false
|
|
2034
|
-
feature: 0
|
|
2035
|
-
threshold: 2.5
|
|
2036
|
-
...
|
|
2037
|
-
- leaf: false
|
|
2038
|
-
feature: 1
|
|
2039
|
-
threshold: 3.2
|
|
2040
|
-
...
|
|
2041
|
-
n_trees: 10
|
|
2042
|
-
max_depth: 5
|
|
2043
|
-
min_samples: 2
|
|
2044
|
-
n: 100
|
|
2045
|
-
p: 2
|
|
2046
|
-
```
|
|
2047
|
-
|
|
2048
|
-
**Example:**
|
|
2049
|
-
```javascript
|
|
2050
|
-
const model = datly.train_random_forest_classifier(X, y, {
|
|
2051
|
-
n_estimators: 10,
|
|
2052
|
-
max_depth: 5,
|
|
2053
|
-
seed: 42
|
|
2054
|
-
});
|
|
2055
|
-
```
|
|
2056
|
-
|
|
2057
|
-
#### `train_random_forest_regressor(X, y, options = {})`
|
|
2058
|
-
|
|
2059
|
-
Trains a random forest regressor.
|
|
2060
|
-
|
|
2061
|
-
**Returns:**
|
|
2062
|
-
```yaml
|
|
2063
|
-
type: random_forest_regressor
|
|
2064
|
-
trees: [...]
|
|
2065
|
-
n_trees: 10
|
|
2066
|
-
max_depth: 5
|
|
2067
|
-
min_samples: 2
|
|
2068
|
-
n: 100
|
|
2069
|
-
p: 2
|
|
2070
|
-
```
|
|
2071
|
-
|
|
2072
|
-
#### `predict_random_forest_classifier(model, X)`
|
|
2073
|
-
|
|
2074
|
-
Makes predictions using random forest classifier.
|
|
2075
|
-
|
|
2076
|
-
**Returns:**
|
|
2077
|
-
```yaml
|
|
2078
|
-
type: prediction
|
|
2079
|
-
name: random_forest_classifier
|
|
2080
|
-
n_trees: 10
|
|
2081
|
-
predictions:
|
|
2082
|
-
- 0
|
|
2083
|
-
- 1
|
|
2084
|
-
- 1
|
|
2085
|
-
```
|
|
2086
|
-
|
|
2087
|
-
#### `predict_random_forest_regressor(model, X)`
|
|
2088
|
-
|
|
2089
|
-
Makes predictions using random forest regressor.
|
|
2090
|
-
|
|
2091
|
-
**Returns:**
|
|
2092
|
-
```yaml
|
|
2093
|
-
type: prediction
|
|
2094
|
-
name: random_forest_regressor
|
|
2095
|
-
n_trees: 10
|
|
2096
|
-
predictions:
|
|
2097
|
-
- 10.7
|
|
2098
|
-
- 11.8
|
|
2099
|
-
- 12.5
|
|
2100
|
-
```
|
|
2101
|
-
|
|
2102
|
-
### Naive Bayes
|
|
2103
|
-
|
|
2104
|
-
#### `train_naive_bayes(X, y)`
|
|
2105
|
-
|
|
2106
|
-
Trains a Gaussian Naive Bayes classifier.
|
|
2107
|
-
|
|
2108
|
-
**Parameters:**
|
|
2109
|
-
- `X`: 2D array of features
|
|
2110
|
-
- `y`: Array of class labels
|
|
2111
|
-
|
|
2112
|
-
**Returns:**
|
|
2113
|
-
```yaml
|
|
2114
|
-
type: naive_bayes
|
|
2115
|
-
classes:
|
|
2116
|
-
- 0
|
|
2117
|
-
- 1
|
|
2118
|
-
priors:
|
|
2119
|
-
0: 0.5
|
|
2120
|
-
1: 0.5
|
|
2121
|
-
stats:
|
|
2122
|
-
0:
|
|
2123
|
-
- mean: 2.5
|
|
2124
|
-
std: 1.2
|
|
2125
|
-
- mean: 3.1
|
|
2126
|
-
std: 0.8
|
|
2127
|
-
1:
|
|
2128
|
-
- mean: 5.2
|
|
2129
|
-
std: 1.5
|
|
2130
|
-
- mean: 6.3
|
|
2131
|
-
std: 1.1
|
|
2132
|
-
n: 100
|
|
2133
|
-
p: 2
|
|
2134
|
-
```
|
|
2135
|
-
|
|
2136
|
-
**Example:**
|
|
2137
|
-
```javascript
|
|
2138
|
-
const X = [[1, 2], [2, 3], [5, 6], [6, 7]];
|
|
2139
|
-
const y = [0, 0, 1, 1];
|
|
2140
|
-
const model = datly.train_naive_bayes(X, y);
|
|
2141
|
-
```
|
|
2142
|
-
|
|
2143
|
-
#### `predict_naive_bayes(model, X)`
|
|
2144
|
-
|
|
2145
|
-
Makes predictions using Naive Bayes classifier.
|
|
2146
|
-
|
|
2147
|
-
**Returns:**
|
|
2148
|
-
```yaml
|
|
2149
|
-
type: prediction
|
|
2150
|
-
name: naive_bayes
|
|
2151
|
-
predictions:
|
|
2152
|
-
- 0
|
|
2153
|
-
- 1
|
|
2154
|
-
- 1
|
|
2155
|
-
```
|
|
2156
|
-
|
|
2157
|
-
---
|
|
2158
|
-
|
|
2159
|
-
## Clustering
|
|
2160
|
-
|
|
2161
|
-
### K-Means Clustering
|
|
2162
|
-
|
|
2163
|
-
#### `train_kmeans(X, k = 3, options = {})`
|
|
2164
|
-
|
|
2165
|
-
Trains a K-means clustering model.
|
|
2166
|
-
|
|
2167
|
-
**Parameters:**
|
|
2168
|
-
- `X`: 2D array of features
|
|
2169
|
-
- `k`: Number of clusters (default: 3)
|
|
2170
|
-
- `options`:
|
|
2171
|
-
- `max_iterations`: Maximum iterations (default: 100)
|
|
2172
|
-
- `seed`: Random seed (default: 42)
|
|
2173
|
-
|
|
2174
|
-
**Returns:**
|
|
2175
|
-
```yaml
|
|
2176
|
-
type: kmeans
|
|
2177
|
-
k: 3
|
|
2178
|
-
centroids:
|
|
2179
|
-
- - 2.1
|
|
2180
|
-
- 3.5
|
|
2181
|
-
- - 5.8
|
|
2182
|
-
- 6.2
|
|
2183
|
-
- - 9.1
|
|
2184
|
-
- 8.7
|
|
2185
|
-
inertia: 45.67
|
|
2186
|
-
n: 150
|
|
2187
|
-
p: 2
|
|
2188
|
-
```
|
|
2189
|
-
|
|
2190
|
-
**Example:**
|
|
2191
|
-
```javascript
|
|
2192
|
-
const X = [[1, 2], [2, 3], [5, 6], [6, 7], [9, 8], [10, 9]];
|
|
2193
|
-
const model = datly.train_kmeans(X, 3, {
|
|
2194
|
-
max_iterations: 100,
|
|
2195
|
-
seed: 42
|
|
2196
|
-
});
|
|
2197
|
-
```
|
|
2198
|
-
|
|
2199
|
-
#### `predict_kmeans(model, X)`
|
|
2200
|
-
|
|
2201
|
-
Assigns cluster labels to new data points.
|
|
2202
|
-
|
|
2203
|
-
**Returns:**
|
|
2204
|
-
```yaml
|
|
2205
|
-
type: prediction
|
|
2206
|
-
name: kmeans
|
|
2207
|
-
k: 3
|
|
2208
|
-
cluster_labels:
|
|
2209
|
-
- 0
|
|
2210
|
-
- 0
|
|
2211
|
-
- 1
|
|
2212
|
-
- 1
|
|
2213
|
-
- 2
|
|
2214
|
-
- 2
|
|
946
|
+
type: correlation_matrix
|
|
947
|
+
method: pearson
|
|
948
|
+
variables:
|
|
949
|
+
- age
|
|
950
|
+
- salary
|
|
951
|
+
- experience
|
|
952
|
+
matrix:
|
|
953
|
+
- - 1.000
|
|
954
|
+
- 0.856
|
|
955
|
+
- 0.923
|
|
956
|
+
- - 0.856
|
|
957
|
+
- 1.000
|
|
958
|
+
- 0.789
|
|
959
|
+
- - 0.923
|
|
960
|
+
- 0.789
|
|
961
|
+
- 1.000
|
|
2215
962
|
```
|
|
2216
963
|
|
|
2217
964
|
**Example:**
|
|
2218
965
|
```javascript
|
|
2219
|
-
const
|
|
2220
|
-
|
|
966
|
+
const employees = [
|
|
967
|
+
{ age: 25, salary: 50000, experience: 2 },
|
|
968
|
+
{ age: 30, salary: 60000, experience: 5 },
|
|
969
|
+
{ age: 35, salary: 70000, experience: 8 },
|
|
970
|
+
{ age: 40, salary: 80000, experience: 12 }
|
|
971
|
+
];
|
|
972
|
+
|
|
973
|
+
const corrMatrix = datly.df_corr(employees, 'pearson');
|
|
974
|
+
console.log(corrMatrix);
|
|
2221
975
|
```
|
|
2222
976
|
|
|
2223
977
|
---
|
|
2224
978
|
|
|
2225
|
-
##
|
|
979
|
+
## Regression Models
|
|
980
|
+
|
|
981
|
+
### Linear Regression
|
|
2226
982
|
|
|
2227
|
-
|
|
983
|
+
#### `train_linear_regression(X, y)`
|
|
2228
984
|
|
|
2229
|
-
|
|
985
|
+
Trains a linear regression model.
|
|
2230
986
|
|
|
2231
987
|
**Parameters:**
|
|
2232
|
-
- `
|
|
2233
|
-
- `
|
|
2234
|
-
- `method`: 'hard' for majority voting, 'soft' for probability averaging
|
|
988
|
+
- `X`: Feature matrix (2D array)
|
|
989
|
+
- `y`: Target vector (1D array)
|
|
2235
990
|
|
|
2236
991
|
**Returns:**
|
|
2237
992
|
```yaml
|
|
2238
|
-
type:
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
-
|
|
2244
|
-
- 1
|
|
2245
|
-
|
|
993
|
+
type: model
|
|
994
|
+
algorithm: linear_regression
|
|
995
|
+
n_features: 2
|
|
996
|
+
n_samples: 100
|
|
997
|
+
coefficients:
|
|
998
|
+
- 2.45
|
|
999
|
+
- -1.23
|
|
1000
|
+
intercept: 0.67
|
|
1001
|
+
r_squared: 0.78
|
|
1002
|
+
mse: 15.4
|
|
1003
|
+
training_score: 0.78
|
|
2246
1004
|
```
|
|
2247
1005
|
|
|
2248
1006
|
**Example:**
|
|
2249
1007
|
```javascript
|
|
2250
|
-
const
|
|
2251
|
-
const
|
|
2252
|
-
const model3 = datly.train_decision_tree_classifier(X, y);
|
|
1008
|
+
const X = [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]];
|
|
1009
|
+
const y = [3, 5, 7, 9, 11];
|
|
2253
1010
|
|
|
2254
|
-
const
|
|
2255
|
-
|
|
2256
|
-
X_test,
|
|
2257
|
-
'hard'
|
|
2258
|
-
);
|
|
1011
|
+
const model = datly.train_linear_regression(X, y);
|
|
1012
|
+
console.log(model);
|
|
2259
1013
|
```
|
|
2260
1014
|
|
|
2261
|
-
|
|
1015
|
+
#### `predict_linear(model, X)`
|
|
2262
1016
|
|
|
2263
|
-
|
|
1017
|
+
Makes predictions using a trained linear regression model.
|
|
2264
1018
|
|
|
2265
1019
|
**Returns:**
|
|
2266
1020
|
```yaml
|
|
2267
|
-
type:
|
|
2268
|
-
|
|
2269
|
-
|
|
1021
|
+
type: predictions
|
|
1022
|
+
algorithm: linear_regression
|
|
1023
|
+
n_predictions: 5
|
|
2270
1024
|
predictions:
|
|
2271
|
-
-
|
|
2272
|
-
-
|
|
2273
|
-
-
|
|
1025
|
+
- 3.12
|
|
1026
|
+
- 5.57
|
|
1027
|
+
- 7.02
|
|
1028
|
+
- 9.47
|
|
1029
|
+
- 11.92
|
|
2274
1030
|
```
|
|
2275
1031
|
|
|
2276
1032
|
**Example:**
|
|
2277
1033
|
```javascript
|
|
2278
|
-
const
|
|
2279
|
-
const
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
const ensemble = datly.ensemble_voting_regressor(
|
|
2283
|
-
[model1, model2, model3],
|
|
2284
|
-
X_test
|
|
2285
|
-
);
|
|
1034
|
+
const X_test = [[1.5, 2.5], [2.5, 3.5], [3.5, 4.5]];
|
|
1035
|
+
const predictions = datly.predict_linear(model, X_test);
|
|
1036
|
+
console.log(predictions);
|
|
2286
1037
|
```
|
|
2287
1038
|
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
## Model Evaluation
|
|
1039
|
+
### Logistic Regression
|
|
2291
1040
|
|
|
2292
|
-
|
|
1041
|
+
#### `train_logistic_regression(X, y, options = {})`
|
|
2293
1042
|
|
|
2294
|
-
|
|
1043
|
+
Trains a logistic regression model for binary classification.
|
|
2295
1044
|
|
|
2296
1045
|
**Parameters:**
|
|
2297
|
-
- `X`:
|
|
2298
|
-
- `y`:
|
|
2299
|
-
- `
|
|
2300
|
-
- `seed`: Random seed (default: 42)
|
|
1046
|
+
- `X`: Feature matrix
|
|
1047
|
+
- `y`: Binary target vector (0s and 1s)
|
|
1048
|
+
- `options`: Training options (learning_rate, max_iterations, tolerance)
|
|
2301
1049
|
|
|
2302
1050
|
**Returns:**
|
|
2303
1051
|
```yaml
|
|
2304
|
-
type:
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2309
|
-
|
|
2310
|
-
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
- 4
|
|
2317
|
-
...
|
|
2318
|
-
preview:
|
|
2319
|
-
x_train:
|
|
2320
|
-
- - 1
|
|
2321
|
-
- 2
|
|
2322
|
-
- - 3
|
|
2323
|
-
- 4
|
|
2324
|
-
y_train:
|
|
2325
|
-
- 0
|
|
2326
|
-
- 1
|
|
2327
|
-
- 0
|
|
1052
|
+
type: model
|
|
1053
|
+
algorithm: logistic_regression
|
|
1054
|
+
n_features: 2
|
|
1055
|
+
n_samples: 100
|
|
1056
|
+
coefficients:
|
|
1057
|
+
- 1.45
|
|
1058
|
+
- -0.89
|
|
1059
|
+
intercept: 0.23
|
|
1060
|
+
accuracy: 0.85
|
|
1061
|
+
log_likelihood: -45.6
|
|
1062
|
+
iterations: 150
|
|
1063
|
+
converged: true
|
|
2328
1064
|
```
|
|
2329
1065
|
|
|
2330
1066
|
**Example:**
|
|
2331
1067
|
```javascript
|
|
2332
|
-
const
|
|
2333
|
-
|
|
2334
|
-
|
|
1068
|
+
const X = [[1, 2], [2, 1], [3, 4], [4, 3], [5, 6], [6, 5]];
|
|
1069
|
+
const y = [0, 0, 1, 1, 1, 1];
|
|
1070
|
+
|
|
1071
|
+
const options = {
|
|
1072
|
+
learning_rate: 0.01,
|
|
1073
|
+
max_iterations: 1000,
|
|
1074
|
+
tolerance: 1e-6
|
|
1075
|
+
};
|
|
2335
1076
|
|
|
2336
|
-
|
|
1077
|
+
const model = datly.train_logistic_regression(X, y, options);
|
|
1078
|
+
console.log(model);
|
|
1079
|
+
```
|
|
2337
1080
|
|
|
2338
|
-
#### `
|
|
1081
|
+
#### `predict_logistic(model, X)`
|
|
2339
1082
|
|
|
2340
|
-
|
|
1083
|
+
Makes predictions using a trained logistic regression model.
|
|
2341
1084
|
|
|
2342
1085
|
**Returns:**
|
|
2343
1086
|
```yaml
|
|
2344
|
-
type:
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
1087
|
+
type: predictions
|
|
1088
|
+
algorithm: logistic_regression
|
|
1089
|
+
n_predictions: 3
|
|
1090
|
+
predictions:
|
|
1091
|
+
- 0
|
|
1092
|
+
- 1
|
|
1093
|
+
- 1
|
|
1094
|
+
probabilities:
|
|
1095
|
+
- 0.23
|
|
1096
|
+
- 0.78
|
|
1097
|
+
- 0.85
|
|
2355
1098
|
```
|
|
2356
1099
|
|
|
2357
1100
|
**Example:**
|
|
2358
1101
|
```javascript
|
|
2359
|
-
const
|
|
2360
|
-
const
|
|
2361
|
-
|
|
1102
|
+
const X_test = [[2, 3], [4, 5], [6, 7]];
|
|
1103
|
+
const predictions = datly.predict_logistic(model, X_test);
|
|
1104
|
+
console.log(predictions);
|
|
2362
1105
|
```
|
|
2363
1106
|
|
|
2364
|
-
|
|
1107
|
+
---
|
|
2365
1108
|
|
|
2366
|
-
|
|
1109
|
+
## Classification Models
|
|
1110
|
+
|
|
1111
|
+
### K-Nearest Neighbors (KNN)
|
|
1112
|
+
|
|
1113
|
+
#### `train_knn(X, y, k = 3)`
|
|
1114
|
+
|
|
1115
|
+
Trains a KNN classifier.
|
|
2367
1116
|
|
|
2368
|
-
|
|
1117
|
+
**Parameters:**
|
|
1118
|
+
- `X`: Feature matrix
|
|
1119
|
+
- `y`: Target vector
|
|
1120
|
+
- `k`: Number of neighbors (default: 3)
|
|
2369
1121
|
|
|
2370
1122
|
**Returns:**
|
|
2371
1123
|
```yaml
|
|
2372
|
-
type:
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2376
|
-
|
|
1124
|
+
type: model
|
|
1125
|
+
algorithm: knn
|
|
1126
|
+
k: 3
|
|
1127
|
+
n_features: 2
|
|
1128
|
+
n_samples: 100
|
|
1129
|
+
classes:
|
|
1130
|
+
- 0
|
|
1131
|
+
- 1
|
|
1132
|
+
- 2
|
|
1133
|
+
training_accuracy: 0.92
|
|
2377
1134
|
```
|
|
2378
1135
|
|
|
2379
1136
|
**Example:**
|
|
2380
1137
|
```javascript
|
|
2381
|
-
const
|
|
2382
|
-
const
|
|
2383
|
-
const metrics = datly.metrics_regression(y_true, y_pred);
|
|
2384
|
-
```
|
|
1138
|
+
const X = [[1, 2], [2, 3], [3, 1], [1, 3], [2, 1], [3, 2]];
|
|
1139
|
+
const y = [0, 0, 1, 1, 2, 2];
|
|
2385
1140
|
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
1141
|
+
const model = datly.train_knn(X, y, 3);
|
|
1142
|
+
console.log(model);
|
|
1143
|
+
```
|
|
2389
1144
|
|
|
2390
|
-
|
|
1145
|
+
#### `predict_knn(model, X)`
|
|
2391
1146
|
|
|
2392
|
-
|
|
2393
|
-
- `X`: 2D array of features
|
|
2394
|
-
- `y`: Array of labels
|
|
2395
|
-
- `model_type`: String - 'linear_regression', 'logistic_regression', 'knn_classifier', 'decision_tree_classifier', 'random_forest_classifier'
|
|
2396
|
-
- `options`:
|
|
2397
|
-
- `k_folds`: Number of folds (default: 5)
|
|
2398
|
-
- Model-specific options (e.g., `k` for KNN, `max_depth` for trees)
|
|
1147
|
+
Makes predictions using a trained KNN model.
|
|
2399
1148
|
|
|
2400
1149
|
**Returns:**
|
|
2401
1150
|
```yaml
|
|
2402
|
-
type:
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
-
|
|
2408
|
-
- 0
|
|
2409
|
-
|
|
2410
|
-
-
|
|
2411
|
-
|
|
2412
|
-
|
|
1151
|
+
type: predictions
|
|
1152
|
+
algorithm: knn
|
|
1153
|
+
k: 3
|
|
1154
|
+
n_predictions: 2
|
|
1155
|
+
predictions:
|
|
1156
|
+
- 1
|
|
1157
|
+
- 0
|
|
1158
|
+
distances:
|
|
1159
|
+
- - 1.41
|
|
1160
|
+
- 2.24
|
|
1161
|
+
- 1.00
|
|
1162
|
+
- - 1.00
|
|
1163
|
+
- 1.41
|
|
1164
|
+
- 2.83
|
|
2413
1165
|
```
|
|
2414
1166
|
|
|
2415
1167
|
**Example:**
|
|
2416
1168
|
```javascript
|
|
2417
|
-
const
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
iterations: 1000
|
|
2421
|
-
});
|
|
1169
|
+
const X_test = [[2.5, 2], [1.5, 2.5]];
|
|
1170
|
+
const predictions = datly.predict_knn(model, X_test);
|
|
1171
|
+
console.log(predictions);
|
|
2422
1172
|
```
|
|
2423
1173
|
|
|
2424
|
-
###
|
|
1174
|
+
### Decision Tree
|
|
2425
1175
|
|
|
2426
|
-
#### `
|
|
1176
|
+
#### `train_decision_tree(X, y, options = {})`
|
|
2427
1177
|
|
|
2428
|
-
|
|
1178
|
+
Trains a decision tree classifier.
|
|
2429
1179
|
|
|
2430
1180
|
**Parameters:**
|
|
2431
|
-
- `
|
|
1181
|
+
- `X`: Feature matrix
|
|
1182
|
+
- `y`: Target vector
|
|
1183
|
+
- `options`: Tree options (max_depth, min_samples_split, min_samples_leaf)
|
|
2432
1184
|
|
|
2433
1185
|
**Returns:**
|
|
2434
1186
|
```yaml
|
|
2435
|
-
type:
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
1187
|
+
type: model
|
|
1188
|
+
algorithm: decision_tree
|
|
1189
|
+
max_depth: 5
|
|
1190
|
+
n_features: 4
|
|
1191
|
+
n_samples: 150
|
|
1192
|
+
classes:
|
|
1193
|
+
- 0
|
|
1194
|
+
- 1
|
|
1195
|
+
- 2
|
|
1196
|
+
tree_depth: 3
|
|
1197
|
+
n_nodes: 7
|
|
1198
|
+
feature_importance:
|
|
2439
1199
|
- 0.45
|
|
2440
1200
|
- 0.32
|
|
2441
1201
|
- 0.15
|
|
2442
1202
|
- 0.08
|
|
1203
|
+
training_accuracy: 0.96
|
|
2443
1204
|
```
|
|
2444
1205
|
|
|
2445
1206
|
**Example:**
|
|
2446
1207
|
```javascript
|
|
2447
|
-
const
|
|
2448
|
-
|
|
2449
|
-
|
|
1208
|
+
const X = [
|
|
1209
|
+
[5.1, 3.5, 1.4, 0.2],
|
|
1210
|
+
[4.9, 3.0, 1.4, 0.2],
|
|
1211
|
+
[7.0, 3.2, 4.7, 1.4],
|
|
1212
|
+
[6.4, 3.2, 4.5, 1.5]
|
|
1213
|
+
];
|
|
1214
|
+
const y = [0, 0, 1, 1];
|
|
2450
1215
|
|
|
2451
|
-
|
|
1216
|
+
const options = {
|
|
1217
|
+
max_depth: 5,
|
|
1218
|
+
min_samples_split: 2,
|
|
1219
|
+
min_samples_leaf: 1
|
|
1220
|
+
};
|
|
2452
1221
|
|
|
2453
|
-
|
|
1222
|
+
const model = datly.train_decision_tree(X, y, options);
|
|
1223
|
+
console.log(model);
|
|
1224
|
+
```
|
|
2454
1225
|
|
|
2455
|
-
###
|
|
1226
|
+
### Naive Bayes
|
|
2456
1227
|
|
|
2457
|
-
#### `
|
|
1228
|
+
#### `train_naive_bayes(X, y)`
|
|
2458
1229
|
|
|
2459
|
-
|
|
1230
|
+
Trains a Gaussian Naive Bayes classifier.
|
|
2460
1231
|
|
|
2461
1232
|
**Returns:**
|
|
2462
1233
|
```yaml
|
|
2463
|
-
type:
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
1234
|
+
type: model
|
|
1235
|
+
algorithm: naive_bayes
|
|
1236
|
+
variant: gaussian
|
|
1237
|
+
n_features: 4
|
|
1238
|
+
n_samples: 150
|
|
1239
|
+
classes:
|
|
1240
|
+
- 0
|
|
1241
|
+
- 1
|
|
1242
|
+
- 2
|
|
1243
|
+
class_priors:
|
|
1244
|
+
- 0.33
|
|
1245
|
+
- 0.33
|
|
1246
|
+
- 0.34
|
|
1247
|
+
training_accuracy: 0.94
|
|
2471
1248
|
```
|
|
2472
1249
|
|
|
2473
1250
|
**Example:**
|
|
2474
1251
|
```javascript
|
|
2475
|
-
const X = [
|
|
2476
|
-
|
|
1252
|
+
const X = [
|
|
1253
|
+
[5.1, 3.5, 1.4, 0.2],
|
|
1254
|
+
[4.9, 3.0, 1.4, 0.2],
|
|
1255
|
+
[7.0, 3.2, 4.7, 1.4],
|
|
1256
|
+
[6.4, 3.2, 4.5, 1.5]
|
|
1257
|
+
];
|
|
1258
|
+
const y = [0, 0, 1, 1];
|
|
1259
|
+
|
|
1260
|
+
const model = datly.train_naive_bayes(X, y);
|
|
1261
|
+
console.log(model);
|
|
2477
1262
|
```
|
|
2478
1263
|
|
|
2479
|
-
|
|
1264
|
+
---
|
|
2480
1265
|
|
|
2481
|
-
|
|
1266
|
+
## Clustering
|
|
2482
1267
|
|
|
2483
|
-
|
|
2484
|
-
```yaml
|
|
2485
|
-
type: scaled_data
|
|
2486
|
-
method: standard
|
|
2487
|
-
preview:
|
|
2488
|
-
- - 0.0
|
|
2489
|
-
- 0.0
|
|
2490
|
-
- - 0.625
|
|
2491
|
-
- 0.767
|
|
2492
|
-
- - -0.625
|
|
2493
|
-
- -0.767
|
|
2494
|
-
```
|
|
1268
|
+
### K-Means Clustering
|
|
2495
1269
|
|
|
2496
|
-
|
|
2497
|
-
```javascript
|
|
2498
|
-
const scaled = datly.standard_scaler_transform(scaler, X);
|
|
2499
|
-
```
|
|
1270
|
+
#### `kmeans(X, k, options = {})`
|
|
2500
1271
|
|
|
2501
|
-
|
|
1272
|
+
Performs K-means clustering.
|
|
2502
1273
|
|
|
2503
|
-
|
|
1274
|
+
**Parameters:**
|
|
1275
|
+
- `X`: Data matrix
|
|
1276
|
+
- `k`: Number of clusters
|
|
1277
|
+
- `options`: Algorithm options (max_iterations, tolerance, seed)
|
|
2504
1278
|
|
|
2505
1279
|
**Returns:**
|
|
2506
1280
|
```yaml
|
|
2507
|
-
type:
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
1281
|
+
type: clustering_result
|
|
1282
|
+
algorithm: kmeans
|
|
1283
|
+
k: 3
|
|
1284
|
+
n_samples: 100
|
|
1285
|
+
n_features: 2
|
|
1286
|
+
iterations: 15
|
|
1287
|
+
converged: true
|
|
1288
|
+
inertia: 45.7
|
|
1289
|
+
centroids:
|
|
1290
|
+
- - 2.1
|
|
1291
|
+
- 3.2
|
|
1292
|
+
- - 5.8
|
|
1293
|
+
- 1.4
|
|
1294
|
+
- - 8.3
|
|
1295
|
+
- 6.7
|
|
1296
|
+
labels:
|
|
1297
|
+
- 0
|
|
1298
|
+
- 0
|
|
1299
|
+
- 1
|
|
1300
|
+
- 2
|
|
1301
|
+
- 1
|
|
2515
1302
|
```
|
|
2516
1303
|
|
|
2517
|
-
|
|
1304
|
+
**Example:**
|
|
1305
|
+
```javascript
|
|
1306
|
+
const X = [
|
|
1307
|
+
[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]
|
|
1308
|
+
];
|
|
2518
1309
|
|
|
2519
|
-
|
|
1310
|
+
const options = {
|
|
1311
|
+
max_iterations: 100,
|
|
1312
|
+
tolerance: 1e-4,
|
|
1313
|
+
seed: 42
|
|
1314
|
+
};
|
|
2520
1315
|
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
type: scaled_data
|
|
2524
|
-
method: minmax
|
|
2525
|
-
preview:
|
|
2526
|
-
- - 0.5
|
|
2527
|
-
- 0.333
|
|
2528
|
-
- - 1.0
|
|
2529
|
-
- 1.0
|
|
2530
|
-
- - 0.0
|
|
2531
|
-
- 0.0
|
|
1316
|
+
const result = datly.kmeans(X, 3, options);
|
|
1317
|
+
console.log(result);
|
|
2532
1318
|
```
|
|
2533
1319
|
|
|
2534
1320
|
---
|
|
2535
1321
|
|
|
2536
|
-
##
|
|
1322
|
+
## Ensemble Methods
|
|
2537
1323
|
|
|
2538
|
-
###
|
|
1324
|
+
### Random Forest
|
|
2539
1325
|
|
|
2540
|
-
#### `
|
|
1326
|
+
#### `train_random_forest(X, y, options = {})`
|
|
2541
1327
|
|
|
2542
|
-
Trains a
|
|
1328
|
+
Trains a random forest classifier.
|
|
2543
1329
|
|
|
2544
1330
|
**Parameters:**
|
|
2545
|
-
- `X`:
|
|
2546
|
-
- `
|
|
1331
|
+
- `X`: Feature matrix
|
|
1332
|
+
- `y`: Target vector
|
|
1333
|
+
- `options`: Forest options (n_trees, max_depth, max_features, sample_ratio)
|
|
2547
1334
|
|
|
2548
1335
|
**Returns:**
|
|
2549
1336
|
```yaml
|
|
2550
|
-
type:
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
1337
|
+
type: model
|
|
1338
|
+
algorithm: random_forest
|
|
1339
|
+
n_trees: 100
|
|
1340
|
+
max_depth: 10
|
|
1341
|
+
n_features: 4
|
|
1342
|
+
n_samples: 150
|
|
1343
|
+
classes:
|
|
1344
|
+
- 0
|
|
1345
|
+
- 1
|
|
1346
|
+
- 2
|
|
1347
|
+
oob_score: 0.91
|
|
1348
|
+
feature_importance:
|
|
1349
|
+
- 0.35
|
|
1350
|
+
- 0.28
|
|
1351
|
+
- 0.22
|
|
1352
|
+
- 0.15
|
|
1353
|
+
training_accuracy: 0.98
|
|
2565
1354
|
```
|
|
2566
1355
|
|
|
2567
1356
|
**Example:**
|
|
2568
1357
|
```javascript
|
|
2569
|
-
const X = [
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
1358
|
+
const X = [
|
|
1359
|
+
[5.1, 3.5, 1.4, 0.2],
|
|
1360
|
+
[4.9, 3.0, 1.4, 0.2],
|
|
1361
|
+
[7.0, 3.2, 4.7, 1.4],
|
|
1362
|
+
[6.4, 3.2, 4.5, 1.5]
|
|
1363
|
+
];
|
|
1364
|
+
const y = [0, 0, 1, 1];
|
|
2576
1365
|
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2581
|
-
|
|
2582
|
-
|
|
2583
|
-
- 0.0
|
|
2584
|
-
- - 0.707
|
|
2585
|
-
- 0.0
|
|
2586
|
-
- - -1.414
|
|
2587
|
-
- 0.0
|
|
2588
|
-
```
|
|
1366
|
+
const options = {
|
|
1367
|
+
n_trees: 100,
|
|
1368
|
+
max_depth: 10,
|
|
1369
|
+
max_features: 'sqrt',
|
|
1370
|
+
sample_ratio: 0.8
|
|
1371
|
+
};
|
|
2589
1372
|
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
const transformed = datly.transform_pca(pca, X);
|
|
1373
|
+
const model = datly.train_random_forest(X, y, options);
|
|
1374
|
+
console.log(model);
|
|
2593
1375
|
```
|
|
2594
1376
|
|
|
2595
1377
|
---
|
|
2596
1378
|
|
|
2597
|
-
##
|
|
1379
|
+
## Model Evaluation and Utilities
|
|
2598
1380
|
|
|
2599
|
-
###
|
|
1381
|
+
### Data Splitting
|
|
2600
1382
|
|
|
2601
|
-
|
|
1383
|
+
#### `train_test_split(X, y, test_size = 0.2, seed = null)`
|
|
2602
1384
|
|
|
2603
|
-
|
|
2604
|
-
- `array`: Time series data
|
|
2605
|
-
- `window`: Window size (default: 3)
|
|
1385
|
+
Splits data into training and testing sets.
|
|
2606
1386
|
|
|
2607
1387
|
**Returns:**
|
|
2608
1388
|
```yaml
|
|
2609
|
-
type:
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
1389
|
+
type: data_split
|
|
1390
|
+
train_size: 0.8
|
|
1391
|
+
test_size: 0.2
|
|
1392
|
+
n_samples: 100
|
|
1393
|
+
n_train: 80
|
|
1394
|
+
n_test: 20
|
|
1395
|
+
seed: 42
|
|
1396
|
+
indices:
|
|
1397
|
+
train:
|
|
1398
|
+
- 0
|
|
1399
|
+
- 3
|
|
1400
|
+
- 5
|
|
1401
|
+
# ... more indices
|
|
1402
|
+
test:
|
|
1403
|
+
- 1
|
|
1404
|
+
- 2
|
|
1405
|
+
- 4
|
|
1406
|
+
# ... more indices
|
|
2618
1407
|
```
|
|
2619
1408
|
|
|
2620
1409
|
**Example:**
|
|
2621
1410
|
```javascript
|
|
2622
|
-
const
|
|
2623
|
-
const
|
|
1411
|
+
const X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]];
|
|
1412
|
+
const y = [0, 1, 0, 1, 0];
|
|
1413
|
+
|
|
1414
|
+
const split = datly.train_test_split(X, y, 0.2, 42);
|
|
1415
|
+
console.log(split);
|
|
1416
|
+
|
|
1417
|
+
// Use indices to create splits
|
|
1418
|
+
const trainIndices = split.indices.train;
|
|
1419
|
+
const testIndices = split.indices.test;
|
|
1420
|
+
|
|
1421
|
+
const X_train = trainIndices.map(i => X[i]);
|
|
1422
|
+
const y_train = trainIndices.map(i => y[i]);
|
|
1423
|
+
const X_test = testIndices.map(i => X[i]);
|
|
1424
|
+
const y_test = testIndices.map(i => y[i]);
|
|
2624
1425
|
```
|
|
2625
1426
|
|
|
2626
|
-
###
|
|
1427
|
+
### Feature Scaling
|
|
2627
1428
|
|
|
2628
|
-
|
|
1429
|
+
#### `standard_scaler_fit(X)`
|
|
2629
1430
|
|
|
2630
|
-
|
|
2631
|
-
- `array`: Time series data
|
|
2632
|
-
- `alpha`: Smoothing parameter (0 < α < 1)
|
|
1431
|
+
Fits a standard scaler to the data.
|
|
2633
1432
|
|
|
2634
1433
|
**Returns:**
|
|
2635
1434
|
```yaml
|
|
2636
|
-
type:
|
|
2637
|
-
method:
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
-
|
|
2642
|
-
-
|
|
2643
|
-
-
|
|
2644
|
-
|
|
1435
|
+
type: scaler
|
|
1436
|
+
method: standard
|
|
1437
|
+
n_features: 3
|
|
1438
|
+
n_samples: 100
|
|
1439
|
+
means:
|
|
1440
|
+
- 2.5
|
|
1441
|
+
- 15.3
|
|
1442
|
+
- 0.8
|
|
1443
|
+
stds:
|
|
1444
|
+
- 1.2
|
|
1445
|
+
- 5.6
|
|
1446
|
+
- 0.3
|
|
2645
1447
|
```
|
|
2646
1448
|
|
|
2647
1449
|
**Example:**
|
|
2648
1450
|
```javascript
|
|
2649
|
-
const
|
|
1451
|
+
const X = [[1, 10, 0.5], [2, 15, 0.7], [3, 20, 0.9], [4, 25, 1.1]];
|
|
1452
|
+
const scaler = datly.standard_scaler_fit(X);
|
|
1453
|
+
console.log(scaler);
|
|
2650
1454
|
```
|
|
2651
1455
|
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
Calculates autocorrelation at a given lag.
|
|
1456
|
+
#### `standard_scaler_transform(scaler, X)`
|
|
2655
1457
|
|
|
2656
|
-
|
|
2657
|
-
- `array`: Time series data
|
|
2658
|
-
- `lag`: Lag value (default: 1)
|
|
1458
|
+
Transforms data using a fitted scaler.
|
|
2659
1459
|
|
|
2660
1460
|
**Returns:**
|
|
2661
1461
|
```yaml
|
|
2662
|
-
type:
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
1462
|
+
type: scaled_data
|
|
1463
|
+
method: standard
|
|
1464
|
+
n_samples: 4
|
|
1465
|
+
n_features: 3
|
|
1466
|
+
preview:
|
|
1467
|
+
- - -1.34
|
|
1468
|
+
- -0.89
|
|
1469
|
+
- -1.00
|
|
1470
|
+
- - -0.45
|
|
1471
|
+
- -0.07
|
|
1472
|
+
- -0.33
|
|
1473
|
+
- - 0.45
|
|
1474
|
+
- 0.75
|
|
1475
|
+
- 0.33
|
|
1476
|
+
- - 1.34
|
|
1477
|
+
- 1.21
|
|
1478
|
+
- 1.00
|
|
2666
1479
|
```
|
|
2667
1480
|
|
|
2668
1481
|
**Example:**
|
|
2669
1482
|
```javascript
|
|
2670
|
-
const
|
|
1483
|
+
const X_scaled = datly.standard_scaler_transform(scaler, X);
|
|
1484
|
+
console.log(X_scaled);
|
|
2671
1485
|
```
|
|
2672
1486
|
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
## Outlier Detection
|
|
1487
|
+
### Model Metrics
|
|
2676
1488
|
|
|
2677
|
-
|
|
1489
|
+
#### `metrics_classification(y_true, y_pred)`
|
|
2678
1490
|
|
|
2679
|
-
|
|
1491
|
+
Calculates classification metrics.
|
|
2680
1492
|
|
|
2681
1493
|
**Returns:**
|
|
2682
1494
|
```yaml
|
|
2683
|
-
type:
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
-
|
|
2690
|
-
|
|
2691
|
-
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
-
|
|
2695
|
-
-
|
|
1495
|
+
type: classification_metrics
|
|
1496
|
+
accuracy: 0.85
|
|
1497
|
+
precision: 0.83
|
|
1498
|
+
recall: 0.87
|
|
1499
|
+
f1_score: 0.85
|
|
1500
|
+
confusion_matrix:
|
|
1501
|
+
- - 25
|
|
1502
|
+
- 3
|
|
1503
|
+
- - 5
|
|
1504
|
+
- 27
|
|
1505
|
+
support:
|
|
1506
|
+
- 28
|
|
1507
|
+
- 32
|
|
2696
1508
|
```
|
|
2697
1509
|
|
|
2698
1510
|
**Example:**
|
|
2699
1511
|
```javascript
|
|
2700
|
-
const
|
|
2701
|
-
const
|
|
2702
|
-
```
|
|
1512
|
+
const y_true = [0, 0, 1, 1, 0, 1, 1, 0];
|
|
1513
|
+
const y_pred = [0, 1, 1, 1, 0, 1, 0, 0];
|
|
2703
1514
|
|
|
2704
|
-
|
|
1515
|
+
const metrics = datly.metrics_classification(y_true, y_pred);
|
|
1516
|
+
console.log(metrics);
|
|
1517
|
+
```
|
|
2705
1518
|
|
|
2706
|
-
|
|
1519
|
+
#### `metrics_regression(y_true, y_pred)`
|
|
2707
1520
|
|
|
2708
|
-
|
|
2709
|
-
- `array`: Array of numbers
|
|
2710
|
-
- `threshold`: Z-score threshold (default: 3)
|
|
1521
|
+
Calculates regression metrics.
|
|
2711
1522
|
|
|
2712
1523
|
**Returns:**
|
|
2713
1524
|
```yaml
|
|
2714
|
-
type:
|
|
2715
|
-
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
- 12
|
|
2721
|
-
outlier_values:
|
|
2722
|
-
- 200
|
|
2723
|
-
- 30
|
|
1525
|
+
type: regression_metrics
|
|
1526
|
+
mae: 2.15
|
|
1527
|
+
mse: 6.78
|
|
1528
|
+
rmse: 2.60
|
|
1529
|
+
r2: 0.78
|
|
1530
|
+
explained_variance: 0.79
|
|
2724
1531
|
```
|
|
2725
1532
|
|
|
2726
1533
|
**Example:**
|
|
2727
1534
|
```javascript
|
|
2728
|
-
const
|
|
1535
|
+
const y_true = [3, -0.5, 2, 7];
|
|
1536
|
+
const y_pred = [2.5, 0.0, 2, 8];
|
|
1537
|
+
|
|
1538
|
+
const metrics = datly.metrics_regression(y_true, y_pred);
|
|
1539
|
+
console.log(metrics);
|
|
2729
1540
|
```
|
|
2730
1541
|
|
|
2731
1542
|
---
|
|
2732
1543
|
|
|
2733
1544
|
## Visualization
|
|
2734
1545
|
|
|
2735
|
-
All visualization functions create SVG-based charts. They accept optional configuration and a selector for where to render the chart.
|
|
1546
|
+
All visualization functions create SVG-based charts that can be rendered in the browser. They accept optional configuration and a selector for where to render the chart.
|
|
2736
1547
|
|
|
2737
1548
|
### Configuration Options
|
|
2738
1549
|
|
|
@@ -2747,47 +1558,50 @@ Common options for all plots:
|
|
|
2747
1558
|
|
|
2748
1559
|
### `plotHistogram(array, options = {}, selector)`
|
|
2749
1560
|
|
|
2750
|
-
Creates a histogram.
|
|
1561
|
+
Creates a histogram showing the distribution of values.
|
|
2751
1562
|
|
|
2752
1563
|
**Additional Options:**
|
|
2753
1564
|
- `bins`: Number of bins (default: 10)
|
|
2754
1565
|
|
|
2755
1566
|
**Example:**
|
|
2756
1567
|
```javascript
|
|
2757
|
-
const data = [1, 2, 2, 3, 3, 3, 4, 4, 5];
|
|
1568
|
+
const data = [1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5];
|
|
2758
1569
|
datly.plotHistogram(data, {
|
|
2759
1570
|
width: 600,
|
|
2760
1571
|
height: 400,
|
|
2761
|
-
bins:
|
|
2762
|
-
title: 'Distribution',
|
|
1572
|
+
bins: 8,
|
|
1573
|
+
title: 'Value Distribution',
|
|
1574
|
+
xlabel: 'Values',
|
|
1575
|
+
ylabel: 'Frequency',
|
|
2763
1576
|
color: '#4CAF50'
|
|
2764
|
-
}, '#chart');
|
|
1577
|
+
}, '#chart-container');
|
|
2765
1578
|
```
|
|
2766
1579
|
|
|
2767
1580
|
### `plotScatter(x, y, options = {}, selector)`
|
|
2768
1581
|
|
|
2769
|
-
Creates a scatter plot.
|
|
1582
|
+
Creates a scatter plot showing the relationship between two variables.
|
|
2770
1583
|
|
|
2771
1584
|
**Additional Options:**
|
|
2772
1585
|
- `size`: Point size (default: 4)
|
|
2773
1586
|
|
|
2774
1587
|
**Example:**
|
|
2775
1588
|
```javascript
|
|
2776
|
-
const x = [1, 2, 3, 4, 5];
|
|
2777
|
-
const y = [2, 4, 3, 5, 6];
|
|
1589
|
+
const x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
|
1590
|
+
const y = [2, 4, 3, 5, 6, 8, 7, 9, 8, 10];
|
|
2778
1591
|
datly.plotScatter(x, y, {
|
|
2779
1592
|
width: 600,
|
|
2780
1593
|
height: 400,
|
|
2781
|
-
title: '
|
|
1594
|
+
title: 'Correlation Analysis',
|
|
2782
1595
|
xlabel: 'X Variable',
|
|
2783
1596
|
ylabel: 'Y Variable',
|
|
2784
|
-
size:
|
|
2785
|
-
|
|
1597
|
+
size: 6,
|
|
1598
|
+
color: '#2196F3'
|
|
1599
|
+
}, '#scatter-plot');
|
|
2786
1600
|
```
|
|
2787
1601
|
|
|
2788
1602
|
### `plotLine(x, y, options = {}, selector)`
|
|
2789
1603
|
|
|
2790
|
-
Creates a line chart.
|
|
1604
|
+
Creates a line chart for time series or continuous data.
|
|
2791
1605
|
|
|
2792
1606
|
**Additional Options:**
|
|
2793
1607
|
- `lineWidth`: Line width (default: 2)
|
|
@@ -2795,32 +1609,41 @@ Creates a line chart.
|
|
|
2795
1609
|
|
|
2796
1610
|
**Example:**
|
|
2797
1611
|
```javascript
|
|
2798
|
-
const
|
|
2799
|
-
const
|
|
2800
|
-
datly.plotLine(
|
|
1612
|
+
const months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
|
1613
|
+
const sales = [100, 120, 140, 110, 160, 180, 200, 190, 220, 240, 260, 280];
|
|
1614
|
+
datly.plotLine(months, sales, {
|
|
1615
|
+
width: 800,
|
|
1616
|
+
height: 400,
|
|
2801
1617
|
lineWidth: 3,
|
|
2802
1618
|
showPoints: true,
|
|
2803
|
-
title: '
|
|
2804
|
-
|
|
1619
|
+
title: 'Monthly Sales Trend',
|
|
1620
|
+
xlabel: 'Month',
|
|
1621
|
+
ylabel: 'Sales ($000)',
|
|
1622
|
+
color: '#FF5722'
|
|
1623
|
+
}, '#line-chart');
|
|
2805
1624
|
```
|
|
2806
1625
|
|
|
2807
1626
|
### `plotBar(categories, values, options = {}, selector)`
|
|
2808
1627
|
|
|
2809
|
-
Creates a bar chart.
|
|
1628
|
+
Creates a bar chart for categorical data.
|
|
2810
1629
|
|
|
2811
1630
|
**Example:**
|
|
2812
1631
|
```javascript
|
|
2813
|
-
const categories = ['
|
|
2814
|
-
const
|
|
2815
|
-
datly.plotBar(categories,
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
1632
|
+
const categories = ['Q1', 'Q2', 'Q3', 'Q4'];
|
|
1633
|
+
const revenues = [120, 150, 180, 200];
|
|
1634
|
+
datly.plotBar(categories, revenues, {
|
|
1635
|
+
width: 600,
|
|
1636
|
+
height: 400,
|
|
1637
|
+
title: 'Quarterly Revenue',
|
|
1638
|
+
xlabel: 'Quarter',
|
|
1639
|
+
ylabel: 'Revenue ($M)',
|
|
1640
|
+
color: '#9C27B0'
|
|
1641
|
+
}, '#bar-chart');
|
|
2819
1642
|
```
|
|
2820
1643
|
|
|
2821
1644
|
### `plotBoxplot(data, options = {}, selector)`
|
|
2822
1645
|
|
|
2823
|
-
Creates box plots for one or more groups.
|
|
1646
|
+
Creates box plots showing distribution statistics for one or more groups.
|
|
2824
1647
|
|
|
2825
1648
|
**Parameters:**
|
|
2826
1649
|
- `data`: Array of arrays (each array is a group) or single array
|
|
@@ -2829,36 +1652,41 @@ Creates box plots for one or more groups.
|
|
|
2829
1652
|
|
|
2830
1653
|
**Example:**
|
|
2831
1654
|
```javascript
|
|
2832
|
-
const group1 = [1, 2, 3, 4, 5, 6];
|
|
2833
|
-
const group2 = [2, 3, 4, 5, 6, 7];
|
|
2834
|
-
const group3 = [3, 4, 5, 6, 7, 8];
|
|
1655
|
+
const group1 = [1, 2, 3, 4, 5, 6, 7, 8, 9];
|
|
1656
|
+
const group2 = [2, 3, 4, 5, 6, 7, 8, 9, 10];
|
|
1657
|
+
const group3 = [3, 4, 5, 6, 7, 8, 9, 10, 11];
|
|
2835
1658
|
|
|
2836
1659
|
datly.plotBoxplot([group1, group2, group3], {
|
|
2837
|
-
labels: ['
|
|
2838
|
-
title: 'Comparison'
|
|
2839
|
-
|
|
1660
|
+
labels: ['Control', 'Treatment A', 'Treatment B'],
|
|
1661
|
+
title: 'Treatment Comparison',
|
|
1662
|
+
ylabel: 'Response Value',
|
|
1663
|
+
width: 600,
|
|
1664
|
+
height: 400
|
|
1665
|
+
}, '#boxplot');
|
|
2840
1666
|
```
|
|
2841
1667
|
|
|
2842
1668
|
### `plotPie(labels, values, options = {}, selector)`
|
|
2843
1669
|
|
|
2844
|
-
Creates a pie chart.
|
|
1670
|
+
Creates a pie chart for proportional data.
|
|
2845
1671
|
|
|
2846
1672
|
**Additional Options:**
|
|
2847
1673
|
- `showLabels`: Display labels (default: true)
|
|
2848
1674
|
|
|
2849
1675
|
**Example:**
|
|
2850
1676
|
```javascript
|
|
2851
|
-
const
|
|
2852
|
-
const
|
|
2853
|
-
datly.plotPie(
|
|
2854
|
-
|
|
1677
|
+
const categories = ['Desktop', 'Mobile', 'Tablet'];
|
|
1678
|
+
const usage = [45, 40, 15];
|
|
1679
|
+
datly.plotPie(categories, usage, {
|
|
1680
|
+
width: 500,
|
|
1681
|
+
height: 500,
|
|
1682
|
+
title: 'Device Usage Distribution',
|
|
2855
1683
|
showLabels: true
|
|
2856
|
-
}, '#chart');
|
|
1684
|
+
}, '#pie-chart');
|
|
2857
1685
|
```
|
|
2858
1686
|
|
|
2859
1687
|
### `plotHeatmap(matrix, options = {}, selector)`
|
|
2860
1688
|
|
|
2861
|
-
Creates a heatmap for
|
|
1689
|
+
Creates a heatmap visualization for correlation matrices or 2D data.
|
|
2862
1690
|
|
|
2863
1691
|
**Additional Options:**
|
|
2864
1692
|
- `labels`: Array of variable names
|
|
@@ -2867,21 +1695,24 @@ Creates a heatmap for a correlation matrix.
|
|
|
2867
1695
|
**Example:**
|
|
2868
1696
|
```javascript
|
|
2869
1697
|
const corrMatrix = [
|
|
2870
|
-
[1.0, 0.8, 0.3],
|
|
2871
|
-
[0.8, 1.0, 0.5],
|
|
2872
|
-
[0.3, 0.5, 1.0]
|
|
1698
|
+
[1.0, 0.8, 0.3, 0.1],
|
|
1699
|
+
[0.8, 1.0, 0.5, 0.2],
|
|
1700
|
+
[0.3, 0.5, 1.0, 0.7],
|
|
1701
|
+
[0.1, 0.2, 0.7, 1.0]
|
|
2873
1702
|
];
|
|
2874
1703
|
|
|
2875
1704
|
datly.plotHeatmap(corrMatrix, {
|
|
2876
|
-
labels: ['
|
|
1705
|
+
labels: ['Age', 'Income', 'Education', 'Experience'],
|
|
2877
1706
|
showValues: true,
|
|
2878
|
-
title: 'Correlation Matrix'
|
|
2879
|
-
|
|
1707
|
+
title: 'Correlation Matrix',
|
|
1708
|
+
width: 500,
|
|
1709
|
+
height: 500
|
|
1710
|
+
}, '#heatmap');
|
|
2880
1711
|
```
|
|
2881
1712
|
|
|
2882
1713
|
### `plotViolin(data, options = {}, selector)`
|
|
2883
1714
|
|
|
2884
|
-
Creates violin plots showing distribution density.
|
|
1715
|
+
Creates violin plots showing distribution density for multiple groups.
|
|
2885
1716
|
|
|
2886
1717
|
**Parameters:**
|
|
2887
1718
|
- `data`: Array of arrays or single array
|
|
@@ -2890,46 +1721,57 @@ Creates violin plots showing distribution density.
|
|
|
2890
1721
|
|
|
2891
1722
|
**Example:**
|
|
2892
1723
|
```javascript
|
|
2893
|
-
const
|
|
2894
|
-
const
|
|
1724
|
+
const before = [5.1, 5.3, 4.9, 5.2, 5.0, 4.8, 5.1, 5.4];
|
|
1725
|
+
const after = [5.8, 6.1, 5.9, 6.2, 6.0, 5.7, 6.0, 6.3];
|
|
2895
1726
|
|
|
2896
|
-
datly.plotViolin([
|
|
2897
|
-
labels: ['Before', 'After'],
|
|
2898
|
-
title: 'Distribution
|
|
2899
|
-
|
|
1727
|
+
datly.plotViolin([before, after], {
|
|
1728
|
+
labels: ['Before Treatment', 'After Treatment'],
|
|
1729
|
+
title: 'Treatment Effect Distribution',
|
|
1730
|
+
ylabel: 'Measurement',
|
|
1731
|
+
width: 600,
|
|
1732
|
+
height: 400
|
|
1733
|
+
}, '#violin-plot');
|
|
2900
1734
|
```
|
|
2901
1735
|
|
|
2902
1736
|
### `plotDensity(array, options = {}, selector)`
|
|
2903
1737
|
|
|
2904
|
-
Creates a kernel density plot.
|
|
1738
|
+
Creates a kernel density plot showing the probability density function.
|
|
2905
1739
|
|
|
2906
1740
|
**Additional Options:**
|
|
2907
1741
|
- `bandwidth`: Smoothing bandwidth (default: 5)
|
|
2908
1742
|
|
|
2909
1743
|
**Example:**
|
|
2910
1744
|
```javascript
|
|
2911
|
-
const data = [1, 2, 2, 3, 3, 3, 4, 4, 5];
|
|
1745
|
+
const data = [1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 7];
|
|
2912
1746
|
datly.plotDensity(data, {
|
|
2913
1747
|
bandwidth: 0.5,
|
|
2914
|
-
title: 'Density
|
|
2915
|
-
|
|
1748
|
+
title: 'Data Distribution (Kernel Density)',
|
|
1749
|
+
xlabel: 'Values',
|
|
1750
|
+
ylabel: 'Density',
|
|
1751
|
+
width: 600,
|
|
1752
|
+
height: 400
|
|
1753
|
+
}, '#density-plot');
|
|
2916
1754
|
```
|
|
2917
1755
|
|
|
2918
1756
|
### `plotQQ(array, options = {}, selector)`
|
|
2919
1757
|
|
|
2920
|
-
Creates a Q-Q plot for normality
|
|
1758
|
+
Creates a Q-Q plot for assessing normality of data.
|
|
2921
1759
|
|
|
2922
1760
|
**Example:**
|
|
2923
1761
|
```javascript
|
|
2924
|
-
const data = [1.2, 2.3, 1.8, 2.1, 1.9, 2.0, 2.4];
|
|
1762
|
+
const data = [1.2, 2.3, 1.8, 2.1, 1.9, 2.0, 2.4, 1.7, 2.2, 1.6];
|
|
2925
1763
|
datly.plotQQ(data, {
|
|
2926
|
-
title: 'Q-Q Plot'
|
|
2927
|
-
|
|
1764
|
+
title: 'Q-Q Plot for Normality Check',
|
|
1765
|
+
xlabel: 'Theoretical Quantiles',
|
|
1766
|
+
ylabel: 'Sample Quantiles',
|
|
1767
|
+
width: 500,
|
|
1768
|
+
height: 500
|
|
1769
|
+
}, '#qq-plot');
|
|
2928
1770
|
```
|
|
2929
1771
|
|
|
2930
1772
|
### `plotParallel(data, columns, options = {}, selector)`
|
|
2931
1773
|
|
|
2932
|
-
Creates a parallel coordinates plot.
|
|
1774
|
+
Creates a parallel coordinates plot for multivariate data visualization.
|
|
2933
1775
|
|
|
2934
1776
|
**Parameters:**
|
|
2935
1777
|
- `data`: Array of objects
|
|
@@ -2939,20 +1781,23 @@ Creates a parallel coordinates plot.
|
|
|
2939
1781
|
|
|
2940
1782
|
**Example:**
|
|
2941
1783
|
```javascript
|
|
2942
|
-
const
|
|
2943
|
-
{ age: 25, salary: 50000, experience: 2 },
|
|
2944
|
-
{ age: 30, salary: 60000, experience: 5 },
|
|
2945
|
-
{ age: 35, salary: 70000, experience: 8 }
|
|
1784
|
+
const employees = [
|
|
1785
|
+
{ age: 25, salary: 50000, experience: 2, satisfaction: 7 },
|
|
1786
|
+
{ age: 30, salary: 60000, experience: 5, satisfaction: 8 },
|
|
1787
|
+
{ age: 35, salary: 70000, experience: 8, satisfaction: 6 },
|
|
1788
|
+
{ age: 40, salary: 80000, experience: 12, satisfaction: 9 }
|
|
2946
1789
|
];
|
|
2947
1790
|
|
|
2948
|
-
datly.plotParallel(
|
|
2949
|
-
title: '
|
|
2950
|
-
|
|
1791
|
+
datly.plotParallel(employees, ['age', 'salary', 'experience', 'satisfaction'], {
|
|
1792
|
+
title: 'Employee Profile Analysis',
|
|
1793
|
+
width: 800,
|
|
1794
|
+
height: 400
|
|
1795
|
+
}, '#parallel-plot');
|
|
2951
1796
|
```
|
|
2952
1797
|
|
|
2953
1798
|
### `plotPairplot(data, columns, options = {}, selector)`
|
|
2954
1799
|
|
|
2955
|
-
Creates a pairplot matrix showing all pairwise relationships.
|
|
1800
|
+
Creates a pairplot matrix showing all pairwise relationships between variables.
|
|
2956
1801
|
|
|
2957
1802
|
**Parameters:**
|
|
2958
1803
|
- `data`: Array of objects
|
|
@@ -2963,20 +1808,22 @@ Creates a pairplot matrix showing all pairwise relationships.
|
|
|
2963
1808
|
|
|
2964
1809
|
**Example:**
|
|
2965
1810
|
```javascript
|
|
2966
|
-
const
|
|
2967
|
-
{
|
|
2968
|
-
{
|
|
2969
|
-
{
|
|
1811
|
+
const iris = [
|
|
1812
|
+
{ sepal_length: 5.1, sepal_width: 3.5, petal_length: 1.4, petal_width: 0.2 },
|
|
1813
|
+
{ sepal_length: 4.9, sepal_width: 3.0, petal_length: 1.4, petal_width: 0.2 },
|
|
1814
|
+
{ sepal_length: 7.0, sepal_width: 3.2, petal_length: 4.7, petal_width: 1.4 },
|
|
1815
|
+
{ sepal_length: 6.4, sepal_width: 3.2, petal_length: 4.5, petal_width: 1.5 }
|
|
2970
1816
|
];
|
|
2971
1817
|
|
|
2972
|
-
datly.plotPairplot(
|
|
2973
|
-
size: 150
|
|
2974
|
-
|
|
1818
|
+
datly.plotPairplot(iris, ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], {
|
|
1819
|
+
size: 150,
|
|
1820
|
+
color: '#E91E63'
|
|
1821
|
+
}, '#pairplot');
|
|
2975
1822
|
```
|
|
2976
1823
|
|
|
2977
1824
|
### `plotMultiline(series, options = {}, selector)`
|
|
2978
1825
|
|
|
2979
|
-
Creates a multi-line chart for comparing time series.
|
|
1826
|
+
Creates a multi-line chart for comparing multiple time series.
|
|
2980
1827
|
|
|
2981
1828
|
**Parameters:**
|
|
2982
1829
|
- `series`: Array of objects with `name` and `data` properties
|
|
@@ -2986,52 +1833,87 @@ Creates a multi-line chart for comparing time series.
|
|
|
2986
1833
|
|
|
2987
1834
|
**Example:**
|
|
2988
1835
|
```javascript
|
|
2989
|
-
const
|
|
1836
|
+
const timeSeries = [
|
|
1837
|
+
{
|
|
1838
|
+
name: 'Product A',
|
|
1839
|
+
data: [{x: 1, y: 10}, {x: 2, y: 15}, {x: 3, y: 12}, {x: 4, y: 18}]
|
|
1840
|
+
},
|
|
2990
1841
|
{
|
|
2991
|
-
name: '
|
|
2992
|
-
data: [{x: 1, y:
|
|
1842
|
+
name: 'Product B',
|
|
1843
|
+
data: [{x: 1, y: 8}, {x: 2, y: 12}, {x: 3, y: 16}, {x: 4, y: 14}]
|
|
2993
1844
|
},
|
|
2994
1845
|
{
|
|
2995
|
-
name: '
|
|
2996
|
-
data: [{x: 1, y:
|
|
1846
|
+
name: 'Product C',
|
|
1847
|
+
data: [{x: 1, y: 12}, {x: 2, y: 9}, {x: 3, y: 14}, {x: 4, y: 16}]
|
|
2997
1848
|
}
|
|
2998
1849
|
];
|
|
2999
1850
|
|
|
3000
|
-
datly.plotMultiline(
|
|
1851
|
+
datly.plotMultiline(timeSeries, {
|
|
3001
1852
|
legend: true,
|
|
3002
|
-
title: 'Comparison'
|
|
3003
|
-
|
|
1853
|
+
title: 'Product Sales Comparison',
|
|
1854
|
+
xlabel: 'Quarter',
|
|
1855
|
+
ylabel: 'Sales (Units)',
|
|
1856
|
+
width: 700,
|
|
1857
|
+
height: 400
|
|
1858
|
+
}, '#multiline-chart');
|
|
3004
1859
|
```
|
|
3005
1860
|
|
|
3006
1861
|
---
|
|
3007
1862
|
|
|
3008
1863
|
## Complete Example Workflow
|
|
3009
1864
|
|
|
3010
|
-
Here's a
|
|
1865
|
+
Here's a comprehensive example demonstrating a typical data analysis workflow using datly:
|
|
3011
1866
|
|
|
3012
1867
|
```javascript
|
|
3013
1868
|
// 1. Load and explore data
|
|
3014
|
-
const
|
|
3015
|
-
{ age: 25, salary: 50000, experience: 2, department: 'IT' },
|
|
3016
|
-
{ age: 30, salary: 60000, experience: 5, department: 'HR' },
|
|
3017
|
-
{ age: 35, salary: 70000, experience: 8, department: 'IT' },
|
|
3018
|
-
|
|
1869
|
+
const employeeData = [
|
|
1870
|
+
{ age: 25, salary: 50000, experience: 2, department: 'IT', performance: 85 },
|
|
1871
|
+
{ age: 30, salary: 60000, experience: 5, department: 'HR', performance: 90 },
|
|
1872
|
+
{ age: 35, salary: 70000, experience: 8, department: 'IT', performance: 88 },
|
|
1873
|
+
{ age: 28, salary: 55000, experience: 3, department: 'Sales', performance: 82 },
|
|
1874
|
+
{ age: 42, salary: 85000, experience: 15, department: 'IT', performance: 95 },
|
|
1875
|
+
{ age: 31, salary: 62000, experience: 6, department: 'HR', performance: 87 },
|
|
1876
|
+
{ age: 26, salary: 48000, experience: 1, department: 'Sales', performance: 78 },
|
|
1877
|
+
{ age: 38, salary: 75000, experience: 12, department: 'IT', performance: 92 }
|
|
3019
1878
|
];
|
|
3020
1879
|
|
|
3021
|
-
// 2. Perform
|
|
3022
|
-
const overview = datly.eda_overview(
|
|
3023
|
-
console.log(overview);
|
|
3024
|
-
|
|
3025
|
-
// 3.
|
|
3026
|
-
const
|
|
3027
|
-
|
|
1880
|
+
// 2. Perform exploratory data analysis
|
|
1881
|
+
const overview = datly.eda_overview(employeeData);
|
|
1882
|
+
console.log('Dataset Overview:', overview);
|
|
1883
|
+
|
|
1884
|
+
// 3. Calculate descriptive statistics for salary
|
|
1885
|
+
const salaries = employeeData.map(emp => emp.salary);
|
|
1886
|
+
const salaryStats = datly.describe(salaries);
|
|
1887
|
+
console.log('Salary Statistics:', salaryStats);
|
|
1888
|
+
|
|
1889
|
+
// 4. Check correlations between numeric variables
|
|
1890
|
+
const correlations = datly.df_corr(employeeData, 'pearson');
|
|
1891
|
+
console.log('Correlation Matrix:', correlations);
|
|
1892
|
+
|
|
1893
|
+
// 5. Visualize salary distribution
|
|
1894
|
+
datly.plotHistogram(salaries, {
|
|
1895
|
+
title: 'Salary Distribution',
|
|
1896
|
+
xlabel: 'Salary ($)',
|
|
1897
|
+
ylabel: 'Frequency',
|
|
1898
|
+
bins: 6,
|
|
1899
|
+
color: '#2196F3'
|
|
1900
|
+
}, '#salary-histogram');
|
|
1901
|
+
|
|
1902
|
+
// 6. Analyze relationship between experience and salary
|
|
1903
|
+
const experience = employeeData.map(emp => emp.experience);
|
|
1904
|
+
datly.plotScatter(experience, salaries, {
|
|
1905
|
+
title: 'Experience vs Salary',
|
|
1906
|
+
xlabel: 'Years of Experience',
|
|
1907
|
+
ylabel: 'Salary ($)',
|
|
1908
|
+
color: '#4CAF50'
|
|
1909
|
+
}, '#experience-salary-scatter');
|
|
3028
1910
|
|
|
3029
|
-
//
|
|
3030
|
-
const X =
|
|
3031
|
-
const y =
|
|
1911
|
+
// 7. Prepare data for machine learning
|
|
1912
|
+
const X = employeeData.map(emp => [emp.age, emp.experience]);
|
|
1913
|
+
const y = salaries;
|
|
3032
1914
|
|
|
3033
|
-
//
|
|
3034
|
-
const split = datly.train_test_split(X, y, 0.
|
|
1915
|
+
// 8. Split data into training and testing sets
|
|
1916
|
+
const split = datly.train_test_split(X, y, 0.3, 42);
|
|
3035
1917
|
const trainIndices = split.indices.train;
|
|
3036
1918
|
const testIndices = split.indices.test;
|
|
3037
1919
|
|
|
@@ -3040,49 +1922,122 @@ const y_train = trainIndices.map(i => y[i]);
|
|
|
3040
1922
|
const X_test = testIndices.map(i => X[i]);
|
|
3041
1923
|
const y_test = testIndices.map(i => y[i]);
|
|
3042
1924
|
|
|
3043
|
-
//
|
|
1925
|
+
// 9. Scale features for better model performance
|
|
3044
1926
|
const scaler = datly.standard_scaler_fit(X_train);
|
|
3045
1927
|
const X_train_scaled = datly.standard_scaler_transform(scaler, X_train);
|
|
3046
1928
|
const X_test_scaled = datly.standard_scaler_transform(scaler, X_test);
|
|
3047
1929
|
|
|
3048
|
-
//
|
|
3049
|
-
const model = datly.train_linear_regression(
|
|
3050
|
-
|
|
3051
|
-
|
|
1930
|
+
// 10. Train linear regression model
|
|
1931
|
+
const model = datly.train_linear_regression(X_train_scaled.data, y_train);
|
|
1932
|
+
console.log('Linear Regression Model:', model);
|
|
1933
|
+
|
|
1934
|
+
// 11. Make predictions
|
|
1935
|
+
const predictions = datly.predict_linear(model, X_test_scaled.data);
|
|
1936
|
+
console.log('Predictions:', predictions);
|
|
1937
|
+
|
|
1938
|
+
// 12. Evaluate model performance
|
|
1939
|
+
const metrics = datly.metrics_regression(y_test, predictions.predictions);
|
|
1940
|
+
console.log('Model Performance:', metrics);
|
|
1941
|
+
|
|
1942
|
+
// 13. Visualize actual vs predicted values
|
|
1943
|
+
datly.plotScatter(y_test, predictions.predictions, {
|
|
1944
|
+
title: 'Actual vs Predicted Salaries',
|
|
1945
|
+
xlabel: 'Actual Salary ($)',
|
|
1946
|
+
ylabel: 'Predicted Salary ($)',
|
|
1947
|
+
color: '#FF5722'
|
|
1948
|
+
}, '#prediction-scatter');
|
|
1949
|
+
|
|
1950
|
+
// 14. Compare salary distributions by department
|
|
1951
|
+
const departments = ['IT', 'HR', 'Sales'];
|
|
1952
|
+
const deptSalaries = departments.map(dept =>
|
|
1953
|
+
employeeData.filter(emp => emp.department === dept).map(emp => emp.salary)
|
|
3052
1954
|
);
|
|
3053
1955
|
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
1956
|
+
datly.plotBoxplot(deptSalaries, {
|
|
1957
|
+
labels: departments,
|
|
1958
|
+
title: 'Salary Distribution by Department',
|
|
1959
|
+
ylabel: 'Salary ($)',
|
|
1960
|
+
width: 600,
|
|
1961
|
+
height: 400
|
|
1962
|
+
}, '#department-boxplot');
|
|
3059
1963
|
|
|
3060
|
-
//
|
|
3061
|
-
const
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
1964
|
+
// 15. Perform clustering analysis
|
|
1965
|
+
const clusterData = employeeData.map(emp => [emp.age, emp.salary / 1000]); // Express salary in thousands so it is on a scale closer to age
|
|
1966
|
+
const clusterResult = datly.kmeans(clusterData, 3, { seed: 42 });
|
|
1967
|
+
console.log('Clustering Results:', clusterResult);
|
|
1968
|
+
|
|
1969
|
+
// 16. Test for salary differences between departments
|
|
1970
|
+
const itSalaries = employeeData.filter(emp => emp.department === 'IT').map(emp => emp.salary);
|
|
1971
|
+
const hrSalaries = employeeData.filter(emp => emp.department === 'HR').map(emp => emp.salary);
|
|
1972
|
+
const salesSalaries = employeeData.filter(emp => emp.department === 'Sales').map(emp => emp.salary);
|
|
1973
|
+
|
|
1974
|
+
const anovaResult = datly.anova_oneway([itSalaries, hrSalaries, salesSalaries]);
|
|
1975
|
+
console.log('ANOVA Test (Salary by Department):', anovaResult);
|
|
1976
|
+
|
|
1977
|
+
// 17. Create comprehensive visualization dashboard
|
|
1978
|
+
// Correlation heatmap (the matrix below is hard-coded for illustration; use df_corr() to compute real values)
|
|
1979
|
+
const numericData = employeeData.map(emp => [emp.age, emp.salary / 1000, emp.experience, emp.performance]);
|
|
1980
|
+
const corrMatrix = [
|
|
1981
|
+
[1.0, 0.75, 0.95, 0.62],
|
|
1982
|
+
[0.75, 1.0, 0.68, 0.43],
|
|
1983
|
+
[0.95, 0.68, 1.0, 0.71],
|
|
1984
|
+
[0.62, 0.43, 0.71, 1.0]
|
|
1985
|
+
];
|
|
3066
1986
|
|
|
3067
|
-
|
|
3068
|
-
|
|
3069
|
-
title: '
|
|
3070
|
-
|
|
3071
|
-
|
|
3072
|
-
}, '#results');
|
|
1987
|
+
datly.plotHeatmap(corrMatrix, {
|
|
1988
|
+
labels: ['Age', 'Salary (k)', 'Experience', 'Performance'],
|
|
1989
|
+
title: 'Employee Metrics Correlation',
|
|
1990
|
+
showValues: true
|
|
1991
|
+
}, '#correlation-heatmap');
|
|
3073
1992
|
```
|
|
3074
1993
|
|
|
3075
1994
|
---
|
|
3076
1995
|
|
|
3077
1996
|
## Tips and Best Practices
|
|
3078
1997
|
|
|
3079
|
-
1. **Data Preparation**: Always check for missing values and outliers before analysis
|
|
3080
|
-
2. **Feature Scaling**: Scale features before training distance-based models (KNN
|
|
3081
|
-
3. **Cross-Validation**: Use
|
|
1998
|
+
1. **Data Preparation**: Before analysis, always check for missing values and outliers using `missing_values()` and `outliers_zscore()`
|
|
1999
|
+
2. **Feature Scaling**: Use `standard_scaler_fit()` and `standard_scaler_transform()` to scale features before training distance-based models (KNN) or neural networks
|
|
2000
|
+
3. **Cross-Validation**: Use `train_test_split()` to hold out a test set and assess model performance on unseen data
|
|
3082
2001
|
4. **Model Selection**: Start with simple models (linear regression) before trying complex ones
|
|
3083
|
-
5. **Hyperparameter Tuning**: Experiment with different
|
|
3084
|
-
6. **Visualization**: Always visualize your data and results to gain insights
|
|
3085
|
-
7. **Statistical Tests**: Check assumptions (normality
|
|
2002
|
+
5. **Hyperparameter Tuning**: Experiment with different parameters (k in KNN, max_depth in trees)
|
|
2003
|
+
6. **Visualization**: Always visualize your data and results using the plotting functions to gain insights
|
|
2004
|
+
7. **Statistical Tests**: Check assumptions (e.g., normality with `shapiro_wilk()`) before running parametric tests
|
|
2005
|
+
8. **Object Access**: Results are returned as JavaScript objects - access properties directly (e.g., `result.value`, `result.p_value`)
|
|
2006
|
+
|
|
2007
|
+
---
|
|
2008
|
+
|
|
2009
|
+
## API Reference Summary
|
|
2010
|
+
|
|
2011
|
+
### Statistics Functions
|
|
2012
|
+
- `mean(array)`, `median(array)`, `variance(array)`, `std(array)`
|
|
2013
|
+
- `skewness(array)`, `kurtosis(array)`, `percentile(array, p)`
|
|
2014
|
+
- `describe(array)` - comprehensive statistics
|
|
2015
|
+
|
|
2016
|
+
### Dataframe Operations
|
|
2017
|
+
- `df_from_csv()`, `df_from_json()`, `df_from_array()`, `df_from_object()`
|
|
2018
|
+
- `df_get_column()`, `df_get_value()`, `df_get_columns()`
|
|
2019
|
+
- `df_head()`, `df_tail()`, `df_corr()`
|
|
2020
|
+
|
|
2021
|
+
### Machine Learning
|
|
2022
|
+
- `train_linear_regression()`, `predict_linear()`
|
|
2023
|
+
- `train_logistic_regression()`, `predict_logistic()`
|
|
2024
|
+
- `train_knn()`, `predict_knn()`
|
|
2025
|
+
- `train_decision_tree()`, `train_random_forest()`
|
|
2026
|
+
- `train_naive_bayes()`, `kmeans()`
|
|
2027
|
+
|
|
2028
|
+
### Statistical Tests
|
|
2029
|
+
- `ttest_1samp()`, `ttest_ind()`, `anova_oneway()`
|
|
2030
|
+
- `shapiro_wilk()`, `correlation()`
|
|
2031
|
+
|
|
2032
|
+
### Utilities
|
|
2033
|
+
- `train_test_split()`, `standard_scaler_fit()`, `standard_scaler_transform()`
|
|
2034
|
+
- `metrics_classification()`, `metrics_regression()`
|
|
2035
|
+
- `eda_overview()`, `missing_values()`, `outliers_zscore()`
|
|
2036
|
+
|
|
2037
|
+
### Visualization
|
|
2038
|
+
- `plotHistogram()`, `plotScatter()`, `plotLine()`, `plotBar()`
|
|
2039
|
+
- `plotBoxplot()`, `plotPie()`, `plotHeatmap()`, `plotViolin()`
|
|
2040
|
+
- `plotDensity()`, `plotQQ()`, `plotParallel()`, `plotPairplot()`, `plotMultiline()`
|
|
3086
2041
|
|
|
3087
2042
|
---
|
|
3088
2043
|
|
|
@@ -3094,4 +2049,4 @@ This documentation is provided as-is. Please refer to the library's official rep
|
|
|
3094
2049
|
|
|
3095
2050
|
## Support
|
|
3096
2051
|
|
|
3097
|
-
For issues, questions, or contributions, please visit the official
|
|
2052
|
+
For issues, questions, or contributions, please visit the official datly repository.
|