deepbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +344 -0
- package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
- package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
- package/dist/Tensor-BQLk1ltW.d.cts +147 -0
- package/dist/Tensor-g8mUClel.d.ts +147 -0
- package/dist/chunk-4S73VUBD.js +677 -0
- package/dist/chunk-4S73VUBD.js.map +1 -0
- package/dist/chunk-5R4S63PF.js +2925 -0
- package/dist/chunk-5R4S63PF.js.map +1 -0
- package/dist/chunk-6AE5FKKQ.cjs +9264 -0
- package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
- package/dist/chunk-AD436M45.js +3854 -0
- package/dist/chunk-AD436M45.js.map +1 -0
- package/dist/chunk-ALS7ETWZ.cjs +4263 -0
- package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
- package/dist/chunk-AU7XHGKJ.js +2092 -0
- package/dist/chunk-AU7XHGKJ.js.map +1 -0
- package/dist/chunk-B5TNKUEY.js +1481 -0
- package/dist/chunk-B5TNKUEY.js.map +1 -0
- package/dist/chunk-BCR7G3A6.js +9136 -0
- package/dist/chunk-BCR7G3A6.js.map +1 -0
- package/dist/chunk-C4PKXY74.cjs +1917 -0
- package/dist/chunk-C4PKXY74.cjs.map +1 -0
- package/dist/chunk-DWZY6PIP.cjs +6400 -0
- package/dist/chunk-DWZY6PIP.cjs.map +1 -0
- package/dist/chunk-E3EU5FZO.cjs +2113 -0
- package/dist/chunk-E3EU5FZO.cjs.map +1 -0
- package/dist/chunk-F3JWBINJ.js +1054 -0
- package/dist/chunk-F3JWBINJ.js.map +1 -0
- package/dist/chunk-FJYLIGJX.js +1940 -0
- package/dist/chunk-FJYLIGJX.js.map +1 -0
- package/dist/chunk-JSCDE774.cjs +729 -0
- package/dist/chunk-JSCDE774.cjs.map +1 -0
- package/dist/chunk-LWECRCW2.cjs +2412 -0
- package/dist/chunk-LWECRCW2.cjs.map +1 -0
- package/dist/chunk-MLBMYKCG.js +6379 -0
- package/dist/chunk-MLBMYKCG.js.map +1 -0
- package/dist/chunk-OX6QXFMV.cjs +3874 -0
- package/dist/chunk-OX6QXFMV.cjs.map +1 -0
- package/dist/chunk-PHV2DKRS.cjs +1072 -0
- package/dist/chunk-PHV2DKRS.cjs.map +1 -0
- package/dist/chunk-PL7TAYKI.js +4056 -0
- package/dist/chunk-PL7TAYKI.js.map +1 -0
- package/dist/chunk-PR647I7R.js +1898 -0
- package/dist/chunk-PR647I7R.js.map +1 -0
- package/dist/chunk-QERHVCHC.cjs +2960 -0
- package/dist/chunk-QERHVCHC.cjs.map +1 -0
- package/dist/chunk-XEG44RF6.cjs +1514 -0
- package/dist/chunk-XEG44RF6.cjs.map +1 -0
- package/dist/chunk-XMWVME2W.js +2377 -0
- package/dist/chunk-XMWVME2W.js.map +1 -0
- package/dist/chunk-ZB75FESB.cjs +1979 -0
- package/dist/chunk-ZB75FESB.cjs.map +1 -0
- package/dist/chunk-ZLW62TJG.cjs +4061 -0
- package/dist/chunk-ZLW62TJG.cjs.map +1 -0
- package/dist/chunk-ZXKBDFP3.js +4235 -0
- package/dist/chunk-ZXKBDFP3.js.map +1 -0
- package/dist/core/index.cjs +204 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +2 -0
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.js +3 -0
- package/dist/core/index.js.map +1 -0
- package/dist/dataframe/index.cjs +22 -0
- package/dist/dataframe/index.cjs.map +1 -0
- package/dist/dataframe/index.d.cts +3 -0
- package/dist/dataframe/index.d.ts +3 -0
- package/dist/dataframe/index.js +5 -0
- package/dist/dataframe/index.js.map +1 -0
- package/dist/datasets/index.cjs +134 -0
- package/dist/datasets/index.cjs.map +1 -0
- package/dist/datasets/index.d.cts +3 -0
- package/dist/datasets/index.d.ts +3 -0
- package/dist/datasets/index.js +5 -0
- package/dist/datasets/index.js.map +1 -0
- package/dist/index-74AB8Cyh.d.cts +1126 -0
- package/dist/index-9oQx1HgV.d.cts +1180 -0
- package/dist/index-BJY2SI4i.d.ts +483 -0
- package/dist/index-BWGhrDlr.d.ts +733 -0
- package/dist/index-B_DK4FKY.d.cts +242 -0
- package/dist/index-BbA2Gxfl.d.ts +456 -0
- package/dist/index-BgHYAoSS.d.cts +837 -0
- package/dist/index-BndMbqsM.d.ts +1439 -0
- package/dist/index-C1mfVYoo.d.ts +2517 -0
- package/dist/index-CCvlwAmL.d.cts +809 -0
- package/dist/index-CDw5CnOU.d.ts +785 -0
- package/dist/index-Cn3SdB0O.d.ts +1126 -0
- package/dist/index-CrqLlS-a.d.ts +776 -0
- package/dist/index-D61yaSMY.d.cts +483 -0
- package/dist/index-D9Loo1_A.d.cts +2517 -0
- package/dist/index-DIT_OO9C.d.cts +785 -0
- package/dist/index-DIp_RrRt.d.ts +242 -0
- package/dist/index-DbultU6X.d.cts +1427 -0
- package/dist/index-DmEg_LCm.d.cts +776 -0
- package/dist/index-DoPWVxPo.d.cts +1439 -0
- package/dist/index-DuCxd-8d.d.ts +837 -0
- package/dist/index-Dx42TZaY.d.ts +809 -0
- package/dist/index-DyZ4QQf5.d.cts +456 -0
- package/dist/index-GFAVyOWO.d.ts +1427 -0
- package/dist/index-WHQLn0e8.d.cts +733 -0
- package/dist/index-ZtI1Iy4L.d.ts +1180 -0
- package/dist/index-eJgeni9c.d.cts +1911 -0
- package/dist/index-tk4lSYod.d.ts +1911 -0
- package/dist/index.cjs +72 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/linalg/index.cjs +86 -0
- package/dist/linalg/index.cjs.map +1 -0
- package/dist/linalg/index.d.cts +3 -0
- package/dist/linalg/index.d.ts +3 -0
- package/dist/linalg/index.js +5 -0
- package/dist/linalg/index.js.map +1 -0
- package/dist/metrics/index.cjs +158 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.cts +3 -0
- package/dist/metrics/index.d.ts +3 -0
- package/dist/metrics/index.js +5 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/ml/index.cjs +87 -0
- package/dist/ml/index.cjs.map +1 -0
- package/dist/ml/index.d.cts +3 -0
- package/dist/ml/index.d.ts +3 -0
- package/dist/ml/index.js +6 -0
- package/dist/ml/index.js.map +1 -0
- package/dist/ndarray/index.cjs +501 -0
- package/dist/ndarray/index.cjs.map +1 -0
- package/dist/ndarray/index.d.cts +5 -0
- package/dist/ndarray/index.d.ts +5 -0
- package/dist/ndarray/index.js +4 -0
- package/dist/ndarray/index.js.map +1 -0
- package/dist/nn/index.cjs +142 -0
- package/dist/nn/index.cjs.map +1 -0
- package/dist/nn/index.d.cts +6 -0
- package/dist/nn/index.d.ts +6 -0
- package/dist/nn/index.js +5 -0
- package/dist/nn/index.js.map +1 -0
- package/dist/optim/index.cjs +77 -0
- package/dist/optim/index.cjs.map +1 -0
- package/dist/optim/index.d.cts +4 -0
- package/dist/optim/index.d.ts +4 -0
- package/dist/optim/index.js +4 -0
- package/dist/optim/index.js.map +1 -0
- package/dist/plot/index.cjs +114 -0
- package/dist/plot/index.cjs.map +1 -0
- package/dist/plot/index.d.cts +6 -0
- package/dist/plot/index.d.ts +6 -0
- package/dist/plot/index.js +5 -0
- package/dist/plot/index.js.map +1 -0
- package/dist/preprocess/index.cjs +82 -0
- package/dist/preprocess/index.cjs.map +1 -0
- package/dist/preprocess/index.d.cts +4 -0
- package/dist/preprocess/index.d.ts +4 -0
- package/dist/preprocess/index.js +5 -0
- package/dist/preprocess/index.js.map +1 -0
- package/dist/random/index.cjs +74 -0
- package/dist/random/index.cjs.map +1 -0
- package/dist/random/index.d.cts +3 -0
- package/dist/random/index.d.ts +3 -0
- package/dist/random/index.js +5 -0
- package/dist/random/index.js.map +1 -0
- package/dist/stats/index.cjs +142 -0
- package/dist/stats/index.cjs.map +1 -0
- package/dist/stats/index.d.cts +3 -0
- package/dist/stats/index.d.ts +3 -0
- package/dist/stats/index.js +5 -0
- package/dist/stats/index.js.map +1 -0
- package/dist/tensor-B96jjJLQ.d.cts +205 -0
- package/dist/tensor-B96jjJLQ.d.ts +205 -0
- package/package.json +226 -0
|
@@ -0,0 +1,4061 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var chunk6AE5FKKQ_cjs = require('./chunk-6AE5FKKQ.cjs');
|
|
4
|
+
var chunkJSCDE774_cjs = require('./chunk-JSCDE774.cjs');
|
|
5
|
+
|
|
6
|
+
// src/dataframe/index.ts
|
|
7
|
+
var dataframe_exports = {};
|
|
8
|
+
chunkJSCDE774_cjs.__export(dataframe_exports, {
|
|
9
|
+
DataFrame: () => DataFrame,
|
|
10
|
+
DataFrameGroupBy: () => DataFrameGroupBy,
|
|
11
|
+
Series: () => Series
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
// src/dataframe/utils.ts
|
|
15
|
+
// Reports whether `value` is an object that is neither null nor an array.
var isRecord = (value) => {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
};
|
|
16
|
+
// Builds a string key for `value`, prefixing primitives with a type tag so
// that e.g. the number 1 ("n:1") and the string "1" ("s:1") get different keys.
// Arrays and plain objects are encoded recursively; object keys are sorted so
// property order does not affect the result.
// NOTE: string contents are embedded verbatim (no escaping), so a string that
// itself contains "," or ":" can yield the same key as a different nested value.
var createKey = (value) => {
  if (value === null) return "null";
  if (value === undefined) return "undefined";

  switch (typeof value) {
    case "number":
      // NaN, Infinity and -Infinity are emitted without the "n:" tag.
      return Number.isFinite(value) ? `n:${value}` : String(value);
    case "string":
      return `s:${value}`;
    case "boolean":
      return `b:${value}`;
    case "bigint":
      return `bi:${value.toString()}`;
    default:
      break;
  }

  if (Array.isArray(value)) {
    const encodedItems = value.map(createKey);
    return `[${encodedItems.join(",")}]`;
  }

  if (isRecord(value)) {
    const encodedEntries = Object.keys(value)
      .sort()
      .map((prop) => `${createKey(prop)}:${createKey(value[prop])}`);
    return `{${encodedEntries.join(",")}}`;
  }

  // Anything else falls back to its default string conversion.
  return String(value);
};
|
|
45
|
+
// True only for finite numbers: rejects non-number types, NaN and +/-Infinity.
// Number.isFinite() already returns false for NaN, so no separate NaN test is needed.
var isValidNumber = (value) => typeof value === "number" && Number.isFinite(value);
|
|
48
|
+
|
|
49
|
+
// src/dataframe/Series.ts
|
|
50
|
+
var Series = class _Series {
|
|
51
|
+
// Internal storage for the actual data values
|
|
52
|
+
_data;
|
|
53
|
+
// Internal storage for index labels (can be strings or numbers)
|
|
54
|
+
_index;
|
|
55
|
+
// Fast label -> position lookup for O(1) label-based access
|
|
56
|
+
_indexPos;
|
|
57
|
+
// Optional name for this Series
|
|
58
|
+
_name;
|
|
59
|
+
/**
|
|
60
|
+
* Creates a new Series instance.
|
|
61
|
+
*
|
|
62
|
+
* @param data - Array of values to store in the Series
|
|
63
|
+
* @param options - Configuration options
|
|
64
|
+
* @param options.index - Custom index labels (defaults to 0, 1, 2, ...)
|
|
65
|
+
* @param options.name - Optional name for the Series
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```ts
|
|
69
|
+
* const s = new Series([10, 20, 30], {
|
|
70
|
+
* index: ['a', 'b', 'c'],
|
|
71
|
+
* name: 'values'
|
|
72
|
+
* });
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
constructor(data, options = {}) {
|
|
76
|
+
this._data = options.copy === false ? data : [...data];
|
|
77
|
+
this._index = options.index ? options.copy === false ? options.index : [...options.index] : Array.from({ length: this._data.length }, (_, i) => i);
|
|
78
|
+
if (this._index.length !== this._data.length) {
|
|
79
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
80
|
+
`Index length (${this._index.length}) must match data length (${this._data.length})`
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
this._indexPos = /* @__PURE__ */ new Map();
|
|
84
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
85
|
+
const label = this._index[i];
|
|
86
|
+
if (label === void 0) {
|
|
87
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Index labels cannot be undefined");
|
|
88
|
+
}
|
|
89
|
+
if (this._indexPos.has(label)) {
|
|
90
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Duplicate index label '${String(label)}' is not supported`);
|
|
91
|
+
}
|
|
92
|
+
this._indexPos.set(label, i);
|
|
93
|
+
}
|
|
94
|
+
this._name = options.name;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Get the underlying data array.
|
|
98
|
+
*
|
|
99
|
+
* @returns Read-only view of the data array
|
|
100
|
+
*/
|
|
101
|
+
get data() {
|
|
102
|
+
return this._data;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Get the index labels.
|
|
106
|
+
*
|
|
107
|
+
* @returns Read-only view of the index array
|
|
108
|
+
*/
|
|
109
|
+
get index() {
|
|
110
|
+
return this._index;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Get the Series name.
|
|
114
|
+
*
|
|
115
|
+
* @returns The name of this Series, or undefined if not set
|
|
116
|
+
*/
|
|
117
|
+
get name() {
|
|
118
|
+
return this._name;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Get the number of elements in the Series.
|
|
122
|
+
*
|
|
123
|
+
* @returns Length of the Series
|
|
124
|
+
*/
|
|
125
|
+
get length() {
|
|
126
|
+
return this._data.length;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Get a value by label.
|
|
130
|
+
*
|
|
131
|
+
* This method is an alias for `loc()`. It performs strict label-based lookup.
|
|
132
|
+
* For positional access, use `iloc()`.
|
|
133
|
+
*
|
|
134
|
+
* @param label - The index label to look up
|
|
135
|
+
* @returns The value at that label, or undefined if not found
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```ts
|
|
139
|
+
* const s = new Series([10, 20, 30], { index: ['a', 'b', 'c'] });
|
|
140
|
+
* s.get('a'); // 10
|
|
141
|
+
* s.get('z'); // undefined
|
|
142
|
+
* ```
|
|
143
|
+
*/
|
|
144
|
+
get(label) {
|
|
145
|
+
const position = this._indexPos.get(label);
|
|
146
|
+
return position === void 0 ? void 0 : this._data[position];
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Access a value by label (label-based indexing).
|
|
150
|
+
*
|
|
151
|
+
* @param label - The index label to look up
|
|
152
|
+
* @returns The value at that label, or undefined if not found
|
|
153
|
+
*
|
|
154
|
+
* @example
|
|
155
|
+
* ```ts
|
|
156
|
+
* const s = new Series([10, 20], { index: ['a', 'b'] });
|
|
157
|
+
* s.loc('a'); // 10
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
loc(label) {
|
|
161
|
+
const position = this._indexPos.get(label);
|
|
162
|
+
return position === void 0 ? void 0 : this._data[position];
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Access a value by integer position (position-based indexing).
|
|
166
|
+
*
|
|
167
|
+
* @param position - The integer position (0-based)
|
|
168
|
+
* @returns The value at that position
|
|
169
|
+
* @throws {IndexError} If position is out of bounds
|
|
170
|
+
*
|
|
171
|
+
* @example
|
|
172
|
+
* ```ts
|
|
173
|
+
* const s = new Series([10, 20, 30]);
|
|
174
|
+
* s.iloc(0); // 10
|
|
175
|
+
* s.iloc(2); // 30
|
|
176
|
+
* ```
|
|
177
|
+
*/
|
|
178
|
+
iloc(position) {
|
|
179
|
+
if (this._data.length === 0) {
|
|
180
|
+
throw new chunkJSCDE774_cjs.IndexError(`Series is empty`, {
|
|
181
|
+
index: position,
|
|
182
|
+
validRange: [0, 0]
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
if (position < 0 || position >= this._data.length) {
|
|
186
|
+
throw new chunkJSCDE774_cjs.IndexError(`Position ${position} is out of bounds (0-${this._data.length - 1})`, {
|
|
187
|
+
index: position,
|
|
188
|
+
validRange: [0, this._data.length - 1]
|
|
189
|
+
});
|
|
190
|
+
}
|
|
191
|
+
return this._data[position];
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Return the first n elements.
|
|
195
|
+
*
|
|
196
|
+
* @param n - Number of elements to return (default: 5)
|
|
197
|
+
* @returns New Series with the first n elements
|
|
198
|
+
*
|
|
199
|
+
* @example
|
|
200
|
+
* ```ts
|
|
201
|
+
* const s = new Series([1, 2, 3, 4, 5, 6]);
|
|
202
|
+
* s.head(3); // Series([1, 2, 3])
|
|
203
|
+
* ```
|
|
204
|
+
*/
|
|
205
|
+
head(n = 5) {
|
|
206
|
+
if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
|
|
207
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("n must be a non-negative integer", "n", n);
|
|
208
|
+
}
|
|
209
|
+
const options = {
|
|
210
|
+
index: this._index.slice(0, n)
|
|
211
|
+
};
|
|
212
|
+
if (this._name !== void 0) {
|
|
213
|
+
options.name = this._name;
|
|
214
|
+
}
|
|
215
|
+
return new _Series(this._data.slice(0, n), options);
|
|
216
|
+
}
|
|
217
|
+
/**
|
|
218
|
+
* Return the last n elements.
|
|
219
|
+
*
|
|
220
|
+
* @param n - Number of elements to return (default: 5)
|
|
221
|
+
* @returns New Series with the last n elements
|
|
222
|
+
*
|
|
223
|
+
* @example
|
|
224
|
+
* ```ts
|
|
225
|
+
* const s = new Series([1, 2, 3, 4, 5, 6]);
|
|
226
|
+
* s.tail(3); // Series([4, 5, 6])
|
|
227
|
+
* ```
|
|
228
|
+
*/
|
|
229
|
+
tail(n = 5) {
|
|
230
|
+
if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
|
|
231
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("n must be a non-negative integer", "n", n);
|
|
232
|
+
}
|
|
233
|
+
const sliceStart = this._data.length - n;
|
|
234
|
+
const options = {
|
|
235
|
+
index: this._index.slice(sliceStart)
|
|
236
|
+
};
|
|
237
|
+
if (this._name !== void 0) {
|
|
238
|
+
options.name = this._name;
|
|
239
|
+
}
|
|
240
|
+
return new _Series(this._data.slice(sliceStart), options);
|
|
241
|
+
}
|
|
242
|
+
/**
|
|
243
|
+
* Filter Series by a boolean predicate function.
|
|
244
|
+
*
|
|
245
|
+
* Filters both data AND index to maintain alignment.
|
|
246
|
+
*
|
|
247
|
+
* @param predicate - Function that returns true for elements to keep
|
|
248
|
+
* @returns New Series with only elements that passed the predicate
|
|
249
|
+
*
|
|
250
|
+
* @example
|
|
251
|
+
* ```ts
|
|
252
|
+
* const s = new Series([1, 2, 3, 4, 5]);
|
|
253
|
+
* s.filter(x => x > 2); // Series([3, 4, 5])
|
|
254
|
+
* ```
|
|
255
|
+
*/
|
|
256
|
+
filter(predicate) {
|
|
257
|
+
const filteredData = [];
|
|
258
|
+
const filteredIndex = [];
|
|
259
|
+
let dataIndex = 0;
|
|
260
|
+
for (const dataItem of this._data) {
|
|
261
|
+
const indexItem = this._index[dataIndex];
|
|
262
|
+
if (indexItem === void 0) {
|
|
263
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Index labels cannot be undefined");
|
|
264
|
+
}
|
|
265
|
+
if (predicate(dataItem, dataIndex)) {
|
|
266
|
+
filteredData.push(dataItem);
|
|
267
|
+
filteredIndex.push(indexItem);
|
|
268
|
+
}
|
|
269
|
+
dataIndex++;
|
|
270
|
+
}
|
|
271
|
+
const options = {
|
|
272
|
+
index: filteredIndex
|
|
273
|
+
};
|
|
274
|
+
if (this._name !== void 0) {
|
|
275
|
+
options.name = this._name;
|
|
276
|
+
}
|
|
277
|
+
return new _Series(filteredData, options);
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Transform each element using a mapping function.
|
|
281
|
+
*
|
|
282
|
+
* @template U - The type of the transformed values
|
|
283
|
+
* @param fn - Function to apply to each element
|
|
284
|
+
* @returns New Series with transformed values
|
|
285
|
+
*
|
|
286
|
+
* @example
|
|
287
|
+
* ```ts
|
|
288
|
+
* const s = new Series([1, 2, 3]);
|
|
289
|
+
* s.map(x => x * 2); // Series([2, 4, 6])
|
|
290
|
+
* ```
|
|
291
|
+
*/
|
|
292
|
+
map(fn) {
|
|
293
|
+
const options = {
|
|
294
|
+
index: this._index
|
|
295
|
+
};
|
|
296
|
+
if (this._name !== void 0) {
|
|
297
|
+
options.name = this._name;
|
|
298
|
+
}
|
|
299
|
+
return new _Series(this._data.map(fn), options);
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* Sort the Series values.
|
|
303
|
+
*
|
|
304
|
+
* Preserves index-value mapping by sorting `[value, index]` pairs.
|
|
305
|
+
*
|
|
306
|
+
* @param ascending - Sort in ascending order (default: true)
|
|
307
|
+
* @returns New sorted Series with index reordered to match
|
|
308
|
+
*
|
|
309
|
+
* @example
|
|
310
|
+
* ```ts
|
|
311
|
+
* const s = new Series([3, 1, 2], { index: ['a', 'b', 'c'] });
|
|
312
|
+
* s.sort(); // Series([1, 2, 3]) with index ['b', 'c', 'a']
|
|
313
|
+
* ```
|
|
314
|
+
*/
|
|
315
|
+
sort(ascending = true) {
|
|
316
|
+
const paired = [];
|
|
317
|
+
let pairIndex = 0;
|
|
318
|
+
for (const value of this._data) {
|
|
319
|
+
const idx = this._index[pairIndex];
|
|
320
|
+
if (idx === void 0) {
|
|
321
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Index labels cannot be undefined");
|
|
322
|
+
}
|
|
323
|
+
paired.push([value, idx]);
|
|
324
|
+
pairIndex++;
|
|
325
|
+
}
|
|
326
|
+
paired.sort((a, b) => {
|
|
327
|
+
const aVal = a[0];
|
|
328
|
+
const bVal = b[0];
|
|
329
|
+
if (typeof aVal === "number" && typeof bVal === "number") {
|
|
330
|
+
const aIsNaN = Number.isNaN(aVal);
|
|
331
|
+
const bIsNaN = Number.isNaN(bVal);
|
|
332
|
+
if (aIsNaN && bIsNaN) return 0;
|
|
333
|
+
if (aIsNaN) return 1;
|
|
334
|
+
if (bIsNaN) return -1;
|
|
335
|
+
return ascending ? aVal - bVal : bVal - aVal;
|
|
336
|
+
}
|
|
337
|
+
if (typeof aVal === "string" && typeof bVal === "string") {
|
|
338
|
+
return ascending ? aVal.localeCompare(bVal) : bVal.localeCompare(aVal);
|
|
339
|
+
}
|
|
340
|
+
const aStr = String(aVal);
|
|
341
|
+
const bStr = String(bVal);
|
|
342
|
+
return ascending ? aStr.localeCompare(bStr) : bStr.localeCompare(aStr);
|
|
343
|
+
});
|
|
344
|
+
const sortedData = paired.map((p) => p[0]);
|
|
345
|
+
const sortedIndex = paired.map((p) => p[1]);
|
|
346
|
+
const options = {
|
|
347
|
+
index: sortedIndex
|
|
348
|
+
};
|
|
349
|
+
if (this._name !== void 0) {
|
|
350
|
+
options.name = this._name;
|
|
351
|
+
}
|
|
352
|
+
return new _Series(sortedData, options);
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Get unique values in the Series.
|
|
356
|
+
*
|
|
357
|
+
* @returns Array of unique values (order preserved)
|
|
358
|
+
*
|
|
359
|
+
* @example
|
|
360
|
+
* ```ts
|
|
361
|
+
* const s = new Series([1, 2, 2, 3, 1]);
|
|
362
|
+
* s.unique(); // [1, 2, 3]
|
|
363
|
+
* ```
|
|
364
|
+
*/
|
|
365
|
+
unique() {
|
|
366
|
+
return [...new Set(this._data)];
|
|
367
|
+
}
|
|
368
|
+
/**
|
|
369
|
+
* Count occurrences of unique values.
|
|
370
|
+
*
|
|
371
|
+
* Returns a Series where index is the unique values and data is their counts.
|
|
372
|
+
*
|
|
373
|
+
* @returns Series where index is unique values and data is their counts
|
|
374
|
+
*
|
|
375
|
+
* @example
|
|
376
|
+
* ```ts
|
|
377
|
+
* const s = new Series(['a', 'b', 'a', 'c', 'a']);
|
|
378
|
+
* s.valueCounts(); // Series([3, 1, 1]) with index ['a', 'b', 'c']
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
valueCounts() {
|
|
382
|
+
for (const v of this._data) {
|
|
383
|
+
if (typeof v !== "string" && typeof v !== "number" && v !== null && v !== void 0) {
|
|
384
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.valueCounts() only supports Series<string | number>");
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
const counts = /* @__PURE__ */ new Map();
|
|
388
|
+
const keyToValue = /* @__PURE__ */ new Map();
|
|
389
|
+
for (const v of this._data) {
|
|
390
|
+
const key = createKey(v);
|
|
391
|
+
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
392
|
+
if (!keyToValue.has(key)) {
|
|
393
|
+
keyToValue.set(key, v);
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
const sortedKeys = [...counts.keys()].sort((a, b) => {
|
|
397
|
+
const countA = counts.get(a) ?? 0;
|
|
398
|
+
const countB = counts.get(b) ?? 0;
|
|
399
|
+
return countB - countA;
|
|
400
|
+
});
|
|
401
|
+
const values = sortedKeys.map((k) => counts.get(k) ?? 0);
|
|
402
|
+
const index = sortedKeys.map((k) => {
|
|
403
|
+
const val = keyToValue.get(k);
|
|
404
|
+
if (typeof val === "string" || typeof val === "number") {
|
|
405
|
+
return val;
|
|
406
|
+
}
|
|
407
|
+
return String(val);
|
|
408
|
+
});
|
|
409
|
+
return new _Series(values, {
|
|
410
|
+
index,
|
|
411
|
+
name: this._name ? `${this._name}_counts` : "counts"
|
|
412
|
+
});
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* Calculate the sum of all values.
|
|
416
|
+
*
|
|
417
|
+
* Skips null, undefined, and NaN values.
|
|
418
|
+
*
|
|
419
|
+
* @returns Sum of all numeric values.
|
|
420
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
421
|
+
*
|
|
422
|
+
* @example
|
|
423
|
+
* ```ts
|
|
424
|
+
* const s = new Series([1, 2, null, 3, 4]);
|
|
425
|
+
* s.sum(); // 10
|
|
426
|
+
* ```
|
|
427
|
+
*/
|
|
428
|
+
sum() {
|
|
429
|
+
if (this._data.length === 0) {
|
|
430
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get sum of empty Series");
|
|
431
|
+
}
|
|
432
|
+
let total = 0;
|
|
433
|
+
for (const val of this._data) {
|
|
434
|
+
if (val === null || val === void 0) continue;
|
|
435
|
+
if (typeof val !== "number") {
|
|
436
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.sum() only works on numeric data");
|
|
437
|
+
}
|
|
438
|
+
if (Number.isNaN(val)) continue;
|
|
439
|
+
total += val;
|
|
440
|
+
}
|
|
441
|
+
return total;
|
|
442
|
+
}
|
|
443
|
+
/**
|
|
444
|
+
* Calculate the arithmetic mean (average) of all values.
|
|
445
|
+
*
|
|
446
|
+
* Skips null, undefined, and NaN values.
|
|
447
|
+
*
|
|
448
|
+
* @returns Mean of all numeric values.
|
|
449
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
450
|
+
*
|
|
451
|
+
* @example
|
|
452
|
+
* ```ts
|
|
453
|
+
* const s = new Series([1, 2, null, 3, 4]);
|
|
454
|
+
* s.mean(); // 2.5
|
|
455
|
+
* ```
|
|
456
|
+
*/
|
|
457
|
+
mean() {
|
|
458
|
+
if (this._data.length === 0) {
|
|
459
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get mean of empty Series");
|
|
460
|
+
}
|
|
461
|
+
let total = 0;
|
|
462
|
+
let count = 0;
|
|
463
|
+
for (const val of this._data) {
|
|
464
|
+
if (val === null || val === void 0) continue;
|
|
465
|
+
if (typeof val !== "number") {
|
|
466
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.mean() only works on numeric data");
|
|
467
|
+
}
|
|
468
|
+
if (Number.isNaN(val)) continue;
|
|
469
|
+
total += val;
|
|
470
|
+
count++;
|
|
471
|
+
}
|
|
472
|
+
return count > 0 ? total / count : NaN;
|
|
473
|
+
}
|
|
474
|
+
/**
|
|
475
|
+
* Calculate the median (middle value) of all values.
|
|
476
|
+
*
|
|
477
|
+
* Skips null, undefined, and NaN values.
|
|
478
|
+
* For even-length Series, returns the average of the two middle values.
|
|
479
|
+
*
|
|
480
|
+
* @returns Median value.
|
|
481
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
482
|
+
*
|
|
483
|
+
* @example
|
|
484
|
+
* ```ts
|
|
485
|
+
* const s = new Series([1, 2, 3, 4, 5]);
|
|
486
|
+
* s.median(); // 3
|
|
487
|
+
* ```
|
|
488
|
+
*/
|
|
489
|
+
median() {
|
|
490
|
+
if (this._data.length === 0) {
|
|
491
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get median of empty Series");
|
|
492
|
+
}
|
|
493
|
+
const numericData = [];
|
|
494
|
+
for (const value of this._data) {
|
|
495
|
+
if (value === null || value === void 0) continue;
|
|
496
|
+
if (typeof value !== "number") {
|
|
497
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.median() only works on numeric data");
|
|
498
|
+
}
|
|
499
|
+
if (!Number.isNaN(value)) {
|
|
500
|
+
numericData.push(value);
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
if (numericData.length === 0) {
|
|
504
|
+
return NaN;
|
|
505
|
+
}
|
|
506
|
+
const sorted = [...numericData].sort((a, b) => a - b);
|
|
507
|
+
const middle = Math.floor(sorted.length / 2);
|
|
508
|
+
if (sorted.length % 2 === 0) {
|
|
509
|
+
const val1 = sorted[middle - 1];
|
|
510
|
+
const val2 = sorted[middle];
|
|
511
|
+
if (val1 === void 0 || val2 === void 0) {
|
|
512
|
+
return NaN;
|
|
513
|
+
}
|
|
514
|
+
return (val1 + val2) / 2;
|
|
515
|
+
}
|
|
516
|
+
const val = sorted[middle];
|
|
517
|
+
return val !== void 0 ? val : NaN;
|
|
518
|
+
}
|
|
519
|
+
/**
|
|
520
|
+
* Calculate the standard deviation of all values.
|
|
521
|
+
*
|
|
522
|
+
* Skips null, undefined, and NaN values.
|
|
523
|
+
* Uses sample standard deviation (divides by n-1).
|
|
524
|
+
*
|
|
525
|
+
* @returns Standard deviation.
|
|
526
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
527
|
+
*
|
|
528
|
+
* @example
|
|
529
|
+
* ```ts
|
|
530
|
+
* const s = new Series([2, 4, 6, 8]);
|
|
531
|
+
* s.std(); // ~2.58
|
|
532
|
+
* ```
|
|
533
|
+
*/
|
|
534
|
+
std() {
|
|
535
|
+
if (this._data.length === 0) {
|
|
536
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get std of empty Series");
|
|
537
|
+
}
|
|
538
|
+
const numericData = [];
|
|
539
|
+
for (const value of this._data) {
|
|
540
|
+
if (value === null || value === void 0) continue;
|
|
541
|
+
if (typeof value !== "number") {
|
|
542
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.std() only works on numeric data");
|
|
543
|
+
}
|
|
544
|
+
if (!Number.isNaN(value)) {
|
|
545
|
+
numericData.push(value);
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
if (numericData.length < 2) {
|
|
549
|
+
return NaN;
|
|
550
|
+
}
|
|
551
|
+
const sum = numericData.reduce((acc, val) => acc + val, 0);
|
|
552
|
+
const meanVal = sum / numericData.length;
|
|
553
|
+
let sumSquaredDiff = 0;
|
|
554
|
+
for (const val of numericData) {
|
|
555
|
+
const diff = val - meanVal;
|
|
556
|
+
sumSquaredDiff += diff * diff;
|
|
557
|
+
}
|
|
558
|
+
return Math.sqrt(sumSquaredDiff / (numericData.length - 1));
|
|
559
|
+
}
|
|
560
|
+
/**
|
|
561
|
+
* Calculate the variance of all values.
|
|
562
|
+
*
|
|
563
|
+
* Skips null, undefined, and NaN values.
|
|
564
|
+
* Uses sample variance (divides by n-1).
|
|
565
|
+
*
|
|
566
|
+
* @returns Variance.
|
|
567
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
568
|
+
*
|
|
569
|
+
* @example
|
|
570
|
+
* ```ts
|
|
571
|
+
* const s = new Series([2, 4, 6, 8]);
|
|
572
|
+
* s.var(); // ~6.67
|
|
573
|
+
* ```
|
|
574
|
+
*/
|
|
575
|
+
var() {
|
|
576
|
+
if (this._data.length === 0) {
|
|
577
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get variance of empty Series");
|
|
578
|
+
}
|
|
579
|
+
const numericData = [];
|
|
580
|
+
for (const value of this._data) {
|
|
581
|
+
if (value === null || value === void 0) continue;
|
|
582
|
+
if (typeof value !== "number") {
|
|
583
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.var() only works on numeric data");
|
|
584
|
+
}
|
|
585
|
+
if (!Number.isNaN(value)) {
|
|
586
|
+
numericData.push(value);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
if (numericData.length < 2) {
|
|
590
|
+
return NaN;
|
|
591
|
+
}
|
|
592
|
+
const sum = numericData.reduce((acc, val) => acc + val, 0);
|
|
593
|
+
const meanVal = sum / numericData.length;
|
|
594
|
+
let sumSquaredDiff = 0;
|
|
595
|
+
for (const val of numericData) {
|
|
596
|
+
const diff = val - meanVal;
|
|
597
|
+
sumSquaredDiff += diff * diff;
|
|
598
|
+
}
|
|
599
|
+
return sumSquaredDiff / (numericData.length - 1);
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Find the minimum value in the Series.
|
|
603
|
+
*
|
|
604
|
+
* Skips null, undefined, and NaN values.
|
|
605
|
+
*
|
|
606
|
+
* @returns Minimum value.
|
|
607
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
608
|
+
*
|
|
609
|
+
* @example
|
|
610
|
+
* ```ts
|
|
611
|
+
* const s = new Series([5, 2, 8, 1, 9]);
|
|
612
|
+
* s.min(); // 1
|
|
613
|
+
* ```
|
|
614
|
+
*/
|
|
615
|
+
min() {
|
|
616
|
+
if (this._data.length === 0) {
|
|
617
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get min of empty Series");
|
|
618
|
+
}
|
|
619
|
+
let minVal = Infinity;
|
|
620
|
+
let hasNumeric = false;
|
|
621
|
+
for (const val of this._data) {
|
|
622
|
+
if (val === null || val === void 0) continue;
|
|
623
|
+
if (typeof val !== "number") {
|
|
624
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.min() only works on numeric data");
|
|
625
|
+
}
|
|
626
|
+
if (!Number.isNaN(val)) {
|
|
627
|
+
if (val < minVal) {
|
|
628
|
+
minVal = val;
|
|
629
|
+
}
|
|
630
|
+
hasNumeric = true;
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
return hasNumeric ? minVal : NaN;
|
|
634
|
+
}
|
|
635
|
+
/**
|
|
636
|
+
* Find the maximum value in the Series.
|
|
637
|
+
*
|
|
638
|
+
* Skips null, undefined, and NaN values.
|
|
639
|
+
*
|
|
640
|
+
* @returns Maximum value.
|
|
641
|
+
* @throws {DataValidationError} If Series is empty or contains non-numeric data
|
|
642
|
+
*
|
|
643
|
+
* @example
|
|
644
|
+
* ```ts
|
|
645
|
+
* const s = new Series([5, 2, 8, 1, 9]);
|
|
646
|
+
* s.max(); // 9
|
|
647
|
+
* ```
|
|
648
|
+
*/
|
|
649
|
+
max() {
|
|
650
|
+
if (this._data.length === 0) {
|
|
651
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Cannot get max of empty Series");
|
|
652
|
+
}
|
|
653
|
+
let maxVal = -Infinity;
|
|
654
|
+
let hasNumeric = false;
|
|
655
|
+
for (const val of this._data) {
|
|
656
|
+
if (val === null || val === void 0) continue;
|
|
657
|
+
if (typeof val !== "number") {
|
|
658
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Series.max() only works on numeric data");
|
|
659
|
+
}
|
|
660
|
+
if (!Number.isNaN(val)) {
|
|
661
|
+
if (val > maxVal) {
|
|
662
|
+
maxVal = val;
|
|
663
|
+
}
|
|
664
|
+
hasNumeric = true;
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
return hasNumeric ? maxVal : NaN;
|
|
668
|
+
}
|
|
669
|
+
/**
|
|
670
|
+
* Convert the Series to a plain JavaScript array.
|
|
671
|
+
*
|
|
672
|
+
* Returns a shallow copy of the data.
|
|
673
|
+
*
|
|
674
|
+
* @returns Array copy of the data
|
|
675
|
+
*
|
|
676
|
+
* @example
|
|
677
|
+
* ```ts
|
|
678
|
+
* const s = new Series([1, 2, 3]);
|
|
679
|
+
* const arr = s.toArray(); // [1, 2, 3]
|
|
680
|
+
* ```
|
|
681
|
+
*/
|
|
682
|
+
toArray() {
|
|
683
|
+
return [...this._data];
|
|
684
|
+
}
|
|
685
|
+
/**
|
|
686
|
+
* Convert the Series to an ndarray Tensor.
|
|
687
|
+
*
|
|
688
|
+
* Uses the `tensor()` factory function.
|
|
689
|
+
*
|
|
690
|
+
* @returns Tensor containing the Series data
|
|
691
|
+
* @throws {DataValidationError} If data cannot be converted to Tensor
|
|
692
|
+
*
|
|
693
|
+
* @example
|
|
694
|
+
* ```ts
|
|
695
|
+
* import { Series } from 'deepbox/dataframe';
|
|
696
|
+
*
|
|
697
|
+
* const s = new Series([1, 2, 3, 4]);
|
|
698
|
+
* const t = s.toTensor(); // Tensor([1, 2, 3, 4])
|
|
699
|
+
* ```
|
|
700
|
+
*/
|
|
701
|
+
toTensor() {
|
|
702
|
+
const numeric = [];
|
|
703
|
+
for (const v of this._data) {
|
|
704
|
+
if (typeof v === "number") {
|
|
705
|
+
numeric.push(v);
|
|
706
|
+
} else if (v === null || v === void 0) {
|
|
707
|
+
numeric.push(NaN);
|
|
708
|
+
} else {
|
|
709
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
710
|
+
"Series.toTensor() only works on numeric data (or null/undefined)"
|
|
711
|
+
);
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
return chunk6AE5FKKQ_cjs.tensor(numeric);
|
|
715
|
+
}
|
|
716
|
+
/**
|
|
717
|
+
* Return a human-readable string representation of this Series.
|
|
718
|
+
*
|
|
719
|
+
* Each row is printed as `index value`, followed by a name (if set) and length
|
|
720
|
+
* footer. Large Series are truncated with an ellipsis.
|
|
721
|
+
*
|
|
722
|
+
* @param maxRows - Maximum rows to display before summarizing (default: 20).
|
|
723
|
+
* @returns Formatted string representation
|
|
724
|
+
*
|
|
725
|
+
* @example
|
|
726
|
+
* ```ts
|
|
727
|
+
* const s = new Series([10, 20, 30], { name: 'values' });
|
|
728
|
+
* s.toString();
|
|
729
|
+
* // "0 10\n1 20\n2 30\nName: values, Length: 3"
|
|
730
|
+
* ```
|
|
731
|
+
*/
|
|
732
|
+
toString(maxRows = 20) {
|
|
733
|
+
const n = this._data.length;
|
|
734
|
+
const half = Math.floor(maxRows / 2);
|
|
735
|
+
const showAll = n <= maxRows;
|
|
736
|
+
const rows = [];
|
|
737
|
+
const topCount = showAll ? n : half;
|
|
738
|
+
const bottomCount = showAll ? 0 : half;
|
|
739
|
+
for (let i = 0; i < topCount; i++) {
|
|
740
|
+
const idx = this._index[i];
|
|
741
|
+
const val = this._data[i];
|
|
742
|
+
rows.push([String(idx ?? i), val === null || val === void 0 ? "null" : String(val)]);
|
|
743
|
+
}
|
|
744
|
+
if (!showAll) {
|
|
745
|
+
rows.push(["...", "..."]);
|
|
746
|
+
for (let i = n - bottomCount; i < n; i++) {
|
|
747
|
+
const idx = this._index[i];
|
|
748
|
+
const val = this._data[i];
|
|
749
|
+
rows.push([String(idx ?? i), val === null || val === void 0 ? "null" : String(val)]);
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
let idxWidth = 0;
|
|
753
|
+
let valWidth = 0;
|
|
754
|
+
for (const [idx, val] of rows) {
|
|
755
|
+
if ((idx ?? "").length > idxWidth) idxWidth = (idx ?? "").length;
|
|
756
|
+
if ((val ?? "").length > valWidth) valWidth = (val ?? "").length;
|
|
757
|
+
}
|
|
758
|
+
const lines = [];
|
|
759
|
+
for (const [idx, val] of rows) {
|
|
760
|
+
lines.push(`${(idx ?? "").padStart(idxWidth)} ${val ?? ""}`);
|
|
761
|
+
}
|
|
762
|
+
const parts = [];
|
|
763
|
+
if (this._name !== void 0) parts.push(`Name: ${this._name}`);
|
|
764
|
+
parts.push(`Length: ${n}`);
|
|
765
|
+
lines.push(parts.join(", "));
|
|
766
|
+
return lines.join("\n");
|
|
767
|
+
}
|
|
768
|
+
};
|
|
769
|
+
|
|
770
|
+
// src/dataframe/DataFrame.ts
|
|
771
|
+
// Type guard: true for any number primitive (NaN and +/-Infinity included).
var isNumberValue = function isNumberValue(candidate) {
  return typeof candidate === "number";
};
|
|
772
|
+
// Type guard: an index label is a string or a number primitive.
var isIndexLabel = function isIndexLabel(candidate) {
  const kind = typeof candidate;
  return kind === "string" || kind === "number";
};
|
|
773
|
+
// Type guard: true for an array whose entries are all strings (an empty array qualifies).
var isStringArray = function isStringArray(candidate) {
  if (!Array.isArray(candidate)) {
    return false;
  }
  return !candidate.some((item) => typeof item !== "string");
};
|
|
774
|
+
// Type guard: true for an array whose entries all satisfy isIndexLabel.
var isIndexLabelArray = function isIndexLabelArray(candidate) {
  if (!Array.isArray(candidate)) {
    return false;
  }
  return candidate.every(isIndexLabel);
};
|
|
775
|
+
/**
 * Throw if `labels` contains the same label twice.
 *
 * @param labels - Iterable of labels to check.
 * @param labelName - Human-readable kind of label, used in the error message.
 * @throws {DataValidationError} On the first repeated label.
 */
var ensureUniqueLabels = (labels, labelName) => {
  const seen = /* @__PURE__ */ new Set();
  for (const label of labels) {
    if (seen.has(label)) {
      // String(label) keeps the message buildable for any label type (a bare
      // template interpolation throws TypeError for Symbols) and matches the
      // duplicate-index error raised by the DataFrame constructor.
      throw new chunkJSCDE774_cjs.DataValidationError(
        `Duplicate ${labelName} '${String(label)}' is not supported`
      );
    }
    seen.add(label);
  }
};
|
|
784
|
+
// Keep only the entries accepted by isValidNumber (defined elsewhere in this file).
var toNumericValues = function toNumericValues(values) {
  return values.filter(isValidNumber);
};
|
|
785
|
+
var DataFrame = class _DataFrame {
|
|
786
|
+
// Internal storage: Map of column names to data arrays
|
|
787
|
+
_data;
|
|
788
|
+
// Row labels (can be strings or numbers)
|
|
789
|
+
_index;
|
|
790
|
+
// Fast label -> position lookup for O(1) loc() access
|
|
791
|
+
_indexPos;
|
|
792
|
+
// Column names
|
|
793
|
+
_columns;
|
|
794
|
+
/**
|
|
795
|
+
* Creates a new DataFrame instance.
|
|
796
|
+
*
|
|
797
|
+
* @param data - Object mapping column names to arrays of values.
|
|
798
|
+
* All arrays must have the same length.
|
|
799
|
+
* @param options - Configuration options
|
|
800
|
+
* @param options.columns - Custom column order (defaults to Object.keys(data))
|
|
801
|
+
* @param options.index - Custom row labels (defaults to 0, 1, 2, ...)
|
|
802
|
+
*
|
|
803
|
+
* @example
|
|
804
|
+
* ```ts
|
|
805
|
+
* const df = new DataFrame({
|
|
806
|
+
* col1: [1, 2, 3],
|
|
807
|
+
* col2: ['a', 'b', 'c']
|
|
808
|
+
* }, {
|
|
809
|
+
* index: ['row1', 'row2', 'row3']
|
|
810
|
+
* });
|
|
811
|
+
* ```
|
|
812
|
+
*/
|
|
813
|
+
constructor(data, options = {}) {
|
|
814
|
+
this._columns = options.columns ? [...options.columns] : Object.keys(data);
|
|
815
|
+
ensureUniqueLabels(this._columns, "column name");
|
|
816
|
+
for (const col of this._columns) {
|
|
817
|
+
if (!(col in data)) {
|
|
818
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${col}' not found in DataFrame data`);
|
|
819
|
+
}
|
|
820
|
+
}
|
|
821
|
+
let firstColumnLength = 0;
|
|
822
|
+
if (this._columns.length > 0) {
|
|
823
|
+
const firstCol = this._columns[0];
|
|
824
|
+
if (firstCol === void 0) {
|
|
825
|
+
throw new chunkJSCDE774_cjs.DataValidationError("First column is undefined");
|
|
826
|
+
}
|
|
827
|
+
const firstColData = data[firstCol];
|
|
828
|
+
if (!Array.isArray(firstColData)) {
|
|
829
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${firstCol}' must be an array`);
|
|
830
|
+
}
|
|
831
|
+
firstColumnLength = firstColData.length;
|
|
832
|
+
}
|
|
833
|
+
this._index = options.index ? options.copy === false ? options.index : [...options.index] : Array.from({ length: firstColumnLength }, (_, i) => i);
|
|
834
|
+
if (this._columns.length > 0 && this._index.length !== firstColumnLength) {
|
|
835
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
836
|
+
`Index length (${this._index.length}) must match row count (${firstColumnLength})`
|
|
837
|
+
);
|
|
838
|
+
}
|
|
839
|
+
this._indexPos = /* @__PURE__ */ new Map();
|
|
840
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
841
|
+
const label = this._index[i];
|
|
842
|
+
if (label === void 0) {
|
|
843
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Index label at position ${i} is undefined`);
|
|
844
|
+
}
|
|
845
|
+
if (this._indexPos.has(label)) {
|
|
846
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Duplicate index label '${String(label)}' is not supported`);
|
|
847
|
+
}
|
|
848
|
+
this._indexPos.set(label, i);
|
|
849
|
+
}
|
|
850
|
+
this._data = /* @__PURE__ */ new Map();
|
|
851
|
+
for (const col of this._columns) {
|
|
852
|
+
const colData = data[col];
|
|
853
|
+
if (!Array.isArray(colData)) {
|
|
854
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${col}' not found in DataFrame data`);
|
|
855
|
+
}
|
|
856
|
+
if (colData.length !== firstColumnLength) {
|
|
857
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
858
|
+
`Column '${col}' length (${colData.length}) must match row count (${firstColumnLength})`
|
|
859
|
+
);
|
|
860
|
+
}
|
|
861
|
+
this._data.set(col, options.copy === false ? colData : [...colData]);
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
/**
|
|
865
|
+
* Get the dimensions of the DataFrame.
|
|
866
|
+
*
|
|
867
|
+
* @returns Tuple of [rows, columns]
|
|
868
|
+
*
|
|
869
|
+
* @example
|
|
870
|
+
* ```ts
|
|
871
|
+
* const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
|
|
872
|
+
* df.shape; // [3, 2]
|
|
873
|
+
* ```
|
|
874
|
+
*/
|
|
875
|
+
get shape() {
|
|
876
|
+
return [this._index.length, this._columns.length];
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Get the column names.
|
|
880
|
+
*
|
|
881
|
+
* @returns Array of column names (copy)
|
|
882
|
+
*/
|
|
883
|
+
get columns() {
|
|
884
|
+
return [...this._columns];
|
|
885
|
+
}
|
|
886
|
+
/**
|
|
887
|
+
* Get the row index labels.
|
|
888
|
+
*
|
|
889
|
+
* @returns Array of index labels (copy)
|
|
890
|
+
*/
|
|
891
|
+
get index() {
|
|
892
|
+
return [...this._index];
|
|
893
|
+
}
|
|
894
|
+
get(column, guard) {
|
|
895
|
+
const data = this._data.get(column);
|
|
896
|
+
if (data === void 0) {
|
|
897
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
898
|
+
`Column '${column}' not found in DataFrame`,
|
|
899
|
+
"column",
|
|
900
|
+
column
|
|
901
|
+
);
|
|
902
|
+
}
|
|
903
|
+
if (guard) {
|
|
904
|
+
const validated = [];
|
|
905
|
+
for (const value of data) {
|
|
906
|
+
if (!guard(value)) {
|
|
907
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
908
|
+
`Column '${column}' contains values that do not match the requested type`
|
|
909
|
+
);
|
|
910
|
+
}
|
|
911
|
+
validated.push(value);
|
|
912
|
+
}
|
|
913
|
+
return new Series(validated, {
|
|
914
|
+
index: this._index,
|
|
915
|
+
name: column,
|
|
916
|
+
copy: false
|
|
917
|
+
});
|
|
918
|
+
}
|
|
919
|
+
return new Series(data, {
|
|
920
|
+
index: this._index,
|
|
921
|
+
name: column,
|
|
922
|
+
copy: false
|
|
923
|
+
});
|
|
924
|
+
}
|
|
925
|
+
/**
|
|
926
|
+
* Access a row by label (label-based indexing).
|
|
927
|
+
*
|
|
928
|
+
* @param row - The index label of the row
|
|
929
|
+
* @returns Object mapping column names to values for that row
|
|
930
|
+
* @throws {IndexError} If row label not found
|
|
931
|
+
*
|
|
932
|
+
* @example
|
|
933
|
+
* ```ts
|
|
934
|
+
* const df = new DataFrame(
|
|
935
|
+
* { age: [25, 30], name: ['Alice', 'Bob'] },
|
|
936
|
+
* { index: ['row1', 'row2'] }
|
|
937
|
+
* );
|
|
938
|
+
* df.loc('row1'); // { age: 25, name: 'Alice' }
|
|
939
|
+
* ```
|
|
940
|
+
*/
|
|
941
|
+
loc(row) {
|
|
942
|
+
const position = this._indexPos.get(row) ?? -1;
|
|
943
|
+
if (position === -1) {
|
|
944
|
+
throw new chunkJSCDE774_cjs.IndexError(`Row label '${row}' not found in index`);
|
|
945
|
+
}
|
|
946
|
+
const result = {};
|
|
947
|
+
for (const col of this._columns) {
|
|
948
|
+
const colData = this._data.get(col);
|
|
949
|
+
if (colData) {
|
|
950
|
+
result[col] = colData[position];
|
|
951
|
+
}
|
|
952
|
+
}
|
|
953
|
+
return result;
|
|
954
|
+
}
|
|
955
|
+
/**
|
|
956
|
+
* Access a row by integer position (position-based indexing).
|
|
957
|
+
*
|
|
958
|
+
* @param position - The integer position (0-based)
|
|
959
|
+
* @returns Object mapping column names to values for that row
|
|
960
|
+
* @throws {IndexError} If position is out of bounds
|
|
961
|
+
*
|
|
962
|
+
* @example
|
|
963
|
+
* ```ts
|
|
964
|
+
* const df = new DataFrame({ age: [25, 30], name: ['Alice', 'Bob'] });
|
|
965
|
+
* df.iloc(0); // { age: 25, name: 'Alice' }
|
|
966
|
+
* df.iloc(1); // { age: 30, name: 'Bob' }
|
|
967
|
+
* ```
|
|
968
|
+
*/
|
|
969
|
+
iloc(position) {
|
|
970
|
+
if (this._index.length === 0) {
|
|
971
|
+
throw new chunkJSCDE774_cjs.IndexError(`DataFrame is empty`, {
|
|
972
|
+
index: position,
|
|
973
|
+
validRange: [0, 0]
|
|
974
|
+
});
|
|
975
|
+
}
|
|
976
|
+
if (position < 0 || position >= this._index.length) {
|
|
977
|
+
throw new chunkJSCDE774_cjs.IndexError(`Position ${position} is out of bounds (0-${this._index.length - 1})`, {
|
|
978
|
+
index: position,
|
|
979
|
+
validRange: [0, this._index.length - 1]
|
|
980
|
+
});
|
|
981
|
+
}
|
|
982
|
+
const result = {};
|
|
983
|
+
for (const col of this._columns) {
|
|
984
|
+
const colData = this._data.get(col);
|
|
985
|
+
if (colData) {
|
|
986
|
+
result[col] = colData[position];
|
|
987
|
+
}
|
|
988
|
+
}
|
|
989
|
+
return result;
|
|
990
|
+
}
|
|
991
|
+
/**
|
|
992
|
+
* Return the first n rows.
|
|
993
|
+
*
|
|
994
|
+
* @param n - Number of rows to return (default: 5)
|
|
995
|
+
* @returns New DataFrame with first n rows
|
|
996
|
+
*
|
|
997
|
+
* @example
|
|
998
|
+
* ```ts
|
|
999
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
|
|
1000
|
+
* df.head(3); // DataFrame with rows 0-2
|
|
1001
|
+
* ```
|
|
1002
|
+
*/
|
|
1003
|
+
head(n = 5) {
|
|
1004
|
+
if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
|
|
1005
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("n must be a non-negative integer", "n", n);
|
|
1006
|
+
}
|
|
1007
|
+
const newData = {};
|
|
1008
|
+
for (const col of this._columns) {
|
|
1009
|
+
const colData = this._data.get(col);
|
|
1010
|
+
newData[col] = colData ? colData.slice(0, n) : [];
|
|
1011
|
+
}
|
|
1012
|
+
return new _DataFrame(newData, {
|
|
1013
|
+
columns: this._columns,
|
|
1014
|
+
index: this._index.slice(0, n)
|
|
1015
|
+
});
|
|
1016
|
+
}
|
|
1017
|
+
/**
|
|
1018
|
+
* Return the last n rows.
|
|
1019
|
+
*
|
|
1020
|
+
* @param n - Number of rows to return (default: 5)
|
|
1021
|
+
* @returns New DataFrame with last n rows
|
|
1022
|
+
*
|
|
1023
|
+
* @example
|
|
1024
|
+
* ```ts
|
|
1025
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
|
|
1026
|
+
* df.tail(3); // DataFrame with rows 2-4
|
|
1027
|
+
* ```
|
|
1028
|
+
*/
|
|
1029
|
+
tail(n = 5) {
|
|
1030
|
+
if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
|
|
1031
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("n must be a non-negative integer", "n", n);
|
|
1032
|
+
}
|
|
1033
|
+
const sliceStart = this._index.length - n;
|
|
1034
|
+
const newData = {};
|
|
1035
|
+
for (const col of this._columns) {
|
|
1036
|
+
const colData = this._data.get(col);
|
|
1037
|
+
newData[col] = colData ? colData.slice(sliceStart) : [];
|
|
1038
|
+
}
|
|
1039
|
+
return new _DataFrame(newData, {
|
|
1040
|
+
columns: this._columns,
|
|
1041
|
+
index: this._index.slice(sliceStart)
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
1044
|
+
/**
|
|
1045
|
+
* Filter rows based on a boolean predicate function.
|
|
1046
|
+
*
|
|
1047
|
+
* @param predicate - Function that returns true for rows to keep
|
|
1048
|
+
* @returns New DataFrame with filtered rows
|
|
1049
|
+
*
|
|
1050
|
+
* @example
|
|
1051
|
+
* ```ts
|
|
1052
|
+
* const df = new DataFrame({ age: [25, 30, 35], name: ['Alice', 'Bob', 'Carol'] });
|
|
1053
|
+
* const filtered = df.filter(row => row.age > 28);
|
|
1054
|
+
* // DataFrame with Bob and Carol
|
|
1055
|
+
* ```
|
|
1056
|
+
*/
|
|
1057
|
+
filter(predicate) {
|
|
1058
|
+
const nCols = this._columns.length;
|
|
1059
|
+
const nRows = this._index.length;
|
|
1060
|
+
const colArrays = new Array(nCols);
|
|
1061
|
+
for (let c = 0; c < nCols; c++) {
|
|
1062
|
+
colArrays[c] = this._data.get(this._columns[c]) ?? [];
|
|
1063
|
+
}
|
|
1064
|
+
const matchIndices = [];
|
|
1065
|
+
const row = {};
|
|
1066
|
+
for (let i = 0; i < nRows; i++) {
|
|
1067
|
+
for (let c = 0; c < nCols; c++) {
|
|
1068
|
+
row[this._columns[c]] = colArrays[c][i];
|
|
1069
|
+
}
|
|
1070
|
+
if (predicate(row)) {
|
|
1071
|
+
matchIndices.push(i);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
const matchCount = matchIndices.length;
|
|
1075
|
+
const filteredData = {};
|
|
1076
|
+
for (let c = 0; c < nCols; c++) {
|
|
1077
|
+
const src = colArrays[c];
|
|
1078
|
+
const dst = new Array(matchCount);
|
|
1079
|
+
for (let m = 0; m < matchCount; m++) {
|
|
1080
|
+
dst[m] = src[matchIndices[m]];
|
|
1081
|
+
}
|
|
1082
|
+
filteredData[this._columns[c]] = dst;
|
|
1083
|
+
}
|
|
1084
|
+
const filteredIndex = new Array(matchCount);
|
|
1085
|
+
for (let m = 0; m < matchCount; m++) {
|
|
1086
|
+
filteredIndex[m] = this._index[matchIndices[m]];
|
|
1087
|
+
}
|
|
1088
|
+
return new _DataFrame(filteredData, {
|
|
1089
|
+
columns: this._columns,
|
|
1090
|
+
index: filteredIndex,
|
|
1091
|
+
copy: false
|
|
1092
|
+
});
|
|
1093
|
+
}
|
|
1094
|
+
/**
|
|
1095
|
+
* Select a subset of columns.
|
|
1096
|
+
*
|
|
1097
|
+
* @param columns - Array of column names to select
|
|
1098
|
+
* @returns New DataFrame with only specified columns
|
|
1099
|
+
* @throws {InvalidParameterError} If any column doesn't exist
|
|
1100
|
+
*
|
|
1101
|
+
* @example
|
|
1102
|
+
* ```ts
|
|
1103
|
+
* const df = new DataFrame({ a: [1, 2], b: [3, 4], c: [5, 6] });
|
|
1104
|
+
* df.select(['a', 'c']); // DataFrame with only columns a and c
|
|
1105
|
+
* ```
|
|
1106
|
+
*/
|
|
1107
|
+
select(columns) {
|
|
1108
|
+
for (const col of columns) {
|
|
1109
|
+
if (!this._data.has(col)) {
|
|
1110
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Column '${col}' not found in DataFrame`, "columns", col);
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
const newData = {};
|
|
1114
|
+
for (const col of columns) {
|
|
1115
|
+
const colData = this._data.get(col);
|
|
1116
|
+
newData[col] = colData ? colData.slice() : [];
|
|
1117
|
+
}
|
|
1118
|
+
return new _DataFrame(newData, {
|
|
1119
|
+
columns,
|
|
1120
|
+
index: this._index,
|
|
1121
|
+
copy: false
|
|
1122
|
+
});
|
|
1123
|
+
}
|
|
1124
|
+
/**
|
|
1125
|
+
* Drop (remove) specified columns.
|
|
1126
|
+
*
|
|
1127
|
+
* @param columns - Array of column names to drop
|
|
1128
|
+
* @returns New DataFrame without the dropped columns
|
|
1129
|
+
*
|
|
1130
|
+
* @example
|
|
1131
|
+
* ```ts
|
|
1132
|
+
* const df = new DataFrame({ a: [1, 2], b: [3, 4], c: [5, 6] });
|
|
1133
|
+
* df.drop(['b']); // DataFrame with only columns a and c
|
|
1134
|
+
* ```
|
|
1135
|
+
*/
|
|
1136
|
+
drop(columns) {
|
|
1137
|
+
if (!isStringArray(columns)) {
|
|
1138
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("columns must be an array of strings", "columns", columns);
|
|
1139
|
+
}
|
|
1140
|
+
ensureUniqueLabels(columns, "column name");
|
|
1141
|
+
for (const col of columns) {
|
|
1142
|
+
if (!this._data.has(col)) {
|
|
1143
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Column '${col}' not found in DataFrame`, "columns", col);
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
const columnsToKeep = this._columns.filter((col) => !columns.includes(col));
|
|
1147
|
+
const newData = {};
|
|
1148
|
+
for (const col of columnsToKeep) {
|
|
1149
|
+
const colData = this._data.get(col);
|
|
1150
|
+
newData[col] = colData ? [...colData] : [];
|
|
1151
|
+
}
|
|
1152
|
+
return new _DataFrame(newData, {
|
|
1153
|
+
columns: columnsToKeep,
|
|
1154
|
+
index: this._index
|
|
1155
|
+
});
|
|
1156
|
+
}
|
|
1157
|
+
/**
|
|
1158
|
+
* Sort DataFrame by one or more columns.
|
|
1159
|
+
*
|
|
1160
|
+
* @param by - Column name or array of column names to sort by
|
|
1161
|
+
* @param ascending - Sort in ascending order (default: true)
|
|
1162
|
+
* @returns New sorted DataFrame
|
|
1163
|
+
*
|
|
1164
|
+
* @example
|
|
1165
|
+
* ```ts
|
|
1166
|
+
* const df = new DataFrame({ age: [30, 25, 35], name: ['Bob', 'Alice', 'Carol'] });
|
|
1167
|
+
* df.sort('age'); // Sorted by age ascending
|
|
1168
|
+
* df.sort(['age'], false); // Sorted by age descending
|
|
1169
|
+
* ```
|
|
1170
|
+
*/
|
|
1171
|
+
sort(by, ascending = true) {
|
|
1172
|
+
const sortCols = Array.isArray(by) ? by : [by];
|
|
1173
|
+
for (const col of sortCols) {
|
|
1174
|
+
if (!this._data.has(col)) {
|
|
1175
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Column '${col}' not found in DataFrame`, "by", col);
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
const nRows = this._index.length;
|
|
1179
|
+
const sortColArrays = new Array(sortCols.length);
|
|
1180
|
+
for (let c = 0; c < sortCols.length; c++) {
|
|
1181
|
+
sortColArrays[c] = this._data.get(sortCols[c]) ?? [];
|
|
1182
|
+
}
|
|
1183
|
+
const indices = new Array(nRows);
|
|
1184
|
+
for (let i = 0; i < nRows; i++) indices[i] = i;
|
|
1185
|
+
indices.sort((ai, bi) => {
|
|
1186
|
+
for (let c = 0; c < sortColArrays.length; c++) {
|
|
1187
|
+
const colArr = sortColArrays[c];
|
|
1188
|
+
const aVal = colArr[ai];
|
|
1189
|
+
const bVal = colArr[bi];
|
|
1190
|
+
if (isNumberValue(aVal) && isNumberValue(bVal)) {
|
|
1191
|
+
const aIsNaN = Number.isNaN(aVal);
|
|
1192
|
+
const bIsNaN = Number.isNaN(bVal);
|
|
1193
|
+
if (aIsNaN && bIsNaN) continue;
|
|
1194
|
+
if (aIsNaN) return 1;
|
|
1195
|
+
if (bIsNaN) return -1;
|
|
1196
|
+
const diff = aVal - bVal;
|
|
1197
|
+
if (diff !== 0) return ascending ? diff : -diff;
|
|
1198
|
+
} else if (typeof aVal === "string" && typeof bVal === "string") {
|
|
1199
|
+
const cmp = aVal.localeCompare(bVal);
|
|
1200
|
+
if (cmp !== 0) return ascending ? cmp : -cmp;
|
|
1201
|
+
} else {
|
|
1202
|
+
const cmp = String(aVal).localeCompare(String(bVal));
|
|
1203
|
+
if (cmp !== 0) return ascending ? cmp : -cmp;
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
return 0;
|
|
1207
|
+
});
|
|
1208
|
+
const sortedData = {};
|
|
1209
|
+
for (const col of this._columns) {
|
|
1210
|
+
const src = this._data.get(col) ?? [];
|
|
1211
|
+
const dst = new Array(nRows);
|
|
1212
|
+
for (let i = 0; i < nRows; i++) {
|
|
1213
|
+
dst[i] = src[indices[i]];
|
|
1214
|
+
}
|
|
1215
|
+
sortedData[col] = dst;
|
|
1216
|
+
}
|
|
1217
|
+
const sortedIndex = new Array(nRows);
|
|
1218
|
+
for (let i = 0; i < nRows; i++) {
|
|
1219
|
+
sortedIndex[i] = this._index[indices[i]];
|
|
1220
|
+
}
|
|
1221
|
+
return new _DataFrame(sortedData, {
|
|
1222
|
+
columns: this._columns,
|
|
1223
|
+
index: sortedIndex
|
|
1224
|
+
});
|
|
1225
|
+
}
|
|
1226
|
+
/**
|
|
1227
|
+
* Group DataFrame by one or more columns.
|
|
1228
|
+
*
|
|
1229
|
+
* Returns a DataFrameGroupBy object for performing aggregations.
|
|
1230
|
+
*
|
|
1231
|
+
* @param by - Column name or array of column names to group by
|
|
1232
|
+
* @returns DataFrameGroupBy object for aggregation operations
|
|
1233
|
+
*
|
|
1234
|
+
* @example
|
|
1235
|
+
* ```ts
|
|
1236
|
+
* const df = new DataFrame({
|
|
1237
|
+
* category: ['A', 'B', 'A', 'B'],
|
|
1238
|
+
* value: [10, 20, 30, 40]
|
|
1239
|
+
* });
|
|
1240
|
+
* const grouped = df.groupBy('category');
|
|
1241
|
+
* grouped.sum(); // Sum values by category
|
|
1242
|
+
* ```
|
|
1243
|
+
*/
|
|
1244
|
+
groupBy(by) {
|
|
1245
|
+
return new DataFrameGroupBy(this, by);
|
|
1246
|
+
}
|
|
1247
|
+
/**
|
|
1248
|
+
* Join with another DataFrame using SQL-style join.
|
|
1249
|
+
*
|
|
1250
|
+
* Uses hash join algorithm for O(n + m) time complexity.
|
|
1251
|
+
* Optimized for large datasets with minimal memory overhead.
|
|
1252
|
+
*
|
|
1253
|
+
* @param other - DataFrame to join with
|
|
1254
|
+
* @param on - Column name to join on (must exist in both DataFrames)
|
|
1255
|
+
* @param how - Type of join operation
|
|
1256
|
+
* - 'inner': Only rows with matching keys in both DataFrames
|
|
1257
|
+
* - 'left': All rows from left, matched rows from right (nulls for non-matches)
|
|
1258
|
+
* - 'right': All rows from right, matched rows from left (nulls for non-matches)
|
|
1259
|
+
* - 'outer': All rows from both DataFrames (nulls for non-matches)
|
|
1260
|
+
* @returns New DataFrame with joined data
|
|
1261
|
+
*
|
|
1262
|
+
* @throws {InvalidParameterError} If join column doesn't exist in either DataFrame
|
|
1263
|
+
*
|
|
1264
|
+
* @example
|
|
1265
|
+
* ```ts
|
|
1266
|
+
* const customers = new DataFrame({
|
|
1267
|
+
* id: [1, 2, 3],
|
|
1268
|
+
* name: ['Alice', 'Bob', 'Charlie']
|
|
1269
|
+
* });
|
|
1270
|
+
* const orders = new DataFrame({
|
|
1271
|
+
* id: [1, 1, 2, 4],
|
|
1272
|
+
* product: ['Laptop', 'Mouse', 'Keyboard', 'Monitor']
|
|
1273
|
+
* });
|
|
1274
|
+
*
|
|
1275
|
+
* // Inner join - only customers with orders
|
|
1276
|
+
* const inner = customers.join(orders, 'id', 'inner');
|
|
1277
|
+
* // Result: Alice with 2 orders, Bob with 1 order
|
|
1278
|
+
*
|
|
1279
|
+
* // Left join - all customers, with/without orders
|
|
1280
|
+
* const left = customers.join(orders, 'id', 'left');
|
|
1281
|
+
* // Result: Alice, Bob, Charlie (Charlie has null for product)
|
|
1282
|
+
* ```
|
|
1283
|
+
*
|
|
1284
|
+
* @see {@link https://en.wikipedia.org/wiki/Hash_join | Hash Join Algorithm}
|
|
1285
|
+
*/
|
|
1286
|
+
join(other, on, how = "inner") {
|
|
1287
|
+
if (!["inner", "left", "right", "outer"].includes(how)) {
|
|
1288
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1289
|
+
'how must be one of "inner", "left", "right", or "outer"',
|
|
1290
|
+
"how",
|
|
1291
|
+
how
|
|
1292
|
+
);
|
|
1293
|
+
}
|
|
1294
|
+
if (!this._columns.includes(on)) {
|
|
1295
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Join column '${on}' not found in left DataFrame`, "on", on);
|
|
1296
|
+
}
|
|
1297
|
+
if (!other._columns.includes(on)) {
|
|
1298
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Join column '${on}' not found in right DataFrame`, "on", on);
|
|
1299
|
+
}
|
|
1300
|
+
const rightHash = /* @__PURE__ */ new Map();
|
|
1301
|
+
const rightData = other._data.get(on) ?? [];
|
|
1302
|
+
for (let i = 0; i < rightData.length; i++) {
|
|
1303
|
+
const val = rightData[i];
|
|
1304
|
+
if (val === null || val === void 0) continue;
|
|
1305
|
+
const key = createKey(val);
|
|
1306
|
+
const indices = rightHash.get(key) ?? [];
|
|
1307
|
+
indices.push(i);
|
|
1308
|
+
rightHash.set(key, indices);
|
|
1309
|
+
}
|
|
1310
|
+
const matchedRightRows = /* @__PURE__ */ new Set();
|
|
1311
|
+
const rightNonKeyColumns = other._columns.filter((col) => col !== on);
|
|
1312
|
+
const overlapping = /* @__PURE__ */ new Set();
|
|
1313
|
+
for (const col of rightNonKeyColumns) {
|
|
1314
|
+
if (this._columns.includes(col)) {
|
|
1315
|
+
overlapping.add(col);
|
|
1316
|
+
}
|
|
1317
|
+
}
|
|
1318
|
+
const leftOutputNames = [];
|
|
1319
|
+
for (const col of this._columns) {
|
|
1320
|
+
if (col !== on && overlapping.has(col)) {
|
|
1321
|
+
leftOutputNames.push(`${col}_left`);
|
|
1322
|
+
} else {
|
|
1323
|
+
leftOutputNames.push(col);
|
|
1324
|
+
}
|
|
1325
|
+
}
|
|
1326
|
+
const rightOutputNames = [];
|
|
1327
|
+
for (const col of rightNonKeyColumns) {
|
|
1328
|
+
if (overlapping.has(col)) {
|
|
1329
|
+
rightOutputNames.push(`${col}_right`);
|
|
1330
|
+
} else {
|
|
1331
|
+
rightOutputNames.push(col);
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
const allColumns = [...leftOutputNames, ...rightOutputNames];
|
|
1335
|
+
const resultData = {};
|
|
1336
|
+
for (const col of allColumns) {
|
|
1337
|
+
resultData[col] = [];
|
|
1338
|
+
}
|
|
1339
|
+
const leftData = this._data.get(on) ?? [];
|
|
1340
|
+
for (let i = 0; i < leftData.length; i++) {
|
|
1341
|
+
const leftKey = createKey(leftData[i]);
|
|
1342
|
+
const matches = rightHash.get(leftKey) ?? [];
|
|
1343
|
+
if (matches.length > 0) {
|
|
1344
|
+
for (const rightIdx of matches) {
|
|
1345
|
+
matchedRightRows.add(rightIdx);
|
|
1346
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1347
|
+
const originalCol = this._columns[j];
|
|
1348
|
+
const outputCol = leftOutputNames[j];
|
|
1349
|
+
if (originalCol && outputCol) {
|
|
1350
|
+
const colData = this._data.get(originalCol);
|
|
1351
|
+
resultData[outputCol]?.push(colData?.[i] ?? null);
|
|
1352
|
+
}
|
|
1353
|
+
}
|
|
1354
|
+
for (let j = 0; j < rightNonKeyColumns.length; j++) {
|
|
1355
|
+
const originalCol = rightNonKeyColumns[j];
|
|
1356
|
+
const outputCol = rightOutputNames[j];
|
|
1357
|
+
if (originalCol && outputCol) {
|
|
1358
|
+
const colData = other._data.get(originalCol);
|
|
1359
|
+
resultData[outputCol]?.push(colData?.[rightIdx] ?? null);
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
1362
|
+
}
|
|
1363
|
+
} else if (how === "left" || how === "outer") {
|
|
1364
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1365
|
+
const originalCol = this._columns[j];
|
|
1366
|
+
const outputCol = leftOutputNames[j];
|
|
1367
|
+
if (originalCol && outputCol) {
|
|
1368
|
+
const colData = this._data.get(originalCol);
|
|
1369
|
+
resultData[outputCol]?.push(colData?.[i] ?? null);
|
|
1370
|
+
}
|
|
1371
|
+
}
|
|
1372
|
+
for (const col of rightOutputNames) {
|
|
1373
|
+
resultData[col]?.push(null);
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
1377
|
+
if (how === "right" || how === "outer") {
|
|
1378
|
+
for (let i = 0; i < rightData.length; i++) {
|
|
1379
|
+
if (!matchedRightRows.has(i)) {
|
|
1380
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1381
|
+
const originalCol = this._columns[j];
|
|
1382
|
+
const outputCol = leftOutputNames[j];
|
|
1383
|
+
if (originalCol && outputCol) {
|
|
1384
|
+
if (originalCol === on) {
|
|
1385
|
+
const colData = other._data.get(on);
|
|
1386
|
+
resultData[outputCol]?.push(colData?.[i] ?? null);
|
|
1387
|
+
} else {
|
|
1388
|
+
resultData[outputCol]?.push(null);
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
}
|
|
1392
|
+
for (let j = 0; j < rightNonKeyColumns.length; j++) {
|
|
1393
|
+
const originalCol = rightNonKeyColumns[j];
|
|
1394
|
+
const outputCol = rightOutputNames[j];
|
|
1395
|
+
if (originalCol && outputCol) {
|
|
1396
|
+
const colData = other._data.get(originalCol);
|
|
1397
|
+
resultData[outputCol]?.push(colData?.[i] ?? null);
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
return new _DataFrame(resultData, { columns: allColumns });
|
|
1404
|
+
}
|
|
1405
|
+
/**
|
|
1406
|
+
* Merge with another DataFrame using pandas-style merge.
|
|
1407
|
+
*
|
|
1408
|
+
* More flexible than join() - supports different column names for join keys.
|
|
1409
|
+
* Uses hash join algorithm for O(n + m) complexity.
|
|
1410
|
+
*
|
|
1411
|
+
* @param other - DataFrame to merge with
|
|
1412
|
+
* @param options - Merge configuration
|
|
1413
|
+
* - on: Column name to join on (must exist in both DataFrames)
|
|
1414
|
+
* - left_on: Column name in left DataFrame
|
|
1415
|
+
* - right_on: Column name in right DataFrame
|
|
1416
|
+
* - how: Join type ('inner', 'left', 'right', 'outer')
|
|
1417
|
+
* - suffixes: Suffix for duplicate column names ['_x', '_y']
|
|
1418
|
+
* @returns New DataFrame with merged data
|
|
1419
|
+
*
|
|
1420
|
+
* @throws {InvalidParameterError} If merge columns don't exist or conflicting options provided
|
|
1421
|
+
*
|
|
1422
|
+
* @example
|
|
1423
|
+
* ```ts
|
|
1424
|
+
* const employees = new DataFrame({
|
|
1425
|
+
* emp_id: [1, 2, 3],
|
|
1426
|
+
* name: ['Alice', 'Bob', 'Charlie']
|
|
1427
|
+
* });
|
|
1428
|
+
* const salaries = new DataFrame({
|
|
1429
|
+
* employee_id: [1, 2, 4],
|
|
1430
|
+
* salary: [50000, 60000, 55000]
|
|
1431
|
+
* });
|
|
1432
|
+
*
|
|
1433
|
+
* // Merge on different column names
|
|
1434
|
+
* const result = employees.merge(salaries, {
|
|
1435
|
+
* left_on: 'emp_id',
|
|
1436
|
+
* right_on: 'employee_id',
|
|
1437
|
+
* how: 'left'
|
|
1438
|
+
* });
|
|
1439
|
+
* ```
|
|
1440
|
+
*
|
|
1441
|
+
* @see {@link https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html | Pandas merge}
|
|
1442
|
+
*/
|
|
1443
|
+
merge(other, options = {}) {
|
|
1444
|
+
const how = options.how ?? "inner";
|
|
1445
|
+
if (!["inner", "left", "right", "outer"].includes(how)) {
|
|
1446
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1447
|
+
'how must be one of "inner", "left", "right", or "outer"',
|
|
1448
|
+
"how",
|
|
1449
|
+
how
|
|
1450
|
+
);
|
|
1451
|
+
}
|
|
1452
|
+
if (options.suffixes !== void 0) {
|
|
1453
|
+
if (!Array.isArray(options.suffixes) || options.suffixes.length !== 2 || typeof options.suffixes[0] !== "string" || typeof options.suffixes[1] !== "string") {
|
|
1454
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1455
|
+
"suffixes must be a tuple of two strings",
|
|
1456
|
+
"suffixes",
|
|
1457
|
+
options.suffixes
|
|
1458
|
+
);
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
const suffixes = options.suffixes ?? ["_x", "_y"];
|
|
1462
|
+
let leftOn;
|
|
1463
|
+
let rightOn;
|
|
1464
|
+
if (options.on) {
|
|
1465
|
+
if (typeof options.on !== "string") {
|
|
1466
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("on must be a string", "on", options.on);
|
|
1467
|
+
}
|
|
1468
|
+
if (options.left_on || options.right_on) {
|
|
1469
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError('Cannot specify both "on" and "left_on"/"right_on"');
|
|
1470
|
+
}
|
|
1471
|
+
leftOn = options.on;
|
|
1472
|
+
rightOn = options.on;
|
|
1473
|
+
} else if (options.left_on && options.right_on) {
|
|
1474
|
+
if (typeof options.left_on !== "string") {
|
|
1475
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("left_on must be a string", "left_on", options.left_on);
|
|
1476
|
+
}
|
|
1477
|
+
if (typeof options.right_on !== "string") {
|
|
1478
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("right_on must be a string", "right_on", options.right_on);
|
|
1479
|
+
}
|
|
1480
|
+
leftOn = options.left_on;
|
|
1481
|
+
rightOn = options.right_on;
|
|
1482
|
+
} else {
|
|
1483
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError('Must specify either "on" or both "left_on" and "right_on"');
|
|
1484
|
+
}
|
|
1485
|
+
if (!this._columns.includes(leftOn)) {
|
|
1486
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1487
|
+
`Column '${leftOn}' not found in left DataFrame`,
|
|
1488
|
+
"left_on",
|
|
1489
|
+
leftOn
|
|
1490
|
+
);
|
|
1491
|
+
}
|
|
1492
|
+
if (!other._columns.includes(rightOn)) {
|
|
1493
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1494
|
+
`Column '${rightOn}' not found in right DataFrame`,
|
|
1495
|
+
"right_on",
|
|
1496
|
+
rightOn
|
|
1497
|
+
);
|
|
1498
|
+
}
|
|
1499
|
+
const rightHash = /* @__PURE__ */ new Map();
|
|
1500
|
+
const rightData = other._data.get(rightOn) ?? [];
|
|
1501
|
+
for (let i = 0; i < rightData.length; i++) {
|
|
1502
|
+
const val = rightData[i];
|
|
1503
|
+
if (val === null || val === void 0) continue;
|
|
1504
|
+
const key = createKey(val);
|
|
1505
|
+
const indices = rightHash.get(key) ?? [];
|
|
1506
|
+
indices.push(i);
|
|
1507
|
+
rightHash.set(key, indices);
|
|
1508
|
+
}
|
|
1509
|
+
const matchedRightRows = /* @__PURE__ */ new Set();
|
|
1510
|
+
const resultData = {};
|
|
1511
|
+
const leftColumns = this._columns.map((col) => {
|
|
1512
|
+
if (col === leftOn) return col;
|
|
1513
|
+
if (other._columns.includes(col) && col !== rightOn) {
|
|
1514
|
+
return col + suffixes[0];
|
|
1515
|
+
}
|
|
1516
|
+
return col;
|
|
1517
|
+
});
|
|
1518
|
+
const leftColumnSet = new Set(leftColumns);
|
|
1519
|
+
const rightColumns = [];
|
|
1520
|
+
const originalLeftColumns = new Set(this._columns);
|
|
1521
|
+
for (const col of other._columns) {
|
|
1522
|
+
if (leftOn === rightOn && col === rightOn) {
|
|
1523
|
+
continue;
|
|
1524
|
+
}
|
|
1525
|
+
let resultCol = col;
|
|
1526
|
+
if (originalLeftColumns.has(col) && col !== leftOn) {
|
|
1527
|
+
resultCol = `${col}${suffixes[1]}`;
|
|
1528
|
+
}
|
|
1529
|
+
if (leftColumnSet.has(resultCol)) {
|
|
1530
|
+
let suffixIndex = 0;
|
|
1531
|
+
let candidate = `${resultCol}`;
|
|
1532
|
+
while (leftColumnSet.has(candidate)) {
|
|
1533
|
+
suffixIndex++;
|
|
1534
|
+
candidate = `${resultCol}_${suffixIndex}`;
|
|
1535
|
+
}
|
|
1536
|
+
resultCol = candidate;
|
|
1537
|
+
}
|
|
1538
|
+
rightColumns.push(resultCol);
|
|
1539
|
+
leftColumnSet.add(resultCol);
|
|
1540
|
+
}
|
|
1541
|
+
const allColumns = [...leftColumns, ...rightColumns];
|
|
1542
|
+
for (const col of allColumns) {
|
|
1543
|
+
resultData[col] = [];
|
|
1544
|
+
}
|
|
1545
|
+
const leftData = this._data.get(leftOn) ?? [];
|
|
1546
|
+
for (let i = 0; i < leftData.length; i++) {
|
|
1547
|
+
const key = createKey(leftData[i]);
|
|
1548
|
+
const rightIndices = rightHash.get(key) ?? [];
|
|
1549
|
+
if (rightIndices.length > 0) {
|
|
1550
|
+
for (const rightIdx of rightIndices) {
|
|
1551
|
+
matchedRightRows.add(rightIdx);
|
|
1552
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1553
|
+
const originalCol = this._columns[j];
|
|
1554
|
+
if (!originalCol) continue;
|
|
1555
|
+
const resultCol = leftColumns[j];
|
|
1556
|
+
const colData = this._data.get(originalCol);
|
|
1557
|
+
if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
|
|
1558
|
+
}
|
|
1559
|
+
let rightColIdx = 0;
|
|
1560
|
+
for (const originalCol of other._columns) {
|
|
1561
|
+
const shouldSkip = leftOn === rightOn && originalCol === rightOn;
|
|
1562
|
+
if (shouldSkip || !originalCol) continue;
|
|
1563
|
+
const resultCol = rightColumns[rightColIdx];
|
|
1564
|
+
const colData = other._data.get(originalCol);
|
|
1565
|
+
if (resultCol) resultData[resultCol]?.push(colData?.[rightIdx] ?? null);
|
|
1566
|
+
rightColIdx++;
|
|
1567
|
+
}
|
|
1568
|
+
}
|
|
1569
|
+
} else if (how === "left" || how === "outer") {
|
|
1570
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1571
|
+
const originalCol = this._columns[j];
|
|
1572
|
+
if (!originalCol) continue;
|
|
1573
|
+
const resultCol = leftColumns[j];
|
|
1574
|
+
const colData = this._data.get(originalCol);
|
|
1575
|
+
if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
|
|
1576
|
+
}
|
|
1577
|
+
for (const col of rightColumns) {
|
|
1578
|
+
resultData[col]?.push(null);
|
|
1579
|
+
}
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
if (how === "right" || how === "outer") {
|
|
1583
|
+
for (let i = 0; i < rightData.length; i++) {
|
|
1584
|
+
if (!matchedRightRows.has(i)) {
|
|
1585
|
+
for (let j = 0; j < this._columns.length; j++) {
|
|
1586
|
+
const originalCol = this._columns[j];
|
|
1587
|
+
const resultCol = leftColumns[j];
|
|
1588
|
+
if (originalCol && resultCol) {
|
|
1589
|
+
if (originalCol === leftOn && leftOn === rightOn) {
|
|
1590
|
+
const rightJoinData = other._data.get(rightOn);
|
|
1591
|
+
resultData[resultCol]?.push(rightJoinData?.[i] ?? null);
|
|
1592
|
+
} else {
|
|
1593
|
+
resultData[resultCol]?.push(null);
|
|
1594
|
+
}
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1597
|
+
let rightColIdx = 0;
|
|
1598
|
+
for (const originalCol of other._columns) {
|
|
1599
|
+
const shouldSkip = leftOn === rightOn && originalCol === rightOn;
|
|
1600
|
+
if (shouldSkip) {
|
|
1601
|
+
continue;
|
|
1602
|
+
}
|
|
1603
|
+
const resultCol = rightColumns[rightColIdx];
|
|
1604
|
+
const colData = other._data.get(originalCol);
|
|
1605
|
+
if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
|
|
1606
|
+
rightColIdx++;
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
return new _DataFrame(resultData, { columns: allColumns });
|
|
1612
|
+
}
|
|
1613
|
+
/**
|
|
1614
|
+
* Concatenate with another DataFrame.
|
|
1615
|
+
*
|
|
1616
|
+
* @param other - DataFrame to concatenate
|
|
1617
|
+
* @param axis - Axis to concatenate along.
|
|
1618
|
+
* - 0 or "rows" or "index": Stack vertically (append rows)
|
|
1619
|
+
* - 1 or "columns": Stack horizontally (append columns)
|
|
1620
|
+
* @returns Concatenated DataFrame
|
|
1621
|
+
*
|
|
1622
|
+
* @example
|
|
1623
|
+
* ```ts
|
|
1624
|
+
* const df1 = new DataFrame({ a: [1, 2], b: [3, 4] });
|
|
1625
|
+
* const df2 = new DataFrame({ a: [5, 6], b: [7, 8] });
|
|
1626
|
+
* df1.concat(df2, "rows"); // Stack vertically: 4 rows
|
|
1627
|
+
* df1.concat(df2, "columns"); // Stack horizontally: 4 columns
|
|
1628
|
+
* ```
|
|
1629
|
+
*/
|
|
1630
|
+
concat(other, axis = 0) {
|
|
1631
|
+
const ax = chunkJSCDE774_cjs.normalizeAxis(axis, 2);
|
|
1632
|
+
if (ax === 0) {
|
|
1633
|
+
for (const col of this._columns) {
|
|
1634
|
+
if (!other._columns.includes(col)) {
|
|
1635
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
1636
|
+
`Cannot concat on axis=0: missing column '${col}' in other DataFrame`
|
|
1637
|
+
);
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
1640
|
+
for (const col of other._columns) {
|
|
1641
|
+
if (!this._columns.includes(col)) {
|
|
1642
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
1643
|
+
`Cannot concat on axis=0: extra column '${col}' in other DataFrame`
|
|
1644
|
+
);
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
const newData = {};
|
|
1648
|
+
for (const col of this._columns) {
|
|
1649
|
+
const thisColData = this._data.get(col) ?? [];
|
|
1650
|
+
const otherColData = other._data.get(col) ?? [];
|
|
1651
|
+
newData[col] = [...thisColData, ...otherColData];
|
|
1652
|
+
}
|
|
1653
|
+
const totalRows = this._index.length + other._index.length;
|
|
1654
|
+
const newIndex = Array.from({ length: totalRows }, (_, i) => i);
|
|
1655
|
+
return new _DataFrame(newData, {
|
|
1656
|
+
columns: this._columns,
|
|
1657
|
+
index: newIndex
|
|
1658
|
+
});
|
|
1659
|
+
} else {
|
|
1660
|
+
const newIndex = [...this._index];
|
|
1661
|
+
const seenIndices = new Set(this._index);
|
|
1662
|
+
for (const idx of other._index) {
|
|
1663
|
+
if (!seenIndices.has(idx)) {
|
|
1664
|
+
newIndex.push(idx);
|
|
1665
|
+
seenIndices.add(idx);
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
const newData = {};
|
|
1669
|
+
const newColumns = [];
|
|
1670
|
+
const alignColumn = (df, col, targetIndex) => {
|
|
1671
|
+
const sourceData = df._data.get(col);
|
|
1672
|
+
if (!sourceData) return [];
|
|
1673
|
+
const indexPos = df._indexPos;
|
|
1674
|
+
return targetIndex.map((label) => {
|
|
1675
|
+
const pos = indexPos.get(label);
|
|
1676
|
+
if (pos !== void 0) {
|
|
1677
|
+
return sourceData[pos];
|
|
1678
|
+
}
|
|
1679
|
+
return null;
|
|
1680
|
+
});
|
|
1681
|
+
};
|
|
1682
|
+
const rightColSet = new Set(other._columns);
|
|
1683
|
+
const overlapping = /* @__PURE__ */ new Set();
|
|
1684
|
+
for (const col of this._columns) {
|
|
1685
|
+
if (rightColSet.has(col)) {
|
|
1686
|
+
overlapping.add(col);
|
|
1687
|
+
}
|
|
1688
|
+
}
|
|
1689
|
+
for (const col of this._columns) {
|
|
1690
|
+
const outputName = overlapping.has(col) ? `${col}_left` : col;
|
|
1691
|
+
newData[outputName] = alignColumn(this, col, newIndex);
|
|
1692
|
+
newColumns.push(outputName);
|
|
1693
|
+
}
|
|
1694
|
+
for (const col of other._columns) {
|
|
1695
|
+
const outputName = overlapping.has(col) ? `${col}_right` : col;
|
|
1696
|
+
newData[outputName] = alignColumn(other, col, newIndex);
|
|
1697
|
+
newColumns.push(outputName);
|
|
1698
|
+
}
|
|
1699
|
+
return new _DataFrame(newData, {
|
|
1700
|
+
columns: newColumns,
|
|
1701
|
+
index: newIndex
|
|
1702
|
+
});
|
|
1703
|
+
}
|
|
1704
|
+
}
|
|
1705
|
+
/**
|
|
1706
|
+
* Fill missing values (null or undefined) with a specified value.
|
|
1707
|
+
*
|
|
1708
|
+
* @param value - Value to use for filling missing values
|
|
1709
|
+
* @returns New DataFrame with missing values filled
|
|
1710
|
+
*
|
|
1711
|
+
* @example
|
|
1712
|
+
* ```ts
|
|
1713
|
+
* const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
|
|
1714
|
+
* df.fillna(0); // Replace null/undefined with 0
|
|
1715
|
+
* ```
|
|
1716
|
+
*/
|
|
1717
|
+
fillna(value) {
|
|
1718
|
+
const newData = {};
|
|
1719
|
+
for (const col of this._columns) {
|
|
1720
|
+
const colData = this._data.get(col);
|
|
1721
|
+
if (colData) {
|
|
1722
|
+
newData[col] = colData.map(
|
|
1723
|
+
(v) => v === null || v === void 0 || typeof v === "number" && Number.isNaN(v) ? value : v
|
|
1724
|
+
);
|
|
1725
|
+
}
|
|
1726
|
+
}
|
|
1727
|
+
return new _DataFrame(newData, {
|
|
1728
|
+
columns: this._columns,
|
|
1729
|
+
index: this._index
|
|
1730
|
+
});
|
|
1731
|
+
}
|
|
1732
|
+
/**
|
|
1733
|
+
* Drop rows that contain any missing values (null or undefined).
|
|
1734
|
+
*
|
|
1735
|
+
* @returns New DataFrame with rows containing missing values removed
|
|
1736
|
+
*
|
|
1737
|
+
* @example
|
|
1738
|
+
* ```ts
|
|
1739
|
+
* const df = new DataFrame({ a: [1, null, 3], b: [4, 5, 6] });
|
|
1740
|
+
* df.dropna(); // Only keeps rows 0 and 2
|
|
1741
|
+
* ```
|
|
1742
|
+
*/
|
|
1743
|
+
dropna() {
|
|
1744
|
+
const newData = {};
|
|
1745
|
+
const newIndex = [];
|
|
1746
|
+
for (const col of this._columns) {
|
|
1747
|
+
newData[col] = [];
|
|
1748
|
+
}
|
|
1749
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
1750
|
+
let hasNA = false;
|
|
1751
|
+
for (const col of this._columns) {
|
|
1752
|
+
const colData = this._data.get(col);
|
|
1753
|
+
if (colData) {
|
|
1754
|
+
const val = colData[i];
|
|
1755
|
+
if (val === null || val === void 0 || typeof val === "number" && Number.isNaN(val)) {
|
|
1756
|
+
hasNA = true;
|
|
1757
|
+
break;
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
if (!hasNA) {
|
|
1762
|
+
const idx = this._index[i];
|
|
1763
|
+
if (idx !== void 0) newIndex.push(idx);
|
|
1764
|
+
for (const col of this._columns) {
|
|
1765
|
+
const colData = this._data.get(col);
|
|
1766
|
+
if (colData) {
|
|
1767
|
+
newData[col]?.push(colData[i]);
|
|
1768
|
+
}
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
}
|
|
1772
|
+
return new _DataFrame(newData, {
|
|
1773
|
+
columns: this._columns,
|
|
1774
|
+
index: newIndex
|
|
1775
|
+
});
|
|
1776
|
+
}
|
|
1777
|
+
/**
|
|
1778
|
+
* Generate descriptive statistics.
|
|
1779
|
+
*
|
|
1780
|
+
* Computes count, mean, std, min, 25%, 50%, 75%, max for numeric columns.
|
|
1781
|
+
*
|
|
1782
|
+
* @returns DataFrame with statistics
|
|
1783
|
+
*/
|
|
1784
|
+
describe() {
|
|
1785
|
+
const stats = {};
|
|
1786
|
+
const metrics = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"];
|
|
1787
|
+
if (this._columns.length === 0 || this._index.length === 0) {
|
|
1788
|
+
return new _DataFrame({}, { columns: [], index: metrics });
|
|
1789
|
+
}
|
|
1790
|
+
for (const col of this._columns) {
|
|
1791
|
+
const colData = this._data.get(col);
|
|
1792
|
+
if (!colData) continue;
|
|
1793
|
+
const numericData = colData.filter(isValidNumber);
|
|
1794
|
+
if (numericData.length === 0) continue;
|
|
1795
|
+
const sorted = [...numericData].sort((a, b) => a - b);
|
|
1796
|
+
const sum = numericData.reduce((acc, val) => acc + val, 0);
|
|
1797
|
+
const mean = sum / numericData.length;
|
|
1798
|
+
let variance;
|
|
1799
|
+
let std;
|
|
1800
|
+
if (numericData.length > 1) {
|
|
1801
|
+
variance = numericData.reduce((acc, val) => acc + (val - mean) ** 2, 0) / (numericData.length - 1);
|
|
1802
|
+
std = Math.sqrt(variance);
|
|
1803
|
+
} else {
|
|
1804
|
+
variance = NaN;
|
|
1805
|
+
std = NaN;
|
|
1806
|
+
}
|
|
1807
|
+
const getPercentile = (p) => {
|
|
1808
|
+
const idx = p / 100 * (sorted.length - 1);
|
|
1809
|
+
const lower = Math.floor(idx);
|
|
1810
|
+
const upper = Math.ceil(idx);
|
|
1811
|
+
const weight = idx - lower;
|
|
1812
|
+
return (sorted[lower] ?? 0) * (1 - weight) + (sorted[upper] ?? 0) * weight;
|
|
1813
|
+
};
|
|
1814
|
+
const minVal = sorted[0];
|
|
1815
|
+
const maxVal = sorted[sorted.length - 1];
|
|
1816
|
+
if (minVal === void 0 || maxVal === void 0) {
|
|
1817
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Unable to compute min/max for column '${col}'`);
|
|
1818
|
+
}
|
|
1819
|
+
stats[col] = [
|
|
1820
|
+
numericData.length,
|
|
1821
|
+
mean,
|
|
1822
|
+
std,
|
|
1823
|
+
minVal,
|
|
1824
|
+
getPercentile(25),
|
|
1825
|
+
getPercentile(50),
|
|
1826
|
+
getPercentile(75),
|
|
1827
|
+
maxVal
|
|
1828
|
+
];
|
|
1829
|
+
}
|
|
1830
|
+
if (Object.keys(stats).length === 0) {
|
|
1831
|
+
return new _DataFrame({}, { columns: [], index: metrics });
|
|
1832
|
+
}
|
|
1833
|
+
return new _DataFrame(stats, { index: metrics });
|
|
1834
|
+
}
|
|
1835
|
+
/**
|
|
1836
|
+
* Compute correlation matrix.
|
|
1837
|
+
*
|
|
1838
|
+
* Uses pairwise complete observations (ignores missing values for each pair).
|
|
1839
|
+
*
|
|
1840
|
+
* @returns DataFrame containing pairwise correlations
|
|
1841
|
+
*/
|
|
1842
|
+
corr() {
|
|
1843
|
+
const numericCols = [];
|
|
1844
|
+
for (const col of this._columns) {
|
|
1845
|
+
const colData = this._data.get(col);
|
|
1846
|
+
if (!colData) continue;
|
|
1847
|
+
if (colData.some(isValidNumber)) {
|
|
1848
|
+
numericCols.push(col);
|
|
1849
|
+
}
|
|
1850
|
+
}
|
|
1851
|
+
const corrMatrix = {};
|
|
1852
|
+
for (const col1 of numericCols) {
|
|
1853
|
+
corrMatrix[col1] = [];
|
|
1854
|
+
const data1 = this._data.get(col1);
|
|
1855
|
+
for (const col2 of numericCols) {
|
|
1856
|
+
const data2 = this._data.get(col2);
|
|
1857
|
+
if (!data1 || !data2) {
|
|
1858
|
+
corrMatrix[col1]?.push(NaN);
|
|
1859
|
+
continue;
|
|
1860
|
+
}
|
|
1861
|
+
const valid1 = [];
|
|
1862
|
+
const valid2 = [];
|
|
1863
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
1864
|
+
const v1 = data1[i];
|
|
1865
|
+
const v2 = data2[i];
|
|
1866
|
+
if (isValidNumber(v1) && isValidNumber(v2)) {
|
|
1867
|
+
valid1.push(v1);
|
|
1868
|
+
valid2.push(v2);
|
|
1869
|
+
}
|
|
1870
|
+
}
|
|
1871
|
+
if (valid1.length < 2) {
|
|
1872
|
+
corrMatrix[col1]?.push(NaN);
|
|
1873
|
+
continue;
|
|
1874
|
+
}
|
|
1875
|
+
const mean1 = valid1.reduce((a, b) => a + b, 0) / valid1.length;
|
|
1876
|
+
const mean2 = valid2.reduce((a, b) => a + b, 0) / valid2.length;
|
|
1877
|
+
let num = 0;
|
|
1878
|
+
let den1 = 0;
|
|
1879
|
+
let den2 = 0;
|
|
1880
|
+
for (let k = 0; k < valid1.length; k++) {
|
|
1881
|
+
const val1 = valid1[k];
|
|
1882
|
+
const val2 = valid2[k];
|
|
1883
|
+
if (val1 === void 0 || val2 === void 0) continue;
|
|
1884
|
+
const diff1 = val1 - mean1;
|
|
1885
|
+
const diff2 = val2 - mean2;
|
|
1886
|
+
num += diff1 * diff2;
|
|
1887
|
+
den1 += diff1 * diff1;
|
|
1888
|
+
den2 += diff2 * diff2;
|
|
1889
|
+
}
|
|
1890
|
+
const corr = den1 === 0 || den2 === 0 ? NaN : num / Math.sqrt(den1 * den2);
|
|
1891
|
+
corrMatrix[col1]?.push(corr);
|
|
1892
|
+
}
|
|
1893
|
+
}
|
|
1894
|
+
return new _DataFrame(corrMatrix, {
|
|
1895
|
+
index: numericCols,
|
|
1896
|
+
columns: numericCols
|
|
1897
|
+
});
|
|
1898
|
+
}
|
|
1899
|
+
/**
|
|
1900
|
+
* Compute covariance matrix.
|
|
1901
|
+
*
|
|
1902
|
+
* Uses pairwise complete observations.
|
|
1903
|
+
*
|
|
1904
|
+
* @returns DataFrame containing pairwise covariances
|
|
1905
|
+
*/
|
|
1906
|
+
cov() {
|
|
1907
|
+
const numericCols = [];
|
|
1908
|
+
for (const col of this._columns) {
|
|
1909
|
+
const colData = this._data.get(col);
|
|
1910
|
+
if (!colData) continue;
|
|
1911
|
+
if (colData.some(isValidNumber)) {
|
|
1912
|
+
numericCols.push(col);
|
|
1913
|
+
}
|
|
1914
|
+
}
|
|
1915
|
+
const covMatrix = {};
|
|
1916
|
+
for (const col1 of numericCols) {
|
|
1917
|
+
covMatrix[col1] = [];
|
|
1918
|
+
const data1 = this._data.get(col1);
|
|
1919
|
+
for (const col2 of numericCols) {
|
|
1920
|
+
const data2 = this._data.get(col2);
|
|
1921
|
+
if (!data1 || !data2) {
|
|
1922
|
+
covMatrix[col1]?.push(NaN);
|
|
1923
|
+
continue;
|
|
1924
|
+
}
|
|
1925
|
+
const valid1 = [];
|
|
1926
|
+
const valid2 = [];
|
|
1927
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
1928
|
+
const v1 = data1[i];
|
|
1929
|
+
const v2 = data2[i];
|
|
1930
|
+
if (isValidNumber(v1) && isValidNumber(v2)) {
|
|
1931
|
+
valid1.push(v1);
|
|
1932
|
+
valid2.push(v2);
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
if (valid1.length < 2) {
|
|
1936
|
+
covMatrix[col1]?.push(NaN);
|
|
1937
|
+
continue;
|
|
1938
|
+
}
|
|
1939
|
+
const mean1 = valid1.reduce((a, b) => a + b, 0) / valid1.length;
|
|
1940
|
+
const mean2 = valid2.reduce((a, b) => a + b, 0) / valid2.length;
|
|
1941
|
+
let cov = 0;
|
|
1942
|
+
for (let k = 0; k < valid1.length; k++) {
|
|
1943
|
+
const val1 = valid1[k];
|
|
1944
|
+
const val2 = valid2[k];
|
|
1945
|
+
if (val1 === void 0 || val2 === void 0) continue;
|
|
1946
|
+
cov += (val1 - mean1) * (val2 - mean2);
|
|
1947
|
+
}
|
|
1948
|
+
cov /= valid1.length - 1;
|
|
1949
|
+
covMatrix[col1]?.push(cov);
|
|
1950
|
+
}
|
|
1951
|
+
}
|
|
1952
|
+
return new _DataFrame(covMatrix, {
|
|
1953
|
+
index: numericCols,
|
|
1954
|
+
columns: numericCols
|
|
1955
|
+
});
|
|
1956
|
+
}
|
|
1957
|
+
/**
|
|
1958
|
+
* Apply a function along an axis of the DataFrame.
|
|
1959
|
+
*
|
|
1960
|
+
* When `axis=1`, the provided Series is indexed by column names.
|
|
1961
|
+
*
|
|
1962
|
+
* @param fn - Function to apply to each Series
|
|
1963
|
+
* @param axis - Axis to apply along (0=columns, 1=rows)
|
|
1964
|
+
* @returns New DataFrame with function applied
|
|
1965
|
+
*
|
|
1966
|
+
* @example
|
|
1967
|
+
* ```ts
|
|
1968
|
+
* const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
|
|
1969
|
+
* // Apply function to each column
|
|
1970
|
+
* df.apply(series => series.map(x => Number(x) * 2), 0);
|
|
1971
|
+
* ```
|
|
1972
|
+
*/
|
|
1973
|
+
apply(fn, axis = 0) {
|
|
1974
|
+
const ax = chunkJSCDE774_cjs.normalizeAxis(axis, 2);
|
|
1975
|
+
if (ax === 0) {
|
|
1976
|
+
const newData = {};
|
|
1977
|
+
for (const col of this._columns) {
|
|
1978
|
+
const series = this.get(col);
|
|
1979
|
+
const result = fn(series);
|
|
1980
|
+
if (!(result instanceof Series)) {
|
|
1981
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Function must return a Series when axis=0");
|
|
1982
|
+
}
|
|
1983
|
+
newData[col] = [...result.data];
|
|
1984
|
+
}
|
|
1985
|
+
return new _DataFrame(newData, {
|
|
1986
|
+
columns: this._columns,
|
|
1987
|
+
index: this._index
|
|
1988
|
+
});
|
|
1989
|
+
} else {
|
|
1990
|
+
const results = [];
|
|
1991
|
+
const columnLabelMap = /* @__PURE__ */ new Map();
|
|
1992
|
+
const newColumns = [];
|
|
1993
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
1994
|
+
const rowValues = [];
|
|
1995
|
+
for (const col of this._columns) {
|
|
1996
|
+
rowValues.push(this._data.get(col)?.[i]);
|
|
1997
|
+
}
|
|
1998
|
+
const rowSeries = new Series(rowValues, {
|
|
1999
|
+
name: "row",
|
|
2000
|
+
index: this._columns,
|
|
2001
|
+
copy: false
|
|
2002
|
+
});
|
|
2003
|
+
const result = fn(rowSeries);
|
|
2004
|
+
if (!(result instanceof Series)) {
|
|
2005
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Function must return a Series when axis=1");
|
|
2006
|
+
}
|
|
2007
|
+
results.push(result);
|
|
2008
|
+
for (const label of result.index) {
|
|
2009
|
+
const columnName = String(label);
|
|
2010
|
+
const existing = columnLabelMap.get(columnName);
|
|
2011
|
+
if (existing !== void 0 && existing !== label) {
|
|
2012
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2013
|
+
`Column label '${columnName}' is ambiguous between '${String(
|
|
2014
|
+
existing
|
|
2015
|
+
)}' and '${String(label)}'`
|
|
2016
|
+
);
|
|
2017
|
+
}
|
|
2018
|
+
if (!columnLabelMap.has(columnName)) {
|
|
2019
|
+
newColumns.push(columnName);
|
|
2020
|
+
columnLabelMap.set(columnName, label);
|
|
2021
|
+
}
|
|
2022
|
+
}
|
|
2023
|
+
}
|
|
2024
|
+
const newData = {};
|
|
2025
|
+
for (const col of newColumns) {
|
|
2026
|
+
newData[col] = [];
|
|
2027
|
+
}
|
|
2028
|
+
for (const result of results) {
|
|
2029
|
+
for (const col of newColumns) {
|
|
2030
|
+
const label = columnLabelMap.get(col);
|
|
2031
|
+
if (label === void 0) {
|
|
2032
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Missing label mapping for column '${col}'`);
|
|
2033
|
+
}
|
|
2034
|
+
const val = result.get(label);
|
|
2035
|
+
newData[col]?.push(val === void 0 ? null : val);
|
|
2036
|
+
}
|
|
2037
|
+
}
|
|
2038
|
+
return new _DataFrame(newData, {
|
|
2039
|
+
columns: newColumns,
|
|
2040
|
+
index: this._index
|
|
2041
|
+
});
|
|
2042
|
+
}
|
|
2043
|
+
}
|
|
2044
|
+
/**
|
|
2045
|
+
* Convert DataFrame to a 2D Tensor.
|
|
2046
|
+
*
|
|
2047
|
+
* All columns must contain numeric data.
|
|
2048
|
+
*
|
|
2049
|
+
* @returns 2D Tensor with shape [rows, columns]
|
|
2050
|
+
* @throws {DataValidationError} If data is non-numeric
|
|
2051
|
+
*
|
|
2052
|
+
* @example
|
|
2053
|
+
* ```ts
|
|
2054
|
+
* const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
|
|
2055
|
+
* const t = df.toTensor(); // 2D tensor [[1,4], [2,5], [3,6]]
|
|
2056
|
+
* ```
|
|
2057
|
+
*/
|
|
2058
|
+
toTensor() {
|
|
2059
|
+
const arr = this.toArray();
|
|
2060
|
+
const flat = [];
|
|
2061
|
+
for (const row of arr) {
|
|
2062
|
+
for (const val of row) {
|
|
2063
|
+
if (typeof val === "number") {
|
|
2064
|
+
flat.push(val);
|
|
2065
|
+
} else if (val === null || val === void 0) {
|
|
2066
|
+
flat.push(NaN);
|
|
2067
|
+
} else {
|
|
2068
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2069
|
+
`Non-numeric value found: ${val}. All data must be numeric (or null/undefined) for tensor conversion.`
|
|
2070
|
+
);
|
|
2071
|
+
}
|
|
2072
|
+
}
|
|
2073
|
+
}
|
|
2074
|
+
const t = chunk6AE5FKKQ_cjs.tensor(flat);
|
|
2075
|
+
const [rows, cols] = this.shape;
|
|
2076
|
+
return chunk6AE5FKKQ_cjs.reshape(t, [rows, cols]);
|
|
2077
|
+
}
|
|
2078
|
+
/**
|
|
2079
|
+
* Convert DataFrame to a 2D JavaScript array.
|
|
2080
|
+
*
|
|
2081
|
+
* Each inner array represents a row.
|
|
2082
|
+
*
|
|
2083
|
+
* @returns 2D array of values
|
|
2084
|
+
*
|
|
2085
|
+
* @example
|
|
2086
|
+
* ```ts
|
|
2087
|
+
* const df = new DataFrame({ a: [1, 2], b: [3, 4] });
|
|
2088
|
+
* df.toArray(); // [[1, 3], [2, 4]]
|
|
2089
|
+
* ```
|
|
2090
|
+
*/
|
|
2091
|
+
toArray() {
|
|
2092
|
+
const result = [];
|
|
2093
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
2094
|
+
const row = [];
|
|
2095
|
+
for (const col of this._columns) {
|
|
2096
|
+
const colData = this._data.get(col);
|
|
2097
|
+
row.push(colData ? colData[i] : void 0);
|
|
2098
|
+
}
|
|
2099
|
+
result.push(row);
|
|
2100
|
+
}
|
|
2101
|
+
return result;
|
|
2102
|
+
}
|
|
2103
|
+
/**
|
|
2104
|
+
* Parse CSV string into DataFrame with full type inference and quote handling.
|
|
2105
|
+
* Time complexity: O(n) where n is number of characters.
|
|
2106
|
+
*/
|
|
2107
|
+
static fromCsvString(csvString, options = {}) {
|
|
2108
|
+
const delimiter = options.delimiter ?? ",";
|
|
2109
|
+
const quoteChar = options.quoteChar ?? '"';
|
|
2110
|
+
const hasHeader = options.hasHeader ?? true;
|
|
2111
|
+
const skipRows = options.skipRows ?? 0;
|
|
2112
|
+
const rows = [];
|
|
2113
|
+
let fields = [];
|
|
2114
|
+
let currentField = "";
|
|
2115
|
+
let inQuotes = false;
|
|
2116
|
+
let rowCount = 0;
|
|
2117
|
+
for (let i = 0; i < csvString.length; i++) {
|
|
2118
|
+
const char = csvString[i];
|
|
2119
|
+
const nextChar = csvString[i + 1];
|
|
2120
|
+
if (char === quoteChar) {
|
|
2121
|
+
if (inQuotes && nextChar === quoteChar) {
|
|
2122
|
+
currentField += quoteChar;
|
|
2123
|
+
i++;
|
|
2124
|
+
} else {
|
|
2125
|
+
inQuotes = !inQuotes;
|
|
2126
|
+
}
|
|
2127
|
+
} else if (char === delimiter && !inQuotes) {
|
|
2128
|
+
fields.push(currentField);
|
|
2129
|
+
currentField = "";
|
|
2130
|
+
} else if ((char === "\n" || char === "\r") && !inQuotes) {
|
|
2131
|
+
if (char === "\r" && nextChar === "\n") {
|
|
2132
|
+
i++;
|
|
2133
|
+
}
|
|
2134
|
+
fields.push(currentField);
|
|
2135
|
+
currentField = "";
|
|
2136
|
+
if (fields.some((f) => f.trim() !== "")) {
|
|
2137
|
+
if (rowCount >= skipRows) {
|
|
2138
|
+
rows.push(fields);
|
|
2139
|
+
}
|
|
2140
|
+
rowCount++;
|
|
2141
|
+
}
|
|
2142
|
+
fields = [];
|
|
2143
|
+
} else {
|
|
2144
|
+
currentField += char;
|
|
2145
|
+
}
|
|
2146
|
+
}
|
|
2147
|
+
if (currentField !== "" || fields.length > 0) {
|
|
2148
|
+
fields.push(currentField);
|
|
2149
|
+
if (fields.some((f) => f.trim() !== "") && rowCount >= skipRows) {
|
|
2150
|
+
rows.push(fields);
|
|
2151
|
+
}
|
|
2152
|
+
}
|
|
2153
|
+
if (inQuotes) {
|
|
2154
|
+
throw new chunkJSCDE774_cjs.DataValidationError("CSV contains an unmatched quote");
|
|
2155
|
+
}
|
|
2156
|
+
if (rows.length === 0) {
|
|
2157
|
+
throw new chunkJSCDE774_cjs.DataValidationError("CSV contains no data rows");
|
|
2158
|
+
}
|
|
2159
|
+
let columns;
|
|
2160
|
+
let dataRows;
|
|
2161
|
+
if (hasHeader) {
|
|
2162
|
+
const firstRow = rows[0];
|
|
2163
|
+
if (!firstRow) throw new chunkJSCDE774_cjs.DataValidationError("CSV has no header row");
|
|
2164
|
+
columns = firstRow;
|
|
2165
|
+
ensureUniqueLabels(columns, "column name");
|
|
2166
|
+
dataRows = rows.slice(1);
|
|
2167
|
+
} else {
|
|
2168
|
+
const numCols = rows[0]?.length ?? 0;
|
|
2169
|
+
columns = Array.from({ length: numCols }, (_, i) => `col${i}`);
|
|
2170
|
+
dataRows = rows;
|
|
2171
|
+
}
|
|
2172
|
+
for (let i = 0; i < dataRows.length; i++) {
|
|
2173
|
+
const row = dataRows[i];
|
|
2174
|
+
if (row && row.length !== columns.length) {
|
|
2175
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2176
|
+
`Row ${i + (hasHeader ? 2 : 1)} has ${row.length} fields, expected ${columns.length}`
|
|
2177
|
+
);
|
|
2178
|
+
}
|
|
2179
|
+
}
|
|
2180
|
+
const data = {};
|
|
2181
|
+
for (let colIdx = 0; colIdx < columns.length; colIdx++) {
|
|
2182
|
+
const colName = columns[colIdx];
|
|
2183
|
+
const colData = [];
|
|
2184
|
+
for (const row of dataRows) {
|
|
2185
|
+
const value = row[colIdx];
|
|
2186
|
+
if (value === void 0 || value === "" || value === "null" || value === "undefined") {
|
|
2187
|
+
colData.push(null);
|
|
2188
|
+
} else if (!Number.isNaN(Number(value)) && value !== "" && // Allow "0", "0.5", "10", but not "01" (unless it's "0.1")
|
|
2189
|
+
(value === "0" || !value.startsWith("0") || value.startsWith("0."))) {
|
|
2190
|
+
colData.push(Number(value));
|
|
2191
|
+
} else if (value === "true" || value === "false") {
|
|
2192
|
+
colData.push(value === "true");
|
|
2193
|
+
} else {
|
|
2194
|
+
colData.push(value);
|
|
2195
|
+
}
|
|
2196
|
+
}
|
|
2197
|
+
if (colName) data[colName] = colData;
|
|
2198
|
+
}
|
|
2199
|
+
return new _DataFrame(data, { columns });
|
|
2200
|
+
}
|
|
2201
|
+
/**
|
|
2202
|
+
* Read CSV file - environment-aware (Node.js fs or browser fetch).
|
|
2203
|
+
* Time complexity: O(n) for file read + O(m) for parsing.
|
|
2204
|
+
*/
|
|
2205
|
+
static async readCsv(path, options = {}) {
|
|
2206
|
+
let csvString;
|
|
2207
|
+
if (typeof process !== "undefined" && process.versions?.node) {
|
|
2208
|
+
try {
|
|
2209
|
+
const fs = await import('fs/promises');
|
|
2210
|
+
csvString = await fs.readFile(path, "utf-8");
|
|
2211
|
+
} catch (error) {
|
|
2212
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2213
|
+
`Failed to read CSV file: ${error instanceof Error ? error.message : String(error)}`
|
|
2214
|
+
);
|
|
2215
|
+
}
|
|
2216
|
+
} else if (typeof fetch !== "undefined") {
|
|
2217
|
+
try {
|
|
2218
|
+
const response = await fetch(path);
|
|
2219
|
+
if (!response.ok) {
|
|
2220
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`HTTP ${response.status}: ${response.statusText}`);
|
|
2221
|
+
}
|
|
2222
|
+
csvString = await response.text();
|
|
2223
|
+
} catch (error) {
|
|
2224
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2225
|
+
`Failed to fetch CSV: ${error instanceof Error ? error.message : String(error)}`
|
|
2226
|
+
);
|
|
2227
|
+
}
|
|
2228
|
+
} else {
|
|
2229
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Environment not supported");
|
|
2230
|
+
}
|
|
2231
|
+
return _DataFrame.fromCsvString(csvString, options);
|
|
2232
|
+
}
|
|
2233
|
+
/**
|
|
2234
|
+
* Convert DataFrame to CSV string with proper quoting and escaping.
|
|
2235
|
+
* Time complexity: O(n × m) where n is rows, m is columns.
|
|
2236
|
+
*/
|
|
2237
|
+
toCsvString(options = {}) {
|
|
2238
|
+
const delimiter = options.delimiter ?? ",";
|
|
2239
|
+
const quoteChar = options.quoteChar ?? '"';
|
|
2240
|
+
const includeIndex = options.includeIndex ?? false;
|
|
2241
|
+
const header = options.header ?? true;
|
|
2242
|
+
const lines = [];
|
|
2243
|
+
const escapeField = (value) => {
|
|
2244
|
+
const str = String(value ?? "");
|
|
2245
|
+
if (str.includes(delimiter) || str.includes(quoteChar) || str.includes("\n") || str.includes("\r")) {
|
|
2246
|
+
return quoteChar + str.split(quoteChar).join(quoteChar + quoteChar) + quoteChar;
|
|
2247
|
+
}
|
|
2248
|
+
return str;
|
|
2249
|
+
};
|
|
2250
|
+
if (header) {
|
|
2251
|
+
const headerFields = includeIndex ? ["index", ...this._columns] : [...this._columns];
|
|
2252
|
+
lines.push(headerFields.map(escapeField).join(delimiter));
|
|
2253
|
+
}
|
|
2254
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
2255
|
+
const rowFields = [];
|
|
2256
|
+
if (includeIndex) {
|
|
2257
|
+
rowFields.push(this._index[i]);
|
|
2258
|
+
}
|
|
2259
|
+
for (const col of this._columns) {
|
|
2260
|
+
const colData = this._data.get(col);
|
|
2261
|
+
rowFields.push(colData?.[i] ?? "");
|
|
2262
|
+
}
|
|
2263
|
+
lines.push(rowFields.map(escapeField).join(delimiter));
|
|
2264
|
+
}
|
|
2265
|
+
return lines.join("\n");
|
|
2266
|
+
}
|
|
2267
|
+
/**
|
|
2268
|
+
* Write DataFrame to CSV file - environment-aware.
|
|
2269
|
+
* Time complexity: O(n × m) for generation + O(k) for write.
|
|
2270
|
+
*/
|
|
2271
|
+
async toCsv(path, options = {}) {
|
|
2272
|
+
const csvString = this.toCsvString(options);
|
|
2273
|
+
if (typeof process !== "undefined" && process.versions?.node) {
|
|
2274
|
+
try {
|
|
2275
|
+
const fs = await import('fs/promises');
|
|
2276
|
+
await fs.writeFile(path, csvString, "utf-8");
|
|
2277
|
+
} catch (error) {
|
|
2278
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2279
|
+
`Failed to write CSV file: ${error instanceof Error ? error.message : String(error)}`
|
|
2280
|
+
);
|
|
2281
|
+
}
|
|
2282
|
+
} else if (typeof document !== "undefined" && typeof URL !== "undefined") {
|
|
2283
|
+
const blob = new Blob([csvString], { type: "text/csv;charset=utf-8;" });
|
|
2284
|
+
const url = URL.createObjectURL(blob);
|
|
2285
|
+
const link = document.createElement("a");
|
|
2286
|
+
link.href = url;
|
|
2287
|
+
link.download = path;
|
|
2288
|
+
link.style.display = "none";
|
|
2289
|
+
document.body.appendChild(link);
|
|
2290
|
+
link.click();
|
|
2291
|
+
document.body.removeChild(link);
|
|
2292
|
+
URL.revokeObjectURL(url);
|
|
2293
|
+
} else {
|
|
2294
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Environment not supported");
|
|
2295
|
+
}
|
|
2296
|
+
}
|
|
2297
|
+
/**
|
|
2298
|
+
* Serialize DataFrame to JSON string.
|
|
2299
|
+
* Time complexity: O(n × m).
|
|
2300
|
+
*/
|
|
2301
|
+
toJsonString() {
|
|
2302
|
+
return JSON.stringify(
|
|
2303
|
+
{
|
|
2304
|
+
columns: this._columns,
|
|
2305
|
+
index: this._index,
|
|
2306
|
+
data: Object.fromEntries(this._data)
|
|
2307
|
+
},
|
|
2308
|
+
null,
|
|
2309
|
+
2
|
|
2310
|
+
);
|
|
2311
|
+
}
|
|
2312
|
+
/**
|
|
2313
|
+
* Create DataFrame from JSON string.
|
|
2314
|
+
* Time complexity: O(n × m).
|
|
2315
|
+
*/
|
|
2316
|
+
static fromJsonString(jsonStr) {
|
|
2317
|
+
let parsed;
|
|
2318
|
+
try {
|
|
2319
|
+
parsed = JSON.parse(jsonStr);
|
|
2320
|
+
} catch (error) {
|
|
2321
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2322
|
+
`Failed to parse JSON: ${error instanceof Error ? error.message : String(error)}`
|
|
2323
|
+
);
|
|
2324
|
+
}
|
|
2325
|
+
if (!isRecord(parsed)) {
|
|
2326
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Invalid JSON: expected object (not array)");
|
|
2327
|
+
}
|
|
2328
|
+
const obj = parsed;
|
|
2329
|
+
if (!isStringArray(obj["columns"])) {
|
|
2330
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2331
|
+
'Invalid JSON: missing or invalid "columns" field (expected array)'
|
|
2332
|
+
);
|
|
2333
|
+
}
|
|
2334
|
+
if (!isIndexLabelArray(obj["index"])) {
|
|
2335
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2336
|
+
'Invalid JSON: missing or invalid "index" field (expected array)'
|
|
2337
|
+
);
|
|
2338
|
+
}
|
|
2339
|
+
if (!isRecord(obj["data"])) {
|
|
2340
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2341
|
+
'Invalid JSON: missing or invalid "data" field (expected object)'
|
|
2342
|
+
);
|
|
2343
|
+
}
|
|
2344
|
+
const columns = obj["columns"];
|
|
2345
|
+
const index = obj["index"];
|
|
2346
|
+
const rawData = obj["data"];
|
|
2347
|
+
ensureUniqueLabels(columns, "column name");
|
|
2348
|
+
const dataKeys = Object.keys(rawData);
|
|
2349
|
+
for (const col of columns) {
|
|
2350
|
+
if (!(col in rawData)) {
|
|
2351
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Missing data for column '${col}'`);
|
|
2352
|
+
}
|
|
2353
|
+
}
|
|
2354
|
+
for (const key of dataKeys) {
|
|
2355
|
+
if (!columns.includes(key)) {
|
|
2356
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Unexpected data column '${key}' not listed in columns`);
|
|
2357
|
+
}
|
|
2358
|
+
}
|
|
2359
|
+
const data = {};
|
|
2360
|
+
for (const [key, value] of Object.entries(rawData)) {
|
|
2361
|
+
if (!Array.isArray(value)) {
|
|
2362
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Invalid data for column '${key}': expected array`);
|
|
2363
|
+
}
|
|
2364
|
+
data[key] = value;
|
|
2365
|
+
}
|
|
2366
|
+
return new _DataFrame(data, {
|
|
2367
|
+
columns,
|
|
2368
|
+
index
|
|
2369
|
+
});
|
|
2370
|
+
}
|
|
2371
|
+
/**
|
|
2372
|
+
* Read JSON file - environment-aware.
|
|
2373
|
+
* Time complexity: O(n) for file read + O(m) for parsing.
|
|
2374
|
+
*/
|
|
2375
|
+
static async readJson(path) {
|
|
2376
|
+
let jsonString;
|
|
2377
|
+
if (typeof process !== "undefined" && process.versions?.node) {
|
|
2378
|
+
try {
|
|
2379
|
+
const fs = await import('fs/promises');
|
|
2380
|
+
jsonString = await fs.readFile(path, "utf-8");
|
|
2381
|
+
} catch (error) {
|
|
2382
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2383
|
+
`Failed to read JSON file: ${error instanceof Error ? error.message : String(error)}`
|
|
2384
|
+
);
|
|
2385
|
+
}
|
|
2386
|
+
} else if (typeof fetch !== "undefined") {
|
|
2387
|
+
try {
|
|
2388
|
+
const response = await fetch(path);
|
|
2389
|
+
if (!response.ok) {
|
|
2390
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`HTTP ${response.status}: ${response.statusText}`);
|
|
2391
|
+
}
|
|
2392
|
+
jsonString = await response.text();
|
|
2393
|
+
} catch (error) {
|
|
2394
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2395
|
+
`Failed to fetch JSON: ${error instanceof Error ? error.message : String(error)}`
|
|
2396
|
+
);
|
|
2397
|
+
}
|
|
2398
|
+
} else {
|
|
2399
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Environment not supported");
|
|
2400
|
+
}
|
|
2401
|
+
return _DataFrame.fromJsonString(jsonString);
|
|
2402
|
+
}
|
|
2403
|
+
/**
|
|
2404
|
+
* Write DataFrame to JSON file - environment-aware.
|
|
2405
|
+
* Time complexity: O(n × m) for generation + O(k) for write.
|
|
2406
|
+
*/
|
|
2407
|
+
async toJson(path) {
|
|
2408
|
+
const jsonString = this.toJsonString();
|
|
2409
|
+
if (typeof process !== "undefined" && process.versions?.node) {
|
|
2410
|
+
try {
|
|
2411
|
+
const fs = await import('fs/promises');
|
|
2412
|
+
await fs.writeFile(path, jsonString, "utf-8");
|
|
2413
|
+
} catch (error) {
|
|
2414
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2415
|
+
`Failed to write JSON file: ${error instanceof Error ? error.message : String(error)}`
|
|
2416
|
+
);
|
|
2417
|
+
}
|
|
2418
|
+
} else if (typeof document !== "undefined" && typeof URL !== "undefined") {
|
|
2419
|
+
const blob = new Blob([jsonString], {
|
|
2420
|
+
type: "application/json;charset=utf-8;"
|
|
2421
|
+
});
|
|
2422
|
+
const url = URL.createObjectURL(blob);
|
|
2423
|
+
const link = document.createElement("a");
|
|
2424
|
+
link.href = url;
|
|
2425
|
+
link.download = path;
|
|
2426
|
+
link.style.display = "none";
|
|
2427
|
+
document.body.appendChild(link);
|
|
2428
|
+
link.click();
|
|
2429
|
+
document.body.removeChild(link);
|
|
2430
|
+
URL.revokeObjectURL(url);
|
|
2431
|
+
} else {
|
|
2432
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Environment not supported");
|
|
2433
|
+
}
|
|
2434
|
+
}
|
|
2435
|
+
/**
|
|
2436
|
+
* Create DataFrame from a Tensor.
|
|
2437
|
+
*
|
|
2438
|
+
* @param tensor - Tensor to convert (must be 1D or 2D)
|
|
2439
|
+
* @param columns - Column names (optional). If provided, length must match tensor columns.
|
|
2440
|
+
* @returns DataFrame
|
|
2441
|
+
*
|
|
2442
|
+
* @example
|
|
2443
|
+
* ```ts
|
|
2444
|
+
* import { tensor } from 'deepbox/ndarray';
|
|
2445
|
+
*
|
|
2446
|
+
* const t = tensor([[1, 2], [3, 4], [5, 6]]);
|
|
2447
|
+
* const df = DataFrame.fromTensor(t, ['col1', 'col2']);
|
|
2448
|
+
* ```
|
|
2449
|
+
*/
|
|
2450
|
+
static fromTensor(tensor2, columns) {
|
|
2451
|
+
const storage = tensor2.data;
|
|
2452
|
+
let data;
|
|
2453
|
+
if (storage instanceof BigInt64Array) {
|
|
2454
|
+
data = Array.from(storage, (v) => Number(v));
|
|
2455
|
+
} else if (ArrayBuffer.isView(storage)) {
|
|
2456
|
+
data = Array.from(storage, (v) => Number(v));
|
|
2457
|
+
} else if (Array.isArray(storage)) {
|
|
2458
|
+
data = [...storage];
|
|
2459
|
+
} else {
|
|
2460
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Unsupported tensor storage type");
|
|
2461
|
+
}
|
|
2462
|
+
if (tensor2.ndim === 1) {
|
|
2463
|
+
if (columns && columns.length !== 1) {
|
|
2464
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2465
|
+
`Expected exactly 1 column name for 1D tensor, received ${columns.length}`
|
|
2466
|
+
);
|
|
2467
|
+
}
|
|
2468
|
+
const colName = columns?.[0] ?? "col0";
|
|
2469
|
+
return new _DataFrame({ [colName]: data });
|
|
2470
|
+
}
|
|
2471
|
+
if (tensor2.ndim === 2) {
|
|
2472
|
+
const rows = tensor2.shape[0];
|
|
2473
|
+
const cols = tensor2.shape[1];
|
|
2474
|
+
if (rows === void 0 || cols === void 0) {
|
|
2475
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Invalid tensor shape");
|
|
2476
|
+
}
|
|
2477
|
+
if (columns && columns.length !== cols) {
|
|
2478
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2479
|
+
`Column count (${columns.length}) must match tensor columns (${cols})`
|
|
2480
|
+
);
|
|
2481
|
+
}
|
|
2482
|
+
const dfData = {};
|
|
2483
|
+
for (let c = 0; c < cols; c++) {
|
|
2484
|
+
const colName = columns?.[c] ?? `col${c}`;
|
|
2485
|
+
const colData = [];
|
|
2486
|
+
for (let r = 0; r < rows; r++) {
|
|
2487
|
+
colData.push(data[r * cols + c]);
|
|
2488
|
+
}
|
|
2489
|
+
dfData[colName] = colData;
|
|
2490
|
+
}
|
|
2491
|
+
return new _DataFrame(dfData, {
|
|
2492
|
+
columns: columns ?? Array.from({ length: cols }, (_, i) => `col${i}`)
|
|
2493
|
+
});
|
|
2494
|
+
}
|
|
2495
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
2496
|
+
`Cannot create DataFrame from ${tensor2.ndim}D tensor. Only 1D and 2D tensors supported.`
|
|
2497
|
+
);
|
|
2498
|
+
}
|
|
2499
|
+
/**
|
|
2500
|
+
* Remove duplicate rows from DataFrame.
|
|
2501
|
+
* Time complexity: O(n × m) where n is rows, m is columns.
|
|
2502
|
+
*
|
|
2503
|
+
* @param subset - Columns to consider for identifying duplicates (default: all columns)
|
|
2504
|
+
* @param keep - Which duplicates to keep: 'first', 'last', or false (remove all)
|
|
2505
|
+
* @returns New DataFrame with duplicates removed
|
|
2506
|
+
*
|
|
2507
|
+
* @example
|
|
2508
|
+
* ```ts
|
|
2509
|
+
* const df = new DataFrame({ a: [1, 1, 2], b: [3, 3, 4] });
|
|
2510
|
+
* df.drop_duplicates(); // Keeps first occurrence: [[1, 3], [2, 4]]
|
|
2511
|
+
* df.drop_duplicates(undefined, 'last'); // Keeps last occurrence
|
|
2512
|
+
* ```
|
|
2513
|
+
*/
|
|
2514
|
+
drop_duplicates(subset, keep = "first") {
|
|
2515
|
+
const checkCols = subset ?? this._columns;
|
|
2516
|
+
for (const col of checkCols) {
|
|
2517
|
+
if (!this._columns.includes(col)) {
|
|
2518
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${col}' not found in DataFrame`);
|
|
2519
|
+
}
|
|
2520
|
+
}
|
|
2521
|
+
const seen = /* @__PURE__ */ new Map();
|
|
2522
|
+
const keepIndices = [];
|
|
2523
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
2524
|
+
const signature = [];
|
|
2525
|
+
for (const col of checkCols) {
|
|
2526
|
+
signature.push(this._data.get(col)?.[i]);
|
|
2527
|
+
}
|
|
2528
|
+
const key = createKey(signature);
|
|
2529
|
+
const existing = seen.get(key);
|
|
2530
|
+
if (existing === void 0) {
|
|
2531
|
+
seen.set(key, [i]);
|
|
2532
|
+
} else {
|
|
2533
|
+
existing.push(i);
|
|
2534
|
+
}
|
|
2535
|
+
}
|
|
2536
|
+
for (const [_key, indices] of seen.entries()) {
|
|
2537
|
+
if (keep === "first") {
|
|
2538
|
+
const firstIndex = indices[0];
|
|
2539
|
+
if (firstIndex !== void 0) {
|
|
2540
|
+
keepIndices.push(firstIndex);
|
|
2541
|
+
}
|
|
2542
|
+
} else if (keep === "last") {
|
|
2543
|
+
const lastIndex = indices[indices.length - 1];
|
|
2544
|
+
if (lastIndex !== void 0) {
|
|
2545
|
+
keepIndices.push(lastIndex);
|
|
2546
|
+
}
|
|
2547
|
+
} else if (keep === false && indices.length === 1) {
|
|
2548
|
+
const onlyIndex = indices[0];
|
|
2549
|
+
if (onlyIndex !== void 0) {
|
|
2550
|
+
keepIndices.push(onlyIndex);
|
|
2551
|
+
}
|
|
2552
|
+
}
|
|
2553
|
+
}
|
|
2554
|
+
keepIndices.sort((a, b) => a - b);
|
|
2555
|
+
const newData = {};
|
|
2556
|
+
const newIndex = [];
|
|
2557
|
+
for (const col of this._columns) {
|
|
2558
|
+
newData[col] = [];
|
|
2559
|
+
}
|
|
2560
|
+
for (const idx of keepIndices) {
|
|
2561
|
+
const label = this._index[idx];
|
|
2562
|
+
if (label === void 0) {
|
|
2563
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Index label at position ${idx} is undefined`);
|
|
2564
|
+
}
|
|
2565
|
+
newIndex.push(label);
|
|
2566
|
+
for (const col of this._columns) {
|
|
2567
|
+
newData[col]?.push(this._data.get(col)?.[idx]);
|
|
2568
|
+
}
|
|
2569
|
+
}
|
|
2570
|
+
return new _DataFrame(newData, {
|
|
2571
|
+
columns: this._columns,
|
|
2572
|
+
index: newIndex
|
|
2573
|
+
});
|
|
2574
|
+
}
|
|
2575
|
+
/**
|
|
2576
|
+
* Return boolean Series indicating duplicate rows.
|
|
2577
|
+
* Time complexity: O(n × m).
|
|
2578
|
+
*
|
|
2579
|
+
* @param subset - Columns to consider for identifying duplicates
|
|
2580
|
+
* @param keep - Which duplicates to mark as False: 'first', 'last', or false (mark all)
|
|
2581
|
+
* @returns Series of booleans (true = duplicate, false = unique)
|
|
2582
|
+
*
|
|
2583
|
+
* @example
|
|
2584
|
+
* ```ts
|
|
2585
|
+
* const df = new DataFrame({ a: [1, 1, 2], b: [3, 3, 4] });
|
|
2586
|
+
* df.duplicated(); // Series([false, true, false])
|
|
2587
|
+
* ```
|
|
2588
|
+
*/
|
|
2589
|
+
duplicated(subset, keep = "first") {
|
|
2590
|
+
const checkCols = subset ?? this._columns;
|
|
2591
|
+
for (const col of checkCols) {
|
|
2592
|
+
if (!this._columns.includes(col)) {
|
|
2593
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${col}' not found in DataFrame`);
|
|
2594
|
+
}
|
|
2595
|
+
}
|
|
2596
|
+
const seen = /* @__PURE__ */ new Map();
|
|
2597
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
2598
|
+
const signature = [];
|
|
2599
|
+
for (const col of checkCols) {
|
|
2600
|
+
signature.push(this._data.get(col)?.[i]);
|
|
2601
|
+
}
|
|
2602
|
+
const key = createKey(signature);
|
|
2603
|
+
const existing = seen.get(key);
|
|
2604
|
+
if (existing === void 0) {
|
|
2605
|
+
seen.set(key, [i]);
|
|
2606
|
+
} else {
|
|
2607
|
+
existing.push(i);
|
|
2608
|
+
}
|
|
2609
|
+
}
|
|
2610
|
+
const isDuplicate = new Array(this._index.length).fill(false);
|
|
2611
|
+
for (const [_key, indices] of seen.entries()) {
|
|
2612
|
+
if (indices.length > 1) {
|
|
2613
|
+
if (keep === "first") {
|
|
2614
|
+
for (let i = 1; i < indices.length; i++) {
|
|
2615
|
+
const idx = indices[i];
|
|
2616
|
+
if (idx !== void 0) isDuplicate[idx] = true;
|
|
2617
|
+
}
|
|
2618
|
+
} else if (keep === "last") {
|
|
2619
|
+
for (let i = 0; i < indices.length - 1; i++) {
|
|
2620
|
+
const idx = indices[i];
|
|
2621
|
+
if (idx !== void 0) isDuplicate[idx] = true;
|
|
2622
|
+
}
|
|
2623
|
+
} else if (keep === false) {
|
|
2624
|
+
for (const idx of indices) {
|
|
2625
|
+
isDuplicate[idx] = true;
|
|
2626
|
+
}
|
|
2627
|
+
}
|
|
2628
|
+
}
|
|
2629
|
+
}
|
|
2630
|
+
return new Series(isDuplicate, { index: this._index });
|
|
2631
|
+
}
|
|
2632
|
+
/**
|
|
2633
|
+
* Rename columns or index labels.
|
|
2634
|
+
* Time complexity: O(m) for columns, O(n) for index.
|
|
2635
|
+
*
|
|
2636
|
+
* @param mapper - Object mapping old names to new names, or function to transform names
|
|
2637
|
+
* @param axis - 0 for index, 1 for columns
|
|
2638
|
+
* @returns New DataFrame with renamed labels
|
|
2639
|
+
*
|
|
2640
|
+
* @example
|
|
2641
|
+
* ```ts
|
|
2642
|
+
* const df = new DataFrame({ a: [1, 2], b: [3, 4] });
|
|
2643
|
+
* df.rename({ a: 'x', b: 'y' }, 1); // Rename columns a->x, b->y
|
|
2644
|
+
* df.rename((name) => name.toUpperCase(), 1); // Uppercase all column names
|
|
2645
|
+
* ```
|
|
2646
|
+
*/
|
|
2647
|
+
rename(mapper, axis = 1) {
|
|
2648
|
+
if (axis === 1) {
|
|
2649
|
+
const newColumns = this._columns.map((col) => {
|
|
2650
|
+
if (typeof mapper === "function") {
|
|
2651
|
+
return mapper(col);
|
|
2652
|
+
}
|
|
2653
|
+
return mapper[col] ?? col;
|
|
2654
|
+
});
|
|
2655
|
+
const newData = {};
|
|
2656
|
+
for (let i = 0; i < this._columns.length; i++) {
|
|
2657
|
+
const oldCol = this._columns[i];
|
|
2658
|
+
const newCol = newColumns[i];
|
|
2659
|
+
if (oldCol && newCol) {
|
|
2660
|
+
const colData = this._data.get(oldCol);
|
|
2661
|
+
if (colData) {
|
|
2662
|
+
newData[newCol] = [...colData];
|
|
2663
|
+
}
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2666
|
+
return new _DataFrame(newData, {
|
|
2667
|
+
columns: newColumns,
|
|
2668
|
+
index: this._index
|
|
2669
|
+
});
|
|
2670
|
+
} else {
|
|
2671
|
+
const newIndex = this._index.map((label) => {
|
|
2672
|
+
const labelStr = String(label);
|
|
2673
|
+
if (typeof mapper === "function") {
|
|
2674
|
+
return mapper(labelStr);
|
|
2675
|
+
}
|
|
2676
|
+
return mapper[labelStr] ?? label;
|
|
2677
|
+
});
|
|
2678
|
+
const newData = {};
|
|
2679
|
+
for (const col of this._columns) {
|
|
2680
|
+
const colData = this._data.get(col);
|
|
2681
|
+
if (colData) {
|
|
2682
|
+
newData[col] = [...colData];
|
|
2683
|
+
}
|
|
2684
|
+
}
|
|
2685
|
+
return new _DataFrame(newData, {
|
|
2686
|
+
columns: this._columns,
|
|
2687
|
+
index: newIndex
|
|
2688
|
+
});
|
|
2689
|
+
}
|
|
2690
|
+
}
|
|
2691
|
+
/**
|
|
2692
|
+
* Reset index to default integer index.
|
|
2693
|
+
* Time complexity: O(n).
|
|
2694
|
+
*
|
|
2695
|
+
* @param drop - If true, don't add old index as column.
|
|
2696
|
+
* If a column named "index" already exists, the new column will be
|
|
2697
|
+
* named "index_1", "index_2", etc.
|
|
2698
|
+
* @returns New DataFrame with reset index
|
|
2699
|
+
*
|
|
2700
|
+
* @example
|
|
2701
|
+
* ```ts
|
|
2702
|
+
* const df = new DataFrame({ a: [1, 2] }, { index: ['x', 'y'] });
|
|
2703
|
+
* df.reset_index(); // Index becomes [0, 1], adds 'index' column with ['x', 'y']
|
|
2704
|
+
* df.reset_index(true); // Index becomes [0, 1], no new column
|
|
2705
|
+
* ```
|
|
2706
|
+
*/
|
|
2707
|
+
reset_index(drop = false) {
|
|
2708
|
+
const newData = {};
|
|
2709
|
+
let indexName = "index";
|
|
2710
|
+
if (!drop) {
|
|
2711
|
+
if (this._columns.includes(indexName)) {
|
|
2712
|
+
let suffix = 1;
|
|
2713
|
+
while (this._columns.includes(`${indexName}_${suffix}`)) {
|
|
2714
|
+
suffix++;
|
|
2715
|
+
}
|
|
2716
|
+
indexName = `${indexName}_${suffix}`;
|
|
2717
|
+
}
|
|
2718
|
+
newData[indexName] = [...this._index];
|
|
2719
|
+
}
|
|
2720
|
+
for (const col of this._columns) {
|
|
2721
|
+
const colData = this._data.get(col);
|
|
2722
|
+
if (colData) {
|
|
2723
|
+
newData[col] = [...colData];
|
|
2724
|
+
}
|
|
2725
|
+
}
|
|
2726
|
+
const newColumns = drop ? this._columns : [indexName, ...this._columns];
|
|
2727
|
+
return new _DataFrame(newData, {
|
|
2728
|
+
columns: newColumns,
|
|
2729
|
+
index: Array.from({ length: this._index.length }, (_, i) => i)
|
|
2730
|
+
});
|
|
2731
|
+
}
|
|
2732
|
+
/**
|
|
2733
|
+
* Set a column as the index.
|
|
2734
|
+
* Time complexity: O(n).
|
|
2735
|
+
*
|
|
2736
|
+
* @param column - Column name to use as index
|
|
2737
|
+
* @param drop - If true, remove the column after setting it as index
|
|
2738
|
+
* @returns New DataFrame with new index
|
|
2739
|
+
*
|
|
2740
|
+
* @example
|
|
2741
|
+
* ```ts
|
|
2742
|
+
* const df = new DataFrame({ id: ['a', 'b', 'c'], value: [1, 2, 3] });
|
|
2743
|
+
* df.set_index('id'); // Index becomes ['a', 'b', 'c']
|
|
2744
|
+
* ```
|
|
2745
|
+
*/
|
|
2746
|
+
set_index(column, drop = true) {
|
|
2747
|
+
if (!this._columns.includes(column)) {
|
|
2748
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
2749
|
+
`Column '${column}' not found in DataFrame`,
|
|
2750
|
+
"column",
|
|
2751
|
+
column
|
|
2752
|
+
);
|
|
2753
|
+
}
|
|
2754
|
+
const newIndexData = this._data.get(column);
|
|
2755
|
+
if (!newIndexData) {
|
|
2756
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${column}' has no data`);
|
|
2757
|
+
}
|
|
2758
|
+
const newIndex = newIndexData.map(
|
|
2759
|
+
(v) => typeof v === "string" || typeof v === "number" ? v : String(v)
|
|
2760
|
+
);
|
|
2761
|
+
const newData = {};
|
|
2762
|
+
const newColumns = [];
|
|
2763
|
+
for (const col of this._columns) {
|
|
2764
|
+
if (col === column && drop) continue;
|
|
2765
|
+
const colData = this._data.get(col);
|
|
2766
|
+
if (colData) {
|
|
2767
|
+
newData[col] = [...colData];
|
|
2768
|
+
newColumns.push(col);
|
|
2769
|
+
}
|
|
2770
|
+
}
|
|
2771
|
+
return new _DataFrame(newData, {
|
|
2772
|
+
columns: newColumns,
|
|
2773
|
+
index: newIndex
|
|
2774
|
+
});
|
|
2775
|
+
}
|
|
2776
|
+
/**
|
|
2777
|
+
* Return boolean DataFrame showing null values.
|
|
2778
|
+
* Time complexity: O(n × m).
|
|
2779
|
+
*
|
|
2780
|
+
* @returns DataFrame of booleans (true = null/undefined, false = not null)
|
|
2781
|
+
*
|
|
2782
|
+
* @example
|
|
2783
|
+
* ```ts
|
|
2784
|
+
* const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
|
|
2785
|
+
* df.isnull(); // [[false, false], [true, false], [false, true]]
|
|
2786
|
+
* ```
|
|
2787
|
+
*/
|
|
2788
|
+
isnull() {
|
|
2789
|
+
const newData = {};
|
|
2790
|
+
for (const col of this._columns) {
|
|
2791
|
+
const colData = this._data.get(col);
|
|
2792
|
+
if (colData) {
|
|
2793
|
+
newData[col] = colData.map((v) => v === null || v === void 0);
|
|
2794
|
+
}
|
|
2795
|
+
}
|
|
2796
|
+
return new _DataFrame(newData, {
|
|
2797
|
+
columns: this._columns,
|
|
2798
|
+
index: this._index
|
|
2799
|
+
});
|
|
2800
|
+
}
|
|
2801
|
+
/**
|
|
2802
|
+
* Return boolean DataFrame showing non-null values.
|
|
2803
|
+
* Time complexity: O(n × m).
|
|
2804
|
+
*
|
|
2805
|
+
* @returns DataFrame of booleans (true = not null, false = null/undefined)
|
|
2806
|
+
*
|
|
2807
|
+
* @example
|
|
2808
|
+
* ```ts
|
|
2809
|
+
* const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
|
|
2810
|
+
* df.notnull(); // [[true, true], [false, true], [true, false]]
|
|
2811
|
+
* ```
|
|
2812
|
+
*/
|
|
2813
|
+
notnull() {
|
|
2814
|
+
const newData = {};
|
|
2815
|
+
for (const col of this._columns) {
|
|
2816
|
+
const colData = this._data.get(col);
|
|
2817
|
+
if (colData) {
|
|
2818
|
+
newData[col] = colData.map((v) => v !== null && v !== void 0);
|
|
2819
|
+
}
|
|
2820
|
+
}
|
|
2821
|
+
return new _DataFrame(newData, {
|
|
2822
|
+
columns: this._columns,
|
|
2823
|
+
index: this._index
|
|
2824
|
+
});
|
|
2825
|
+
}
|
|
2826
|
+
/**
|
|
2827
|
+
* Replace values in DataFrame.
|
|
2828
|
+
* Time complexity: O(n × m).
|
|
2829
|
+
*
|
|
2830
|
+
* @param toReplace - Value or array of values to replace
|
|
2831
|
+
* @param value - Replacement value
|
|
2832
|
+
* @returns New DataFrame with replaced values
|
|
2833
|
+
*
|
|
2834
|
+
* @example
|
|
2835
|
+
* ```ts
|
|
2836
|
+
* const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
|
|
2837
|
+
* df.replace(2, 99); // Replace all 2s with 99
|
|
2838
|
+
* df.replace([1, 2], 0); // Replace 1s and 2s with 0
|
|
2839
|
+
* ```
|
|
2840
|
+
*/
|
|
2841
|
+
replace(toReplace, value) {
|
|
2842
|
+
const replaceSet = new Set(Array.isArray(toReplace) ? toReplace : [toReplace]);
|
|
2843
|
+
const newData = {};
|
|
2844
|
+
for (const col of this._columns) {
|
|
2845
|
+
const colData = this._data.get(col);
|
|
2846
|
+
if (colData) {
|
|
2847
|
+
newData[col] = colData.map((v) => replaceSet.has(v) ? value : v);
|
|
2848
|
+
}
|
|
2849
|
+
}
|
|
2850
|
+
return new _DataFrame(newData, {
|
|
2851
|
+
columns: this._columns,
|
|
2852
|
+
index: this._index
|
|
2853
|
+
});
|
|
2854
|
+
}
|
|
2855
|
+
/**
|
|
2856
|
+
* Clip (limit) values in a range.
|
|
2857
|
+
* Time complexity: O(n × m).
|
|
2858
|
+
*
|
|
2859
|
+
* @param lower - Minimum value (values below are set to this)
|
|
2860
|
+
* @param upper - Maximum value (values above are set to this)
|
|
2861
|
+
* @returns New DataFrame with clipped values
|
|
2862
|
+
*
|
|
2863
|
+
* @example
|
|
2864
|
+
* ```ts
|
|
2865
|
+
* const df = new DataFrame({ a: [1, 5, 10], b: [2, 8, 15] });
|
|
2866
|
+
* df.clip(3, 9); // [[3, 3], [5, 8], [9, 9]]
|
|
2867
|
+
* ```
|
|
2868
|
+
*/
|
|
2869
|
+
clip(lower, upper) {
|
|
2870
|
+
const newData = {};
|
|
2871
|
+
for (const col of this._columns) {
|
|
2872
|
+
const colData = this._data.get(col);
|
|
2873
|
+
if (colData) {
|
|
2874
|
+
newData[col] = colData.map((v) => {
|
|
2875
|
+
if (typeof v !== "number") return v;
|
|
2876
|
+
let result = v;
|
|
2877
|
+
if (lower !== void 0 && result < lower) result = lower;
|
|
2878
|
+
if (upper !== void 0 && result > upper) result = upper;
|
|
2879
|
+
return result;
|
|
2880
|
+
});
|
|
2881
|
+
}
|
|
2882
|
+
}
|
|
2883
|
+
return new _DataFrame(newData, {
|
|
2884
|
+
columns: this._columns,
|
|
2885
|
+
index: this._index
|
|
2886
|
+
});
|
|
2887
|
+
}
|
|
2888
|
+
/**
|
|
2889
|
+
* Return a random sample of rows.
|
|
2890
|
+
* Time complexity: O(n) for sampling.
|
|
2891
|
+
*
|
|
2892
|
+
* @param n - Number of rows to sample
|
|
2893
|
+
* @param random_state - Random seed for reproducibility
|
|
2894
|
+
* @returns New DataFrame with sampled rows
|
|
2895
|
+
*
|
|
2896
|
+
* @example
|
|
2897
|
+
* ```ts
|
|
2898
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
|
|
2899
|
+
* df.sample(3); // Random 3 rows
|
|
2900
|
+
* ```
|
|
2901
|
+
*/
|
|
2902
|
+
sample(n, random_state) {
|
|
2903
|
+
if (!Number.isFinite(n) || !Number.isInteger(n)) {
|
|
2904
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("n must be a finite integer", "n", n);
|
|
2905
|
+
}
|
|
2906
|
+
if (random_state !== void 0) {
|
|
2907
|
+
if (!Number.isFinite(random_state) || !Number.isInteger(random_state)) {
|
|
2908
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
2909
|
+
"random_state must be a finite integer",
|
|
2910
|
+
"random_state",
|
|
2911
|
+
random_state
|
|
2912
|
+
);
|
|
2913
|
+
}
|
|
2914
|
+
}
|
|
2915
|
+
if (n < 0 || n > this._index.length) {
|
|
2916
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Sample size ${n} must be between 0 and ${this._index.length}`);
|
|
2917
|
+
}
|
|
2918
|
+
const rng = random_state !== void 0 ? this.seededRandom(random_state) : Math.random;
|
|
2919
|
+
const indices = Array.from({ length: this._index.length }, (_, i) => i);
|
|
2920
|
+
for (let i = indices.length - 1; i > 0; i--) {
|
|
2921
|
+
const j = Math.floor(rng() * (i + 1));
|
|
2922
|
+
const current = indices[i];
|
|
2923
|
+
const swap = indices[j];
|
|
2924
|
+
if (current === void 0 || swap === void 0) {
|
|
2925
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Sample index resolution failed");
|
|
2926
|
+
}
|
|
2927
|
+
indices[i] = swap;
|
|
2928
|
+
indices[j] = current;
|
|
2929
|
+
}
|
|
2930
|
+
const sampledIndices = indices.slice(0, n);
|
|
2931
|
+
const newData = {};
|
|
2932
|
+
const newIndex = [];
|
|
2933
|
+
for (const col of this._columns) {
|
|
2934
|
+
newData[col] = [];
|
|
2935
|
+
}
|
|
2936
|
+
for (const idx of sampledIndices) {
|
|
2937
|
+
const label = this._index[idx];
|
|
2938
|
+
if (label === void 0) {
|
|
2939
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Index label at position ${idx} is undefined`);
|
|
2940
|
+
}
|
|
2941
|
+
newIndex.push(label);
|
|
2942
|
+
for (const col of this._columns) {
|
|
2943
|
+
newData[col]?.push(this._data.get(col)?.[idx]);
|
|
2944
|
+
}
|
|
2945
|
+
}
|
|
2946
|
+
return new _DataFrame(newData, {
|
|
2947
|
+
columns: this._columns,
|
|
2948
|
+
index: newIndex
|
|
2949
|
+
});
|
|
2950
|
+
}
|
|
2951
|
+
/**
|
|
2952
|
+
* Seeded random number generator for reproducibility.
|
|
2953
|
+
* @private
|
|
2954
|
+
*/
|
|
2955
|
+
seededRandom(seed) {
|
|
2956
|
+
let state = seed >>> 0;
|
|
2957
|
+
return () => {
|
|
2958
|
+
state = (state * 1664525 + 1013904223) % 2 ** 32;
|
|
2959
|
+
return state / 2 ** 32;
|
|
2960
|
+
};
|
|
2961
|
+
}
|
|
2962
|
+
/**
|
|
2963
|
+
* Return values at the given quantile.
|
|
2964
|
+
* Time complexity: O(n log n) per column due to sorting.
|
|
2965
|
+
*
|
|
2966
|
+
* @param q - Quantile to compute (0 to 1)
|
|
2967
|
+
* @returns Series with quantile values for each numeric column
|
|
2968
|
+
*
|
|
2969
|
+
* @example
|
|
2970
|
+
* ```ts
|
|
2971
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [10, 20, 30, 40, 50] });
|
|
2972
|
+
* df.quantile(0.5); // Median: Series({ a: 3, b: 30 })
|
|
2973
|
+
* df.quantile(0.25); // 25th percentile
|
|
2974
|
+
* ```
|
|
2975
|
+
*/
|
|
2976
|
+
quantile(q) {
|
|
2977
|
+
if (!Number.isFinite(q) || q < 0 || q > 1) {
|
|
2978
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("q must be a finite number between 0 and 1", "q", q);
|
|
2979
|
+
}
|
|
2980
|
+
const result = [];
|
|
2981
|
+
const resultIndex = [];
|
|
2982
|
+
for (const col of this._columns) {
|
|
2983
|
+
const colData = this._data.get(col);
|
|
2984
|
+
if (!colData) continue;
|
|
2985
|
+
const numericData = toNumericValues(colData);
|
|
2986
|
+
if (numericData.length === 0) {
|
|
2987
|
+
result.push(NaN);
|
|
2988
|
+
resultIndex.push(col);
|
|
2989
|
+
continue;
|
|
2990
|
+
}
|
|
2991
|
+
const sorted = [...numericData].sort((a, b) => a - b);
|
|
2992
|
+
const idx = q * (sorted.length - 1);
|
|
2993
|
+
const lower = Math.floor(idx);
|
|
2994
|
+
const upper = Math.ceil(idx);
|
|
2995
|
+
const weight = idx - lower;
|
|
2996
|
+
const value = (sorted[lower] ?? 0) * (1 - weight) + (sorted[upper] ?? 0) * weight;
|
|
2997
|
+
result.push(value);
|
|
2998
|
+
resultIndex.push(col);
|
|
2999
|
+
}
|
|
3000
|
+
return new Series(result, { index: resultIndex });
|
|
3001
|
+
}
|
|
3002
|
+
/**
|
|
3003
|
+
* Compute numerical rank of values (1 through n) along axis.
|
|
3004
|
+
* Time complexity: O(n log n) per column.
|
|
3005
|
+
*
|
|
3006
|
+
* @param method - How to rank ties: 'average', 'min', 'max', 'first', 'dense'
|
|
3007
|
+
* @param ascending - Rank in ascending order
|
|
3008
|
+
* @returns New DataFrame with ranks
|
|
3009
|
+
*
|
|
3010
|
+
* @example
|
|
3011
|
+
* ```ts
|
|
3012
|
+
* const df = new DataFrame({ a: [3, 1, 2, 1] });
|
|
3013
|
+
* df.rank(); // [[4], [1.5], [3], [1.5]] (average method)
|
|
3014
|
+
* df.rank('min'); // [[4], [1], [3], [1]]
|
|
3015
|
+
* ```
|
|
3016
|
+
*/
|
|
3017
|
+
rank(method = "average", ascending = true) {
|
|
3018
|
+
const newData = {};
|
|
3019
|
+
for (const col of this._columns) {
|
|
3020
|
+
const colData = this._data.get(col);
|
|
3021
|
+
if (!colData) continue;
|
|
3022
|
+
const numericData = toNumericValues(colData);
|
|
3023
|
+
if (numericData.length === 0) {
|
|
3024
|
+
newData[col] = colData.map(() => null);
|
|
3025
|
+
continue;
|
|
3026
|
+
}
|
|
3027
|
+
const indexed = colData.map((v, i2) => ({ value: v, index: i2 }));
|
|
3028
|
+
const numericIndexed = indexed.filter(
|
|
3029
|
+
(item) => isValidNumber(item.value)
|
|
3030
|
+
);
|
|
3031
|
+
numericIndexed.sort((a, b) => {
|
|
3032
|
+
if (ascending) {
|
|
3033
|
+
return a.value - b.value;
|
|
3034
|
+
}
|
|
3035
|
+
return b.value - a.value;
|
|
3036
|
+
});
|
|
3037
|
+
const ranks = new Array(colData.length).fill(null);
|
|
3038
|
+
let i = 0;
|
|
3039
|
+
let denseRank = 0;
|
|
3040
|
+
while (i < numericIndexed.length) {
|
|
3041
|
+
const currentItem = numericIndexed[i];
|
|
3042
|
+
if (!currentItem) {
|
|
3043
|
+
break;
|
|
3044
|
+
}
|
|
3045
|
+
const currentValue = currentItem.value;
|
|
3046
|
+
const tieStart = i;
|
|
3047
|
+
while (i < numericIndexed.length) {
|
|
3048
|
+
const nextItem = numericIndexed[i];
|
|
3049
|
+
if (!nextItem || nextItem.value !== currentValue) {
|
|
3050
|
+
break;
|
|
3051
|
+
}
|
|
3052
|
+
i++;
|
|
3053
|
+
}
|
|
3054
|
+
const tieEnd = i;
|
|
3055
|
+
denseRank++;
|
|
3056
|
+
for (let j = tieStart; j < tieEnd; j++) {
|
|
3057
|
+
const item = numericIndexed[j];
|
|
3058
|
+
if (!item) continue;
|
|
3059
|
+
let rank;
|
|
3060
|
+
if (method === "average") {
|
|
3061
|
+
rank = (tieStart + tieEnd + 1) / 2;
|
|
3062
|
+
} else if (method === "min") {
|
|
3063
|
+
rank = tieStart + 1;
|
|
3064
|
+
} else if (method === "max") {
|
|
3065
|
+
rank = tieEnd;
|
|
3066
|
+
} else if (method === "first") {
|
|
3067
|
+
rank = j + 1;
|
|
3068
|
+
} else {
|
|
3069
|
+
rank = denseRank;
|
|
3070
|
+
}
|
|
3071
|
+
ranks[item.index] = rank;
|
|
3072
|
+
}
|
|
3073
|
+
}
|
|
3074
|
+
newData[col] = ranks;
|
|
3075
|
+
}
|
|
3076
|
+
return new _DataFrame(newData, {
|
|
3077
|
+
columns: this._columns,
|
|
3078
|
+
index: this._index
|
|
3079
|
+
});
|
|
3080
|
+
}
|
|
3081
|
+
/**
|
|
3082
|
+
* Calculate the difference between consecutive rows.
|
|
3083
|
+
* Time complexity: O(n × m).
|
|
3084
|
+
*
|
|
3085
|
+
* @param periods - Number of periods to shift (default: 1)
|
|
3086
|
+
* @returns New DataFrame with differences
|
|
3087
|
+
*
|
|
3088
|
+
* @example
|
|
3089
|
+
* ```ts
|
|
3090
|
+
* const df = new DataFrame({ a: [1, 3, 6, 10] });
|
|
3091
|
+
* df.diff(); // [[null], [2], [3], [4]]
|
|
3092
|
+
* df.diff(2); // [[null], [null], [5], [7]]
|
|
3093
|
+
* ```
|
|
3094
|
+
*/
|
|
3095
|
+
diff(periods = 1) {
|
|
3096
|
+
if (!Number.isFinite(periods) || !Number.isInteger(periods) || periods < 0) {
|
|
3097
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("periods must be a non-negative integer", "periods", periods);
|
|
3098
|
+
}
|
|
3099
|
+
const newData = {};
|
|
3100
|
+
for (const col of this._columns) {
|
|
3101
|
+
const colData = this._data.get(col);
|
|
3102
|
+
if (!colData) continue;
|
|
3103
|
+
const diffData = [];
|
|
3104
|
+
for (let i = 0; i < colData.length; i++) {
|
|
3105
|
+
if (i < periods) {
|
|
3106
|
+
diffData.push(null);
|
|
3107
|
+
} else {
|
|
3108
|
+
const current = colData[i];
|
|
3109
|
+
const previous = colData[i - periods];
|
|
3110
|
+
if (typeof current === "number" && typeof previous === "number") {
|
|
3111
|
+
diffData.push(current - previous);
|
|
3112
|
+
} else {
|
|
3113
|
+
diffData.push(null);
|
|
3114
|
+
}
|
|
3115
|
+
}
|
|
3116
|
+
}
|
|
3117
|
+
newData[col] = diffData;
|
|
3118
|
+
}
|
|
3119
|
+
return new _DataFrame(newData, {
|
|
3120
|
+
columns: this._columns,
|
|
3121
|
+
index: this._index
|
|
3122
|
+
});
|
|
3123
|
+
}
|
|
3124
|
+
/**
|
|
3125
|
+
* Calculate percentage change between consecutive rows.
|
|
3126
|
+
* Time complexity: O(n × m).
|
|
3127
|
+
*
|
|
3128
|
+
* @param periods - Number of periods to shift (default: 1)
|
|
3129
|
+
* @returns New DataFrame with percentage changes
|
|
3130
|
+
*
|
|
3131
|
+
* @example
|
|
3132
|
+
* ```ts
|
|
3133
|
+
* const df = new DataFrame({ a: [100, 110, 121] });
|
|
3134
|
+
* df.pct_change(); // [[null], [0.1], [0.1]] (10% increase each time)
|
|
3135
|
+
* ```
|
|
3136
|
+
*/
|
|
3137
|
+
pct_change(periods = 1) {
|
|
3138
|
+
if (!Number.isFinite(periods) || !Number.isInteger(periods) || periods < 0) {
|
|
3139
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("periods must be a non-negative integer", "periods", periods);
|
|
3140
|
+
}
|
|
3141
|
+
const newData = {};
|
|
3142
|
+
for (const col of this._columns) {
|
|
3143
|
+
const colData = this._data.get(col);
|
|
3144
|
+
if (!colData) continue;
|
|
3145
|
+
const pctData = [];
|
|
3146
|
+
for (let i = 0; i < colData.length; i++) {
|
|
3147
|
+
if (i < periods) {
|
|
3148
|
+
pctData.push(null);
|
|
3149
|
+
} else {
|
|
3150
|
+
const current = colData[i];
|
|
3151
|
+
const previous = colData[i - periods];
|
|
3152
|
+
if (typeof current === "number" && typeof previous === "number" && previous !== 0) {
|
|
3153
|
+
pctData.push((current - previous) / previous);
|
|
3154
|
+
} else {
|
|
3155
|
+
pctData.push(null);
|
|
3156
|
+
}
|
|
3157
|
+
}
|
|
3158
|
+
}
|
|
3159
|
+
newData[col] = pctData;
|
|
3160
|
+
}
|
|
3161
|
+
return new _DataFrame(newData, {
|
|
3162
|
+
columns: this._columns,
|
|
3163
|
+
index: this._index
|
|
3164
|
+
});
|
|
3165
|
+
}
|
|
3166
|
+
/**
|
|
3167
|
+
* Return cumulative sum over DataFrame axis.
|
|
3168
|
+
* Time complexity: O(n × m).
|
|
3169
|
+
*
|
|
3170
|
+
* @returns New DataFrame with cumulative sums
|
|
3171
|
+
*
|
|
3172
|
+
* @example
|
|
3173
|
+
* ```ts
|
|
3174
|
+
* const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
|
|
3175
|
+
* df.cumsum(); // [[1, 4], [3, 9], [6, 15]]
|
|
3176
|
+
* ```
|
|
3177
|
+
*/
|
|
3178
|
+
cumsum() {
|
|
3179
|
+
const newData = {};
|
|
3180
|
+
for (const col of this._columns) {
|
|
3181
|
+
const colData = this._data.get(col);
|
|
3182
|
+
if (!colData) continue;
|
|
3183
|
+
const cumData = [];
|
|
3184
|
+
let cumSum = 0;
|
|
3185
|
+
for (const value of colData) {
|
|
3186
|
+
if (typeof value === "number") {
|
|
3187
|
+
cumSum += value;
|
|
3188
|
+
cumData.push(cumSum);
|
|
3189
|
+
} else {
|
|
3190
|
+
cumData.push(null);
|
|
3191
|
+
}
|
|
3192
|
+
}
|
|
3193
|
+
newData[col] = cumData;
|
|
3194
|
+
}
|
|
3195
|
+
return new _DataFrame(newData, {
|
|
3196
|
+
columns: this._columns,
|
|
3197
|
+
index: this._index
|
|
3198
|
+
});
|
|
3199
|
+
}
|
|
3200
|
+
/**
|
|
3201
|
+
* Return cumulative product over DataFrame axis.
|
|
3202
|
+
* Time complexity: O(n × m).
|
|
3203
|
+
*
|
|
3204
|
+
* @returns New DataFrame with cumulative products
|
|
3205
|
+
*
|
|
3206
|
+
* @example
|
|
3207
|
+
* ```ts
|
|
3208
|
+
* const df = new DataFrame({ a: [2, 3, 4] });
|
|
3209
|
+
* df.cumprod(); // [[2], [6], [24]]
|
|
3210
|
+
* ```
|
|
3211
|
+
*/
|
|
3212
|
+
cumprod() {
|
|
3213
|
+
const newData = {};
|
|
3214
|
+
for (const col of this._columns) {
|
|
3215
|
+
const colData = this._data.get(col);
|
|
3216
|
+
if (!colData) continue;
|
|
3217
|
+
const cumData = [];
|
|
3218
|
+
let cumProd = 1;
|
|
3219
|
+
for (const value of colData) {
|
|
3220
|
+
if (typeof value === "number") {
|
|
3221
|
+
cumProd *= value;
|
|
3222
|
+
cumData.push(cumProd);
|
|
3223
|
+
} else {
|
|
3224
|
+
cumData.push(null);
|
|
3225
|
+
}
|
|
3226
|
+
}
|
|
3227
|
+
newData[col] = cumData;
|
|
3228
|
+
}
|
|
3229
|
+
return new _DataFrame(newData, {
|
|
3230
|
+
columns: this._columns,
|
|
3231
|
+
index: this._index
|
|
3232
|
+
});
|
|
3233
|
+
}
|
|
3234
|
+
/**
|
|
3235
|
+
* Return cumulative maximum over DataFrame axis.
|
|
3236
|
+
* Time complexity: O(n × m).
|
|
3237
|
+
*
|
|
3238
|
+
* @returns New DataFrame with cumulative maximums
|
|
3239
|
+
*
|
|
3240
|
+
* @example
|
|
3241
|
+
* ```ts
|
|
3242
|
+
* const df = new DataFrame({ a: [3, 1, 5, 2] });
|
|
3243
|
+
* df.cummax(); // [[3], [3], [5], [5]]
|
|
3244
|
+
* ```
|
|
3245
|
+
*/
|
|
3246
|
+
cummax() {
|
|
3247
|
+
const newData = {};
|
|
3248
|
+
for (const col of this._columns) {
|
|
3249
|
+
const colData = this._data.get(col);
|
|
3250
|
+
if (!colData) continue;
|
|
3251
|
+
const cumData = [];
|
|
3252
|
+
let cumMax = -Infinity;
|
|
3253
|
+
for (const value of colData) {
|
|
3254
|
+
if (typeof value === "number") {
|
|
3255
|
+
cumMax = Math.max(cumMax, value);
|
|
3256
|
+
cumData.push(cumMax);
|
|
3257
|
+
} else {
|
|
3258
|
+
cumData.push(null);
|
|
3259
|
+
}
|
|
3260
|
+
}
|
|
3261
|
+
newData[col] = cumData;
|
|
3262
|
+
}
|
|
3263
|
+
return new _DataFrame(newData, {
|
|
3264
|
+
columns: this._columns,
|
|
3265
|
+
index: this._index
|
|
3266
|
+
});
|
|
3267
|
+
}
|
|
3268
|
+
/**
|
|
3269
|
+
* Return cumulative minimum over DataFrame axis.
|
|
3270
|
+
* Time complexity: O(n × m).
|
|
3271
|
+
*
|
|
3272
|
+
* @returns New DataFrame with cumulative minimums
|
|
3273
|
+
*
|
|
3274
|
+
* @example
|
|
3275
|
+
* ```ts
|
|
3276
|
+
* const df = new DataFrame({ a: [3, 1, 5, 2] });
|
|
3277
|
+
* df.cummin(); // [[3], [1], [1], [1]]
|
|
3278
|
+
* ```
|
|
3279
|
+
*/
|
|
3280
|
+
cummin() {
|
|
3281
|
+
const newData = {};
|
|
3282
|
+
for (const col of this._columns) {
|
|
3283
|
+
const colData = this._data.get(col);
|
|
3284
|
+
if (!colData) continue;
|
|
3285
|
+
const cumData = [];
|
|
3286
|
+
let cumMin = Infinity;
|
|
3287
|
+
for (const value of colData) {
|
|
3288
|
+
if (typeof value === "number") {
|
|
3289
|
+
cumMin = Math.min(cumMin, value);
|
|
3290
|
+
cumData.push(cumMin);
|
|
3291
|
+
} else {
|
|
3292
|
+
cumData.push(null);
|
|
3293
|
+
}
|
|
3294
|
+
}
|
|
3295
|
+
newData[col] = cumData;
|
|
3296
|
+
}
|
|
3297
|
+
return new _DataFrame(newData, {
|
|
3298
|
+
columns: this._columns,
|
|
3299
|
+
index: this._index
|
|
3300
|
+
});
|
|
3301
|
+
}
|
|
3302
|
+
/**
|
|
3303
|
+
* Shift index by desired number of periods.
|
|
3304
|
+
* Time complexity: O(n × m).
|
|
3305
|
+
*
|
|
3306
|
+
* @param periods - Number of periods to shift (positive = down, negative = up)
|
|
3307
|
+
* @param fill_value - Value to use for newly introduced missing values
|
|
3308
|
+
* @returns New DataFrame with shifted data
|
|
3309
|
+
*
|
|
3310
|
+
* @example
|
|
3311
|
+
* ```ts
|
|
3312
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4] });
|
|
3313
|
+
* df.shift(1); // [[null], [1], [2], [3]]
|
|
3314
|
+
* df.shift(-1); // [[2], [3], [4], [null]]
|
|
3315
|
+
* df.shift(1, 0); // [[0], [1], [2], [3]]
|
|
3316
|
+
* ```
|
|
3317
|
+
*/
|
|
3318
|
+
shift(periods = 1, fill_value = null) {
|
|
3319
|
+
if (!Number.isFinite(periods) || !Number.isInteger(periods)) {
|
|
3320
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("periods must be a finite integer", "periods", periods);
|
|
3321
|
+
}
|
|
3322
|
+
const newData = {};
|
|
3323
|
+
for (const col of this._columns) {
|
|
3324
|
+
const colData = this._data.get(col);
|
|
3325
|
+
if (!colData) continue;
|
|
3326
|
+
const shiftedData = [];
|
|
3327
|
+
const rowCount = colData.length;
|
|
3328
|
+
if (periods > 0) {
|
|
3329
|
+
const shift = Math.min(periods, rowCount);
|
|
3330
|
+
for (let i = 0; i < shift; i++) {
|
|
3331
|
+
shiftedData.push(fill_value);
|
|
3332
|
+
}
|
|
3333
|
+
for (let i = 0; i < rowCount - shift; i++) {
|
|
3334
|
+
shiftedData.push(colData[i]);
|
|
3335
|
+
}
|
|
3336
|
+
} else if (periods < 0) {
|
|
3337
|
+
const absPeriods = Math.min(Math.abs(periods), rowCount);
|
|
3338
|
+
for (let i = absPeriods; i < rowCount; i++) {
|
|
3339
|
+
shiftedData.push(colData[i]);
|
|
3340
|
+
}
|
|
3341
|
+
for (let i = 0; i < absPeriods; i++) {
|
|
3342
|
+
shiftedData.push(fill_value);
|
|
3343
|
+
}
|
|
3344
|
+
} else {
|
|
3345
|
+
shiftedData.push(...colData);
|
|
3346
|
+
}
|
|
3347
|
+
newData[col] = shiftedData;
|
|
3348
|
+
}
|
|
3349
|
+
return new _DataFrame(newData, {
|
|
3350
|
+
columns: this._columns,
|
|
3351
|
+
index: this._index
|
|
3352
|
+
});
|
|
3353
|
+
}
|
|
3354
|
+
/**
|
|
3355
|
+
* Pivot DataFrame.
|
|
3356
|
+
* Time complexity: O(n × m).
|
|
3357
|
+
*
|
|
3358
|
+
* @param index - Column to use as index
|
|
3359
|
+
* @param columns - Column to use as column headers
|
|
3360
|
+
* @param values - Column to use as values
|
|
3361
|
+
* @returns New DataFrame with pivoted data
|
|
3362
|
+
*
|
|
3363
|
+
* @example
|
|
3364
|
+
* ```ts
|
|
3365
|
+
* const df = new DataFrame({
|
|
3366
|
+
* country: ['USA', 'USA', 'Canada', 'Canada'],
|
|
3367
|
+
* year: [2010, 2011, 2010, 2011],
|
|
3368
|
+
* value: [100, 200, 300, 400]
|
|
3369
|
+
* });
|
|
3370
|
+
* df.pivot('country', 'year', 'value');
|
|
3371
|
+
* // country | 2010 | 2011
|
|
3372
|
+
* // USA | 100 | 200
|
|
3373
|
+
* // Canada | 300 | 400
|
|
3374
|
+
* ```
|
|
3375
|
+
*/
|
|
3376
|
+
pivot(index, columns, values) {
|
|
3377
|
+
if (!this._columns.includes(index)) {
|
|
3378
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${index}' not found in DataFrame`);
|
|
3379
|
+
}
|
|
3380
|
+
if (!this._columns.includes(columns)) {
|
|
3381
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${columns}' not found in DataFrame`);
|
|
3382
|
+
}
|
|
3383
|
+
if (!this._columns.includes(values)) {
|
|
3384
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${values}' not found in DataFrame`);
|
|
3385
|
+
}
|
|
3386
|
+
const indexData = this._data.get(index);
|
|
3387
|
+
const columnData = this._data.get(columns);
|
|
3388
|
+
const valueData = this._data.get(values);
|
|
3389
|
+
if (!indexData || !columnData || !valueData) {
|
|
3390
|
+
throw new chunkJSCDE774_cjs.DataValidationError("Pivot columns have no data");
|
|
3391
|
+
}
|
|
3392
|
+
const pivotData = {};
|
|
3393
|
+
const pivotIndex = [];
|
|
3394
|
+
const uniqueIndices = /* @__PURE__ */ new Set();
|
|
3395
|
+
const uniqueColumns = [];
|
|
3396
|
+
const seenColumns = /* @__PURE__ */ new Set();
|
|
3397
|
+
for (const idx of indexData) {
|
|
3398
|
+
if (idx === null || idx === void 0) {
|
|
3399
|
+
continue;
|
|
3400
|
+
}
|
|
3401
|
+
const key = typeof idx === "string" || typeof idx === "number" ? idx : String(idx);
|
|
3402
|
+
if (!uniqueIndices.has(key)) {
|
|
3403
|
+
uniqueIndices.add(key);
|
|
3404
|
+
pivotIndex.push(key);
|
|
3405
|
+
}
|
|
3406
|
+
}
|
|
3407
|
+
for (const col of columnData) {
|
|
3408
|
+
if (col === null || col === void 0) {
|
|
3409
|
+
continue;
|
|
3410
|
+
}
|
|
3411
|
+
const colKey = String(col);
|
|
3412
|
+
if (!seenColumns.has(colKey)) {
|
|
3413
|
+
seenColumns.add(colKey);
|
|
3414
|
+
uniqueColumns.push(colKey);
|
|
3415
|
+
}
|
|
3416
|
+
}
|
|
3417
|
+
const rowPositionByIndex = /* @__PURE__ */ new Map();
|
|
3418
|
+
for (let i = 0; i < pivotIndex.length; i++) {
|
|
3419
|
+
const key = pivotIndex[i];
|
|
3420
|
+
if (key !== void 0) {
|
|
3421
|
+
rowPositionByIndex.set(key, i);
|
|
3422
|
+
}
|
|
3423
|
+
}
|
|
3424
|
+
for (const colKey of uniqueColumns) {
|
|
3425
|
+
pivotData[colKey] = new Array(pivotIndex.length).fill(null);
|
|
3426
|
+
}
|
|
3427
|
+
const visited = /* @__PURE__ */ new Set();
|
|
3428
|
+
for (let i = 0; i < indexData.length; i++) {
|
|
3429
|
+
const idx = indexData[i];
|
|
3430
|
+
const col = columnData[i];
|
|
3431
|
+
const value = valueData[i];
|
|
3432
|
+
if (idx !== null && idx !== void 0 && col !== null && col !== void 0) {
|
|
3433
|
+
const indexKey = typeof idx === "string" || typeof idx === "number" ? idx : String(idx);
|
|
3434
|
+
const colKey = String(col);
|
|
3435
|
+
const rowPos = rowPositionByIndex.get(indexKey);
|
|
3436
|
+
if (rowPos === void 0) {
|
|
3437
|
+
continue;
|
|
3438
|
+
}
|
|
3439
|
+
const cellKey = `${rowPos}:${colKey}`;
|
|
3440
|
+
if (visited.has(cellKey)) {
|
|
3441
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
3442
|
+
`Duplicate pivot entry for index '${String(indexKey)}' and column '${colKey}'`
|
|
3443
|
+
);
|
|
3444
|
+
}
|
|
3445
|
+
visited.add(cellKey);
|
|
3446
|
+
const targetColumn = pivotData[colKey];
|
|
3447
|
+
if (targetColumn) {
|
|
3448
|
+
targetColumn[rowPos] = value;
|
|
3449
|
+
}
|
|
3450
|
+
}
|
|
3451
|
+
}
|
|
3452
|
+
return new _DataFrame(pivotData, {
|
|
3453
|
+
columns: uniqueColumns,
|
|
3454
|
+
index: pivotIndex
|
|
3455
|
+
});
|
|
3456
|
+
}
|
|
3457
|
+
/**
|
|
3458
|
+
* Melt DataFrame.
|
|
3459
|
+
* Time complexity: O(n × m).
|
|
3460
|
+
*
|
|
3461
|
+
* @param id_vars - Columns to keep as is
|
|
3462
|
+
* @param value_vars - Columns to melt
|
|
3463
|
+
* @param var_name - Name for new column with melted variable names
|
|
3464
|
+
* @param value_name - Name for new column with melted values.
|
|
3465
|
+
* Must not conflict with existing columns or var_name.
|
|
3466
|
+
* @returns New DataFrame with melted data
|
|
3467
|
+
*
|
|
3468
|
+
* @example
|
|
3469
|
+
* ```ts
|
|
3470
|
+
* const df = new DataFrame({
|
|
3471
|
+
* id: ['a', 'b'],
|
|
3472
|
+
* x: [1, 2],
|
|
3473
|
+
* y: [3, 4]
|
|
3474
|
+
* });
|
|
3475
|
+
* df.melt(['id'], ['x', 'y'], 'variable', 'value');
|
|
3476
|
+
* // id | variable | value
|
|
3477
|
+
* // a | x | 1
|
|
3478
|
+
* // a | y | 3
|
|
3479
|
+
* // b | x | 2
|
|
3480
|
+
* // b | y | 4
|
|
3481
|
+
* ```
|
|
3482
|
+
*/
|
|
3483
|
+
melt(id_vars, value_vars, var_name = "variable", value_name = "value") {
|
|
3484
|
+
const idVars = [...id_vars];
|
|
3485
|
+
const valueVars = [...value_vars];
|
|
3486
|
+
ensureUniqueLabels(idVars, "id_var");
|
|
3487
|
+
ensureUniqueLabels(valueVars, "value_var");
|
|
3488
|
+
for (const idVar of idVars) {
|
|
3489
|
+
if (!this._columns.includes(idVar)) {
|
|
3490
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${idVar}' not found in DataFrame`);
|
|
3491
|
+
}
|
|
3492
|
+
}
|
|
3493
|
+
for (const valueVar of valueVars) {
|
|
3494
|
+
if (!this._columns.includes(valueVar)) {
|
|
3495
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${valueVar}' not found in DataFrame`);
|
|
3496
|
+
}
|
|
3497
|
+
}
|
|
3498
|
+
if (var_name === value_name) {
|
|
3499
|
+
throw new chunkJSCDE774_cjs.DataValidationError("var_name and value_name must be different");
|
|
3500
|
+
}
|
|
3501
|
+
const reservedNames = /* @__PURE__ */ new Set([...idVars, ...valueVars]);
|
|
3502
|
+
if (reservedNames.has(var_name) || reservedNames.has(value_name)) {
|
|
3503
|
+
throw new chunkJSCDE774_cjs.DataValidationError(
|
|
3504
|
+
"var_name and value_name must not conflict with existing columns"
|
|
3505
|
+
);
|
|
3506
|
+
}
|
|
3507
|
+
const newData = {};
|
|
3508
|
+
for (const idVar of idVars) {
|
|
3509
|
+
newData[idVar] = [];
|
|
3510
|
+
}
|
|
3511
|
+
newData[var_name] = [];
|
|
3512
|
+
newData[value_name] = [];
|
|
3513
|
+
for (let i = 0; i < this._index.length; i++) {
|
|
3514
|
+
for (const valueVar of valueVars) {
|
|
3515
|
+
for (const idVar of idVars) {
|
|
3516
|
+
newData[idVar]?.push(this._data.get(idVar)?.[i]);
|
|
3517
|
+
}
|
|
3518
|
+
newData[var_name]?.push(valueVar);
|
|
3519
|
+
newData[value_name]?.push(this._data.get(valueVar)?.[i]);
|
|
3520
|
+
}
|
|
3521
|
+
}
|
|
3522
|
+
return new _DataFrame(newData, {
|
|
3523
|
+
columns: [...idVars, var_name, value_name]
|
|
3524
|
+
});
|
|
3525
|
+
}
|
|
3526
|
+
/**
|
|
3527
|
+
* Rolling window mean calculation.
|
|
3528
|
+
*
|
|
3529
|
+
* @param window - Size of the rolling window
|
|
3530
|
+
* @param on - Column to apply rolling calculation to (if omitted, applies to all columns)
|
|
3531
|
+
* @returns New DataFrame with rolling mean values
|
|
3532
|
+
*
|
|
3533
|
+
* @example
|
|
3534
|
+
* ```ts
|
|
3535
|
+
* const df = new DataFrame({ a: [1, 2, 3, 4, 5] });
|
|
3536
|
+
* df.rolling(3); // [[null], [null], [2], [3], [4]]
|
|
3537
|
+
* ```
|
|
3538
|
+
*/
|
|
3539
|
+
rolling(window, on) {
|
|
3540
|
+
const newData = {};
|
|
3541
|
+
if (!Number.isFinite(window) || !Number.isInteger(window) || window <= 0) {
|
|
3542
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("window must be a positive integer", "window", window);
|
|
3543
|
+
}
|
|
3544
|
+
if (on && !this._columns.includes(on)) {
|
|
3545
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Column '${on}' not found in DataFrame`);
|
|
3546
|
+
}
|
|
3547
|
+
for (const col of this._columns) {
|
|
3548
|
+
if (col === on || !on) {
|
|
3549
|
+
const colData = this._data.get(col);
|
|
3550
|
+
if (!colData) continue;
|
|
3551
|
+
const rollingData = [];
|
|
3552
|
+
let windowSum = 0;
|
|
3553
|
+
let windowCount = 0;
|
|
3554
|
+
for (let i = 0; i < colData.length; i++) {
|
|
3555
|
+
const incoming = colData[i];
|
|
3556
|
+
if (isValidNumber(incoming)) {
|
|
3557
|
+
windowSum += incoming;
|
|
3558
|
+
windowCount++;
|
|
3559
|
+
}
|
|
3560
|
+
if (i >= window) {
|
|
3561
|
+
const outgoing = colData[i - window];
|
|
3562
|
+
if (isValidNumber(outgoing)) {
|
|
3563
|
+
windowSum -= outgoing;
|
|
3564
|
+
windowCount--;
|
|
3565
|
+
}
|
|
3566
|
+
}
|
|
3567
|
+
if (i < window - 1) {
|
|
3568
|
+
rollingData.push(null);
|
|
3569
|
+
} else if (windowCount === 0) {
|
|
3570
|
+
rollingData.push(null);
|
|
3571
|
+
} else {
|
|
3572
|
+
rollingData.push(windowSum / windowCount);
|
|
3573
|
+
}
|
|
3574
|
+
}
|
|
3575
|
+
newData[col] = rollingData;
|
|
3576
|
+
}
|
|
3577
|
+
}
|
|
3578
|
+
const outColumns = on ? [on] : this._columns;
|
|
3579
|
+
return new _DataFrame(newData, {
|
|
3580
|
+
columns: outColumns,
|
|
3581
|
+
index: this._index
|
|
3582
|
+
});
|
|
3583
|
+
}
|
|
3584
|
+
/**
|
|
3585
|
+
* Return a human-readable tabular string representation.
|
|
3586
|
+
*
|
|
3587
|
+
* Columns are right-aligned and padded so that rows line up.
|
|
3588
|
+
* Large DataFrames are truncated with an ellipsis row.
|
|
3589
|
+
*
|
|
3590
|
+
* @param maxRows - Maximum rows to display before summarizing (default: 20).
|
|
3591
|
+
* @returns Formatted table string
|
|
3592
|
+
*
|
|
3593
|
+
* @example
|
|
3594
|
+
* ```ts
|
|
3595
|
+
* const df = new DataFrame({ a: [1, 2], b: [3, 4] });
|
|
3596
|
+
* df.toString();
|
|
3597
|
+
* // " a b\n0 1 3\n1 2 4"
|
|
3598
|
+
* ```
|
|
3599
|
+
*/
|
|
3600
|
+
toString(maxRows = 20) {
|
|
3601
|
+
const nRows = this.shape[0] ?? 0;
|
|
3602
|
+
const cols = this._columns;
|
|
3603
|
+
const half = Math.floor(maxRows / 2);
|
|
3604
|
+
const showAll = nRows <= maxRows;
|
|
3605
|
+
const topCount = showAll ? nRows : half;
|
|
3606
|
+
const bottomCount = showAll ? 0 : half;
|
|
3607
|
+
const allRows = [];
|
|
3608
|
+
allRows.push(["", ...cols]);
|
|
3609
|
+
for (let i = 0; i < topCount; i++) {
|
|
3610
|
+
const idx = this._index[i];
|
|
3611
|
+
const row = [String(idx ?? i)];
|
|
3612
|
+
for (const col of cols) {
|
|
3613
|
+
const colData = this._data.get(col);
|
|
3614
|
+
const val = colData ? colData[i] : void 0;
|
|
3615
|
+
row.push(val === null || val === void 0 ? "null" : String(val));
|
|
3616
|
+
}
|
|
3617
|
+
allRows.push(row);
|
|
3618
|
+
}
|
|
3619
|
+
if (!showAll) {
|
|
3620
|
+
allRows.push(["...", ...cols.map(() => "...")]);
|
|
3621
|
+
for (let i = nRows - bottomCount; i < nRows; i++) {
|
|
3622
|
+
const idx = this._index[i];
|
|
3623
|
+
const row = [String(idx ?? i)];
|
|
3624
|
+
for (const col of cols) {
|
|
3625
|
+
const colData = this._data.get(col);
|
|
3626
|
+
const val = colData ? colData[i] : void 0;
|
|
3627
|
+
row.push(val === null || val === void 0 ? "null" : String(val));
|
|
3628
|
+
}
|
|
3629
|
+
allRows.push(row);
|
|
3630
|
+
}
|
|
3631
|
+
}
|
|
3632
|
+
const numCols = cols.length + 1;
|
|
3633
|
+
const widths = new Array(numCols).fill(0);
|
|
3634
|
+
for (const row of allRows) {
|
|
3635
|
+
for (let c = 0; c < numCols; c++) {
|
|
3636
|
+
const cell = row[c] ?? "";
|
|
3637
|
+
if (cell.length > (widths[c] ?? 0)) {
|
|
3638
|
+
widths[c] = cell.length;
|
|
3639
|
+
}
|
|
3640
|
+
}
|
|
3641
|
+
}
|
|
3642
|
+
const lines = [];
|
|
3643
|
+
for (const row of allRows) {
|
|
3644
|
+
const cells = [];
|
|
3645
|
+
for (let c = 0; c < numCols; c++) {
|
|
3646
|
+
const cell = row[c] ?? "";
|
|
3647
|
+
const w = widths[c] ?? 0;
|
|
3648
|
+
cells.push(cell.padStart(w));
|
|
3649
|
+
}
|
|
3650
|
+
lines.push(cells.join(" "));
|
|
3651
|
+
}
|
|
3652
|
+
return lines.join("\n");
|
|
3653
|
+
}
|
|
3654
|
+
};
|
|
3655
|
+
var DataFrameGroupBy = class {
|
|
3656
|
+
// Store the group mapping (computed once)
|
|
3657
|
+
groupMap;
|
|
3658
|
+
// Store the original key values for each group key (to avoid parsing)
|
|
3659
|
+
keyValuesMap;
|
|
3660
|
+
df;
|
|
3661
|
+
by;
|
|
3662
|
+
constructor(df, by) {
|
|
3663
|
+
this.df = df;
|
|
3664
|
+
this.by = by;
|
|
3665
|
+
const buildResult = this.buildGroupMap();
|
|
3666
|
+
this.groupMap = buildResult.groupMap;
|
|
3667
|
+
this.keyValuesMap = buildResult.keyValuesMap;
|
|
3668
|
+
}
|
|
3669
|
+
/**
|
|
3670
|
+
* Build the grouping map: group key -> array of row indices.
|
|
3671
|
+
*
|
|
3672
|
+
* @private
|
|
3673
|
+
*/
|
|
3674
|
+
buildGroupMap() {
|
|
3675
|
+
const groupByCols = Array.isArray(this.by) ? this.by : [this.by];
|
|
3676
|
+
const groupMap = /* @__PURE__ */ new Map();
|
|
3677
|
+
const keyValuesMap = /* @__PURE__ */ new Map();
|
|
3678
|
+
const numRows = this.df.shape[0];
|
|
3679
|
+
if (groupByCols.length === 1) {
|
|
3680
|
+
const colData = this.df.get(groupByCols[0]).data;
|
|
3681
|
+
for (let i = 0; i < numRows; i++) {
|
|
3682
|
+
const val = colData[i];
|
|
3683
|
+
const key = createKey(val);
|
|
3684
|
+
let bucket = groupMap.get(key);
|
|
3685
|
+
if (bucket === void 0) {
|
|
3686
|
+
bucket = [];
|
|
3687
|
+
groupMap.set(key, bucket);
|
|
3688
|
+
keyValuesMap.set(key, [val]);
|
|
3689
|
+
}
|
|
3690
|
+
bucket.push(i);
|
|
3691
|
+
}
|
|
3692
|
+
} else {
|
|
3693
|
+
const colDataArrays = [];
|
|
3694
|
+
for (let c = 0; c < groupByCols.length; c++) {
|
|
3695
|
+
colDataArrays.push(this.df.get(groupByCols[c]).data);
|
|
3696
|
+
}
|
|
3697
|
+
for (let i = 0; i < numRows; i++) {
|
|
3698
|
+
const keyParts = new Array(groupByCols.length);
|
|
3699
|
+
for (let c = 0; c < groupByCols.length; c++) {
|
|
3700
|
+
const colArr = colDataArrays[c];
|
|
3701
|
+
keyParts[c] = colArr !== void 0 ? colArr[i] : void 0;
|
|
3702
|
+
}
|
|
3703
|
+
const key = createKey(keyParts);
|
|
3704
|
+
let bucket = groupMap.get(key);
|
|
3705
|
+
if (bucket === void 0) {
|
|
3706
|
+
bucket = [];
|
|
3707
|
+
groupMap.set(key, bucket);
|
|
3708
|
+
keyValuesMap.set(key, keyParts);
|
|
3709
|
+
}
|
|
3710
|
+
bucket.push(i);
|
|
3711
|
+
}
|
|
3712
|
+
}
|
|
3713
|
+
return { groupMap, keyValuesMap };
|
|
3714
|
+
}
|
|
3715
|
+
/**
|
|
3716
|
+
* Aggregate grouped data.
|
|
3717
|
+
*
|
|
3718
|
+
* @param operations - Dictionary of column name to aggregation function
|
|
3719
|
+
* @returns New DataFrame with aggregated data
|
|
3720
|
+
*
|
|
3721
|
+
* @example
|
|
3722
|
+
* ```ts
|
|
3723
|
+
* const grouped = df.groupBy('category');
|
|
3724
|
+
* const result = grouped.agg({ value: 'sum', count: 'count' });
|
|
3725
|
+
* ```
|
|
3726
|
+
*/
|
|
3727
|
+
agg(operations) {
|
|
3728
|
+
const groupByCols = Array.isArray(this.by) ? this.by : [this.by];
|
|
3729
|
+
const resultData = {};
|
|
3730
|
+
const outputColumns = [];
|
|
3731
|
+
for (const col of groupByCols) {
|
|
3732
|
+
resultData[col] = [];
|
|
3733
|
+
outputColumns.push(col);
|
|
3734
|
+
}
|
|
3735
|
+
for (const [col, aggFunc] of Object.entries(operations)) {
|
|
3736
|
+
if (Array.isArray(aggFunc)) {
|
|
3737
|
+
for (const fn of aggFunc) {
|
|
3738
|
+
const outCol = `${col}_${fn}`;
|
|
3739
|
+
resultData[outCol] = [];
|
|
3740
|
+
outputColumns.push(outCol);
|
|
3741
|
+
}
|
|
3742
|
+
} else {
|
|
3743
|
+
resultData[col] = [];
|
|
3744
|
+
outputColumns.push(col);
|
|
3745
|
+
}
|
|
3746
|
+
}
|
|
3747
|
+
for (const [keyStr, indices] of this.groupMap.entries()) {
|
|
3748
|
+
const keyParts = this.keyValuesMap.get(keyStr);
|
|
3749
|
+
if (!keyParts) {
|
|
3750
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Missing key values for group: ${keyStr}`);
|
|
3751
|
+
}
|
|
3752
|
+
for (let i = 0; i < groupByCols.length; i++) {
|
|
3753
|
+
const groupCol = groupByCols[i];
|
|
3754
|
+
if (groupCol) resultData[groupCol]?.push(keyParts[i]);
|
|
3755
|
+
}
|
|
3756
|
+
for (const [col, aggFunc] of Object.entries(operations)) {
|
|
3757
|
+
const seriesData = this.df.get(col).data;
|
|
3758
|
+
const funcs = Array.isArray(aggFunc) ? aggFunc : [aggFunc];
|
|
3759
|
+
for (const func of funcs) {
|
|
3760
|
+
let result;
|
|
3761
|
+
switch (func) {
|
|
3762
|
+
case "count": {
|
|
3763
|
+
let count = 0;
|
|
3764
|
+
for (const idx of indices) {
|
|
3765
|
+
const val = seriesData[idx];
|
|
3766
|
+
if (val !== null && val !== void 0) count++;
|
|
3767
|
+
}
|
|
3768
|
+
result = count;
|
|
3769
|
+
break;
|
|
3770
|
+
}
|
|
3771
|
+
case "first": {
|
|
3772
|
+
const firstIdx = indices[0];
|
|
3773
|
+
result = firstIdx !== void 0 ? seriesData[firstIdx] : void 0;
|
|
3774
|
+
break;
|
|
3775
|
+
}
|
|
3776
|
+
case "last": {
|
|
3777
|
+
const lastIdx = indices[indices.length - 1];
|
|
3778
|
+
result = lastIdx !== void 0 ? seriesData[lastIdx] : void 0;
|
|
3779
|
+
break;
|
|
3780
|
+
}
|
|
3781
|
+
case "sum": {
|
|
3782
|
+
let sum = 0;
|
|
3783
|
+
let hasNumeric = false;
|
|
3784
|
+
for (const idx of indices) {
|
|
3785
|
+
const val = seriesData[idx];
|
|
3786
|
+
if (val === null || val === void 0) continue;
|
|
3787
|
+
if (typeof val !== "number") {
|
|
3788
|
+
throw new chunkJSCDE774_cjs.DataValidationError("sum() only works on numbers");
|
|
3789
|
+
}
|
|
3790
|
+
if (isValidNumber(val)) {
|
|
3791
|
+
sum += val;
|
|
3792
|
+
hasNumeric = true;
|
|
3793
|
+
}
|
|
3794
|
+
}
|
|
3795
|
+
result = hasNumeric ? sum : 0;
|
|
3796
|
+
break;
|
|
3797
|
+
}
|
|
3798
|
+
case "mean": {
|
|
3799
|
+
let sum = 0;
|
|
3800
|
+
let count = 0;
|
|
3801
|
+
for (const idx of indices) {
|
|
3802
|
+
const val = seriesData[idx];
|
|
3803
|
+
if (val === null || val === void 0) continue;
|
|
3804
|
+
if (typeof val !== "number") {
|
|
3805
|
+
throw new chunkJSCDE774_cjs.DataValidationError("mean() only works on numbers");
|
|
3806
|
+
}
|
|
3807
|
+
if (isValidNumber(val)) {
|
|
3808
|
+
sum += val;
|
|
3809
|
+
count++;
|
|
3810
|
+
}
|
|
3811
|
+
}
|
|
3812
|
+
result = count > 0 ? sum / count : NaN;
|
|
3813
|
+
break;
|
|
3814
|
+
}
|
|
3815
|
+
case "median": {
|
|
3816
|
+
const nums = [];
|
|
3817
|
+
for (const idx of indices) {
|
|
3818
|
+
const val = seriesData[idx];
|
|
3819
|
+
if (val === null || val === void 0) continue;
|
|
3820
|
+
if (typeof val !== "number") {
|
|
3821
|
+
throw new chunkJSCDE774_cjs.DataValidationError("median() only works on numbers");
|
|
3822
|
+
}
|
|
3823
|
+
if (isValidNumber(val)) nums.push(val);
|
|
3824
|
+
}
|
|
3825
|
+
if (nums.length === 0) {
|
|
3826
|
+
result = NaN;
|
|
3827
|
+
} else {
|
|
3828
|
+
nums.sort((a, b) => a - b);
|
|
3829
|
+
const mid = Math.floor(nums.length / 2);
|
|
3830
|
+
if (nums.length % 2 === 0) {
|
|
3831
|
+
const v1 = nums[mid - 1];
|
|
3832
|
+
const v2 = nums[mid];
|
|
3833
|
+
result = v1 !== void 0 && v2 !== void 0 ? (v1 + v2) / 2 : NaN;
|
|
3834
|
+
} else {
|
|
3835
|
+
result = nums[mid] ?? NaN;
|
|
3836
|
+
}
|
|
3837
|
+
}
|
|
3838
|
+
break;
|
|
3839
|
+
}
|
|
3840
|
+
case "min": {
|
|
3841
|
+
let min = Infinity;
|
|
3842
|
+
let hasNumeric = false;
|
|
3843
|
+
for (const idx of indices) {
|
|
3844
|
+
const val = seriesData[idx];
|
|
3845
|
+
if (val === null || val === void 0) continue;
|
|
3846
|
+
if (typeof val !== "number") {
|
|
3847
|
+
throw new chunkJSCDE774_cjs.DataValidationError("min() only works on numbers");
|
|
3848
|
+
}
|
|
3849
|
+
if (isValidNumber(val)) {
|
|
3850
|
+
if (val < min) min = val;
|
|
3851
|
+
hasNumeric = true;
|
|
3852
|
+
}
|
|
3853
|
+
}
|
|
3854
|
+
result = hasNumeric ? min : NaN;
|
|
3855
|
+
break;
|
|
3856
|
+
}
|
|
3857
|
+
case "max": {
|
|
3858
|
+
let max = -Infinity;
|
|
3859
|
+
let hasNumeric = false;
|
|
3860
|
+
for (const idx of indices) {
|
|
3861
|
+
const val = seriesData[idx];
|
|
3862
|
+
if (val === null || val === void 0) continue;
|
|
3863
|
+
if (typeof val !== "number") {
|
|
3864
|
+
throw new chunkJSCDE774_cjs.DataValidationError("max() only works on numbers");
|
|
3865
|
+
}
|
|
3866
|
+
if (isValidNumber(val)) {
|
|
3867
|
+
if (val > max) max = val;
|
|
3868
|
+
hasNumeric = true;
|
|
3869
|
+
}
|
|
3870
|
+
}
|
|
3871
|
+
result = hasNumeric ? max : NaN;
|
|
3872
|
+
break;
|
|
3873
|
+
}
|
|
3874
|
+
case "std": {
|
|
3875
|
+
let sum = 0;
|
|
3876
|
+
let count = 0;
|
|
3877
|
+
const nums = [];
|
|
3878
|
+
for (const idx of indices) {
|
|
3879
|
+
const val = seriesData[idx];
|
|
3880
|
+
if (val === null || val === void 0) continue;
|
|
3881
|
+
if (typeof val !== "number") {
|
|
3882
|
+
throw new chunkJSCDE774_cjs.DataValidationError("std() only works on numbers");
|
|
3883
|
+
}
|
|
3884
|
+
if (isValidNumber(val)) {
|
|
3885
|
+
sum += val;
|
|
3886
|
+
count++;
|
|
3887
|
+
nums.push(val);
|
|
3888
|
+
}
|
|
3889
|
+
}
|
|
3890
|
+
if (count < 2) {
|
|
3891
|
+
result = NaN;
|
|
3892
|
+
} else {
|
|
3893
|
+
const mean = sum / count;
|
|
3894
|
+
let sumSq = 0;
|
|
3895
|
+
for (const val of nums) {
|
|
3896
|
+
sumSq += (val - mean) ** 2;
|
|
3897
|
+
}
|
|
3898
|
+
result = Math.sqrt(sumSq / (count - 1));
|
|
3899
|
+
}
|
|
3900
|
+
break;
|
|
3901
|
+
}
|
|
3902
|
+
case "var": {
|
|
3903
|
+
let sum = 0;
|
|
3904
|
+
let count = 0;
|
|
3905
|
+
const nums = [];
|
|
3906
|
+
for (const idx of indices) {
|
|
3907
|
+
const val = seriesData[idx];
|
|
3908
|
+
if (val === null || val === void 0) continue;
|
|
3909
|
+
if (typeof val !== "number") {
|
|
3910
|
+
throw new chunkJSCDE774_cjs.DataValidationError("var() only works on numbers");
|
|
3911
|
+
}
|
|
3912
|
+
if (isValidNumber(val)) {
|
|
3913
|
+
sum += val;
|
|
3914
|
+
count++;
|
|
3915
|
+
nums.push(val);
|
|
3916
|
+
}
|
|
3917
|
+
}
|
|
3918
|
+
if (count < 2) {
|
|
3919
|
+
result = NaN;
|
|
3920
|
+
} else {
|
|
3921
|
+
const mean = sum / count;
|
|
3922
|
+
let sumSq = 0;
|
|
3923
|
+
for (const val of nums) {
|
|
3924
|
+
sumSq += (val - mean) ** 2;
|
|
3925
|
+
}
|
|
3926
|
+
result = sumSq / (count - 1);
|
|
3927
|
+
}
|
|
3928
|
+
break;
|
|
3929
|
+
}
|
|
3930
|
+
default:
|
|
3931
|
+
throw new chunkJSCDE774_cjs.DataValidationError(`Unsupported aggregation function: ${func}`);
|
|
3932
|
+
}
|
|
3933
|
+
const outCol = Array.isArray(aggFunc) ? `${col}_${func}` : col;
|
|
3934
|
+
resultData[outCol]?.push(result);
|
|
3935
|
+
}
|
|
3936
|
+
}
|
|
3937
|
+
}
|
|
3938
|
+
return new DataFrame(resultData, { columns: outputColumns });
|
|
3939
|
+
}
|
|
3940
|
+
/**
|
|
3941
|
+
* Helper to identify numeric columns (excluding grouping columns).
|
|
3942
|
+
* @private
|
|
3943
|
+
*/
|
|
3944
|
+
getNumericColumns() {
|
|
3945
|
+
const groupByCols = Array.isArray(this.by) ? this.by : [this.by];
|
|
3946
|
+
const otherCols = this.df.columns.filter((c) => !groupByCols.includes(c));
|
|
3947
|
+
return otherCols.filter((col) => {
|
|
3948
|
+
const colData = this.df.get(col);
|
|
3949
|
+
return colData.data.some(isValidNumber);
|
|
3950
|
+
});
|
|
3951
|
+
}
|
|
3952
|
+
/**
|
|
3953
|
+
* Helper method to perform same aggregation on all numeric non-grouping columns.
|
|
3954
|
+
* @private
|
|
3955
|
+
*/
|
|
3956
|
+
aggNumeric(operation) {
|
|
3957
|
+
const numericCols = this.getNumericColumns();
|
|
3958
|
+
const operations = {};
|
|
3959
|
+
for (const col of numericCols) {
|
|
3960
|
+
operations[col] = operation;
|
|
3961
|
+
}
|
|
3962
|
+
return this.agg(operations);
|
|
3963
|
+
}
|
|
3964
|
+
/**
|
|
3965
|
+
* Helper method to perform same aggregation on all non-grouping columns.
|
|
3966
|
+
*
|
|
3967
|
+
* @private
|
|
3968
|
+
*/
|
|
3969
|
+
aggAll(operation) {
|
|
3970
|
+
const groupByCols = Array.isArray(this.by) ? this.by : [this.by];
|
|
3971
|
+
const otherCols = this.df.columns.filter((c) => !groupByCols.includes(c));
|
|
3972
|
+
const operations = {};
|
|
3973
|
+
for (const col of otherCols) {
|
|
3974
|
+
operations[col] = operation;
|
|
3975
|
+
}
|
|
3976
|
+
return this.agg(operations);
|
|
3977
|
+
}
|
|
3978
|
+
/**
|
|
3979
|
+
* Compute sum for each group.
|
|
3980
|
+
*
|
|
3981
|
+
* @returns DataFrame with summed values by group
|
|
3982
|
+
*
|
|
3983
|
+
* @example
|
|
3984
|
+
* ```ts
|
|
3985
|
+
* const df = new DataFrame({
|
|
3986
|
+
* category: ['A', 'A', 'B', 'B'],
|
|
3987
|
+
* value: [1, 2, 3, 4]
|
|
3988
|
+
* });
|
|
3989
|
+
* df.groupBy('category').sum();
|
|
3990
|
+
* // category | value
|
|
3991
|
+
* // A | 3
|
|
3992
|
+
* // B | 7
|
|
3993
|
+
* ```
|
|
3994
|
+
*/
|
|
3995
|
+
sum() {
|
|
3996
|
+
return this.aggNumeric("sum");
|
|
3997
|
+
}
|
|
3998
|
+
/**
|
|
3999
|
+
* Compute mean (average) for each group.
|
|
4000
|
+
*
|
|
4001
|
+
* @returns DataFrame with mean values by group
|
|
4002
|
+
*/
|
|
4003
|
+
mean() {
|
|
4004
|
+
return this.aggNumeric("mean");
|
|
4005
|
+
}
|
|
4006
|
+
/**
|
|
4007
|
+
* Count non-null values in each non-grouping column for every group.
|
|
4008
|
+
*
|
|
4009
|
+
* @returns DataFrame with per-column non-null counts by group
|
|
4010
|
+
*/
|
|
4011
|
+
count() {
|
|
4012
|
+
return this.aggAll("count");
|
|
4013
|
+
}
|
|
4014
|
+
/**
|
|
4015
|
+
* Compute minimum value for each group.
|
|
4016
|
+
*
|
|
4017
|
+
* @returns DataFrame with minimum values by group
|
|
4018
|
+
*/
|
|
4019
|
+
min() {
|
|
4020
|
+
return this.aggNumeric("min");
|
|
4021
|
+
}
|
|
4022
|
+
/**
|
|
4023
|
+
* Compute maximum value for each group.
|
|
4024
|
+
*
|
|
4025
|
+
* @returns DataFrame with maximum values by group
|
|
4026
|
+
*/
|
|
4027
|
+
max() {
|
|
4028
|
+
return this.aggNumeric("max");
|
|
4029
|
+
}
|
|
4030
|
+
/**
|
|
4031
|
+
* Compute standard deviation for each group.
|
|
4032
|
+
*
|
|
4033
|
+
* @returns DataFrame with standard deviation values by group
|
|
4034
|
+
*/
|
|
4035
|
+
std() {
|
|
4036
|
+
return this.aggNumeric("std");
|
|
4037
|
+
}
|
|
4038
|
+
/**
|
|
4039
|
+
* Compute variance for each group.
|
|
4040
|
+
*
|
|
4041
|
+
* @returns DataFrame with variance values by group
|
|
4042
|
+
*/
|
|
4043
|
+
var() {
|
|
4044
|
+
return this.aggNumeric("var");
|
|
4045
|
+
}
|
|
4046
|
+
/**
|
|
4047
|
+
* Compute median for each group.
|
|
4048
|
+
*
|
|
4049
|
+
* @returns DataFrame with median values by group
|
|
4050
|
+
*/
|
|
4051
|
+
median() {
|
|
4052
|
+
return this.aggNumeric("median");
|
|
4053
|
+
}
|
|
4054
|
+
};
|
|
4055
|
+
|
|
4056
|
+
exports.DataFrame = DataFrame;
|
|
4057
|
+
exports.DataFrameGroupBy = DataFrameGroupBy;
|
|
4058
|
+
exports.Series = Series;
|
|
4059
|
+
exports.dataframe_exports = dataframe_exports;
|
|
4060
|
+
//# sourceMappingURL=chunk-ZLW62TJG.cjs.map
|
|
4061
|
+
//# sourceMappingURL=chunk-ZLW62TJG.cjs.map
|