deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
@@ -0,0 +1,4056 @@
1
+ import { tensor, reshape } from './chunk-BCR7G3A6.js';
2
+ import { __export, DataValidationError, IndexError, InvalidParameterError, normalizeAxis } from './chunk-4S73VUBD.js';
3
+
4
+ // src/dataframe/index.ts
5
// Namespace object for the dataframe module. Each export is registered as a
// lazy getter so that the classes, which are defined further down in this
// file, are only looked up when a consumer actually reads the property.
var dataframe_exports = {};
__export(dataframe_exports, {
  DataFrame: () => {
    return DataFrame;
  },
  DataFrameGroupBy: () => {
    return DataFrameGroupBy;
  },
  Series: () => {
    return Series;
  }
});
11
+
12
+ // src/dataframe/utils.ts
13
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param value - Any value
 * @returns true for objects other than `null` and arrays, false otherwise
 */
var isRecord = (value) => {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
};
14
/**
 * Build a deterministic string key for `value`, suitable for use as a
 * Map/Set key when grouping or de-duplicating values.
 *
 * Primitive keys carry a type prefix (`n:`, `s:`, `b:`, `bi:`) so that, for
 * example, the number 1 and the string "1" never share a key. `null`,
 * `undefined`, `NaN` and the infinities map to fixed, unprefixed tokens.
 *
 * Composite values (arrays and plain objects) are keyed recursively. Every
 * nested key is JSON-quoted before being joined, which makes each element
 * self-delimiting: without the quoting, `["a,s:b"]` and `["a", "b"]` would
 * both produce `[s:a,s:b]`. Object keys are sorted so that property order
 * does not affect the result. Dates are keyed by their timestamp; keying
 * them by enumerable properties would map every Date to `{}`.
 *
 * Cyclic structures are not supported (the recursion does not terminate).
 *
 * @param value - Any value
 * @returns String key; equal keys mean the values are treated as equal
 */
var createKey = (value) => {
  if (value === null) return "null";
  if (value === void 0) return "undefined";
  switch (typeof value) {
    case "number":
      if (Number.isNaN(value)) return "NaN";
      if (value === Infinity) return "Infinity";
      if (value === -Infinity) return "-Infinity";
      return `n:${value}`;
    case "string":
      return `s:${value}`;
    case "boolean":
      return `b:${value}`;
    case "bigint":
      return `bi:${value.toString()}`;
    case "object":
      break;
    default:
      // Symbols and functions fall back to their string form.
      return String(value);
  }
  if (Array.isArray(value)) {
    const items = value.map((item) => JSON.stringify(createKey(item)));
    return `[${items.join(",")}]`;
  }
  if (value instanceof Date) {
    return `d:${value.getTime()}`;
  }
  const parts = Object.keys(value)
    .sort()
    .map((k) => `${JSON.stringify(k)}:${JSON.stringify(createKey(value[k]))}`);
  return `{${parts.join(",")}}`;
};
43
/**
 * Report whether `value` is a finite number.
 *
 * `Number.isFinite` does not coerce its argument, so non-number values,
 * NaN and the infinities all yield false.
 *
 * @param value - Any value
 * @returns true only for finite number primitives
 */
var isValidNumber = (value) => Number.isFinite(value);
46
+
47
+ // src/dataframe/Series.ts
48
/**
 * One-dimensional labelled array: a list of values paired with a list of
 * unique index labels and an optional name.
 */
var Series = class _Series {
  // Internal storage for the actual data values
  _data;
  // Internal storage for index labels (can be strings or numbers)
  _index;
  // Label -> position lookup used by get() and loc()
  _indexPos;
  // Optional name for this Series
  _name;
  /**
   * Creates a new Series instance.
   *
   * @param data - Array of values to store in the Series
   * @param options - Configuration options
   * @param options.index - Custom index labels (defaults to 0, 1, 2, ...)
   * @param options.name - Optional name for the Series
   * @param options.copy - Pass `false` to adopt `data` and `options.index`
   *   as-is instead of copying them (the caller must then not mutate them)
   * @throws {DataValidationError} If the index length differs from the data
   *   length, or an index label is undefined or duplicated
   *
   * @example
   * ```ts
   * const s = new Series([10, 20, 30], {
   *   index: ['a', 'b', 'c'],
   *   name: 'values'
   * });
   * ```
   */
  constructor(data, options = {}) {
    const shareArrays = options.copy === false;
    this._data = shareArrays ? data : [...data];
    if (options.index) {
      this._index = shareArrays ? options.index : [...options.index];
    } else {
      this._index = Array.from({ length: this._data.length }, (_, i) => i);
    }
    if (this._index.length !== this._data.length) {
      throw new DataValidationError(
        `Index length (${this._index.length}) must match data length (${this._data.length})`
      );
    }
    this._indexPos = /* @__PURE__ */ new Map();
    for (let i = 0; i < this._index.length; i++) {
      const label = this._index[i];
      if (label === void 0) {
        throw new DataValidationError("Index labels cannot be undefined");
      }
      if (this._indexPos.has(label)) {
        throw new DataValidationError(`Duplicate index label '${String(label)}' is not supported`);
      }
      this._indexPos.set(label, i);
    }
    this._name = options.name;
  }
  /**
   * Build a Series that inherits this Series' name. Both arrays must be
   * freshly created by the caller: they are adopted without copying.
   */
  _derive(data, index) {
    return new _Series(data, { index, name: this._name, copy: false });
  }
  /** Validate the row-count argument shared by head() and tail(). */
  _checkCount(n) {
    if (!Number.isInteger(n) || n < 0) {
      throw new InvalidParameterError("n must be a non-negative integer", "n", n);
    }
  }
  /**
   * Collect the values that take part in a numeric reduction: null and
   * undefined are skipped, NaN is dropped, any other non-number throws.
   *
   * @param method - Method name used in the "only works on numeric data" error
   * @param emptyLabel - Noun used in the "empty Series" error
   */
  _numericValues(method, emptyLabel = method) {
    if (this._data.length === 0) {
      throw new DataValidationError(`Cannot get ${emptyLabel} of empty Series`);
    }
    const values = [];
    for (const value of this._data) {
      if (value === null || value === void 0) continue;
      if (typeof value !== "number") {
        throw new DataValidationError(`Series.${method}() only works on numeric data`);
      }
      if (!Number.isNaN(value)) {
        values.push(value);
      }
    }
    return values;
  }
  /** Sample variance (n - 1 denominator); NaN with fewer than two usable values. */
  _sampleVariance(method, emptyLabel) {
    const values = this._numericValues(method, emptyLabel);
    if (values.length < 2) {
      return NaN;
    }
    const meanVal = values.reduce((acc, val) => acc + val, 0) / values.length;
    let sumSquaredDiff = 0;
    for (const val of values) {
      const diff = val - meanVal;
      sumSquaredDiff += diff * diff;
    }
    return sumSquaredDiff / (values.length - 1);
  }
  /**
   * Get the underlying data array.
   *
   * @returns The internal data array itself (not a copy); treat it as read-only
   */
  get data() {
    return this._data;
  }
  /**
   * Get the index labels.
   *
   * @returns The internal index array itself (not a copy); treat it as read-only
   */
  get index() {
    return this._index;
  }
  /**
   * Get the Series name.
   *
   * @returns The name of this Series, or undefined if not set
   */
  get name() {
    return this._name;
  }
  /**
   * Get the number of elements in the Series.
   *
   * @returns Length of the Series
   */
  get length() {
    return this._data.length;
  }
  /**
   * Get a value by label. Performs the same strict label lookup as `loc()`.
   * For positional access, use `iloc()`.
   *
   * @param label - The index label to look up
   * @returns The value at that label, or undefined if not found
   *
   * @example
   * ```ts
   * const s = new Series([10, 20, 30], { index: ['a', 'b', 'c'] });
   * s.get('a'); // 10
   * s.get('z'); // undefined
   * ```
   */
  get(label) {
    const position = this._indexPos.get(label);
    return position === void 0 ? void 0 : this._data[position];
  }
  /**
   * Access a value by label (label-based indexing).
   *
   * @param label - The index label to look up
   * @returns The value at that label, or undefined if not found
   *
   * @example
   * ```ts
   * const s = new Series([10, 20], { index: ['a', 'b'] });
   * s.loc('a'); // 10
   * ```
   */
  loc(label) {
    const position = this._indexPos.get(label);
    return position === void 0 ? void 0 : this._data[position];
  }
  /**
   * Access a value by integer position (position-based indexing).
   *
   * @param position - The integer position (0-based)
   * @returns The value at that position
   * @throws {IndexError} If the Series is empty, or position is not an
   *   integer, or position is out of bounds
   *
   * @example
   * ```ts
   * const s = new Series([10, 20, 30]);
   * s.iloc(0); // 10
   * s.iloc(2); // 30
   * ```
   */
  iloc(position) {
    const size = this._data.length;
    if (size === 0) {
      throw new IndexError(`Series is empty`, {
        index: position,
        validRange: [0, 0]
      });
    }
    // Fractional or NaN positions would otherwise slip past the range check
    // and silently read `undefined`.
    if (!Number.isInteger(position)) {
      throw new IndexError(`Position must be an integer, got ${String(position)}`, {
        index: position,
        validRange: [0, size - 1]
      });
    }
    if (position < 0 || position >= size) {
      throw new IndexError(`Position ${position} is out of bounds (0-${size - 1})`, {
        index: position,
        validRange: [0, size - 1]
      });
    }
    return this._data[position];
  }
  /**
   * Return the first n elements.
   *
   * @param n - Number of elements to return (default: 5)
   * @returns New Series with the first n elements (all of them if n exceeds the length)
   * @throws {InvalidParameterError} If n is not a non-negative integer
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3, 4, 5, 6]);
   * s.head(3); // Series([1, 2, 3])
   * ```
   */
  head(n = 5) {
    this._checkCount(n);
    return this._derive(this._data.slice(0, n), this._index.slice(0, n));
  }
  /**
   * Return the last n elements.
   *
   * @param n - Number of elements to return (default: 5)
   * @returns New Series with the last n elements (all of them if n exceeds the length)
   * @throws {InvalidParameterError} If n is not a non-negative integer
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3, 4, 5, 6]);
   * s.tail(3); // Series([4, 5, 6])
   * ```
   */
  tail(n = 5) {
    this._checkCount(n);
    // Clamp at 0: a negative start would make slice() count from the end and
    // drop leading elements when n is larger than the Series.
    const sliceStart = Math.max(0, this._data.length - n);
    return this._derive(this._data.slice(sliceStart), this._index.slice(sliceStart));
  }
  /**
   * Filter Series by a boolean predicate function.
   *
   * Filters both data AND index to maintain alignment.
   *
   * @param predicate - Called with (value, position); return true to keep the element
   * @returns New Series with only elements that passed the predicate
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3, 4, 5]);
   * s.filter(x => x > 2); // Series([3, 4, 5])
   * ```
   */
  filter(predicate) {
    const keptData = [];
    const keptIndex = [];
    for (let i = 0; i < this._data.length; i++) {
      const label = this._index[i];
      if (label === void 0) {
        throw new DataValidationError("Index labels cannot be undefined");
      }
      const value = this._data[i];
      if (predicate(value, i)) {
        keptData.push(value);
        keptIndex.push(label);
      }
    }
    return this._derive(keptData, keptIndex);
  }
  /**
   * Transform each element using a mapping function.
   *
   * @param fn - Function to apply to each element (invoked via Array.prototype.map)
   * @returns New Series with transformed values and the same index and name
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3]);
   * s.map(x => x * 2); // Series([2, 4, 6])
   * ```
   */
  map(fn) {
    return this._derive(this._data.map(fn), [...this._index]);
  }
  /**
   * Sort the Series values.
   *
   * Preserves index-value mapping by sorting `[value, index]` pairs.
   * Two numbers compare numerically, with NaN always placed last regardless
   * of direction. Any other pair compares by `localeCompare` on the string
   * forms.
   *
   * @param ascending - Sort in ascending order (default: true)
   * @returns New sorted Series with index reordered to match
   *
   * @example
   * ```ts
   * const s = new Series([3, 1, 2], { index: ['a', 'b', 'c'] });
   * s.sort(); // Series([1, 2, 3]) with index ['b', 'c', 'a']
   * ```
   */
  sort(ascending = true) {
    const paired = [];
    for (let i = 0; i < this._data.length; i++) {
      const label = this._index[i];
      if (label === void 0) {
        throw new DataValidationError("Index labels cannot be undefined");
      }
      paired.push([this._data[i], label]);
    }
    paired.sort(([aVal], [bVal]) => {
      if (typeof aVal === "number" && typeof bVal === "number") {
        const aIsNaN = Number.isNaN(aVal);
        const bIsNaN = Number.isNaN(bVal);
        if (aIsNaN && bIsNaN) return 0;
        if (aIsNaN) return 1;
        if (bIsNaN) return -1;
        return ascending ? aVal - bVal : bVal - aVal;
      }
      // String() is the identity on strings, so this also covers the
      // string-vs-string case.
      const aStr = String(aVal);
      const bStr = String(bVal);
      return ascending ? aStr.localeCompare(bStr) : bStr.localeCompare(aStr);
    });
    return this._derive(
      paired.map((pair) => pair[0]),
      paired.map((pair) => pair[1])
    );
  }
  /**
   * Get unique values in the Series.
   *
   * Uniqueness follows `Set` semantics (objects are compared by identity).
   *
   * @returns Array of unique values (order preserved)
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 2, 3, 1]);
   * s.unique(); // [1, 2, 3]
   * ```
   */
  unique() {
    return [...new Set(this._data)];
  }
  /**
   * Count occurrences of unique values.
   *
   * Returns a Series where index is the unique values and data is their
   * counts, ordered by descending count (ties keep first-seen order).
   * null/undefined are counted and labelled by their string form.
   *
   * @returns Series of counts named `<name>_counts`, or `counts` when unnamed
   * @throws {DataValidationError} If a value is not a string, number, null or
   *   undefined, or if two distinct values end up with the same index label
   *   (e.g. `null` and the string "null")
   *
   * @example
   * ```ts
   * const s = new Series(['a', 'b', 'a', 'c', 'a']);
   * s.valueCounts(); // Series([3, 1, 1]) with index ['a', 'b', 'c']
   * ```
   */
  valueCounts() {
    const tally = /* @__PURE__ */ new Map();
    for (const value of this._data) {
      if (typeof value !== "string" && typeof value !== "number" && value !== null && value !== void 0) {
        throw new DataValidationError("Series.valueCounts() only supports Series<string | number>");
      }
      const key = createKey(value);
      const entry = tally.get(key);
      if (entry === void 0) {
        tally.set(key, { value, count: 1 });
      } else {
        entry.count += 1;
      }
    }
    const ranked = [...tally.values()].sort((a, b) => b.count - a.count);
    const labels = ranked.map(({ value }) =>
      typeof value === "string" || typeof value === "number" ? value : String(value)
    );
    return new _Series(
      ranked.map((entry) => entry.count),
      {
        index: labels,
        name: this._name ? `${this._name}_counts` : "counts",
        copy: false
      }
    );
  }
  /**
   * Calculate the sum of all values.
   *
   * Skips null, undefined, and NaN values.
   *
   * @returns Sum of all numeric values (0 if every value was skipped)
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, null, 3, 4]);
   * s.sum(); // 10
   * ```
   */
  sum() {
    return this._numericValues("sum").reduce((acc, val) => acc + val, 0);
  }
  /**
   * Calculate the arithmetic mean (average) of all values.
   *
   * Skips null, undefined, and NaN values.
   *
   * @returns Mean of all numeric values, or NaN if every value was skipped
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, null, 3, 4]);
   * s.mean(); // 2.5
   * ```
   */
  mean() {
    const values = this._numericValues("mean");
    if (values.length === 0) {
      return NaN;
    }
    return values.reduce((acc, val) => acc + val, 0) / values.length;
  }
  /**
   * Calculate the median (middle value) of all values.
   *
   * Skips null, undefined, and NaN values.
   * For an even number of values, returns the average of the two middle values.
   *
   * @returns Median value, or NaN if every value was skipped
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3, 4, 5]);
   * s.median(); // 3
   * ```
   */
  median() {
    // _numericValues returns a fresh array, so sorting in place is safe.
    const sorted = this._numericValues("median").sort((a, b) => a - b);
    if (sorted.length === 0) {
      return NaN;
    }
    const middle = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 0) {
      return (sorted[middle - 1] + sorted[middle]) / 2;
    }
    return sorted[middle];
  }
  /**
   * Calculate the standard deviation of all values.
   *
   * Skips null, undefined, and NaN values.
   * Uses sample standard deviation (divides by n-1).
   *
   * @returns Standard deviation, or NaN with fewer than two usable values
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([2, 4, 6, 8]);
   * s.std(); // ~2.58
   * ```
   */
  std() {
    return Math.sqrt(this._sampleVariance("std"));
  }
  /**
   * Calculate the variance of all values.
   *
   * Skips null, undefined, and NaN values.
   * Uses sample variance (divides by n-1).
   *
   * @returns Variance, or NaN with fewer than two usable values
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([2, 4, 6, 8]);
   * s.var(); // ~6.67
   * ```
   */
  var() {
    return this._sampleVariance("var", "variance");
  }
  /**
   * Find the minimum value in the Series.
   *
   * Skips null, undefined, and NaN values.
   *
   * @returns Minimum value, or NaN if every value was skipped
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([5, 2, 8, 1, 9]);
   * s.min(); // 1
   * ```
   */
  min() {
    const values = this._numericValues("min");
    if (values.length === 0) {
      return NaN;
    }
    return values.reduce((lowest, val) => (val < lowest ? val : lowest), Infinity);
  }
  /**
   * Find the maximum value in the Series.
   *
   * Skips null, undefined, and NaN values.
   *
   * @returns Maximum value, or NaN if every value was skipped
   * @throws {DataValidationError} If Series is empty or contains non-numeric data
   *
   * @example
   * ```ts
   * const s = new Series([5, 2, 8, 1, 9]);
   * s.max(); // 9
   * ```
   */
  max() {
    const values = this._numericValues("max");
    if (values.length === 0) {
      return NaN;
    }
    return values.reduce((highest, val) => (val > highest ? val : highest), -Infinity);
  }
  /**
   * Convert the Series to a plain JavaScript array.
   *
   * @returns Shallow copy of the data
   *
   * @example
   * ```ts
   * const s = new Series([1, 2, 3]);
   * const arr = s.toArray(); // [1, 2, 3]
   * ```
   */
  toArray() {
    return [...this._data];
  }
  /**
   * Convert the Series to an ndarray Tensor via the `tensor()` factory.
   *
   * null and undefined entries are passed to the factory as NaN.
   *
   * @returns Tensor containing the Series data
   * @throws {DataValidationError} If a value is neither a number nor null/undefined
   *
   * @example
   * ```ts
   * import { Series } from 'deepbox/dataframe';
   *
   * const s = new Series([1, 2, 3, 4]);
   * const t = s.toTensor(); // Tensor([1, 2, 3, 4])
   * ```
   */
  toTensor() {
    const numeric = this._data.map((value) => {
      if (typeof value === "number") {
        return value;
      }
      if (value === null || value === void 0) {
        return NaN;
      }
      throw new DataValidationError(
        "Series.toTensor() only works on numeric data (or null/undefined)"
      );
    });
    return tensor(numeric);
  }
  /**
   * Return a human-readable string representation of this Series.
   *
   * Each row is printed as `index value` with the index right-aligned,
   * followed by a footer line with the name (when set) and the length.
   * When the Series has more than `maxRows` rows, only the first and last
   * `floor(maxRows / 2)` rows are shown, separated by an ellipsis row.
   *
   * @param maxRows - Maximum rows to display before summarizing (default: 20).
   * @returns Formatted string representation
   *
   * @example
   * ```ts
   * const s = new Series([10, 20, 30], { name: 'values' });
   * s.toString();
   * // "0 10\n1 20\n2 30\nName: values, Length: 3"
   * ```
   */
  toString(maxRows = 20) {
    const n = this._data.length;
    const showAll = n <= maxRows;
    const half = Math.floor(maxRows / 2);
    const formatRow = (i) => {
      const val = this._data[i];
      return [String(this._index[i] ?? i), val === null || val === void 0 ? "null" : String(val)];
    };
    const rows = [];
    const topCount = showAll ? n : half;
    for (let i = 0; i < topCount; i++) {
      rows.push(formatRow(i));
    }
    if (!showAll) {
      rows.push(["...", "..."]);
      for (let i = n - half; i < n; i++) {
        rows.push(formatRow(i));
      }
    }
    const idxWidth = rows.reduce((width, [idx]) => Math.max(width, idx.length), 0);
    const lines = rows.map(([idx, val]) => `${idx.padStart(idxWidth)} ${val}`);
    const footer = [];
    if (this._name !== void 0) footer.push(`Name: ${this._name}`);
    footer.push(`Length: ${n}`);
    lines.push(footer.join(", "));
    return lines.join("\n");
  }
};
767
+
768
+ // src/dataframe/DataFrame.ts
769
/**
 * Report whether `value` is a number primitive. NaN and the infinities
 * count as numbers here.
 *
 * @param value - Any value
 * @returns true when `typeof value` is "number"
 */
var isNumberValue = (value) => {
  return typeof value === "number";
};
770
/**
 * Report whether `value` can serve as an index label, i.e. is a string or
 * a number primitive.
 *
 * @param value - Any value
 * @returns true for strings and numbers, false otherwise
 */
var isIndexLabel = (value) => {
  const kind = typeof value;
  return kind === "string" || kind === "number";
};
771
/** Type guard: true when `value` is an array whose entries are all strings (an empty array qualifies). */
function isStringArray(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.every((entry) => typeof entry === "string");
}
772
/** Type guard: true when `value` is an array in which every entry satisfies `isIndexLabel`. */
function isIndexLabelArray(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.every(isIndexLabel);
}
773
/**
 * Verify that `labels` contains no repeated entry.
 *
 * Entries are compared with Set semantics, so `1` and `"1"` are distinct.
 *
 * @param labels - Iterable of labels to check
 * @param labelName - Human-readable label kind used in the error message
 * @throws {DataValidationError} On the first label that was already seen
 */
function ensureUniqueLabels(labels, labelName) {
  const visited = /* @__PURE__ */ new Set();
  for (const label of labels) {
    if (!visited.has(label)) {
      visited.add(label);
      continue;
    }
    throw new DataValidationError(`Duplicate ${labelName} '${label}' is not supported`);
  }
}
782
/** Keep only the entries of `values` accepted by `isValidNumber`. */
function toNumericValues(values) {
  return values.filter(isValidNumber);
}
783
+ var DataFrame = class _DataFrame {
784
+ // Internal storage: Map of column names to data arrays
785
+ _data;
786
+ // Row labels (can be strings or numbers)
787
+ _index;
788
+ // Fast label -> position lookup for O(1) loc() access
789
+ _indexPos;
790
+ // Column names
791
+ _columns;
792
+ /**
793
+ * Creates a new DataFrame instance.
794
+ *
795
+ * @param data - Object mapping column names to arrays of values.
796
+ * All arrays must have the same length.
797
+ * @param options - Configuration options
798
+ * @param options.columns - Custom column order (defaults to Object.keys(data))
799
+ * @param options.index - Custom row labels (defaults to 0, 1, 2, ...)
800
+ *
801
+ * @example
802
+ * ```ts
803
+ * const df = new DataFrame({
804
+ * col1: [1, 2, 3],
805
+ * col2: ['a', 'b', 'c']
806
+ * }, {
807
+ * index: ['row1', 'row2', 'row3']
808
+ * });
809
+ * ```
810
+ */
811
+ constructor(data, options = {}) {
812
+ this._columns = options.columns ? [...options.columns] : Object.keys(data);
813
+ ensureUniqueLabels(this._columns, "column name");
814
+ for (const col of this._columns) {
815
+ if (!(col in data)) {
816
+ throw new DataValidationError(`Column '${col}' not found in DataFrame data`);
817
+ }
818
+ }
819
+ let firstColumnLength = 0;
820
+ if (this._columns.length > 0) {
821
+ const firstCol = this._columns[0];
822
+ if (firstCol === void 0) {
823
+ throw new DataValidationError("First column is undefined");
824
+ }
825
+ const firstColData = data[firstCol];
826
+ if (!Array.isArray(firstColData)) {
827
+ throw new DataValidationError(`Column '${firstCol}' must be an array`);
828
+ }
829
+ firstColumnLength = firstColData.length;
830
+ }
831
+ this._index = options.index ? options.copy === false ? options.index : [...options.index] : Array.from({ length: firstColumnLength }, (_, i) => i);
832
+ if (this._columns.length > 0 && this._index.length !== firstColumnLength) {
833
+ throw new DataValidationError(
834
+ `Index length (${this._index.length}) must match row count (${firstColumnLength})`
835
+ );
836
+ }
837
+ this._indexPos = /* @__PURE__ */ new Map();
838
+ for (let i = 0; i < this._index.length; i++) {
839
+ const label = this._index[i];
840
+ if (label === void 0) {
841
+ throw new DataValidationError(`Index label at position ${i} is undefined`);
842
+ }
843
+ if (this._indexPos.has(label)) {
844
+ throw new DataValidationError(`Duplicate index label '${String(label)}' is not supported`);
845
+ }
846
+ this._indexPos.set(label, i);
847
+ }
848
+ this._data = /* @__PURE__ */ new Map();
849
+ for (const col of this._columns) {
850
+ const colData = data[col];
851
+ if (!Array.isArray(colData)) {
852
+ throw new DataValidationError(`Column '${col}' not found in DataFrame data`);
853
+ }
854
+ if (colData.length !== firstColumnLength) {
855
+ throw new DataValidationError(
856
+ `Column '${col}' length (${colData.length}) must match row count (${firstColumnLength})`
857
+ );
858
+ }
859
+ this._data.set(col, options.copy === false ? colData : [...colData]);
860
+ }
861
+ }
862
+ /**
863
+ * Get the dimensions of the DataFrame.
864
+ *
865
+ * @returns Tuple of [rows, columns]
866
+ *
867
+ * @example
868
+ * ```ts
869
+ * const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
870
+ * df.shape; // [3, 2]
871
+ * ```
872
+ */
873
+ get shape() {
874
+ return [this._index.length, this._columns.length];
875
+ }
876
+ /**
877
+ * Get the column names.
878
+ *
879
+ * @returns Array of column names (copy)
880
+ */
881
+ get columns() {
882
+ return [...this._columns];
883
+ }
884
+ /**
885
+ * Get the row index labels.
886
+ *
887
+ * @returns Array of index labels (copy)
888
+ */
889
+ get index() {
890
+ return [...this._index];
891
+ }
892
+ get(column, guard) {
893
+ const data = this._data.get(column);
894
+ if (data === void 0) {
895
+ throw new InvalidParameterError(
896
+ `Column '${column}' not found in DataFrame`,
897
+ "column",
898
+ column
899
+ );
900
+ }
901
+ if (guard) {
902
+ const validated = [];
903
+ for (const value of data) {
904
+ if (!guard(value)) {
905
+ throw new DataValidationError(
906
+ `Column '${column}' contains values that do not match the requested type`
907
+ );
908
+ }
909
+ validated.push(value);
910
+ }
911
+ return new Series(validated, {
912
+ index: this._index,
913
+ name: column,
914
+ copy: false
915
+ });
916
+ }
917
+ return new Series(data, {
918
+ index: this._index,
919
+ name: column,
920
+ copy: false
921
+ });
922
+ }
923
+ /**
924
+ * Access a row by label (label-based indexing).
925
+ *
926
+ * @param row - The index label of the row
927
+ * @returns Object mapping column names to values for that row
928
+ * @throws {IndexError} If row label not found
929
+ *
930
+ * @example
931
+ * ```ts
932
+ * const df = new DataFrame(
933
+ * { age: [25, 30], name: ['Alice', 'Bob'] },
934
+ * { index: ['row1', 'row2'] }
935
+ * );
936
+ * df.loc('row1'); // { age: 25, name: 'Alice' }
937
+ * ```
938
+ */
939
+ loc(row) {
940
+ const position = this._indexPos.get(row) ?? -1;
941
+ if (position === -1) {
942
+ throw new IndexError(`Row label '${row}' not found in index`);
943
+ }
944
+ const result = {};
945
+ for (const col of this._columns) {
946
+ const colData = this._data.get(col);
947
+ if (colData) {
948
+ result[col] = colData[position];
949
+ }
950
+ }
951
+ return result;
952
+ }
953
+ /**
954
+ * Access a row by integer position (position-based indexing).
955
+ *
956
+ * @param position - The integer position (0-based)
957
+ * @returns Object mapping column names to values for that row
958
+ * @throws {IndexError} If position is out of bounds
959
+ *
960
+ * @example
961
+ * ```ts
962
+ * const df = new DataFrame({ age: [25, 30], name: ['Alice', 'Bob'] });
963
+ * df.iloc(0); // { age: 25, name: 'Alice' }
964
+ * df.iloc(1); // { age: 30, name: 'Bob' }
965
+ * ```
966
+ */
967
+ iloc(position) {
968
+ if (this._index.length === 0) {
969
+ throw new IndexError(`DataFrame is empty`, {
970
+ index: position,
971
+ validRange: [0, 0]
972
+ });
973
+ }
974
+ if (position < 0 || position >= this._index.length) {
975
+ throw new IndexError(`Position ${position} is out of bounds (0-${this._index.length - 1})`, {
976
+ index: position,
977
+ validRange: [0, this._index.length - 1]
978
+ });
979
+ }
980
+ const result = {};
981
+ for (const col of this._columns) {
982
+ const colData = this._data.get(col);
983
+ if (colData) {
984
+ result[col] = colData[position];
985
+ }
986
+ }
987
+ return result;
988
+ }
989
+ /**
990
+ * Return the first n rows.
991
+ *
992
+ * @param n - Number of rows to return (default: 5)
993
+ * @returns New DataFrame with first n rows
994
+ *
995
+ * @example
996
+ * ```ts
997
+ * const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
998
+ * df.head(3); // DataFrame with rows 0-2
999
+ * ```
1000
+ */
1001
+ head(n = 5) {
1002
+ if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
1003
+ throw new InvalidParameterError("n must be a non-negative integer", "n", n);
1004
+ }
1005
+ const newData = {};
1006
+ for (const col of this._columns) {
1007
+ const colData = this._data.get(col);
1008
+ newData[col] = colData ? colData.slice(0, n) : [];
1009
+ }
1010
+ return new _DataFrame(newData, {
1011
+ columns: this._columns,
1012
+ index: this._index.slice(0, n)
1013
+ });
1014
+ }
1015
+ /**
1016
+ * Return the last n rows.
1017
+ *
1018
+ * @param n - Number of rows to return (default: 5)
1019
+ * @returns New DataFrame with last n rows
1020
+ *
1021
+ * @example
1022
+ * ```ts
1023
+ * const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
1024
+ * df.tail(3); // DataFrame with rows 2-4
1025
+ * ```
1026
+ */
1027
+ tail(n = 5) {
1028
+ if (!Number.isFinite(n) || !Number.isInteger(n) || n < 0) {
1029
+ throw new InvalidParameterError("n must be a non-negative integer", "n", n);
1030
+ }
1031
+ const sliceStart = this._index.length - n;
1032
+ const newData = {};
1033
+ for (const col of this._columns) {
1034
+ const colData = this._data.get(col);
1035
+ newData[col] = colData ? colData.slice(sliceStart) : [];
1036
+ }
1037
+ return new _DataFrame(newData, {
1038
+ columns: this._columns,
1039
+ index: this._index.slice(sliceStart)
1040
+ });
1041
+ }
1042
+ /**
1043
+ * Filter rows based on a boolean predicate function.
1044
+ *
1045
+ * @param predicate - Function that returns true for rows to keep
1046
+ * @returns New DataFrame with filtered rows
1047
+ *
1048
+ * @example
1049
+ * ```ts
1050
+ * const df = new DataFrame({ age: [25, 30, 35], name: ['Alice', 'Bob', 'Carol'] });
1051
+ * const filtered = df.filter(row => row.age > 28);
1052
+ * // DataFrame with Bob and Carol
1053
+ * ```
1054
+ */
1055
+ filter(predicate) {
1056
+ const nCols = this._columns.length;
1057
+ const nRows = this._index.length;
1058
+ const colArrays = new Array(nCols);
1059
+ for (let c = 0; c < nCols; c++) {
1060
+ colArrays[c] = this._data.get(this._columns[c]) ?? [];
1061
+ }
1062
+ const matchIndices = [];
1063
+ const row = {};
1064
+ for (let i = 0; i < nRows; i++) {
1065
+ for (let c = 0; c < nCols; c++) {
1066
+ row[this._columns[c]] = colArrays[c][i];
1067
+ }
1068
+ if (predicate(row)) {
1069
+ matchIndices.push(i);
1070
+ }
1071
+ }
1072
+ const matchCount = matchIndices.length;
1073
+ const filteredData = {};
1074
+ for (let c = 0; c < nCols; c++) {
1075
+ const src = colArrays[c];
1076
+ const dst = new Array(matchCount);
1077
+ for (let m = 0; m < matchCount; m++) {
1078
+ dst[m] = src[matchIndices[m]];
1079
+ }
1080
+ filteredData[this._columns[c]] = dst;
1081
+ }
1082
+ const filteredIndex = new Array(matchCount);
1083
+ for (let m = 0; m < matchCount; m++) {
1084
+ filteredIndex[m] = this._index[matchIndices[m]];
1085
+ }
1086
+ return new _DataFrame(filteredData, {
1087
+ columns: this._columns,
1088
+ index: filteredIndex,
1089
+ copy: false
1090
+ });
1091
+ }
1092
+ /**
1093
+ * Select a subset of columns.
1094
+ *
1095
+ * @param columns - Array of column names to select
1096
+ * @returns New DataFrame with only specified columns
1097
+ * @throws {InvalidParameterError} If any column doesn't exist
1098
+ *
1099
+ * @example
1100
+ * ```ts
1101
+ * const df = new DataFrame({ a: [1, 2], b: [3, 4], c: [5, 6] });
1102
+ * df.select(['a', 'c']); // DataFrame with only columns a and c
1103
+ * ```
1104
+ */
1105
+ select(columns) {
1106
+ for (const col of columns) {
1107
+ if (!this._data.has(col)) {
1108
+ throw new InvalidParameterError(`Column '${col}' not found in DataFrame`, "columns", col);
1109
+ }
1110
+ }
1111
+ const newData = {};
1112
+ for (const col of columns) {
1113
+ const colData = this._data.get(col);
1114
+ newData[col] = colData ? colData.slice() : [];
1115
+ }
1116
+ return new _DataFrame(newData, {
1117
+ columns,
1118
+ index: this._index,
1119
+ copy: false
1120
+ });
1121
+ }
1122
+ /**
1123
+ * Drop (remove) specified columns.
1124
+ *
1125
+ * @param columns - Array of column names to drop
1126
+ * @returns New DataFrame without the dropped columns
1127
+ *
1128
+ * @example
1129
+ * ```ts
1130
+ * const df = new DataFrame({ a: [1, 2], b: [3, 4], c: [5, 6] });
1131
+ * df.drop(['b']); // DataFrame with only columns a and c
1132
+ * ```
1133
+ */
1134
+ drop(columns) {
1135
+ if (!isStringArray(columns)) {
1136
+ throw new InvalidParameterError("columns must be an array of strings", "columns", columns);
1137
+ }
1138
+ ensureUniqueLabels(columns, "column name");
1139
+ for (const col of columns) {
1140
+ if (!this._data.has(col)) {
1141
+ throw new InvalidParameterError(`Column '${col}' not found in DataFrame`, "columns", col);
1142
+ }
1143
+ }
1144
+ const columnsToKeep = this._columns.filter((col) => !columns.includes(col));
1145
+ const newData = {};
1146
+ for (const col of columnsToKeep) {
1147
+ const colData = this._data.get(col);
1148
+ newData[col] = colData ? [...colData] : [];
1149
+ }
1150
+ return new _DataFrame(newData, {
1151
+ columns: columnsToKeep,
1152
+ index: this._index
1153
+ });
1154
+ }
1155
+ /**
1156
+ * Sort DataFrame by one or more columns.
1157
+ *
1158
+ * @param by - Column name or array of column names to sort by
1159
+ * @param ascending - Sort in ascending order (default: true)
1160
+ * @returns New sorted DataFrame
1161
+ *
1162
+ * @example
1163
+ * ```ts
1164
+ * const df = new DataFrame({ age: [30, 25, 35], name: ['Bob', 'Alice', 'Carol'] });
1165
+ * df.sort('age'); // Sorted by age ascending
1166
+ * df.sort(['age'], false); // Sorted by age descending
1167
+ * ```
1168
+ */
1169
+ sort(by, ascending = true) {
1170
+ const sortCols = Array.isArray(by) ? by : [by];
1171
+ for (const col of sortCols) {
1172
+ if (!this._data.has(col)) {
1173
+ throw new InvalidParameterError(`Column '${col}' not found in DataFrame`, "by", col);
1174
+ }
1175
+ }
1176
+ const nRows = this._index.length;
1177
+ const sortColArrays = new Array(sortCols.length);
1178
+ for (let c = 0; c < sortCols.length; c++) {
1179
+ sortColArrays[c] = this._data.get(sortCols[c]) ?? [];
1180
+ }
1181
+ const indices = new Array(nRows);
1182
+ for (let i = 0; i < nRows; i++) indices[i] = i;
1183
+ indices.sort((ai, bi) => {
1184
+ for (let c = 0; c < sortColArrays.length; c++) {
1185
+ const colArr = sortColArrays[c];
1186
+ const aVal = colArr[ai];
1187
+ const bVal = colArr[bi];
1188
+ if (isNumberValue(aVal) && isNumberValue(bVal)) {
1189
+ const aIsNaN = Number.isNaN(aVal);
1190
+ const bIsNaN = Number.isNaN(bVal);
1191
+ if (aIsNaN && bIsNaN) continue;
1192
+ if (aIsNaN) return 1;
1193
+ if (bIsNaN) return -1;
1194
+ const diff = aVal - bVal;
1195
+ if (diff !== 0) return ascending ? diff : -diff;
1196
+ } else if (typeof aVal === "string" && typeof bVal === "string") {
1197
+ const cmp = aVal.localeCompare(bVal);
1198
+ if (cmp !== 0) return ascending ? cmp : -cmp;
1199
+ } else {
1200
+ const cmp = String(aVal).localeCompare(String(bVal));
1201
+ if (cmp !== 0) return ascending ? cmp : -cmp;
1202
+ }
1203
+ }
1204
+ return 0;
1205
+ });
1206
+ const sortedData = {};
1207
+ for (const col of this._columns) {
1208
+ const src = this._data.get(col) ?? [];
1209
+ const dst = new Array(nRows);
1210
+ for (let i = 0; i < nRows; i++) {
1211
+ dst[i] = src[indices[i]];
1212
+ }
1213
+ sortedData[col] = dst;
1214
+ }
1215
+ const sortedIndex = new Array(nRows);
1216
+ for (let i = 0; i < nRows; i++) {
1217
+ sortedIndex[i] = this._index[indices[i]];
1218
+ }
1219
+ return new _DataFrame(sortedData, {
1220
+ columns: this._columns,
1221
+ index: sortedIndex
1222
+ });
1223
+ }
1224
+ /**
1225
+ * Group DataFrame by one or more columns.
1226
+ *
1227
+ * Returns a DataFrameGroupBy object for performing aggregations.
1228
+ *
1229
+ * @param by - Column name or array of column names to group by
1230
+ * @returns DataFrameGroupBy object for aggregation operations
1231
+ *
1232
+ * @example
1233
+ * ```ts
1234
+ * const df = new DataFrame({
1235
+ * category: ['A', 'B', 'A', 'B'],
1236
+ * value: [10, 20, 30, 40]
1237
+ * });
1238
+ * const grouped = df.groupBy('category');
1239
+ * grouped.sum(); // Sum values by category
1240
+ * ```
1241
+ */
1242
+ groupBy(by) {
1243
+ return new DataFrameGroupBy(this, by);
1244
+ }
1245
+ /**
1246
+ * Join with another DataFrame using SQL-style join.
1247
+ *
1248
+ * Uses hash join algorithm for O(n + m) time complexity.
1249
+ * Optimized for large datasets with minimal memory overhead.
1250
+ *
1251
+ * @param other - DataFrame to join with
1252
+ * @param on - Column name to join on (must exist in both DataFrames)
1253
+ * @param how - Type of join operation
1254
+ * - 'inner': Only rows with matching keys in both DataFrames
1255
+ * - 'left': All rows from left, matched rows from right (nulls for non-matches)
1256
+ * - 'right': All rows from right, matched rows from left (nulls for non-matches)
1257
+ * - 'outer': All rows from both DataFrames (nulls for non-matches)
1258
+ * @returns New DataFrame with joined data
1259
+ *
1260
+ * @throws {InvalidParameterError} If join column doesn't exist in either DataFrame
1261
+ *
1262
+ * @example
1263
+ * ```ts
1264
+ * const customers = new DataFrame({
1265
+ * id: [1, 2, 3],
1266
+ * name: ['Alice', 'Bob', 'Charlie']
1267
+ * });
1268
+ * const orders = new DataFrame({
1269
+ * id: [1, 1, 2, 4],
1270
+ * product: ['Laptop', 'Mouse', 'Keyboard', 'Monitor']
1271
+ * });
1272
+ *
1273
+ * // Inner join - only customers with orders
1274
+ * const inner = customers.join(orders, 'id', 'inner');
1275
+ * // Result: Alice with 2 orders, Bob with 1 order
1276
+ *
1277
+ * // Left join - all customers, with/without orders
1278
+ * const left = customers.join(orders, 'id', 'left');
1279
+ * // Result: Alice, Bob, Charlie (Charlie has null for product)
1280
+ * ```
1281
+ *
1282
+ * @see {@link https://en.wikipedia.org/wiki/Hash_join | Hash Join Algorithm}
1283
+ */
1284
+ join(other, on, how = "inner") {
1285
+ if (!["inner", "left", "right", "outer"].includes(how)) {
1286
+ throw new InvalidParameterError(
1287
+ 'how must be one of "inner", "left", "right", or "outer"',
1288
+ "how",
1289
+ how
1290
+ );
1291
+ }
1292
+ if (!this._columns.includes(on)) {
1293
+ throw new InvalidParameterError(`Join column '${on}' not found in left DataFrame`, "on", on);
1294
+ }
1295
+ if (!other._columns.includes(on)) {
1296
+ throw new InvalidParameterError(`Join column '${on}' not found in right DataFrame`, "on", on);
1297
+ }
1298
+ const rightHash = /* @__PURE__ */ new Map();
1299
+ const rightData = other._data.get(on) ?? [];
1300
+ for (let i = 0; i < rightData.length; i++) {
1301
+ const val = rightData[i];
1302
+ if (val === null || val === void 0) continue;
1303
+ const key = createKey(val);
1304
+ const indices = rightHash.get(key) ?? [];
1305
+ indices.push(i);
1306
+ rightHash.set(key, indices);
1307
+ }
1308
+ const matchedRightRows = /* @__PURE__ */ new Set();
1309
+ const rightNonKeyColumns = other._columns.filter((col) => col !== on);
1310
+ const overlapping = /* @__PURE__ */ new Set();
1311
+ for (const col of rightNonKeyColumns) {
1312
+ if (this._columns.includes(col)) {
1313
+ overlapping.add(col);
1314
+ }
1315
+ }
1316
+ const leftOutputNames = [];
1317
+ for (const col of this._columns) {
1318
+ if (col !== on && overlapping.has(col)) {
1319
+ leftOutputNames.push(`${col}_left`);
1320
+ } else {
1321
+ leftOutputNames.push(col);
1322
+ }
1323
+ }
1324
+ const rightOutputNames = [];
1325
+ for (const col of rightNonKeyColumns) {
1326
+ if (overlapping.has(col)) {
1327
+ rightOutputNames.push(`${col}_right`);
1328
+ } else {
1329
+ rightOutputNames.push(col);
1330
+ }
1331
+ }
1332
+ const allColumns = [...leftOutputNames, ...rightOutputNames];
1333
+ const resultData = {};
1334
+ for (const col of allColumns) {
1335
+ resultData[col] = [];
1336
+ }
1337
+ const leftData = this._data.get(on) ?? [];
1338
+ for (let i = 0; i < leftData.length; i++) {
1339
+ const leftKey = createKey(leftData[i]);
1340
+ const matches = rightHash.get(leftKey) ?? [];
1341
+ if (matches.length > 0) {
1342
+ for (const rightIdx of matches) {
1343
+ matchedRightRows.add(rightIdx);
1344
+ for (let j = 0; j < this._columns.length; j++) {
1345
+ const originalCol = this._columns[j];
1346
+ const outputCol = leftOutputNames[j];
1347
+ if (originalCol && outputCol) {
1348
+ const colData = this._data.get(originalCol);
1349
+ resultData[outputCol]?.push(colData?.[i] ?? null);
1350
+ }
1351
+ }
1352
+ for (let j = 0; j < rightNonKeyColumns.length; j++) {
1353
+ const originalCol = rightNonKeyColumns[j];
1354
+ const outputCol = rightOutputNames[j];
1355
+ if (originalCol && outputCol) {
1356
+ const colData = other._data.get(originalCol);
1357
+ resultData[outputCol]?.push(colData?.[rightIdx] ?? null);
1358
+ }
1359
+ }
1360
+ }
1361
+ } else if (how === "left" || how === "outer") {
1362
+ for (let j = 0; j < this._columns.length; j++) {
1363
+ const originalCol = this._columns[j];
1364
+ const outputCol = leftOutputNames[j];
1365
+ if (originalCol && outputCol) {
1366
+ const colData = this._data.get(originalCol);
1367
+ resultData[outputCol]?.push(colData?.[i] ?? null);
1368
+ }
1369
+ }
1370
+ for (const col of rightOutputNames) {
1371
+ resultData[col]?.push(null);
1372
+ }
1373
+ }
1374
+ }
1375
+ if (how === "right" || how === "outer") {
1376
+ for (let i = 0; i < rightData.length; i++) {
1377
+ if (!matchedRightRows.has(i)) {
1378
+ for (let j = 0; j < this._columns.length; j++) {
1379
+ const originalCol = this._columns[j];
1380
+ const outputCol = leftOutputNames[j];
1381
+ if (originalCol && outputCol) {
1382
+ if (originalCol === on) {
1383
+ const colData = other._data.get(on);
1384
+ resultData[outputCol]?.push(colData?.[i] ?? null);
1385
+ } else {
1386
+ resultData[outputCol]?.push(null);
1387
+ }
1388
+ }
1389
+ }
1390
+ for (let j = 0; j < rightNonKeyColumns.length; j++) {
1391
+ const originalCol = rightNonKeyColumns[j];
1392
+ const outputCol = rightOutputNames[j];
1393
+ if (originalCol && outputCol) {
1394
+ const colData = other._data.get(originalCol);
1395
+ resultData[outputCol]?.push(colData?.[i] ?? null);
1396
+ }
1397
+ }
1398
+ }
1399
+ }
1400
+ }
1401
+ return new _DataFrame(resultData, { columns: allColumns });
1402
+ }
1403
+ /**
1404
+ * Merge with another DataFrame using pandas-style merge.
1405
+ *
1406
+ * More flexible than join() - supports different column names for join keys.
1407
+ * Uses hash join algorithm for O(n + m) complexity.
1408
+ *
1409
+ * @param other - DataFrame to merge with
1410
+ * @param options - Merge configuration
1411
+ * - on: Column name to join on (must exist in both DataFrames)
1412
+ * - left_on: Column name in left DataFrame
1413
+ * - right_on: Column name in right DataFrame
1414
+ * - how: Join type ('inner', 'left', 'right', 'outer')
1415
+ * - suffixes: Suffixes appended to overlapping column names (default: ['_x', '_y'])
1416
+ * @returns New DataFrame with merged data
1417
+ *
1418
+ * @throws {InvalidParameterError} If merge columns don't exist or conflicting options provided
1419
+ *
1420
+ * @example
1421
+ * ```ts
1422
+ * const employees = new DataFrame({
1423
+ * emp_id: [1, 2, 3],
1424
+ * name: ['Alice', 'Bob', 'Charlie']
1425
+ * });
1426
+ * const salaries = new DataFrame({
1427
+ * employee_id: [1, 2, 4],
1428
+ * salary: [50000, 60000, 55000]
1429
+ * });
1430
+ *
1431
+ * // Merge on different column names
1432
+ * const result = employees.merge(salaries, {
1433
+ * left_on: 'emp_id',
1434
+ * right_on: 'employee_id',
1435
+ * how: 'left'
1436
+ * });
1437
+ * ```
1438
+ *
1439
+ * @see {@link https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html | Pandas merge}
1440
+ */
1441
+ merge(other, options = {}) {
1442
+ const how = options.how ?? "inner";
1443
+ if (!["inner", "left", "right", "outer"].includes(how)) {
1444
+ throw new InvalidParameterError(
1445
+ 'how must be one of "inner", "left", "right", or "outer"',
1446
+ "how",
1447
+ how
1448
+ );
1449
+ }
1450
+ if (options.suffixes !== void 0) {
1451
+ if (!Array.isArray(options.suffixes) || options.suffixes.length !== 2 || typeof options.suffixes[0] !== "string" || typeof options.suffixes[1] !== "string") {
1452
+ throw new InvalidParameterError(
1453
+ "suffixes must be a tuple of two strings",
1454
+ "suffixes",
1455
+ options.suffixes
1456
+ );
1457
+ }
1458
+ }
1459
+ const suffixes = options.suffixes ?? ["_x", "_y"];
1460
+ let leftOn;
1461
+ let rightOn;
1462
+ if (options.on) {
1463
+ if (typeof options.on !== "string") {
1464
+ throw new InvalidParameterError("on must be a string", "on", options.on);
1465
+ }
1466
+ if (options.left_on || options.right_on) {
1467
+ throw new InvalidParameterError('Cannot specify both "on" and "left_on"/"right_on"');
1468
+ }
1469
+ leftOn = options.on;
1470
+ rightOn = options.on;
1471
+ } else if (options.left_on && options.right_on) {
1472
+ if (typeof options.left_on !== "string") {
1473
+ throw new InvalidParameterError("left_on must be a string", "left_on", options.left_on);
1474
+ }
1475
+ if (typeof options.right_on !== "string") {
1476
+ throw new InvalidParameterError("right_on must be a string", "right_on", options.right_on);
1477
+ }
1478
+ leftOn = options.left_on;
1479
+ rightOn = options.right_on;
1480
+ } else {
1481
+ throw new InvalidParameterError('Must specify either "on" or both "left_on" and "right_on"');
1482
+ }
1483
+ if (!this._columns.includes(leftOn)) {
1484
+ throw new InvalidParameterError(
1485
+ `Column '${leftOn}' not found in left DataFrame`,
1486
+ "left_on",
1487
+ leftOn
1488
+ );
1489
+ }
1490
+ if (!other._columns.includes(rightOn)) {
1491
+ throw new InvalidParameterError(
1492
+ `Column '${rightOn}' not found in right DataFrame`,
1493
+ "right_on",
1494
+ rightOn
1495
+ );
1496
+ }
1497
+ const rightHash = /* @__PURE__ */ new Map();
1498
+ const rightData = other._data.get(rightOn) ?? [];
1499
+ for (let i = 0; i < rightData.length; i++) {
1500
+ const val = rightData[i];
1501
+ if (val === null || val === void 0) continue;
1502
+ const key = createKey(val);
1503
+ const indices = rightHash.get(key) ?? [];
1504
+ indices.push(i);
1505
+ rightHash.set(key, indices);
1506
+ }
1507
+ const matchedRightRows = /* @__PURE__ */ new Set();
1508
+ const resultData = {};
1509
+ const leftColumns = this._columns.map((col) => {
1510
+ if (col === leftOn) return col;
1511
+ if (other._columns.includes(col) && col !== rightOn) {
1512
+ return col + suffixes[0];
1513
+ }
1514
+ return col;
1515
+ });
1516
+ const leftColumnSet = new Set(leftColumns);
1517
+ const rightColumns = [];
1518
+ const originalLeftColumns = new Set(this._columns);
1519
+ for (const col of other._columns) {
1520
+ if (leftOn === rightOn && col === rightOn) {
1521
+ continue;
1522
+ }
1523
+ let resultCol = col;
1524
+ if (originalLeftColumns.has(col) && col !== leftOn) {
1525
+ resultCol = `${col}${suffixes[1]}`;
1526
+ }
1527
+ if (leftColumnSet.has(resultCol)) {
1528
+ let suffixIndex = 0;
1529
+ let candidate = `${resultCol}`;
1530
+ while (leftColumnSet.has(candidate)) {
1531
+ suffixIndex++;
1532
+ candidate = `${resultCol}_${suffixIndex}`;
1533
+ }
1534
+ resultCol = candidate;
1535
+ }
1536
+ rightColumns.push(resultCol);
1537
+ leftColumnSet.add(resultCol);
1538
+ }
1539
+ const allColumns = [...leftColumns, ...rightColumns];
1540
+ for (const col of allColumns) {
1541
+ resultData[col] = [];
1542
+ }
1543
+ const leftData = this._data.get(leftOn) ?? [];
1544
+ for (let i = 0; i < leftData.length; i++) {
1545
+ const key = createKey(leftData[i]);
1546
+ const rightIndices = rightHash.get(key) ?? [];
1547
+ if (rightIndices.length > 0) {
1548
+ for (const rightIdx of rightIndices) {
1549
+ matchedRightRows.add(rightIdx);
1550
+ for (let j = 0; j < this._columns.length; j++) {
1551
+ const originalCol = this._columns[j];
1552
+ if (!originalCol) continue;
1553
+ const resultCol = leftColumns[j];
1554
+ const colData = this._data.get(originalCol);
1555
+ if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
1556
+ }
1557
+ let rightColIdx = 0;
1558
+ for (const originalCol of other._columns) {
1559
+ const shouldSkip = leftOn === rightOn && originalCol === rightOn;
1560
+ if (shouldSkip || !originalCol) continue;
1561
+ const resultCol = rightColumns[rightColIdx];
1562
+ const colData = other._data.get(originalCol);
1563
+ if (resultCol) resultData[resultCol]?.push(colData?.[rightIdx] ?? null);
1564
+ rightColIdx++;
1565
+ }
1566
+ }
1567
+ } else if (how === "left" || how === "outer") {
1568
+ for (let j = 0; j < this._columns.length; j++) {
1569
+ const originalCol = this._columns[j];
1570
+ if (!originalCol) continue;
1571
+ const resultCol = leftColumns[j];
1572
+ const colData = this._data.get(originalCol);
1573
+ if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
1574
+ }
1575
+ for (const col of rightColumns) {
1576
+ resultData[col]?.push(null);
1577
+ }
1578
+ }
1579
+ }
1580
+ if (how === "right" || how === "outer") {
1581
+ for (let i = 0; i < rightData.length; i++) {
1582
+ if (!matchedRightRows.has(i)) {
1583
+ for (let j = 0; j < this._columns.length; j++) {
1584
+ const originalCol = this._columns[j];
1585
+ const resultCol = leftColumns[j];
1586
+ if (originalCol && resultCol) {
1587
+ if (originalCol === leftOn && leftOn === rightOn) {
1588
+ const rightJoinData = other._data.get(rightOn);
1589
+ resultData[resultCol]?.push(rightJoinData?.[i] ?? null);
1590
+ } else {
1591
+ resultData[resultCol]?.push(null);
1592
+ }
1593
+ }
1594
+ }
1595
+ let rightColIdx = 0;
1596
+ for (const originalCol of other._columns) {
1597
+ const shouldSkip = leftOn === rightOn && originalCol === rightOn;
1598
+ if (shouldSkip) {
1599
+ continue;
1600
+ }
1601
+ const resultCol = rightColumns[rightColIdx];
1602
+ const colData = other._data.get(originalCol);
1603
+ if (resultCol) resultData[resultCol]?.push(colData?.[i] ?? null);
1604
+ rightColIdx++;
1605
+ }
1606
+ }
1607
+ }
1608
+ }
1609
+ return new _DataFrame(resultData, { columns: allColumns });
1610
+ }
1611
+ /**
1612
+ * Concatenate with another DataFrame.
1613
+ *
1614
+ * @param other - DataFrame to concatenate
1615
+ * @param axis - Axis to concatenate along.
1616
+ * - 0 or "rows" or "index": Stack vertically (append rows)
1617
+ * - 1 or "columns": Stack horizontally (append columns)
1618
+ * @returns Concatenated DataFrame
1619
+ *
1620
+ * @example
1621
+ * ```ts
1622
+ * const df1 = new DataFrame({ a: [1, 2], b: [3, 4] });
1623
+ * const df2 = new DataFrame({ a: [5, 6], b: [7, 8] });
1624
+ * df1.concat(df2, "rows"); // Stack vertically: 4 rows
1625
+ * df1.concat(df2, "columns"); // Stack horizontally: 4 columns
1626
+ * ```
1627
+ */
1628
+ concat(other, axis = 0) {
1629
+ const ax = normalizeAxis(axis, 2);
1630
+ if (ax === 0) {
1631
+ for (const col of this._columns) {
1632
+ if (!other._columns.includes(col)) {
1633
+ throw new DataValidationError(
1634
+ `Cannot concat on axis=0: missing column '${col}' in other DataFrame`
1635
+ );
1636
+ }
1637
+ }
1638
+ for (const col of other._columns) {
1639
+ if (!this._columns.includes(col)) {
1640
+ throw new DataValidationError(
1641
+ `Cannot concat on axis=0: extra column '${col}' in other DataFrame`
1642
+ );
1643
+ }
1644
+ }
1645
+ const newData = {};
1646
+ for (const col of this._columns) {
1647
+ const thisColData = this._data.get(col) ?? [];
1648
+ const otherColData = other._data.get(col) ?? [];
1649
+ newData[col] = [...thisColData, ...otherColData];
1650
+ }
1651
+ const totalRows = this._index.length + other._index.length;
1652
+ const newIndex = Array.from({ length: totalRows }, (_, i) => i);
1653
+ return new _DataFrame(newData, {
1654
+ columns: this._columns,
1655
+ index: newIndex
1656
+ });
1657
+ } else {
1658
+ const newIndex = [...this._index];
1659
+ const seenIndices = new Set(this._index);
1660
+ for (const idx of other._index) {
1661
+ if (!seenIndices.has(idx)) {
1662
+ newIndex.push(idx);
1663
+ seenIndices.add(idx);
1664
+ }
1665
+ }
1666
+ const newData = {};
1667
+ const newColumns = [];
1668
+ const alignColumn = (df, col, targetIndex) => {
1669
+ const sourceData = df._data.get(col);
1670
+ if (!sourceData) return [];
1671
+ const indexPos = df._indexPos;
1672
+ return targetIndex.map((label) => {
1673
+ const pos = indexPos.get(label);
1674
+ if (pos !== void 0) {
1675
+ return sourceData[pos];
1676
+ }
1677
+ return null;
1678
+ });
1679
+ };
1680
+ const rightColSet = new Set(other._columns);
1681
+ const overlapping = /* @__PURE__ */ new Set();
1682
+ for (const col of this._columns) {
1683
+ if (rightColSet.has(col)) {
1684
+ overlapping.add(col);
1685
+ }
1686
+ }
1687
+ for (const col of this._columns) {
1688
+ const outputName = overlapping.has(col) ? `${col}_left` : col;
1689
+ newData[outputName] = alignColumn(this, col, newIndex);
1690
+ newColumns.push(outputName);
1691
+ }
1692
+ for (const col of other._columns) {
1693
+ const outputName = overlapping.has(col) ? `${col}_right` : col;
1694
+ newData[outputName] = alignColumn(other, col, newIndex);
1695
+ newColumns.push(outputName);
1696
+ }
1697
+ return new _DataFrame(newData, {
1698
+ columns: newColumns,
1699
+ index: newIndex
1700
+ });
1701
+ }
1702
+ }
1703
+ /**
1704
+ * Fill missing values (null or undefined) with a specified value.
1705
+ *
1706
+ * @param value - Value to use for filling missing values
1707
+ * @returns New DataFrame with missing values filled
1708
+ *
1709
+ * @example
1710
+ * ```ts
1711
+ * const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
1712
+ * df.fillna(0); // Replace null/undefined with 0
1713
+ * ```
1714
+ */
1715
+ fillna(value) {
1716
+ const newData = {};
1717
+ for (const col of this._columns) {
1718
+ const colData = this._data.get(col);
1719
+ if (colData) {
1720
+ newData[col] = colData.map(
1721
+ (v) => v === null || v === void 0 || typeof v === "number" && Number.isNaN(v) ? value : v
1722
+ );
1723
+ }
1724
+ }
1725
+ return new _DataFrame(newData, {
1726
+ columns: this._columns,
1727
+ index: this._index
1728
+ });
1729
+ }
1730
+ /**
1731
+ * Drop rows that contain any missing values (null or undefined).
1732
+ *
1733
+ * @returns New DataFrame with rows containing missing values removed
1734
+ *
1735
+ * @example
1736
+ * ```ts
1737
+ * const df = new DataFrame({ a: [1, null, 3], b: [4, 5, 6] });
1738
+ * df.dropna(); // Only keeps rows 0 and 2
1739
+ * ```
1740
+ */
1741
+ dropna() {
1742
+ const newData = {};
1743
+ const newIndex = [];
1744
+ for (const col of this._columns) {
1745
+ newData[col] = [];
1746
+ }
1747
+ for (let i = 0; i < this._index.length; i++) {
1748
+ let hasNA = false;
1749
+ for (const col of this._columns) {
1750
+ const colData = this._data.get(col);
1751
+ if (colData) {
1752
+ const val = colData[i];
1753
+ if (val === null || val === void 0 || typeof val === "number" && Number.isNaN(val)) {
1754
+ hasNA = true;
1755
+ break;
1756
+ }
1757
+ }
1758
+ }
1759
+ if (!hasNA) {
1760
+ const idx = this._index[i];
1761
+ if (idx !== void 0) newIndex.push(idx);
1762
+ for (const col of this._columns) {
1763
+ const colData = this._data.get(col);
1764
+ if (colData) {
1765
+ newData[col]?.push(colData[i]);
1766
+ }
1767
+ }
1768
+ }
1769
+ }
1770
+ return new _DataFrame(newData, {
1771
+ columns: this._columns,
1772
+ index: newIndex
1773
+ });
1774
+ }
1775
+ /**
1776
+ * Generate descriptive statistics.
1777
+ *
1778
+ * Computes count, mean, std, min, 25%, 50%, 75%, max for numeric columns.
1779
+ *
1780
+ * @returns DataFrame with statistics
1781
+ */
1782
+ describe() {
1783
+ const stats = {};
1784
+ const metrics = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"];
1785
+ if (this._columns.length === 0 || this._index.length === 0) {
1786
+ return new _DataFrame({}, { columns: [], index: metrics });
1787
+ }
1788
+ for (const col of this._columns) {
1789
+ const colData = this._data.get(col);
1790
+ if (!colData) continue;
1791
+ const numericData = colData.filter(isValidNumber);
1792
+ if (numericData.length === 0) continue;
1793
+ const sorted = [...numericData].sort((a, b) => a - b);
1794
+ const sum = numericData.reduce((acc, val) => acc + val, 0);
1795
+ const mean = sum / numericData.length;
1796
+ let variance;
1797
+ let std;
1798
+ if (numericData.length > 1) {
1799
+ variance = numericData.reduce((acc, val) => acc + (val - mean) ** 2, 0) / (numericData.length - 1);
1800
+ std = Math.sqrt(variance);
1801
+ } else {
1802
+ variance = NaN;
1803
+ std = NaN;
1804
+ }
1805
+ const getPercentile = (p) => {
1806
+ const idx = p / 100 * (sorted.length - 1);
1807
+ const lower = Math.floor(idx);
1808
+ const upper = Math.ceil(idx);
1809
+ const weight = idx - lower;
1810
+ return (sorted[lower] ?? 0) * (1 - weight) + (sorted[upper] ?? 0) * weight;
1811
+ };
1812
+ const minVal = sorted[0];
1813
+ const maxVal = sorted[sorted.length - 1];
1814
+ if (minVal === void 0 || maxVal === void 0) {
1815
+ throw new DataValidationError(`Unable to compute min/max for column '${col}'`);
1816
+ }
1817
+ stats[col] = [
1818
+ numericData.length,
1819
+ mean,
1820
+ std,
1821
+ minVal,
1822
+ getPercentile(25),
1823
+ getPercentile(50),
1824
+ getPercentile(75),
1825
+ maxVal
1826
+ ];
1827
+ }
1828
+ if (Object.keys(stats).length === 0) {
1829
+ return new _DataFrame({}, { columns: [], index: metrics });
1830
+ }
1831
+ return new _DataFrame(stats, { index: metrics });
1832
+ }
1833
+ /**
1834
+ * Compute correlation matrix.
1835
+ *
1836
+ * Uses pairwise complete observations (ignores missing values for each pair).
1837
+ *
1838
+ * @returns DataFrame containing pairwise correlations
1839
+ */
1840
+ corr() {
1841
+ const numericCols = [];
1842
+ for (const col of this._columns) {
1843
+ const colData = this._data.get(col);
1844
+ if (!colData) continue;
1845
+ if (colData.some(isValidNumber)) {
1846
+ numericCols.push(col);
1847
+ }
1848
+ }
1849
+ const corrMatrix = {};
1850
+ for (const col1 of numericCols) {
1851
+ corrMatrix[col1] = [];
1852
+ const data1 = this._data.get(col1);
1853
+ for (const col2 of numericCols) {
1854
+ const data2 = this._data.get(col2);
1855
+ if (!data1 || !data2) {
1856
+ corrMatrix[col1]?.push(NaN);
1857
+ continue;
1858
+ }
1859
+ const valid1 = [];
1860
+ const valid2 = [];
1861
+ for (let i = 0; i < this._index.length; i++) {
1862
+ const v1 = data1[i];
1863
+ const v2 = data2[i];
1864
+ if (isValidNumber(v1) && isValidNumber(v2)) {
1865
+ valid1.push(v1);
1866
+ valid2.push(v2);
1867
+ }
1868
+ }
1869
+ if (valid1.length < 2) {
1870
+ corrMatrix[col1]?.push(NaN);
1871
+ continue;
1872
+ }
1873
+ const mean1 = valid1.reduce((a, b) => a + b, 0) / valid1.length;
1874
+ const mean2 = valid2.reduce((a, b) => a + b, 0) / valid2.length;
1875
+ let num = 0;
1876
+ let den1 = 0;
1877
+ let den2 = 0;
1878
+ for (let k = 0; k < valid1.length; k++) {
1879
+ const val1 = valid1[k];
1880
+ const val2 = valid2[k];
1881
+ if (val1 === void 0 || val2 === void 0) continue;
1882
+ const diff1 = val1 - mean1;
1883
+ const diff2 = val2 - mean2;
1884
+ num += diff1 * diff2;
1885
+ den1 += diff1 * diff1;
1886
+ den2 += diff2 * diff2;
1887
+ }
1888
+ const corr = den1 === 0 || den2 === 0 ? NaN : num / Math.sqrt(den1 * den2);
1889
+ corrMatrix[col1]?.push(corr);
1890
+ }
1891
+ }
1892
+ return new _DataFrame(corrMatrix, {
1893
+ index: numericCols,
1894
+ columns: numericCols
1895
+ });
1896
+ }
1897
+ /**
1898
+ * Compute covariance matrix.
1899
+ *
1900
+ * Uses pairwise complete observations.
1901
+ *
1902
+ * @returns DataFrame containing pairwise covariances
1903
+ */
1904
+ cov() {
1905
+ const numericCols = [];
1906
+ for (const col of this._columns) {
1907
+ const colData = this._data.get(col);
1908
+ if (!colData) continue;
1909
+ if (colData.some(isValidNumber)) {
1910
+ numericCols.push(col);
1911
+ }
1912
+ }
1913
+ const covMatrix = {};
1914
+ for (const col1 of numericCols) {
1915
+ covMatrix[col1] = [];
1916
+ const data1 = this._data.get(col1);
1917
+ for (const col2 of numericCols) {
1918
+ const data2 = this._data.get(col2);
1919
+ if (!data1 || !data2) {
1920
+ covMatrix[col1]?.push(NaN);
1921
+ continue;
1922
+ }
1923
+ const valid1 = [];
1924
+ const valid2 = [];
1925
+ for (let i = 0; i < this._index.length; i++) {
1926
+ const v1 = data1[i];
1927
+ const v2 = data2[i];
1928
+ if (isValidNumber(v1) && isValidNumber(v2)) {
1929
+ valid1.push(v1);
1930
+ valid2.push(v2);
1931
+ }
1932
+ }
1933
+ if (valid1.length < 2) {
1934
+ covMatrix[col1]?.push(NaN);
1935
+ continue;
1936
+ }
1937
+ const mean1 = valid1.reduce((a, b) => a + b, 0) / valid1.length;
1938
+ const mean2 = valid2.reduce((a, b) => a + b, 0) / valid2.length;
1939
+ let cov = 0;
1940
+ for (let k = 0; k < valid1.length; k++) {
1941
+ const val1 = valid1[k];
1942
+ const val2 = valid2[k];
1943
+ if (val1 === void 0 || val2 === void 0) continue;
1944
+ cov += (val1 - mean1) * (val2 - mean2);
1945
+ }
1946
+ cov /= valid1.length - 1;
1947
+ covMatrix[col1]?.push(cov);
1948
+ }
1949
+ }
1950
+ return new _DataFrame(covMatrix, {
1951
+ index: numericCols,
1952
+ columns: numericCols
1953
+ });
1954
+ }
1955
+ /**
1956
+ * Apply a function along an axis of the DataFrame.
1957
+ *
1958
+ * When `axis=1`, the provided Series is indexed by column names.
1959
+ *
1960
+ * @param fn - Function to apply to each Series
1961
+ * @param axis - Axis to apply along (0=columns, 1=rows)
1962
+ * @returns New DataFrame with function applied
1963
+ *
1964
+ * @example
1965
+ * ```ts
1966
+ * const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
1967
+ * // Apply function to each column
1968
+ * df.apply(series => series.map(x => Number(x) * 2), 0);
1969
+ * ```
1970
+ */
1971
+ apply(fn, axis = 0) {
1972
+ const ax = normalizeAxis(axis, 2);
1973
+ if (ax === 0) {
1974
+ const newData = {};
1975
+ for (const col of this._columns) {
1976
+ const series = this.get(col);
1977
+ const result = fn(series);
1978
+ if (!(result instanceof Series)) {
1979
+ throw new DataValidationError("Function must return a Series when axis=0");
1980
+ }
1981
+ newData[col] = [...result.data];
1982
+ }
1983
+ return new _DataFrame(newData, {
1984
+ columns: this._columns,
1985
+ index: this._index
1986
+ });
1987
+ } else {
1988
+ const results = [];
1989
+ const columnLabelMap = /* @__PURE__ */ new Map();
1990
+ const newColumns = [];
1991
+ for (let i = 0; i < this._index.length; i++) {
1992
+ const rowValues = [];
1993
+ for (const col of this._columns) {
1994
+ rowValues.push(this._data.get(col)?.[i]);
1995
+ }
1996
+ const rowSeries = new Series(rowValues, {
1997
+ name: "row",
1998
+ index: this._columns,
1999
+ copy: false
2000
+ });
2001
+ const result = fn(rowSeries);
2002
+ if (!(result instanceof Series)) {
2003
+ throw new DataValidationError("Function must return a Series when axis=1");
2004
+ }
2005
+ results.push(result);
2006
+ for (const label of result.index) {
2007
+ const columnName = String(label);
2008
+ const existing = columnLabelMap.get(columnName);
2009
+ if (existing !== void 0 && existing !== label) {
2010
+ throw new DataValidationError(
2011
+ `Column label '${columnName}' is ambiguous between '${String(
2012
+ existing
2013
+ )}' and '${String(label)}'`
2014
+ );
2015
+ }
2016
+ if (!columnLabelMap.has(columnName)) {
2017
+ newColumns.push(columnName);
2018
+ columnLabelMap.set(columnName, label);
2019
+ }
2020
+ }
2021
+ }
2022
+ const newData = {};
2023
+ for (const col of newColumns) {
2024
+ newData[col] = [];
2025
+ }
2026
+ for (const result of results) {
2027
+ for (const col of newColumns) {
2028
+ const label = columnLabelMap.get(col);
2029
+ if (label === void 0) {
2030
+ throw new DataValidationError(`Missing label mapping for column '${col}'`);
2031
+ }
2032
+ const val = result.get(label);
2033
+ newData[col]?.push(val === void 0 ? null : val);
2034
+ }
2035
+ }
2036
+ return new _DataFrame(newData, {
2037
+ columns: newColumns,
2038
+ index: this._index
2039
+ });
2040
+ }
2041
+ }
2042
+ /**
2043
+ * Convert DataFrame to a 2D Tensor.
2044
+ *
2045
+ * All columns must contain numeric data.
2046
+ *
2047
+ * @returns 2D Tensor with shape [rows, columns]
2048
+ * @throws {DataValidationError} If data is non-numeric
2049
+ *
2050
+ * @example
2051
+ * ```ts
2052
+ * const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
2053
+ * const t = df.toTensor(); // 2D tensor [[1,4], [2,5], [3,6]]
2054
+ * ```
2055
+ */
2056
+ toTensor() {
2057
+ const arr = this.toArray();
2058
+ const flat = [];
2059
+ for (const row of arr) {
2060
+ for (const val of row) {
2061
+ if (typeof val === "number") {
2062
+ flat.push(val);
2063
+ } else if (val === null || val === void 0) {
2064
+ flat.push(NaN);
2065
+ } else {
2066
+ throw new DataValidationError(
2067
+ `Non-numeric value found: ${val}. All data must be numeric (or null/undefined) for tensor conversion.`
2068
+ );
2069
+ }
2070
+ }
2071
+ }
2072
+ const t = tensor(flat);
2073
+ const [rows, cols] = this.shape;
2074
+ return reshape(t, [rows, cols]);
2075
+ }
2076
+ /**
2077
+ * Convert DataFrame to a 2D JavaScript array.
2078
+ *
2079
+ * Each inner array represents a row.
2080
+ *
2081
+ * @returns 2D array of values
2082
+ *
2083
+ * @example
2084
+ * ```ts
2085
+ * const df = new DataFrame({ a: [1, 2], b: [3, 4] });
2086
+ * df.toArray(); // [[1, 3], [2, 4]]
2087
+ * ```
2088
+ */
2089
+ toArray() {
2090
+ const result = [];
2091
+ for (let i = 0; i < this._index.length; i++) {
2092
+ const row = [];
2093
+ for (const col of this._columns) {
2094
+ const colData = this._data.get(col);
2095
+ row.push(colData ? colData[i] : void 0);
2096
+ }
2097
+ result.push(row);
2098
+ }
2099
+ return result;
2100
+ }
2101
+ /**
2102
+ * Parse CSV string into DataFrame with full type inference and quote handling.
2103
+ * Time complexity: O(n) where n is number of characters.
2104
+ */
2105
+ static fromCsvString(csvString, options = {}) {
2106
+ const delimiter = options.delimiter ?? ",";
2107
+ const quoteChar = options.quoteChar ?? '"';
2108
+ const hasHeader = options.hasHeader ?? true;
2109
+ const skipRows = options.skipRows ?? 0;
2110
+ const rows = [];
2111
+ let fields = [];
2112
+ let currentField = "";
2113
+ let inQuotes = false;
2114
+ let rowCount = 0;
2115
+ for (let i = 0; i < csvString.length; i++) {
2116
+ const char = csvString[i];
2117
+ const nextChar = csvString[i + 1];
2118
+ if (char === quoteChar) {
2119
+ if (inQuotes && nextChar === quoteChar) {
2120
+ currentField += quoteChar;
2121
+ i++;
2122
+ } else {
2123
+ inQuotes = !inQuotes;
2124
+ }
2125
+ } else if (char === delimiter && !inQuotes) {
2126
+ fields.push(currentField);
2127
+ currentField = "";
2128
+ } else if ((char === "\n" || char === "\r") && !inQuotes) {
2129
+ if (char === "\r" && nextChar === "\n") {
2130
+ i++;
2131
+ }
2132
+ fields.push(currentField);
2133
+ currentField = "";
2134
+ if (fields.some((f) => f.trim() !== "")) {
2135
+ if (rowCount >= skipRows) {
2136
+ rows.push(fields);
2137
+ }
2138
+ rowCount++;
2139
+ }
2140
+ fields = [];
2141
+ } else {
2142
+ currentField += char;
2143
+ }
2144
+ }
2145
+ if (currentField !== "" || fields.length > 0) {
2146
+ fields.push(currentField);
2147
+ if (fields.some((f) => f.trim() !== "") && rowCount >= skipRows) {
2148
+ rows.push(fields);
2149
+ }
2150
+ }
2151
+ if (inQuotes) {
2152
+ throw new DataValidationError("CSV contains an unmatched quote");
2153
+ }
2154
+ if (rows.length === 0) {
2155
+ throw new DataValidationError("CSV contains no data rows");
2156
+ }
2157
+ let columns;
2158
+ let dataRows;
2159
+ if (hasHeader) {
2160
+ const firstRow = rows[0];
2161
+ if (!firstRow) throw new DataValidationError("CSV has no header row");
2162
+ columns = firstRow;
2163
+ ensureUniqueLabels(columns, "column name");
2164
+ dataRows = rows.slice(1);
2165
+ } else {
2166
+ const numCols = rows[0]?.length ?? 0;
2167
+ columns = Array.from({ length: numCols }, (_, i) => `col${i}`);
2168
+ dataRows = rows;
2169
+ }
2170
+ for (let i = 0; i < dataRows.length; i++) {
2171
+ const row = dataRows[i];
2172
+ if (row && row.length !== columns.length) {
2173
+ throw new DataValidationError(
2174
+ `Row ${i + (hasHeader ? 2 : 1)} has ${row.length} fields, expected ${columns.length}`
2175
+ );
2176
+ }
2177
+ }
2178
+ const data = {};
2179
+ for (let colIdx = 0; colIdx < columns.length; colIdx++) {
2180
+ const colName = columns[colIdx];
2181
+ const colData = [];
2182
+ for (const row of dataRows) {
2183
+ const value = row[colIdx];
2184
+ if (value === void 0 || value === "" || value === "null" || value === "undefined") {
2185
+ colData.push(null);
2186
+ } else if (!Number.isNaN(Number(value)) && value !== "" && // Allow "0", "0.5", "10", but not "01" (unless it's "0.1")
2187
+ (value === "0" || !value.startsWith("0") || value.startsWith("0."))) {
2188
+ colData.push(Number(value));
2189
+ } else if (value === "true" || value === "false") {
2190
+ colData.push(value === "true");
2191
+ } else {
2192
+ colData.push(value);
2193
+ }
2194
+ }
2195
+ if (colName) data[colName] = colData;
2196
+ }
2197
+ return new _DataFrame(data, { columns });
2198
+ }
2199
+ /**
2200
+ * Read CSV file - environment-aware (Node.js fs or browser fetch).
2201
+ * Time complexity: O(n) for file read + O(m) for parsing.
2202
+ */
2203
+ static async readCsv(path, options = {}) {
2204
+ let csvString;
2205
+ if (typeof process !== "undefined" && process.versions?.node) {
2206
+ try {
2207
+ const fs = await import('fs/promises');
2208
+ csvString = await fs.readFile(path, "utf-8");
2209
+ } catch (error) {
2210
+ throw new DataValidationError(
2211
+ `Failed to read CSV file: ${error instanceof Error ? error.message : String(error)}`
2212
+ );
2213
+ }
2214
+ } else if (typeof fetch !== "undefined") {
2215
+ try {
2216
+ const response = await fetch(path);
2217
+ if (!response.ok) {
2218
+ throw new DataValidationError(`HTTP ${response.status}: ${response.statusText}`);
2219
+ }
2220
+ csvString = await response.text();
2221
+ } catch (error) {
2222
+ throw new DataValidationError(
2223
+ `Failed to fetch CSV: ${error instanceof Error ? error.message : String(error)}`
2224
+ );
2225
+ }
2226
+ } else {
2227
+ throw new DataValidationError("Environment not supported");
2228
+ }
2229
+ return _DataFrame.fromCsvString(csvString, options);
2230
+ }
2231
+ /**
2232
+ * Convert DataFrame to CSV string with proper quoting and escaping.
2233
+ * Time complexity: O(n × m) where n is rows, m is columns.
2234
+ */
2235
+ toCsvString(options = {}) {
2236
+ const delimiter = options.delimiter ?? ",";
2237
+ const quoteChar = options.quoteChar ?? '"';
2238
+ const includeIndex = options.includeIndex ?? false;
2239
+ const header = options.header ?? true;
2240
+ const lines = [];
2241
+ const escapeField = (value) => {
2242
+ const str = String(value ?? "");
2243
+ if (str.includes(delimiter) || str.includes(quoteChar) || str.includes("\n") || str.includes("\r")) {
2244
+ return quoteChar + str.split(quoteChar).join(quoteChar + quoteChar) + quoteChar;
2245
+ }
2246
+ return str;
2247
+ };
2248
+ if (header) {
2249
+ const headerFields = includeIndex ? ["index", ...this._columns] : [...this._columns];
2250
+ lines.push(headerFields.map(escapeField).join(delimiter));
2251
+ }
2252
+ for (let i = 0; i < this._index.length; i++) {
2253
+ const rowFields = [];
2254
+ if (includeIndex) {
2255
+ rowFields.push(this._index[i]);
2256
+ }
2257
+ for (const col of this._columns) {
2258
+ const colData = this._data.get(col);
2259
+ rowFields.push(colData?.[i] ?? "");
2260
+ }
2261
+ lines.push(rowFields.map(escapeField).join(delimiter));
2262
+ }
2263
+ return lines.join("\n");
2264
+ }
2265
+ /**
2266
+ * Write DataFrame to CSV file - environment-aware.
2267
+ * Time complexity: O(n × m) for generation + O(k) for write.
2268
+ */
2269
+ async toCsv(path, options = {}) {
2270
+ const csvString = this.toCsvString(options);
2271
+ if (typeof process !== "undefined" && process.versions?.node) {
2272
+ try {
2273
+ const fs = await import('fs/promises');
2274
+ await fs.writeFile(path, csvString, "utf-8");
2275
+ } catch (error) {
2276
+ throw new DataValidationError(
2277
+ `Failed to write CSV file: ${error instanceof Error ? error.message : String(error)}`
2278
+ );
2279
+ }
2280
+ } else if (typeof document !== "undefined" && typeof URL !== "undefined") {
2281
+ const blob = new Blob([csvString], { type: "text/csv;charset=utf-8;" });
2282
+ const url = URL.createObjectURL(blob);
2283
+ const link = document.createElement("a");
2284
+ link.href = url;
2285
+ link.download = path;
2286
+ link.style.display = "none";
2287
+ document.body.appendChild(link);
2288
+ link.click();
2289
+ document.body.removeChild(link);
2290
+ URL.revokeObjectURL(url);
2291
+ } else {
2292
+ throw new DataValidationError("Environment not supported");
2293
+ }
2294
+ }
2295
+ /**
2296
+ * Serialize DataFrame to JSON string.
2297
+ * Time complexity: O(n × m).
2298
+ */
2299
+ toJsonString() {
2300
+ return JSON.stringify(
2301
+ {
2302
+ columns: this._columns,
2303
+ index: this._index,
2304
+ data: Object.fromEntries(this._data)
2305
+ },
2306
+ null,
2307
+ 2
2308
+ );
2309
+ }
2310
+ /**
2311
+ * Create DataFrame from JSON string.
2312
+ * Time complexity: O(n × m).
2313
+ */
2314
+ static fromJsonString(jsonStr) {
2315
+ let parsed;
2316
+ try {
2317
+ parsed = JSON.parse(jsonStr);
2318
+ } catch (error) {
2319
+ throw new DataValidationError(
2320
+ `Failed to parse JSON: ${error instanceof Error ? error.message : String(error)}`
2321
+ );
2322
+ }
2323
+ if (!isRecord(parsed)) {
2324
+ throw new DataValidationError("Invalid JSON: expected object (not array)");
2325
+ }
2326
+ const obj = parsed;
2327
+ if (!isStringArray(obj["columns"])) {
2328
+ throw new DataValidationError(
2329
+ 'Invalid JSON: missing or invalid "columns" field (expected array)'
2330
+ );
2331
+ }
2332
+ if (!isIndexLabelArray(obj["index"])) {
2333
+ throw new DataValidationError(
2334
+ 'Invalid JSON: missing or invalid "index" field (expected array)'
2335
+ );
2336
+ }
2337
+ if (!isRecord(obj["data"])) {
2338
+ throw new DataValidationError(
2339
+ 'Invalid JSON: missing or invalid "data" field (expected object)'
2340
+ );
2341
+ }
2342
+ const columns = obj["columns"];
2343
+ const index = obj["index"];
2344
+ const rawData = obj["data"];
2345
+ ensureUniqueLabels(columns, "column name");
2346
+ const dataKeys = Object.keys(rawData);
2347
+ for (const col of columns) {
2348
+ if (!(col in rawData)) {
2349
+ throw new DataValidationError(`Missing data for column '${col}'`);
2350
+ }
2351
+ }
2352
+ for (const key of dataKeys) {
2353
+ if (!columns.includes(key)) {
2354
+ throw new DataValidationError(`Unexpected data column '${key}' not listed in columns`);
2355
+ }
2356
+ }
2357
+ const data = {};
2358
+ for (const [key, value] of Object.entries(rawData)) {
2359
+ if (!Array.isArray(value)) {
2360
+ throw new DataValidationError(`Invalid data for column '${key}': expected array`);
2361
+ }
2362
+ data[key] = value;
2363
+ }
2364
+ return new _DataFrame(data, {
2365
+ columns,
2366
+ index
2367
+ });
2368
+ }
2369
+ /**
2370
+ * Read JSON file - environment-aware.
2371
+ * Time complexity: O(n) for file read + O(m) for parsing.
2372
+ */
2373
+ static async readJson(path) {
2374
+ let jsonString;
2375
+ if (typeof process !== "undefined" && process.versions?.node) {
2376
+ try {
2377
+ const fs = await import('fs/promises');
2378
+ jsonString = await fs.readFile(path, "utf-8");
2379
+ } catch (error) {
2380
+ throw new DataValidationError(
2381
+ `Failed to read JSON file: ${error instanceof Error ? error.message : String(error)}`
2382
+ );
2383
+ }
2384
+ } else if (typeof fetch !== "undefined") {
2385
+ try {
2386
+ const response = await fetch(path);
2387
+ if (!response.ok) {
2388
+ throw new DataValidationError(`HTTP ${response.status}: ${response.statusText}`);
2389
+ }
2390
+ jsonString = await response.text();
2391
+ } catch (error) {
2392
+ throw new DataValidationError(
2393
+ `Failed to fetch JSON: ${error instanceof Error ? error.message : String(error)}`
2394
+ );
2395
+ }
2396
+ } else {
2397
+ throw new DataValidationError("Environment not supported");
2398
+ }
2399
+ return _DataFrame.fromJsonString(jsonString);
2400
+ }
2401
+ /**
2402
+ * Write DataFrame to JSON file - environment-aware.
2403
+ * Time complexity: O(n × m) for generation + O(k) for write.
2404
+ */
2405
+ async toJson(path) {
2406
+ const jsonString = this.toJsonString();
2407
+ if (typeof process !== "undefined" && process.versions?.node) {
2408
+ try {
2409
+ const fs = await import('fs/promises');
2410
+ await fs.writeFile(path, jsonString, "utf-8");
2411
+ } catch (error) {
2412
+ throw new DataValidationError(
2413
+ `Failed to write JSON file: ${error instanceof Error ? error.message : String(error)}`
2414
+ );
2415
+ }
2416
+ } else if (typeof document !== "undefined" && typeof URL !== "undefined") {
2417
+ const blob = new Blob([jsonString], {
2418
+ type: "application/json;charset=utf-8;"
2419
+ });
2420
+ const url = URL.createObjectURL(blob);
2421
+ const link = document.createElement("a");
2422
+ link.href = url;
2423
+ link.download = path;
2424
+ link.style.display = "none";
2425
+ document.body.appendChild(link);
2426
+ link.click();
2427
+ document.body.removeChild(link);
2428
+ URL.revokeObjectURL(url);
2429
+ } else {
2430
+ throw new DataValidationError("Environment not supported");
2431
+ }
2432
+ }
2433
+ /**
2434
+ * Create DataFrame from a Tensor.
2435
+ *
2436
+ * @param tensor - Tensor to convert (must be 1D or 2D)
2437
+ * @param columns - Column names (optional). If provided, length must match tensor columns.
2438
+ * @returns DataFrame
2439
+ *
2440
+ * @example
2441
+ * ```ts
2442
+ * import { tensor } from 'deepbox/ndarray';
2443
+ *
2444
+ * const t = tensor([[1, 2], [3, 4], [5, 6]]);
2445
+ * const df = DataFrame.fromTensor(t, ['col1', 'col2']);
2446
+ * ```
2447
+ */
2448
+ static fromTensor(tensor2, columns) {
2449
+ const storage = tensor2.data;
2450
+ let data;
2451
+ if (storage instanceof BigInt64Array) {
2452
+ data = Array.from(storage, (v) => Number(v));
2453
+ } else if (ArrayBuffer.isView(storage)) {
2454
+ data = Array.from(storage, (v) => Number(v));
2455
+ } else if (Array.isArray(storage)) {
2456
+ data = [...storage];
2457
+ } else {
2458
+ throw new DataValidationError("Unsupported tensor storage type");
2459
+ }
2460
+ if (tensor2.ndim === 1) {
2461
+ if (columns && columns.length !== 1) {
2462
+ throw new DataValidationError(
2463
+ `Expected exactly 1 column name for 1D tensor, received ${columns.length}`
2464
+ );
2465
+ }
2466
+ const colName = columns?.[0] ?? "col0";
2467
+ return new _DataFrame({ [colName]: data });
2468
+ }
2469
+ if (tensor2.ndim === 2) {
2470
+ const rows = tensor2.shape[0];
2471
+ const cols = tensor2.shape[1];
2472
+ if (rows === void 0 || cols === void 0) {
2473
+ throw new DataValidationError("Invalid tensor shape");
2474
+ }
2475
+ if (columns && columns.length !== cols) {
2476
+ throw new DataValidationError(
2477
+ `Column count (${columns.length}) must match tensor columns (${cols})`
2478
+ );
2479
+ }
2480
+ const dfData = {};
2481
+ for (let c = 0; c < cols; c++) {
2482
+ const colName = columns?.[c] ?? `col${c}`;
2483
+ const colData = [];
2484
+ for (let r = 0; r < rows; r++) {
2485
+ colData.push(data[r * cols + c]);
2486
+ }
2487
+ dfData[colName] = colData;
2488
+ }
2489
+ return new _DataFrame(dfData, {
2490
+ columns: columns ?? Array.from({ length: cols }, (_, i) => `col${i}`)
2491
+ });
2492
+ }
2493
+ throw new DataValidationError(
2494
+ `Cannot create DataFrame from ${tensor2.ndim}D tensor. Only 1D and 2D tensors supported.`
2495
+ );
2496
+ }
2497
+ /**
2498
+ * Remove duplicate rows from DataFrame.
2499
+ * Time complexity: O(n × m) where n is rows, m is columns.
2500
+ *
2501
+ * @param subset - Columns to consider for identifying duplicates (default: all columns)
2502
+ * @param keep - Which duplicates to keep: 'first', 'last', or false (remove all)
2503
+ * @returns New DataFrame with duplicates removed
2504
+ *
2505
+ * @example
2506
+ * ```ts
2507
+ * const df = new DataFrame({ a: [1, 1, 2], b: [3, 3, 4] });
2508
+ * df.drop_duplicates(); // Keeps first occurrence: [[1, 3], [2, 4]]
2509
+ * df.drop_duplicates(undefined, 'last'); // Keeps last occurrence
2510
+ * ```
2511
+ */
2512
+ drop_duplicates(subset, keep = "first") {
2513
+ const checkCols = subset ?? this._columns;
2514
+ for (const col of checkCols) {
2515
+ if (!this._columns.includes(col)) {
2516
+ throw new DataValidationError(`Column '${col}' not found in DataFrame`);
2517
+ }
2518
+ }
2519
+ const seen = /* @__PURE__ */ new Map();
2520
+ const keepIndices = [];
2521
+ for (let i = 0; i < this._index.length; i++) {
2522
+ const signature = [];
2523
+ for (const col of checkCols) {
2524
+ signature.push(this._data.get(col)?.[i]);
2525
+ }
2526
+ const key = createKey(signature);
2527
+ const existing = seen.get(key);
2528
+ if (existing === void 0) {
2529
+ seen.set(key, [i]);
2530
+ } else {
2531
+ existing.push(i);
2532
+ }
2533
+ }
2534
+ for (const [_key, indices] of seen.entries()) {
2535
+ if (keep === "first") {
2536
+ const firstIndex = indices[0];
2537
+ if (firstIndex !== void 0) {
2538
+ keepIndices.push(firstIndex);
2539
+ }
2540
+ } else if (keep === "last") {
2541
+ const lastIndex = indices[indices.length - 1];
2542
+ if (lastIndex !== void 0) {
2543
+ keepIndices.push(lastIndex);
2544
+ }
2545
+ } else if (keep === false && indices.length === 1) {
2546
+ const onlyIndex = indices[0];
2547
+ if (onlyIndex !== void 0) {
2548
+ keepIndices.push(onlyIndex);
2549
+ }
2550
+ }
2551
+ }
2552
+ keepIndices.sort((a, b) => a - b);
2553
+ const newData = {};
2554
+ const newIndex = [];
2555
+ for (const col of this._columns) {
2556
+ newData[col] = [];
2557
+ }
2558
+ for (const idx of keepIndices) {
2559
+ const label = this._index[idx];
2560
+ if (label === void 0) {
2561
+ throw new DataValidationError(`Index label at position ${idx} is undefined`);
2562
+ }
2563
+ newIndex.push(label);
2564
+ for (const col of this._columns) {
2565
+ newData[col]?.push(this._data.get(col)?.[idx]);
2566
+ }
2567
+ }
2568
+ return new _DataFrame(newData, {
2569
+ columns: this._columns,
2570
+ index: newIndex
2571
+ });
2572
+ }
2573
+ /**
2574
+ * Return boolean Series indicating duplicate rows.
2575
+ * Time complexity: O(n × m).
2576
+ *
2577
+ * @param subset - Columns to consider for identifying duplicates
2578
+ * @param keep - Which duplicates to mark as False: 'first', 'last', or false (mark all)
2579
+ * @returns Series of booleans (true = duplicate, false = unique)
2580
+ *
2581
+ * @example
2582
+ * ```ts
2583
+ * const df = new DataFrame({ a: [1, 1, 2], b: [3, 3, 4] });
2584
+ * df.duplicated(); // Series([false, true, false])
2585
+ * ```
2586
+ */
2587
+ duplicated(subset, keep = "first") {
2588
+ const checkCols = subset ?? this._columns;
2589
+ for (const col of checkCols) {
2590
+ if (!this._columns.includes(col)) {
2591
+ throw new DataValidationError(`Column '${col}' not found in DataFrame`);
2592
+ }
2593
+ }
2594
+ const seen = /* @__PURE__ */ new Map();
2595
+ for (let i = 0; i < this._index.length; i++) {
2596
+ const signature = [];
2597
+ for (const col of checkCols) {
2598
+ signature.push(this._data.get(col)?.[i]);
2599
+ }
2600
+ const key = createKey(signature);
2601
+ const existing = seen.get(key);
2602
+ if (existing === void 0) {
2603
+ seen.set(key, [i]);
2604
+ } else {
2605
+ existing.push(i);
2606
+ }
2607
+ }
2608
+ const isDuplicate = new Array(this._index.length).fill(false);
2609
+ for (const [_key, indices] of seen.entries()) {
2610
+ if (indices.length > 1) {
2611
+ if (keep === "first") {
2612
+ for (let i = 1; i < indices.length; i++) {
2613
+ const idx = indices[i];
2614
+ if (idx !== void 0) isDuplicate[idx] = true;
2615
+ }
2616
+ } else if (keep === "last") {
2617
+ for (let i = 0; i < indices.length - 1; i++) {
2618
+ const idx = indices[i];
2619
+ if (idx !== void 0) isDuplicate[idx] = true;
2620
+ }
2621
+ } else if (keep === false) {
2622
+ for (const idx of indices) {
2623
+ isDuplicate[idx] = true;
2624
+ }
2625
+ }
2626
+ }
2627
+ }
2628
+ return new Series(isDuplicate, { index: this._index });
2629
+ }
2630
+ /**
2631
+ * Rename columns or index labels.
2632
+ * Time complexity: O(m) for columns, O(n) for index.
2633
+ *
2634
+ * @param mapper - Object mapping old names to new names, or function to transform names
2635
+ * @param axis - 0 for index, 1 for columns
2636
+ * @returns New DataFrame with renamed labels
2637
+ *
2638
+ * @example
2639
+ * ```ts
2640
+ * const df = new DataFrame({ a: [1, 2], b: [3, 4] });
2641
+ * df.rename({ a: 'x', b: 'y' }, 1); // Rename columns a->x, b->y
2642
+ * df.rename((name) => name.toUpperCase(), 1); // Uppercase all column names
2643
+ * ```
2644
+ */
2645
+ rename(mapper, axis = 1) {
2646
+ if (axis === 1) {
2647
+ const newColumns = this._columns.map((col) => {
2648
+ if (typeof mapper === "function") {
2649
+ return mapper(col);
2650
+ }
2651
+ return mapper[col] ?? col;
2652
+ });
2653
+ const newData = {};
2654
+ for (let i = 0; i < this._columns.length; i++) {
2655
+ const oldCol = this._columns[i];
2656
+ const newCol = newColumns[i];
2657
+ if (oldCol && newCol) {
2658
+ const colData = this._data.get(oldCol);
2659
+ if (colData) {
2660
+ newData[newCol] = [...colData];
2661
+ }
2662
+ }
2663
+ }
2664
+ return new _DataFrame(newData, {
2665
+ columns: newColumns,
2666
+ index: this._index
2667
+ });
2668
+ } else {
2669
+ const newIndex = this._index.map((label) => {
2670
+ const labelStr = String(label);
2671
+ if (typeof mapper === "function") {
2672
+ return mapper(labelStr);
2673
+ }
2674
+ return mapper[labelStr] ?? label;
2675
+ });
2676
+ const newData = {};
2677
+ for (const col of this._columns) {
2678
+ const colData = this._data.get(col);
2679
+ if (colData) {
2680
+ newData[col] = [...colData];
2681
+ }
2682
+ }
2683
+ return new _DataFrame(newData, {
2684
+ columns: this._columns,
2685
+ index: newIndex
2686
+ });
2687
+ }
2688
+ }
2689
+ /**
2690
+ * Reset index to default integer index.
2691
+ * Time complexity: O(n).
2692
+ *
2693
+ * @param drop - If true, don't add old index as column.
2694
+ * If a column named "index" already exists, the new column will be
2695
+ * named "index_1", "index_2", etc.
2696
+ * @returns New DataFrame with reset index
2697
+ *
2698
+ * @example
2699
+ * ```ts
2700
+ * const df = new DataFrame({ a: [1, 2] }, { index: ['x', 'y'] });
2701
+ * df.reset_index(); // Index becomes [0, 1], adds 'index' column with ['x', 'y']
2702
+ * df.reset_index(true); // Index becomes [0, 1], no new column
2703
+ * ```
2704
+ */
2705
+ reset_index(drop = false) {
2706
+ const newData = {};
2707
+ let indexName = "index";
2708
+ if (!drop) {
2709
+ if (this._columns.includes(indexName)) {
2710
+ let suffix = 1;
2711
+ while (this._columns.includes(`${indexName}_${suffix}`)) {
2712
+ suffix++;
2713
+ }
2714
+ indexName = `${indexName}_${suffix}`;
2715
+ }
2716
+ newData[indexName] = [...this._index];
2717
+ }
2718
+ for (const col of this._columns) {
2719
+ const colData = this._data.get(col);
2720
+ if (colData) {
2721
+ newData[col] = [...colData];
2722
+ }
2723
+ }
2724
+ const newColumns = drop ? this._columns : [indexName, ...this._columns];
2725
+ return new _DataFrame(newData, {
2726
+ columns: newColumns,
2727
+ index: Array.from({ length: this._index.length }, (_, i) => i)
2728
+ });
2729
+ }
2730
+ /**
2731
+ * Set a column as the index.
2732
+ * Time complexity: O(n).
2733
+ *
2734
+ * @param column - Column name to use as index
2735
+ * @param drop - If true, remove the column after setting it as index
2736
+ * @returns New DataFrame with new index
2737
+ *
2738
+ * @example
2739
+ * ```ts
2740
+ * const df = new DataFrame({ id: ['a', 'b', 'c'], value: [1, 2, 3] });
2741
+ * df.set_index('id'); // Index becomes ['a', 'b', 'c']
2742
+ * ```
2743
+ */
2744
+ set_index(column, drop = true) {
2745
+ if (!this._columns.includes(column)) {
2746
+ throw new InvalidParameterError(
2747
+ `Column '${column}' not found in DataFrame`,
2748
+ "column",
2749
+ column
2750
+ );
2751
+ }
2752
+ const newIndexData = this._data.get(column);
2753
+ if (!newIndexData) {
2754
+ throw new DataValidationError(`Column '${column}' has no data`);
2755
+ }
2756
+ const newIndex = newIndexData.map(
2757
+ (v) => typeof v === "string" || typeof v === "number" ? v : String(v)
2758
+ );
2759
+ const newData = {};
2760
+ const newColumns = [];
2761
+ for (const col of this._columns) {
2762
+ if (col === column && drop) continue;
2763
+ const colData = this._data.get(col);
2764
+ if (colData) {
2765
+ newData[col] = [...colData];
2766
+ newColumns.push(col);
2767
+ }
2768
+ }
2769
+ return new _DataFrame(newData, {
2770
+ columns: newColumns,
2771
+ index: newIndex
2772
+ });
2773
+ }
2774
+ /**
2775
+ * Return boolean DataFrame showing null values.
2776
+ * Time complexity: O(n × m).
2777
+ *
2778
+ * @returns DataFrame of booleans (true = null/undefined, false = not null)
2779
+ *
2780
+ * @example
2781
+ * ```ts
2782
+ * const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
2783
+ * df.isnull(); // [[false, false], [true, false], [false, true]]
2784
+ * ```
2785
+ */
2786
+ isnull() {
2787
+ const newData = {};
2788
+ for (const col of this._columns) {
2789
+ const colData = this._data.get(col);
2790
+ if (colData) {
2791
+ newData[col] = colData.map((v) => v === null || v === void 0);
2792
+ }
2793
+ }
2794
+ return new _DataFrame(newData, {
2795
+ columns: this._columns,
2796
+ index: this._index
2797
+ });
2798
+ }
2799
+ /**
2800
+ * Return boolean DataFrame showing non-null values.
2801
+ * Time complexity: O(n × m).
2802
+ *
2803
+ * @returns DataFrame of booleans (true = not null, false = null/undefined)
2804
+ *
2805
+ * @example
2806
+ * ```ts
2807
+ * const df = new DataFrame({ a: [1, null, 3], b: [4, 5, undefined] });
2808
+ * df.notnull(); // [[true, true], [false, true], [true, false]]
2809
+ * ```
2810
+ */
2811
+ notnull() {
2812
+ const newData = {};
2813
+ for (const col of this._columns) {
2814
+ const colData = this._data.get(col);
2815
+ if (colData) {
2816
+ newData[col] = colData.map((v) => v !== null && v !== void 0);
2817
+ }
2818
+ }
2819
+ return new _DataFrame(newData, {
2820
+ columns: this._columns,
2821
+ index: this._index
2822
+ });
2823
+ }
2824
+ /**
2825
+ * Replace values in DataFrame.
2826
+ * Time complexity: O(n × m).
2827
+ *
2828
+ * @param toReplace - Value or array of values to replace
2829
+ * @param value - Replacement value
2830
+ * @returns New DataFrame with replaced values
2831
+ *
2832
+ * @example
2833
+ * ```ts
2834
+ * const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
2835
+ * df.replace(2, 99); // Replace all 2s with 99
2836
+ * df.replace([1, 2], 0); // Replace 1s and 2s with 0
2837
+ * ```
2838
+ */
2839
+ replace(toReplace, value) {
2840
+ const replaceSet = new Set(Array.isArray(toReplace) ? toReplace : [toReplace]);
2841
+ const newData = {};
2842
+ for (const col of this._columns) {
2843
+ const colData = this._data.get(col);
2844
+ if (colData) {
2845
+ newData[col] = colData.map((v) => replaceSet.has(v) ? value : v);
2846
+ }
2847
+ }
2848
+ return new _DataFrame(newData, {
2849
+ columns: this._columns,
2850
+ index: this._index
2851
+ });
2852
+ }
2853
+ /**
2854
+ * Clip (limit) values in a range.
2855
+ * Time complexity: O(n × m).
2856
+ *
2857
+ * @param lower - Minimum value (values below are set to this)
2858
+ * @param upper - Maximum value (values above are set to this)
2859
+ * @returns New DataFrame with clipped values
2860
+ *
2861
+ * @example
2862
+ * ```ts
2863
+ * const df = new DataFrame({ a: [1, 5, 10], b: [2, 8, 15] });
2864
+ * df.clip(3, 9); // [[3, 3], [5, 8], [9, 9]]
2865
+ * ```
2866
+ */
2867
+ clip(lower, upper) {
2868
+ const newData = {};
2869
+ for (const col of this._columns) {
2870
+ const colData = this._data.get(col);
2871
+ if (colData) {
2872
+ newData[col] = colData.map((v) => {
2873
+ if (typeof v !== "number") return v;
2874
+ let result = v;
2875
+ if (lower !== void 0 && result < lower) result = lower;
2876
+ if (upper !== void 0 && result > upper) result = upper;
2877
+ return result;
2878
+ });
2879
+ }
2880
+ }
2881
+ return new _DataFrame(newData, {
2882
+ columns: this._columns,
2883
+ index: this._index
2884
+ });
2885
+ }
2886
+ /**
2887
+ * Return a random sample of rows.
2888
+ * Time complexity: O(n) for sampling.
2889
+ *
2890
+ * @param n - Number of rows to sample
2891
+ * @param random_state - Random seed for reproducibility
2892
+ * @returns New DataFrame with sampled rows
2893
+ *
2894
+ * @example
2895
+ * ```ts
2896
+ * const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [6, 7, 8, 9, 10] });
2897
+ * df.sample(3); // Random 3 rows
2898
+ * ```
2899
+ */
2900
+ sample(n, random_state) {
2901
+ if (!Number.isFinite(n) || !Number.isInteger(n)) {
2902
+ throw new InvalidParameterError("n must be a finite integer", "n", n);
2903
+ }
2904
+ if (random_state !== void 0) {
2905
+ if (!Number.isFinite(random_state) || !Number.isInteger(random_state)) {
2906
+ throw new InvalidParameterError(
2907
+ "random_state must be a finite integer",
2908
+ "random_state",
2909
+ random_state
2910
+ );
2911
+ }
2912
+ }
2913
+ if (n < 0 || n > this._index.length) {
2914
+ throw new DataValidationError(`Sample size ${n} must be between 0 and ${this._index.length}`);
2915
+ }
2916
+ const rng = random_state !== void 0 ? this.seededRandom(random_state) : Math.random;
2917
+ const indices = Array.from({ length: this._index.length }, (_, i) => i);
2918
+ for (let i = indices.length - 1; i > 0; i--) {
2919
+ const j = Math.floor(rng() * (i + 1));
2920
+ const current = indices[i];
2921
+ const swap = indices[j];
2922
+ if (current === void 0 || swap === void 0) {
2923
+ throw new DataValidationError("Sample index resolution failed");
2924
+ }
2925
+ indices[i] = swap;
2926
+ indices[j] = current;
2927
+ }
2928
+ const sampledIndices = indices.slice(0, n);
2929
+ const newData = {};
2930
+ const newIndex = [];
2931
+ for (const col of this._columns) {
2932
+ newData[col] = [];
2933
+ }
2934
+ for (const idx of sampledIndices) {
2935
+ const label = this._index[idx];
2936
+ if (label === void 0) {
2937
+ throw new DataValidationError(`Index label at position ${idx} is undefined`);
2938
+ }
2939
+ newIndex.push(label);
2940
+ for (const col of this._columns) {
2941
+ newData[col]?.push(this._data.get(col)?.[idx]);
2942
+ }
2943
+ }
2944
+ return new _DataFrame(newData, {
2945
+ columns: this._columns,
2946
+ index: newIndex
2947
+ });
2948
+ }
2949
+ /**
2950
+ * Seeded random number generator for reproducibility.
2951
+ * @private
2952
+ */
2953
+ seededRandom(seed) {
2954
+ let state = seed >>> 0;
2955
+ return () => {
2956
+ state = (state * 1664525 + 1013904223) % 2 ** 32;
2957
+ return state / 2 ** 32;
2958
+ };
2959
+ }
2960
+ /**
2961
+ * Return values at the given quantile.
2962
+ * Time complexity: O(n log n) per column due to sorting.
2963
+ *
2964
+ * @param q - Quantile to compute (0 to 1)
2965
+ * @returns Series with quantile values for each numeric column
2966
+ *
2967
+ * @example
2968
+ * ```ts
2969
+ * const df = new DataFrame({ a: [1, 2, 3, 4, 5], b: [10, 20, 30, 40, 50] });
2970
+ * df.quantile(0.5); // Median: Series({ a: 3, b: 30 })
2971
+ * df.quantile(0.25); // 25th percentile
2972
+ * ```
2973
+ */
2974
+ quantile(q) {
2975
+ if (!Number.isFinite(q) || q < 0 || q > 1) {
2976
+ throw new InvalidParameterError("q must be a finite number between 0 and 1", "q", q);
2977
+ }
2978
+ const result = [];
2979
+ const resultIndex = [];
2980
+ for (const col of this._columns) {
2981
+ const colData = this._data.get(col);
2982
+ if (!colData) continue;
2983
+ const numericData = toNumericValues(colData);
2984
+ if (numericData.length === 0) {
2985
+ result.push(NaN);
2986
+ resultIndex.push(col);
2987
+ continue;
2988
+ }
2989
+ const sorted = [...numericData].sort((a, b) => a - b);
2990
+ const idx = q * (sorted.length - 1);
2991
+ const lower = Math.floor(idx);
2992
+ const upper = Math.ceil(idx);
2993
+ const weight = idx - lower;
2994
+ const value = (sorted[lower] ?? 0) * (1 - weight) + (sorted[upper] ?? 0) * weight;
2995
+ result.push(value);
2996
+ resultIndex.push(col);
2997
+ }
2998
+ return new Series(result, { index: resultIndex });
2999
+ }
3000
+ /**
3001
+ * Compute numerical rank of values (1 through n) along axis.
3002
+ * Time complexity: O(n log n) per column.
3003
+ *
3004
+ * @param method - How to rank ties: 'average', 'min', 'max', 'first', 'dense'
3005
+ * @param ascending - Rank in ascending order
3006
+ * @returns New DataFrame with ranks
3007
+ *
3008
+ * @example
3009
+ * ```ts
3010
+ * const df = new DataFrame({ a: [3, 1, 2, 1] });
3011
+ * df.rank(); // [[4], [1.5], [3], [1.5]] (average method)
3012
+ * df.rank('min'); // [[4], [1], [3], [1]]
3013
+ * ```
3014
+ */
3015
+ rank(method = "average", ascending = true) {
3016
+ const newData = {};
3017
+ for (const col of this._columns) {
3018
+ const colData = this._data.get(col);
3019
+ if (!colData) continue;
3020
+ const numericData = toNumericValues(colData);
3021
+ if (numericData.length === 0) {
3022
+ newData[col] = colData.map(() => null);
3023
+ continue;
3024
+ }
3025
+ const indexed = colData.map((v, i2) => ({ value: v, index: i2 }));
3026
+ const numericIndexed = indexed.filter(
3027
+ (item) => isValidNumber(item.value)
3028
+ );
3029
+ numericIndexed.sort((a, b) => {
3030
+ if (ascending) {
3031
+ return a.value - b.value;
3032
+ }
3033
+ return b.value - a.value;
3034
+ });
3035
+ const ranks = new Array(colData.length).fill(null);
3036
+ let i = 0;
3037
+ let denseRank = 0;
3038
+ while (i < numericIndexed.length) {
3039
+ const currentItem = numericIndexed[i];
3040
+ if (!currentItem) {
3041
+ break;
3042
+ }
3043
+ const currentValue = currentItem.value;
3044
+ const tieStart = i;
3045
+ while (i < numericIndexed.length) {
3046
+ const nextItem = numericIndexed[i];
3047
+ if (!nextItem || nextItem.value !== currentValue) {
3048
+ break;
3049
+ }
3050
+ i++;
3051
+ }
3052
+ const tieEnd = i;
3053
+ denseRank++;
3054
+ for (let j = tieStart; j < tieEnd; j++) {
3055
+ const item = numericIndexed[j];
3056
+ if (!item) continue;
3057
+ let rank;
3058
+ if (method === "average") {
3059
+ rank = (tieStart + tieEnd + 1) / 2;
3060
+ } else if (method === "min") {
3061
+ rank = tieStart + 1;
3062
+ } else if (method === "max") {
3063
+ rank = tieEnd;
3064
+ } else if (method === "first") {
3065
+ rank = j + 1;
3066
+ } else {
3067
+ rank = denseRank;
3068
+ }
3069
+ ranks[item.index] = rank;
3070
+ }
3071
+ }
3072
+ newData[col] = ranks;
3073
+ }
3074
+ return new _DataFrame(newData, {
3075
+ columns: this._columns,
3076
+ index: this._index
3077
+ });
3078
+ }
3079
+ /**
3080
+ * Calculate the difference between consecutive rows.
3081
+ * Time complexity: O(n × m).
3082
+ *
3083
+ * @param periods - Number of periods to shift (default: 1)
3084
+ * @returns New DataFrame with differences
3085
+ *
3086
+ * @example
3087
+ * ```ts
3088
+ * const df = new DataFrame({ a: [1, 3, 6, 10] });
3089
+ * df.diff(); // [[null], [2], [3], [4]]
3090
+ * df.diff(2); // [[null], [null], [5], [7]]
3091
+ * ```
3092
+ */
3093
+ diff(periods = 1) {
3094
+ if (!Number.isFinite(periods) || !Number.isInteger(periods) || periods < 0) {
3095
+ throw new InvalidParameterError("periods must be a non-negative integer", "periods", periods);
3096
+ }
3097
+ const newData = {};
3098
+ for (const col of this._columns) {
3099
+ const colData = this._data.get(col);
3100
+ if (!colData) continue;
3101
+ const diffData = [];
3102
+ for (let i = 0; i < colData.length; i++) {
3103
+ if (i < periods) {
3104
+ diffData.push(null);
3105
+ } else {
3106
+ const current = colData[i];
3107
+ const previous = colData[i - periods];
3108
+ if (typeof current === "number" && typeof previous === "number") {
3109
+ diffData.push(current - previous);
3110
+ } else {
3111
+ diffData.push(null);
3112
+ }
3113
+ }
3114
+ }
3115
+ newData[col] = diffData;
3116
+ }
3117
+ return new _DataFrame(newData, {
3118
+ columns: this._columns,
3119
+ index: this._index
3120
+ });
3121
+ }
3122
+ /**
3123
+ * Calculate percentage change between consecutive rows.
3124
+ * Time complexity: O(n × m).
3125
+ *
3126
+ * @param periods - Number of periods to shift (default: 1)
3127
+ * @returns New DataFrame with percentage changes
3128
+ *
3129
+ * @example
3130
+ * ```ts
3131
+ * const df = new DataFrame({ a: [100, 110, 121] });
3132
+ * df.pct_change(); // [[null], [0.1], [0.1]] (10% increase each time)
3133
+ * ```
3134
+ */
3135
+ pct_change(periods = 1) {
3136
+ if (!Number.isFinite(periods) || !Number.isInteger(periods) || periods < 0) {
3137
+ throw new InvalidParameterError("periods must be a non-negative integer", "periods", periods);
3138
+ }
3139
+ const newData = {};
3140
+ for (const col of this._columns) {
3141
+ const colData = this._data.get(col);
3142
+ if (!colData) continue;
3143
+ const pctData = [];
3144
+ for (let i = 0; i < colData.length; i++) {
3145
+ if (i < periods) {
3146
+ pctData.push(null);
3147
+ } else {
3148
+ const current = colData[i];
3149
+ const previous = colData[i - periods];
3150
+ if (typeof current === "number" && typeof previous === "number" && previous !== 0) {
3151
+ pctData.push((current - previous) / previous);
3152
+ } else {
3153
+ pctData.push(null);
3154
+ }
3155
+ }
3156
+ }
3157
+ newData[col] = pctData;
3158
+ }
3159
+ return new _DataFrame(newData, {
3160
+ columns: this._columns,
3161
+ index: this._index
3162
+ });
3163
+ }
3164
+ /**
3165
+ * Return cumulative sum over DataFrame axis.
3166
+ * Time complexity: O(n × m).
3167
+ *
3168
+ * @returns New DataFrame with cumulative sums
3169
+ *
3170
+ * @example
3171
+ * ```ts
3172
+ * const df = new DataFrame({ a: [1, 2, 3], b: [4, 5, 6] });
3173
+ * df.cumsum(); // [[1, 4], [3, 9], [6, 15]]
3174
+ * ```
3175
+ */
3176
+ cumsum() {
3177
+ const newData = {};
3178
+ for (const col of this._columns) {
3179
+ const colData = this._data.get(col);
3180
+ if (!colData) continue;
3181
+ const cumData = [];
3182
+ let cumSum = 0;
3183
+ for (const value of colData) {
3184
+ if (typeof value === "number") {
3185
+ cumSum += value;
3186
+ cumData.push(cumSum);
3187
+ } else {
3188
+ cumData.push(null);
3189
+ }
3190
+ }
3191
+ newData[col] = cumData;
3192
+ }
3193
+ return new _DataFrame(newData, {
3194
+ columns: this._columns,
3195
+ index: this._index
3196
+ });
3197
+ }
3198
+ /**
3199
+ * Return cumulative product over DataFrame axis.
3200
+ * Time complexity: O(n × m).
3201
+ *
3202
+ * @returns New DataFrame with cumulative products
3203
+ *
3204
+ * @example
3205
+ * ```ts
3206
+ * const df = new DataFrame({ a: [2, 3, 4] });
3207
+ * df.cumprod(); // [[2], [6], [24]]
3208
+ * ```
3209
+ */
3210
+ cumprod() {
3211
+ const newData = {};
3212
+ for (const col of this._columns) {
3213
+ const colData = this._data.get(col);
3214
+ if (!colData) continue;
3215
+ const cumData = [];
3216
+ let cumProd = 1;
3217
+ for (const value of colData) {
3218
+ if (typeof value === "number") {
3219
+ cumProd *= value;
3220
+ cumData.push(cumProd);
3221
+ } else {
3222
+ cumData.push(null);
3223
+ }
3224
+ }
3225
+ newData[col] = cumData;
3226
+ }
3227
+ return new _DataFrame(newData, {
3228
+ columns: this._columns,
3229
+ index: this._index
3230
+ });
3231
+ }
3232
+ /**
3233
+ * Return cumulative maximum over DataFrame axis.
3234
+ * Time complexity: O(n × m).
3235
+ *
3236
+ * @returns New DataFrame with cumulative maximums
3237
+ *
3238
+ * @example
3239
+ * ```ts
3240
+ * const df = new DataFrame({ a: [3, 1, 5, 2] });
3241
+ * df.cummax(); // [[3], [3], [5], [5]]
3242
+ * ```
3243
+ */
3244
+ cummax() {
3245
+ const newData = {};
3246
+ for (const col of this._columns) {
3247
+ const colData = this._data.get(col);
3248
+ if (!colData) continue;
3249
+ const cumData = [];
3250
+ let cumMax = -Infinity;
3251
+ for (const value of colData) {
3252
+ if (typeof value === "number") {
3253
+ cumMax = Math.max(cumMax, value);
3254
+ cumData.push(cumMax);
3255
+ } else {
3256
+ cumData.push(null);
3257
+ }
3258
+ }
3259
+ newData[col] = cumData;
3260
+ }
3261
+ return new _DataFrame(newData, {
3262
+ columns: this._columns,
3263
+ index: this._index
3264
+ });
3265
+ }
3266
+ /**
3267
+ * Return cumulative minimum over DataFrame axis.
3268
+ * Time complexity: O(n × m).
3269
+ *
3270
+ * @returns New DataFrame with cumulative minimums
3271
+ *
3272
+ * @example
3273
+ * ```ts
3274
+ * const df = new DataFrame({ a: [3, 1, 5, 2] });
3275
+ * df.cummin(); // [[3], [1], [1], [1]]
3276
+ * ```
3277
+ */
3278
+ cummin() {
3279
+ const newData = {};
3280
+ for (const col of this._columns) {
3281
+ const colData = this._data.get(col);
3282
+ if (!colData) continue;
3283
+ const cumData = [];
3284
+ let cumMin = Infinity;
3285
+ for (const value of colData) {
3286
+ if (typeof value === "number") {
3287
+ cumMin = Math.min(cumMin, value);
3288
+ cumData.push(cumMin);
3289
+ } else {
3290
+ cumData.push(null);
3291
+ }
3292
+ }
3293
+ newData[col] = cumData;
3294
+ }
3295
+ return new _DataFrame(newData, {
3296
+ columns: this._columns,
3297
+ index: this._index
3298
+ });
3299
+ }
3300
+ /**
3301
+ * Shift index by desired number of periods.
3302
+ * Time complexity: O(n × m).
3303
+ *
3304
+ * @param periods - Number of periods to shift (positive = down, negative = up)
3305
+ * @param fill_value - Value to use for newly introduced missing values
3306
+ * @returns New DataFrame with shifted data
3307
+ *
3308
+ * @example
3309
+ * ```ts
3310
+ * const df = new DataFrame({ a: [1, 2, 3, 4] });
3311
+ * df.shift(1); // [[null], [1], [2], [3]]
3312
+ * df.shift(-1); // [[2], [3], [4], [null]]
3313
+ * df.shift(1, 0); // [[0], [1], [2], [3]]
3314
+ * ```
3315
+ */
3316
+ shift(periods = 1, fill_value = null) {
3317
+ if (!Number.isFinite(periods) || !Number.isInteger(periods)) {
3318
+ throw new InvalidParameterError("periods must be a finite integer", "periods", periods);
3319
+ }
3320
+ const newData = {};
3321
+ for (const col of this._columns) {
3322
+ const colData = this._data.get(col);
3323
+ if (!colData) continue;
3324
+ const shiftedData = [];
3325
+ const rowCount = colData.length;
3326
+ if (periods > 0) {
3327
+ const shift = Math.min(periods, rowCount);
3328
+ for (let i = 0; i < shift; i++) {
3329
+ shiftedData.push(fill_value);
3330
+ }
3331
+ for (let i = 0; i < rowCount - shift; i++) {
3332
+ shiftedData.push(colData[i]);
3333
+ }
3334
+ } else if (periods < 0) {
3335
+ const absPeriods = Math.min(Math.abs(periods), rowCount);
3336
+ for (let i = absPeriods; i < rowCount; i++) {
3337
+ shiftedData.push(colData[i]);
3338
+ }
3339
+ for (let i = 0; i < absPeriods; i++) {
3340
+ shiftedData.push(fill_value);
3341
+ }
3342
+ } else {
3343
+ shiftedData.push(...colData);
3344
+ }
3345
+ newData[col] = shiftedData;
3346
+ }
3347
+ return new _DataFrame(newData, {
3348
+ columns: this._columns,
3349
+ index: this._index
3350
+ });
3351
+ }
3352
+ /**
3353
+ * Pivot DataFrame.
3354
+ * Time complexity: O(n × m).
3355
+ *
3356
+ * @param index - Column to use as index
3357
+ * @param columns - Column to use as column headers
3358
+ * @param values - Column to use as values
3359
+ * @returns New DataFrame with pivoted data
3360
+ *
3361
+ * @example
3362
+ * ```ts
3363
+ * const df = new DataFrame({
3364
+ * country: ['USA', 'USA', 'Canada', 'Canada'],
3365
+ * year: [2010, 2011, 2010, 2011],
3366
+ * value: [100, 200, 300, 400]
3367
+ * });
3368
+ * df.pivot('country', 'year', 'value');
3369
+ * // country | 2010 | 2011
3370
+ * // USA | 100 | 200
3371
+ * // Canada | 300 | 400
3372
+ * ```
3373
+ */
3374
+ pivot(index, columns, values) {
3375
+ if (!this._columns.includes(index)) {
3376
+ throw new DataValidationError(`Column '${index}' not found in DataFrame`);
3377
+ }
3378
+ if (!this._columns.includes(columns)) {
3379
+ throw new DataValidationError(`Column '${columns}' not found in DataFrame`);
3380
+ }
3381
+ if (!this._columns.includes(values)) {
3382
+ throw new DataValidationError(`Column '${values}' not found in DataFrame`);
3383
+ }
3384
+ const indexData = this._data.get(index);
3385
+ const columnData = this._data.get(columns);
3386
+ const valueData = this._data.get(values);
3387
+ if (!indexData || !columnData || !valueData) {
3388
+ throw new DataValidationError("Pivot columns have no data");
3389
+ }
3390
+ const pivotData = {};
3391
+ const pivotIndex = [];
3392
+ const uniqueIndices = /* @__PURE__ */ new Set();
3393
+ const uniqueColumns = [];
3394
+ const seenColumns = /* @__PURE__ */ new Set();
3395
+ for (const idx of indexData) {
3396
+ if (idx === null || idx === void 0) {
3397
+ continue;
3398
+ }
3399
+ const key = typeof idx === "string" || typeof idx === "number" ? idx : String(idx);
3400
+ if (!uniqueIndices.has(key)) {
3401
+ uniqueIndices.add(key);
3402
+ pivotIndex.push(key);
3403
+ }
3404
+ }
3405
+ for (const col of columnData) {
3406
+ if (col === null || col === void 0) {
3407
+ continue;
3408
+ }
3409
+ const colKey = String(col);
3410
+ if (!seenColumns.has(colKey)) {
3411
+ seenColumns.add(colKey);
3412
+ uniqueColumns.push(colKey);
3413
+ }
3414
+ }
3415
+ const rowPositionByIndex = /* @__PURE__ */ new Map();
3416
+ for (let i = 0; i < pivotIndex.length; i++) {
3417
+ const key = pivotIndex[i];
3418
+ if (key !== void 0) {
3419
+ rowPositionByIndex.set(key, i);
3420
+ }
3421
+ }
3422
+ for (const colKey of uniqueColumns) {
3423
+ pivotData[colKey] = new Array(pivotIndex.length).fill(null);
3424
+ }
3425
+ const visited = /* @__PURE__ */ new Set();
3426
+ for (let i = 0; i < indexData.length; i++) {
3427
+ const idx = indexData[i];
3428
+ const col = columnData[i];
3429
+ const value = valueData[i];
3430
+ if (idx !== null && idx !== void 0 && col !== null && col !== void 0) {
3431
+ const indexKey = typeof idx === "string" || typeof idx === "number" ? idx : String(idx);
3432
+ const colKey = String(col);
3433
+ const rowPos = rowPositionByIndex.get(indexKey);
3434
+ if (rowPos === void 0) {
3435
+ continue;
3436
+ }
3437
+ const cellKey = `${rowPos}:${colKey}`;
3438
+ if (visited.has(cellKey)) {
3439
+ throw new DataValidationError(
3440
+ `Duplicate pivot entry for index '${String(indexKey)}' and column '${colKey}'`
3441
+ );
3442
+ }
3443
+ visited.add(cellKey);
3444
+ const targetColumn = pivotData[colKey];
3445
+ if (targetColumn) {
3446
+ targetColumn[rowPos] = value;
3447
+ }
3448
+ }
3449
+ }
3450
+ return new _DataFrame(pivotData, {
3451
+ columns: uniqueColumns,
3452
+ index: pivotIndex
3453
+ });
3454
+ }
3455
+ /**
3456
+ * Melt DataFrame.
3457
+ * Time complexity: O(n × m).
3458
+ *
3459
+ * @param id_vars - Columns to keep as is
3460
+ * @param value_vars - Columns to melt
3461
+ * @param var_name - Name for new column with melted variable names
3462
+ * @param value_name - Name for new column with melted values.
3463
+ * Must not conflict with existing columns or var_name.
3464
+ * @returns New DataFrame with melted data
3465
+ *
3466
+ * @example
3467
+ * ```ts
3468
+ * const df = new DataFrame({
3469
+ * id: ['a', 'b'],
3470
+ * x: [1, 2],
3471
+ * y: [3, 4]
3472
+ * });
3473
+ * df.melt(['id'], ['x', 'y'], 'variable', 'value');
3474
+ * // id | variable | value
3475
+ * // a | x | 1
3476
+ * // a | y | 3
3477
+ * // b | x | 2
3478
+ * // b | y | 4
3479
+ * ```
3480
+ */
3481
+ melt(id_vars, value_vars, var_name = "variable", value_name = "value") {
3482
+ const idVars = [...id_vars];
3483
+ const valueVars = [...value_vars];
3484
+ ensureUniqueLabels(idVars, "id_var");
3485
+ ensureUniqueLabels(valueVars, "value_var");
3486
+ for (const idVar of idVars) {
3487
+ if (!this._columns.includes(idVar)) {
3488
+ throw new DataValidationError(`Column '${idVar}' not found in DataFrame`);
3489
+ }
3490
+ }
3491
+ for (const valueVar of valueVars) {
3492
+ if (!this._columns.includes(valueVar)) {
3493
+ throw new DataValidationError(`Column '${valueVar}' not found in DataFrame`);
3494
+ }
3495
+ }
3496
+ if (var_name === value_name) {
3497
+ throw new DataValidationError("var_name and value_name must be different");
3498
+ }
3499
+ const reservedNames = /* @__PURE__ */ new Set([...idVars, ...valueVars]);
3500
+ if (reservedNames.has(var_name) || reservedNames.has(value_name)) {
3501
+ throw new DataValidationError(
3502
+ "var_name and value_name must not conflict with existing columns"
3503
+ );
3504
+ }
3505
+ const newData = {};
3506
+ for (const idVar of idVars) {
3507
+ newData[idVar] = [];
3508
+ }
3509
+ newData[var_name] = [];
3510
+ newData[value_name] = [];
3511
+ for (let i = 0; i < this._index.length; i++) {
3512
+ for (const valueVar of valueVars) {
3513
+ for (const idVar of idVars) {
3514
+ newData[idVar]?.push(this._data.get(idVar)?.[i]);
3515
+ }
3516
+ newData[var_name]?.push(valueVar);
3517
+ newData[value_name]?.push(this._data.get(valueVar)?.[i]);
3518
+ }
3519
+ }
3520
+ return new _DataFrame(newData, {
3521
+ columns: [...idVars, var_name, value_name]
3522
+ });
3523
+ }
3524
+ /**
3525
+ * Rolling window mean calculation.
3526
+ *
3527
+ * @param window - Size of the rolling window
3528
+ * @param on - Column to apply rolling calculation to (if omitted, applies to all columns)
3529
+ * @returns New DataFrame with rolling mean values
3530
+ *
3531
+ * @example
3532
+ * ```ts
3533
+ * const df = new DataFrame({ a: [1, 2, 3, 4, 5] });
3534
+ * df.rolling(3); // [[null], [null], [2], [3], [4]]
3535
+ * ```
3536
+ */
3537
+ rolling(window, on) {
3538
+ const newData = {};
3539
+ if (!Number.isFinite(window) || !Number.isInteger(window) || window <= 0) {
3540
+ throw new InvalidParameterError("window must be a positive integer", "window", window);
3541
+ }
3542
+ if (on && !this._columns.includes(on)) {
3543
+ throw new DataValidationError(`Column '${on}' not found in DataFrame`);
3544
+ }
3545
+ for (const col of this._columns) {
3546
+ if (col === on || !on) {
3547
+ const colData = this._data.get(col);
3548
+ if (!colData) continue;
3549
+ const rollingData = [];
3550
+ let windowSum = 0;
3551
+ let windowCount = 0;
3552
+ for (let i = 0; i < colData.length; i++) {
3553
+ const incoming = colData[i];
3554
+ if (isValidNumber(incoming)) {
3555
+ windowSum += incoming;
3556
+ windowCount++;
3557
+ }
3558
+ if (i >= window) {
3559
+ const outgoing = colData[i - window];
3560
+ if (isValidNumber(outgoing)) {
3561
+ windowSum -= outgoing;
3562
+ windowCount--;
3563
+ }
3564
+ }
3565
+ if (i < window - 1) {
3566
+ rollingData.push(null);
3567
+ } else if (windowCount === 0) {
3568
+ rollingData.push(null);
3569
+ } else {
3570
+ rollingData.push(windowSum / windowCount);
3571
+ }
3572
+ }
3573
+ newData[col] = rollingData;
3574
+ }
3575
+ }
3576
+ const outColumns = on ? [on] : this._columns;
3577
+ return new _DataFrame(newData, {
3578
+ columns: outColumns,
3579
+ index: this._index
3580
+ });
3581
+ }
3582
+ /**
3583
+ * Return a human-readable tabular string representation.
3584
+ *
3585
+ * Columns are right-aligned and padded so that rows line up.
3586
+ * Large DataFrames are truncated with an ellipsis row.
3587
+ *
3588
+ * @param maxRows - Maximum rows to display before summarizing (default: 20).
3589
+ * @returns Formatted table string
3590
+ *
3591
+ * @example
3592
+ * ```ts
3593
+ * const df = new DataFrame({ a: [1, 2], b: [3, 4] });
3594
+ * df.toString();
3595
+ * // " a b\n0 1 3\n1 2 4"
3596
+ * ```
3597
+ */
3598
+ toString(maxRows = 20) {
3599
+ const nRows = this.shape[0] ?? 0;
3600
+ const cols = this._columns;
3601
+ const half = Math.floor(maxRows / 2);
3602
+ const showAll = nRows <= maxRows;
3603
+ const topCount = showAll ? nRows : half;
3604
+ const bottomCount = showAll ? 0 : half;
3605
+ const allRows = [];
3606
+ allRows.push(["", ...cols]);
3607
+ for (let i = 0; i < topCount; i++) {
3608
+ const idx = this._index[i];
3609
+ const row = [String(idx ?? i)];
3610
+ for (const col of cols) {
3611
+ const colData = this._data.get(col);
3612
+ const val = colData ? colData[i] : void 0;
3613
+ row.push(val === null || val === void 0 ? "null" : String(val));
3614
+ }
3615
+ allRows.push(row);
3616
+ }
3617
+ if (!showAll) {
3618
+ allRows.push(["...", ...cols.map(() => "...")]);
3619
+ for (let i = nRows - bottomCount; i < nRows; i++) {
3620
+ const idx = this._index[i];
3621
+ const row = [String(idx ?? i)];
3622
+ for (const col of cols) {
3623
+ const colData = this._data.get(col);
3624
+ const val = colData ? colData[i] : void 0;
3625
+ row.push(val === null || val === void 0 ? "null" : String(val));
3626
+ }
3627
+ allRows.push(row);
3628
+ }
3629
+ }
3630
+ const numCols = cols.length + 1;
3631
+ const widths = new Array(numCols).fill(0);
3632
+ for (const row of allRows) {
3633
+ for (let c = 0; c < numCols; c++) {
3634
+ const cell = row[c] ?? "";
3635
+ if (cell.length > (widths[c] ?? 0)) {
3636
+ widths[c] = cell.length;
3637
+ }
3638
+ }
3639
+ }
3640
+ const lines = [];
3641
+ for (const row of allRows) {
3642
+ const cells = [];
3643
+ for (let c = 0; c < numCols; c++) {
3644
+ const cell = row[c] ?? "";
3645
+ const w = widths[c] ?? 0;
3646
+ cells.push(cell.padStart(w));
3647
+ }
3648
+ lines.push(cells.join(" "));
3649
+ }
3650
+ return lines.join("\n");
3651
+ }
3652
+ };
3653
var DataFrameGroupBy = class {
  // Group key -> array of row indices in that group (computed once in the constructor).
  groupMap;
  // Group key -> original key values, one per grouping column (avoids parsing keys back).
  keyValuesMap;
  // The frame being grouped.
  df;
  // Grouping column name, or array of grouping column names.
  by;
  /**
   * Create a grouping over `df` and eagerly build the group index.
   *
   * @param df - Source frame; the code reads `df.shape[0]`, `df.columns` and `df.get(col).data`
   * @param by - A column name or an array of column names to group on
   */
  constructor(df, by) {
    this.df = df;
    this.by = by;
    const buildResult = this.buildGroupMap();
    this.groupMap = buildResult.groupMap;
    this.keyValuesMap = buildResult.keyValuesMap;
  }
  /**
   * Normalise `this.by` to an array of grouping column names.
   *
   * @private
   */
  getGroupByColumns() {
    return Array.isArray(this.by) ? this.by : [this.by];
  }
  /**
   * Build the grouping map: group key -> array of row indices.
   *
   * Also records, per group key, the original key values of the first row
   * seen for that group. Groups appear in first-occurrence order because both
   * maps are insertion-ordered.
   *
   * @private
   */
  buildGroupMap() {
    const groupByCols = this.getGroupByColumns();
    const groupMap = /* @__PURE__ */ new Map();
    const keyValuesMap = /* @__PURE__ */ new Map();
    const numRows = this.df.shape[0];
    if (groupByCols.length === 1) {
      // Single-column path: the key is derived from the scalar value itself,
      // and the one-element key array is only allocated for new groups.
      const colData = this.df.get(groupByCols[0]).data;
      for (let row = 0; row < numRows; row++) {
        const val = colData[row];
        const key = createKey(val);
        let bucket = groupMap.get(key);
        if (bucket === undefined) {
          bucket = [];
          groupMap.set(key, bucket);
          keyValuesMap.set(key, [val]);
        }
        bucket.push(row);
      }
    } else {
      // Multi-column path: fetch each grouping column once, then key every
      // row on the tuple of its values.
      const colDataArrays = groupByCols.map((col) => this.df.get(col).data);
      for (let row = 0; row < numRows; row++) {
        const keyParts = new Array(groupByCols.length);
        for (let c = 0; c < groupByCols.length; c++) {
          const colArr = colDataArrays[c];
          keyParts[c] = colArr !== undefined ? colArr[row] : undefined;
        }
        const key = createKey(keyParts);
        let bucket = groupMap.get(key);
        if (bucket === undefined) {
          bucket = [];
          groupMap.set(key, bucket);
          keyValuesMap.set(key, keyParts);
        }
        bucket.push(row);
      }
    }
    return { groupMap, keyValuesMap };
  }
  /**
   * Gather the usable numeric values of one group.
   *
   * null / undefined entries are skipped; numbers rejected by `isValidNumber`
   * are skipped as well; any other non-number value is an error.
   *
   * @param seriesData - Column values, indexed by row
   * @param indices - Row indices belonging to the group
   * @param opName - Aggregation name, used in the error message
   * @returns Fresh array of the group's valid numbers, in row order
   * @throws {DataValidationError} If a non-null value is not a number
   * @private
   */
  collectNumbers(seriesData, indices, opName) {
    const nums = [];
    for (const idx of indices) {
      const val = seriesData[idx];
      if (val === null || val === undefined) continue;
      if (typeof val !== "number") {
        throw new DataValidationError(`${opName}() only works on numbers`);
      }
      if (isValidNumber(val)) nums.push(val);
    }
    return nums;
  }
  /**
   * Sample variance (n - 1 denominator) of `nums`; NaN for fewer than two values.
   *
   * @private
   */
  sampleVariance(nums) {
    const count = nums.length;
    if (count < 2) return NaN;
    let sum = 0;
    for (const val of nums) sum += val;
    const mean = sum / count;
    let sumSq = 0;
    for (const val of nums) sumSq += (val - mean) ** 2;
    return sumSq / (count - 1);
  }
  /**
   * Map an aggregation name to a function `(seriesData, indices) => value`.
   *
   * Resolving the name before any group is processed means an unsupported
   * name is rejected even when the frame has no rows.
   *
   * @param func - One of count, first, last, sum, mean, median, min, max, std, var
   * @returns Reducer for a single group of a single column
   * @throws {DataValidationError} If `func` is not a supported aggregation
   * @private
   */
  resolveAggregator(func) {
    // Wrap a reducer over plain number arrays so it first filters/validates the group.
    const numeric = (reduce) => (seriesData, indices) => reduce(this.collectNumbers(seriesData, indices, func));
    const total = (nums) => {
      let sum = 0;
      for (const val of nums) sum += val;
      return sum;
    };
    switch (func) {
      case "count":
        // Counts non-null values of any type.
        return (seriesData, indices) => {
          let count = 0;
          for (const idx of indices) {
            const val = seriesData[idx];
            if (val !== null && val !== undefined) count++;
          }
          return count;
        };
      case "first":
        return (seriesData, indices) => {
          const firstIdx = indices[0];
          return firstIdx !== undefined ? seriesData[firstIdx] : undefined;
        };
      case "last":
        return (seriesData, indices) => {
          const lastIdx = indices[indices.length - 1];
          return lastIdx !== undefined ? seriesData[lastIdx] : undefined;
        };
      case "sum":
        // A group with no valid numbers sums to 0.
        return numeric(total);
      case "mean":
        return numeric((nums) => (nums.length > 0 ? total(nums) / nums.length : NaN));
      case "median":
        return numeric((nums) => {
          if (nums.length === 0) return NaN;
          // nums is a private copy, so sorting in place is safe.
          nums.sort((a, b) => a - b);
          const mid = Math.floor(nums.length / 2);
          return nums.length % 2 === 0 ? (nums[mid - 1] + nums[mid]) / 2 : nums[mid];
        });
      case "min":
        return numeric((nums) => {
          if (nums.length === 0) return NaN;
          let min = Infinity;
          for (const val of nums) {
            if (val < min) min = val;
          }
          return min;
        });
      case "max":
        return numeric((nums) => {
          if (nums.length === 0) return NaN;
          let max = -Infinity;
          for (const val of nums) {
            if (val > max) max = val;
          }
          return max;
        });
      case "std":
        // sqrt(NaN) is NaN, so groups with fewer than two values yield NaN.
        return numeric((nums) => Math.sqrt(this.sampleVariance(nums)));
      case "var":
        return numeric((nums) => this.sampleVariance(nums));
      default:
        throw new DataValidationError(`Unsupported aggregation function: ${func}`);
    }
  }
  /**
   * Aggregate grouped data.
   *
   * Output columns are the grouping columns followed by one column per
   * requested aggregation. A single function name keeps the source column
   * name; an array of names produces `${column}_${function}` columns.
   *
   * @param operations - Dictionary of column name to aggregation function
   *   name, or to an array of function names
   * @returns New DataFrame with one row per group, in first-occurrence order
   * @throws {DataValidationError} If an aggregation name is unsupported, if a
   *   numeric aggregation meets a non-number value, or if two output columns
   *   would share a name (for example when aggregating a grouping column)
   *
   * @example
   * ```ts
   * const grouped = df.groupBy('category');
   * const result = grouped.agg({ value: 'sum', count: 'count' });
   * ```
   */
  agg(operations) {
    const groupByCols = this.getGroupByColumns();
    const resultData = {};
    const outputColumns = [];
    const declared = /* @__PURE__ */ new Set();
    // Register an output column and return the array its values are pushed to.
    // Rejecting duplicates prevents two producers from interleaving values in
    // one array and the column name from being listed twice.
    const declareOutput = (name) => {
      if (declared.has(name)) {
        throw new DataValidationError(`Duplicate output column in aggregation result: ${name}`);
      }
      declared.add(name);
      const values = [];
      resultData[name] = values;
      outputColumns.push(name);
      return values;
    };
    const keyTargets = groupByCols.map((col) => declareOutput(col));
    // Resolve every (column, function) pair once, up front: the column data
    // lookup and the name -> reducer mapping do not depend on the group.
    const plan = [];
    for (const [col, aggFunc] of Object.entries(operations)) {
      const isList = Array.isArray(aggFunc);
      const funcs = isList ? aggFunc : [aggFunc];
      const seriesData = this.df.get(col).data;
      for (const func of funcs) {
        const aggregate = this.resolveAggregator(func);
        const target = declareOutput(isList ? `${col}_${func}` : col);
        plan.push({ aggregate, seriesData, target });
      }
    }
    for (const [keyStr, indices] of this.groupMap) {
      const keyParts = this.keyValuesMap.get(keyStr);
      if (!keyParts) {
        throw new DataValidationError(`Missing key values for group: ${keyStr}`);
      }
      for (let i = 0; i < keyTargets.length; i++) {
        keyTargets[i].push(keyParts[i]);
      }
      for (const { aggregate, seriesData, target } of plan) {
        target.push(aggregate(seriesData, indices));
      }
    }
    return new DataFrame(resultData, { columns: outputColumns });
  }
  /**
   * Helper to identify numeric columns (excluding grouping columns).
   *
   * A column counts as numeric when at least one of its values passes
   * `isValidNumber`.
   *
   * @private
   */
  getNumericColumns() {
    const groupByCols = this.getGroupByColumns();
    return this.df.columns.filter(
      (col) => !groupByCols.includes(col) && this.df.get(col).data.some((value) => isValidNumber(value))
    );
  }
  /**
   * Helper method to perform same aggregation on all numeric non-grouping columns.
   *
   * @private
   */
  aggNumeric(operation) {
    const operations = {};
    for (const col of this.getNumericColumns()) {
      operations[col] = operation;
    }
    return this.agg(operations);
  }
  /**
   * Helper method to perform same aggregation on all non-grouping columns.
   *
   * @private
   */
  aggAll(operation) {
    const groupByCols = this.getGroupByColumns();
    const operations = {};
    for (const col of this.df.columns) {
      if (!groupByCols.includes(col)) operations[col] = operation;
    }
    return this.agg(operations);
  }
  /**
   * Compute sum for each group.
   *
   * @returns DataFrame with summed values by group
   *
   * @example
   * ```ts
   * const df = new DataFrame({
   *   category: ['A', 'A', 'B', 'B'],
   *   value: [1, 2, 3, 4]
   * });
   * df.groupBy('category').sum();
   * // category | value
   * // A        | 3
   * // B        | 7
   * ```
   */
  sum() {
    return this.aggNumeric("sum");
  }
  /**
   * Compute mean (average) for each group.
   *
   * @returns DataFrame with mean values by group
   */
  mean() {
    return this.aggNumeric("mean");
  }
  /**
   * Count non-null values in each non-grouping column for every group.
   *
   * @returns DataFrame with per-column non-null counts by group
   */
  count() {
    return this.aggAll("count");
  }
  /**
   * Compute minimum value for each group.
   *
   * @returns DataFrame with minimum values by group
   */
  min() {
    return this.aggNumeric("min");
  }
  /**
   * Compute maximum value for each group.
   *
   * @returns DataFrame with maximum values by group
   */
  max() {
    return this.aggNumeric("max");
  }
  /**
   * Compute standard deviation for each group (sample, n - 1 denominator).
   *
   * @returns DataFrame with standard deviation values by group
   */
  std() {
    return this.aggNumeric("std");
  }
  /**
   * Compute variance for each group (sample, n - 1 denominator).
   *
   * @returns DataFrame with variance values by group
   */
  var() {
    return this.aggNumeric("var");
  }
  /**
   * Compute median for each group.
   *
   * @returns DataFrame with median values by group
   */
  median() {
    return this.aggNumeric("median");
  }
};
4053
+
4054
+ export { DataFrame, DataFrameGroupBy, Series, dataframe_exports };
4055
+ //# sourceMappingURL=chunk-PL7TAYKI.js.map
4056
+ //# sourceMappingURL=chunk-PL7TAYKI.js.map