df-script 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/LICENSE +21 -0
  2. package/README.md +269 -0
  3. package/dist/api.d.ts +39 -0
  4. package/dist/api.js +18 -0
  5. package/dist/bundle.min.js +1 -0
  6. package/dist/columnExpressions/ColumnExpr.d.ts +368 -0
  7. package/dist/columnExpressions/ColumnExpr.js +23 -0
  8. package/dist/columnExpressions/ExprBase.d.ts +24 -0
  9. package/dist/columnExpressions/ExprBase.js +133 -0
  10. package/dist/columnExpressions/constants.d.ts +2 -0
  11. package/dist/columnExpressions/constants.js +5 -0
  12. package/dist/columnExpressions/functions/all.d.ts +5 -0
  13. package/dist/columnExpressions/functions/all.js +11 -0
  14. package/dist/columnExpressions/functions/coalesce.d.ts +3 -0
  15. package/dist/columnExpressions/functions/coalesce.js +40 -0
  16. package/dist/columnExpressions/functions/exclude.d.ts +5 -0
  17. package/dist/columnExpressions/functions/exclude.js +13 -0
  18. package/dist/columnExpressions/functions/lit.d.ts +5 -0
  19. package/dist/columnExpressions/functions/lit.js +22 -0
  20. package/dist/columnExpressions/functions/when.d.ts +24 -0
  21. package/dist/columnExpressions/functions/when.js +81 -0
  22. package/dist/columnExpressions/index.d.ts +19 -0
  23. package/dist/columnExpressions/index.js +81 -0
  24. package/dist/columnExpressions/mixins/AggregationExpr.d.ts +50 -0
  25. package/dist/columnExpressions/mixins/AggregationExpr.js +172 -0
  26. package/dist/columnExpressions/mixins/ArithmeticExpr.d.ts +67 -0
  27. package/dist/columnExpressions/mixins/ArithmeticExpr.js +143 -0
  28. package/dist/columnExpressions/mixins/ComparisonExpr.d.ts +60 -0
  29. package/dist/columnExpressions/mixins/ComparisonExpr.js +389 -0
  30. package/dist/columnExpressions/mixins/ListExpr.d.ts +63 -0
  31. package/dist/columnExpressions/mixins/ListExpr.js +248 -0
  32. package/dist/columnExpressions/mixins/LogicalExpr.d.ts +33 -0
  33. package/dist/columnExpressions/mixins/LogicalExpr.js +54 -0
  34. package/dist/columnExpressions/mixins/ManipulationExpr.d.ts +31 -0
  35. package/dist/columnExpressions/mixins/ManipulationExpr.js +35 -0
  36. package/dist/columnExpressions/mixins/StringExpr.d.ts +79 -0
  37. package/dist/columnExpressions/mixins/StringExpr.js +249 -0
  38. package/dist/columnExpressions/mixins/TemporalExpr.d.ts +69 -0
  39. package/dist/columnExpressions/mixins/TemporalExpr.js +127 -0
  40. package/dist/columnExpressions/mixins/WindowExpr.d.ts +50 -0
  41. package/dist/columnExpressions/mixins/WindowExpr.js +281 -0
  42. package/dist/columnExpressions/types.d.ts +7 -0
  43. package/dist/columnExpressions/types.js +2 -0
  44. package/dist/dataframe/constants.d.ts +1 -0
  45. package/dist/dataframe/constants.js +4 -0
  46. package/dist/dataframe/dataframe.d.ts +42 -0
  47. package/dist/dataframe/dataframe.js +749 -0
  48. package/dist/dataframe/grouped/grouped.d.ts +15 -0
  49. package/dist/dataframe/grouped/grouped.js +113 -0
  50. package/dist/dataframe/index.d.ts +4 -0
  51. package/dist/dataframe/index.js +20 -0
  52. package/dist/dataframe/types.d.ts +33 -0
  53. package/dist/dataframe/types.js +2 -0
  54. package/dist/dataframe/utils.d.ts +11 -0
  55. package/dist/dataframe/utils.js +215 -0
  56. package/dist/datatypes/DataType.d.ts +40 -0
  57. package/dist/datatypes/DataType.js +47 -0
  58. package/dist/datatypes/index.d.ts +29 -0
  59. package/dist/datatypes/index.js +46 -0
  60. package/dist/datatypes/types/Binary.d.ts +9 -0
  61. package/dist/datatypes/types/Binary.js +18 -0
  62. package/dist/datatypes/types/Boolean.d.ts +9 -0
  63. package/dist/datatypes/types/Boolean.js +19 -0
  64. package/dist/datatypes/types/Date.d.ts +8 -0
  65. package/dist/datatypes/types/Date.js +21 -0
  66. package/dist/datatypes/types/Datetime.d.ts +8 -0
  67. package/dist/datatypes/types/Datetime.js +17 -0
  68. package/dist/datatypes/types/Decimal.d.ts +10 -0
  69. package/dist/datatypes/types/Decimal.js +28 -0
  70. package/dist/datatypes/types/Duration.d.ts +8 -0
  71. package/dist/datatypes/types/Duration.js +17 -0
  72. package/dist/datatypes/types/Float32.d.ts +8 -0
  73. package/dist/datatypes/types/Float32.js +17 -0
  74. package/dist/datatypes/types/Float64.d.ts +8 -0
  75. package/dist/datatypes/types/Float64.js +17 -0
  76. package/dist/datatypes/types/Int16.d.ts +8 -0
  77. package/dist/datatypes/types/Int16.js +17 -0
  78. package/dist/datatypes/types/Int32.d.ts +8 -0
  79. package/dist/datatypes/types/Int32.js +17 -0
  80. package/dist/datatypes/types/Int64.d.ts +8 -0
  81. package/dist/datatypes/types/Int64.js +17 -0
  82. package/dist/datatypes/types/Int8.d.ts +8 -0
  83. package/dist/datatypes/types/Int8.js +17 -0
  84. package/dist/datatypes/types/List.d.ts +10 -0
  85. package/dist/datatypes/types/List.js +31 -0
  86. package/dist/datatypes/types/Null.d.ts +9 -0
  87. package/dist/datatypes/types/Null.js +17 -0
  88. package/dist/datatypes/types/Object.d.ts +9 -0
  89. package/dist/datatypes/types/Object.js +17 -0
  90. package/dist/datatypes/types/Struct.d.ts +14 -0
  91. package/dist/datatypes/types/Struct.js +39 -0
  92. package/dist/datatypes/types/Time.d.ts +8 -0
  93. package/dist/datatypes/types/Time.js +29 -0
  94. package/dist/datatypes/types/UInt16.d.ts +8 -0
  95. package/dist/datatypes/types/UInt16.js +17 -0
  96. package/dist/datatypes/types/UInt32.d.ts +8 -0
  97. package/dist/datatypes/types/UInt32.js +17 -0
  98. package/dist/datatypes/types/UInt64.d.ts +8 -0
  99. package/dist/datatypes/types/UInt64.js +17 -0
  100. package/dist/datatypes/types/UInt8.d.ts +8 -0
  101. package/dist/datatypes/types/UInt8.js +17 -0
  102. package/dist/datatypes/types/Utf8.d.ts +10 -0
  103. package/dist/datatypes/types/Utf8.js +20 -0
  104. package/dist/datatypes/types.d.ts +172 -0
  105. package/dist/datatypes/types.js +286 -0
  106. package/dist/exceptions/index.d.ts +13 -0
  107. package/dist/exceptions/index.js +43 -0
  108. package/dist/exceptions/utils.d.ts +2 -0
  109. package/dist/exceptions/utils.js +9 -0
  110. package/dist/functions/concat.d.ts +4 -0
  111. package/dist/functions/concat.js +248 -0
  112. package/dist/functions/index.d.ts +1 -0
  113. package/dist/functions/index.js +17 -0
  114. package/dist/index.d.ts +7 -0
  115. package/dist/index.js +1 -0
  116. package/dist/types.d.ts +47 -0
  117. package/dist/types.js +2 -0
  118. package/dist/utils/boolean.d.ts +1 -0
  119. package/dist/utils/boolean.js +18 -0
  120. package/dist/utils/date.d.ts +57 -0
  121. package/dist/utils/date.js +349 -0
  122. package/dist/utils/guards.d.ts +14 -0
  123. package/dist/utils/guards.js +143 -0
  124. package/dist/utils/index.d.ts +5 -0
  125. package/dist/utils/index.js +21 -0
  126. package/dist/utils/json.d.ts +2 -0
  127. package/dist/utils/json.js +33 -0
  128. package/dist/utils/list.d.ts +23 -0
  129. package/dist/utils/list.js +128 -0
  130. package/dist/utils/number.d.ts +86 -0
  131. package/dist/utils/number.js +223 -0
  132. package/dist/utils/string.d.ts +52 -0
  133. package/dist/utils/string.js +120 -0
  134. package/package.json +34 -0
@@ -0,0 +1,749 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DataFrame = void 0;
4
+ const columnExpressions_1 = require("../columnExpressions");
5
+ const grouped_1 = require("./grouped/grouped");
6
+ const datatypes_1 = require("../datatypes");
7
+ const utils_1 = require("../utils");
8
+ const exceptions_1 = require("../exceptions");
9
+ const concat_1 = require("../functions/concat");
10
+ const constants_1 = require("./constants");
11
+ const utils_2 = require("./utils");
12
+ class DataFrame {
13
// Column store: maps each column name to its array (plain or typed) of values.
+ _columns;
14
// Number of rows in the frame.
+ _height;
15
// Maps each column name to its data-type descriptor; starts out empty.
+ _schema = {};
16
+ constructor(data, schema, height) {
17
+ if (Array.isArray(data)) {
18
+ const { columns, height: h } = (0, utils_2.rowsToColumns)(data);
19
+ this._columns = columns;
20
+ this._height = h;
21
+ schema ? this.applySchema(schema) : this.inferSchema();
22
+ return;
23
+ }
24
+ if ((0, utils_1.isObj)(data)) {
25
+ let firstLength = -1;
26
+ for (const [key, col] of Object.entries(data)) {
27
+ const colLen = (0, utils_1.isArrayOrTypedArray)(col) ? col.length : 0;
28
+ if (firstLength === -1) {
29
+ firstLength = colLen;
30
+ }
31
+ else if (colLen !== firstLength) {
32
+ throw new exceptions_1.DataFrameError(`Column height mismatch: Column "${key}" has length ${colLen}, but previous columns have length ${firstLength}`);
33
+ }
34
+ }
35
+ this._columns = data;
36
+ this._height = height !== undefined ? height : (firstLength === -1 ? 0 : firstLength);
37
+ schema ? this.applySchema(schema) : this.inferSchema();
38
+ return;
39
+ }
40
+ this._columns = {};
41
+ this._height = 0;
42
+ schema ? this.applySchema(schema) : (this._schema = {});
43
+ }
44
+ inferSchema() {
45
+ const schema = {};
46
+ const keys = Object.keys(this._columns);
47
+ for (const key of keys) {
48
+ schema[key] = (0, utils_2.inferColumnType)(this._columns[key]);
49
+ }
50
+ this._schema = schema;
51
+ }
52
+ applySchema(schema) {
53
+ this._schema = schema;
54
+ const keys = Object.keys(schema);
55
+ const newColumns = {};
56
+ for (const key of keys) {
57
+ const type = schema[key];
58
+ const oldCol = this._columns[key];
59
+ let newCol = type.allocate ? type.allocate(this._height) : new Array(this._height).fill(null);
60
+ if (!oldCol) {
61
+ if (this._height > 0 && (0, utils_1.isTypedArray)(newCol)) {
62
+ newCol = new Array(this._height).fill(null);
63
+ }
64
+ newColumns[key] = newCol;
65
+ continue;
66
+ }
67
+ let hasNulls = false;
68
+ const coercedVals = new Array(this._height);
69
+ for (let i = 0; i < this._height; i++) {
70
+ const coerced = type.coerce(oldCol[i]);
71
+ coercedVals[i] = coerced;
72
+ if (coerced == null) {
73
+ hasNulls = true;
74
+ }
75
+ }
76
+ if (hasNulls && (0, utils_1.isTypedArray)(newCol)) {
77
+ newCol = new Array(this._height);
78
+ }
79
+ for (let i = 0; i < this._height; i++) {
80
+ newCol[i] = coercedVals[i];
81
+ }
82
+ newColumns[key] = newCol;
83
+ }
84
+ this._columns = newColumns;
85
+ }
86
+ getSchema() {
87
+ return this._schema;
88
+ }
89
+ to_dicts() {
90
+ return (0, utils_2.columnsToRows)(this._columns, this._height);
91
+ }
92
+ to_dict() {
93
+ return { ...this._columns };
94
+ }
95
+ get columns() {
96
+ return Object.keys(this._columns);
97
+ }
98
+ concat(items, options = {}) {
99
+ const arrayItems = (0, utils_1.isArrayOfType)(items, DataFrame, { mode: "every", allowEmpty: false })
100
+ ? items
101
+ : [items];
102
+ return (0, concat_1.concat)([this, ...arrayItems], options);
103
+ }
104
+ drop(...args) {
105
+ const columnsToDrop = new Set(args.flat());
106
+ const newColumns = {};
107
+ const outSchema = {};
108
+ for (const key of Object.keys(this._columns)) {
109
+ if (!columnsToDrop.has(key)) {
110
+ newColumns[key] = this._columns[key];
111
+ outSchema[key] = this._schema[key];
112
+ }
113
+ }
114
+ return new DataFrame(newColumns, outSchema, this._height);
115
+ }
116
+ get dtypes() {
117
+ const keys = Object.keys(this._columns);
118
+ const len = keys.length;
119
+ const result = new Array(len);
120
+ for (let i = 0; i < len; i++) {
121
+ result[i] = this._schema[keys[i]];
122
+ }
123
+ return result;
124
+ }
125
+ filter(...exprs) {
126
+ if (this._height === 0)
127
+ return new DataFrame({}, this._schema, 0);
128
+ const height = this._height;
129
+ const keys = Object.keys(this._columns);
130
+ const numKeys = keys.length;
131
+ const evaluatedExprs = [];
132
+ const funcPredicates = [];
133
+ for (const expr of exprs) {
134
+ if (typeof expr === "function") {
135
+ funcPredicates.push(expr);
136
+ }
137
+ else {
138
+ evaluatedExprs.push(expr.evaluate(this._columns, height));
139
+ }
140
+ }
141
+ const matchingIndices = [];
142
+ let currentIndex = 0;
143
+ let rowObj = null;
144
+ if (funcPredicates.length > 0) {
145
+ const columns = this._columns;
146
+ rowObj = {};
147
+ for (let k = 0; k < numKeys; k++) {
148
+ const key = keys[k];
149
+ const col = columns[key];
150
+ Object.defineProperty(rowObj, key, {
151
+ get() {
152
+ const val = col[currentIndex];
153
+ return val === undefined ? null : val;
154
+ },
155
+ enumerable: true,
156
+ configurable: true
157
+ });
158
+ }
159
+ }
160
+ for (let i = 0; i < height; i++) {
161
+ let keep = true;
162
+ for (let j = 0; j < evaluatedExprs.length; j++) {
163
+ if (!evaluatedExprs[j][i]) {
164
+ keep = false;
165
+ break;
166
+ }
167
+ }
168
+ if (!keep)
169
+ continue;
170
+ if (rowObj) {
171
+ currentIndex = i;
172
+ for (let j = 0; j < funcPredicates.length; j++) {
173
+ if (!funcPredicates[j](rowObj)) {
174
+ keep = false;
175
+ break;
176
+ }
177
+ }
178
+ }
179
+ if (!keep)
180
+ continue;
181
+ matchingIndices.push(i);
182
+ }
183
+ const newColumns = (0, utils_2.gatherColumnsByIndices)(this._columns, matchingIndices);
184
+ const newHeight = matchingIndices.length;
185
+ return new DataFrame(newColumns, this._schema, newHeight);
186
+ }
187
+ groupby(keys) {
188
+ const keysArr = (0, utils_1.toValidArray)(keys);
189
+ const groups = new Map();
190
+ const len = this._height;
191
+ const keysLen = keysArr.length;
192
+ const keysStr = new Array(keysLen);
193
+ for (let i = 0; i < keysLen; i++) {
194
+ keysStr[i] = String(keysArr[i]);
195
+ }
196
+ for (let j = 0; j < keysStr.length; j++) {
197
+ (0, exceptions_1.assertColumnExists)(keysStr[j], this._columns, "Grouping key");
198
+ }
199
+ const singleKey = keysLen === 1 ? keysStr[0] : null;
200
+ const col = singleKey ? this._columns[singleKey] : null;
201
+ for (let i = 0; i < len; i++) {
202
+ let hash;
203
+ if (singleKey) {
204
+ const val = col[i];
205
+ hash = val == null ? "" : String(val);
206
+ }
207
+ else {
208
+ const vals = new Array(keysLen);
209
+ for (let j = 0; j < keysLen; j++) {
210
+ const val = this._columns[keysStr[j]][i];
211
+ vals[j] = val == null ? "" : String(val);
212
+ }
213
+ hash = vals.join(constants_1.KEY_SEPARATOR);
214
+ }
215
+ let group = groups.get(hash);
216
+ if (group === undefined) {
217
+ groups.set(hash, group = []);
218
+ }
219
+ group.push(i);
220
+ }
221
+ const allKeys = Object.keys(this._columns);
222
+ return new grouped_1.GroupedData(groups, keysArr, allKeys, this._columns, this._height, this._schema);
223
+ }
224
+ head(n = 10) {
225
+ return this.limit(n, { offset: 0, from: "start" });
226
+ }
227
+ get height() {
228
+ return this._height;
229
+ }
230
+ hstack(other, options = {}) {
231
+ return this.concat(other, { how: "horizontal", horizontal: options });
232
+ }
233
+ join(config) {
234
+ const { other, on, how = "inner", suffixes = ["", "_right"] } = config;
235
+ const joinKeys = (0, utils_1.toValidArray)(on);
236
+ for (const key of joinKeys) {
237
+ const keyStr = String(key);
238
+ (0, exceptions_1.assertColumnExists)(keyStr, this._columns, "Join key", " in the left DataFrame.");
239
+ (0, exceptions_1.assertColumnExists)(keyStr, other._columns, "Join key", " in the right DataFrame.");
240
+ }
241
+ const [leftSuffix, rightSuffix] = suffixes;
242
+ const leftKeys = Object.keys(this._columns);
243
+ const rightKeys = Object.keys(other._columns);
244
+ const joinKeySet = new Set(joinKeys);
245
+ const leftLen = leftKeys.length;
246
+ const rightLen = rightKeys.length;
247
+ const getColumnHashAt = (columns, idx) => {
248
+ const len = joinKeys.length;
249
+ if (len === 1) {
250
+ const val = columns[joinKeys[0]][idx];
251
+ return val == null ? null : String(val);
252
+ }
253
+ const vals = new Array(len);
254
+ for (let i = 0; i < len; i++) {
255
+ const val = columns[joinKeys[i]][idx];
256
+ if (val == null)
257
+ return null;
258
+ vals[i] = String(val);
259
+ }
260
+ return vals.join(constants_1.KEY_SEPARATOR);
261
+ };
262
+ const rightHash = new Map();
263
+ const rightHeight = other._height;
264
+ const rightCols = other._columns;
265
+ for (let i = 0; i < rightHeight; i++) {
266
+ const hash = getColumnHashAt(rightCols, i);
267
+ if (hash === null)
268
+ continue;
269
+ let list = rightHash.get(hash);
270
+ if (list === undefined) {
271
+ list = [];
272
+ rightHash.set(hash, list);
273
+ }
274
+ list.push(i);
275
+ }
276
+ const leftHeight = this._height;
277
+ const leftCols = this._columns;
278
+ const leftIndices = [];
279
+ const rightIndices = [];
280
+ const trackRight = how === "outer" || how === "right";
281
+ const matchedRightIndices = trackRight ? new Set() : null;
282
+ for (let i = 0; i < leftHeight; i++) {
283
+ const hash = getColumnHashAt(leftCols, i);
284
+ const matches = hash === null ? undefined : rightHash.get(hash);
285
+ if (matches === undefined) {
286
+ if (how === "left" || how === "outer") {
287
+ leftIndices.push(i);
288
+ rightIndices.push(null);
289
+ }
290
+ }
291
+ else {
292
+ for (let m = 0; m < matches.length; m++) {
293
+ const rIdx = matches[m];
294
+ if (trackRight) {
295
+ matchedRightIndices.add(rIdx);
296
+ }
297
+ leftIndices.push(i);
298
+ rightIndices.push(rIdx);
299
+ }
300
+ }
301
+ }
302
+ if (trackRight) {
303
+ for (let j = 0; j < rightHeight; j++) {
304
+ if (!matchedRightIndices.has(j)) {
305
+ leftIndices.push(-1);
306
+ rightIndices.push(j);
307
+ }
308
+ }
309
+ }
310
+ const outHeight = leftIndices.length;
311
+ const newColumns = {};
312
+ const outSchema = {};
313
+ for (let i = 0; i < leftLen; i++) {
314
+ const k = leftKeys[i];
315
+ const mappedName = (k in other._columns && !joinKeySet.has(k)) ? `${k}${leftSuffix}` : k;
316
+ const leftCol = this._columns[k];
317
+ const isJoinKey = joinKeySet.has(k);
318
+ const outCol = new Array(outHeight);
319
+ if (isJoinKey) {
320
+ const rightCol = other._columns[k];
321
+ for (let r = 0; r < outHeight; r++) {
322
+ const leftIdx = leftIndices[r];
323
+ if (leftIdx !== -1) {
324
+ outCol[r] = leftCol[leftIdx];
325
+ }
326
+ else {
327
+ const rightIdx = rightIndices[r];
328
+ outCol[r] = rightIdx !== null ? rightCol[rightIdx] : null;
329
+ }
330
+ }
331
+ }
332
+ else {
333
+ for (let r = 0; r < outHeight; r++) {
334
+ const leftIdx = leftIndices[r];
335
+ outCol[r] = leftIdx !== -1 ? leftCol[leftIdx] : null;
336
+ }
337
+ }
338
+ newColumns[mappedName] = outCol;
339
+ if (this._schema[k]) {
340
+ outSchema[mappedName] = this._schema[k];
341
+ }
342
+ }
343
+ for (let i = 0; i < rightLen; i++) {
344
+ const k = rightKeys[i];
345
+ if (!joinKeySet.has(k)) {
346
+ const mappedName = k in this._columns ? `${k}${rightSuffix}` : k;
347
+ const rightCol = other._columns[k];
348
+ const outCol = new Array(outHeight);
349
+ for (let r = 0; r < outHeight; r++) {
350
+ const rightIdx = rightIndices[r];
351
+ outCol[r] = rightIdx !== null ? rightCol[rightIdx] : null;
352
+ }
353
+ newColumns[mappedName] = outCol;
354
+ if (other._schema[k]) {
355
+ outSchema[mappedName] = other._schema[k];
356
+ }
357
+ }
358
+ }
359
+ return new DataFrame(newColumns, outSchema, outHeight);
360
+ }
361
+ limit(n, options = {}) {
362
+ const { offset = 0, from = "start" } = options;
363
+ const len = this._height;
364
+ const safeN = isNaN(n) ? 0 : Math.max(Math.floor(n), 0);
365
+ const safeOffset = isNaN(offset) ? 0 : Math.max(Math.floor(offset), 0);
366
+ if (safeN === 0 || len === 0 || safeOffset >= len) {
367
+ const newColumns = {};
368
+ const outSchema = {};
369
+ for (const key of Object.keys(this._columns)) {
370
+ newColumns[key] = [];
371
+ outSchema[key] = this._schema[key];
372
+ }
373
+ return new DataFrame(newColumns, outSchema, 0);
374
+ }
375
+ let actualStart = 0;
376
+ let actualEnd = 0;
377
+ if (from === "end") {
378
+ actualEnd = Math.max(len - safeOffset, 0);
379
+ actualStart = Math.max(actualEnd - safeN, 0);
380
+ }
381
+ else {
382
+ actualEnd = Math.min(safeOffset + safeN, len);
383
+ actualStart = safeOffset;
384
+ }
385
+ const newHeight = Math.max(actualEnd - actualStart, 0);
386
+ const newColumns = {};
387
+ for (const key of Object.keys(this._columns)) {
388
+ newColumns[key] = this._columns[key].slice(actualStart, actualEnd);
389
+ }
390
+ return new DataFrame(newColumns, this._schema, newHeight);
391
+ }
392
+ pivot(config) {
393
+ if (this._height === 0)
394
+ return new DataFrame({}, {}, 0);
395
+ const { index, columns, values } = config;
396
+ const indexArr = (0, utils_1.toValidArray)(index);
397
+ const indexLen = indexArr.length;
398
+ const indexStr = new Array(indexLen);
399
+ for (let i = 0; i < indexLen; i++) {
400
+ indexStr[i] = String(indexArr[i]);
401
+ }
402
+ const colKey = String(columns);
403
+ const valKey = String(values);
404
+ for (const idxKey of indexStr) {
405
+ (0, exceptions_1.assertColumnExists)(idxKey, this._columns, "Pivot index key", " in the DataFrame.");
406
+ }
407
+ (0, exceptions_1.assertColumnExists)(colKey, this._columns, "Pivot column key");
408
+ (0, exceptions_1.assertColumnExists)(valKey, this._columns, "Pivot values key", " in the DataFrame.");
409
+ const groups = new Map();
410
+ const colNames = new Set();
411
+ const height = this._height;
412
+ const pivotCol = this._columns[colKey];
413
+ const valCol = this._columns[valKey];
414
+ for (let i = 0; i < height; i++) {
415
+ const vals = new Array(indexLen);
416
+ for (let j = 0; j < indexLen; j++) {
417
+ const val = this._columns[indexStr[j]][i];
418
+ vals[j] = val == null ? "" : String(val);
419
+ }
420
+ const rowKey = vals.join(constants_1.KEY_SEPARATOR);
421
+ const pivotColName = String(pivotCol[i]);
422
+ colNames.add(pivotColName);
423
+ let group = groups.get(rowKey);
424
+ if (group === undefined) {
425
+ group = { firstIdx: i, indices: [] };
426
+ groups.set(rowKey, group);
427
+ }
428
+ group.indices.push(i);
429
+ }
430
+ const outHeight = groups.size;
431
+ const newColumns = {};
432
+ const outSchema = {};
433
+ for (let j = 0; j < indexLen; j++) {
434
+ const k = indexStr[j];
435
+ newColumns[k] = new Array(outHeight);
436
+ if (this._schema[k]) {
437
+ outSchema[k] = this._schema[k];
438
+ }
439
+ }
440
+ const allCols = Array.from(colNames);
441
+ const valType = this._schema[valKey] || datatypes_1.DataTypeRegistry.Utf8;
442
+ for (let j = 0; j < allCols.length; j++) {
443
+ const colName = allCols[j];
444
+ newColumns[colName] = new Array(outHeight).fill(null);
445
+ outSchema[colName] = valType;
446
+ }
447
+ let groupIdx = 0;
448
+ for (const group of groups.values()) {
449
+ const firstIdx = group.firstIdx;
450
+ for (let j = 0; j < indexLen; j++) {
451
+ const k = indexStr[j];
452
+ newColumns[k][groupIdx] = this._columns[k][firstIdx];
453
+ }
454
+ const indices = group.indices;
455
+ for (let k = 0; k < indices.length; k++) {
456
+ const idx = indices[k];
457
+ const pivotColName = String(pivotCol[idx]);
458
+ newColumns[pivotColName][groupIdx] = valCol[idx];
459
+ }
460
+ groupIdx++;
461
+ }
462
+ return new DataFrame(newColumns, outSchema, outHeight);
463
+ }
464
+ rename(mapping) {
465
+ const renameMapping = mapping || {};
466
+ const newColumns = {};
467
+ const outSchema = {};
468
+ const originalKeys = Object.keys(this._columns);
469
+ for (const key of originalKeys) {
470
+ const newKey = renameMapping[key] || key;
471
+ newColumns[newKey] = this._columns[key];
472
+ outSchema[newKey] = this._schema[key];
473
+ }
474
+ const finalKeys = Object.keys(newColumns);
475
+ if (finalKeys.length < originalKeys.length) {
476
+ throw new exceptions_1.DataFrameError("Rename collision: Multiple columns mapped to the same output name.");
477
+ }
478
+ return new DataFrame(newColumns, outSchema, this._height);
479
+ }
480
+ reverse() {
481
+ if (this._height === 0)
482
+ return this;
483
+ const newColumns = {};
484
+ const keys = Object.keys(this._columns);
485
+ const len = keys.length;
486
+ for (let i = 0; i < len; i++) {
487
+ const key = keys[i];
488
+ newColumns[key] = this._columns[key].slice().reverse();
489
+ }
490
+ return new DataFrame(newColumns, this._schema, this._height);
491
+ }
492
+ get schema() {
493
+ return this._schema;
494
+ }
495
+ select(...args) {
496
+ const exprs = this._normalizeArgs(args);
497
+ const allKeys = Object.keys(this._columns);
498
+ const expandedExprs = (0, columnExpressions_1.resolveColumnSelectors)(exprs, allKeys);
499
+ const newColumns = {};
500
+ const outSchema = {};
501
+ for (const expr of expandedExprs) {
502
+ const targetKey = expr.outputName || expr.colName || columnExpressions_1.ALL_COLUMNS_MARKER;
503
+ if (targetKey in newColumns) {
504
+ throw new exceptions_1.DataFrameError(`Duplicate column selection: "${targetKey}" is selected multiple times.`);
505
+ }
506
+ newColumns[targetKey] = expr.isWindow
507
+ ? (0, utils_2.resolveWindowExpr)(expr, this._columns, this._height)
508
+ : expr.evaluate(this._columns, this._height);
509
+ const originalKey = expr.colName || targetKey;
510
+ const isPureColSelector = expr instanceof columnExpressions_1.ColumnExpr && expr.ops.length === 0 && !expr.isWindow && !expr.aggFn;
511
+ outSchema[targetKey] = (isPureColSelector && this._schema[originalKey])
512
+ ? this._schema[originalKey]
513
+ : (0, utils_2.inferColumnType)(newColumns[targetKey]);
514
+ }
515
+ return new DataFrame(newColumns, outSchema, this._height);
516
+ }
517
+ get shape() {
518
+ return [this.height, this.width];
519
+ }
520
+ slice(start, end) {
521
+ const total = this._height;
522
+ const actualStart = start < 0 ? Math.max(total + start, 0) : Math.min(start, total);
523
+ const actualEnd = end === undefined
524
+ ? total
525
+ : (end < 0 ? Math.max(total + end, 0) : Math.min(end, total));
526
+ const n = Math.max(actualEnd - actualStart, 0);
527
+ return this.limit(n, { offset: actualStart });
528
+ }
529
+ sort(config) {
530
+ if (!config || !config.by || this._height === 0)
531
+ return this;
532
+ const { by, descending = false, nullsLast = true, custom } = config;
533
+ const sortKeys = (0, utils_1.toValidArray)(by);
534
+ for (let i = 0; i < sortKeys.length; i++) {
535
+ const keyOrExpr = sortKeys[i];
536
+ if (typeof keyOrExpr === "string") {
537
+ (0, exceptions_1.assertColumnExists)(keyOrExpr, this._columns, "Sort key");
538
+ }
539
+ }
540
+ const descArray = Array.isArray(descending)
541
+ ? descending
542
+ : new Array(sortKeys.length).fill(descending);
543
+ const sortKeysLen = sortKeys.length;
544
+ const plan = new Array(sortKeysLen);
545
+ for (let i = 0; i < sortKeysLen; i++) {
546
+ const keyOrExpr = sortKeys[i];
547
+ const isDesc = descArray[i] ? -1 : 1;
548
+ const customComp = (custom && typeof keyOrExpr === "string") ? custom[keyOrExpr] : null;
549
+ const values = keyOrExpr?.evaluate
550
+ ? keyOrExpr.evaluate(this._columns, this._height)
551
+ : (this._columns[keyOrExpr] || new Array(this._height).fill(null));
552
+ plan[i] = {
553
+ values,
554
+ isDesc,
555
+ customComp
556
+ };
557
+ }
558
+ const planLen = plan.length;
559
+ const nullMultiplier = nullsLast ? 1 : -1;
560
+ const indices = new Array(this._height);
561
+ for (let i = 0; i < this._height; i++) {
562
+ indices[i] = i;
563
+ }
564
+ indices.sort((idxA, idxB) => {
565
+ for (let i = 0; i < planLen; i++) {
566
+ const { values, isDesc, customComp } = plan[i];
567
+ const vA = values[idxA];
568
+ const vB = values[idxB];
569
+ if (customComp) {
570
+ const res = customComp(vA, vB);
571
+ if (res !== 0)
572
+ return res * isDesc;
573
+ continue;
574
+ }
575
+ if (vA == null || vB == null) {
576
+ if (vA === vB)
577
+ continue;
578
+ return (vA == null ? 1 : -1) * nullMultiplier;
579
+ }
580
+ if (vA === vB)
581
+ continue;
582
+ const res = vA < vB ? -1 : 1;
583
+ return res * isDesc;
584
+ }
585
+ return 0;
586
+ });
587
+ const newColumns = (0, utils_2.gatherColumnsByIndices)(this._columns, indices);
588
+ return new DataFrame(newColumns, this._schema, this._height);
589
+ }
590
+ tail(n = 10) {
591
+ return this.limit(n, { offset: 0, from: 'end' });
592
+ }
593
+ to_list(nameOrExpr) {
594
+ if (this._height === 0)
595
+ return [];
596
+ const isExpr = nameOrExpr && typeof nameOrExpr !== "string" && "evaluate" in nameOrExpr;
597
+ let colData;
598
+ if (isExpr) {
599
+ const expr = nameOrExpr;
600
+ colData = expr.evaluate(this._columns, this._height);
601
+ }
602
+ else {
603
+ const key = nameOrExpr;
604
+ if (key == null) {
605
+ return new Array(this._height).fill(null);
606
+ }
607
+ (0, exceptions_1.assertColumnExists)(key, this._columns, "Column");
608
+ colData = this._columns[key];
609
+ }
610
+ return Array.isArray(colData) ? colData : Array.from(colData);
611
+ }
612
+ unique(columns) {
613
+ if (this._height === 0)
614
+ return new DataFrame({}, this._schema, 0);
615
+ const colsArr = (0, utils_1.toValidArray)(columns);
616
+ const colsStr = colsArr.length === 0
617
+ ? Object.keys(this._columns)
618
+ : colsArr.map(String);
619
+ for (const colKey of colsStr) {
620
+ (0, exceptions_1.assertColumnExists)(colKey, this._columns, "Unique column key");
621
+ }
622
+ const seen = new Set();
623
+ const matchingIndices = [];
624
+ const colsLen = colsStr.length;
625
+ const height = this._height;
626
+ const singleCol = colsLen === 1 ? this._columns[colsStr[0]] : null;
627
+ for (let i = 0; i < height; i++) {
628
+ let hash;
629
+ if (singleCol) {
630
+ const val = singleCol[i];
631
+ hash = val == null ? "" : String(val);
632
+ }
633
+ else {
634
+ const vals = new Array(colsLen);
635
+ for (let j = 0; j < colsLen; j++) {
636
+ const val = this._columns[colsStr[j]][i];
637
+ vals[j] = val == null ? "" : String(val);
638
+ }
639
+ hash = vals.join(constants_1.KEY_SEPARATOR);
640
+ }
641
+ if (!seen.has(hash)) {
642
+ seen.add(hash);
643
+ matchingIndices.push(i);
644
+ }
645
+ }
646
+ const newColumns = (0, utils_2.gatherColumnsByIndices)(this._columns, matchingIndices);
647
+ const newHeight = matchingIndices.length;
648
+ return new DataFrame(newColumns, this._schema, newHeight);
649
+ }
650
+ unpivot(config) {
651
+ const { idVars, valueVars, varName = "variable", valueName = "value" } = config;
652
+ const idVarsStr = (0, utils_1.toValidArray)(idVars).map(String);
653
+ const valueVarsStr = (0, utils_1.toValidArray)(valueVars).map(String);
654
+ const idVarsLen = idVarsStr.length;
655
+ const valueVarsLen = valueVarsStr.length;
656
+ for (const idKey of idVarsStr) {
657
+ (0, exceptions_1.assertColumnExists)(idKey, this._columns, "Unpivot id variable key");
658
+ }
659
+ for (const vKey of valueVarsStr) {
660
+ (0, exceptions_1.assertColumnExists)(vKey, this._columns, "Unpivot value variable key");
661
+ }
662
+ const newHeight = this._height * valueVarsLen;
663
+ const newColumns = {};
664
+ for (let k = 0; k < idVarsLen; k++) {
665
+ newColumns[idVarsStr[k]] = new Array(newHeight);
666
+ }
667
+ newColumns[varName] = new Array(newHeight);
668
+ newColumns[valueName] = new Array(newHeight);
669
+ let outIdx = 0;
670
+ for (let i = 0; i < this._height; i++) {
671
+ for (let j = 0; j < valueVarsLen; j++) {
672
+ const vVar = valueVarsStr[j];
673
+ for (let k = 0; k < idVarsLen; k++) {
674
+ const idKey = idVarsStr[k];
675
+ newColumns[idKey][outIdx] = this._columns[idKey][i];
676
+ }
677
+ newColumns[varName][outIdx] = vVar;
678
+ newColumns[valueName][outIdx] = this._columns[vVar][i];
679
+ outIdx++;
680
+ }
681
+ }
682
+ const outSchema = {};
683
+ for (const key of idVarsStr) {
684
+ outSchema[key] = this._schema[key];
685
+ }
686
+ outSchema[varName] = datatypes_1.DataTypeRegistry.Utf8;
687
+ outSchema[valueName] = (0, utils_2.inferColumnType)(newColumns[valueName]);
688
+ return new DataFrame(newColumns, outSchema, newHeight);
689
+ }
690
+ vstack(other) {
691
+ return this.concat(other, { how: "vertical" });
692
+ }
693
+ get width() {
694
+ return Object.keys(this._columns).length;
695
+ }
696
+ _normalizeArgs(args) {
697
+ const flatArgs = args.flat();
698
+ const exprs = [];
699
+ for (const arg of flatArgs) {
700
+ if (typeof arg === "string") {
701
+ exprs.push(new columnExpressions_1.ColumnExpr(arg));
702
+ }
703
+ else if ((0, utils_1.isObj)(arg) && 'evaluate' in arg) {
704
+ exprs.push(arg);
705
+ }
706
+ else if ((0, utils_1.isObj)(arg)) {
707
+ for (const [key, val] of Object.entries(arg)) {
708
+ if ((0, utils_1.isObj)(val) && 'evaluate' in val) {
709
+ exprs.push(val.alias(key));
710
+ }
711
+ else {
712
+ const staticExpr = new columnExpressions_1.ColumnExpr(key);
713
+ staticExpr.evaluate = (_cols, h) => new Array(h).fill(val);
714
+ exprs.push(staticExpr);
715
+ }
716
+ }
717
+ }
718
+ }
719
+ return exprs;
720
+ }
721
+ with_columns(...args) {
722
+ const exprs = this._normalizeArgs(args);
723
+ const allKeys = Object.keys(this._columns);
724
+ const expandedExprs = (0, columnExpressions_1.resolveColumnSelectors)(exprs, allKeys);
725
+ const numEntries = expandedExprs.length;
726
+ const newColumns = { ...this._columns };
727
+ const outSchema = { ...this._schema };
728
+ for (let j = 0; j < numEntries; j++) {
729
+ const expr = expandedExprs[j];
730
+ const name = expr.outputName || expr.colName || columnExpressions_1.ALL_COLUMNS_MARKER;
731
+ if (expr.isWindow) {
732
+ newColumns[name] = (0, utils_2.resolveWindowExpr)(expr, this._columns, this._height);
733
+ }
734
+ else {
735
+ newColumns[name] = expr.evaluate(this._columns, this._height);
736
+ }
737
+ const originalKey = expr.colName || name;
738
+ const isPureColSelector = expr instanceof columnExpressions_1.ColumnExpr && expr.ops.length === 0 && !expr.isWindow && !expr.aggFn;
739
+ if (isPureColSelector && this._schema[originalKey]) {
740
+ outSchema[name] = this._schema[originalKey];
741
+ }
742
+ else {
743
+ outSchema[name] = (0, utils_2.inferColumnType)(newColumns[name]);
744
+ }
745
+ }
746
+ return new DataFrame(newColumns, outSchema, this._height);
747
+ }
748
+ }
749
+ exports.DataFrame = DataFrame;