polars-df 0.2.5 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +290 -137
  4. data/Cargo.toml +1 -1
  5. data/README.md +40 -2
  6. data/ext/polars/Cargo.toml +5 -4
  7. data/ext/polars/src/apply/dataframe.rs +6 -6
  8. data/ext/polars/src/apply/series.rs +10 -10
  9. data/ext/polars/src/batched_csv.rs +6 -4
  10. data/ext/polars/src/conversion.rs +56 -17
  11. data/ext/polars/src/dataframe.rs +65 -43
  12. data/ext/polars/src/error.rs +16 -8
  13. data/ext/polars/src/file.rs +5 -4
  14. data/ext/polars/src/lazy/apply.rs +1 -1
  15. data/ext/polars/src/lazy/dataframe.rs +12 -6
  16. data/ext/polars/src/lazy/dsl.rs +99 -45
  17. data/ext/polars/src/lazy/meta.rs +10 -9
  18. data/ext/polars/src/lib.rs +33 -29
  19. data/ext/polars/src/numo.rs +57 -0
  20. data/ext/polars/src/object.rs +2 -1
  21. data/ext/polars/src/series.rs +67 -53
  22. data/lib/polars/cat_expr.rb +0 -4
  23. data/lib/polars/cat_name_space.rb +0 -4
  24. data/lib/polars/convert.rb +0 -7
  25. data/lib/polars/data_frame.rb +165 -209
  26. data/lib/polars/data_types.rb +4 -0
  27. data/lib/polars/date_time_expr.rb +19 -151
  28. data/lib/polars/date_time_name_space.rb +17 -17
  29. data/lib/polars/expr.rb +68 -315
  30. data/lib/polars/group_by.rb +79 -51
  31. data/lib/polars/io.rb +1 -1
  32. data/lib/polars/lazy_frame.rb +1 -103
  33. data/lib/polars/lazy_functions.rb +0 -26
  34. data/lib/polars/lazy_group_by.rb +0 -8
  35. data/lib/polars/list_expr.rb +5 -27
  36. data/lib/polars/list_name_space.rb +5 -8
  37. data/lib/polars/plot.rb +109 -0
  38. data/lib/polars/series.rb +61 -19
  39. data/lib/polars/string_expr.rb +20 -76
  40. data/lib/polars/string_name_space.rb +5 -15
  41. data/lib/polars/struct_expr.rb +0 -2
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +1 -0
  44. metadata +5 -3
data/lib/polars/list_expr.rb CHANGED
@@ -24,7 +24,6 @@ module Polars
24
24
  # # │ u32 │
25
25
  # # ╞═════╡
26
26
  # # │ 2 │
27
- # # ├╌╌╌╌╌┤
28
27
  # # │ 1 │
29
28
  # # └─────┘
30
29
  def lengths
@@ -46,7 +45,6 @@ module Polars
46
45
  # # │ i64 │
47
46
  # # ╞════════╡
48
47
  # # │ 1 │
49
- # # ├╌╌╌╌╌╌╌╌┤
50
48
  # # │ 5 │
51
49
  # # └────────┘
52
50
  def sum
@@ -68,7 +66,6 @@ module Polars
68
66
  # # │ i64 │
69
67
  # # ╞════════╡
70
68
  # # │ 1 │
71
- # # ├╌╌╌╌╌╌╌╌┤
72
69
  # # │ 3 │
73
70
  # # └────────┘
74
71
  def max
@@ -90,7 +87,6 @@ module Polars
90
87
  # # │ i64 │
91
88
  # # ╞════════╡
92
89
  # # │ 1 │
93
- # # ├╌╌╌╌╌╌╌╌┤
94
90
  # # │ 2 │
95
91
  # # └────────┘
96
92
  def min
@@ -112,7 +108,6 @@ module Polars
112
108
  # # │ f64 │
113
109
  # # ╞════════╡
114
110
  # # │ 1.0 │
115
- # # ├╌╌╌╌╌╌╌╌┤
116
111
  # # │ 2.5 │
117
112
  # # └────────┘
118
113
  def mean
@@ -138,7 +133,6 @@ module Polars
138
133
  # # │ list[i64] │
139
134
  # # ╞═══════════╡
140
135
  # # │ [1, 2, 3] │
141
- # # ├╌╌╌╌╌╌╌╌╌╌╌┤
142
136
  # # │ [1, 2, 9] │
143
137
  # # └───────────┘
144
138
  def sort(reverse: false)
@@ -164,7 +158,6 @@ module Polars
164
158
  # # │ list[i64] │
165
159
  # # ╞═══════════╡
166
160
  # # │ [1, 2, 3] │
167
- # # ├╌╌╌╌╌╌╌╌╌╌╌┤
168
161
  # # │ [2, 1, 9] │
169
162
  # # └───────────┘
170
163
  def reverse
@@ -218,7 +211,6 @@ module Polars
218
211
  # # │ list[str] │
219
212
  # # ╞═════════════════╡
220
213
  # # │ ["a", "b", "c"] │
221
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
222
214
  # # │ ["x", "y", "z"] │
223
215
  # # └─────────────────┘
224
216
  def concat(other)
@@ -258,9 +250,7 @@ module Polars
258
250
  # # │ i64 │
259
251
  # # ╞══════╡
260
252
  # # │ 3 │
261
- # # ├╌╌╌╌╌╌┤
262
253
  # # │ null │
263
- # # ├╌╌╌╌╌╌┤
264
254
  # # │ 1 │
265
255
  # # └──────┘
266
256
  def get(index)
@@ -290,9 +280,7 @@ module Polars
290
280
  # # │ i64 │
291
281
  # # ╞══════╡
292
282
  # # │ 3 │
293
- # # ├╌╌╌╌╌╌┤
294
283
  # # │ null │
295
- # # ├╌╌╌╌╌╌┤
296
284
  # # │ 1 │
297
285
  # # └──────┘
298
286
  def first
@@ -314,9 +302,7 @@ module Polars
314
302
  # # │ i64 │
315
303
  # # ╞══════╡
316
304
  # # │ 1 │
317
- # # ├╌╌╌╌╌╌┤
318
305
  # # │ null │
319
- # # ├╌╌╌╌╌╌┤
320
306
  # # │ 2 │
321
307
  # # └──────┘
322
308
  def last
@@ -341,9 +327,7 @@ module Polars
341
327
  # # │ bool │
342
328
  # # ╞═══════╡
343
329
  # # │ true │
344
- # # ├╌╌╌╌╌╌╌┤
345
330
  # # │ false │
346
- # # ├╌╌╌╌╌╌╌┤
347
331
  # # │ true │
348
332
  # # └───────┘
349
333
  def contains(item)
@@ -370,7 +354,6 @@ module Polars
370
354
  # # │ str │
371
355
  # # ╞═══════╡
372
356
  # # │ a b c │
373
- # # ├╌╌╌╌╌╌╌┤
374
357
  # # │ x y │
375
358
  # # └───────┘
376
359
  def join(separator)
@@ -396,7 +379,6 @@ module Polars
396
379
  # # │ u32 │
397
380
  # # ╞═════╡
398
381
  # # │ 0 │
399
- # # ├╌╌╌╌╌┤
400
382
  # # │ 1 │
401
383
  # # └─────┘
402
384
  def arg_min
@@ -422,7 +404,6 @@ module Polars
422
404
  # # │ u32 │
423
405
  # # ╞═════╡
424
406
  # # │ 1 │
425
- # # ├╌╌╌╌╌┤
426
407
  # # │ 0 │
427
408
  # # └─────┘
428
409
  def arg_max
@@ -443,7 +424,7 @@ module Polars
443
424
  # s.arr.diff
444
425
  # # =>
445
426
  # # shape: (2,)
446
- # # Series: 'a' [list]
427
+ # # Series: 'a' [list[i64]]
447
428
  # # [
448
429
  # # [null, 1, ... 1]
449
430
  # # [null, -8, -1]
@@ -464,7 +445,7 @@ module Polars
464
445
  # s.arr.shift
465
446
  # # =>
466
447
  # # shape: (2,)
467
- # # Series: 'a' [list]
448
+ # # Series: 'a' [list[i64]]
468
449
  # # [
469
450
  # # [null, 1, ... 3]
470
451
  # # [null, 10, 2]
@@ -488,7 +469,7 @@ module Polars
488
469
  # s.arr.slice(1, 2)
489
470
  # # =>
490
471
  # # shape: (2,)
491
- # # Series: 'a' [list]
472
+ # # Series: 'a' [list[i64]]
492
473
  # # [
493
474
  # # [2, 3]
494
475
  # # [2, 1]
@@ -511,7 +492,7 @@ module Polars
511
492
  # s.arr.head(2)
512
493
  # # =>
513
494
  # # shape: (2,)
514
- # # Series: 'a' [list]
495
+ # # Series: 'a' [list[i64]]
515
496
  # # [
516
497
  # # [1, 2]
517
498
  # # [10, 2]
@@ -532,7 +513,7 @@ module Polars
532
513
  # s.arr.tail(2)
533
514
  # # =>
534
515
  # # shape: (2,)
535
- # # Series: 'a' [list]
516
+ # # Series: 'a' [list[i64]]
536
517
  # # [
537
518
  # # [3, 4]
538
519
  # # [2, 1]
@@ -563,7 +544,6 @@ module Polars
563
544
  # # │ struct[3] │
564
545
  # # ╞════════════╡
565
546
  # # │ {1,2,3} │
566
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
567
547
  # # │ {1,2,null} │
568
548
  # # └────────────┘
569
549
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
@@ -598,9 +578,7 @@ module Polars
598
578
  # # │ i64 ┆ i64 ┆ list[f32] │
599
579
  # # ╞═════╪═════╪════════════╡
600
580
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
601
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
602
581
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
603
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
604
582
  # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
605
583
  # # └─────┴─────┴────────────┘
606
584
  def eval(expr, parallel: false)
data/lib/polars/list_name_space.rb CHANGED
@@ -183,7 +183,7 @@ module Polars
183
183
  # s.arr.diff
184
184
  # # =>
185
185
  # # shape: (2,)
186
- # # Series: 'a' [list]
186
+ # # Series: 'a' [list[i64]]
187
187
  # # [
188
188
  # # [null, 1, ... 1]
189
189
  # # [null, -8, -1]
@@ -204,7 +204,7 @@ module Polars
204
204
  # s.arr.shift
205
205
  # # =>
206
206
  # # shape: (2,)
207
- # # Series: 'a' [list]
207
+ # # Series: 'a' [list[i64]]
208
208
  # # [
209
209
  # # [null, 1, ... 3]
210
210
  # # [null, 10, 2]
@@ -228,7 +228,7 @@ module Polars
228
228
  # s.arr.slice(1, 2)
229
229
  # # =>
230
230
  # # shape: (2,)
231
- # # Series: 'a' [list]
231
+ # # Series: 'a' [list[i64]]
232
232
  # # [
233
233
  # # [2, 3]
234
234
  # # [2, 1]
@@ -249,7 +249,7 @@ module Polars
249
249
  # s.arr.head(2)
250
250
  # # =>
251
251
  # # shape: (2,)
252
- # # Series: 'a' [list]
252
+ # # Series: 'a' [list[i64]]
253
253
  # # [
254
254
  # # [1, 2]
255
255
  # # [10, 2]
@@ -270,7 +270,7 @@ module Polars
270
270
  # s.arr.tail(2)
271
271
  # # =>
272
272
  # # shape: (2,)
273
- # # Series: 'a' [list]
273
+ # # Series: 'a' [list[i64]]
274
274
  # # [
275
275
  # # [3, 4]
276
276
  # # [2, 1]
@@ -300,7 +300,6 @@ module Polars
300
300
  # # │ struct[3] │
301
301
  # # ╞════════════╡
302
302
  # # │ {1,2,3} │
303
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
304
303
  # # │ {1,2,null} │
305
304
  # # └────────────┘
306
305
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
@@ -334,9 +333,7 @@ module Polars
334
333
  # # │ i64 ┆ i64 ┆ list[f32] │
335
334
  # # ╞═════╪═════╪════════════╡
336
335
  # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
337
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
338
336
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
339
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
340
337
  # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
341
338
  # # └─────┴─────┴────────────┘
342
339
  def eval(expr, parallel: false)
data/lib/polars/plot.rb ADDED
@@ -0,0 +1,109 @@
1
+ module Polars
2
+ module Plot
3
+ # Plot data.
4
+ #
5
+ # @return [Vega::LiteChart]
6
+ def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
7
+ require "vega"
8
+
9
+ raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
10
+ x ||= columns[0]
11
+ y ||= columns[1]
12
+ type ||= begin
13
+ if self[x].numeric? && self[y].numeric?
14
+ "scatter"
15
+ elsif self[x].utf8? && self[y].numeric?
16
+ "column"
17
+ elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
18
+ "line"
19
+ else
20
+ raise "Cannot determine type. Use the type option."
21
+ end
22
+ end
23
+ df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
24
+ data = df.rows(named: true)
25
+
26
+ case type
27
+ when "line", "area"
28
+ x_type =
29
+ if df[x].numeric?
30
+ "quantitative"
31
+ elsif df[x].datelike?
32
+ "temporal"
33
+ else
34
+ "nominal"
35
+ end
36
+
37
+ scale = x_type == "temporal" ? {type: "utc"} : {}
38
+ encoding = {
39
+ x: {field: x, type: x_type, scale: scale},
40
+ y: {field: y, type: "quantitative"}
41
+ }
42
+ encoding[:color] = {field: group} if group
43
+
44
+ Vega.lite
45
+ .data(data)
46
+ .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
47
+ .encoding(encoding)
48
+ .config(axis: {labelFontSize: 12})
49
+ when "pie"
50
+ raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
51
+
52
+ Vega.lite
53
+ .data(data)
54
+ .mark(type: "arc", tooltip: true)
55
+ .encoding(
56
+ color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
57
+ theta: {field: y, type: "quantitative"}
58
+ )
59
+ .view(stroke: nil)
60
+ when "column"
61
+ encoding = {
62
+ x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
63
+ y: {field: y, type: "quantitative"}
64
+ }
65
+ if group
66
+ encoding[:color] = {field: group}
67
+ encoding[:xOffset] = {field: group} unless stacked
68
+ end
69
+
70
+ Vega.lite
71
+ .data(data)
72
+ .mark(type: "bar", tooltip: true)
73
+ .encoding(encoding)
74
+ .config(axis: {labelFontSize: 12})
75
+ when "bar"
76
+ encoding = {
77
+ # TODO determine label angle
78
+ y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
79
+ x: {field: y, type: "quantitative"}
80
+ }
81
+ if group
82
+ encoding[:color] = {field: group}
83
+ encoding[:yOffset] = {field: group} unless stacked
84
+ end
85
+
86
+ Vega.lite
87
+ .data(data)
88
+ .mark(type: "bar", tooltip: true)
89
+ .encoding(encoding)
90
+ .config(axis: {labelFontSize: 12})
91
+ when "scatter"
92
+ encoding = {
93
+ x: {field: x, type: "quantitative", scale: {zero: false}},
94
+ y: {field: y, type: "quantitative", scale: {zero: false}},
95
+ size: {value: 60}
96
+ }
97
+ encoding[:color] = {field: group} if group
98
+
99
+ Vega.lite
100
+ .data(data)
101
+ .mark(type: "circle", tooltip: true)
102
+ .encoding(encoding)
103
+ .config(axis: {labelFontSize: 12})
104
+ else
105
+ raise ArgumentError, "Invalid type: #{type}"
106
+ end
107
+ end
108
+ end
109
+ end
data/lib/polars/series.rb CHANGED
@@ -259,6 +259,17 @@ module Polars
259
259
  0 - self
260
260
  end
261
261
 
262
+ # Returns an enumerator.
263
+ #
264
+ # @return [Object]
265
+ def each
266
+ return to_enum(:each) unless block_given?
267
+
268
+ length.times do |i|
269
+ yield self[i]
270
+ end
271
+ end
272
+
262
273
  # Returns elements of the Series.
263
274
  #
264
275
  # @return [Object]
@@ -432,15 +443,10 @@ module Polars
432
443
  # # │ str ┆ f64 │
433
444
  # # ╞════════════╪══════════╡
434
445
  # # │ min ┆ 1.0 │
435
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
436
446
  # # │ max ┆ 5.0 │
437
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
438
447
  # # │ null_count ┆ 0.0 │
439
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
440
448
  # # │ mean ┆ 3.0 │
441
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
442
449
  # # │ std ┆ 1.581139 │
443
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
444
450
  # # │ count ┆ 5.0 │
445
451
  # # └────────────┴──────────┘
446
452
  #
@@ -455,9 +461,7 @@ module Polars
455
461
  # # │ str ┆ i64 │
456
462
  # # ╞════════════╪═══════╡
457
463
  # # │ unique ┆ 4 │
458
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
459
464
  # # │ null_count ┆ 1 │
460
- # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
461
465
  # # │ count ┆ 5 │
462
466
  # # └────────────┴───────┘
463
467
  def describe
@@ -660,13 +664,11 @@ module Polars
660
664
  # # │ u8 ┆ u8 ┆ u8 │
661
665
  # # ╞═════╪═════╪═════╡
662
666
  # # │ 1 ┆ 0 ┆ 0 │
663
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
664
667
  # # │ 0 ┆ 1 ┆ 0 │
665
- # # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
666
668
  # # │ 0 ┆ 0 ┆ 1 │
667
669
  # # └─────┴─────┴─────┘
668
- def to_dummies
669
- Utils.wrap_df(_s.to_dummies)
670
+ def to_dummies(separator: "_")
671
+ Utils.wrap_df(_s.to_dummies(separator))
670
672
  end
671
673
 
672
674
  # Count the unique values in a Series.
@@ -687,9 +689,7 @@ module Polars
687
689
  # # │ i64 ┆ u32 │
688
690
  # # ╞═════╪════════╡
689
691
  # # │ 1 ┆ 1 │
690
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
691
692
  # # │ 2 ┆ 2 │
692
- # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
693
693
  # # │ 3 ┆ 1 │
694
694
  # # └─────┴────────┘
695
695
  def value_counts(sort: false)
@@ -1285,8 +1285,12 @@ module Polars
1285
1285
  # Expression or scalar value.
1286
1286
  #
1287
1287
  # @return [Integer]
1288
- def search_sorted(element)
1289
- Polars.select(Polars.lit(self).search_sorted(element))[0, 0]
1288
+ def search_sorted(element, side: "any")
1289
+ if element.is_a?(Integer) || element.is_a?(Float)
1290
+ return Polars.select(Polars.lit(self).search_sorted(element, side: side)).item
1291
+ end
1292
+ element = Series.new(element)
1293
+ Polars.select(Polars.lit(self).search_sorted(element, side: side)).to_series
1290
1294
  end
1291
1295
 
1292
1296
  # Get unique elements in series.
@@ -1500,7 +1504,7 @@ module Polars
1500
1504
  # sets = Polars::Series.new("sets", [[1, 2, 3], [1, 2], [9, 10]])
1501
1505
  # # =>
1502
1506
  # # shape: (3,)
1503
- # # Series: 'sets' [list]
1507
+ # # Series: 'sets' [list[i64]]
1504
1508
  # # [
1505
1509
  # # [1, 2, 3]
1506
1510
  # # [1, 2]
@@ -1772,8 +1776,9 @@ module Polars
1772
1776
  # s.is_datelike
1773
1777
  # # => true
1774
1778
  def is_datelike
1775
- [Date, Datetime, Duration, Time].include?(dtype)
1779
+ [Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
1776
1780
  end
1781
+ alias_method :datelike?, :is_datelike
1777
1782
 
1778
1783
  # Check if this Series has floating point numbers.
1779
1784
  #
@@ -1819,8 +1824,45 @@ module Polars
1819
1824
  # def view
1820
1825
  # end
1821
1826
 
1822
- # def to_numo
1823
- # end
1827
+ # Convert this Series to a Numo array. This operation clones data but is completely safe.
1828
+ #
1829
+ # @return [Numo::NArray]
1830
+ #
1831
+ # @example
1832
+ # s = Polars::Series.new("a", [1, 2, 3])
1833
+ # s.to_numo
1834
+ # # =>
1835
+ # # Numo::Int64#shape=[3]
1836
+ # # [1, 2, 3]
1837
+ def to_numo
1838
+ if !has_validity
1839
+ if is_datelike
1840
+ Numo::RObject.cast(to_a)
1841
+ elsif is_numeric
1842
+ # TODO make more efficient
1843
+ {
1844
+ UInt8 => Numo::UInt8,
1845
+ UInt16 => Numo::UInt16,
1846
+ UInt32 => Numo::UInt32,
1847
+ UInt64 => Numo::UInt64,
1848
+ Int8 => Numo::Int8,
1849
+ Int16 => Numo::Int16,
1850
+ Int32 => Numo::Int32,
1851
+ Int64 => Numo::Int64,
1852
+ Float32 => Numo::SFloat,
1853
+ Float64 => Numo::DFloat
1854
+ }.fetch(dtype).cast(to_a)
1855
+ elsif is_boolean
1856
+ Numo::Bit.cast(to_a)
1857
+ else
1858
+ _s.to_numo
1859
+ end
1860
+ elsif is_datelike
1861
+ Numo::RObject.cast(to_a)
1862
+ else
1863
+ _s.to_numo
1864
+ end
1865
+ end
1824
1866
 
1825
1867
  # Set masked values.
1826
1868
  #