polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -26,7 +26,6 @@ module Polars
26
26
  skip_rows_after_header: 0,
27
27
  row_count_name: nil,
28
28
  row_count_offset: 0,
29
- sample_size: 1024,
30
29
  eol_char: "\n",
31
30
  new_columns: nil,
32
31
  raise_if_empty: true,
@@ -79,7 +78,6 @@ module Polars
79
78
  parse_dates,
80
79
  skip_rows_after_header,
81
80
  Utils.parse_row_index_args(row_count_name, row_count_offset),
82
- sample_size,
83
81
  eol_char,
84
82
  raise_if_empty,
85
83
  truncate_ragged_lines,
@@ -11,32 +11,112 @@ module Polars
11
11
 
12
12
  # Check if binaries in Series contain a binary substring.
13
13
  #
14
- # @param lit [String]
14
+ # @param literal [String, Expr]
15
15
  # The binary substring to look for
16
16
  #
17
17
  # @return [Expr]
18
- def contains(lit)
19
- Utils.wrap_expr(_rbexpr.binary_contains(lit))
18
+ #
19
+ # @example
20
+ # colors = Polars::DataFrame.new(
21
+ # {
22
+ # "name" => ["black", "yellow", "blue"],
23
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
24
+ # "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
25
+ # }
26
+ # )
27
+ # colors.select(
28
+ # "name",
29
+ # Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
30
+ # Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
31
+ # )
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌────────┬───────────────────┬────────────────────┐
35
+ # # │ name ┆ contains_with_lit ┆ contains_with_expr │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ bool ┆ bool │
38
+ # # ╞════════╪═══════════════════╪════════════════════╡
39
+ # # │ black ┆ false ┆ true │
40
+ # # │ yellow ┆ true ┆ true │
41
+ # # │ blue ┆ true ┆ false │
42
+ # # └────────┴───────────────────┴────────────────────┘
43
+ def contains(literal)
44
+ literal = Utils.parse_into_expression(literal, str_as_lit: true)
45
+ Utils.wrap_expr(_rbexpr.binary_contains(literal))
20
46
  end
21
47
 
22
48
  # Check if values end with a binary substring.
23
49
  #
24
- # @param sub [String]
50
+ # @param suffix [String, Expr]
25
51
  # Suffix substring.
26
52
  #
27
53
  # @return [Expr]
28
- def ends_with(sub)
29
- Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
54
+ #
55
+ # @example
56
+ # colors = Polars::DataFrame.new(
57
+ # {
58
+ # "name" => ["black", "yellow", "blue"],
59
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
60
+ # "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
61
+ # }
62
+ # )
63
+ # colors.select(
64
+ # "name",
65
+ # Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
66
+ # Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
67
+ # )
68
+ # # =>
69
+ # # shape: (3, 3)
70
+ # # ┌────────┬───────────────┬────────────────┐
71
+ # # │ name ┆ ends_with_lit ┆ ends_with_expr │
72
+ # # │ --- ┆ --- ┆ --- │
73
+ # # │ str ┆ bool ┆ bool │
74
+ # # ╞════════╪═══════════════╪════════════════╡
75
+ # # │ black ┆ false ┆ true │
76
+ # # │ yellow ┆ false ┆ true │
77
+ # # │ blue ┆ true ┆ false │
78
+ # # └────────┴───────────────┴────────────────┘
79
+ def ends_with(suffix)
80
+ suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
81
+ Utils.wrap_expr(_rbexpr.binary_ends_with(suffix))
30
82
  end
31
83
 
32
84
  # Check if values start with a binary substring.
33
85
  #
34
- # @param sub [String]
86
+ # @param prefix [String, Expr]
35
87
  # Prefix substring.
36
88
  #
37
89
  # @return [Expr]
38
- def starts_with(sub)
39
- Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
90
+ #
91
+ # @example
92
+ # colors = Polars::DataFrame.new(
93
+ # {
94
+ # "name": ["black", "yellow", "blue"],
95
+ # "code": ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
96
+ # "prefix": ["\x00".b, "\xff\x00".b, "\x00\x00".b]
97
+ # }
98
+ # )
99
+ # colors.select(
100
+ # "name",
101
+ # Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
102
+ # Polars.col("code")
103
+ # .bin.starts_with(Polars.col("prefix"))
104
+ # .alias("starts_with_expr")
105
+ # )
106
+ # # =>
107
+ # # shape: (3, 3)
108
+ # # ┌────────┬─────────────────┬──────────────────┐
109
+ # # │ name ┆ starts_with_lit ┆ starts_with_expr │
110
+ # # │ --- ┆ --- ┆ --- │
111
+ # # │ str ┆ bool ┆ bool │
112
+ # # ╞════════╪═════════════════╪══════════════════╡
113
+ # # │ black ┆ false ┆ true │
114
+ # # │ yellow ┆ true ┆ false │
115
+ # # │ blue ┆ false ┆ true │
116
+ # # └────────┴─────────────────┴──────────────────┘
117
+ def starts_with(prefix)
118
+ prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
119
+ Utils.wrap_expr(_rbexpr.binary_starts_with(prefix))
40
120
  end
41
121
 
42
122
  # Decode a value using the provided encoding.
@@ -48,6 +128,28 @@ module Polars
48
128
  # otherwise mask out with a null value.
49
129
  #
50
130
  # @return [Expr]
131
+ #
132
+ # @example
133
+ # colors = Polars::DataFrame.new(
134
+ # {
135
+ # "name" => ["black", "yellow", "blue"],
136
+ # "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
137
+ # }
138
+ # )
139
+ # colors.with_columns(
140
+ # Polars.col("encoded").bin.decode("hex").alias("code")
141
+ # )
142
+ # # =>
143
+ # # shape: (3, 3)
144
+ # # ┌────────┬───────────┬─────────────────┐
145
+ # # │ name ┆ encoded ┆ code │
146
+ # # │ --- ┆ --- ┆ --- │
147
+ # # │ str ┆ binary ┆ binary │
148
+ # # ╞════════╪═══════════╪═════════════════╡
149
+ # # │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
150
+ # # │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
151
+ # # │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
152
+ # # └────────┴───────────┴─────────────────┘
51
153
  def decode(encoding, strict: true)
52
154
  if encoding == "hex"
53
155
  Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
@@ -64,6 +166,28 @@ module Polars
64
166
  # The encoding to use.
65
167
  #
66
168
  # @return [Expr]
169
+ #
170
+ # @example
171
+ # colors = Polars::DataFrame.new(
172
+ # {
173
+ # "color" => ["black", "yellow", "blue"],
174
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
175
+ # }
176
+ # )
177
+ # colors.with_columns(
178
+ # Polars.col("code").bin.encode("hex").alias("encoded")
179
+ # )
180
+ # # =>
181
+ # # shape: (3, 3)
182
+ # # ┌────────┬─────────────────┬─────────┐
183
+ # # │ color ┆ code ┆ encoded │
184
+ # # │ --- ┆ --- ┆ --- │
185
+ # # │ str ┆ binary ┆ str │
186
+ # # ╞════════╪═════════════════╪═════════╡
187
+ # # │ black ┆ b"\x00\x00\x00" ┆ 000000 │
188
+ # # │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
189
+ # # │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
190
+ # # └────────┴─────────────────┴─────────┘
67
191
  def encode(encoding)
68
192
  if encoding == "hex"
69
193
  Utils.wrap_expr(_rbexpr.binary_hex_encode)
@@ -12,31 +12,67 @@ module Polars
12
12
 
13
13
  # Check if binaries in Series contain a binary substring.
14
14
  #
15
- # @param lit [String]
15
+ # @param literal [String]
16
16
  # The binary substring to look for
17
17
  #
18
18
  # @return [Series]
19
- def contains(lit)
19
+ #
20
+ # @example
21
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
22
+ # s.bin.contains("\xff".b)
23
+ # # =>
24
+ # # shape: (3,)
25
+ # # Series: 'colors' [bool]
26
+ # # [
27
+ # # false
28
+ # # true
29
+ # # true
30
+ # # ]
31
+ def contains(literal)
20
32
  super
21
33
  end
22
34
 
23
35
  # Check if values end with a binary substring.
24
36
  #
25
- # @param sub [String]
37
+ # @param suffix [String]
26
38
  # Suffix substring.
27
39
  #
28
40
  # @return [Series]
29
- def ends_with(sub)
41
+ #
42
+ # @example
43
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
44
+ # s.bin.ends_with("\x00".b)
45
+ # # =>
46
+ # # shape: (3,)
47
+ # # Series: 'colors' [bool]
48
+ # # [
49
+ # # true
50
+ # # true
51
+ # # false
52
+ # # ]
53
+ def ends_with(suffix)
30
54
  super
31
55
  end
32
56
 
33
57
  # Check if values start with a binary substring.
34
58
  #
35
- # @param sub [String]
59
+ # @param prefix [String]
36
60
  # Prefix substring.
37
61
  #
38
62
  # @return [Series]
39
- def starts_with(sub)
63
+ #
64
+ # @example
65
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
66
+ # s.bin.starts_with("\x00".b)
67
+ # # =>
68
+ # # shape: (3,)
69
+ # # Series: 'colors' [bool]
70
+ # # [
71
+ # # true
72
+ # # false
73
+ # # true
74
+ # # ]
75
+ def starts_with(prefix)
40
76
  super
41
77
  end
42
78
 
@@ -49,6 +85,42 @@ module Polars
49
85
  # otherwise mask out with a null value.
50
86
  #
51
87
  # @return [Series]
88
+ #
89
+ # @example Decode values using hexadecimal encoding.
90
+ # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b])
91
+ # s.bin.decode("hex")
92
+ # # =>
93
+ # # shape: (3,)
94
+ # # Series: 'colors' [binary]
95
+ # # [
96
+ # # b"\x00\x00\x00"
97
+ # # b"\xff\xff\x00"
98
+ # # b"\x00\x00\xff"
99
+ # # ]
100
+ #
101
+ # @example Decode values using Base64 encoding.
102
+ # s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b])
103
+ # s.bin.decode("base64")
104
+ # # =>
105
+ # # shape: (3,)
106
+ # # Series: 'colors' [binary]
107
+ # # [
108
+ # # b"\x00\x00\x00"
109
+ # # b"\xff\xff\x00"
110
+ # # b"\x00\x00\xff"
111
+ # # ]
112
+ #
113
+ # @example Set `strict: false` to set invalid values to null instead of raising an error.
114
+ # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b])
115
+ # s.bin.decode("hex", strict: false)
116
+ # # =>
117
+ # # shape: (3,)
118
+ # # Series: 'colors' [binary]
119
+ # # [
120
+ # # b"\x00\x00\x00"
121
+ # # b"\xff\xff\x00"
122
+ # # null
123
+ # # ]
52
124
  def decode(encoding, strict: true)
53
125
  super
54
126
  end
@@ -59,6 +131,29 @@ module Polars
59
131
  # The encoding to use.
60
132
  #
61
133
  # @return [Series]
134
+ #
135
+ # @example Encode values using hexadecimal encoding.
136
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
137
+ # s.bin.encode("hex")
138
+ # # =>
139
+ # # shape: (3,)
140
+ # # Series: 'colors' [str]
141
+ # # [
142
+ # # "000000"
143
+ # # "ffff00"
144
+ # # "0000ff"
145
+ # # ]
146
+ #
147
+ # @example Encode values using Base64 encoding.
148
+ # s.bin.encode("base64")
149
+ # # =>
150
+ # # shape: (3,)
151
+ # # Series: 'colors' [str]
152
+ # # [
153
+ # # "AAAA"
154
+ # # "//8A"
155
+ # # "AAD/"
156
+ # # ]
62
157
  def encode(encoding)
63
158
  super
64
159
  end
data/lib/polars/config.rb CHANGED
@@ -527,4 +527,8 @@ module Polars
527
527
  self
528
528
  end
529
529
  end
530
+
531
+ def self.config(...)
532
+ Config.new(...)
533
+ end
530
534
  end