polars-df 0.14.0-aarch64-linux-musl → 0.16.0-aarch64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +23495 -12923
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.so +0 -0
  8. data/lib/polars/3.3/polars.so +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +10 -9
  45. data/lib/polars/functions.rb +0 -57
data/LICENSE.txt CHANGED
@@ -1,5 +1,6 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
2
  Copyright (c) 2022-2024 Andrew Kane
3
+ Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
4
 
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -14,18 +14,17 @@ gem "polars-df"
14
14
 
15
15
  ## Getting Started
16
16
 
17
- This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
17
+ This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
18
18
 
19
19
  ```ruby
20
- Polars.read_csv("iris.csv")
21
- .lazy
20
+ Polars.scan_csv("iris.csv")
22
21
  .filter(Polars.col("sepal_length") > 5)
23
22
  .group_by("species")
24
23
  .agg(Polars.all.sum)
25
24
  .collect
26
25
  ```
27
26
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
27
+ You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
28
 
30
29
  ## Reference
31
30
 
@@ -89,6 +88,15 @@ From Avro
89
88
  Polars.read_avro("file.avro")
90
89
  ```
91
90
 
91
+ From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
92
+
93
+ ```ruby
94
+ Polars.read_delta("./table")
95
+
96
+ # or lazily with
97
+ Polars.scan_delta("./table")
98
+ ```
99
+
92
100
  From a hash
93
101
 
94
102
  ```ruby
@@ -337,6 +345,32 @@ Parquet
337
345
  df.write_parquet("file.parquet")
338
346
  ```
339
347
 
348
+ JSON
349
+
350
+ ```ruby
351
+ df.write_json("file.json")
352
+ # or
353
+ df.write_ndjson("file.ndjson")
354
+ ```
355
+
356
+ Feather / Arrow IPC
357
+
358
+ ```ruby
359
+ df.write_ipc("file.arrow")
360
+ ```
361
+
362
+ Avro
363
+
364
+ ```ruby
365
+ df.write_avro("file.avro")
366
+ ```
367
+
368
+ Delta Lake [experimental, unreleased]
369
+
370
+ ```ruby
371
+ df.write_delta("./table")
372
+ ```
373
+
340
374
  Numo array
341
375
 
342
376
  ```ruby
Binary file
Binary file
Binary file
@@ -26,7 +26,6 @@ module Polars
26
26
  skip_rows_after_header: 0,
27
27
  row_count_name: nil,
28
28
  row_count_offset: 0,
29
- sample_size: 1024,
30
29
  eol_char: "\n",
31
30
  new_columns: nil,
32
31
  raise_if_empty: true,
@@ -79,7 +78,6 @@ module Polars
79
78
  parse_dates,
80
79
  skip_rows_after_header,
81
80
  Utils.parse_row_index_args(row_count_name, row_count_offset),
82
- sample_size,
83
81
  eol_char,
84
82
  raise_if_empty,
85
83
  truncate_ragged_lines,
@@ -11,32 +11,112 @@ module Polars
11
11
 
12
12
  # Check if binaries in Series contain a binary substring.
13
13
  #
14
- # @param lit [String]
14
+ # @param literal [String]
15
15
  # The binary substring to look for
16
16
  #
17
17
  # @return [Expr]
18
- def contains(lit)
19
- Utils.wrap_expr(_rbexpr.binary_contains(lit))
18
+ #
19
+ # @example
20
+ # colors = Polars::DataFrame.new(
21
+ # {
22
+ # "name" => ["black", "yellow", "blue"],
23
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
24
+ # "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
25
+ # }
26
+ # )
27
+ # colors.select(
28
+ # "name",
29
+ # Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
30
+ # Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
31
+ # )
32
+ # # =>
33
+ # # shape: (3, 3)
34
+ # # ┌────────┬───────────────────┬────────────────────┐
35
+ # # │ name ┆ contains_with_lit ┆ contains_with_expr │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ str ┆ bool ┆ bool │
38
+ # # ╞════════╪═══════════════════╪════════════════════╡
39
+ # # │ black ┆ false ┆ true │
40
+ # # │ yellow ┆ true ┆ true │
41
+ # # │ blue ┆ true ┆ false │
42
+ # # └────────┴───────────────────┴────────────────────┘
43
+ def contains(literal)
44
+ literal = Utils.parse_into_expression(literal, str_as_lit: true)
45
+ Utils.wrap_expr(_rbexpr.binary_contains(literal))
20
46
  end
21
47
 
22
48
  # Check if values end with a binary substring.
23
49
  #
24
- # @param sub [String]
50
+ # @param suffix [String]
25
51
  # Suffix substring.
26
52
  #
27
53
  # @return [Expr]
28
- def ends_with(sub)
29
- Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
54
+ #
55
+ # @example
56
+ # colors = Polars::DataFrame.new(
57
+ # {
58
+ # "name" => ["black", "yellow", "blue"],
59
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
60
+ # "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
61
+ # }
62
+ # )
63
+ # colors.select(
64
+ # "name",
65
+ # Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
66
+ # Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
67
+ # )
68
+ # # =>
69
+ # # shape: (3, 3)
70
+ # # ┌────────┬───────────────┬────────────────┐
71
+ # # │ name ┆ ends_with_lit ┆ ends_with_expr │
72
+ # # │ --- ┆ --- ┆ --- │
73
+ # # │ str ┆ bool ┆ bool │
74
+ # # ╞════════╪═══════════════╪════════════════╡
75
+ # # │ black ┆ false ┆ true │
76
+ # # │ yellow ┆ false ┆ true │
77
+ # # │ blue ┆ true ┆ false │
78
+ # # └────────┴───────────────┴────────────────┘
79
+ def ends_with(suffix)
80
+ suffix = Utils.parse_into_expression(suffix, str_as_lit: true)
81
+ Utils.wrap_expr(_rbexpr.binary_ends_with(suffix))
30
82
  end
31
83
 
32
84
  # Check if values start with a binary substring.
33
85
  #
34
- # @param sub [String]
86
+ # @param prefix [String]
35
87
  # Prefix substring.
36
88
  #
37
89
  # @return [Expr]
38
- def starts_with(sub)
39
- Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
90
+ #
91
+ # @example
92
+ # colors = Polars::DataFrame.new(
93
+ # {
94
+ # "name": ["black", "yellow", "blue"],
95
+ # "code": ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
96
+ # "prefix": ["\x00".b, "\xff\x00".b, "\x00\x00".b]
97
+ # }
98
+ # )
99
+ # colors.select(
100
+ # "name",
101
+ # Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
102
+ # Polars.col("code")
103
+ # .bin.starts_with(Polars.col("prefix"))
104
+ # .alias("starts_with_expr")
105
+ # )
106
+ # # =>
107
+ # # shape: (3, 3)
108
+ # # ┌────────┬─────────────────┬──────────────────┐
109
+ # # │ name ┆ starts_with_lit ┆ starts_with_expr │
110
+ # # │ --- ┆ --- ┆ --- │
111
+ # # │ str ┆ bool ┆ bool │
112
+ # # ╞════════╪═════════════════╪══════════════════╡
113
+ # # │ black ┆ false ┆ true │
114
+ # # │ yellow ┆ true ┆ false │
115
+ # # │ blue ┆ false ┆ true │
116
+ # # └────────┴─────────────────┴──────────────────┘
117
+ def starts_with(prefix)
118
+ prefix = Utils.parse_into_expression(prefix, str_as_lit: true)
119
+ Utils.wrap_expr(_rbexpr.binary_starts_with(prefix))
40
120
  end
41
121
 
42
122
  # Decode a value using the provided encoding.
@@ -48,6 +128,28 @@ module Polars
48
128
  # otherwise mask out with a null value.
49
129
  #
50
130
  # @return [Expr]
131
+ #
132
+ # @example
133
+ # colors = Polars::DataFrame.new(
134
+ # {
135
+ # "name" => ["black", "yellow", "blue"],
136
+ # "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
137
+ # }
138
+ # )
139
+ # colors.with_columns(
140
+ # Polars.col("encoded").bin.decode("hex").alias("code")
141
+ # )
142
+ # # =>
143
+ # # shape: (3, 3)
144
+ # # ┌────────┬───────────┬─────────────────┐
145
+ # # │ name ┆ encoded ┆ code │
146
+ # # │ --- ┆ --- ┆ --- │
147
+ # # │ str ┆ binary ┆ binary │
148
+ # # ╞════════╪═══════════╪═════════════════╡
149
+ # # │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
150
+ # # │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
151
+ # # │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
152
+ # # └────────┴───────────┴─────────────────┘
51
153
  def decode(encoding, strict: true)
52
154
  if encoding == "hex"
53
155
  Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
@@ -64,6 +166,28 @@ module Polars
64
166
  # The encoding to use.
65
167
  #
66
168
  # @return [Expr]
169
+ #
170
+ # @example
171
+ # colors = Polars::DataFrame.new(
172
+ # {
173
+ # "color" => ["black", "yellow", "blue"],
174
+ # "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
175
+ # }
176
+ # )
177
+ # colors.with_columns(
178
+ # Polars.col("code").bin.encode("hex").alias("encoded")
179
+ # )
180
+ # # =>
181
+ # # shape: (3, 3)
182
+ # # ┌────────┬─────────────────┬─────────┐
183
+ # # │ color ┆ code ┆ encoded │
184
+ # # │ --- ┆ --- ┆ --- │
185
+ # # │ str ┆ binary ┆ str │
186
+ # # ╞════════╪═════════════════╪═════════╡
187
+ # # │ black ┆ b"\x00\x00\x00" ┆ 000000 │
188
+ # # │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
189
+ # # │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
190
+ # # └────────┴─────────────────┴─────────┘
67
191
  def encode(encoding)
68
192
  if encoding == "hex"
69
193
  Utils.wrap_expr(_rbexpr.binary_hex_encode)
@@ -12,31 +12,67 @@ module Polars
12
12
 
13
13
  # Check if binaries in Series contain a binary substring.
14
14
  #
15
- # @param lit [String]
15
+ # @param literal [String]
16
16
  # The binary substring to look for
17
17
  #
18
18
  # @return [Series]
19
- def contains(lit)
19
+ #
20
+ # @example
21
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
22
+ # s.bin.contains("\xff".b)
23
+ # # =>
24
+ # # shape: (3,)
25
+ # # Series: 'colors' [bool]
26
+ # # [
27
+ # # false
28
+ # # true
29
+ # # true
30
+ # # ]
31
+ def contains(literal)
20
32
  super
21
33
  end
22
34
 
23
35
  # Check if values end with a binary substring.
24
36
  #
25
- # @param sub [String]
37
+ # @param suffix [String]
26
38
  # Suffix substring.
27
39
  #
28
40
  # @return [Series]
29
- def ends_with(sub)
41
+ #
42
+ # @example
43
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
44
+ # s.bin.ends_with("\x00".b)
45
+ # # =>
46
+ # # shape: (3,)
47
+ # # Series: 'colors' [bool]
48
+ # # [
49
+ # # true
50
+ # # true
51
+ # # false
52
+ # # ]
53
+ def ends_with(suffix)
30
54
  super
31
55
  end
32
56
 
33
57
  # Check if values start with a binary substring.
34
58
  #
35
- # @param sub [String]
59
+ # @param prefix [String]
36
60
  # Prefix substring.
37
61
  #
38
62
  # @return [Series]
39
- def starts_with(sub)
63
+ #
64
+ # @example
65
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
66
+ # s.bin.starts_with("\x00".b)
67
+ # # =>
68
+ # # shape: (3,)
69
+ # # Series: 'colors' [bool]
70
+ # # [
71
+ # # true
72
+ # # false
73
+ # # true
74
+ # # ]
75
+ def starts_with(prefix)
40
76
  super
41
77
  end
42
78
 
@@ -49,6 +85,42 @@ module Polars
49
85
  # otherwise mask out with a null value.
50
86
  #
51
87
  # @return [Series]
88
+ #
89
+ # @example Decode values using hexadecimal encoding.
90
+ # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "0000ff".b])
91
+ # s.bin.decode("hex")
92
+ # # =>
93
+ # # shape: (3,)
94
+ # # Series: 'colors' [binary]
95
+ # # [
96
+ # # b"\x00\x00\x00"
97
+ # # b"\xff\xff\x00"
98
+ # # b"\x00\x00\xff"
99
+ # # ]
100
+ #
101
+ # @example Decode values using Base64 encoding.
102
+ # s = Polars::Series.new("colors", ["AAAA".b, "//8A".b, "AAD/".b])
103
+ # s.bin.decode("base64")
104
+ # # =>
105
+ # # shape: (3,)
106
+ # # Series: 'colors' [binary]
107
+ # # [
108
+ # # b"\x00\x00\x00"
109
+ # # b"\xff\xff\x00"
110
+ # # b"\x00\x00\xff"
111
+ # # ]
112
+ #
113
+ # @example Set `strict: false` to set invalid values to null instead of raising an error.
114
+ # s = Polars::Series.new("colors", ["000000".b, "ffff00".b, "invalid_value".b])
115
+ # s.bin.decode("hex", strict: false)
116
+ # # =>
117
+ # # shape: (3,)
118
+ # # Series: 'colors' [binary]
119
+ # # [
120
+ # # b"\x00\x00\x00"
121
+ # # b"\xff\xff\x00"
122
+ # # null
123
+ # # ]
52
124
  def decode(encoding, strict: true)
53
125
  super
54
126
  end
@@ -59,6 +131,29 @@ module Polars
59
131
  # The encoding to use.
60
132
  #
61
133
  # @return [Series]
134
+ #
135
+ # @example Encode values using hexadecimal encoding.
136
+ # s = Polars::Series.new("colors", ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b])
137
+ # s.bin.encode("hex")
138
+ # # =>
139
+ # # shape: (3,)
140
+ # # Series: 'colors' [str]
141
+ # # [
142
+ # # "000000"
143
+ # # "ffff00"
144
+ # # "0000ff"
145
+ # # ]
146
+ #
147
+ # @example Encode values using Base64 encoding.
148
+ # s.bin.encode("base64")
149
+ # # =>
150
+ # # shape: (3,)
151
+ # # Series: 'colors' [str]
152
+ # # [
153
+ # # "AAAA"
154
+ # # "//8A"
155
+ # # "AAD/"
156
+ # # ]
62
157
  def encode(encoding)
63
158
  super
64
159
  end
data/lib/polars/config.rb CHANGED
@@ -527,4 +527,8 @@ module Polars
527
527
  self
528
528
  end
529
529
  end
530
+
531
+ def self.config(...)
532
+ Config.new(...)
533
+ end
530
534
  end