polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
data/lib/polars.rb CHANGED
@@ -26,8 +26,8 @@ require_relative "polars/catalog/unity/namespace_info"
26
26
  require_relative "polars/catalog/unity/table_info"
27
27
  require_relative "polars/config"
28
28
  require_relative "polars/convert"
29
- require_relative "polars/plot"
30
29
  require_relative "polars/data_frame"
30
+ require_relative "polars/data_frame_plot"
31
31
  require_relative "polars/data_types"
32
32
  require_relative "polars/data_type_expr"
33
33
  require_relative "polars/data_type_group"
@@ -41,6 +41,7 @@ require_relative "polars/functions/business"
41
41
  require_relative "polars/functions/col"
42
42
  require_relative "polars/functions/datatype"
43
43
  require_relative "polars/functions/eager"
44
+ require_relative "polars/functions/escape_regex"
44
45
  require_relative "polars/functions/lazy"
45
46
  require_relative "polars/functions/len"
46
47
  require_relative "polars/functions/lit"
@@ -52,10 +53,13 @@ require_relative "polars/functions/aggregation/vertical"
52
53
  require_relative "polars/functions/range/date_range"
53
54
  require_relative "polars/functions/range/datetime_range"
54
55
  require_relative "polars/functions/range/int_range"
56
+ require_relative "polars/functions/range/linear_space"
55
57
  require_relative "polars/functions/range/time_range"
56
58
  require_relative "polars/group_by"
57
59
  require_relative "polars/iceberg_dataset"
60
+ require_relative "polars/in_process_query"
58
61
  require_relative "polars/io/avro"
62
+ require_relative "polars/io/cloud"
59
63
  require_relative "polars/io/csv"
60
64
  require_relative "polars/io/database"
61
65
  require_relative "polars/io/delta"
@@ -79,6 +83,7 @@ require_relative "polars/schema"
79
83
  require_relative "polars/selector"
80
84
  require_relative "polars/selectors"
81
85
  require_relative "polars/series"
86
+ require_relative "polars/series_plot"
82
87
  require_relative "polars/slice"
83
88
  require_relative "polars/sql_context"
84
89
  require_relative "polars/string_cache"
@@ -89,9 +94,14 @@ require_relative "polars/struct_name_space"
89
94
  require_relative "polars/testing"
90
95
  require_relative "polars/utils"
91
96
  require_relative "polars/utils/constants"
97
+ require_relative "polars/utils/construction/data_frame"
98
+ require_relative "polars/utils/construction/series"
99
+ require_relative "polars/utils/construction/utils"
92
100
  require_relative "polars/utils/convert"
101
+ require_relative "polars/utils/deprecation"
93
102
  require_relative "polars/utils/parse"
94
103
  require_relative "polars/utils/serde"
104
+ require_relative "polars/utils/unstable"
95
105
  require_relative "polars/utils/various"
96
106
  require_relative "polars/utils/wrap"
97
107
  require_relative "polars/version"
@@ -105,6 +115,9 @@ module Polars
105
115
  # @private
106
116
  F = self
107
117
 
118
+ # @private
119
+ NO_DEFAULT = Object.new
120
+
108
121
  # @private
109
122
  N_INFER_DEFAULT = 100
110
123
 
@@ -121,4 +134,37 @@ module Polars
121
134
  def self.thread_pool_size
122
135
  Plr.thread_pool_size
123
136
  end
137
+
138
+ # Return the data type used for Polars indexing.
139
+ #
140
+ # @return [Object]
141
+ #
142
+ # @example
143
+ # Polars.get_index_type
144
+ # # => Polars::UInt32
145
+ def self.get_index_type
146
+ Plr.get_index_type
147
+ end
148
+
149
+ # Return detailed Polars build information.
150
+ #
151
+ # @return [Hash]
152
+ #
153
+ # @example
154
+ # Polars.build_info
155
+ def self.build_info
156
+ {"version" => VERSION}
157
+ end
158
+
159
+ # Print out the version of Polars and its optional dependencies.
160
+ #
161
+ # @return [nil]
162
+ def self.show_versions
163
+ puts "--------Version info---------"
164
+ puts "Polars: #{VERSION}"
165
+ puts "Index type: #{get_index_type}"
166
+ puts "Platform: #{RUBY_PLATFORM}"
167
+ puts "Ruby: #{RUBY_VERSION}"
168
+ nil
169
+ end
124
170
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.0
4
+ version: 0.24.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -51,8 +51,9 @@ files:
51
51
  - README.md
52
52
  - ext/polars/Cargo.toml
53
53
  - ext/polars/extconf.rb
54
- - ext/polars/src/allocator.rs
55
54
  - ext/polars/src/batched_csv.rs
55
+ - ext/polars/src/c_api/allocator.rs
56
+ - ext/polars/src/c_api/mod.rs
56
57
  - ext/polars/src/catalog/mod.rs
57
58
  - ext/polars/src/catalog/unity.rs
58
59
  - ext/polars/src/conversion/any_value.rs
@@ -96,15 +97,19 @@ files:
96
97
  - ext/polars/src/functions/random.rs
97
98
  - ext/polars/src/functions/range.rs
98
99
  - ext/polars/src/functions/string_cache.rs
100
+ - ext/polars/src/functions/strings.rs
101
+ - ext/polars/src/functions/utils.rs
99
102
  - ext/polars/src/functions/whenthen.rs
100
103
  - ext/polars/src/interop/arrow/mod.rs
101
- - ext/polars/src/interop/arrow/to_ruby.rs
104
+ - ext/polars/src/interop/arrow/to_rb.rs
105
+ - ext/polars/src/interop/arrow/to_rust.rs
102
106
  - ext/polars/src/interop/mod.rs
103
107
  - ext/polars/src/interop/numo/mod.rs
104
108
  - ext/polars/src/interop/numo/numo_rs.rs
105
109
  - ext/polars/src/interop/numo/to_numo_df.rs
106
110
  - ext/polars/src/interop/numo/to_numo_series.rs
107
111
  - ext/polars/src/io/mod.rs
112
+ - ext/polars/src/lazyframe/exitable.rs
108
113
  - ext/polars/src/lazyframe/general.rs
109
114
  - ext/polars/src/lazyframe/mod.rs
110
115
  - ext/polars/src/lazyframe/optflags.rs
@@ -131,6 +136,10 @@ files:
131
136
  - ext/polars/src/series/mod.rs
132
137
  - ext/polars/src/series/scatter.rs
133
138
  - ext/polars/src/sql.rs
139
+ - ext/polars/src/testing/frame.rs
140
+ - ext/polars/src/testing/mod.rs
141
+ - ext/polars/src/testing/series.rs
142
+ - ext/polars/src/timeout.rs
134
143
  - ext/polars/src/utils.rs
135
144
  - lib/polars-df.rb
136
145
  - lib/polars.rb
@@ -149,6 +158,7 @@ files:
149
158
  - lib/polars/config.rb
150
159
  - lib/polars/convert.rb
151
160
  - lib/polars/data_frame.rb
161
+ - lib/polars/data_frame_plot.rb
152
162
  - lib/polars/data_type_expr.rb
153
163
  - lib/polars/data_type_group.rb
154
164
  - lib/polars/data_types.rb
@@ -165,6 +175,7 @@ files:
165
175
  - lib/polars/functions/col.rb
166
176
  - lib/polars/functions/datatype.rb
167
177
  - lib/polars/functions/eager.rb
178
+ - lib/polars/functions/escape_regex.rb
168
179
  - lib/polars/functions/lazy.rb
169
180
  - lib/polars/functions/len.rb
170
181
  - lib/polars/functions/lit.rb
@@ -172,12 +183,15 @@ files:
172
183
  - lib/polars/functions/range/date_range.rb
173
184
  - lib/polars/functions/range/datetime_range.rb
174
185
  - lib/polars/functions/range/int_range.rb
186
+ - lib/polars/functions/range/linear_space.rb
175
187
  - lib/polars/functions/range/time_range.rb
176
188
  - lib/polars/functions/repeat.rb
177
189
  - lib/polars/functions/whenthen.rb
178
190
  - lib/polars/group_by.rb
179
191
  - lib/polars/iceberg_dataset.rb
192
+ - lib/polars/in_process_query.rb
180
193
  - lib/polars/io/avro.rb
194
+ - lib/polars/io/cloud.rb
181
195
  - lib/polars/io/csv.rb
182
196
  - lib/polars/io/database.rb
183
197
  - lib/polars/io/delta.rb
@@ -194,7 +208,6 @@ files:
194
208
  - lib/polars/list_name_space.rb
195
209
  - lib/polars/meta_expr.rb
196
210
  - lib/polars/name_expr.rb
197
- - lib/polars/plot.rb
198
211
  - lib/polars/query_opt_flags.rb
199
212
  - lib/polars/rolling_group_by.rb
200
213
  - lib/polars/scan_cast_options.rb
@@ -202,6 +215,7 @@ files:
202
215
  - lib/polars/selector.rb
203
216
  - lib/polars/selectors.rb
204
217
  - lib/polars/series.rb
218
+ - lib/polars/series_plot.rb
205
219
  - lib/polars/slice.rb
206
220
  - lib/polars/sql_context.rb
207
221
  - lib/polars/string_cache.rb
@@ -212,9 +226,14 @@ files:
212
226
  - lib/polars/testing.rb
213
227
  - lib/polars/utils.rb
214
228
  - lib/polars/utils/constants.rb
229
+ - lib/polars/utils/construction/data_frame.rb
230
+ - lib/polars/utils/construction/series.rb
231
+ - lib/polars/utils/construction/utils.rb
215
232
  - lib/polars/utils/convert.rb
233
+ - lib/polars/utils/deprecation.rb
216
234
  - lib/polars/utils/parse.rb
217
235
  - lib/polars/utils/serde.rb
236
+ - lib/polars/utils/unstable.rb
218
237
  - lib/polars/utils/various.rb
219
238
  - lib/polars/utils/wrap.rb
220
239
  - lib/polars/version.rb
@@ -230,14 +249,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
230
249
  requirements:
231
250
  - - ">="
232
251
  - !ruby/object:Gem::Version
233
- version: '3.2'
252
+ version: '3.3'
234
253
  required_rubygems_version: !ruby/object:Gem::Requirement
235
254
  requirements:
236
255
  - - ">="
237
256
  - !ruby/object:Gem::Version
238
257
  version: '0'
239
258
  requirements: []
240
- rubygems_version: 3.6.9
259
+ rubygems_version: 4.0.3
241
260
  specification_version: 4
242
261
  summary: Blazingly fast DataFrames for Ruby
243
262
  test_files: []
data/ext/polars/src/allocator.rs DELETED
@@ -1,13 +0,0 @@
1
- #[cfg(target_os = "linux")]
2
- use jemallocator::Jemalloc;
3
-
4
- #[cfg(not(any(target_os = "linux", target_os = "windows")))]
5
- use mimalloc::MiMalloc;
6
-
7
- #[global_allocator]
8
- #[cfg(target_os = "linux")]
9
- static ALLOC: Jemalloc = Jemalloc;
10
-
11
- #[global_allocator]
12
- #[cfg(not(any(target_os = "linux", target_os = "windows")))]
13
- static ALLOC: MiMalloc = MiMalloc;
data/lib/polars/plot.rb DELETED
@@ -1,109 +0,0 @@
1
- module Polars
2
- module Plot
3
- # Plot data.
4
- #
5
- # @return [Vega::LiteChart]
6
- def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
7
- require "vega"
8
-
9
- raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
10
- x ||= columns[0]
11
- y ||= columns[1]
12
- type ||= begin
13
- if self[x].numeric? && self[y].numeric?
14
- "scatter"
15
- elsif self[x].utf8? && self[y].numeric?
16
- "column"
17
- elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
18
- "line"
19
- else
20
- raise "Cannot determine type. Use the type option."
21
- end
22
- end
23
- df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
24
- data = df.rows(named: true)
25
-
26
- case type
27
- when "line", "area"
28
- x_type =
29
- if df[x].numeric?
30
- "quantitative"
31
- elsif df[x].datelike?
32
- "temporal"
33
- else
34
- "nominal"
35
- end
36
-
37
- scale = x_type == "temporal" ? {type: "utc"} : {}
38
- encoding = {
39
- x: {field: x, type: x_type, scale: scale},
40
- y: {field: y, type: "quantitative"}
41
- }
42
- encoding[:color] = {field: group} if group
43
-
44
- Vega.lite
45
- .data(data)
46
- .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
47
- .encoding(encoding)
48
- .config(axis: {labelFontSize: 12})
49
- when "pie"
50
- raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
51
-
52
- Vega.lite
53
- .data(data)
54
- .mark(type: "arc", tooltip: true)
55
- .encoding(
56
- color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
57
- theta: {field: y, type: "quantitative"}
58
- )
59
- .view(stroke: nil)
60
- when "column"
61
- encoding = {
62
- x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
63
- y: {field: y, type: "quantitative"}
64
- }
65
- if group
66
- encoding[:color] = {field: group}
67
- encoding[:xOffset] = {field: group} unless stacked
68
- end
69
-
70
- Vega.lite
71
- .data(data)
72
- .mark(type: "bar", tooltip: true)
73
- .encoding(encoding)
74
- .config(axis: {labelFontSize: 12})
75
- when "bar"
76
- encoding = {
77
- # TODO determine label angle
78
- y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
79
- x: {field: y, type: "quantitative"}
80
- }
81
- if group
82
- encoding[:color] = {field: group}
83
- encoding[:yOffset] = {field: group} unless stacked
84
- end
85
-
86
- Vega.lite
87
- .data(data)
88
- .mark(type: "bar", tooltip: true)
89
- .encoding(encoding)
90
- .config(axis: {labelFontSize: 12})
91
- when "scatter"
92
- encoding = {
93
- x: {field: x, type: "quantitative", scale: {zero: false}},
94
- y: {field: y, type: "quantitative", scale: {zero: false}},
95
- size: {value: 60}
96
- }
97
- encoding[:color] = {field: group} if group
98
-
99
- Vega.lite
100
- .data(data)
101
- .mark(type: "circle", tooltip: true)
102
- .encoding(encoding)
103
- .config(axis: {labelFontSize: 12})
104
- else
105
- raise ArgumentError, "Invalid type: #{type}"
106
- end
107
- end
108
- end
109
- end