polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -0,0 +1,364 @@
1
+ module Polars
2
+ module Utils
3
# Builds the native series for a Ruby sequence (array-like or Range).
#
# Dispatch order, as implemented below:
# 1. A Range whose begin is not a String is delegated to `range_to_series`;
#    a String range is expanded with `to_a` and handled like any sequence.
# 2. If the first non-nil element is a Hash (and dtype is not Object) the
#    values are loaded as a DataFrame and converted to a struct series.
# 3. A non-nested Polars dtype (other than Unknown) uses the constructor
#    from `polars_type_to_constructor`, then casts where required.
# 4. A Struct dtype goes through `sequence_to_rbdf`, with nil rows
#    replaced by an empty hash and their indices passed on as invalid.
# 5. Otherwise the class of the first non-nil element selects the path
#    (temporal, nested array, Series/RbSeries list, or plain scalar).
#
# @param name [String] name passed to the underlying constructors
# @param values [Array, Range] input sequence
# @param dtype [Object, nil] requested Polars data type, if any
# @param strict [Boolean] forwarded to the native constructors and casts
# @param nan_to_null [Boolean] only forwarded to recursive calls made for
#   nested lists
# @return the native series object produced by the selected constructor
# @raise [TypeError] when an uninstanced Decimal is requested for data
#   that is neither string, float, integer, null nor decimal
def self.sequence_to_rbseries(name, values, dtype: nil, strict: true, nan_to_null: false)
  ruby_dtype = nil

  if values.is_a?(Range)
    if values.begin.is_a?(::String)
      values = values.to_a
    else
      return range_to_series(name, values, dtype: dtype)._s
    end
  end

  if values.length == 0 && dtype.nil?
    dtype = Null
  elsif [List, Array].include?(dtype)
    ruby_dtype = ::Array
  end

  rb_temporal_types = [::Date, ::DateTime, ::Time]
  rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

  value = get_first_non_none(values)
  if !value.nil?
    if value.is_a?(Hash) && dtype != Object
      return DataFrame.new(values).to_struct(name)._s
    end
  end

  if !dtype.nil? && is_polars_dtype(dtype) && !dtype.nested? && dtype != Unknown && ruby_dtype.nil?
    constructor = polars_type_to_constructor(dtype)
    rbseries = _construct_series_with_fallbacks(
      constructor, name, values, dtype, strict: strict
    )

    if [Date, Datetime, Duration, Time, Boolean, Categorical, Enum].include?(dtype) || dtype.is_a?(Decimal) || dtype.is_a?(Categorical)
      if rbseries.dtype != dtype
        rbseries = rbseries.cast(dtype, true, false)
      end
    end

    # Uninstanced Decimal is a bit special and has various inference paths
    if dtype == Decimal
      if rbseries.dtype == String
        rbseries = rbseries.str_to_decimal_infer(0)
      elsif rbseries.dtype.float?
        # Go through string so we infer an appropriate scale.
        rbseries = rbseries.cast(
          String, strict, false
        ).str_to_decimal_infer(0)
      elsif rbseries.dtype.integer? || rbseries.dtype == Null
        rbseries = rbseries.cast(
          Decimal.new(nil, 0), strict, false
        )
      elsif !rbseries.dtype.is_a?(Decimal)
        msg = "can't convert #{rbseries.dtype} to Decimal"
        raise TypeError, msg
      end
    end

    return rbseries

  elsif dtype == Struct
    struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
    empty = {}

    # Single pass: substitute an empty row for every nil and remember
    # which positions were nil so they can be flagged as invalid.
    data = []
    invalid = []
    values.each_with_index do |v, i|
      if v.nil?
        invalid << i
        data << empty
      else
        data << v
      end
    end

    return sequence_to_rbdf(
      data,
      schema: struct_schema,
      orient: "row",
    ).to_struct(name, invalid)
  end

  if ruby_dtype.nil?
    if value.nil?
      # Every element is nil: build a null series.
      constructor = polars_type_to_constructor(Null)
      return constructor.(name, values, strict)
    end

    ruby_dtype = value.class
  end

  # temporal branch
  if rb_temporal_types.include?(ruby_dtype)
    if dtype.nil?
      dtype = parse_into_dtype(ruby_dtype)
    elsif rb_temporal_types.include?(dtype)
      dtype = parse_into_dtype(dtype)
    end

    values_dtype = value.nil? ? nil : try_parse_into_dtype(value.class)
    if !values_dtype.nil? && values_dtype.float?
      msg = "'float' object cannot be interpreted as a #{ruby_dtype.name.inspect}"
      raise TypeError, msg
    end

    rb_series = RbSeries.new_from_any_values(name, values, strict)

    time_unit = dtype.respond_to?(:time_unit) ? dtype.time_unit : nil
    time_zone = dtype.respond_to?(:time_zone) ? dtype.time_zone : nil

    if dtype.temporal? && values_dtype == String && dtype != Duration
      s = wrap_s(rb_series).str.strptime(dtype, strict: strict)
    elsif !time_unit.nil? && values_dtype != Date
      s = wrap_s(rb_series).dt.cast_time_unit(time_unit)
    else
      s = wrap_s(rb_series)
    end

    if dtype == Datetime && !time_zone.nil?
      return s.dt.convert_time_zone(time_zone)._s
    end
    s._s

  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
    raise Todo

  elsif ruby_dtype == ::Array
    if dtype.nil?
      RbSeries.new_from_any_values(name, values, strict)
    elsif dtype.is_a?(Object)
      RbSeries.new_object(name, values, strict)
    else
      inner_dtype = dtype.respond_to?(:inner) ? dtype.inner : nil
      if !inner_dtype.nil?
        # Build one child series per element; nil elements stay nil.
        rbseries_list =
          values.map do |item|
            if item.nil?
              nil
            else
              sequence_to_rbseries(
                "",
                item,
                dtype: inner_dtype,
                strict: strict,
                nan_to_null: nan_to_null,
              )
            end
          end
        rbseries = RbSeries.new_series_list(name, rbseries_list, strict)
      else
        # panics in Python
        raise Todo if dtype.eql?(Array)

        rbseries = RbSeries.new_from_any_values_and_dtype(
          name, values, dtype, strict
        )
      end
      if dtype != rbseries.dtype
        rbseries = rbseries.cast(dtype, false, false)
      end
      rbseries
    end

  elsif ruby_dtype == Series
    RbSeries.new_series_list(
      name, values.map { |v| !v.nil? ? v._s : v }, strict
    )

  elsif ruby_dtype == RbSeries
    RbSeries.new_series_list(name, values, strict)
  else
    constructor =
      if value.is_a?(::String)
        # UTF-8 strings become string series; any other encoding is
        # stored as binary.
        if value.encoding == Encoding::UTF_8
          RbSeries.method(:new_str)
        else
          RbSeries.method(:new_binary)
        end
      elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
        # TODO improve performance
        RbSeries.method(:new_opt_f64)
      else
        rb_type_to_constructor(value.class)
      end

    _construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
  end
end
191
+
192
# Calls `constructor` with (name, values, strict). If that call raises a
# StandardError, retries through the generic any-value constructors:
# without a dtype when `dtype` is nil, otherwise with the requested dtype.
# Errors raised by the fallback itself are not rescued.
def self._construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
  constructor.call(name, values, strict)
rescue
  return RbSeries.new_from_any_values(name, values, strict) if dtype.nil?

  RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
end
203
+
204
# Converts a Numo array into a native series.
#
# One-dimensional input is passed to the constructor chosen by
# `numo_type_to_constructor`; for SFloat/DFloat input the third constructor
# argument is `nan_to_null`, for every other type it is `strict`.
# Input with any other number of dimensions is flattened, converted
# recursively and then reshaped back to the original shape.
def self.numo_to_rbseries(name, values, strict: true, nan_to_null: false)
  # not needed yet
  # if !values.contiguous?
  # end

  shape = values.shape
  if shape.length != 1
    flattened = values.reshape(shape.reduce(:*))
    flat_series = numo_to_rbseries(
      name,
      flattened,
      strict: strict,
      nan_to_null: nan_to_null
    )
    return Utils.wrap_s(flat_series).reshape(shape)._s
  end

  values, dtype = numo_values_and_dtype(values)
  build = numo_type_to_constructor(dtype)
  float_input = [Numo::SFloat, Numo::DFloat].include?(dtype)
  build.call(name, values.to_a, float_input ? nan_to_null : strict)
end
228
+
229
# Returns the native series behind `values`, working on a clone so the
# caller's object is not reassigned. The clone is cast when `dtype` is
# given and differs from its current dtype, and renamed when `name` is
# not nil.
def self.series_to_rbseries(name, values, dtype: nil, strict: true)
  series = values.clone
  needs_cast = !dtype.nil? && dtype != series.dtype
  series = series.cast(dtype, strict: strict) if needs_cast
  series = series.alias(name) unless name.nil?
  series._s
end
239
+
240
# Turns a data frame into a single native series.
#
# More than one column: the frame becomes a struct series named `name`
# (or "" when no name is given). Exactly one column: that column is used
# and renamed when `name` is not nil. The result is cast when `dtype` is
# given and differs from the resulting dtype.
#
# @raise [TypeError] when the frame has no columns
def self.dataframe_to_rbseries(
  name,
  values,
  dtype: nil,
  strict: true
)
  column_count = values.width
  series =
    if column_count > 1
      values.to_struct(name || "")
    elsif column_count == 1
      only_column = values.to_series
      name.nil? ? only_column : only_column.alias(name)
    else
      raise TypeError, "cannot initialize Series from DataFrame without any columns"
    end

  series = series.cast(dtype, strict: strict) if !dtype.nil? && dtype != series.dtype
  series._s
end
265
+
266
+ # TODO move rest
267
+
268
# Native constructor for each Polars data type class.
POLARS_TYPE_TO_CONSTRUCTOR = {
  Float32     => RbSeries.method(:new_opt_f32),
  Float64     => RbSeries.method(:new_opt_f64),
  Int8        => RbSeries.method(:new_opt_i8),
  Int16       => RbSeries.method(:new_opt_i16),
  Int32       => RbSeries.method(:new_opt_i32),
  Int64       => RbSeries.method(:new_opt_i64),
  Int128      => RbSeries.method(:new_opt_i128),
  UInt8       => RbSeries.method(:new_opt_u8),
  UInt16      => RbSeries.method(:new_opt_u16),
  UInt32      => RbSeries.method(:new_opt_u32),
  UInt64      => RbSeries.method(:new_opt_u64),
  UInt128     => RbSeries.method(:new_opt_u128),
  Decimal     => RbSeries.method(:new_decimal),
  Date        => RbSeries.method(:new_from_any_values),
  Datetime    => RbSeries.method(:new_from_any_values),
  Duration    => RbSeries.method(:new_from_any_values),
  Time        => RbSeries.method(:new_from_any_values),
  Boolean     => RbSeries.method(:new_opt_bool),
  Utf8        => RbSeries.method(:new_str),
  Object      => RbSeries.method(:new_object),
  Categorical => RbSeries.method(:new_str),
  Enum        => RbSeries.method(:new_str),
  Binary      => RbSeries.method(:new_binary),
  Null        => RbSeries.method(:new_null)
}

# Native constructor for each short symbolic type name.
SYM_TYPE_TO_CONSTRUCTOR = {
  f32:  RbSeries.method(:new_opt_f32),
  f64:  RbSeries.method(:new_opt_f64),
  i8:   RbSeries.method(:new_opt_i8),
  i16:  RbSeries.method(:new_opt_i16),
  i32:  RbSeries.method(:new_opt_i32),
  i64:  RbSeries.method(:new_opt_i64),
  i128: RbSeries.method(:new_opt_i128),
  u8:   RbSeries.method(:new_opt_u8),
  u16:  RbSeries.method(:new_opt_u16),
  u32:  RbSeries.method(:new_opt_u32),
  u64:  RbSeries.method(:new_opt_u64),
  u128: RbSeries.method(:new_opt_u128),
  bool: RbSeries.method(:new_opt_bool),
  str:  RbSeries.method(:new_str)
}

# Looks up the callable that builds a native series for `dtype`.
#
# An Array dtype instance gets a lambda bound to its width and inner type.
# A DataType subclass or instance is resolved through
# POLARS_TYPE_TO_CONSTRUCTOR; anything else is converted with `to_sym` and
# resolved through SYM_TYPE_TO_CONSTRUCTOR.
#
# @raise [ArgumentError] when no table entry exists for `dtype`
def self.polars_type_to_constructor(dtype)
  if dtype.is_a?(Array)
    return ->(name, values, strict) { RbSeries.new_array(dtype.width, dtype.inner, name, values, strict) }
  end

  table, key =
    if dtype.is_a?(Class) && dtype < DataType
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype]
    elsif dtype.is_a?(DataType)
      [POLARS_TYPE_TO_CONSTRUCTOR, dtype.class]
    else
      [SYM_TYPE_TO_CONSTRUCTOR, dtype.to_sym]
    end

  table.fetch(key)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
327
+
328
# Native constructor for each supported plain Ruby class.
RB_TYPE_TO_CONSTRUCTOR = {
  Float      => RbSeries.method(:new_opt_f64),
  Integer    => RbSeries.method(:new_opt_i64),
  TrueClass  => RbSeries.method(:new_opt_bool),
  FalseClass => RbSeries.method(:new_opt_bool),
  BigDecimal => RbSeries.method(:new_decimal),
  NilClass   => RbSeries.method(:new_null)
}

# Returns the constructor registered for the Ruby class `dtype`, or the
# object-series constructor when the class has no entry.
def self.rb_type_to_constructor(dtype)
  RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
end
342
+
343
# Pairs `values` (returned unchanged) with its class, which callers use
# as the dtype key.
def self.numo_values_and_dtype(values)
  dtype = values.class
  [values, dtype]
end
346
+
347
# Returns the native constructor for a Numo array class, or the
# object-series constructor when the class has no entry.
#
# The table is built inside the method rather than as a constant, so the
# Numo constants are only resolved when this is called (other code in this
# file guards Numo access with `defined?(Numo::NArray)`).
#
# Numo names its floating point classes SFloat (single precision) and
# DFloat (double precision); referencing Float32/Float64 would raise
# NameError before any lookup happened.
def self.numo_type_to_constructor(dtype)
  {
    Numo::SFloat => RbSeries.method(:new_opt_f32),
    Numo::DFloat => RbSeries.method(:new_opt_f64),
    Numo::Int8 => RbSeries.method(:new_opt_i8),
    Numo::Int16 => RbSeries.method(:new_opt_i16),
    Numo::Int32 => RbSeries.method(:new_opt_i32),
    Numo::Int64 => RbSeries.method(:new_opt_i64),
    Numo::UInt8 => RbSeries.method(:new_opt_u8),
    Numo::UInt16 => RbSeries.method(:new_opt_u16),
    Numo::UInt32 => RbSeries.method(:new_opt_u32),
    Numo::UInt64 => RbSeries.method(:new_opt_u64)
  }.fetch(dtype) { RbSeries.method(:new_object) }
end
363
+ end
364
+ end
@@ -0,0 +1,9 @@
1
module Polars
  module Utils
    # Returns the first element of `values` that is not nil.
    # Yields nil when `values` itself is nil or contains no non-nil element.
    def self.get_first_non_none(values)
      return nil if values.nil?

      values.find { |item| !item.nil? }
    end
  end
end
@@ -0,0 +1,11 @@
1
module Polars
  module Utils
    class << self
      # Emits `message` through Ruby's standard warning channel.
      def issue_deprecation_warning(message)
        Kernel.warn(message)
      end

      # Reports a deprecation; currently just forwards the message to
      # issue_deprecation_warning.
      def deprecated(message)
        issue_deprecation_warning(message)
      end
    end
  end
end
@@ -8,10 +8,15 @@ module Polars
8
8
  end
9
9
 
10
10
  if file.nil?
11
- return serialize_to_bytes.call
11
+ serialize_to_bytes.call
12
+ elsif pathlike?(file)
13
+ file = normalize_filepath(file)
14
+ serializer.(file)
15
+ nil
16
+ else
17
+ serializer.(file)
18
+ nil
12
19
  end
13
-
14
- raise Todo
15
20
  end
16
21
  end
17
22
  end
@@ -0,0 +1,19 @@
1
module Polars
  module Utils
    # Warns that a feature is unstable, but only when the
    # POLARS_WARN_UNSTABLE environment variable parses to a non-zero
    # integer. Without a message a generic sentence is used; in both cases
    # a fixed notice about possible changes is appended.
    def self.issue_unstable_warning(message = nil)
      return if ENV.fetch("POLARS_WARN_UNSTABLE", 0).to_i.zero?

      lead = message.nil? ? "this functionality is considered unstable." : message
      warn lead + " It may be changed at any point without it being considered a breaking change."
    end
  end
end
@@ -39,6 +39,20 @@ module Polars
39
39
  end
40
40
  end
41
41
 
42
# Builds a series named `name` from a Ruby Range via F.int_range.
#
# The upper bound is the range's last value, plus one for an inclusive
# range. An integer dtype (Int64 when none is given) is passed straight
# to int_range; any other dtype is applied with a cast afterwards.
def self.range_to_series(name, rng, dtype: nil)
  target = dtype || Int64
  native_integer = target.integer?
  lower = rng.first
  upper = rng.last + (rng.exclude_end? ? 0 : 1)

  series =
    if native_integer
      F.int_range(lower, upper, step: 1, dtype: target, eager: true)
    else
      F.int_range(lower, upper, step: 1, eager: true).cast(target)
    end
  series.alias(name)
end
55
+
42
56
  def self.arrlen(obj)
43
57
  if obj.is_a?(Range)
44
58
  # size only works for numeric ranges
@@ -97,5 +111,50 @@ module Polars
97
111
  raise TypeError, msg
98
112
  end
99
113
  end
114
+
115
# Assigns `new_columns` as the column names of `df` and returns `df`.
# When fewer names are supplied than the frame has columns, only the
# leading names are replaced and the remaining ones are kept.
def self._update_columns(df, new_columns)
  replacement = new_columns
  if df.width > new_columns.length
    replacement = df.columns.tap do |current|
      new_columns.each_with_index { |label, position| current[position] = label }
    end
  end

  df.columns = replacement.to_a
  df
end
126
+
127
# Normalises a percentile specification into a sorted array.
#
# @param percentiles [Numeric, Array<Numeric>, nil] a single percentile, a
#   collection of percentiles, or nil for none. Any Numeric scalar is
#   accepted, so the integer bounds 0 and 1 work as well as floats.
# @param inject_median [Boolean] when true, 0.5 is added unless it is
#   already the smallest value at or above 0.5
# @return [Array<Numeric>] values below 0.5 (ascending) followed by values
#   at or above 0.5 (ascending)
# @raise [ArgumentError] when any percentile lies outside [0, 1]
def self.parse_percentiles(
  percentiles, inject_median: false
)
  percentiles =
    if percentiles.nil?
      []
    elsif percentiles.is_a?(Numeric)
      [percentiles]
    else
      percentiles
    end

  unless percentiles.all? { |p| p >= 0 && p <= 1 }
    msg = "`percentiles` must all be in the range [0, 1]"
    raise ArgumentError, msg
  end

  below_median, at_or_above_median = percentiles.partition { |p| p < 0.5 }.map(&:sort)

  # `first` is nil for an empty list, which also differs from 0.5, so the
  # median is injected in that case too.
  if inject_median && at_or_above_median.first != 0.5
    at_or_above_median.unshift(0.5)
  end

  below_median + at_or_above_median
end
149
+
150
# Placeholder for rendering a DOT graph: not implemented yet, so every
# call raises Todo regardless of the arguments given.
def self.display_dot_graph(dot:, show: true, output_path: nil, raw_output: false, figsize: [16.0, 12.0])
  raise Todo
end
100
159
  end
101
160
  end
data/lib/polars/utils.rb CHANGED
@@ -4,7 +4,7 @@ module Polars
4
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
5
5
 
6
6
  def self.is_polars_dtype(dtype, include_unknown: false)
7
- is_dtype = dtype.is_a?(Symbol) || dtype.is_a?(::String) || dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
7
+ is_dtype = dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
8
8
 
9
9
  if !include_unknown
10
10
  is_dtype && dtype != Unknown
@@ -17,42 +17,6 @@ module Polars
17
17
  obj.is_a?(Expr) && obj.meta.is_column
18
18
  end
19
19
 
20
- def self.map_rb_type_to_dtype(ruby_dtype)
21
- if ruby_dtype == Float
22
- Float64
23
- elsif ruby_dtype == Integer
24
- Int64
25
- elsif ruby_dtype == ::String
26
- Utf8
27
- elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
28
- Boolean
29
- elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
30
- Datetime.new("ns")
31
- elsif ruby_dtype == ::Date
32
- Date
33
- elsif ruby_dtype == ::Array
34
- List
35
- elsif ruby_dtype == NilClass
36
- Null
37
- else
38
- raise TypeError, "Invalid type"
39
- end
40
- end
41
-
42
- # TODO fix
43
- def self.rb_type_to_dtype(data_type)
44
- if is_polars_dtype(data_type)
45
- data_type = data_type.to_s if data_type.is_a?(Symbol)
46
- return data_type
47
- end
48
-
49
- begin
50
- map_rb_type_to_dtype(data_type)
51
- rescue TypeError
52
- raise ArgumentError, "Conversion of Ruby data type #{data_type.inspect} to Polars data type not implemented."
53
- end
54
- end
55
-
56
20
  def self.parse_row_index_args(row_index_name = nil, row_index_offset = 0)
57
21
  if row_index_name.nil?
58
22
  nil
@@ -64,15 +28,14 @@ module Polars
64
28
  def self.handle_projection_columns(columns)
65
29
  projection = nil
66
30
  if columns
67
- raise Todo
68
- # if columns.is_a?(::String) || columns.is_a?(Symbol)
69
- # columns = [columns]
70
- # elsif is_int_sequence(columns)
71
- # projection = columns.to_a
72
- # columns = nil
73
- # elsif !is_str_sequence(columns)
74
- # raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
75
- # end
31
+ if columns.is_a?(::String)
32
+ columns = [columns]
33
+ elsif is_int_sequence(columns)
34
+ projection = columns.to_a
35
+ columns = nil
36
+ elsif !is_str_sequence(columns)
37
+ raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
38
+ end
76
39
  end
77
40
  [projection, columns]
78
41
  end
@@ -153,10 +116,46 @@ module Polars
153
116
  if is_polars_dtype(input)
154
117
  input
155
118
  else
156
- raise Todo
119
+ parse_rb_type_into_dtype(input)
157
120
  end
158
121
  end
159
122
 
123
# Lenient wrapper around parse_into_dtype: returns its result, or nil
# when it raises TypeError. Other exceptions propagate.
def self.try_parse_into_dtype(input)
  begin
    parse_into_dtype(input)
  rescue TypeError
    nil
  end
end
+ end
128
+
129
# Maps a plain Ruby class to a Polars data type.
#
# Integer, Float, ::String, ::Date and NilClass map to fresh dtype
# instances; ::Time, ::DateTime and (when loaded)
# ActiveSupport::TimeWithZone map to a nanosecond Datetime; ::Array maps
# to the List class itself; Unknown is passed through. Anything else is
# handed to _raise_on_invalid_dtype.
# NOTE(review): the unqualified `String` below presumably resolves to the
# Polars dtype, since the core class is written `::String` - confirm.
def self.parse_rb_type_into_dtype(input)
  return Int64.new if input == Integer
  return Float64.new if input == Float
  return String.new if input == ::String

  datetime_like =
    input == ::Time ||
    input == ::DateTime ||
    (defined?(ActiveSupport::TimeWithZone) && input == ActiveSupport::TimeWithZone)
  return Datetime.new("ns") if datetime_like

  return Date.new if input == ::Date
  return Null.new if input == NilClass
  return List if input == ::Array
  # this is required as pass through. Don't remove
  return Unknown if input == Unknown

  _raise_on_invalid_dtype(input)
end
151
+
152
# Always raises TypeError, quoting the inspected form of `input`.
def self._raise_on_invalid_dtype(input)
  # TODO improve
  raise TypeError, "cannot parse input #{input.inspect} into Polars data type"
end
158
+
160
159
  def self.re_escape(s)
161
160
  # escapes _only_ those metachars with meaning to the rust regex crate
162
161
  Plr.re_escape(s)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.23.0"
3
+ VERSION = "0.24.0"
4
4
  end