polars-df 0.4.0-aarch64-linux → 0.6.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/utils.rb CHANGED
@@ -23,52 +23,98 @@ module Polars
23
23
  Polars.col(name)
24
24
  end
25
25
 
26
# Best-effort element count for a collection-like object.
#
# Returns nil for strings and for any object whose length cannot be
# determined (any StandardError raised along the way is swallowed).
def self.arrlen(obj)
  case obj
  when Range
    # size only works for numeric ranges, so materialize the range instead
    obj.to_a.length
  when String
    nil
  else
    obj.length
  end
rescue StandardError
  nil
end
38
+
26
39
  def self._timedelta_to_pl_duration(td)
27
40
  td
28
41
  end
29
42
 
30
- def self._datetime_to_pl_timestamp(dt, tu)
31
- if tu == "ns"
32
- (dt.to_datetime.utc.to_f * 1e9).to_i
33
- elsif tu == "us"
34
- (dt.to_datetime.utc.to_f * 1e6).to_i
35
- elsif tu == "ms"
36
- (dt.to_datetime.utc.to_f * 1e3).to_i
37
- elsif tu.nil?
38
- (dt.to_datetime.utc.to_f * 1e6).to_i
43
# Converts a date/time-like object into an integer timestamp counted in
# the requested unit since the Unix epoch.
#
# @param dt [#to_datetime] value to convert (e.g. Time, DateTime, Date)
# @param time_unit [String, nil] "ns", "us" or "ms"; nil is treated as "ns"
# @return [Integer] number of time units since the epoch
# @raise [ArgumentError] if time_unit is not one of the supported units
def self._datetime_to_pl_timestamp(dt, time_unit)
  # Round-trip through DateTime so anything responding to to_datetime works.
  time = dt.to_datetime.to_time

  case time_unit
  when "ns", nil
    # Ruby has ns precision, so nanoseconds is also the default for nil
    time.to_i * 1_000_000_000 + time.nsec
  when "us"
    time.to_i * 1_000_000 + time.usec
  when "ms"
    time.to_i * 1_000 + time.usec / 1000
  else
    # Interpolate the actual parameter; the previous `tu` name no longer
    # exists and would have raised NameError instead of ArgumentError.
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
62
+
63
# Converts a date-like object into whole days since the Unix epoch.
#
# @param d [#to_datetime] value to convert
# @return [Integer] day count (floor division, so pre-epoch dates are negative)
def self._date_to_pl_date(d)
  epoch_seconds = d.to_datetime.to_time.to_i
  epoch_seconds / 86_400
end
67
+
68
+ def self._to_ruby_time(value)
69
+ if value == 0
70
+ ::Time.utc(2000, 1, 1)
39
71
  else
40
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
41
- end
42
- end
43
-
44
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
- if dtype == :date || dtype == Date
46
- # days to seconds
47
- # important to create from utc. Not doing this leads
48
- # to inconsistencies dependent on the timezone you are in.
49
- ::Time.at(value * 86400).utc.to_date
50
- # TODO fix dtype
51
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
52
- if tz.nil? || tz == ""
53
- if tu == "ns"
54
- raise Todo
55
- elsif tu == "us"
56
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
57
- elsif tu == "ms"
58
- raise Todo
59
- else
60
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
61
- end
72
+ seconds, nanoseconds = value.divmod(1_000_000_000)
73
+ minutes, seconds = seconds.divmod(60)
74
+ hours, minutes = minutes.divmod(60)
75
+ ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
76
+ end
77
+ end
78
+
79
# Converts an integer duration expressed in time_unit into seconds.
#
# @param value [Numeric] duration in the given unit
# @param time_unit [String] "ns" (default), "us" or "ms"
# @return [Float] duration in seconds
# @raise [ArgumentError] for any other time_unit
def self._to_ruby_duration(value, time_unit = "ns")
  units_per_second =
    case time_unit
    when "ns" then 1e9
    when "us" then 1e6
    when "ms" then 1e3
    else
      raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
    end

  value / units_per_second
end
90
+
91
# Converts a day count since the Unix epoch into a Ruby Date.
#
# @param value [Integer] days since 1970-01-01
# @return [::Date]
def self._to_ruby_date(value)
  # days to seconds
  epoch_seconds = value * 86400
  # Build the Time in UTC: going through local time would make the
  # resulting date depend on the timezone of the running process.
  utc_time = ::Time.at(epoch_seconds).utc
  utc_time.to_date
end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
62
106
  else
63
- raise Todo
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
64
108
  end
65
-
66
- dt
67
109
  else
68
- raise NotImplementedError
110
+ raise Todo
69
111
  end
70
112
  end
71
113
 
114
# Builds a BigDecimal from a digit string and a base-10 exponent,
# i.e. the value digits * 10**scale.
def self._to_ruby_decimal(digits, scale)
  scientific_notation = "#{digits}e#{scale}"
  BigDecimal(scientific_notation)
end
117
+
72
118
  def self.selection_to_rbexpr_list(exprs)
73
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
120
  exprs = [exprs]
@@ -102,19 +148,34 @@ module Polars
102
148
  end
103
149
 
104
150
  # TODO fix
105
- def self.is_polars_dtype(data_type)
151
+ def self.is_polars_dtype(data_type, include_unknown: false)
152
+ if data_type == Unknown
153
+ return include_unknown
154
+ end
106
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
107
156
  end
108
157
 
109
- RB_TYPE_TO_DTYPE = {
110
- Float => :f64,
111
- Integer => :i64,
112
- String => :str,
113
- TrueClass => :bool,
114
- FalseClass => :bool,
115
- ::Date => :date,
116
- ::DateTime => :datetime
117
- }
158
# Maps a Ruby class to the Polars data type used to represent it.
#
# Classes are compared with ==, so only the exact classes listed here
# are recognized.
#
# @param ruby_dtype [Class] Ruby class to translate
# @return the matching Polars data type
# @raise [TypeError] if the class has no mapping
def self.map_rb_type_to_dtype(ruby_dtype)
  return Float64 if ruby_dtype == Float
  return Int64 if ruby_dtype == Integer
  return Utf8 if ruby_dtype == String
  return Boolean if ruby_dtype == TrueClass || ruby_dtype == FalseClass

  # ActiveSupport is optional, so only compare against it when loaded.
  time_like = ruby_dtype == DateTime || ruby_dtype == ::Time ||
    (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
  return Datetime.new("ns") if time_like

  return Date if ruby_dtype == ::Date
  return List if ruby_dtype == ::Array
  return Null if ruby_dtype == NilClass

  raise TypeError, "Invalid type"
end
118
179
 
119
180
  # TODO fix
120
181
  def self.rb_type_to_dtype(data_type)
@@ -124,8 +185,8 @@ module Polars
124
185
  end
125
186
 
126
187
  begin
127
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
128
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
129
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
130
191
  end
131
192
  end
@@ -178,7 +239,7 @@ module Polars
178
239
  end
179
240
 
180
241
  def self.bool?(value)
181
- value == true || value == false
242
+ value.is_a?(TrueClass) || value.is_a?(FalseClass)
182
243
  end
183
244
 
184
245
  def self.strlike?(value)
@@ -194,35 +255,58 @@ module Polars
194
255
  end
195
256
 
196
257
  def self.is_bool_sequence(val)
197
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
198
259
  end
199
260
 
200
261
  def self.is_dtype_sequence(val)
201
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
202
263
  end
203
264
 
204
265
  def self.is_int_sequence(val)
205
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
206
267
  end
207
268
 
208
269
  def self.is_expr_sequence(val)
209
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
210
271
  end
211
272
 
212
273
  def self.is_rbexpr_sequence(val)
213
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
214
275
  end
215
276
 
216
277
  def self.is_str_sequence(val, allow_str: false)
217
278
  if allow_str == false && val.is_a?(String)
218
279
  false
219
280
  else
220
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
221
282
  end
222
283
  end
223
284
 
224
285
  def self.local_file?(file)
225
286
  Dir.glob(file).any?
226
287
  end
288
+
289
# Converts a user-supplied value into the underlying expression object
# (the value returned by Expr#_rbexpr).
#
# @param input [Object] an Expr, a column name, a literal value or a Ruby array
# @param str_as_lit [Boolean] when true a String becomes a literal instead
#   of a column reference
# @param structify [Boolean] only reachable for Expr input; not implemented
#   yet (raises Todo)
# @raise [TypeError] if the input type is not supported
def self.parse_as_expression(input, str_as_lit: false, structify: false)
  if input.is_a?(Expr)
    expr = input
  elsif input.is_a?(String) && !str_as_lit
    expr = Polars.col(input)
    structify = false
  elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
    expr = Polars.lit(input)
    structify = false
  elsif input.is_a?(::Array)
    # Use the top-level ::Array, as the sequence predicates in this module
    # do; a bare Array may resolve to a namespace-local constant here and
    # never match a plain Ruby array.
    expr = Polars.lit(Polars::Series.new("", [input]))
    structify = false
  else
    raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
  end

  if structify
    raise Todo
  end

  expr._rbexpr
end
227
311
  end
228
312
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.4.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,16 +1,19 @@
1
1
  # ext
2
2
  begin
3
- require_relative "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require_relative "polars/polars"
5
+ require "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-01 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -28,6 +28,8 @@ files:
28
28
  - lib/polars/3.0/polars.so
29
29
  - lib/polars/3.1/polars.so
30
30
  - lib/polars/3.2/polars.so
31
+ - lib/polars/array_expr.rb
32
+ - lib/polars/array_name_space.rb
31
33
  - lib/polars/batched_csv_reader.rb
32
34
  - lib/polars/binary_expr.rb
33
35
  - lib/polars/binary_name_space.rb