polars-df 0.4.0-arm64-darwin → 0.6.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
data/lib/polars/utils.rb CHANGED
@@ -23,52 +23,98 @@ module Polars
23
23
  Polars.col(name)
24
24
  end
25
25
 
26
+ def self.arrlen(obj)
27
+ if obj.is_a?(Range)
28
+ # size only works for numeric ranges
29
+ obj.to_a.length
30
+ elsif obj.is_a?(String)
31
+ nil
32
+ else
33
+ obj.length
34
+ end
35
+ rescue
36
+ nil
37
+ end
38
+
26
39
  def self._timedelta_to_pl_duration(td)
27
40
  td
28
41
  end
29
42
 
30
- def self._datetime_to_pl_timestamp(dt, tu)
31
- if tu == "ns"
32
- (dt.to_datetime.utc.to_f * 1e9).to_i
33
- elsif tu == "us"
34
- (dt.to_datetime.utc.to_f * 1e6).to_i
35
- elsif tu == "ms"
36
- (dt.to_datetime.utc.to_f * 1e3).to_i
37
- elsif tu.nil?
38
- (dt.to_datetime.utc.to_f * 1e6).to_i
43
+ def self._datetime_to_pl_timestamp(dt, time_unit)
44
+ dt = dt.to_datetime.to_time
45
+ if time_unit == "ns"
46
+ nanos = dt.nsec
47
+ dt.to_i * 1_000_000_000 + nanos
48
+ elsif time_unit == "us"
49
+ micros = dt.usec
50
+ dt.to_i * 1_000_000 + micros
51
+ elsif time_unit == "ms"
52
+ millis = dt.usec / 1000
53
+ dt.to_i * 1_000 + millis
54
+ elsif time_unit.nil?
55
+ # Ruby has ns precision
56
+ nanos = dt.nsec
57
+ dt.to_i * 1_000_000_000 + nanos
58
+ else
59
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
60
+ end
61
+ end
62
+
63
+ def self._date_to_pl_date(d)
64
+ dt = d.to_datetime.to_time
65
+ dt.to_i / (3600 * 24)
66
+ end
67
+
68
+ def self._to_ruby_time(value)
69
+ if value == 0
70
+ ::Time.utc(2000, 1, 1)
39
71
  else
40
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
41
- end
42
- end
43
-
44
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
- if dtype == :date || dtype == Date
46
- # days to seconds
47
- # important to create from utc. Not doing this leads
48
- # to inconsistencies dependent on the timezone you are in.
49
- ::Time.at(value * 86400).utc.to_date
50
- # TODO fix dtype
51
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
52
- if tz.nil? || tz == ""
53
- if tu == "ns"
54
- raise Todo
55
- elsif tu == "us"
56
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
57
- elsif tu == "ms"
58
- raise Todo
59
- else
60
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
61
- end
72
+ seconds, nanoseconds = value.divmod(1_000_000_000)
73
+ minutes, seconds = seconds.divmod(60)
74
+ hours, minutes = minutes.divmod(60)
75
+ ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
76
+ end
77
+ end
78
+
79
+ def self._to_ruby_duration(value, time_unit = "ns")
80
+ if time_unit == "ns"
81
+ value / 1e9
82
+ elsif time_unit == "us"
83
+ value / 1e6
84
+ elsif time_unit == "ms"
85
+ value / 1e3
86
+ else
87
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
88
+ end
89
+ end
90
+
91
+ def self._to_ruby_date(value)
92
+ # days to seconds
93
+ # important to create from utc. Not doing this leads
94
+ # to inconsistencies dependent on the timezone you are in.
95
+ ::Time.at(value * 86400).utc.to_date
96
+ end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
62
106
  else
63
- raise Todo
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
64
108
  end
65
-
66
- dt
67
109
  else
68
- raise NotImplementedError
110
+ raise Todo
69
111
  end
70
112
  end
71
113
 
114
+ def self._to_ruby_decimal(digits, scale)
115
+ BigDecimal("#{digits}e#{scale}")
116
+ end
117
+
72
118
  def self.selection_to_rbexpr_list(exprs)
73
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
120
  exprs = [exprs]
@@ -102,19 +148,34 @@ module Polars
102
148
  end
103
149
 
104
150
  # TODO fix
105
- def self.is_polars_dtype(data_type)
151
+ def self.is_polars_dtype(data_type, include_unknown: false)
152
+ if data_type == Unknown
153
+ return include_unknown
154
+ end
106
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
107
156
  end
108
157
 
109
- RB_TYPE_TO_DTYPE = {
110
- Float => :f64,
111
- Integer => :i64,
112
- String => :str,
113
- TrueClass => :bool,
114
- FalseClass => :bool,
115
- ::Date => :date,
116
- ::DateTime => :datetime
117
- }
158
+ def self.map_rb_type_to_dtype(ruby_dtype)
159
+ if ruby_dtype == Float
160
+ Float64
161
+ elsif ruby_dtype == Integer
162
+ Int64
163
+ elsif ruby_dtype == String
164
+ Utf8
165
+ elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
+ Boolean
167
+ elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
168
+ Datetime.new("ns")
169
+ elsif ruby_dtype == ::Date
170
+ Date
171
+ elsif ruby_dtype == ::Array
172
+ List
173
+ elsif ruby_dtype == NilClass
174
+ Null
175
+ else
176
+ raise TypeError, "Invalid type"
177
+ end
178
+ end
118
179
 
119
180
  # TODO fix
120
181
  def self.rb_type_to_dtype(data_type)
@@ -124,8 +185,8 @@ module Polars
124
185
  end
125
186
 
126
187
  begin
127
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
128
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
129
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
130
191
  end
131
192
  end
@@ -178,7 +239,7 @@ module Polars
178
239
  end
179
240
 
180
241
  def self.bool?(value)
181
- value == true || value == false
242
+ value.is_a?(TrueClass) || value.is_a?(FalseClass)
182
243
  end
183
244
 
184
245
  def self.strlike?(value)
@@ -194,35 +255,58 @@ module Polars
194
255
  end
195
256
 
196
257
  def self.is_bool_sequence(val)
197
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
198
259
  end
199
260
 
200
261
  def self.is_dtype_sequence(val)
201
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
202
263
  end
203
264
 
204
265
  def self.is_int_sequence(val)
205
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
206
267
  end
207
268
 
208
269
  def self.is_expr_sequence(val)
209
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
210
271
  end
211
272
 
212
273
  def self.is_rbexpr_sequence(val)
213
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
214
275
  end
215
276
 
216
277
  def self.is_str_sequence(val, allow_str: false)
217
278
  if allow_str == false && val.is_a?(String)
218
279
  false
219
280
  else
220
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
221
282
  end
222
283
  end
223
284
 
224
285
  def self.local_file?(file)
225
286
  Dir.glob(file).any?
226
287
  end
288
+
289
+ def self.parse_as_expression(input, str_as_lit: false, structify: false)
290
+ if input.is_a?(Expr)
291
+ expr = input
292
+ elsif input.is_a?(String) && !str_as_lit
293
+ expr = Polars.col(input)
294
+ structify = false
295
+ elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
296
+ expr = Polars.lit(input)
297
+ structify = false
298
+ elsif input.is_a?(::Array)
299
+ expr = Polars.lit(Polars::Series.new("", [input]))
300
+ structify = false
301
+ else
302
+ raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
303
+ end
304
+
305
+ if structify
306
+ raise Todo
307
+ end
308
+
309
+ expr._rbexpr
310
+ end
227
311
  end
228
312
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.4.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,16 +1,19 @@
1
1
  # ext
2
2
  begin
3
- require_relative "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require_relative "polars/polars"
5
+ require "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-01 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -28,6 +28,8 @@ files:
28
28
  - lib/polars/3.0/polars.bundle
29
29
  - lib/polars/3.1/polars.bundle
30
30
  - lib/polars/3.2/polars.bundle
31
+ - lib/polars/array_expr.rb
32
+ - lib/polars/array_name_space.rb
31
33
  - lib/polars/batched_csv_reader.rb
32
34
  - lib/polars/binary_expr.rb
33
35
  - lib/polars/binary_name_space.rb