polars-df 0.5.0-x86_64-darwin → 0.6.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,6 +10,112 @@ module Polars
10
10
  self._s = series._s
11
11
  end
12
12
 
13
+ # Convert a Utf8 column into a Date column.
14
+ #
15
+ # @param format [String]
16
+ # Format to use for conversion. Refer to the
17
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
18
+ # for the full specification. Example: `"%Y-%m-%d"`.
19
+ # If set to nil (default), the format is inferred from the data.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # Require an exact format match. If false, allow the format to match anywhere
24
+ # in the target string.
25
+ # @param cache [Boolean]
26
+ # Use a cache of unique, converted dates to apply the conversion.
27
+ #
28
+ # @return [Series]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
32
+ # s.str.to_date
33
+ # # =>
34
+ # # shape: (3,)
35
+ # # Series: '' [date]
36
+ # # [
37
+ # # 2020-01-01
38
+ # # 2020-02-01
39
+ # # 2020-03-01
40
+ # # ]
41
+ def to_date(format = nil, strict: true, exact: true, cache: true)
42
+ super
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, e.g.:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Series]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true
86
+ )
87
+ super
88
+ end
89
+
90
+ # Convert a Utf8 column into a Time column.
91
+ #
92
+ # @param format [String]
93
+ # Format to use for conversion. Refer to the
94
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
95
+ # for the full specification. Example: `"%H:%M:%S"`.
96
+ # If set to nil (default), the format is inferred from the data.
97
+ # @param strict [Boolean]
98
+ # Raise an error if any conversion fails.
99
+ # @param cache [Boolean]
100
+ # Use a cache of unique, converted times to apply the conversion.
101
+ #
102
+ # @return [Series]
103
+ #
104
+ # @example
105
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
106
+ # s.str.to_time("%H:%M")
107
+ # # =>
108
+ # # shape: (3,)
109
+ # # Series: '' [time]
110
+ # # [
111
+ # # 01:00:00
112
+ # # 02:00:00
113
+ # # 03:00:00
114
+ # # ]
115
+ def to_time(format = nil, strict: true, cache: true)
116
+ super
117
+ end
118
+
13
119
  # Parse a Series of dtype Utf8 to a Date/Datetime Series.
14
120
  #
15
121
  # @param datatype [Symbol]
@@ -23,10 +129,23 @@ module Polars
23
129
  # @param exact [Boolean]
24
130
  # - If true, require an exact format match.
25
131
  # - If false, allow the format to match anywhere in the target string.
132
+ # @param cache [Boolean]
133
+ # Use a cache of unique, converted dates to apply the datetime conversion.
26
134
  #
27
135
  # @return [Series]
28
136
  #
29
- # @example
137
+ # @example Dealing with a consistent format:
138
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
139
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
140
+ # # =>
141
+ # # shape: (2,)
142
+ # # Series: '' [datetime[μs, UTC]]
143
+ # # [
144
+ # # 2020-01-01 01:00:00 UTC
145
+ # # 2020-01-01 02:00:00 UTC
146
+ # # ]
147
+ #
148
+ # @example Dealing with different formats:
30
149
  # s = Polars::Series.new(
31
150
  # "date",
32
151
  # [
@@ -36,28 +155,24 @@ module Polars
36
155
  # "Sun Jul 8 00:34:60 2001"
37
156
  # ]
38
157
  # )
39
- # s.to_frame.with_column(
40
- # Polars.col("date")
41
- # .str.strptime(Polars::Date, "%F", strict: false)
42
- # .fill_null(
43
- # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
44
- # )
45
- # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
46
- # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
47
- # )
158
+ # s.to_frame.select(
159
+ # Polars.coalesce(
160
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
161
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
162
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
163
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
164
+ # )
165
+ # ).to_series
48
166
  # # =>
49
- # # shape: (4, 1)
50
- # # ┌────────────┐
51
- # # │ date │
52
- # # │ --- │
53
- # # │ date │
54
- # # ╞════════════╡
55
- # # │ 2021-04-22 │
56
- # # │ 2022-01-04 │
57
- # # 2022-01-31
58
- # # │ 2001-07-08 │
59
- # # └────────────┘
60
- def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
167
+ # # shape: (4,)
168
+ # # Series: 'date' [date]
169
+ # # [
170
+ # # 2021-04-22
171
+ # # 2022-01-04
172
+ # # 2022-01-31
173
+ # # 2001-07-08
174
+ # # ]
175
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
61
176
  super
62
177
  end
63
178
 
data/lib/polars/utils.rb CHANGED
@@ -40,17 +40,23 @@ module Polars
40
40
  td
41
41
  end
42
42
 
43
# Converts a date/time-like object into an integer timestamp counted from
# the Unix epoch, expressed in the requested unit.
#
# Integer arithmetic on `to_i`, `nsec` and `usec` is used so that no
# precision is lost by going through a Float.
#
# @param dt [#to_datetime]
#   Value to convert; it is normalized with `to_datetime.to_time` first.
# @param time_unit ["ns", "us", "ms", nil]
#   Unit of the returned timestamp. `nil` is treated like `"ns"`.
#
# @return [Integer]
#
# @raise [ArgumentError] if `time_unit` is not one of the supported values
def self._datetime_to_pl_timestamp(dt, time_unit)
  dt = dt.to_datetime.to_time
  case time_unit
  when "ns", nil
    # Ruby has ns precision
    dt.to_i * 1_000_000_000 + dt.nsec
  when "us"
    dt.to_i * 1_000_000 + dt.usec
  when "ms"
    dt.to_i * 1_000 + dt.usec / 1000
  else
    # Interpolate the actual parameter; the old name `tu` no longer exists
    # and would turn this into a NameError.
    raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
  end
end
56
62
 
@@ -59,46 +65,56 @@ module Polars
59
65
  dt.to_i / (3600 * 24)
60
66
  end
61
67
 
62
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
63
- if dtype == :date || dtype == Date
64
- # days to seconds
65
- # important to create from utc. Not doing this leads
66
- # to inconsistencies dependent on the timezone you are in.
67
- ::Time.at(value * 86400).utc.to_date
68
- # TODO fix dtype
69
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
70
- if tz.nil? || tz == ""
71
- if tu == "ns"
72
- raise Todo
73
- elsif tu == "us"
74
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
75
- elsif tu == "ms"
76
- raise Todo
77
- else
78
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
79
- end
80
- else
81
- raise Todo
82
- end
83
-
84
- dt
68
# Converts a time-of-day value into a Ruby Time on the placeholder date
# 2000-01-01 (UTC).
#
# @param value [Integer]
#   Nanoseconds elapsed since 00:00:00.
#
# @return [::Time]
def self._to_ruby_time(value)
  seconds, nanoseconds = value.divmod(1_000_000_000)
  minutes, seconds = seconds.divmod(60)
  hours, minutes = minutes.divmod(60)
  # Hand the sub-second part over as an exact Rational number of
  # microseconds; a Float here can end up one nanosecond short after
  # truncation. A value of 0 naturally yields midnight.
  ::Time.utc(2000, 1, 1, hours, minutes, seconds, Rational(nanoseconds, 1000))
end
89
78
 
90
- def self._to_ruby_duration(value, tu = "ns")
91
- if tu == "ns"
79
+ def self._to_ruby_duration(value, time_unit = "ns")
80
+ if time_unit == "ns"
92
81
  value / 1e9
93
- elsif tu == "us"
82
+ elsif time_unit == "us"
94
83
  value / 1e6
95
- elsif tu == "ms"
84
+ elsif time_unit == "ms"
96
85
  value / 1e3
97
86
  else
98
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
87
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
99
88
  end
100
89
  end
101
90
 
91
+ def self._to_ruby_date(value)
92
+ # days to seconds
93
+ # important to create from utc. Not doing this leads
94
+ # to inconsistencies dependent on the timezone you are in.
95
+ ::Time.at(value * 86400).utc.to_date
96
+ end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
106
+ else
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
108
+ end
109
+ else
110
+ raise Todo
111
+ end
112
+ end
113
+
114
+ def self._to_ruby_decimal(digits, scale)
115
+ BigDecimal("#{digits}e#{scale}")
116
+ end
117
+
102
118
  def self.selection_to_rbexpr_list(exprs)
103
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
104
120
  exprs = [exprs]
@@ -139,16 +155,27 @@ module Polars
139
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
140
156
  end
141
157
 
142
- RB_TYPE_TO_DTYPE = {
143
- Float => :f64,
144
- Integer => :i64,
145
- String => :str,
146
- TrueClass => :bool,
147
- FalseClass => :bool,
148
- ::Date => :date,
149
- ::DateTime => :datetime,
150
- ::Time => :datetime
151
- }
158
+ def self.map_rb_type_to_dtype(ruby_dtype)
159
+ if ruby_dtype == Float
160
+ Float64
161
+ elsif ruby_dtype == Integer
162
+ Int64
163
+ elsif ruby_dtype == String
164
+ Utf8
165
+ elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
+ Boolean
167
+ elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
168
+ Datetime.new("ns")
169
+ elsif ruby_dtype == ::Date
170
+ Date
171
+ elsif ruby_dtype == ::Array
172
+ List
173
+ elsif ruby_dtype == NilClass
174
+ Null
175
+ else
176
+ raise TypeError, "Invalid type"
177
+ end
178
+ end
152
179
 
153
180
  # TODO fix
154
181
  def self.rb_type_to_dtype(data_type)
@@ -158,8 +185,8 @@ module Polars
158
185
  end
159
186
 
160
187
  begin
161
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
162
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
163
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
164
191
  end
165
192
  end
@@ -228,35 +255,58 @@ module Polars
228
255
  end
229
256
 
230
257
  def self.is_bool_sequence(val)
231
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
232
259
  end
233
260
 
234
261
  def self.is_dtype_sequence(val)
235
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
236
263
  end
237
264
 
238
265
  def self.is_int_sequence(val)
239
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
240
267
  end
241
268
 
242
269
  def self.is_expr_sequence(val)
243
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
244
271
  end
245
272
 
246
273
  def self.is_rbexpr_sequence(val)
247
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
248
275
  end
249
276
 
250
277
  def self.is_str_sequence(val, allow_str: false)
251
278
  if allow_str == false && val.is_a?(String)
252
279
  false
253
280
  else
254
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
255
282
  end
256
283
  end
257
284
 
258
285
  def self.local_file?(file)
259
286
  Dir.glob(file).any?
260
287
  end
288
+
289
# Normalizes a user-supplied value into the `_rbexpr` of an expression.
#
# @param input [Object]
#   An Expr (used as is), a String (column name unless `str_as_lit`),
#   a literal value (Integer, Float, String, Series, Date, Time, DateTime
#   or nil), or a Ruby array (wrapped in a single-element Series literal).
# @param str_as_lit [Boolean]
#   When true, strings are wrapped with `Polars.lit` instead of being
#   resolved as column names with `Polars.col`.
# @param structify [Boolean]
#   Only kept for Expr input; that path is not implemented and raises `Todo`.
#
# @return [Object] the `_rbexpr` of the resulting expression
#
# @raise [TypeError] if `input` is of an unsupported type
def self.parse_as_expression(input, str_as_lit: false, structify: false)
  if input.is_a?(Expr)
    expr = input
  elsif input.is_a?(String) && !str_as_lit
    expr = Polars.col(input)
    structify = false
  elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
    expr = Polars.lit(input)
    structify = false
  elsif input.is_a?(::Array)
    # Use the top-level Array explicitly, like the other helpers in this
    # module. NOTE(review): a bare `Array` here presumably resolves to a
    # Polars constant of the same name - confirm.
    expr = Polars.lit(Polars::Series.new("", [input]))
    structify = false
  else
    raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with Polars.lit or Polars.col"
  end

  if structify
    raise Todo
  end

  expr._rbexpr
end
261
311
  end
262
312
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -6,11 +6,14 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-16 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -28,6 +28,8 @@ files:
28
28
  - lib/polars/3.0/polars.bundle
29
29
  - lib/polars/3.1/polars.bundle
30
30
  - lib/polars/3.2/polars.bundle
31
+ - lib/polars/array_expr.rb
32
+ - lib/polars/array_name_space.rb
31
33
  - lib/polars/batched_csv_reader.rb
32
34
  - lib/polars/binary_expr.rb
33
35
  - lib/polars/binary_name_space.rb