polars-df 0.5.0-x86_64-darwin → 0.6.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,112 @@ module Polars
10
10
  self._s = series._s
11
11
  end
12
12
 
13
+ # Convert a Utf8 column into a Date column.
14
+ #
15
+ # @param format [String]
16
+ # Format to use for conversion. Refer to the
17
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
18
+ # for the full specification. Example: `"%Y-%m-%d"`.
19
+ # If set to nil (default), the format is inferred from the data.
20
+ # @param strict [Boolean]
21
+ # Raise an error if any conversion fails.
22
+ # @param exact [Boolean]
23
+ # Require an exact format match. If false, allow the format to match anywhere
24
+ # in the target string.
25
+ # @param cache [Boolean]
26
+ # Use a cache of unique, converted dates to apply the conversion.
27
+ #
28
+ # @return [Series]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
32
+ # s.str.to_date
33
+ # # =>
34
+ # # shape: (3,)
35
+ # # Series: '' [date]
36
+ # # [
37
+ # # 2020-01-01
38
+ # # 2020-02-01
39
+ # # 2020-03-01
40
+ # # ]
41
+ def to_date(format = nil, strict: true, exact: true, cache: true)
42
+ super
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, e.g.:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Series]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true
86
+ )
87
+ super
88
+ end
89
+
90
+ # Convert a Utf8 column into a Time column.
91
+ #
92
+ # @param format [String]
93
+ # Format to use for conversion. Refer to the
94
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
95
+ # for the full specification. Example: `"%H:%M:%S"`.
96
+ # If set to nil (default), the format is inferred from the data.
97
+ # @param strict [Boolean]
98
+ # Raise an error if any conversion fails.
99
+ # @param cache [Boolean]
100
+ # Use a cache of unique, converted times to apply the conversion.
101
+ #
102
+ # @return [Series]
103
+ #
104
+ # @example
105
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
106
+ # s.str.to_time("%H:%M")
107
+ # # =>
108
+ # # shape: (3,)
109
+ # # Series: '' [time]
110
+ # # [
111
+ # # 01:00:00
112
+ # # 02:00:00
113
+ # # 03:00:00
114
+ # # ]
115
+ def to_time(format = nil, strict: true, cache: true)
116
+ super
117
+ end
118
+
13
119
  # Parse a Series of dtype Utf8 to a Date/Datetime Series.
14
120
  #
15
121
  # @param datatype [Symbol]
@@ -23,10 +129,23 @@ module Polars
23
129
  # @param exact [Boolean]
24
130
  # - If true, require an exact format match.
25
131
  # - If false, allow the format to match anywhere in the target string.
132
+ # @param cache [Boolean]
133
+ # Use a cache of unique, converted dates to apply the datetime conversion.
26
134
  #
27
135
  # @return [Series]
28
136
  #
29
- # @example
137
+ # @example Dealing with a consistent format:
138
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
139
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
140
+ # # =>
141
+ # # shape: (2,)
142
+ # # Series: '' [datetime[μs, UTC]]
143
+ # # [
144
+ # # 2020-01-01 01:00:00 UTC
145
+ # # 2020-01-01 02:00:00 UTC
146
+ # # ]
147
+ #
148
+ # @example Dealing with different formats:
30
149
  # s = Polars::Series.new(
31
150
  # "date",
32
151
  # [
@@ -36,28 +155,24 @@ module Polars
36
155
  # "Sun Jul 8 00:34:60 2001"
37
156
  # ]
38
157
  # )
39
- # s.to_frame.with_column(
40
- # Polars.col("date")
41
- # .str.strptime(Polars::Date, "%F", strict: false)
42
- # .fill_null(
43
- # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
44
- # )
45
- # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
46
- # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
47
- # )
158
+ # s.to_frame.select(
159
+ # Polars.coalesce(
160
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
161
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
162
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
163
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
164
+ # )
165
+ # ).to_series
48
166
  # # =>
49
- # # shape: (4, 1)
50
- # # ┌────────────┐
51
- # # │ date │
52
- # # │ --- │
53
- # # │ date │
54
- # # ╞════════════╡
55
- # # │ 2021-04-22 │
56
- # # │ 2022-01-04 │
57
- # # 2022-01-31
58
- # # │ 2001-07-08 │
59
- # # └────────────┘
60
- def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
167
+ # # shape: (4,)
168
+ # # Series: 'date' [date]
169
+ # # [
170
+ # # 2021-04-22
171
+ # # 2022-01-04
172
+ # # 2022-01-31
173
+ # # 2001-07-08
174
+ # # ]
175
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
61
176
  super
62
177
  end
63
178
 
data/lib/polars/utils.rb CHANGED
@@ -40,17 +40,23 @@ module Polars
40
40
  td
41
41
  end
42
42
 
43
- def self._datetime_to_pl_timestamp(dt, tu)
44
- if tu == "ns"
45
- (dt.to_datetime.to_time.to_f * 1e9).to_i
46
- elsif tu == "us"
47
- (dt.to_datetime.to_time.to_f * 1e6).to_i
48
- elsif tu == "ms"
49
- (dt.to_datetime.to_time.to_f * 1e3).to_i
50
- elsif tu.nil?
51
- (dt.to_datetime.to_time.to_f * 1e6).to_i
43
+ def self._datetime_to_pl_timestamp(dt, time_unit)
44
+ dt = dt.to_datetime.to_time
45
+ if time_unit == "ns"
46
+ nanos = dt.nsec
47
+ dt.to_i * 1_000_000_000 + nanos
48
+ elsif time_unit == "us"
49
+ micros = dt.usec
50
+ dt.to_i * 1_000_000 + micros
51
+ elsif time_unit == "ms"
52
+ millis = dt.usec / 1000
53
+ dt.to_i * 1_000 + millis
54
+ elsif time_unit.nil?
55
+ # Ruby has ns precision
56
+ nanos = dt.nsec
57
+ dt.to_i * 1_000_000_000 + nanos
52
58
  else
53
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
59
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
54
60
  end
55
61
  end
56
62
 
@@ -59,46 +65,56 @@ module Polars
59
65
  dt.to_i / (3600 * 24)
60
66
  end
61
67
 
62
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
63
- if dtype == :date || dtype == Date
64
- # days to seconds
65
- # important to create from utc. Not doing this leads
66
- # to inconsistencies dependent on the timezone you are in.
67
- ::Time.at(value * 86400).utc.to_date
68
- # TODO fix dtype
69
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
70
- if tz.nil? || tz == ""
71
- if tu == "ns"
72
- raise Todo
73
- elsif tu == "us"
74
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
75
- elsif tu == "ms"
76
- raise Todo
77
- else
78
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
79
- end
80
- else
81
- raise Todo
82
- end
83
-
84
- dt
68
+ def self._to_ruby_time(value)
69
+ if value == 0
70
+ ::Time.utc(2000, 1, 1)
85
71
  else
86
- raise NotImplementedError
72
+ seconds, nanoseconds = value.divmod(1_000_000_000)
73
+ minutes, seconds = seconds.divmod(60)
74
+ hours, minutes = minutes.divmod(60)
75
+ ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
87
76
  end
88
77
  end
89
78
 
90
- def self._to_ruby_duration(value, tu = "ns")
91
- if tu == "ns"
79
+ def self._to_ruby_duration(value, time_unit = "ns")
80
+ if time_unit == "ns"
92
81
  value / 1e9
93
- elsif tu == "us"
82
+ elsif time_unit == "us"
94
83
  value / 1e6
95
- elsif tu == "ms"
84
+ elsif time_unit == "ms"
96
85
  value / 1e3
97
86
  else
98
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
87
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
99
88
  end
100
89
  end
101
90
 
91
+ def self._to_ruby_date(value)
92
+ # days to seconds
93
+ # important to create from utc. Not doing this leads
94
+ # to inconsistencies dependent on the timezone you are in.
95
+ ::Time.at(value * 86400).utc.to_date
96
+ end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
106
+ else
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
108
+ end
109
+ else
110
+ raise Todo
111
+ end
112
+ end
113
+
114
+ def self._to_ruby_decimal(digits, scale)
115
+ BigDecimal("#{digits}e#{scale}")
116
+ end
117
+
102
118
  def self.selection_to_rbexpr_list(exprs)
103
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
104
120
  exprs = [exprs]
@@ -139,16 +155,27 @@ module Polars
139
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
140
156
  end
141
157
 
142
- RB_TYPE_TO_DTYPE = {
143
- Float => :f64,
144
- Integer => :i64,
145
- String => :str,
146
- TrueClass => :bool,
147
- FalseClass => :bool,
148
- ::Date => :date,
149
- ::DateTime => :datetime,
150
- ::Time => :datetime
151
- }
158
+ def self.map_rb_type_to_dtype(ruby_dtype)
159
+ if ruby_dtype == Float
160
+ Float64
161
+ elsif ruby_dtype == Integer
162
+ Int64
163
+ elsif ruby_dtype == String
164
+ Utf8
165
+ elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
+ Boolean
167
+ elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
168
+ Datetime.new("ns")
169
+ elsif ruby_dtype == ::Date
170
+ Date
171
+ elsif ruby_dtype == ::Array
172
+ List
173
+ elsif ruby_dtype == NilClass
174
+ Null
175
+ else
176
+ raise TypeError, "Invalid type"
177
+ end
178
+ end
152
179
 
153
180
  # TODO fix
154
181
  def self.rb_type_to_dtype(data_type)
@@ -158,8 +185,8 @@ module Polars
158
185
  end
159
186
 
160
187
  begin
161
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
162
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
163
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
164
191
  end
165
192
  end
@@ -228,35 +255,58 @@ module Polars
228
255
  end
229
256
 
230
257
  def self.is_bool_sequence(val)
231
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
232
259
  end
233
260
 
234
261
  def self.is_dtype_sequence(val)
235
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
236
263
  end
237
264
 
238
265
  def self.is_int_sequence(val)
239
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
240
267
  end
241
268
 
242
269
  def self.is_expr_sequence(val)
243
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
244
271
  end
245
272
 
246
273
  def self.is_rbexpr_sequence(val)
247
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
248
275
  end
249
276
 
250
277
  def self.is_str_sequence(val, allow_str: false)
251
278
  if allow_str == false && val.is_a?(String)
252
279
  false
253
280
  else
254
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
255
282
  end
256
283
  end
257
284
 
258
285
  def self.local_file?(file)
259
286
  Dir.glob(file).any?
260
287
  end
288
+
289
+ def self.parse_as_expression(input, str_as_lit: false, structify: false)
290
+ if input.is_a?(Expr)
291
+ expr = input
292
+ elsif input.is_a?(String) && !str_as_lit
293
+ expr = Polars.col(input)
294
+ structify = false
295
+ elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
296
+ expr = Polars.lit(input)
297
+ structify = false
298
+ elsif input.is_a?(::Array)
299
+ expr = Polars.lit(Polars::Series.new("", [input]))
300
+ structify = false
301
+ else
302
+ raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
303
+ end
304
+
305
+ if structify
306
+ raise Todo
307
+ end
308
+
309
+ expr._rbexpr
310
+ end
261
311
  end
262
312
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -6,11 +6,14 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-16 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -28,6 +28,8 @@ files:
28
28
  - lib/polars/3.0/polars.bundle
29
29
  - lib/polars/3.1/polars.bundle
30
30
  - lib/polars/3.2/polars.bundle
31
+ - lib/polars/array_expr.rb
32
+ - lib/polars/array_name_space.rb
31
33
  - lib/polars/batched_csv_reader.rb
32
34
  - lib/polars/binary_expr.rb
33
35
  - lib/polars/binary_name_space.rb