polars-df 0.5.0-x86_64-darwin → 0.6.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +337 -381
- data/LICENSE-THIRD-PARTY.txt +1032 -703
- data/README.md +4 -3
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/data_frame.rb +91 -49
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +76 -69
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +82 -30
- data/lib/polars/lazy_functions.rb +67 -31
- data/lib/polars/list_expr.rb +28 -28
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +70 -16
- data/lib/polars/string_expr.rb +137 -11
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/utils.rb +107 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +4 -2
@@ -10,6 +10,112 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
+
# Convert a Utf8 column into a Date column.
|
14
|
+
#
|
15
|
+
# @param format [String]
|
16
|
+
# Format to use for conversion. Refer to the
|
17
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
18
|
+
# for the full specification. Example: `"%Y-%m-%d"`.
|
19
|
+
# If set to nil (default), the format is inferred from the data.
|
20
|
+
# @param strict [Boolean]
|
21
|
+
# Raise an error if any conversion fails.
|
22
|
+
# @param exact [Boolean]
|
23
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
24
|
+
# in the target string.
|
25
|
+
# @param cache [Boolean]
|
26
|
+
# Use a cache of unique, converted dates to apply the conversion.
|
27
|
+
#
|
28
|
+
# @return [Series]
|
29
|
+
#
|
30
|
+
# @example
|
31
|
+
# s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
|
32
|
+
# s.str.to_date
|
33
|
+
# # =>
|
34
|
+
# # shape: (3,)
|
35
|
+
# # Series: '' [date]
|
36
|
+
# # [
|
37
|
+
# # 2020-01-01
|
38
|
+
# # 2020-02-01
|
39
|
+
# # 2020-03-01
|
40
|
+
# # ]
|
41
|
+
def to_date(format = nil, strict: true, exact: true, cache: true)
|
42
|
+
super
|
43
|
+
end
|
44
|
+
|
45
|
+
# Convert a Utf8 column into a Datetime column.
|
46
|
+
#
|
47
|
+
# @param format [String]
|
48
|
+
# Format to use for conversion. Refer to the
|
49
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
50
|
+
# for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
|
51
|
+
# If set to nil (default), the format is inferred from the data.
|
52
|
+
# @param time_unit ["us", "ns", "ms"]
|
53
|
+
# Unit of time for the resulting Datetime column. If set to nil (default),
|
54
|
+
# the time unit is inferred from the format string if given, eg:
|
55
|
+
# `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
|
56
|
+
# found, the default is `"us"`.
|
57
|
+
# @param time_zone [String]
|
58
|
+
# Time zone for the resulting Datetime column.
|
59
|
+
# @param strict [Boolean]
|
60
|
+
# Raise an error if any conversion fails.
|
61
|
+
# @param exact [Boolean]
|
62
|
+
# Require an exact format match. If false, allow the format to match anywhere
|
63
|
+
# in the target string.
|
64
|
+
# @param cache [Boolean]
|
65
|
+
# Use a cache of unique, converted datetimes to apply the conversion.
|
66
|
+
#
|
67
|
+
# @return [Series]
|
68
|
+
#
|
69
|
+
# @example
|
70
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
71
|
+
# s.str.to_datetime("%Y-%m-%d %H:%M%#z")
|
72
|
+
# # =>
|
73
|
+
# # shape: (2,)
|
74
|
+
# # Series: '' [datetime[μs, UTC]]
|
75
|
+
# # [
|
76
|
+
# # 2020-01-01 01:00:00 UTC
|
77
|
+
# # 2020-01-01 02:00:00 UTC
|
78
|
+
# # ]
|
79
|
+
def to_datetime(
|
80
|
+
format = nil,
|
81
|
+
time_unit: nil,
|
82
|
+
time_zone: nil,
|
83
|
+
strict: true,
|
84
|
+
exact: true,
|
85
|
+
cache: true
|
86
|
+
)
|
87
|
+
super
|
88
|
+
end
|
89
|
+
|
90
|
+
# Convert a Utf8 column into a Time column.
|
91
|
+
#
|
92
|
+
# @param format [String]
|
93
|
+
# Format to use for conversion. Refer to the
|
94
|
+
# [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
95
|
+
# for the full specification. Example: `"%H:%M:%S"`.
|
96
|
+
# If set to nil (default), the format is inferred from the data.
|
97
|
+
# @param strict [Boolean]
|
98
|
+
# Raise an error if any conversion fails.
|
99
|
+
# @param cache [Boolean]
|
100
|
+
# Use a cache of unique, converted times to apply the conversion.
|
101
|
+
#
|
102
|
+
# @return [Series]
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# s = Polars::Series.new(["01:00", "02:00", "03:00"])
|
106
|
+
# s.str.to_time("%H:%M")
|
107
|
+
# # =>
|
108
|
+
# # shape: (3,)
|
109
|
+
# # Series: '' [time]
|
110
|
+
# # [
|
111
|
+
# # 01:00:00
|
112
|
+
# # 02:00:00
|
113
|
+
# # 03:00:00
|
114
|
+
# # ]
|
115
|
+
def to_time(format = nil, strict: true, cache: true)
|
116
|
+
super
|
117
|
+
end
|
118
|
+
|
13
119
|
# Parse a Series of dtype Utf8 to a Date/Datetime Series.
|
14
120
|
#
|
15
121
|
# @param datatype [Symbol]
|
@@ -23,10 +129,23 @@ module Polars
|
|
23
129
|
# @param exact [Boolean]
|
24
130
|
# - If true, require an exact format match.
|
25
131
|
# - If false, allow the format to match anywhere in the target string.
|
132
|
+
# @param cache [Boolean]
|
133
|
+
# Use a cache of unique, converted dates to apply the datetime conversion.
|
26
134
|
#
|
27
135
|
# @return [Series]
|
28
136
|
#
|
29
|
-
# @example
|
137
|
+
# @example Dealing with a consistent format:
|
138
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
139
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
140
|
+
# # =>
|
141
|
+
# # shape: (2,)
|
142
|
+
# # Series: '' [datetime[μs, UTC]]
|
143
|
+
# # [
|
144
|
+
# # 2020-01-01 01:00:00 UTC
|
145
|
+
# # 2020-01-01 02:00:00 UTC
|
146
|
+
# # ]
|
147
|
+
#
|
148
|
+
# @example Dealing with different formats.
|
30
149
|
# s = Polars::Series.new(
|
31
150
|
# "date",
|
32
151
|
# [
|
@@ -36,28 +155,24 @@ module Polars
|
|
36
155
|
# "Sun Jul 8 00:34:60 2001"
|
37
156
|
# ]
|
38
157
|
# )
|
39
|
-
# s.to_frame.
|
40
|
-
# Polars.
|
41
|
-
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
|
-
# .
|
43
|
-
#
|
44
|
-
# )
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# )
|
158
|
+
# s.to_frame.select(
|
159
|
+
# Polars.coalesce(
|
160
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
161
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
162
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
163
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
164
|
+
# )
|
165
|
+
# ).to_series
|
48
166
|
# # =>
|
49
|
-
# # shape: (4,
|
50
|
-
# #
|
51
|
-
# #
|
52
|
-
# #
|
53
|
-
# #
|
54
|
-
# #
|
55
|
-
# #
|
56
|
-
# #
|
57
|
-
|
58
|
-
# # │ 2001-07-08 │
|
59
|
-
# # └────────────┘
|
60
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
167
|
+
# # shape: (4,)
|
168
|
+
# # Series: 'date' [date]
|
169
|
+
# # [
|
170
|
+
# # 2021-04-22
|
171
|
+
# # 2022-01-04
|
172
|
+
# # 2022-01-31
|
173
|
+
# # 2001-07-08
|
174
|
+
# # ]
|
175
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
|
61
176
|
super
|
62
177
|
end
|
63
178
|
|
data/lib/polars/utils.rb
CHANGED
@@ -40,17 +40,23 @@ module Polars
|
|
40
40
|
td
|
41
41
|
end
|
42
42
|
|
43
|
-
def self._datetime_to_pl_timestamp(dt,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
elsif
|
49
|
-
|
50
|
-
|
51
|
-
|
43
|
+
def self._datetime_to_pl_timestamp(dt, time_unit)
|
44
|
+
dt = dt.to_datetime.to_time
|
45
|
+
if time_unit == "ns"
|
46
|
+
nanos = dt.nsec
|
47
|
+
dt.to_i * 1_000_000_000 + nanos
|
48
|
+
elsif time_unit == "us"
|
49
|
+
micros = dt.usec
|
50
|
+
dt.to_i * 1_000_000 + micros
|
51
|
+
elsif time_unit == "ms"
|
52
|
+
millis = dt.usec / 1000
|
53
|
+
dt.to_i * 1_000 + millis
|
54
|
+
elsif time_unit.nil?
|
55
|
+
# Ruby has ns precision
|
56
|
+
nanos = dt.nsec
|
57
|
+
dt.to_i * 1_000_000_000 + nanos
|
52
58
|
else
|
53
|
-
raise ArgumentError, "
|
59
|
+
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
54
60
|
end
|
55
61
|
end
|
56
62
|
|
@@ -59,46 +65,56 @@ module Polars
|
|
59
65
|
dt.to_i / (3600 * 24)
|
60
66
|
end
|
61
67
|
|
62
|
-
def self.
|
63
|
-
if
|
64
|
-
|
65
|
-
# important to create from utc. Not doing this leads
|
66
|
-
# to inconsistencies dependent on the timezone you are in.
|
67
|
-
::Time.at(value * 86400).utc.to_date
|
68
|
-
# TODO fix dtype
|
69
|
-
elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
|
70
|
-
if tz.nil? || tz == ""
|
71
|
-
if tu == "ns"
|
72
|
-
raise Todo
|
73
|
-
elsif tu == "us"
|
74
|
-
dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
75
|
-
elsif tu == "ms"
|
76
|
-
raise Todo
|
77
|
-
else
|
78
|
-
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
79
|
-
end
|
80
|
-
else
|
81
|
-
raise Todo
|
82
|
-
end
|
83
|
-
|
84
|
-
dt
|
68
|
+
def self._to_ruby_time(value)
|
69
|
+
if value == 0
|
70
|
+
::Time.utc(2000, 1, 1)
|
85
71
|
else
|
86
|
-
|
72
|
+
seconds, nanoseconds = value.divmod(1_000_000_000)
|
73
|
+
minutes, seconds = seconds.divmod(60)
|
74
|
+
hours, minutes = minutes.divmod(60)
|
75
|
+
::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
|
87
76
|
end
|
88
77
|
end
|
89
78
|
|
90
|
-
def self._to_ruby_duration(value,
|
91
|
-
if
|
79
|
+
def self._to_ruby_duration(value, time_unit = "ns")
|
80
|
+
if time_unit == "ns"
|
92
81
|
value / 1e9
|
93
|
-
elsif
|
82
|
+
elsif time_unit == "us"
|
94
83
|
value / 1e6
|
95
|
-
elsif
|
84
|
+
elsif time_unit == "ms"
|
96
85
|
value / 1e3
|
97
86
|
else
|
98
|
-
raise ArgumentError, "
|
87
|
+
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
99
88
|
end
|
100
89
|
end
|
101
90
|
|
91
|
+
def self._to_ruby_date(value)
|
92
|
+
# days to seconds
|
93
|
+
# important to create from utc. Not doing this leads
|
94
|
+
# to inconsistencies dependent on the timezone you are in.
|
95
|
+
::Time.at(value * 86400).utc.to_date
|
96
|
+
end
|
97
|
+
|
98
|
+
def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
|
99
|
+
if time_zone.nil? || time_zone == ""
|
100
|
+
if time_unit == "ns"
|
101
|
+
return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
|
102
|
+
elsif time_unit == "us"
|
103
|
+
return ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
104
|
+
elsif time_unit == "ms"
|
105
|
+
return ::Time.at(value / 1000, value % 1000, :millisecond).utc
|
106
|
+
else
|
107
|
+
raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
|
108
|
+
end
|
109
|
+
else
|
110
|
+
raise Todo
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def self._to_ruby_decimal(digits, scale)
|
115
|
+
BigDecimal("#{digits}e#{scale}")
|
116
|
+
end
|
117
|
+
|
102
118
|
def self.selection_to_rbexpr_list(exprs)
|
103
119
|
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
104
120
|
exprs = [exprs]
|
@@ -139,16 +155,27 @@ module Polars
|
|
139
155
|
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
140
156
|
end
|
141
157
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
158
|
+
def self.map_rb_type_to_dtype(ruby_dtype)
|
159
|
+
if ruby_dtype == Float
|
160
|
+
Float64
|
161
|
+
elsif ruby_dtype == Integer
|
162
|
+
Int64
|
163
|
+
elsif ruby_dtype == String
|
164
|
+
Utf8
|
165
|
+
elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
|
166
|
+
Boolean
|
167
|
+
elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
|
168
|
+
Datetime.new("ns")
|
169
|
+
elsif ruby_dtype == ::Date
|
170
|
+
Date
|
171
|
+
elsif ruby_dtype == ::Array
|
172
|
+
List
|
173
|
+
elsif ruby_dtype == NilClass
|
174
|
+
Null
|
175
|
+
else
|
176
|
+
raise TypeError, "Invalid type"
|
177
|
+
end
|
178
|
+
end
|
152
179
|
|
153
180
|
# TODO fix
|
154
181
|
def self.rb_type_to_dtype(data_type)
|
@@ -158,8 +185,8 @@ module Polars
|
|
158
185
|
end
|
159
186
|
|
160
187
|
begin
|
161
|
-
|
162
|
-
rescue
|
188
|
+
map_rb_type_to_dtype(data_type)
|
189
|
+
rescue TypeError
|
163
190
|
raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
|
164
191
|
end
|
165
192
|
end
|
@@ -228,35 +255,58 @@ module Polars
|
|
228
255
|
end
|
229
256
|
|
230
257
|
def self.is_bool_sequence(val)
|
231
|
-
val.is_a?(Array) && val.all? { |x| x == true || x == false }
|
258
|
+
val.is_a?(::Array) && val.all? { |x| x == true || x == false }
|
232
259
|
end
|
233
260
|
|
234
261
|
def self.is_dtype_sequence(val)
|
235
|
-
val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
|
262
|
+
val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
|
236
263
|
end
|
237
264
|
|
238
265
|
def self.is_int_sequence(val)
|
239
|
-
val.is_a?(Array) && _is_iterable_of(val, Integer)
|
266
|
+
val.is_a?(::Array) && _is_iterable_of(val, Integer)
|
240
267
|
end
|
241
268
|
|
242
269
|
def self.is_expr_sequence(val)
|
243
|
-
val.is_a?(Array) && _is_iterable_of(val, Expr)
|
270
|
+
val.is_a?(::Array) && _is_iterable_of(val, Expr)
|
244
271
|
end
|
245
272
|
|
246
273
|
def self.is_rbexpr_sequence(val)
|
247
|
-
val.is_a?(Array) && _is_iterable_of(val, RbExpr)
|
274
|
+
val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
|
248
275
|
end
|
249
276
|
|
250
277
|
def self.is_str_sequence(val, allow_str: false)
|
251
278
|
if allow_str == false && val.is_a?(String)
|
252
279
|
false
|
253
280
|
else
|
254
|
-
val.is_a?(Array) && _is_iterable_of(val, String)
|
281
|
+
val.is_a?(::Array) && _is_iterable_of(val, String)
|
255
282
|
end
|
256
283
|
end
|
257
284
|
|
258
285
|
def self.local_file?(file)
|
259
286
|
Dir.glob(file).any?
|
260
287
|
end
|
288
|
+
|
289
|
+
def self.parse_as_expression(input, str_as_lit: false, structify: false)
|
290
|
+
if input.is_a?(Expr)
|
291
|
+
expr = input
|
292
|
+
elsif input.is_a?(String) && !str_as_lit
|
293
|
+
expr = Polars.col(input)
|
294
|
+
structify = false
|
295
|
+
elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
|
296
|
+
expr = Polars.lit(input)
|
297
|
+
structify = false
|
298
|
+
elsif input.is_a?(Array)
|
299
|
+
expr = Polars.lit(Polars::Series.new("", [input]))
|
300
|
+
structify = false
|
301
|
+
else
|
302
|
+
raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
|
303
|
+
end
|
304
|
+
|
305
|
+
if structify
|
306
|
+
raise Todo
|
307
|
+
end
|
308
|
+
|
309
|
+
expr._rbexpr
|
310
|
+
end
|
261
311
|
end
|
262
312
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -6,11 +6,14 @@ rescue LoadError
|
|
6
6
|
end
|
7
7
|
|
8
8
|
# stdlib
|
9
|
+
require "bigdecimal"
|
9
10
|
require "date"
|
10
11
|
require "stringio"
|
11
12
|
|
12
13
|
# modules
|
13
14
|
require_relative "polars/expr_dispatch"
|
15
|
+
require_relative "polars/array_expr"
|
16
|
+
require_relative "polars/array_name_space"
|
14
17
|
require_relative "polars/batched_csv_reader"
|
15
18
|
require_relative "polars/binary_expr"
|
16
19
|
require_relative "polars/binary_name_space"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -28,6 +28,8 @@ files:
|
|
28
28
|
- lib/polars/3.0/polars.bundle
|
29
29
|
- lib/polars/3.1/polars.bundle
|
30
30
|
- lib/polars/3.2/polars.bundle
|
31
|
+
- lib/polars/array_expr.rb
|
32
|
+
- lib/polars/array_name_space.rb
|
31
33
|
- lib/polars/batched_csv_reader.rb
|
32
34
|
- lib/polars/binary_expr.rb
|
33
35
|
- lib/polars/binary_name_space.rb
|