polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +2043 -1202
- data/README.md +2 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +2 -2
data/README.md CHANGED
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
 From Active Record
 
 ```ruby
-Polars.
+Polars.read_database(User.all)
 # or
-Polars.
+Polars.read_database("SELECT * FROM users")
 ```
 
 From JSON
data/lib/polars/3.0/polars.bundle CHANGED (binary file)
data/lib/polars/3.1/polars.bundle CHANGED (binary file)
data/lib/polars/3.2/polars.bundle CHANGED (binary file)
data/lib/polars/convert.rb CHANGED
@@ -26,8 +26,8 @@ module Polars
     # #   │ 1   ┆ 3   │
     # #   │ 2   ┆ 4   │
     # #   └─────┴─────┘
-    def from_hash(data, columns: nil)
-      DataFrame._from_hash(data,
+    def from_hash(data, schema: nil, columns: nil)
+      DataFrame._from_hash(data, schema: schema || columns)
     end
 
     # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
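For context, a minimal usage sketch of the new `schema:` keyword on `Polars.from_hash`, based only on the code in the hunk above (column names and data are illustrative; the old `columns:` keyword is kept as a fallback because it is forwarded as `schema || columns`):

```ruby
require "polars-df"

data = {"a" => [1, 2, 3], "b" => ["x", "y", "z"]}

# Passing schema: as a list of names selects/reorders the output columns.
df = Polars.from_hash(data, schema: ["b", "a"])
df.columns # => ["b", "a"]
```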
data/lib/polars/data_frame.rb CHANGED
@@ -18,7 +18,10 @@ module Polars
     #   Whether to interpret two-dimensional data as columns or as rows. If `nil`,
     #   the orientation is inferred by matching the columns and data dimensions. If
     #   this does not yield conclusive results, column orientation is used.
-    def initialize(data = nil, columns: nil, orient: nil)
+    def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
+      schema ||= columns
+      raise Todo if schema_overrides
+
       # TODO deprecate in favor of read_sql
       if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
         result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
       end
 
       if data.nil?
-        self._df = self.class.hash_to_rbdf({},
+        self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
       elsif data.is_a?(Hash)
         data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
-        self._df = self.class.hash_to_rbdf(data,
+        self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
      elsif data.is_a?(Array)
-        self._df = self.class.sequence_to_rbdf(data,
+        self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
       elsif data.is_a?(Series)
-        self._df = self.class.series_to_rbdf(data,
+        self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
       else
         raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
       end
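A hedged sketch of the expanded constructor, based on the keywords visible in the hunk above (data and column names are illustrative; note that `schema_overrides:` still raises `Todo` in this version):

```ruby
# Hash data with the new schema: keyword; scalar values are broadcast to the
# length of the longest column by the new expand_hash_scalars helper (see the
# later hunk in this file).
df = Polars::DataFrame.new(
  {"id" => [1, 2, 3], "group" => "a"},
  schema: ["id", "group"]
)

# Array-of-hashes data can control how many rows are sampled for dtype
# inference via infer_schema_length (the constructor defaults to 100).
rows = [{"x" => 1, "y" => "a"}, {"x" => 2, "y" => "b"}]
df2 = Polars::DataFrame.new(rows, infer_schema_length: 100)
```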
@@ -56,8 +59,8 @@ module Polars
     end
 
     # @private
-    def self._from_hash(data,
-      _from_rbdf(hash_to_rbdf(data,
+    def self._from_hash(data, schema: nil, schema_overrides: nil)
+      _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
     end
 
     # def self._from_records
@@ -336,6 +339,7 @@ module Polars
     end
     alias_method :count, :height
     alias_method :length, :height
+    alias_method :size, :height
 
     # Get the width of the DataFrame.
     #
@@ -546,6 +550,13 @@ module Polars
     end
     alias_method :inspect, :to_s
 
+    # Returns an array representing the DataFrame
+    #
+    # @return [Array]
+    def to_a
+      rows(named: true)
+    end
+
     # Check if DataFrame includes column.
     #
     # @return [Boolean]
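A small sketch of the two conveniences added above: `size` as another alias for `height`, and `to_a`, which simply delegates to `rows(named: true)`:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => ["x", "y"]})

df.size  # same as df.height => 2
df.to_a  # rows as hashes, e.g. [{"a" => 1, "b" => "x"}, {"a" => 2, "b" => "y"}]
```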
@@ -655,7 +666,7 @@ module Polars
       end
 
       # Ruby-specific
-      if item.is_a?(Expr)
+      if item.is_a?(Expr) || item.is_a?(Series)
         return filter(item)
       end
 
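With the change above, a boolean `Series` mask can be passed to `[]` directly and is routed to `filter`; a sketch (data is illustrative):

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["x", "y", "z"]})

mask = df["a"] > 1  # comparing a Series yields a boolean Series
df[mask]            # now equivalent to df.filter(mask)
```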
@@ -665,15 +676,42 @@ module Polars
     # Set item.
     #
     # @return [Object]
-
-
-
-
-
+    def []=(*key, value)
+      if key.length == 1
+        key = key.first
+      elsif key.length != 2
+        raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
+      end
 
-
-
+      if Utils.strlike?(key)
+        if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
+          value = Series.new(value)
+        elsif !value.is_a?(Series)
+          value = Polars.lit(value)
+        end
+        self._df = with_column(value.alias(key.to_s))._df
+      elsif key.is_a?(Array)
+        row_selection, col_selection = key
 
+        if Utils.strlike?(col_selection)
+          s = self[col_selection]
+        elsif col_selection.is_a?(Integer)
+          raise Todo
+        else
+          raise ArgumentError, "column selection not understood: #{col_selection}"
+        end
+
+        s[row_selection] = value
+
+        if col_selection.is_a?(Integer)
+          replace_at_idx(col_selection, s)
+        elsif Utils.strlike?(col_selection)
+          replace(col_selection, s)
+        end
+      else
+        raise Todo
+      end
+    end
 
     # Return the dataframe as a scalar.
     #
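A usage sketch of the new `[]=` writer defined above (column names and values are illustrative): a string key adds or overwrites a column, while a `[rows, column]` pair updates values inside an existing column.

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3]})

# An Array (or Numo::NArray) assigned to a string key is wrapped in a Series
# and added/overwritten via with_column.
df["b"] = [10, 20, 30]

# A plain scalar goes through Polars.lit, producing a constant column.
df["c"] = 0

# A [row_selection, column] pair writes into the selected column in place.
df[0, "a"] = 99
```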
@@ -1462,6 +1500,20 @@ module Polars
       end
     end
 
+    # Sort the DataFrame by column in-place.
+    #
+    # @param by [String]
+    #   By which column to sort.
+    # @param reverse [Boolean]
+    #   Reverse/descending sort.
+    # @param nulls_last [Boolean]
+    #   Place null values last. Can only be used if sorted by a single column.
+    #
+    # @return [DataFrame]
+    def sort!(by, reverse: false, nulls_last: false)
+      self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
+    end
+
     # Check if DataFrame is equal to other.
     #
     # @param other [DataFrame]
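The in-place variant simply reassigns the underlying `_df`, so it mirrors `sort`; for example:

```ruby
df = Polars::DataFrame.new({"a" => [3, 1, 2]})

df.sort!("a")                 # ascending, sorted in place
df.sort!("a", reverse: true)  # descending
```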
@@ -1519,7 +1571,7 @@ module Polars
     # #   │ 30  ┆ 6   │
     # #   └─────┴─────┘
     def replace(column, new_col)
-      _df.replace(column, new_col._s)
+      _df.replace(column.to_s, new_col._s)
       self
     end
 
@@ -1860,7 +1912,7 @@ module Polars
     #     "2020-01-08 23:16:43"
     #   ]
     #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
-    #     Polars.col("dt").str.strptime(
+    #     Polars.col("dt").str.strptime(Polars::Datetime)
     #   )
     #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
     #     [
@@ -2791,6 +2843,16 @@ module Polars
       Utils.wrap_s(_df.drop_in_place(name))
     end
 
+    # Drop in place if exists.
+    #
+    # @param name [Object]
+    #   Column to drop.
+    #
+    # @return [Series]
+    def delete(name)
+      drop_in_place(name) if include?(name)
+    end
+
     # Create an empty copy of the current DataFrame.
     #
     # Returns a DataFrame with identical schema but no data.
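Unlike `drop_in_place`, the new `delete` does not raise when the column is missing; per the code above it returns the dropped `Series`, or `nil` when no such column exists:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})

df.delete("b")        # => the "b" Series, removed from df
df.delete("missing")  # => nil, df unchanged
```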
@@ -3202,7 +3264,7 @@ module Polars
     # #   │ B   ┆ 1   │
     # #   │ C   ┆ 2   │
     # #   │ D   ┆ 3   │
-    # #   │
+    # #   │ E   ┆ 4   │
     # #   │ F   ┆ 5   │
     # #   │ G   ┆ 6   │
     # #   │ H   ┆ 7   │
@@ -4428,7 +4490,7 @@ module Polars
       end
     end
 
-    # Returns an iterator over the DataFrame of rows of
+    # Returns an iterator over the DataFrame of rows of Ruby-native values.
     #
     # @param named [Boolean]
     #   Return hashes instead of arrays. The hashes are a mapping of
@@ -4489,6 +4551,24 @@ module Polars
       end
     end
 
+    # Returns an iterator over the DataFrame of rows of Ruby-native values.
+    #
+    # @param named [Boolean]
+    #   Return hashes instead of arrays. The hashes are a mapping of
+    #   column name to row value. This is more expensive than returning an
+    #   array, but allows for accessing values by column name.
+    # @param buffer_size [Integer]
+    #   Determines the number of rows that are buffered internally while iterating
+    #   over the data; you should only modify this in very specific cases where the
+    #   default value is determined not to be a good fit to your access pattern, as
+    #   the speedup from using the buffer is significant (~2-4x). Setting this
+    #   value to zero disables row buffering.
+    #
+    # @return [Object]
+    def each_row(named: true, buffer_size: 500, &block)
+      iter_rows(named: named, buffer_size: buffer_size, &block)
+    end
+
     # Shrink DataFrame memory usage.
     #
     # Shrinks to fit the exact capacity needed to hold the data.
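`each_row` is a thin wrapper around `iter_rows` with `named: true` by default; a sketch:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => ["x", "y"]})

df.each_row do |row|
  # row is a Hash like {"a" => 1, "b" => "x"} because named: defaults to true
  puts row["a"]
end

# Pass named: false to receive plain arrays instead of hashes.
df.each_row(named: false) { |row| p row }
```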
@@ -4742,20 +4822,63 @@ module Polars
     end
 
     # @private
-    def self.
-
-
+    def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
+      updated_data = {}
+      unless data.empty?
+        dtypes = schema_overrides || {}
+        array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
+        if array_len > 0
+          data.each do |name, val|
+            dtype = dtypes[name]
+            if val.is_a?(Hash) && dtype != Struct
+              updated_data[name] = DataFrame.new(val).to_struct(name)
+            elsif !Utils.arrlen(val).nil?
+              updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
+            elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
+              dtype = Polars::Float64 if val.nil? && dtype.nil?
+              updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
+            else
+              raise Todo
+            end
+          end
+        elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
+          data.each do |name, val|
+            updated_data[name] = Series.new(name, val, dtype: dtypes[name])
+          end
+        elsif data.values.all? { |val| Utils.arrlen(val).nil? }
+          data.each do |name, val|
+            updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
+          end
+        end
+      end
+      updated_data
+    end
 
-
-
-
-
+    # @private
+    def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
+      if schema.is_a?(Hash) && !data.empty?
+        if !data.all? { |col, _| schema[col] }
+          raise ArgumentError, "The given column-schema names do not match the data dictionary"
         end
-
-
+
+        data = schema.to_h { |col| [col, data[col]] }
+      end
+
+      column_names, schema_overrides = _unpack_schema(
+        schema, lookup_names: data.keys, schema_overrides: schema_overrides
+      )
+      if column_names.empty?
+        column_names = data.keys
       end
 
-
+      if data.empty? && !schema_overrides.empty?
+        data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
+      else
+        data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
+      end
+
+      data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
+      RbDataFrame.new(data_series)
     end
 
     # @private
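These private helpers are what give the Hash constructor its broadcasting behaviour; a rough sketch of the effect, based on the `expand_hash_scalars` code above (values are illustrative):

```ruby
# Array-like values keep their length, scalars are repeated to the longest
# column via extend_constant, and a nil scalar defaults to a Float64 column
# of nulls. Hash values (unless a Struct dtype is supplied) are turned into
# Struct columns through DataFrame.new(val).to_struct(name).
df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => "const", "c" => nil})
df.shape # => [3, 3]
```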
@@ -4764,14 +4887,12 @@ module Polars
     end
 
     # @private
-    def self.
-
-
-      if columns.is_a?(Hash)
-        columns = columns.to_a
+    def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
+      if schema.is_a?(Hash)
+        schema = schema.to_a
       end
       column_names =
-        (
+        (schema || []).map.with_index do |col, i|
           if col.is_a?(String)
             col || "column_#{i}"
           else
@@ -4784,21 +4905,38 @@ module Polars
       # TODO zip_longest
       lookup = column_names.zip(lookup_names || []).to_h
 
-
-
-      (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
+      column_dtypes =
+        (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
           [lookup[col[0]] || col[0], col[1]]
         end
-
+
+      if schema_overrides
+        raise Todo
+      end
+
+      column_dtypes.each do |col, dtype|
+        if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
+          column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
+        end
+      end
+
+      [column_names, column_dtypes]
     end
 
-    def self._handle_columns_arg(data, columns: nil)
-      if columns.nil?
+    def self._handle_columns_arg(data, columns: nil, from_hash: false)
+      if columns.nil? || columns.empty?
         data
       else
         if data.empty?
           columns.map { |c| Series.new(c, nil)._s }
         elsif data.length == columns.length
+          if from_hash
+            series_map = data.to_h { |s| [s.name, s] }
+            if columns.all? { |col| series_map.key?(col) }
+              return columns.map { |col| series_map[col] }
+            end
+          end
+
           columns.each_with_index do |c, i|
             # not in-place?
             data[i].rename(c)
@@ -4813,7 +4951,7 @@ module Polars
     def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
       rbdf_columns = rbdf.columns
       rbdf_dtypes = rbdf.dtypes
-      columns, dtypes =
+      columns, dtypes = _unpack_schema(
        (columns || rbdf_columns), schema_overrides: schema_overrides
       )
       column_subset = []
@@ -4851,20 +4989,23 @@ module Polars
     end
 
     # @private
-    def self.sequence_to_rbdf(data,
+    def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
+      raise Todo if schema_overrides
+      columns = schema
+
       if data.length == 0
-        return hash_to_rbdf({},
+        return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
       end
 
       if data[0].is_a?(Series)
         # series_names = data.map(&:name)
-        # columns, dtypes =
+        # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
        data_series = []
         data.each do |s|
           data_series << s._s
         end
       elsif data[0].is_a?(Hash)
-        column_names, dtypes =
+        column_names, dtypes = _unpack_schema(columns)
         schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
         rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
         if column_names
@@ -4890,11 +5031,21 @@ module Polars
     end
 
     # @private
-    def self.series_to_rbdf(data,
-
-
+    def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
+      data_series = [data._s]
+      series_name = data_series.map(&:name)
+      column_names, schema_overrides = _unpack_schema(
+        schema || series_name, schema_overrides: schema_overrides, n_expected: 1
+      )
+      if schema_overrides.any?
+        new_dtype = schema_overrides.values[0]
+        if new_dtype != data.dtype
+          data_series[0] = data_series[0].cast(new_dtype, true)
+        end
       end
-
+
+      data_series = _handle_columns_arg(data_series, columns: column_names)
+      RbDataFrame.new(data_series)
     end
 
     def wrap_ldf(ldf)
data/lib/polars/data_types.rb CHANGED
@@ -84,20 +84,22 @@ module Polars
 
   # Calendar date and time type.
   class Datetime < TemporalType
-    attr_reader :
+    attr_reader :time_unit, :time_zone
+    alias_method :tu, :time_unit
 
     def initialize(time_unit = "us", time_zone = nil)
-      @
+      @time_unit = time_unit || "us"
       @time_zone = time_zone
     end
   end
 
   # Time duration/delta type.
   class Duration < TemporalType
-    attr_reader :
+    attr_reader :time_unit
+    alias_method :tu, :time_unit
 
     def initialize(time_unit = "us")
-      @
+      @time_unit = time_unit
     end
   end
 
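The renamed readers keep `tu` as an alias, so both spellings work; a quick sketch:

```ruby
dtype = Polars::Datetime.new("ms", "UTC")
dtype.time_unit # => "ms"
dtype.tu        # => "ms" (alias kept for the old shorthand)
dtype.time_zone # => "UTC"

Polars::Duration.new("ns").time_unit # => "ns"
```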
data/lib/polars/date_time_expr.rb CHANGED
@@ -218,6 +218,25 @@ module Polars
       )
     end
 
+    # Create a naive Datetime from an existing Date/Datetime expression and a Time.
+    #
+    # If the underlying expression is a Datetime then its time component is replaced,
+    # and if it is a Date then a new Datetime is created by combining the two values.
+    #
+    # @param time [Object]
+    #   A Ruby time literal or Polars expression/column that resolves to a time.
+    # @param time_unit ["ns", "us", "ms"]
+    #   Unit of time.
+    #
+    # @return [Expr]
+    def combine(time, time_unit: "us")
+      unless time.is_a?(Time) || time.is_a?(Expr)
+        raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
+      end
+      time = Utils.expr_to_lit_or_expr(time)
+      Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
+    end
+
     # Format Date/datetime with a formatting rule.
     #
     # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
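A hedged sketch of `dt.combine` in expression form (the column names "date" and "ts" are illustrative); here the time component is taken from another column via the new `dt.time` accessor introduced later in this file:

```ruby
require "date"

df = Polars::DataFrame.new(
  {
    "date" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "ts" => [DateTime.new(2022, 1, 1, 10, 30), DateTime.new(2022, 1, 2, 11, 45)]
  }
)

# Attach the time-of-day from "ts" onto the dates in "date".
df.select(Polars.col("date").dt.combine(Polars.col("ts").dt.time))
```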
@@ -270,6 +289,34 @@ module Polars
       Utils.wrap_expr(_rbexpr.year)
     end
 
+    # Determine whether the year of the underlying date is a leap year.
+    #
+    # Applies to Date and Datetime columns.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   start = DateTime.new(2000, 1, 1)
+    #   stop = DateTime.new(2002, 1, 1)
+    #   df = Polars::DataFrame.new(
+    #     {"date" => Polars.date_range(start, stop, "1y")}
+    #   )
+    #   df.select(Polars.col("date").dt.is_leap_year)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌───────┐
+    #   # │ date  │
+    #   # │ ---   │
+    #   # │ bool  │
+    #   # ╞═══════╡
+    #   # │ true  │
+    #   # │ false │
+    #   # │ false │
+    #   # └───────┘
+    def is_leap_year
+      Utils.wrap_expr(_rbexpr.dt_is_leap_year)
+    end
+
     # Extract ISO year from underlying Date representation.
     #
     # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
       Utils.wrap_expr(_rbexpr.ordinal_day)
     end
 
+    # Time
+    #
+    # @return [Expr]
+    def time
+      Utils.wrap_expr(_rbexpr.dt_time)
+    end
+
+    # Date
+    #
+    # @return [Expr]
+    def date
+      Utils.wrap_expr(_rbexpr.dt_date)
+    end
+
+    # Datetime
+    #
+    # @return [Expr]
+    def datetime
+      Utils.wrap_expr(_rbexpr.dt_datetime)
+    end
+
     # Extract hour from underlying DateTime representation.
     #
     # Applies to Datetime columns.
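A short sketch of the three new accessors added above (the column name and aliases are illustrative; semantics are inferred from the underlying `dt_time`, `dt_date` and `dt_datetime` bindings):

```ruby
df = Polars::DataFrame.new(
  {"ts" => [DateTime.new(2022, 3, 4, 5, 6, 7), DateTime.new(2023, 8, 9, 10, 11, 12)]}
)

df.select(
  [
    Polars.col("ts").dt.date.alias("date"),         # calendar-date part
    Polars.col("ts").dt.time.alias("time"),         # time-of-day part
    Polars.col("ts").dt.datetime.alias("datetime")  # datetime representation
  ]
)
```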
@@ -958,8 +1026,8 @@ module Polars
     #   Time zone for the `Datetime` Series.
     #
     # @return [Expr]
-    def replace_time_zone(tz)
-      Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
+    def replace_time_zone(tz, use_earliest: nil)
+      Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
     end
 
     # Localize tz-naive Datetime Series to tz-aware Datetime Series.
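A hedged sketch of the new `use_earliest:` flag; it is forwarded straight to `dt_replace_time_zone`, where it typically disambiguates local datetimes that occur twice around a DST transition (the time zone and timestamp below are illustrative):

```ruby
df = Polars::DataFrame.new(
  {"ts" => [DateTime.new(2018, 10, 28, 2, 30)]}  # an ambiguous local time in many EU zones
)

df.select(
  Polars.col("ts").dt.replace_time_zone("Europe/Brussels", use_earliest: true)
)
```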
@@ -1282,5 +1350,77 @@ module Polars
     def offset_by(by)
       Utils.wrap_expr(_rbexpr.dt_offset_by(by))
     end
+
+    # Roll backward to the first day of the month.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "dates" => Polars.date_range(
+    #         DateTime.new(2000, 1, 15, 2),
+    #         DateTime.new(2000, 12, 15, 2),
+    #         "1mo"
+    #       )
+    #     }
+    #   )
+    #   df.select(Polars.col("dates").dt.month_start)
+    #   # =>
+    #   # shape: (12, 1)
+    #   # ┌─────────────────────┐
+    #   # │ dates               │
+    #   # │ ---                 │
+    #   # │ datetime[μs]        │
+    #   # ╞═════════════════════╡
+    #   # │ 2000-01-01 02:00:00 │
+    #   # │ 2000-02-01 02:00:00 │
+    #   # │ 2000-03-01 02:00:00 │
+    #   # │ 2000-04-01 02:00:00 │
+    #   # │ …                   │
+    #   # │ 2000-09-01 02:00:00 │
+    #   # │ 2000-10-01 02:00:00 │
+    #   # │ 2000-11-01 02:00:00 │
+    #   # │ 2000-12-01 02:00:00 │
+    #   # └─────────────────────┘
+    def month_start
+      Utils.wrap_expr(_rbexpr.dt_month_start)
+    end
+
+    # Roll forward to the last day of the month.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "dates" => Polars.date_range(
+    #         DateTime.new(2000, 1, 15, 2),
+    #         DateTime.new(2000, 12, 15, 2),
+    #         "1mo"
+    #       )
+    #     }
+    #   )
+    #   df.select(Polars.col("dates").dt.month_end)
+    #   # =>
+    #   # shape: (12, 1)
+    #   # ┌─────────────────────┐
+    #   # │ dates               │
+    #   # │ ---                 │
+    #   # │ datetime[μs]        │
+    #   # ╞═════════════════════╡
+    #   # │ 2000-01-31 02:00:00 │
+    #   # │ 2000-02-29 02:00:00 │
+    #   # │ 2000-03-31 02:00:00 │
+    #   # │ 2000-04-30 02:00:00 │
+    #   # │ …                   │
+    #   # │ 2000-09-30 02:00:00 │
+    #   # │ 2000-10-31 02:00:00 │
+    #   # │ 2000-11-30 02:00:00 │
+    #   # │ 2000-12-31 02:00:00 │
+    #   # └─────────────────────┘
+    def month_end
+      Utils.wrap_expr(_rbexpr.dt_month_end)
+    end
   end
 end