polars-df 0.4.0-x86_64-linux → 0.5.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +1990 -1149
- data/README.md +2 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +2 -2
data/README.md
CHANGED
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
 
 From Active Record
 
 ```ruby
-Polars.
+Polars.read_database(User.all)
 # or
-Polars.
+Polars.read_database("SELECT * FROM users")
 ```
 
 From JSON
data/lib/polars/3.0/polars.so
CHANGED
Binary file
data/lib/polars/3.1/polars.so
CHANGED
Binary file
data/lib/polars/3.2/polars.so
CHANGED
Binary file
data/lib/polars/convert.rb
CHANGED
@@ -26,8 +26,8 @@ module Polars
 #   # │ 1 ┆ 3 │
 #   # │ 2 ┆ 4 │
 #   # └─────┴─────┘
-def from_hash(data, columns: nil)
-  DataFrame._from_hash(data,
+def from_hash(data, schema: nil, columns: nil)
+  DataFrame._from_hash(data, schema: schema || columns)
 end
 
 # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
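A minimal usage sketch of the new keyword; the column names and dtypes below are illustrative, not taken from the package:

```ruby
require "polars-df"

# schema: is the new keyword; columns: is still accepted and used as a fallback
df = Polars.from_hash(
  {"a" => [1, 2, 3], "b" => ["x", "y", "z"]},
  schema: {"a" => Polars::Int64, "b" => Polars::Utf8}
)
df.schema # column dtypes follow the given schema
```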
data/lib/polars/data_frame.rb
CHANGED
@@ -18,7 +18,10 @@ module Polars
 # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
 # the orientation is inferred by matching the columns and data dimensions. If
 # this does not yield conclusive results, column orientation is used.
-def initialize(data = nil, columns: nil, orient: nil)
+def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
+  schema ||= columns
+  raise Todo if schema_overrides
+
   # TODO deprecate in favor of read_sql
   if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
     result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
   end
 
   if data.nil?
-    self._df = self.class.hash_to_rbdf({},
+    self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
   elsif data.is_a?(Hash)
     data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
-    self._df = self.class.hash_to_rbdf(data,
+    self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
   elsif data.is_a?(Array)
-    self._df = self.class.sequence_to_rbdf(data,
+    self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
   elsif data.is_a?(Series)
-    self._df = self.class.series_to_rbdf(data,
+    self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
   else
     raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
   end
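A sketch of the widened constructor; the data and dtypes are illustrative. `schema_overrides` is accepted in the signature but still raises `Todo` in this release:

```ruby
df = Polars::DataFrame.new(
  {"id" => [1, 2], "name" => ["a", "b"]},
  schema: {"id" => Polars::Int32, "name" => Polars::Utf8}
)

# columns: is kept for backwards compatibility and is used when schema: is not given
legacy = Polars::DataFrame.new({"id" => [1, 2]}, columns: {"id" => Polars::Int16})
```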
@@ -56,8 +59,8 @@ module Polars
 end
 
 # @private
-def self._from_hash(data,
-  _from_rbdf(hash_to_rbdf(data,
+def self._from_hash(data, schema: nil, schema_overrides: nil)
+  _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
 end
 
 # def self._from_records
@@ -336,6 +339,7 @@ module Polars
 end
 alias_method :count, :height
 alias_method :length, :height
+alias_method :size, :height
 
 # Get the width of the DataFrame.
 #
@@ -546,6 +550,13 @@ module Polars
 end
 alias_method :inspect, :to_s
 
+# Returns an array representing the DataFrame
+#
+# @return [Array]
+def to_a
+  rows(named: true)
+end
+
 # Check if DataFrame includes column.
 #
 # @return [Boolean]
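The new `size` alias and `to_a` in use; the data is illustrative:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.size # => 3, same as df.height / df.length / df.count
df.to_a # => [{"a" => 1}, {"a" => 2}, {"a" => 3}]
```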
@@ -655,7 +666,7 @@ module Polars
 end
 
 # Ruby-specific
-if item.is_a?(Expr)
+if item.is_a?(Expr) || item.is_a?(Series)
   return filter(item)
 end
 
@@ -665,15 +676,42 @@ module Polars
 # Set item.
 #
 # @return [Object]
-
-
-
-
-
+def []=(*key, value)
+  if key.length == 1
+    key = key.first
+  elsif key.length != 2
+    raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
+  end
 
-
-
+  if Utils.strlike?(key)
+    if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
+      value = Series.new(value)
+    elsif !value.is_a?(Series)
+      value = Polars.lit(value)
+    end
+    self._df = with_column(value.alias(key.to_s))._df
+  elsif key.is_a?(Array)
+    row_selection, col_selection = key
 
+    if Utils.strlike?(col_selection)
+      s = self[col_selection]
+    elsif col_selection.is_a?(Integer)
+      raise Todo
+    else
+      raise ArgumentError, "column selection not understood: #{col_selection}"
+    end
+
+    s[row_selection] = value
+
+    if col_selection.is_a?(Integer)
+      replace_at_idx(col_selection, s)
+    elsif Utils.strlike?(col_selection)
+      replace(col_selection, s)
+    end
+  else
+    raise Todo
+  end
+end
 
 # Return the dataframe as a scalar.
 #
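A sketch of the new setter; the column names and values are illustrative:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3]})

# single key: arrays become a Series, other scalars become a literal expression
df["b"] = [4, 5, 6]
df["c"] = 0

# [row, column] form writes through the column's Series and puts it back
df[0, "b"] = 40
```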
@@ -1462,6 +1500,20 @@ module Polars
   end
 end
 
+# Sort the DataFrame by column in-place.
+#
+# @param by [String]
+#   By which column to sort.
+# @param reverse [Boolean]
+#   Reverse/descending sort.
+# @param nulls_last [Boolean]
+#   Place null values last. Can only be used if sorted by a single column.
+#
+# @return [DataFrame]
+def sort!(by, reverse: false, nulls_last: false)
+  self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
+end
+
 # Check if DataFrame is equal to other.
 #
 # @param other [DataFrame]
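In-place sorting in use, with illustrative data:

```ruby
df = Polars::DataFrame.new({"a" => [3, 1, 2]})
df.sort!("a")                # mutates df instead of returning a new frame
df.sort!("a", reverse: true) # descending
```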
@@ -1519,7 +1571,7 @@ module Polars
 #   # │ 30 ┆ 6 │
 #   # └─────┴─────┘
 def replace(column, new_col)
-  _df.replace(column, new_col._s)
+  _df.replace(column.to_s, new_col._s)
   self
 end
 
@@ -1860,7 +1912,7 @@ module Polars
 #     "2020-01-08 23:16:43"
 #   ]
 #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
-#     Polars.col("dt").str.strptime(
+#     Polars.col("dt").str.strptime(Polars::Datetime)
 #   )
 #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
 #     [
@@ -2791,6 +2843,16 @@ module Polars
   Utils.wrap_s(_df.drop_in_place(name))
 end
 
+# Drop in place if exists.
+#
+# @param name [Object]
+#   Column to drop.
+#
+# @return [Series]
+def delete(name)
+  drop_in_place(name) if include?(name)
+end
+
 # Create an empty copy of the current DataFrame.
 #
 # Returns a DataFrame with identical schema but no data.
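`delete` in use with illustrative data; unlike `drop_in_place`, a missing column is not an error:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
df.delete("b")       # => the removed Series; df now only has "a"
df.delete("missing") # => nil, df is unchanged
```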
@@ -3202,7 +3264,7 @@ module Polars
 #   # │ B ┆ 1 │
 #   # │ C ┆ 2 │
 #   # │ D ┆ 3 │
-#   # │
+#   # │ E ┆ 4 │
 #   # │ F ┆ 5 │
 #   # │ G ┆ 6 │
 #   # │ H ┆ 7 │
@@ -4428,7 +4490,7 @@ module Polars
   end
 end
 
-# Returns an iterator over the DataFrame of rows of
+# Returns an iterator over the DataFrame of rows of Ruby-native values.
 #
 # @param named [Boolean]
 #   Return hashes instead of arrays. The hashes are a mapping of
@@ -4489,6 +4551,24 @@ module Polars
   end
 end
 
+# Returns an iterator over the DataFrame of rows of Ruby-native values.
+#
+# @param named [Boolean]
+#   Return hashes instead of arrays. The hashes are a mapping of
+#   column name to row value. This is more expensive than returning an
+#   array, but allows for accessing values by column name.
+# @param buffer_size [Integer]
+#   Determines the number of rows that are buffered internally while iterating
+#   over the data; you should only modify this in very specific cases where the
+#   default value is determined not to be a good fit to your access pattern, as
+#   the speedup from using the buffer is significant (~2-4x). Setting this
+#   value to zero disables row buffering.
+#
+# @return [Object]
+def each_row(named: true, buffer_size: 500, &block)
+  iter_rows(named: named, buffer_size: buffer_size, &block)
+end
+
 # Shrink DataFrame memory usage.
 #
 # Shrinks to fit the exact capacity needed to hold the data.
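`each_row` is a block-friendly wrapper around `iter_rows`; a minimal sketch with illustrative data:

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => ["x", "y"]})
df.each_row do |row|
  p row # a Hash by default, e.g. {"a" => 1, "b" => "x"}
end
```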
@@ -4742,20 +4822,63 @@ module Polars
 end
 
 # @private
-def self.
-
-
+def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
+  updated_data = {}
+  unless data.empty?
+    dtypes = schema_overrides || {}
+    array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
+    if array_len > 0
+      data.each do |name, val|
+        dtype = dtypes[name]
+        if val.is_a?(Hash) && dtype != Struct
+          updated_data[name] = DataFrame.new(val).to_struct(name)
+        elsif !Utils.arrlen(val).nil?
+          updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
+        elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
+          dtype = Polars::Float64 if val.nil? && dtype.nil?
+          updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
+        else
+          raise Todo
+        end
+      end
+    elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
+      data.each do |name, val|
+        updated_data[name] = Series.new(name, val, dtype: dtypes[name])
+      end
+    elsif data.values.all? { |val| Utils.arrlen(val).nil? }
+      data.each do |name, val|
+        updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
+      end
+    end
+  end
+  updated_data
+end
 
-
-
-
-
+# @private
+def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
+  if schema.is_a?(Hash) && !data.empty?
+    if !data.all? { |col, _| schema[col] }
+      raise ArgumentError, "The given column-schema names do not match the data dictionary"
     end
-
-
+
+    data = schema.to_h { |col| [col, data[col]] }
+  end
+
+  column_names, schema_overrides = _unpack_schema(
+    schema, lookup_names: data.keys, schema_overrides: schema_overrides
+  )
+  if column_names.empty?
+    column_names = data.keys
   end
 
-
+  if data.empty? && !schema_overrides.empty?
+    data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
+  else
+    data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
+  end
+
+  data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
+  RbDataFrame.new(data_series)
 end
 
 # @private
@@ -4764,14 +4887,12 @@ module Polars
 end
 
 # @private
-def self.
-
-
-if columns.is_a?(Hash)
-  columns = columns.to_a
+def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
+  if schema.is_a?(Hash)
+    schema = schema.to_a
   end
   column_names =
-    (
+    (schema || []).map.with_index do |col, i|
       if col.is_a?(String)
         col || "column_#{i}"
       else
@@ -4784,21 +4905,38 @@ module Polars
   # TODO zip_longest
   lookup = column_names.zip(lookup_names || []).to_h
 
-
-
-  (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
+  column_dtypes =
+    (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
       [lookup[col[0]] || col[0], col[1]]
     end
-
+
+  if schema_overrides
+    raise Todo
+  end
+
+  column_dtypes.each do |col, dtype|
+    if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
+      column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
+    end
+  end
+
+  [column_names, column_dtypes]
 end
 
-def self._handle_columns_arg(data, columns: nil)
-  if columns.nil?
+def self._handle_columns_arg(data, columns: nil, from_hash: false)
+  if columns.nil? || columns.empty?
     data
   else
     if data.empty?
       columns.map { |c| Series.new(c, nil)._s }
     elsif data.length == columns.length
+      if from_hash
+        series_map = data.to_h { |s| [s.name, s] }
+        if columns.all? { |col| series_map.key?(col) }
+          return columns.map { |col| series_map[col] }
+        end
+      end
+
       columns.each_with_index do |c, i|
         # not in-place?
         data[i].rename(c)
@@ -4813,7 +4951,7 @@ module Polars
 def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
   rbdf_columns = rbdf.columns
   rbdf_dtypes = rbdf.dtypes
-  columns, dtypes =
+  columns, dtypes = _unpack_schema(
     (columns || rbdf_columns), schema_overrides: schema_overrides
   )
   column_subset = []
@@ -4851,20 +4989,23 @@ module Polars
 end
 
 # @private
-def self.sequence_to_rbdf(data,
+def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
+  raise Todo if schema_overrides
+  columns = schema
+
   if data.length == 0
-    return hash_to_rbdf({},
+    return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
   end
 
   if data[0].is_a?(Series)
     # series_names = data.map(&:name)
-    # columns, dtypes =
+    # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
     data_series = []
     data.each do |s|
      data_series << s._s
    end
  elsif data[0].is_a?(Hash)
-    column_names, dtypes =
+    column_names, dtypes = _unpack_schema(columns)
    schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
    rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
    if column_names
@@ -4890,11 +5031,21 @@ module Polars
 end
 
 # @private
-def self.series_to_rbdf(data,
-
-
+def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
+  data_series = [data._s]
+  series_name = data_series.map(&:name)
+  column_names, schema_overrides = _unpack_schema(
+    schema || series_name, schema_overrides: schema_overrides, n_expected: 1
+  )
+  if schema_overrides.any?
+    new_dtype = schema_overrides.values[0]
+    if new_dtype != data.dtype
+      data_series[0] = data_series[0].cast(new_dtype, true)
     end
-
+  end
+
+  data_series = _handle_columns_arg(data_series, columns: column_names)
+  RbDataFrame.new(data_series)
 end
 
 def wrap_ldf(ldf)
data/lib/polars/data_types.rb
CHANGED
@@ -84,20 +84,22 @@ module Polars
 
 # Calendar date and time type.
 class Datetime < TemporalType
-  attr_reader :
+  attr_reader :time_unit, :time_zone
+  alias_method :tu, :time_unit
 
   def initialize(time_unit = "us", time_zone = nil)
-    @
+    @time_unit = time_unit || "us"
     @time_zone = time_zone
   end
 end
 
 # Time duration/delta type.
 class Duration < TemporalType
-  attr_reader :
+  attr_reader :time_unit
+  alias_method :tu, :time_unit
 
   def initialize(time_unit = "us")
-    @
+    @time_unit = time_unit
   end
 end
 
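The renamed readers in use; the values are illustrative, and `tu` remains available as an alias:

```ruby
dtype = Polars::Datetime.new("ms", "UTC")
dtype.time_unit # => "ms"
dtype.tu        # => "ms"
dtype.time_zone # => "UTC"

Polars::Duration.new.time_unit # => "us"
```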
data/lib/polars/date_time_expr.rb
CHANGED
@@ -218,6 +218,25 @@ module Polars
   )
 end
 
+# Create a naive Datetime from an existing Date/Datetime expression and a Time.
+#
+# If the underlying expression is a Datetime then its time component is replaced,
+# and if it is a Date then a new Datetime is created by combining the two values.
+#
+# @param time [Object]
+#   A Ruby time literal or Polars expression/column that resolves to a time.
+# @param time_unit ["ns", "us", "ms"]
+#   Unit of time.
+#
+# @return [Expr]
+def combine(time, time_unit: "us")
+  unless time.is_a?(Time) || time.is_a?(Expr)
+    raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
+  end
+  time = Utils.expr_to_lit_or_expr(time)
+  Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
+end
+
 # Format Date/datetime with a formatting rule.
 #
 # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
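A sketch of `combine`, assuming a Ruby `Time` literal is accepted for the time component (per the type check above); the data is illustrative:

```ruby
require "date"

df = Polars::DataFrame.new({"dt" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)]})
df.select(
  Polars.col("dt").dt.combine(Time.utc(2000, 1, 1, 12, 30), time_unit: "us")
)
```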
@@ -270,6 +289,34 @@ module Polars
   Utils.wrap_expr(_rbexpr.year)
 end
 
+# Determine whether the year of the underlying date is a leap year.
+#
+# Applies to Date and Datetime columns.
+#
+# @return [Expr]
+#
+# @example
+#   start = DateTime.new(2000, 1, 1)
+#   stop = DateTime.new(2002, 1, 1)
+#   df = Polars::DataFrame.new(
+#     {"date" => Polars.date_range(start, stop, "1y")}
+#   )
+#   df.select(Polars.col("date").dt.is_leap_year)
+#   # =>
+#   # shape: (3, 1)
+#   # ┌───────┐
+#   # │ date  │
+#   # │ ---   │
+#   # │ bool  │
+#   # ╞═══════╡
+#   # │ true  │
+#   # │ false │
+#   # │ false │
+#   # └───────┘
+def is_leap_year
+  Utils.wrap_expr(_rbexpr.dt_is_leap_year)
+end
+
 # Extract ISO year from underlying Date representation.
 #
 # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
   Utils.wrap_expr(_rbexpr.ordinal_day)
 end
 
+# Time
+#
+# @return [Expr]
+def time
+  Utils.wrap_expr(_rbexpr.dt_time)
+end
+
+# Date
+#
+# @return [Expr]
+def date
+  Utils.wrap_expr(_rbexpr.dt_date)
+end
+
+# Datetime
+#
+# @return [Expr]
+def datetime
+  Utils.wrap_expr(_rbexpr.dt_datetime)
+end
+
 # Extract hour from underlying DateTime representation.
 #
 # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
 # Time zone for the `Datetime` Series.
 #
 # @return [Expr]
-def replace_time_zone(tz)
-  Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
+def replace_time_zone(tz, use_earliest: nil)
+  Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
 end
 
 # Localize tz-naive Datetime Series to tz-aware Datetime Series.
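A sketch of the new keyword; the column name is illustrative, and the reading of `use_earliest` (picking the earlier of the two possible local times when clocks are set back) follows the upstream Polars parameter of the same name:

```ruby
Polars.col("ts").dt.replace_time_zone("Europe/London", use_earliest: true)
```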
@@ -1282,5 +1350,77 @@ module Polars
 def offset_by(by)
   Utils.wrap_expr(_rbexpr.dt_offset_by(by))
 end
+
+# Roll backward to the first day of the month.
+#
+# @return [Expr]
+#
+# @example
+#   df = Polars::DataFrame.new(
+#     {
+#       "dates" => Polars.date_range(
+#         DateTime.new(2000, 1, 15, 2),
+#         DateTime.new(2000, 12, 15, 2),
+#         "1mo"
+#       )
+#     }
+#   )
+#   df.select(Polars.col("dates").dt.month_start)
+#   # =>
+#   # shape: (12, 1)
+#   # ┌─────────────────────┐
+#   # │ dates               │
+#   # │ ---                 │
+#   # │ datetime[μs]        │
+#   # ╞═════════════════════╡
+#   # │ 2000-01-01 02:00:00 │
+#   # │ 2000-02-01 02:00:00 │
+#   # │ 2000-03-01 02:00:00 │
+#   # │ 2000-04-01 02:00:00 │
+#   # │ …                   │
+#   # │ 2000-09-01 02:00:00 │
+#   # │ 2000-10-01 02:00:00 │
+#   # │ 2000-11-01 02:00:00 │
+#   # │ 2000-12-01 02:00:00 │
+#   # └─────────────────────┘
+def month_start
+  Utils.wrap_expr(_rbexpr.dt_month_start)
+end
+
+# Roll forward to the last day of the month.
+#
+# @return [Expr]
+#
+# @example
+#   df = Polars::DataFrame.new(
+#     {
+#       "dates" => Polars.date_range(
+#         DateTime.new(2000, 1, 15, 2),
+#         DateTime.new(2000, 12, 15, 2),
+#         "1mo"
+#       )
+#     }
+#   )
+#   df.select(Polars.col("dates").dt.month_end)
+#   # =>
+#   # shape: (12, 1)
+#   # ┌─────────────────────┐
+#   # │ dates               │
+#   # │ ---                 │
+#   # │ datetime[μs]        │
+#   # ╞═════════════════════╡
+#   # │ 2000-01-31 02:00:00 │
+#   # │ 2000-02-29 02:00:00 │
+#   # │ 2000-03-31 02:00:00 │
+#   # │ 2000-04-30 02:00:00 │
+#   # │ …                   │
+#   # │ 2000-09-30 02:00:00 │
+#   # │ 2000-10-31 02:00:00 │
+#   # │ 2000-11-30 02:00:00 │
+#   # │ 2000-12-31 02:00:00 │
+#   # └─────────────────────┘
+def month_end
+  Utils.wrap_expr(_rbexpr.dt_month_end)
+end
 end
 end