polars-df 0.13.0-x86_64-linux-musl → 0.15.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE-THIRD-PARTY.txt +24801 -13447
  5. data/LICENSE.txt +1 -0
  6. data/README.md +1 -2
  7. data/lib/polars/3.1/polars.so +0 -0
  8. data/lib/polars/3.2/polars.so +0 -0
  9. data/lib/polars/3.3/polars.so +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +285 -62
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +2 -0
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +109 -8
  20. data/lib/polars/functions/as_datatype.rb +51 -2
  21. data/lib/polars/functions/col.rb +1 -1
  22. data/lib/polars/functions/eager.rb +1 -3
  23. data/lib/polars/functions/lazy.rb +88 -10
  24. data/lib/polars/functions/range/time_range.rb +21 -21
  25. data/lib/polars/io/csv.rb +14 -16
  26. data/lib/polars/io/database.rb +2 -2
  27. data/lib/polars/io/ipc.rb +14 -12
  28. data/lib/polars/io/ndjson.rb +10 -0
  29. data/lib/polars/io/parquet.rb +168 -111
  30. data/lib/polars/lazy_frame.rb +649 -15
  31. data/lib/polars/list_name_space.rb +169 -0
  32. data/lib/polars/selectors.rb +1144 -0
  33. data/lib/polars/series.rb +470 -40
  34. data/lib/polars/string_cache.rb +27 -1
  35. data/lib/polars/string_expr.rb +0 -1
  36. data/lib/polars/string_name_space.rb +73 -3
  37. data/lib/polars/struct_name_space.rb +31 -7
  38. data/lib/polars/utils/various.rb +5 -1
  39. data/lib/polars/utils.rb +45 -10
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +2 -1
  42. metadata +4 -3
  43. data/lib/polars/functions.rb +0 -57
@@ -1,11 +1,37 @@
1
1
  module Polars
2
- # Context manager for enabling and disabling the global string cache.
2
+ # Class for enabling and disabling the global string cache.
3
+ #
4
+ # @example Construct two Series using the same global string cache.
5
+ # s1 = nil
6
+ # s2 = nil
7
+ # Polars::StringCache.new do
8
+ # s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
9
+ # s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
10
+ # end
11
+ #
12
+ # @example As both Series are constructed under the same global string cache, they can be concatenated.
13
+ # Polars.concat([s1, s2])
14
+ # # =>
15
+ # # shape: (6,)
16
+ # # Series: 'color' [cat]
17
+ # # [
18
+ # # "red"
19
+ # # "green"
20
+ # # "red"
21
+ # # "blue"
22
+ # # "red"
23
+ # # "green"
24
+ # # ]
3
25
  class StringCache
4
26
  def initialize(&block)
5
27
  RbStringCacheHolder.hold(&block)
6
28
  end
7
29
  end
8
30
 
31
+ def self.string_cache(...)
32
+ StringCache.new(...)
33
+ end
34
+
9
35
  module Functions
10
36
  # Enable the global string cache.
11
37
  #
@@ -421,7 +421,6 @@ module Polars
421
421
  # # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
422
422
  # # └─────────────────────────┴─────────────────────────┘
423
423
  def to_titlecase
424
- raise Todo
425
424
  Utils.wrap_expr(_rbexpr.str_to_titlecase)
426
425
  end
427
426
 
@@ -658,7 +658,18 @@ module Polars
658
658
  # An optional single character that should be trimmed
659
659
  #
660
660
  # @return [Series]
661
- def strip(matches = nil)
661
+ #
662
+ # @example
663
+ # s = Polars::Series.new([" hello ", "\tworld"])
664
+ # s.str.strip_chars
665
+ # # =>
666
+ # # shape: (2,)
667
+ # # Series: '' [str]
668
+ # # [
669
+ # # "hello"
670
+ # # "world"
671
+ # # ]
672
+ def strip_chars(matches = nil)
662
673
  super
663
674
  end
664
675
 
@@ -668,9 +679,21 @@ module Polars
668
679
  # An optional single character that should be trimmed
669
680
  #
670
681
  # @return [Series]
671
- def lstrip(matches = nil)
682
+ #
683
+ # @example
684
+ # s = Polars::Series.new([" hello ", "\tworld"])
685
+ # s.str.strip_chars_start
686
+ # # =>
687
+ # # shape: (2,)
688
+ # # Series: '' [str]
689
+ # # [
690
+ # # "hello "
691
+ # # "world"
692
+ # # ]
693
+ def strip_chars_start(matches = nil)
672
694
  super
673
695
  end
696
+ alias_method :lstrip, :strip_chars_start
674
697
 
675
698
  # Remove trailing whitespace.
676
699
  #
@@ -678,9 +701,21 @@ module Polars
678
701
  # An optional single character that should be trimmed
679
702
  #
680
703
  # @return [Series]
681
- def rstrip(matches = nil)
704
+ #
705
+ # @example
706
+ # s = Polars::Series.new([" hello ", "world\t"])
707
+ # s.str.strip_chars_end
708
+ # # =>
709
+ # # shape: (2,)
710
+ # # Series: '' [str]
711
+ # # [
712
+ # # " hello"
713
+ # # "world"
714
+ # # ]
715
+ def strip_chars_end(matches = nil)
682
716
  super
683
717
  end
718
+ alias_method :rstrip, :strip_chars_end
684
719
 
685
720
  # Fills the string with zeroes.
686
721
  #
@@ -695,6 +730,19 @@ module Polars
695
730
  # Fill the value up to this length.
696
731
  #
697
732
  # @return [Series]
733
+ #
734
+ # @example
735
+ # s = Polars::Series.new([-1, 123, 999999, nil])
736
+ # s.cast(Polars::String).str.zfill(4)
737
+ # # =>
738
+ # # shape: (4,)
739
+ # # Series: '' [str]
740
+ # # [
741
+ # # "-001"
742
+ # # "0123"
743
+ # # "999999"
744
+ # # null
745
+ # # ]
698
746
  def zfill(length)
699
747
  super
700
748
  end
@@ -758,6 +806,17 @@ module Polars
758
806
  # Modify the strings to their lowercase equivalent.
759
807
  #
760
808
  # @return [Series]
809
+ #
810
+ # @example
811
+ # s = Polars::Series.new("foo", ["CAT", "DOG"])
812
+ # s.str.to_lowercase
813
+ # # =>
814
+ # # shape: (2,)
815
+ # # Series: 'foo' [str]
816
+ # # [
817
+ # # "cat"
818
+ # # "dog"
819
+ # # ]
761
820
  def to_lowercase
762
821
  super
763
822
  end
@@ -765,6 +824,17 @@ module Polars
765
824
  # Modify the strings to their uppercase equivalent.
766
825
  #
767
826
  # @return [Series]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("foo", ["cat", "dog"])
830
+ # s.str.to_uppercase
831
+ # # =>
832
+ # # shape: (2,)
833
+ # # Series: 'foo' [str]
834
+ # # [
835
+ # # "CAT"
836
+ # # "DOG"
837
+ # # ]
768
838
  def to_uppercase
769
839
  super
770
840
  end
@@ -23,16 +23,14 @@ module Polars
23
23
  end
24
24
  end
25
25
 
26
- # Convert this Struct Series to a DataFrame.
27
- #
28
- # @return [DataFrame]
29
- def to_frame
30
- Utils.wrap_df(_s.struct_to_frame)
31
- end
32
-
33
26
  # Get the names of the fields.
34
27
  #
35
28
  # @return [Array]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
32
+ # s.struct.fields
33
+ # # => ["a", "b"]
36
34
  def fields
37
35
  if _s.nil?
38
36
  []
@@ -47,6 +45,17 @@ module Polars
47
45
  # Name of the field
48
46
  #
49
47
  # @return [Series]
48
+ #
49
+ # @example
50
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
51
+ # s.struct.field("a")
52
+ # # =>
53
+ # # shape: (2,)
54
+ # # Series: 'a' [i64]
55
+ # # [
56
+ # # 1
57
+ # # 3
58
+ # # ]
50
59
  def field(name)
51
60
  super
52
61
  end
@@ -57,6 +66,16 @@ module Polars
57
66
  # New names in the order of the struct's fields
58
67
  #
59
68
  # @return [Series]
69
+ #
70
+ # @example
71
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
72
+ # s.struct.fields
73
+ # # => ["a", "b"]
74
+ #
75
+ # @example
76
+ # s = s.struct.rename_fields(["c", "d"])
77
+ # s.struct.fields
78
+ # # => ["c", "d"]
60
79
  def rename_fields(names)
61
80
  super
62
81
  end
@@ -64,6 +83,11 @@ module Polars
64
83
  # Get the struct definition as a name/dtype schema dict.
65
84
  #
66
85
  # @return [Object]
86
+ #
87
+ # @example
88
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
89
+ # s.struct.schema
90
+ # # => {"a"=>Polars::Int64, "b"=>Polars::Int64}
67
91
  def schema
68
92
  if _s.nil?
69
93
  {}
@@ -12,6 +12,10 @@ module Polars
12
12
  val.all? { |x| x.is_a?(eltype) }
13
13
  end
14
14
 
15
+ def self.is_path_or_str_sequence(val)
16
+ val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
17
+ end
18
+
15
19
  def self.is_bool_sequence(val)
16
20
  val.is_a?(::Array) && val.all? { |x| x == true || x == false }
17
21
  end
@@ -42,7 +46,7 @@ module Polars
42
46
  end
43
47
 
44
48
  def self.normalize_filepath(path, check_not_directory: true)
45
- path = File.expand_path(path)
49
+ path = File.expand_path(path) if !path.is_a?(::String) || path.start_with?("~")
46
50
  if check_not_directory && File.exist?(path) && Dir.exist?(path)
47
51
  raise ArgumentError, "Expected a file path; #{path} is a directory"
48
52
  end
data/lib/polars/utils.rb CHANGED
@@ -3,12 +3,18 @@ module Polars
3
3
  module Utils
4
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
5
5
 
6
- # TODO fix
7
- def self.is_polars_dtype(data_type, include_unknown: false)
8
- if data_type == Unknown
9
- return include_unknown
6
+ def self.is_polars_dtype(dtype, include_unknown: false)
7
+ is_dtype = dtype.is_a?(Symbol) || dtype.is_a?(::String) || dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
8
+
9
+ if !include_unknown
10
+ is_dtype && dtype != Unknown
11
+ else
12
+ is_dtype
10
13
  end
11
- data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
14
+ end
15
+
16
+ def self.is_column(obj)
17
+ obj.is_a?(Expr) && obj.meta.is_column
12
18
  end
13
19
 
14
20
  def self.map_rb_type_to_dtype(ruby_dtype)
@@ -100,6 +106,27 @@ module Polars
100
106
  end
101
107
  end
102
108
 
109
+ def self.is_selector(obj)
110
+ obj.is_a?(Selectors::SelectorProxy)
111
+ end
112
+
113
+ def self.expand_selector(target, selector, strict: true)
114
+ if target.is_a?(Hash)
115
+ target = DataFrame.new(schema: target)
116
+ end
117
+
118
+ if !is_selector(selector) && !is_polars_dtype(selector)
119
+ msg = "expected a selector; found #{selector.inspect} instead."
120
+ raise TypeError, msg
121
+ end
122
+
123
+ if is_selector(selector)
124
+ target.select(selector).columns
125
+ else
126
+ target.select(Polars.col(selector)).columns
127
+ end
128
+ end
129
+
103
130
  def self._expand_selectors(frame, *items)
104
131
  items_iter = _parse_inputs_as_iterable(items)
105
132
 
@@ -115,16 +142,24 @@ module Polars
115
142
  expanded
116
143
  end
117
144
 
118
- # TODO
119
- def self.is_selector(obj)
120
- false
121
- end
122
-
123
145
  def self.parse_interval_argument(interval)
124
146
  if interval.include?(" ")
125
147
  interval = interval.gsub(" ", "")
126
148
  end
127
149
  interval.downcase
128
150
  end
151
+
152
+ def self.parse_into_dtype(input)
153
+ if is_polars_dtype(input)
154
+ input
155
+ else
156
+ raise Todo
157
+ end
158
+ end
159
+
160
+ def self.re_escape(s)
161
+ # escapes _only_ those metachars with meaning to the rust regex crate
162
+ Plr.re_escape(s)
163
+ end
129
164
  end
130
165
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.13.0"
3
+ VERSION = "0.15.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -24,12 +24,12 @@ require_relative "polars/convert"
24
24
  require_relative "polars/plot"
25
25
  require_relative "polars/data_frame"
26
26
  require_relative "polars/data_types"
27
+ require_relative "polars/data_type_group"
27
28
  require_relative "polars/date_time_expr"
28
29
  require_relative "polars/date_time_name_space"
29
30
  require_relative "polars/dynamic_group_by"
30
31
  require_relative "polars/exceptions"
31
32
  require_relative "polars/expr"
32
- require_relative "polars/functions"
33
33
  require_relative "polars/functions/as_datatype"
34
34
  require_relative "polars/functions/col"
35
35
  require_relative "polars/functions/eager"
@@ -60,6 +60,7 @@ require_relative "polars/list_name_space"
60
60
  require_relative "polars/meta_expr"
61
61
  require_relative "polars/name_expr"
62
62
  require_relative "polars/rolling_group_by"
63
+ require_relative "polars/selectors"
63
64
  require_relative "polars/series"
64
65
  require_relative "polars/slice"
65
66
  require_relative "polars/sql_context"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.15.0
5
5
  platform: x86_64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-05 00:00:00.000000000 Z
11
+ date: 2024-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -53,6 +53,7 @@ files:
53
53
  - lib/polars/config.rb
54
54
  - lib/polars/convert.rb
55
55
  - lib/polars/data_frame.rb
56
+ - lib/polars/data_type_group.rb
56
57
  - lib/polars/data_types.rb
57
58
  - lib/polars/date_time_expr.rb
58
59
  - lib/polars/date_time_name_space.rb
@@ -60,7 +61,6 @@ files:
60
61
  - lib/polars/exceptions.rb
61
62
  - lib/polars/expr.rb
62
63
  - lib/polars/expr_dispatch.rb
63
- - lib/polars/functions.rb
64
64
  - lib/polars/functions/aggregation/horizontal.rb
65
65
  - lib/polars/functions/aggregation/vertical.rb
66
66
  - lib/polars/functions/as_datatype.rb
@@ -92,6 +92,7 @@ files:
92
92
  - lib/polars/name_expr.rb
93
93
  - lib/polars/plot.rb
94
94
  - lib/polars/rolling_group_by.rb
95
+ - lib/polars/selectors.rb
95
96
  - lib/polars/series.rb
96
97
  - lib/polars/slice.rb
97
98
  - lib/polars/sql_context.rb
@@ -1,57 +0,0 @@
1
- module Polars
2
- module Functions
3
- # Convert categorical variables into dummy/indicator variables.
4
- #
5
- # @param df [DataFrame]
6
- # DataFrame to convert.
7
- # @param columns [Array, nil]
8
- # A subset of columns to convert to dummy variables. `nil` means
9
- # "all columns".
10
- #
11
- # @return [DataFrame]
12
- def get_dummies(df, columns: nil)
13
- df.to_dummies(columns: columns)
14
- end
15
-
16
- # Aggregate to list.
17
- #
18
- # @return [Expr]
19
- def to_list(name)
20
- col(name).list
21
- end
22
-
23
- # Compute the spearman rank correlation between two columns.
24
- #
25
- # Missing data will be excluded from the computation.
26
- #
27
- # @param a [Object]
28
- # Column name or Expression.
29
- # @param b [Object]
30
- # Column name or Expression.
31
- # @param ddof [Integer]
32
- # Delta degrees of freedom
33
- # @param propagate_nans [Boolean]
34
- # If `True` any `NaN` encountered will lead to `NaN` in the output.
35
- # Defaults to `False` where `NaN` are regarded as larger than any finite number
36
- # and thus lead to the highest rank.
37
- #
38
- # @return [Expr]
39
- def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
40
- corr(a, b, method: "spearman", ddof: ddof, propagate_nans: propagate_nans)
41
- end
42
-
43
- # Compute the pearson's correlation between two columns.
44
- #
45
- # @param a [Object]
46
- # Column name or Expression.
47
- # @param b [Object]
48
- # Column name or Expression.
49
- # @param ddof [Integer]
50
- # Delta degrees of freedom
51
- #
52
- # @return [Expr]
53
- def pearson_corr(a, b, ddof: 1)
54
- corr(a, b, method: "pearson", ddof: ddof)
55
- end
56
- end
57
- end