polars-df 0.14.0-x86_64-darwin → 0.16.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +24956 -14152
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
@@ -1,11 +1,37 @@
1
1
  module Polars
2
- # Context manager for enabling and disabling the global string cache.
2
+ # Class for enabling and disabling the global string cache.
3
+ #
4
+ # @example Construct two Series using the same global string cache.
5
+ # s1 = nil
6
+ # s2 = nil
7
+ # Polars::StringCache.new do
8
+ # s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
9
+ # s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
10
+ # end
11
+ #
12
+ # @example As both Series are constructed under the same global string cache, they can be concatenated.
13
+ # Polars.concat([s1, s2])
14
+ # # =>
15
+ # # shape: (6,)
16
+ # # Series: 'color' [cat]
17
+ # # [
18
+ # # "red"
19
+ # # "green"
20
+ # # "red"
21
+ # # "blue"
22
+ # # "red"
23
+ # # "green"
24
+ # # ]
3
25
  class StringCache
4
26
  def initialize(&block)
5
27
  RbStringCacheHolder.hold(&block)
6
28
  end
7
29
  end
8
30
 
31
+ def self.string_cache(...)
32
+ StringCache.new(...)
33
+ end
34
+
9
35
  module Functions
10
36
  # Enable the global string cache.
11
37
  #
@@ -421,7 +421,6 @@ module Polars
421
421
  # # │ THERE'S NO TURNING BACK ┆ There's No Turning Back │
422
422
  # # └─────────────────────────┴─────────────────────────┘
423
423
  def to_titlecase
424
- raise Todo
425
424
  Utils.wrap_expr(_rbexpr.str_to_titlecase)
426
425
  end
427
426
 
@@ -658,7 +658,18 @@ module Polars
658
658
  # An optional single character that should be trimmed
659
659
  #
660
660
  # @return [Series]
661
- def strip(matches = nil)
661
+ #
662
+ # @example
663
+ # s = Polars::Series.new([" hello ", "\tworld"])
664
+ # s.str.strip_chars
665
+ # # =>
666
+ # # shape: (2,)
667
+ # # Series: '' [str]
668
+ # # [
669
+ # # "hello"
670
+ # # "world"
671
+ # # ]
672
+ def strip_chars(matches = nil)
662
673
  super
663
674
  end
664
675
 
@@ -668,9 +679,21 @@ module Polars
668
679
  # An optional single character that should be trimmed
669
680
  #
670
681
  # @return [Series]
671
- def lstrip(matches = nil)
682
+ #
683
+ # @example
684
+ # s = Polars::Series.new([" hello ", "\tworld"])
685
+ # s.str.strip_chars_start
686
+ # # =>
687
+ # # shape: (2,)
688
+ # # Series: '' [str]
689
+ # # [
690
+ # # "hello "
691
+ # # "world"
692
+ # # ]
693
+ def strip_chars_start(matches = nil)
672
694
  super
673
695
  end
696
+ alias_method :lstrip, :strip_chars_start
674
697
 
675
698
  # Remove trailing whitespace.
676
699
  #
@@ -678,9 +701,21 @@ module Polars
678
701
  # An optional single character that should be trimmed
679
702
  #
680
703
  # @return [Series]
681
- def rstrip(matches = nil)
704
+ #
705
+ # @example
706
+ # s = Polars::Series.new([" hello ", "world\t"])
707
+ # s.str.strip_chars_end
708
+ # # =>
709
+ # # shape: (2,)
710
+ # # Series: '' [str]
711
+ # # [
712
+ # # " hello"
713
+ # # "world"
714
+ # # ]
715
+ def strip_chars_end(matches = nil)
682
716
  super
683
717
  end
718
+ alias_method :rstrip, :strip_chars_end
684
719
 
685
720
  # Fills the string with zeroes.
686
721
  #
@@ -695,6 +730,19 @@ module Polars
695
730
  # Fill the value up to this length.
696
731
  #
697
732
  # @return [Series]
733
+ #
734
+ # @example
735
+ # s = Polars::Series.new([-1, 123, 999999, nil])
736
+ # s.cast(Polars::String).str.zfill(4)
737
+ # # =>
738
+ # # shape: (4,)
739
+ # # Series: '' [str]
740
+ # # [
741
+ # # "-001"
742
+ # # "0123"
743
+ # # "999999"
744
+ # # null
745
+ # # ]
698
746
  def zfill(length)
699
747
  super
700
748
  end
@@ -758,6 +806,17 @@ module Polars
758
806
  # Modify the strings to their lowercase equivalent.
759
807
  #
760
808
  # @return [Series]
809
+ #
810
+ # @example
811
+ # s = Polars::Series.new("foo", ["CAT", "DOG"])
812
+ # s.str.to_lowercase
813
+ # # =>
814
+ # # shape: (2,)
815
+ # # Series: 'foo' [str]
816
+ # # [
817
+ # # "cat"
818
+ # # "dog"
819
+ # # ]
761
820
  def to_lowercase
762
821
  super
763
822
  end
@@ -765,6 +824,17 @@ module Polars
765
824
  # Modify the strings to their uppercase equivalent.
766
825
  #
767
826
  # @return [Series]
827
+ #
828
+ # @example
829
+ # s = Polars::Series.new("foo", ["cat", "dog"])
830
+ # s.str.to_uppercase
831
+ # # =>
832
+ # # shape: (2,)
833
+ # # Series: 'foo' [str]
834
+ # # [
835
+ # # "CAT"
836
+ # # "DOG"
837
+ # # ]
768
838
  def to_uppercase
769
839
  super
770
840
  end
@@ -23,16 +23,14 @@ module Polars
23
23
  end
24
24
  end
25
25
 
26
- # Convert this Struct Series to a DataFrame.
27
- #
28
- # @return [DataFrame]
29
- def to_frame
30
- Utils.wrap_df(_s.struct_to_frame)
31
- end
32
-
33
26
  # Get the names of the fields.
34
27
  #
35
28
  # @return [Array]
29
+ #
30
+ # @example
31
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
32
+ # s.struct.fields
33
+ # # => ["a", "b"]
36
34
  def fields
37
35
  if _s.nil?
38
36
  []
@@ -47,6 +45,17 @@ module Polars
47
45
  # Name of the field
48
46
  #
49
47
  # @return [Series]
48
+ #
49
+ # @example
50
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
51
+ # s.struct.field("a")
52
+ # # =>
53
+ # # shape: (2,)
54
+ # # Series: 'a' [i64]
55
+ # # [
56
+ # # 1
57
+ # # 3
58
+ # # ]
50
59
  def field(name)
51
60
  super
52
61
  end
@@ -57,6 +66,16 @@ module Polars
57
66
  # New names in the order of the struct's fields
58
67
  #
59
68
  # @return [Series]
69
+ #
70
+ # @example
71
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
72
+ # s.struct.fields
73
+ # # => ["a", "b"]
74
+ #
75
+ # @example
76
+ # s = s.struct.rename_fields(["c", "d"])
77
+ # s.struct.fields
78
+ # # => ["c", "d"]
60
79
  def rename_fields(names)
61
80
  super
62
81
  end
@@ -64,6 +83,11 @@ module Polars
64
83
  # Get the struct definition as a name/dtype schema dict.
65
84
  #
66
85
  # @return [Object]
86
+ #
87
+ # @example
88
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
89
+ # s.struct.schema
90
+ # # => {"a"=>Polars::Int64, "b"=>Polars::Int64}
67
91
  def schema
68
92
  if _s.nil?
69
93
  {}
@@ -12,6 +12,10 @@ module Polars
12
12
  val.all? { |x| x.is_a?(eltype) }
13
13
  end
14
14
 
15
+ def self.is_path_or_str_sequence(val)
16
+ val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
17
+ end
18
+
15
19
  def self.is_bool_sequence(val)
16
20
  val.is_a?(::Array) && val.all? { |x| x == true || x == false }
17
21
  end
@@ -42,7 +46,7 @@ module Polars
42
46
  end
43
47
 
44
48
  def self.normalize_filepath(path, check_not_directory: true)
45
- path = File.expand_path(path)
49
+ path = File.expand_path(path) if !path.is_a?(::String) || path.start_with?("~")
46
50
  if check_not_directory && File.exist?(path) && Dir.exist?(path)
47
51
  raise ArgumentError, "Expected a file path; #{path} is a directory"
48
52
  end
data/lib/polars/utils.rb CHANGED
@@ -3,12 +3,18 @@ module Polars
3
3
  module Utils
4
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
5
5
 
6
- # TODO fix
7
- def self.is_polars_dtype(data_type, include_unknown: false)
8
- if data_type == Unknown
9
- return include_unknown
6
+ def self.is_polars_dtype(dtype, include_unknown: false)
7
+ is_dtype = dtype.is_a?(Symbol) || dtype.is_a?(::String) || dtype.is_a?(DataType) || (dtype.is_a?(Class) && dtype < DataType)
8
+
9
+ if !include_unknown
10
+ is_dtype && dtype != Unknown
11
+ else
12
+ is_dtype
10
13
  end
11
- data_type.is_a?(Symbol) || data_type.is_a?(::String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
14
+ end
15
+
16
+ def self.is_column(obj)
17
+ obj.is_a?(Expr) && obj.meta.is_column
12
18
  end
13
19
 
14
20
  def self.map_rb_type_to_dtype(ruby_dtype)
@@ -100,6 +106,27 @@ module Polars
100
106
  end
101
107
  end
102
108
 
109
+ def self.is_selector(obj)
110
+ obj.is_a?(Selectors::SelectorProxy)
111
+ end
112
+
113
+ def self.expand_selector(target, selector, strict: true)
114
+ if target.is_a?(Hash)
115
+ target = DataFrame.new(schema: target)
116
+ end
117
+
118
+ if !is_selector(selector) && !is_polars_dtype(selector)
119
+ msg = "expected a selector; found #{selector.inspect} instead."
120
+ raise TypeError, msg
121
+ end
122
+
123
+ if is_selector(selector)
124
+ target.select(selector).columns
125
+ else
126
+ target.select(Polars.col(selector)).columns
127
+ end
128
+ end
129
+
103
130
  def self._expand_selectors(frame, *items)
104
131
  items_iter = _parse_inputs_as_iterable(items)
105
132
 
@@ -115,16 +142,24 @@ module Polars
115
142
  expanded
116
143
  end
117
144
 
118
- # TODO
119
- def self.is_selector(obj)
120
- false
121
- end
122
-
123
145
  def self.parse_interval_argument(interval)
124
146
  if interval.include?(" ")
125
147
  interval = interval.gsub(" ", "")
126
148
  end
127
149
  interval.downcase
128
150
  end
151
+
152
+ def self.parse_into_dtype(input)
153
+ if is_polars_dtype(input)
154
+ input
155
+ else
156
+ raise Todo
157
+ end
158
+ end
159
+
160
+ def self.re_escape(s)
161
+ # escapes _only_ those metachars with meaning to the rust regex crate
162
+ Plr.re_escape(s)
163
+ end
129
164
  end
130
165
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.14.0"
3
+ VERSION = "0.16.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -24,12 +24,12 @@ require_relative "polars/convert"
24
24
  require_relative "polars/plot"
25
25
  require_relative "polars/data_frame"
26
26
  require_relative "polars/data_types"
27
+ require_relative "polars/data_type_group"
27
28
  require_relative "polars/date_time_expr"
28
29
  require_relative "polars/date_time_name_space"
29
30
  require_relative "polars/dynamic_group_by"
30
31
  require_relative "polars/exceptions"
31
32
  require_relative "polars/expr"
32
- require_relative "polars/functions"
33
33
  require_relative "polars/functions/as_datatype"
34
34
  require_relative "polars/functions/col"
35
35
  require_relative "polars/functions/eager"
@@ -49,6 +49,7 @@ require_relative "polars/group_by"
49
49
  require_relative "polars/io/avro"
50
50
  require_relative "polars/io/csv"
51
51
  require_relative "polars/io/database"
52
+ require_relative "polars/io/delta"
52
53
  require_relative "polars/io/ipc"
53
54
  require_relative "polars/io/json"
54
55
  require_relative "polars/io/ndjson"
@@ -60,6 +61,7 @@ require_relative "polars/list_name_space"
60
61
  require_relative "polars/meta_expr"
61
62
  require_relative "polars/name_expr"
62
63
  require_relative "polars/rolling_group_by"
64
+ require_relative "polars/selectors"
63
65
  require_relative "polars/series"
64
66
  require_relative "polars/slice"
65
67
  require_relative "polars/sql_context"
@@ -88,4 +90,18 @@ module Polars
88
90
 
89
91
  # @private
90
92
  N_INFER_DEFAULT = 100
93
+
94
+ # @private
95
+ class ArrowArrayStream
96
+ def arrow_c_stream
97
+ self
98
+ end
99
+ end
100
+
101
+ # Return the number of threads in the Polars thread pool.
102
+ #
103
+ # @return [Integer]
104
+ def self.thread_pool_size
105
+ Plr.thread_pool_size
106
+ end
91
107
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.16.0
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-17 00:00:00.000000000 Z
11
+ date: 2024-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -24,7 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- force_ruby_platform: false
28
27
  description:
29
28
  email: andrew@ankane.org
30
29
  executables: []
@@ -40,9 +39,9 @@ files:
40
39
  - README.md
41
40
  - lib/polars-df.rb
42
41
  - lib/polars.rb
43
- - lib/polars/3.1/polars.bundle
44
42
  - lib/polars/3.2/polars.bundle
45
43
  - lib/polars/3.3/polars.bundle
44
+ - lib/polars/3.4/polars.bundle
46
45
  - lib/polars/array_expr.rb
47
46
  - lib/polars/array_name_space.rb
48
47
  - lib/polars/batched_csv_reader.rb
@@ -53,6 +52,7 @@ files:
53
52
  - lib/polars/config.rb
54
53
  - lib/polars/convert.rb
55
54
  - lib/polars/data_frame.rb
55
+ - lib/polars/data_type_group.rb
56
56
  - lib/polars/data_types.rb
57
57
  - lib/polars/date_time_expr.rb
58
58
  - lib/polars/date_time_name_space.rb
@@ -60,7 +60,6 @@ files:
60
60
  - lib/polars/exceptions.rb
61
61
  - lib/polars/expr.rb
62
62
  - lib/polars/expr_dispatch.rb
63
- - lib/polars/functions.rb
64
63
  - lib/polars/functions/aggregation/horizontal.rb
65
64
  - lib/polars/functions/aggregation/vertical.rb
66
65
  - lib/polars/functions/as_datatype.rb
@@ -80,6 +79,7 @@ files:
80
79
  - lib/polars/io/avro.rb
81
80
  - lib/polars/io/csv.rb
82
81
  - lib/polars/io/database.rb
82
+ - lib/polars/io/delta.rb
83
83
  - lib/polars/io/ipc.rb
84
84
  - lib/polars/io/json.rb
85
85
  - lib/polars/io/ndjson.rb
@@ -92,6 +92,7 @@ files:
92
92
  - lib/polars/name_expr.rb
93
93
  - lib/polars/plot.rb
94
94
  - lib/polars/rolling_group_by.rb
95
+ - lib/polars/selectors.rb
95
96
  - lib/polars/series.rb
96
97
  - lib/polars/slice.rb
97
98
  - lib/polars/sql_context.rb
@@ -121,17 +122,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
121
122
  requirements:
122
123
  - - ">="
123
124
  - !ruby/object:Gem::Version
124
- version: '3.1'
125
+ version: '3.2'
125
126
  - - "<"
126
127
  - !ruby/object:Gem::Version
127
- version: 3.4.dev
128
+ version: 3.5.dev
128
129
  required_rubygems_version: !ruby/object:Gem::Requirement
129
130
  requirements:
130
131
  - - ">="
131
132
  - !ruby/object:Gem::Version
132
133
  version: '0'
133
134
  requirements: []
134
- rubygems_version: 3.4.4
135
+ rubygems_version: 3.5.23
135
136
  signing_key:
136
137
  specification_version: 4
137
138
  summary: Blazingly fast DataFrames for Ruby
@@ -1,57 +0,0 @@
1
- module Polars
2
- module Functions
3
- # Convert categorical variables into dummy/indicator variables.
4
- #
5
- # @param df [DataFrame]
6
- # DataFrame to convert.
7
- # @param columns [Array, nil]
8
- # A subset of columns to convert to dummy variables. `nil` means
9
- # "all columns".
10
- #
11
- # @return [DataFrame]
12
- def get_dummies(df, columns: nil)
13
- df.to_dummies(columns: columns)
14
- end
15
-
16
- # Aggregate to list.
17
- #
18
- # @return [Expr]
19
- def to_list(name)
20
- col(name).list
21
- end
22
-
23
- # Compute the spearman rank correlation between two columns.
24
- #
25
- # Missing data will be excluded from the computation.
26
- #
27
- # @param a [Object]
28
- # Column name or Expression.
29
- # @param b [Object]
30
- # Column name or Expression.
31
- # @param ddof [Integer]
32
- # Delta degrees of freedom
33
- # @param propagate_nans [Boolean]
34
- # If `True` any `NaN` encountered will lead to `NaN` in the output.
35
- # Defaults to `False` where `NaN` are regarded as larger than any finite number
36
- # and thus lead to the highest rank.
37
- #
38
- # @return [Expr]
39
- def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
40
- corr(a, b, method: "spearman", ddof: ddof, propagate_nans: propagate_nans)
41
- end
42
-
43
- # Compute the pearson's correlation between two columns.
44
- #
45
- # @param a [Object]
46
- # Column name or Expression.
47
- # @param b [Object]
48
- # Column name or Expression.
49
- # @param ddof [Integer]
50
- # Delta degrees of freedom
51
- #
52
- # @return [Expr]
53
- def pearson_corr(a, b, ddof: 1)
54
- corr(a, b, method: "pearson", ddof: ddof)
55
- end
56
- end
57
- end