polars-df 0.2.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,100 @@
1
module Polars
  # Namespace for struct related expressions.
  class StructExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    #
    # Stores the native expression handle carried by `expr`.
    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Retrieve one of the fields of this `Struct` as a new expression.
    #
    # A String selects the field by name, an Integer selects it by position.
    #
    # @param item [String, Integer]
    #   Name or index of the field
    #
    # @return [Expr]
    #
    # @raise [ArgumentError] if `item` is neither a String nor an Integer
    def [](item)
      case item
      when String
        field(item)
      when Integer
        by_index = _rbexpr.struct_field_by_index(item)
        Utils.wrap_expr(by_index)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Retrieve one of the fields of this `Struct` as a new expression.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df.select(Polars.col("my_struct").struct.field("str"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ str │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ a   │
    #   # ├╌╌╌╌╌┤
    #   # │ b   │
    #   # └─────┘
    def field(name)
      by_name = _rbexpr.struct_field_by_name(name)
      Utils.wrap_expr(by_name)
    end

    # Rename the fields of the struct.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df = df.with_column(
    #     Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
    #   )
    #   df.select(Polars.col("my_struct").struct.field("INT"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ INT │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # ├╌╌╌╌╌┤
    #   # │ 2   │
    #   # └─────┘
    def rename_fields(names)
      renamed = _rbexpr.struct_rename_fields(names)
      Utils.wrap_expr(renamed)
    end
  end
end
@@ -0,0 +1,64 @@
1
module Polars
  # Series.struct namespace.
  class StructNameSpace
    include ExprDispatch

    self._accessor = "struct"

    # @private
    #
    # Stores the native series handle carried by `series`.
    def initialize(series)
      self._s = series._s
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # An Integer is translated to the field name at that position in
    # {#fields}; a String is used as the field name directly.
    #
    # @param item [Integer, String]
    #   Index or name of the field
    #
    # @return [Series]
    #
    # @raise [ArgumentError] if `item` is neither an Integer nor a String
    def [](item)
      case item
      when Integer
        field(fields[item])
      when String
        field(item)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Convert this Struct Series to a DataFrame.
    #
    # @return [DataFrame]
    def to_frame
      rbdf = _s.struct_to_frame
      Utils.wrap_df(rbdf)
    end

    # Get the names of the fields.
    #
    # Returns an empty array when no native series is attached.
    #
    # @return [Array]
    def fields
      return [] if _s.nil?

      _s.struct_fields
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # The implementation comes from `super`, i.e. whatever ExprDispatch
    # provides (not visible in this file).
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Series]
    def field(name)
      super
    end

    # Rename the fields of the struct.
    #
    # The implementation comes from `super`, i.e. whatever ExprDispatch
    # provides (not visible in this file).
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Series]
    def rename_fields(names)
      super
    end
  end
end
@@ -0,0 +1,192 @@
1
module Polars
  # @private
  #
  # Internal helpers shared by the rest of the library: wrapping native
  # handles, converting Ruby values to and from Polars representations,
  # and small argument-normalisation utilities.
  module Utils
    # Time units accepted for temporal data types.
    DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"].freeze

    # Wrap a native series handle in a Series.
    def self.wrap_s(s)
      Series._from_rbseries(s)
    end

    # Wrap a native data frame handle in a DataFrame.
    def self.wrap_df(df)
      DataFrame._from_rbdf(df)
    end

    # Wrap a native lazy frame handle in a LazyFrame.
    def self.wrap_ldf(ldf)
      LazyFrame._from_rbldf(ldf)
    end

    # Wrap a native expression handle in an Expr.
    def self.wrap_expr(rbexpr)
      Expr._from_rbexpr(rbexpr)
    end

    # Shortcut for `Polars.col`.
    def self.col(name)
      Polars.col(name)
    end

    # Returns `td` unchanged.
    def self._timedelta_to_pl_duration(td)
      td
    end

    # Convert a date/time value to an integer timestamp since the Unix epoch.
    #
    # The value is normalised through `to_datetime.to_time`, so a `Date`
    # is interpreted as midnight at offset zero. All arithmetic is done on
    # integers: plain Ruby `DateTime` has neither `utc` nor `to_f`, and a
    # Float cannot represent nanosecond timestamps exactly.
    #
    # @param dt [#to_datetime]
    #   A `Time`, `Date` or `DateTime`
    # @param tu [String, nil]
    #   Time unit: "ns", "us" or "ms"; `nil` is treated as "us"
    #
    # @return [Integer]
    #
    # @raise [ArgumentError] if `tu` is not a supported unit
    def self._datetime_to_pl_timestamp(dt, tu)
      time = dt.to_datetime.to_time
      seconds = time.to_i

      case tu
      when "ns"
        seconds * 1_000_000_000 + time.nsec
      when "us", nil
        seconds * 1_000_000 + time.usec
      when "ms"
        seconds * 1_000 + time.usec / 1_000
      else
        raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
      end
    end

    # Convert a raw Polars temporal value to a Ruby object.
    #
    # For a date dtype `value` is a number of days since the epoch and a
    # `Date` is returned. For a datetime dtype `value` is a count of `tu`
    # units since the epoch and a UTC `Time` is returned.
    #
    # @param value [Integer]
    #   Raw value
    # @param dtype [Object]
    #   Data type of the value
    # @param tu [String]
    #   Time unit of a datetime value: "ns", "us" or "ms"
    # @param tz [String, nil]
    #   Time zone; only `nil` and "" are supported
    #
    # @return [Date, Time]
    #
    # @raise [ArgumentError] if `tu` is not a supported unit
    # @raise [Todo] if a time zone is given
    # @raise [NotImplementedError] for any other dtype
    def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
      if dtype == :date || dtype == Date
        # days to seconds
        # important to create from utc. Not doing this leads
        # to inconsistencies dependent on the timezone you are in.
        ::Time.at(value * 86400).utc.to_date
      # TODO fix dtype
      elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
        raise Todo unless tz.nil? || tz == ""

        # Integer division floors and `%` is non-negative, so values before
        # the epoch split into (seconds, sub-second part) correctly.
        case tu
        when "ns"
          ::Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
        when "us"
          ::Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
        when "ms"
          ::Time.at(value / 1_000, value % 1_000, :millisecond).utc
        else
          raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
        end
      else
        raise NotImplementedError
      end
    end

    # Convert a selection (a single String, Expr or Series, or a collection
    # of them) to an array of native expression handles. Strings are
    # treated as column names.
    def self.selection_to_rbexpr_list(exprs)
      if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
        exprs = [exprs]
      end

      exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
    end

    # Coerce a value to an Expr.
    #
    # @param expr [Object]
    #   An Expr (returned as is), or an Integer, Float, String, Series or
    #   nil (turned into a literal)
    # @param str_to_lit [Boolean]
    #   When false, a String is treated as a column name instead of a literal
    #
    # @return [Expr]
    #
    # @raise [ArgumentError] for any other kind of value
    def self.expr_to_lit_or_expr(expr, str_to_lit: true)
      if expr.is_a?(String) && !str_to_lit
        col(expr)
      elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
        lit(expr)
      elsif expr.is_a?(Expr)
        expr
      else
        raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
      end
    end

    # Shortcut for `Polars.lit`.
    def self.lit(value)
      Polars.lit(value)
    end

    # Expand `path` to an absolute path.
    def self.format_path(path)
      File.expand_path(path)
    end

    # TODO fix
    #
    # True when `data_type` is a Symbol, a String, a DataType instance or a
    # subclass of DataType.
    def self.is_polars_dtype(data_type)
      data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
    end

    # Mapping from Ruby classes to Polars data type names.
    RB_TYPE_TO_DTYPE = {
      Float => :f64,
      Integer => :i64,
      String => :str,
      TrueClass => :bool,
      FalseClass => :bool,
      ::Date => :date,
      ::DateTime => :datetime
    }.freeze

    # TODO fix
    #
    # Convert a Ruby class to a Polars data type. Values that already look
    # like a Polars data type are returned as is, except Symbols, which are
    # converted to Strings.
    #
    # @raise [ArgumentError] if the Ruby class has no Polars counterpart
    def self.rb_type_to_dtype(data_type)
      if is_polars_dtype(data_type)
        return data_type.is_a?(Symbol) ? data_type.to_s : data_type
      end

      mapped = RB_TYPE_TO_DTYPE.fetch(data_type) do
        raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
      end
      mapped.to_s
    end

    # Convert a Hash of null values to an array of pairs; anything else is
    # returned unchanged.
    def self._process_null_values(null_values)
      null_values.is_a?(Hash) ? null_values.to_a : null_values
    end

    # Returns `[row_count_name, row_count_offset]`, or nil when no name is
    # given.
    def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
      [row_count_name, row_count_offset] unless row_count_name.nil?
    end

    # Returns `[projection, columns]`. Column selection is not implemented
    # yet: any truthy `columns` raises Todo.
    def self.handle_projection_columns(columns)
      projection = nil
      if columns
        raise Todo
        # if columns.is_a?(String) || columns.is_a?(Symbol)
        #   columns = [columns]
        # elsif is_int_sequence(columns)
        #   projection = columns.to_a
        #   columns = nil
        # elsif !is_str_sequence(columns)
        #   raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
        # end
      end
      [projection, columns]
    end

    # Divisors used by scale_bytes, keyed by the first character of the unit.
    BYTE_SCALING_FACTORS = {
      "b" => 1,
      "k" => 1024,
      "m" => 1024 ** 2,
      "g" => 1024 ** 3,
      "t" => 1024 ** 4
    }.freeze

    # Scale a byte count to another unit.
    #
    # @param sz [Numeric]
    #   Size in bytes
    # @param to [String]
    #   Target unit; only its first character is inspected
    #   ("b", "k", "m", "g" or "t")
    #
    # @return [Numeric]
    #   `sz` itself for bytes, otherwise a Float
    #
    # @raise [ArgumentError] if the unit is not recognised
    def self.scale_bytes(sz, to:)
      scaling_factor = BYTE_SCALING_FACTORS.fetch(to[0]) do
        raise ArgumentError, "unknown size unit #{to.inspect}, expected a unit starting with b, k, m, g or t"
      end

      scaling_factor > 1 ? sz / scaling_factor.to_f : sz
    end

    # True when `value` is exactly true or false.
    def self.bool?(value)
      value == true || value == false
    end

    # True when every element of `val` is an instance of `eltype`.
    def self._is_iterable_of(val, eltype)
      val.all? { |x| x.is_a?(eltype) }
    end

    # True when `val` is an Array containing only Strings.
    #
    # @param allow_str [Boolean]
    #   Whether a bare String also counts as a sequence of strings
    def self.is_str_sequence(val, allow_str: false)
      # A String is not an Array, so it needs its own branch; otherwise
      # `allow_str` would have no effect.
      return allow_str != false if val.is_a?(String)

      val.is_a?(Array) && _is_iterable_of(val, String)
    end
  end
end
@@ -0,0 +1,4 @@
1
module Polars
  # @private
  #
  # Version of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = "0.2.0".freeze
end
@@ -0,0 +1,16 @@
1
module Polars
  # @private
  #
  # First step of a when/then chain; holds the native "when" handle.
  class When
    attr_accessor :_rbwhen

    # @param rbwhen [Object] native "when" handle
    def initialize(rbwhen)
      @_rbwhen = rbwhen
    end

    # Attach the value to use for this condition.
    #
    # @param expr [Object]
    #   An expression, or a value that `Utils.expr_to_lit_or_expr` can
    #   turn into one
    #
    # @return [WhenThen]
    def then(expr)
      value = Utils.expr_to_lit_or_expr(expr)
      WhenThen.new(_rbwhen._then(value._rbexpr))
    end
  end
end
@@ -0,0 +1,19 @@
1
module Polars
  # @private
  #
  # Second step of a when/then chain; holds the native "when-then" handle.
  class WhenThen
    attr_accessor :_rbwhenthen

    # @param rbwhenthen [Object] native "when-then" handle
    def initialize(rbwhenthen)
      @_rbwhenthen = rbwhenthen
    end

    # Add a further condition to the chain.
    #
    # NOTE(review): WhenThenThen is not among the Ruby files required by
    # lib/polars.rb - confirm it is defined somewhere before relying on this.
    #
    # @param predicate [Expr]
    #   Condition; must respond to `_rbexpr`
    #
    # @return [WhenThenThen]
    def when(predicate)
      chained = _rbwhenthen.when(predicate._rbexpr)
      WhenThenThen.new(chained)
    end

    # Finish the chain with the value used when no condition matched.
    #
    # @param expr [Object]
    #   An expression, or a value that `Utils.expr_to_lit_or_expr` can
    #   turn into one
    #
    # @return [Expr]
    def otherwise(expr)
      fallback = Utils.expr_to_lit_or_expr(expr)
      result = _rbwhenthen.otherwise(fallback._rbexpr)
      Utils.wrap_expr(result)
    end
  end
end
data/lib/polars-df.rb ADDED
@@ -0,0 +1 @@
1
+ require "polars"
data/lib/polars.rb ADDED
@@ -0,0 +1,50 @@
1
# ext
begin
  # Precompiled gems ship one native library per "major.minor" Ruby version.
  # Extract that prefix textually: `RUBY_VERSION.to_f` would turn a
  # two-digit minor such as "3.10" into 3.1.
  require_relative "polars/#{RUBY_VERSION[/\d+\.\d+/]}/polars"
rescue LoadError
  # Fall back to a library at the unversioned path.
  require_relative "polars/polars"
end

# stdlib
require "date"

# modules
require_relative "polars/expr_dispatch"
require_relative "polars/batched_csv_reader"
require_relative "polars/cat_expr"
require_relative "polars/cat_name_space"
require_relative "polars/convert"
require_relative "polars/data_frame"
require_relative "polars/data_types"
require_relative "polars/date_time_expr"
require_relative "polars/date_time_name_space"
require_relative "polars/dynamic_group_by"
require_relative "polars/exceptions"
require_relative "polars/expr"
require_relative "polars/functions"
require_relative "polars/group_by"
require_relative "polars/io"
require_relative "polars/lazy_frame"
require_relative "polars/lazy_functions"
require_relative "polars/lazy_group_by"
require_relative "polars/list_expr"
require_relative "polars/list_name_space"
require_relative "polars/meta_expr"
require_relative "polars/rolling_group_by"
require_relative "polars/series"
require_relative "polars/slice"
require_relative "polars/string_expr"
require_relative "polars/string_name_space"
require_relative "polars/struct_expr"
require_relative "polars/struct_name_space"
require_relative "polars/utils"
require_relative "polars/version"
require_relative "polars/when"
require_relative "polars/when_then"

# Expose the Convert, Functions, IO and LazyFunctions methods directly on
# the Polars module.
module Polars
  extend Convert
  extend Functions
  extend IO
  extend LazyFunctions
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: polars-df
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: x86_64-darwin
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: andrew@ankane.org
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".yardopts"
20
+ - CHANGELOG.md
21
+ - Cargo.lock
22
+ - Cargo.toml
23
+ - LICENSE-THIRD-PARTY.txt
24
+ - LICENSE.txt
25
+ - README.md
26
+ - lib/polars-df.rb
27
+ - lib/polars.rb
28
+ - lib/polars/3.0/polars.bundle
29
+ - lib/polars/3.1/polars.bundle
30
+ - lib/polars/3.2/polars.bundle
31
+ - lib/polars/batched_csv_reader.rb
32
+ - lib/polars/cat_expr.rb
33
+ - lib/polars/cat_name_space.rb
34
+ - lib/polars/convert.rb
35
+ - lib/polars/data_frame.rb
36
+ - lib/polars/data_types.rb
37
+ - lib/polars/date_time_expr.rb
38
+ - lib/polars/date_time_name_space.rb
39
+ - lib/polars/dynamic_group_by.rb
40
+ - lib/polars/exceptions.rb
41
+ - lib/polars/expr.rb
42
+ - lib/polars/expr_dispatch.rb
43
+ - lib/polars/functions.rb
44
+ - lib/polars/group_by.rb
45
+ - lib/polars/io.rb
46
+ - lib/polars/lazy_frame.rb
47
+ - lib/polars/lazy_functions.rb
48
+ - lib/polars/lazy_group_by.rb
49
+ - lib/polars/list_expr.rb
50
+ - lib/polars/list_name_space.rb
51
+ - lib/polars/meta_expr.rb
52
+ - lib/polars/rolling_group_by.rb
53
+ - lib/polars/series.rb
54
+ - lib/polars/slice.rb
55
+ - lib/polars/string_expr.rb
56
+ - lib/polars/string_name_space.rb
57
+ - lib/polars/struct_expr.rb
58
+ - lib/polars/struct_name_space.rb
59
+ - lib/polars/utils.rb
60
+ - lib/polars/version.rb
61
+ - lib/polars/when.rb
62
+ - lib/polars/when_then.rb
63
+ homepage: https://github.com/ankane/polars-ruby
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ - - "<"
77
+ - !ruby/object:Gem::Version
78
+ version: 3.3.dev
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.4.3
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Blazingly fast DataFrames for Ruby
89
+ test_files: []