polars-df 0.2.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +33 -0
  4. data/Cargo.lock +2230 -0
  5. data/Cargo.toml +10 -0
  6. data/LICENSE-THIRD-PARTY.txt +38856 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +91 -0
  9. data/lib/polars/3.0/polars.bundle +0 -0
  10. data/lib/polars/3.1/polars.bundle +0 -0
  11. data/lib/polars/3.2/polars.bundle +0 -0
  12. data/lib/polars/batched_csv_reader.rb +96 -0
  13. data/lib/polars/cat_expr.rb +52 -0
  14. data/lib/polars/cat_name_space.rb +54 -0
  15. data/lib/polars/convert.rb +100 -0
  16. data/lib/polars/data_frame.rb +4833 -0
  17. data/lib/polars/data_types.rb +122 -0
  18. data/lib/polars/date_time_expr.rb +1418 -0
  19. data/lib/polars/date_time_name_space.rb +1484 -0
  20. data/lib/polars/dynamic_group_by.rb +52 -0
  21. data/lib/polars/exceptions.rb +20 -0
  22. data/lib/polars/expr.rb +5307 -0
  23. data/lib/polars/expr_dispatch.rb +22 -0
  24. data/lib/polars/functions.rb +453 -0
  25. data/lib/polars/group_by.rb +558 -0
  26. data/lib/polars/io.rb +814 -0
  27. data/lib/polars/lazy_frame.rb +2442 -0
  28. data/lib/polars/lazy_functions.rb +1195 -0
  29. data/lib/polars/lazy_group_by.rb +93 -0
  30. data/lib/polars/list_expr.rb +610 -0
  31. data/lib/polars/list_name_space.rb +346 -0
  32. data/lib/polars/meta_expr.rb +54 -0
  33. data/lib/polars/rolling_group_by.rb +35 -0
  34. data/lib/polars/series.rb +3730 -0
  35. data/lib/polars/slice.rb +104 -0
  36. data/lib/polars/string_expr.rb +972 -0
  37. data/lib/polars/string_name_space.rb +690 -0
  38. data/lib/polars/struct_expr.rb +100 -0
  39. data/lib/polars/struct_name_space.rb +64 -0
  40. data/lib/polars/utils.rb +192 -0
  41. data/lib/polars/version.rb +4 -0
  42. data/lib/polars/when.rb +16 -0
  43. data/lib/polars/when_then.rb +19 -0
  44. data/lib/polars-df.rb +1 -0
  45. data/lib/polars.rb +50 -0
  46. metadata +89 -0
@@ -0,0 +1,100 @@
1
module Polars
  # Struct-specific operations that can be applied to an expression.
  class StructExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Select one field of the struct, either by name or by position.
    #
    # @param item [String, Integer]
    #   Field name, or index of the field
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `item` is neither a String nor an Integer
    def [](item)
      case item
      when String
        field(item)
      when Integer
        Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Select one field of the struct by its name.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df.select(Polars.col("my_struct").struct.field("str"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ str │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ a   │
    #   # ├╌╌╌╌╌┤
    #   # │ b   │
    #   # └─────┘
    def field(name)
      rbexpr = _rbexpr.struct_field_by_name(name)
      Utils.wrap_expr(rbexpr)
    end

    # Give the fields of the struct new names.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df = df.with_column(
    #     Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
    #   )
    #   df.select(Polars.col("my_struct").struct.field("INT"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ INT │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # ├╌╌╌╌╌┤
    #   # │ 2   │
    #   # └─────┘
    def rename_fields(names)
      rbexpr = _rbexpr.struct_rename_fields(names)
      Utils.wrap_expr(rbexpr)
    end
  end
end
@@ -0,0 +1,64 @@
1
module Polars
  # Struct operations reachable through the `struct` accessor of a Series.
  class StructNameSpace
    include ExprDispatch

    self._accessor = "struct"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Select one field of the struct, either by position or by name.
    #
    # An Integer is first translated to a field name through {#fields}.
    #
    # @param item [Integer, String]
    #   Index of the field, or field name
    #
    # @return [Series]
    #
    # @raise [ArgumentError]
    #   If `item` is neither an Integer nor a String
    def [](item)
      case item
      when Integer
        field(fields[item])
      when String
        field(item)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Turn this struct Series into a DataFrame.
    #
    # @return [DataFrame]
    def to_frame
      rbdf = _s.struct_to_frame
      Utils.wrap_df(rbdf)
    end

    # List the names of the struct's fields.
    #
    # @return [Array]
    #   Empty when there is no underlying series
    def fields
      return [] if _s.nil?

      _s.struct_fields
    end

    # Select one field of the struct by its name.
    #
    # NOTE(review): the body only calls `super`; presumably ExprDispatch
    # routes the call to the expression implementation - confirm there.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Series]
    def field(name)
      super
    end

    # Give the fields of the struct new names.
    #
    # NOTE(review): the body only calls `super`; presumably ExprDispatch
    # routes the call to the expression implementation - confirm there.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Series]
    def rename_fields(names)
      super
    end
  end
end
@@ -0,0 +1,192 @@
1
module Polars
  # @private
  #
  # Internal helpers: wrapping of native handles, temporal conversions,
  # argument normalisation and small type predicates.
  module Utils
    # Time units accepted for temporal data types.
    DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"].freeze

    # Divisors used by {scale_bytes}, keyed by the first letter of the unit.
    BYTE_SCALING_FACTORS = {
      "b" => 1,
      "k" => 1024,
      "m" => 1024 ** 2,
      "g" => 1024 ** 3,
      "t" => 1024 ** 4
    }.freeze

    # Wrap a native series handle in a Series.
    def self.wrap_s(s)
      Series._from_rbseries(s)
    end

    # Wrap a native data frame handle in a DataFrame.
    def self.wrap_df(df)
      DataFrame._from_rbdf(df)
    end

    # Wrap a native lazy frame handle in a LazyFrame.
    def self.wrap_ldf(ldf)
      LazyFrame._from_rbldf(ldf)
    end

    # Wrap a native expression handle in an Expr.
    def self.wrap_expr(rbexpr)
      Expr._from_rbexpr(rbexpr)
    end

    # Shortcut for `Polars.col`.
    def self.col(name)
      Polars.col(name)
    end

    # Currently a pass-through: the value is returned unchanged.
    def self._timedelta_to_pl_duration(td)
      td
    end

    # Convert a date/time value to an integer count of `tu` since the Unix epoch.
    #
    # Only standard-library methods are used (`DateTime` has no `#utc` or
    # `#to_f` without ActiveSupport), and the arithmetic is done on integers
    # so nanosecond timestamps are exact. Fractions smaller than `tu` are
    # floored.
    #
    # @param dt [#to_datetime]
    #   A `Time`, `Date` or `DateTime`. A plain `Date` is taken as midnight UTC.
    # @param tu [String, nil]
    #   "ns", "us" or "ms"; `nil` is treated as "us"
    #
    # @return [Integer]
    #
    # @raise [ArgumentError]
    #   If `tu` is not one of the supported units
    def self._datetime_to_pl_timestamp(dt, tu)
      units_per_second =
        case tu
        when "ns" then 1_000_000_000
        when "us", nil then 1_000_000
        when "ms" then 1_000
        else
          raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
        end

      # Going through DateTime keeps the instant for Time/DateTime inputs and
      # pins a plain Date to 00:00 at offset zero.
      time = dt.to_datetime.to_time
      time.to_i * units_per_second + time.nsec / (1_000_000_000 / units_per_second)
    end

    # Convert a raw integer temporal value to a Ruby `Date` or `Time`.
    #
    # @param value [Integer]
    #   Days since the epoch for dates; count of `tu` since the epoch for datetimes
    # @param dtype [Object]
    #   `:date`/`Date`, or a datetime type (a `Datetime` instance or anything
    #   whose string form starts with "datetime[")
    # @param tu [String]
    #   "ns", "us" or "ms" (datetimes only)
    # @param tz [String, nil]
    #   Time zone; only `nil` and "" are supported so far
    #
    # @return [Date, Time]
    #   Times are returned in UTC
    #
    # @raise [ArgumentError]
    #   If `tu` is not one of the supported units
    # @raise [Todo]
    #   If a non-empty `tz` is given
    # @raise [NotImplementedError]
    #   If `dtype` is neither a date nor a datetime type
    def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
      if dtype == :date || dtype == Date
        # days to seconds
        # important to create from utc. Not doing this leads
        # to inconsistencies dependent on the timezone you are in.
        ::Time.at(value * 86400).utc.to_date
      # TODO fix dtype
      elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
        raise Todo unless tz.nil? || tz == ""

        # Integer division/modulo floor, so values before the epoch still
        # produce a non-negative sub-second part.
        case tu
        when "ns"
          ::Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
        when "us"
          ::Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
        when "ms"
          ::Time.at(value / 1_000, value % 1_000, :millisecond).utc
        else
          raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
        end
      else
        raise NotImplementedError
      end
    end

    # Normalise a selection (one item or a collection) to a list of native
    # expressions. Strings are treated as column names here.
    def self.selection_to_rbexpr_list(exprs)
      if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
        exprs = [exprs]
      end

      exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
    end

    # Coerce a value to an Expr.
    #
    # Strings become literals, or column references when `str_to_lit` is
    # false. Integers, Floats, Series and nil become literals. An Expr is
    # returned as is.
    #
    # @raise [ArgumentError]
    #   For any other kind of value
    def self.expr_to_lit_or_expr(expr, str_to_lit: true)
      if expr.is_a?(String) && !str_to_lit
        col(expr)
      elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
        lit(expr)
      elsif expr.is_a?(Expr)
        expr
      else
        raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
      end
    end

    # Shortcut for `Polars.lit`.
    def self.lit(value)
      Polars.lit(value)
    end

    # Expand a path to an absolute one.
    def self.format_path(path)
      File.expand_path(path)
    end

    # TODO fix
    #
    # Whether `data_type` is accepted as a Polars data type: a Symbol, a
    # String, a DataType instance or a DataType subclass.
    def self.is_polars_dtype(data_type)
      data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
    end

    # Ruby classes and the data type each one maps to.
    RB_TYPE_TO_DTYPE = {
      Float => :f64,
      Integer => :i64,
      String => :str,
      TrueClass => :bool,
      FalseClass => :bool,
      ::Date => :date,
      ::DateTime => :datetime
    }.freeze

    # TODO fix
    #
    # Resolve a Ruby class or Polars data type to a data type.
    #
    # Polars data types are passed through (Symbols as Strings); Ruby
    # classes are looked up in RB_TYPE_TO_DTYPE and returned as Strings.
    #
    # @raise [ArgumentError]
    #   If the Ruby class has no mapping
    def self.rb_type_to_dtype(data_type)
      if is_polars_dtype(data_type)
        return data_type.is_a?(Symbol) ? data_type.to_s : data_type
      end

      RB_TYPE_TO_DTYPE.fetch(data_type) do
        raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
      end.to_s
    end

    # Turn a Hash of null values into an array of pairs; anything else is
    # returned unchanged.
    def self._process_null_values(null_values)
      null_values.is_a?(Hash) ? null_values.to_a : null_values
    end

    # Pair up the row count arguments, or return nil when no name is given.
    def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
      [row_count_name, row_count_offset] unless row_count_name.nil?
    end

    # Split a column selection into `[projection, columns]`.
    #
    # Only a nil/false selection is supported so far; anything else raises Todo.
    def self.handle_projection_columns(columns)
      projection = nil
      if columns
        raise Todo
        # if columns.is_a?(String) || columns.is_a?(Symbol)
        #   columns = [columns]
        # elsif is_int_sequence(columns)
        #   projection = columns.to_a
        #   columns = nil
        # elsif !is_str_sequence(columns)
        #   raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
        # end
      end
      [projection, columns]
    end

    # Scale a size in bytes to another unit.
    #
    # @param sz [Numeric]
    #   Size in bytes
    # @param to [String]
    #   Target unit; only its first character is used ("b", "k", "m", "g", "t")
    #
    # @return [Numeric]
    #   `sz` unchanged for bytes, a Float otherwise
    #
    # @raise [ArgumentError]
    #   If the unit is not recognised
    def self.scale_bytes(sz, to:)
      scaling_factor = BYTE_SCALING_FACTORS.fetch(to[0]) do
        raise ArgumentError, "to must start with one of #{BYTE_SCALING_FACTORS.keys.join(", ")}, got #{to}"
      end

      scaling_factor > 1 ? sz / scaling_factor.to_f : sz
    end

    # Whether `value` equals true or false.
    def self.bool?(value)
      value == true || value == false
    end

    # Whether every element of `val` is an `eltype`.
    def self._is_iterable_of(val, eltype)
      val.all? { |x| x.is_a?(eltype) }
    end

    # Whether `val` is an Array made up only of Strings.
    #
    # @param allow_str [Boolean]
    #   When true, a single String is also accepted
    def self.is_str_sequence(val, allow_str: false)
      return !!allow_str if val.is_a?(String)

      val.is_a?(Array) && _is_iterable_of(val, String)
    end
  end
end
@@ -0,0 +1,4 @@
1
module Polars
  # @private
  # Version of the gem. Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.2.0".freeze
end
@@ -0,0 +1,16 @@
1
module Polars
  # @private
  #
  # First step of a when/then chain; holds the native `when` handle.
  class When
    attr_accessor :_rbwhen

    def initialize(rbwhen)
      @_rbwhen = rbwhen
    end

    # Attach the value to use when the condition holds.
    #
    # `expr` is coerced with Utils.expr_to_lit_or_expr, so plain values
    # (including Strings) become literals.
    #
    # @return [WhenThen]
    def then(expr)
      value = Utils.expr_to_lit_or_expr(expr)
      WhenThen.new(_rbwhen._then(value._rbexpr))
    end
  end
end
@@ -0,0 +1,19 @@
1
module Polars
  # @private
  #
  # Second step of a when/then chain; holds the native `when-then` handle.
  class WhenThen
    attr_accessor :_rbwhenthen

    def initialize(rbwhenthen)
      @_rbwhenthen = rbwhenthen
    end

    # Add a further condition to the chain.
    #
    # `predicate` is used as given (it must respond to `_rbexpr`); no
    # literal coercion is applied.
    #
    # NOTE(review): no file in this package's file list or require list is
    # named after WhenThenThen - confirm the constant is defined (e.g. by the
    # native extension), otherwise this call raises NameError.
    def when(predicate)
      chained = _rbwhenthen.when(predicate._rbexpr)
      WhenThenThen.new(chained)
    end

    # Close the chain with the value to use when no condition holds.
    #
    # `expr` is coerced with Utils.expr_to_lit_or_expr, so plain values
    # (including Strings) become literals.
    #
    # @return [Expr]
    def otherwise(expr)
      fallback = Utils.expr_to_lit_or_expr(expr)
      Utils.wrap_expr(_rbwhenthen.otherwise(fallback._rbexpr))
    end
  end
end
data/lib/polars-df.rb ADDED
@@ -0,0 +1 @@
1
+ require "polars"
data/lib/polars.rb ADDED
@@ -0,0 +1,50 @@
1
+ # ext
2
+ begin
3
+ require_relative "polars/#{RUBY_VERSION.to_f}/polars"
4
+ rescue LoadError
5
+ require_relative "polars/polars"
6
+ end
7
+
8
+ # stdlib
9
+ require "date"
10
+
11
+ # modules
12
+ require_relative "polars/expr_dispatch"
13
+ require_relative "polars/batched_csv_reader"
14
+ require_relative "polars/cat_expr"
15
+ require_relative "polars/cat_name_space"
16
+ require_relative "polars/convert"
17
+ require_relative "polars/data_frame"
18
+ require_relative "polars/data_types"
19
+ require_relative "polars/date_time_expr"
20
+ require_relative "polars/date_time_name_space"
21
+ require_relative "polars/dynamic_group_by"
22
+ require_relative "polars/exceptions"
23
+ require_relative "polars/expr"
24
+ require_relative "polars/functions"
25
+ require_relative "polars/group_by"
26
+ require_relative "polars/io"
27
+ require_relative "polars/lazy_frame"
28
+ require_relative "polars/lazy_functions"
29
+ require_relative "polars/lazy_group_by"
30
+ require_relative "polars/list_expr"
31
+ require_relative "polars/list_name_space"
32
+ require_relative "polars/meta_expr"
33
+ require_relative "polars/rolling_group_by"
34
+ require_relative "polars/series"
35
+ require_relative "polars/slice"
36
+ require_relative "polars/string_expr"
37
+ require_relative "polars/string_name_space"
38
+ require_relative "polars/struct_expr"
39
+ require_relative "polars/struct_name_space"
40
+ require_relative "polars/utils"
41
+ require_relative "polars/version"
42
+ require_relative "polars/when"
43
+ require_relative "polars/when_then"
44
+
45
# Make the mixins' methods callable directly on the Polars module.
module Polars
  # Extended one at a time, in this order, so that later mixins take
  # precedence in method lookup.
  [Convert, Functions, IO, LazyFunctions].each { |mixin| extend mixin }
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: polars-df
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: arm64-darwin
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-01-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: andrew@ankane.org
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".yardopts"
20
+ - CHANGELOG.md
21
+ - Cargo.lock
22
+ - Cargo.toml
23
+ - LICENSE-THIRD-PARTY.txt
24
+ - LICENSE.txt
25
+ - README.md
26
+ - lib/polars-df.rb
27
+ - lib/polars.rb
28
+ - lib/polars/3.0/polars.bundle
29
+ - lib/polars/3.1/polars.bundle
30
+ - lib/polars/3.2/polars.bundle
31
+ - lib/polars/batched_csv_reader.rb
32
+ - lib/polars/cat_expr.rb
33
+ - lib/polars/cat_name_space.rb
34
+ - lib/polars/convert.rb
35
+ - lib/polars/data_frame.rb
36
+ - lib/polars/data_types.rb
37
+ - lib/polars/date_time_expr.rb
38
+ - lib/polars/date_time_name_space.rb
39
+ - lib/polars/dynamic_group_by.rb
40
+ - lib/polars/exceptions.rb
41
+ - lib/polars/expr.rb
42
+ - lib/polars/expr_dispatch.rb
43
+ - lib/polars/functions.rb
44
+ - lib/polars/group_by.rb
45
+ - lib/polars/io.rb
46
+ - lib/polars/lazy_frame.rb
47
+ - lib/polars/lazy_functions.rb
48
+ - lib/polars/lazy_group_by.rb
49
+ - lib/polars/list_expr.rb
50
+ - lib/polars/list_name_space.rb
51
+ - lib/polars/meta_expr.rb
52
+ - lib/polars/rolling_group_by.rb
53
+ - lib/polars/series.rb
54
+ - lib/polars/slice.rb
55
+ - lib/polars/string_expr.rb
56
+ - lib/polars/string_name_space.rb
57
+ - lib/polars/struct_expr.rb
58
+ - lib/polars/struct_name_space.rb
59
+ - lib/polars/utils.rb
60
+ - lib/polars/version.rb
61
+ - lib/polars/when.rb
62
+ - lib/polars/when_then.rb
63
+ homepage: https://github.com/ankane/polars-ruby
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ - - "<"
77
+ - !ruby/object:Gem::Version
78
+ version: 3.3.dev
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.4.3
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Blazingly fast DataFrames for Ruby
89
+ test_files: []