polars-df 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
@@ -1,11 +1,17 @@
1
1
  module Polars
2
+ # Namespace for struct related expressions.
2
3
  class StructExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Retrieve one of the fields of this `Struct` as a new expression.
13
+ #
14
+ # @return [Expr]
9
15
  def [](item)
10
16
  if item.is_a?(String)
11
17
  field(item)
@@ -16,10 +22,77 @@ module Polars
16
22
  end
17
23
  end
18
24
 
25
+ # Retrieve one of the fields of this `Struct` as a new expression.
26
+ #
27
+ # @param name [String]
28
+ # Name of the field
29
+ #
30
+ # @return [Expr]
31
+ #
32
+ # @example
33
+ # df = (
34
+ # Polars::DataFrame.new(
35
+ # {
36
+ # "int" => [1, 2],
37
+ # "str" => ["a", "b"],
38
+ # "bool" => [true, nil],
39
+ # "list" => [[1, 2], [3]]
40
+ # }
41
+ # )
42
+ # .to_struct("my_struct")
43
+ # .to_frame
44
+ # )
45
+ # df.select(Polars.col("my_struct").struct.field("str"))
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ str │
50
+ # # │ --- │
51
+ # # │ str │
52
+ # # ╞═════╡
53
+ # # │ a │
54
+ # # ├╌╌╌╌╌┤
55
+ # # │ b │
56
+ # # └─────┘
19
57
  def field(name)
20
58
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
21
59
  end
22
60
 
61
+ # Rename the fields of the struct.
62
+ #
63
+ # @param names [Array]
64
+ # New names in the order of the struct's fields
65
+ #
66
+ # @return [Expr]
67
+ #
68
+ # @example
69
+ # df = (
70
+ # Polars::DataFrame.new(
71
+ # {
72
+ # "int" => [1, 2],
73
+ # "str" => ["a", "b"],
74
+ # "bool" => [true, nil],
75
+ # "list" => [[1, 2], [3]]
76
+ # }
77
+ # )
78
+ # .to_struct("my_struct")
79
+ # .to_frame
80
+ # )
81
+ # df = df.with_column(
82
+ # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
83
+ # )
84
+ # df.select(Polars.col("my_struct").struct.field("INT"))
85
+ # # =>
86
+ # # shape: (2, 1)
87
+ # # ┌─────┐
88
+ # # │ INT │
89
+ # # │ --- │
90
+ # # │ i64 │
91
+ # # ╞═════╡
92
+ # # │ 1 │
93
+ # # ├╌╌╌╌╌┤
94
+ # # │ 2 │
95
+ # # └─────┘
23
96
  def rename_fields(names)
24
97
  Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
25
98
  end
data/lib/polars/utils.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  module Utils
3
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
4
5
 
@@ -18,6 +19,24 @@ module Polars
18
19
  Polars.col(name)
19
20
  end
20
21
 
22
+ def self._timedelta_to_pl_duration(td)
23
+ td
24
+ end
25
+
26
+ def self._datetime_to_pl_timestamp(dt, tu)
27
+ if tu == "ns"
28
+ (dt.to_datetime.utc.to_f * 1e9).to_i
29
+ elsif tu == "us"
30
+ (dt.to_datetime.utc.to_f * 1e6).to_i
31
+ elsif tu == "ms"
32
+ (dt.to_datetime.utc.to_f * 1e3).to_i
33
+ elsif tu.nil?
34
+ (dt.to_datetime.utc.to_f * 1e6).to_i
35
+ else
36
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
37
+ end
38
+ end
39
+
21
40
  def self.selection_to_rbexpr_list(exprs)
22
41
  if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
23
42
  exprs = [exprs]
@@ -48,12 +67,62 @@ module Polars
48
67
 
49
68
  # TODO fix
50
69
  def self.is_polars_dtype(data_type)
51
- true
70
+ data_type.is_a?(Symbol) || data_type.is_a?(String)
52
71
  end
53
72
 
73
+ RB_TYPE_TO_DTYPE = {
74
+ Float => :f64,
75
+ Integer => :i64,
76
+ String => :str,
77
+ TrueClass => :bool,
78
+ FalseClass => :bool,
79
+ Date => :date,
80
+ DateTime => :datetime
81
+ }
82
+
54
83
  # TODO fix
55
- def self.rb_type_to_dtype(dtype)
56
- dtype.to_s
84
+ def self.rb_type_to_dtype(data_type)
85
+ if is_polars_dtype(data_type)
86
+ return data_type.to_s
87
+ end
88
+
89
+ begin
90
+ RB_TYPE_TO_DTYPE.fetch(data_type).to_s
91
+ rescue KeyError
92
+ raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
93
+ end
94
+ end
95
+
96
+ def self._process_null_values(null_values)
97
+ if null_values.is_a?(Hash)
98
+ null_values.to_a
99
+ else
100
+ null_values
101
+ end
102
+ end
103
+
104
+ def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
105
+ if !row_count_name.nil?
106
+ [row_count_name, row_count_offset]
107
+ else
108
+ nil
109
+ end
110
+ end
111
+
112
+ def self.handle_projection_columns(columns)
113
+ projection = nil
114
+ if columns
115
+ raise Todo
116
+ # if columns.is_a?(String) || columns.is_a?(Symbol)
117
+ # columns = [columns]
118
+ # elsif is_int_sequence(columns)
119
+ # projection = columns.to_a
120
+ # columns = nil
121
+ # elsif !is_str_sequence(columns)
122
+ # raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
123
+ # end
124
+ end
125
+ [projection, columns]
57
126
  end
58
127
 
59
128
  def self.scale_bytes(sz, to:)
@@ -70,5 +139,9 @@ module Polars
70
139
  sz
71
140
  end
72
141
  end
142
+
143
+ def self.bool?(value)
144
+ value == true || value == false
145
+ end
73
146
  end
74
147
  end
@@ -1,3 +1,4 @@
1
1
  module Polars
2
- VERSION = "0.1.1"
2
+ # @private
3
+ VERSION = "0.1.3"
3
4
  end
data/lib/polars/when.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class When
3
4
  attr_accessor :_rbwhen
4
5
 
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class WhenThen
3
4
  attr_accessor :_rbwhenthen
4
5
 
data/lib/polars.rb CHANGED
@@ -1,12 +1,19 @@
1
1
  # ext
2
2
  require "polars/polars"
3
3
 
4
+ # stdlib
5
+ require "date"
6
+
4
7
  # modules
8
+ require "polars/expr_dispatch"
9
+ require "polars/batched_csv_reader"
5
10
  require "polars/cat_expr"
6
11
  require "polars/data_frame"
7
12
  require "polars/date_time_expr"
13
+ require "polars/exceptions"
8
14
  require "polars/expr"
9
15
  require "polars/functions"
16
+ require "polars/group_by"
10
17
  require "polars/io"
11
18
  require "polars/lazy_frame"
12
19
  require "polars/lazy_functions"
@@ -14,6 +21,7 @@ require "polars/lazy_group_by"
14
21
  require "polars/list_expr"
15
22
  require "polars/meta_expr"
16
23
  require "polars/series"
24
+ require "polars/slice"
17
25
  require "polars/string_expr"
18
26
  require "polars/struct_expr"
19
27
  require "polars/utils"
@@ -22,8 +30,6 @@ require "polars/when"
22
30
  require "polars/when_then"
23
31
 
24
32
  module Polars
25
- class Error < StandardError; end
26
-
27
33
  extend Functions
28
34
  extend IO
29
35
  extend LazyFunctions
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-24 00:00:00.000000000 Z
11
+ date: 2022-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -31,6 +31,7 @@ extensions:
31
31
  - ext/polars/extconf.rb
32
32
  extra_rdoc_files: []
33
33
  files:
34
+ - ".yardopts"
34
35
  - CHANGELOG.md
35
36
  - Cargo.lock
36
37
  - Cargo.toml
@@ -38,10 +39,12 @@ files:
38
39
  - README.md
39
40
  - ext/polars/Cargo.toml
40
41
  - ext/polars/extconf.rb
42
+ - ext/polars/src/batched_csv.rs
41
43
  - ext/polars/src/conversion.rs
42
44
  - ext/polars/src/dataframe.rs
43
45
  - ext/polars/src/error.rs
44
46
  - ext/polars/src/file.rs
47
+ - ext/polars/src/lazy/apply.rs
45
48
  - ext/polars/src/lazy/dataframe.rs
46
49
  - ext/polars/src/lazy/dsl.rs
47
50
  - ext/polars/src/lazy/meta.rs
@@ -49,13 +52,19 @@ files:
49
52
  - ext/polars/src/lazy/utils.rs
50
53
  - ext/polars/src/lib.rs
51
54
  - ext/polars/src/series.rs
55
+ - ext/polars/src/set.rs
56
+ - ext/polars/src/utils.rs
52
57
  - lib/polars-df.rb
53
58
  - lib/polars.rb
59
+ - lib/polars/batched_csv_reader.rb
54
60
  - lib/polars/cat_expr.rb
55
61
  - lib/polars/data_frame.rb
56
62
  - lib/polars/date_time_expr.rb
63
+ - lib/polars/exceptions.rb
57
64
  - lib/polars/expr.rb
65
+ - lib/polars/expr_dispatch.rb
58
66
  - lib/polars/functions.rb
67
+ - lib/polars/group_by.rb
59
68
  - lib/polars/io.rb
60
69
  - lib/polars/lazy_frame.rb
61
70
  - lib/polars/lazy_functions.rb
@@ -63,6 +72,7 @@ files:
63
72
  - lib/polars/list_expr.rb
64
73
  - lib/polars/meta_expr.rb
65
74
  - lib/polars/series.rb
75
+ - lib/polars/slice.rb
66
76
  - lib/polars/string_expr.rb
67
77
  - lib/polars/struct_expr.rb
68
78
  - lib/polars/utils.rb