polars-df 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +8 -0
  4. data/Cargo.lock +2 -1
  5. data/README.md +1 -1
  6. data/ext/polars/Cargo.toml +7 -1
  7. data/ext/polars/src/batched_csv.rs +120 -0
  8. data/ext/polars/src/conversion.rs +139 -6
  9. data/ext/polars/src/dataframe.rs +360 -15
  10. data/ext/polars/src/error.rs +9 -0
  11. data/ext/polars/src/file.rs +8 -7
  12. data/ext/polars/src/lazy/apply.rs +7 -0
  13. data/ext/polars/src/lazy/dataframe.rs +135 -3
  14. data/ext/polars/src/lazy/dsl.rs +97 -2
  15. data/ext/polars/src/lazy/meta.rs +1 -1
  16. data/ext/polars/src/lazy/mod.rs +1 -0
  17. data/ext/polars/src/lib.rs +227 -12
  18. data/ext/polars/src/series.rs +190 -38
  19. data/ext/polars/src/set.rs +91 -0
  20. data/ext/polars/src/utils.rs +19 -0
  21. data/lib/polars/batched_csv_reader.rb +96 -0
  22. data/lib/polars/cat_expr.rb +39 -0
  23. data/lib/polars/data_frame.rb +2813 -100
  24. data/lib/polars/date_time_expr.rb +1282 -7
  25. data/lib/polars/exceptions.rb +20 -0
  26. data/lib/polars/expr.rb +631 -11
  27. data/lib/polars/expr_dispatch.rb +14 -0
  28. data/lib/polars/functions.rb +219 -0
  29. data/lib/polars/group_by.rb +517 -0
  30. data/lib/polars/io.rb +763 -4
  31. data/lib/polars/lazy_frame.rb +1415 -67
  32. data/lib/polars/lazy_functions.rb +430 -9
  33. data/lib/polars/lazy_group_by.rb +79 -0
  34. data/lib/polars/list_expr.rb +5 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2244 -192
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +663 -2
  39. data/lib/polars/struct_expr.rb +73 -0
  40. data/lib/polars/utils.rb +76 -3
  41. data/lib/polars/version.rb +2 -1
  42. data/lib/polars/when.rb +1 -0
  43. data/lib/polars/when_then.rb +1 -0
  44. data/lib/polars.rb +8 -2
  45. metadata +12 -2
@@ -1,11 +1,17 @@
1
1
  module Polars
2
+ # Namespace for struct related expressions.
2
3
  class StructExpr
4
+ # @private
3
5
  attr_accessor :_rbexpr
4
6
 
7
+ # @private
5
8
  def initialize(expr)
6
9
  self._rbexpr = expr._rbexpr
7
10
  end
8
11
 
12
+ # Retrieve one of the fields of this `Struct` as a new Series.
13
+ #
14
+ # @return [Expr]
9
15
  def [](item)
10
16
  if item.is_a?(String)
11
17
  field(item)
@@ -16,10 +22,77 @@ module Polars
16
22
  end
17
23
  end
18
24
 
25
+ # Retrieve one of the fields of this `Struct` as a new Series.
26
+ #
27
+ # @param name [String]
28
+ # Name of the field
29
+ #
30
+ # @return [Expr]
31
+ #
32
+ # @example
33
+ # df = (
34
+ # Polars::DataFrame.new(
35
+ # {
36
+ # "int" => [1, 2],
37
+ # "str" => ["a", "b"],
38
+ # "bool" => [true, nil],
39
+ # "list" => [[1, 2], [3]]
40
+ # }
41
+ # )
42
+ # .to_struct("my_struct")
43
+ # .to_frame
44
+ # )
45
+ # df.select(Polars.col("my_struct").struct.field("str"))
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ str │
50
+ # # │ --- │
51
+ # # │ str │
52
+ # # ╞═════╡
53
+ # # │ a │
54
+ # # ├╌╌╌╌╌┤
55
+ # # │ b │
56
+ # # └─────┘
19
57
  def field(name)
20
58
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
21
59
  end
22
60
 
61
+ # Rename the fields of the struct.
62
+ #
63
+ # @param names [Array]
64
+ # New names in the order of the struct's fields
65
+ #
66
+ # @return [Expr]
67
+ #
68
+ # @example
69
+ # df = (
70
+ # Polars::DataFrame.new(
71
+ # {
72
+ # "int" => [1, 2],
73
+ # "str" => ["a", "b"],
74
+ # "bool" => [true, nil],
75
+ # "list" => [[1, 2], [3]]
76
+ # }
77
+ # )
78
+ # .to_struct("my_struct")
79
+ # .to_frame
80
+ # )
81
+ # df = df.with_column(
82
+ # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
83
+ # )
84
+ # df.select(Polars.col("my_struct").struct.field("INT"))
85
+ # # =>
86
+ # # shape: (2, 1)
87
+ # # ┌─────┐
88
+ # # │ INT │
89
+ # # │ --- │
90
+ # # │ i64 │
91
+ # # ╞═════╡
92
+ # # │ 1 │
93
+ # # ├╌╌╌╌╌┤
94
+ # # │ 2 │
95
+ # # └─────┘
23
96
  def rename_fields(names)
24
97
  Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
25
98
  end
data/lib/polars/utils.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  module Utils
3
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
4
5
 
@@ -18,6 +19,24 @@ module Polars
18
19
  Polars.col(name)
19
20
  end
20
21
 
22
+ def self._timedelta_to_pl_duration(td)
23
+ td
24
+ end
25
+
26
+ def self._datetime_to_pl_timestamp(dt, tu)
27
+ if tu == "ns"
28
+ (dt.to_datetime.utc.to_f * 1e9).to_i
29
+ elsif tu == "us"
30
+ (dt.to_datetime.utc.to_f * 1e6).to_i
31
+ elsif tu == "ms"
32
+ (dt.to_datetime.utc.to_f * 1e3).to_i
33
+ elsif tu.nil?
34
+ (dt.to_datetime.utc.to_f * 1e6).to_i
35
+ else
36
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
37
+ end
38
+ end
39
+
21
40
  def self.selection_to_rbexpr_list(exprs)
22
41
  if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
23
42
  exprs = [exprs]
@@ -48,12 +67,62 @@ module Polars
48
67
 
49
68
  # TODO fix
50
69
  def self.is_polars_dtype(data_type)
51
- true
70
+ data_type.is_a?(Symbol) || data_type.is_a?(String)
52
71
  end
53
72
 
73
+ RB_TYPE_TO_DTYPE = {
74
+ Float => :f64,
75
+ Integer => :i64,
76
+ String => :str,
77
+ TrueClass => :bool,
78
+ FalseClass => :bool,
79
+ Date => :date,
80
+ DateTime => :datetime
81
+ }
82
+
54
83
  # TODO fix
55
- def self.rb_type_to_dtype(dtype)
56
- dtype.to_s
84
+ def self.rb_type_to_dtype(data_type)
85
+ if is_polars_dtype(data_type)
86
+ return data_type.to_s
87
+ end
88
+
89
+ begin
90
+ RB_TYPE_TO_DTYPE.fetch(data_type).to_s
91
+ rescue KeyError
92
+ raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
93
+ end
94
+ end
95
+
96
+ def self._process_null_values(null_values)
97
+ if null_values.is_a?(Hash)
98
+ null_values.to_a
99
+ else
100
+ null_values
101
+ end
102
+ end
103
+
104
+ def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
105
+ if !row_count_name.nil?
106
+ [row_count_name, row_count_offset]
107
+ else
108
+ nil
109
+ end
110
+ end
111
+
112
+ def self.handle_projection_columns(columns)
113
+ projection = nil
114
+ if columns
115
+ raise Todo
116
+ # if columns.is_a?(String) || columns.is_a?(Symbol)
117
+ # columns = [columns]
118
+ # elsif is_int_sequence(columns)
119
+ # projection = columns.to_a
120
+ # columns = nil
121
+ # elsif !is_str_sequence(columns)
122
+ # raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
123
+ # end
124
+ end
125
+ [projection, columns]
57
126
  end
58
127
 
59
128
  def self.scale_bytes(sz, to:)
@@ -70,5 +139,9 @@ module Polars
70
139
  sz
71
140
  end
72
141
  end
142
+
143
+ def self.bool?(value)
144
+ value == true || value == false
145
+ end
73
146
  end
74
147
  end
@@ -1,3 +1,4 @@
1
1
  module Polars
2
- VERSION = "0.1.1"
2
+ # @private
3
+ VERSION = "0.1.3"
3
4
  end
data/lib/polars/when.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class When
3
4
  attr_accessor :_rbwhen
4
5
 
@@ -1,4 +1,5 @@
1
1
  module Polars
2
+ # @private
2
3
  class WhenThen
3
4
  attr_accessor :_rbwhenthen
4
5
 
data/lib/polars.rb CHANGED
@@ -1,12 +1,19 @@
1
1
  # ext
2
2
  require "polars/polars"
3
3
 
4
+ # stdlib
5
+ require "date"
6
+
4
7
  # modules
8
+ require "polars/expr_dispatch"
9
+ require "polars/batched_csv_reader"
5
10
  require "polars/cat_expr"
6
11
  require "polars/data_frame"
7
12
  require "polars/date_time_expr"
13
+ require "polars/exceptions"
8
14
  require "polars/expr"
9
15
  require "polars/functions"
16
+ require "polars/group_by"
10
17
  require "polars/io"
11
18
  require "polars/lazy_frame"
12
19
  require "polars/lazy_functions"
@@ -14,6 +21,7 @@ require "polars/lazy_group_by"
14
21
  require "polars/list_expr"
15
22
  require "polars/meta_expr"
16
23
  require "polars/series"
24
+ require "polars/slice"
17
25
  require "polars/string_expr"
18
26
  require "polars/struct_expr"
19
27
  require "polars/utils"
@@ -22,8 +30,6 @@ require "polars/when"
22
30
  require "polars/when_then"
23
31
 
24
32
  module Polars
25
- class Error < StandardError; end
26
-
27
33
  extend Functions
28
34
  extend IO
29
35
  extend LazyFunctions
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-24 00:00:00.000000000 Z
11
+ date: 2022-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -31,6 +31,7 @@ extensions:
31
31
  - ext/polars/extconf.rb
32
32
  extra_rdoc_files: []
33
33
  files:
34
+ - ".yardopts"
34
35
  - CHANGELOG.md
35
36
  - Cargo.lock
36
37
  - Cargo.toml
@@ -38,10 +39,12 @@ files:
38
39
  - README.md
39
40
  - ext/polars/Cargo.toml
40
41
  - ext/polars/extconf.rb
42
+ - ext/polars/src/batched_csv.rs
41
43
  - ext/polars/src/conversion.rs
42
44
  - ext/polars/src/dataframe.rs
43
45
  - ext/polars/src/error.rs
44
46
  - ext/polars/src/file.rs
47
+ - ext/polars/src/lazy/apply.rs
45
48
  - ext/polars/src/lazy/dataframe.rs
46
49
  - ext/polars/src/lazy/dsl.rs
47
50
  - ext/polars/src/lazy/meta.rs
@@ -49,13 +52,19 @@ files:
49
52
  - ext/polars/src/lazy/utils.rs
50
53
  - ext/polars/src/lib.rs
51
54
  - ext/polars/src/series.rs
55
+ - ext/polars/src/set.rs
56
+ - ext/polars/src/utils.rs
52
57
  - lib/polars-df.rb
53
58
  - lib/polars.rb
59
+ - lib/polars/batched_csv_reader.rb
54
60
  - lib/polars/cat_expr.rb
55
61
  - lib/polars/data_frame.rb
56
62
  - lib/polars/date_time_expr.rb
63
+ - lib/polars/exceptions.rb
57
64
  - lib/polars/expr.rb
65
+ - lib/polars/expr_dispatch.rb
58
66
  - lib/polars/functions.rb
67
+ - lib/polars/group_by.rb
59
68
  - lib/polars/io.rb
60
69
  - lib/polars/lazy_frame.rb
61
70
  - lib/polars/lazy_functions.rb
@@ -63,6 +72,7 @@ files:
63
72
  - lib/polars/list_expr.rb
64
73
  - lib/polars/meta_expr.rb
65
74
  - lib/polars/series.rb
75
+ - lib/polars/slice.rb
66
76
  - lib/polars/string_expr.rb
67
77
  - lib/polars/struct_expr.rb
68
78
  - lib/polars/utils.rb