polars-df 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +12 -0
 - data/Cargo.lock +468 -538
 - data/Cargo.toml +1 -0
 - data/README.md +8 -7
 - data/ext/polars/Cargo.toml +17 -10
 - data/ext/polars/src/batched_csv.rs +26 -26
 - data/ext/polars/src/conversion.rs +121 -93
 - data/ext/polars/src/dataframe.rs +116 -71
 - data/ext/polars/src/error.rs +0 -5
 - data/ext/polars/src/expr/binary.rs +18 -6
 - data/ext/polars/src/expr/datetime.rs +10 -12
 - data/ext/polars/src/expr/general.rs +68 -284
 - data/ext/polars/src/expr/list.rs +17 -9
 - data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
 - data/ext/polars/src/expr/name.rs +44 -0
 - data/ext/polars/src/expr/rolling.rs +196 -0
 - data/ext/polars/src/expr/string.rs +85 -58
 - data/ext/polars/src/file.rs +3 -3
 - data/ext/polars/src/functions/aggregation.rs +35 -0
 - data/ext/polars/src/functions/eager.rs +7 -31
 - data/ext/polars/src/functions/io.rs +10 -10
 - data/ext/polars/src/functions/lazy.rs +66 -41
 - data/ext/polars/src/functions/meta.rs +30 -0
 - data/ext/polars/src/functions/misc.rs +8 -0
 - data/ext/polars/src/functions/mod.rs +5 -0
 - data/ext/polars/src/functions/random.rs +6 -0
 - data/ext/polars/src/functions/range.rs +46 -0
 - data/ext/polars/src/functions/string_cache.rs +11 -0
 - data/ext/polars/src/functions/whenthen.rs +7 -7
 - data/ext/polars/src/lazyframe.rs +47 -42
 - data/ext/polars/src/lib.rs +156 -72
 - data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
 - data/ext/polars/src/{apply → map}/mod.rs +3 -3
 - data/ext/polars/src/{apply → map}/series.rs +12 -16
 - data/ext/polars/src/object.rs +1 -1
 - data/ext/polars/src/rb_modules.rs +22 -7
 - data/ext/polars/src/series/construction.rs +4 -4
 - data/ext/polars/src/series/export.rs +2 -2
 - data/ext/polars/src/series/set_at_idx.rs +33 -17
 - data/ext/polars/src/series.rs +7 -27
 - data/ext/polars/src/sql.rs +46 -0
 - data/lib/polars/config.rb +530 -0
 - data/lib/polars/data_frame.rb +115 -82
 - data/lib/polars/date_time_expr.rb +13 -18
 - data/lib/polars/date_time_name_space.rb +5 -25
 - data/lib/polars/dynamic_group_by.rb +2 -2
 - data/lib/polars/expr.rb +177 -94
 - data/lib/polars/functions.rb +29 -37
 - data/lib/polars/group_by.rb +38 -55
 - data/lib/polars/io.rb +37 -2
 - data/lib/polars/lazy_frame.rb +93 -66
 - data/lib/polars/lazy_functions.rb +36 -48
 - data/lib/polars/lazy_group_by.rb +7 -8
 - data/lib/polars/list_expr.rb +12 -8
 - data/lib/polars/list_name_space.rb +2 -2
 - data/lib/polars/name_expr.rb +198 -0
 - data/lib/polars/rolling_group_by.rb +2 -2
 - data/lib/polars/series.rb +26 -13
 - data/lib/polars/sql_context.rb +194 -0
 - data/lib/polars/string_expr.rb +114 -60
 - data/lib/polars/string_name_space.rb +19 -4
 - data/lib/polars/utils.rb +12 -0
 - data/lib/polars/version.rb +1 -1
 - data/lib/polars.rb +3 -0
 - metadata +18 -7
 - /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
 
    
        data/lib/polars/functions.rb
    CHANGED
    
    | 
         @@ -19,8 +19,8 @@ module Polars 
     | 
|
| 
       19 
19 
     | 
    
         
             
                #   DataFrames/Series/LazyFrames to concatenate.
         
     | 
| 
       20 
20 
     | 
    
         
             
                # @param rechunk [Boolean]
         
     | 
| 
       21 
21 
     | 
    
         
             
                #   Make sure that all data is in contiguous memory.
         
     | 
| 
       22 
     | 
    
         
            -
                # @param how ["vertical", "diagonal", "horizontal"]
         
     | 
| 
       23 
     | 
    
         
            -
                #    
     | 
| 
      
 22 
     | 
    
         
            +
                # @param how ["vertical", "vertical_relaxed", "diagonal", "horizontal"]
         
     | 
| 
      
 23 
     | 
    
         
            +
                #   LazyFrames do not support the `horizontal` strategy.
         
     | 
| 
       24 
24 
     | 
    
         
             
                #
         
     | 
| 
       25 
25 
     | 
    
         
             
                #   - Vertical: applies multiple `vstack` operations.
         
     | 
| 
       26 
26 
     | 
    
         
             
                #   - Diagonal: finds a union between the column schemas and fills missing column values with null.
         
     | 
| 
         @@ -55,18 +55,21 @@ module Polars 
     | 
|
| 
       55 
55 
     | 
    
         
             
                    if how == "vertical"
         
     | 
| 
       56 
56 
     | 
    
         
             
                      out = Utils.wrap_df(_concat_df(items))
         
     | 
| 
       57 
57 
     | 
    
         
             
                    elsif how == "diagonal"
         
     | 
| 
       58 
     | 
    
         
            -
                      out = Utils.wrap_df( 
     | 
| 
      
 58 
     | 
    
         
            +
                      out = Utils.wrap_df(_concat_df_diagonal(items))
         
     | 
| 
       59 
59 
     | 
    
         
             
                    elsif how == "horizontal"
         
     | 
| 
       60 
     | 
    
         
            -
                      out = Utils.wrap_df( 
     | 
| 
      
 60 
     | 
    
         
            +
                      out = Utils.wrap_df(_concat_df_horizontal(items))
         
     | 
| 
       61 
61 
     | 
    
         
             
                    else
         
     | 
| 
       62 
62 
     | 
    
         
             
                      raise ArgumentError, "how must be one of {{'vertical', 'diagonal', 'horizontal'}}, got #{how}"
         
     | 
| 
       63 
63 
     | 
    
         
             
                    end
         
     | 
| 
       64 
64 
     | 
    
         
             
                  elsif first.is_a?(LazyFrame)
         
     | 
| 
       65 
65 
     | 
    
         
             
                    if how == "vertical"
         
     | 
| 
       66 
     | 
    
         
            -
                       
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
      
 66 
     | 
    
         
            +
                      return Utils.wrap_ldf(_concat_lf(items, rechunk, parallel, false))
         
     | 
| 
      
 67 
     | 
    
         
            +
                    elsif how == "vertical_relaxed"
         
     | 
| 
      
 68 
     | 
    
         
            +
                      return Utils.wrap_ldf(_concat_lf(items, rechunk, parallel, true))
         
     | 
| 
      
 69 
     | 
    
         
            +
                    elsif how == "diagonal"
         
     | 
| 
      
 70 
     | 
    
         
            +
                      return Utils.wrap_ldf(_concat_lf_diagonal(items, rechunk, parallel, false))
         
     | 
| 
       68 
71 
     | 
    
         
             
                    else
         
     | 
| 
       69 
     | 
    
         
            -
                      raise ArgumentError, "Lazy only allows 'vertical' concat strategy."
         
     | 
| 
      
 72 
     | 
    
         
            +
                      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and 'diagonal' concat strategy."
         
     | 
| 
       70 
73 
     | 
    
         
             
                    end
         
     | 
| 
       71 
74 
     | 
    
         
             
                  elsif first.is_a?(Series)
         
     | 
| 
       72 
75 
     | 
    
         
             
                    # TODO
         
     | 
| 
         @@ -89,9 +92,9 @@ module Polars 
     | 
|
| 
       89 
92 
     | 
    
         | 
| 
       90 
93 
     | 
    
         
             
                # Create a range of type `Datetime` (or `Date`).
         
     | 
| 
       91 
94 
     | 
    
         
             
                #
         
     | 
| 
       92 
     | 
    
         
            -
                # @param  
     | 
| 
      
 95 
     | 
    
         
            +
                # @param start [Object]
         
     | 
| 
       93 
96 
     | 
    
         
             
                #   Lower bound of the date range.
         
     | 
| 
       94 
     | 
    
         
            -
                # @param  
     | 
| 
      
 97 
     | 
    
         
            +
                # @param stop [Object]
         
     | 
| 
       95 
98 
     | 
    
         
             
                #   Upper bound of the date range.
         
     | 
| 
       96 
99 
     | 
    
         
             
                # @param interval [Object]
         
     | 
| 
       97 
100 
     | 
    
         
             
                #   Interval periods. It can be a polars duration string, such as `3d12h4m25s`
         
     | 
| 
         @@ -145,8 +148,8 @@ module Polars 
     | 
|
| 
       145 
148 
     | 
    
         
             
                #   #         1985-01-10 00:00:00
         
     | 
| 
       146 
149 
     | 
    
         
             
                #   # ]
         
     | 
| 
       147 
150 
     | 
    
         
             
                def date_range(
         
     | 
| 
       148 
     | 
    
         
            -
                   
     | 
| 
       149 
     | 
    
         
            -
                   
     | 
| 
      
 151 
     | 
    
         
            +
                  start,
         
     | 
| 
      
 152 
     | 
    
         
            +
                  stop,
         
     | 
| 
       150 
153 
     | 
    
         
             
                  interval,
         
     | 
| 
       151 
154 
     | 
    
         
             
                  lazy: false,
         
     | 
| 
       152 
155 
     | 
    
         
             
                  closed: "both",
         
     | 
| 
         @@ -163,39 +166,28 @@ module Polars 
     | 
|
| 
       163 
166 
     | 
    
         
             
                    end
         
     | 
| 
       164 
167 
     | 
    
         
             
                  end
         
     | 
| 
       165 
168 
     | 
    
         | 
| 
       166 
     | 
    
         
            -
                  if  
     | 
| 
       167 
     | 
    
         
            -
                     
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
       169 
     | 
    
         
            -
                     
     | 
| 
       170 
     | 
    
         
            -
                       
     | 
| 
       171 
     | 
    
         
            -
                     
     | 
| 
      
 169 
     | 
    
         
            +
                  if time_unit.nil?
         
     | 
| 
      
 170 
     | 
    
         
            +
                    if interval.include?("ns")
         
     | 
| 
      
 171 
     | 
    
         
            +
                      time_unit = "ns"
         
     | 
| 
      
 172 
     | 
    
         
            +
                    else
         
     | 
| 
      
 173 
     | 
    
         
            +
                      time_unit = "us"
         
     | 
| 
      
 174 
     | 
    
         
            +
                    end
         
     | 
| 
       172 
175 
     | 
    
         
             
                  end
         
     | 
| 
       173 
176 
     | 
    
         | 
| 
       174 
     | 
    
         
            -
                   
     | 
| 
       175 
     | 
    
         
            -
                   
     | 
| 
      
 177 
     | 
    
         
            +
                  start_rbexpr = Utils.parse_as_expression(start)
         
     | 
| 
      
 178 
     | 
    
         
            +
                  stop_rbexpr = Utils.parse_as_expression(stop)
         
     | 
| 
       176 
179 
     | 
    
         | 
| 
       177 
     | 
    
         
            -
                   
     | 
| 
       178 
     | 
    
         
            -
                     
     | 
| 
       179 
     | 
    
         
            -
                   
     | 
| 
       180 
     | 
    
         
            -
                    tu = "ns"
         
     | 
| 
       181 
     | 
    
         
            -
                  else
         
     | 
| 
       182 
     | 
    
         
            -
                    tu = "us"
         
     | 
| 
       183 
     | 
    
         
            -
                  end
         
     | 
| 
      
 180 
     | 
    
         
            +
                  result = Utils.wrap_expr(
         
     | 
| 
      
 181 
     | 
    
         
            +
                    _rb_date_range(start_rbexpr, stop_rbexpr, interval, closed, time_unit, time_zone)
         
     | 
| 
      
 182 
     | 
    
         
            +
                  )
         
     | 
| 
       184 
183 
     | 
    
         | 
| 
       185 
     | 
    
         
            -
                   
     | 
| 
       186 
     | 
    
         
            -
                  stop = Utils._datetime_to_pl_timestamp(high, tu)
         
     | 
| 
       187 
     | 
    
         
            -
                  if name.nil?
         
     | 
| 
       188 
     | 
    
         
            -
                    name = ""
         
     | 
| 
       189 
     | 
    
         
            -
                  end
         
     | 
| 
      
 184 
     | 
    
         
            +
                  result = result.alias(name.to_s)
         
     | 
| 
       190 
185 
     | 
    
         | 
| 
       191 
     | 
    
         
            -
                   
     | 
| 
       192 
     | 
    
         
            -
                     
     | 
| 
       193 
     | 
    
         
            -
                  )
         
     | 
| 
       194 
     | 
    
         
            -
                  if low_is_date && high_is_date && !["h", "m", "s"].any? { |v| _interval_granularity(interval).end_with?(v) }
         
     | 
| 
       195 
     | 
    
         
            -
                    dt_range = dt_range.cast(Date)
         
     | 
| 
      
 186 
     | 
    
         
            +
                  if !lazy
         
     | 
| 
      
 187 
     | 
    
         
            +
                    return select(result).to_series
         
     | 
| 
       196 
188 
     | 
    
         
             
                  end
         
     | 
| 
       197 
189 
     | 
    
         | 
| 
       198 
     | 
    
         
            -
                   
     | 
| 
      
 190 
     | 
    
         
            +
                  result
         
     | 
| 
       199 
191 
     | 
    
         
             
                end
         
     | 
| 
       200 
192 
     | 
    
         | 
| 
       201 
193 
     | 
    
         
             
                # Bin values into discrete values.
         
     | 
    
        data/lib/polars/group_by.rb
    CHANGED
    
    | 
         @@ -2,23 +2,19 @@ module Polars 
     | 
|
| 
       2 
2 
     | 
    
         
             
              # Starts a new GroupBy operation.
         
     | 
| 
       3 
3 
     | 
    
         
             
              class GroupBy
         
     | 
| 
       4 
4 
     | 
    
         
             
                # @private
         
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
                  self._df = df
         
     | 
| 
       10 
     | 
    
         
            -
                  self._dataframe_class = dataframe_class
         
     | 
| 
       11 
     | 
    
         
            -
                  self.by = by
         
     | 
| 
       12 
     | 
    
         
            -
                  self.maintain_order = maintain_order
         
     | 
| 
      
 5 
     | 
    
         
            +
                def initialize(df, by, maintain_order: false)
         
     | 
| 
      
 6 
     | 
    
         
            +
                  @df = df
         
     | 
| 
      
 7 
     | 
    
         
            +
                  @by = by
         
     | 
| 
      
 8 
     | 
    
         
            +
                  @maintain_order = maintain_order
         
     | 
| 
       13 
9 
     | 
    
         
             
                end
         
     | 
| 
       14 
10 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
                # Allows iteration over the groups of the  
     | 
| 
      
 11 
     | 
    
         
            +
                # Allows iteration over the groups of the group by operation.
         
     | 
| 
       16 
12 
     | 
    
         
             
                #
         
     | 
| 
       17 
13 
     | 
    
         
             
                # @return [Object]
         
     | 
| 
       18 
14 
     | 
    
         
             
                #
         
     | 
| 
       19 
15 
     | 
    
         
             
                # @example
         
     | 
| 
       20 
16 
     | 
    
         
             
                #   df = Polars::DataFrame.new({"foo" => ["a", "a", "b"], "bar" => [1, 2, 3]})
         
     | 
| 
       21 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 17 
     | 
    
         
            +
                #   df.group_by("foo", maintain_order: true).each.to_h
         
     | 
| 
       22 
18 
     | 
    
         
             
                #   # =>
         
     | 
| 
       23 
19 
     | 
    
         
             
                #   # {"a"=>shape: (2, 2)
         
     | 
| 
       24 
20 
     | 
    
         
             
                #   # ┌─────┬─────┐
         
     | 
| 
         @@ -41,10 +37,9 @@ module Polars 
     | 
|
| 
       41 
37 
     | 
    
         | 
| 
       42 
38 
     | 
    
         
             
                  temp_col = "__POLARS_GB_GROUP_INDICES"
         
     | 
| 
       43 
39 
     | 
    
         
             
                  groups_df =
         
     | 
| 
       44 
     | 
    
         
            -
                     
     | 
| 
       45 
     | 
    
         
            -
                      .lazy
         
     | 
| 
      
 40 
     | 
    
         
            +
                    @df.lazy
         
     | 
| 
       46 
41 
     | 
    
         
             
                      .with_row_count(name: temp_col)
         
     | 
| 
       47 
     | 
    
         
            -
                      . 
     | 
| 
      
 42 
     | 
    
         
            +
                      .group_by(@by, maintain_order: @maintain_order)
         
     | 
| 
       48 
43 
     | 
    
         
             
                      .agg(Polars.col(temp_col))
         
     | 
| 
       49 
44 
     | 
    
         
             
                      .collect(no_optimization: true)
         
     | 
| 
       50 
45 
     | 
    
         | 
| 
         @@ -52,7 +47,7 @@ module Polars 
     | 
|
| 
       52 
47 
     | 
    
         | 
| 
       53 
48 
     | 
    
         
             
                  # When grouping by a single column, group name is a single value
         
     | 
| 
       54 
49 
     | 
    
         
             
                  # When grouping by multiple columns, group name is a tuple of values
         
     | 
| 
       55 
     | 
    
         
            -
                  if by.is_a?(String) || by.is_a?(Expr)
         
     | 
| 
      
 50 
     | 
    
         
            +
                  if @by.is_a?(String) || @by.is_a?(Expr)
         
     | 
| 
       56 
51 
     | 
    
         
             
                    _group_names = group_names.to_series.each
         
     | 
| 
       57 
52 
     | 
    
         
             
                  else
         
     | 
| 
       58 
53 
     | 
    
         
             
                    _group_names = group_names.iter_rows
         
     | 
| 
         @@ -62,10 +57,8 @@ module Polars 
     | 
|
| 
       62 
57 
     | 
    
         
             
                  _current_index = 0
         
     | 
| 
       63 
58 
     | 
    
         | 
| 
       64 
59 
     | 
    
         
             
                  while _current_index < _group_indices.length
         
     | 
| 
       65 
     | 
    
         
            -
                    df = _dataframe_class._from_rbdf(_df)
         
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
60 
     | 
    
         
             
                    group_name = _group_names.next
         
     | 
| 
       68 
     | 
    
         
            -
                    group_data = df[_group_indices[_current_index]]
         
     | 
| 
      
 61 
     | 
    
         
            +
                    group_data = @df[_group_indices[_current_index]]
         
     | 
| 
       69 
62 
     | 
    
         
             
                    _current_index += 1
         
     | 
| 
       70 
63 
     | 
    
         | 
| 
       71 
64 
     | 
    
         
             
                    yield group_name, group_data
         
     | 
| 
         @@ -96,7 +89,7 @@ module Polars 
     | 
|
| 
       96 
89 
     | 
    
         
             
                #       "shape" => ["square", "triangle", "square", "triangle", "square"]
         
     | 
| 
       97 
90 
     | 
    
         
             
                #     }
         
     | 
| 
       98 
91 
     | 
    
         
             
                #   )
         
     | 
| 
       99 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 92 
     | 
    
         
            +
                #   df.group_by("color").apply { |group_df| group_df.sample(2) }
         
     | 
| 
       100 
93 
     | 
    
         
             
                #   # =>
         
     | 
| 
       101 
94 
     | 
    
         
             
                #   # shape: (4, 3)
         
     | 
| 
       102 
95 
     | 
    
         
             
                #   # ┌─────┬───────┬──────────┐
         
     | 
| 
         @@ -110,7 +103,7 @@ module Polars 
     | 
|
| 
       110 
103 
     | 
    
         
             
                #   # │ 3   ┆ red   ┆ triangle │
         
     | 
| 
       111 
104 
     | 
    
         
             
                #   # └─────┴───────┴──────────┘
         
     | 
| 
       112 
105 
     | 
    
         
             
                # def apply(&f)
         
     | 
| 
       113 
     | 
    
         
            -
                #   _dataframe_class._from_rbdf(_df. 
     | 
| 
      
 106 
     | 
    
         
            +
                #   _dataframe_class._from_rbdf(_df.group_by_apply(by, f))
         
     | 
| 
       114 
107 
     | 
    
         
             
                # end
         
     | 
| 
       115 
108 
     | 
    
         | 
| 
       116 
109 
     | 
    
         
             
                # Use multiple aggregations on columns.
         
     | 
| 
         @@ -126,7 +119,7 @@ module Polars 
     | 
|
| 
       126 
119 
     | 
    
         
             
                #   df = Polars::DataFrame.new(
         
     | 
| 
       127 
120 
     | 
    
         
             
                #     {"foo" => ["one", "two", "two", "one", "two"], "bar" => [5, 3, 2, 4, 1]}
         
     | 
| 
       128 
121 
     | 
    
         
             
                #   )
         
     | 
| 
       129 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 122 
     | 
    
         
            +
                #   df.group_by("foo", maintain_order: true).agg(
         
     | 
| 
       130 
123 
     | 
    
         
             
                #     [
         
     | 
| 
       131 
124 
     | 
    
         
             
                #       Polars.sum("bar").suffix("_sum"),
         
     | 
| 
       132 
125 
     | 
    
         
             
                #       Polars.col("bar").sort.tail(2).sum.suffix("_tail_sum")
         
     | 
| 
         @@ -143,12 +136,10 @@ module Polars 
     | 
|
| 
       143 
136 
     | 
    
         
             
                #   # │ two ┆ 6       ┆ 5            │
         
     | 
| 
       144 
137 
     | 
    
         
             
                #   # └─────┴─────────┴──────────────┘
         
     | 
| 
       145 
138 
     | 
    
         
             
                def agg(aggs)
         
     | 
| 
       146 
     | 
    
         
            -
                  df 
     | 
| 
       147 
     | 
    
         
            -
                    . 
     | 
| 
       148 
     | 
    
         
            -
                    .groupby(by, maintain_order: maintain_order)
         
     | 
| 
      
 139 
     | 
    
         
            +
                  @df.lazy
         
     | 
| 
      
 140 
     | 
    
         
            +
                    .group_by(@by, maintain_order: @maintain_order)
         
     | 
| 
       149 
141 
     | 
    
         
             
                    .agg(aggs)
         
     | 
| 
       150 
     | 
    
         
            -
                    .collect(no_optimization: true 
     | 
| 
       151 
     | 
    
         
            -
                  _dataframe_class._from_rbdf(df._df)
         
     | 
| 
      
 142 
     | 
    
         
            +
                    .collect(no_optimization: true)
         
     | 
| 
       152 
143 
     | 
    
         
             
                end
         
     | 
| 
       153 
144 
     | 
    
         | 
| 
       154 
145 
     | 
    
         
             
                # Get the first `n` rows of each group.
         
     | 
| 
         @@ -181,7 +172,7 @@ module Polars 
     | 
|
| 
       181 
172 
     | 
    
         
             
                #   # └─────────┴─────┘
         
     | 
| 
       182 
173 
     | 
    
         
             
                #
         
     | 
| 
       183 
174 
     | 
    
         
             
                # @example
         
     | 
| 
       184 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 175 
     | 
    
         
            +
                #   df.group_by("letters").head(2).sort("letters")
         
     | 
| 
       185 
176 
     | 
    
         
             
                #   # =>
         
     | 
| 
       186 
177 
     | 
    
         
             
                #   # shape: (5, 2)
         
     | 
| 
       187 
178 
     | 
    
         
             
                #   # ┌─────────┬─────┐
         
     | 
| 
         @@ -196,14 +187,10 @@ module Polars 
     | 
|
| 
       196 
187 
     | 
    
         
             
                #   # │ c       ┆ 2   │
         
     | 
| 
       197 
188 
     | 
    
         
             
                #   # └─────────┴─────┘
         
     | 
| 
       198 
189 
     | 
    
         
             
                def head(n = 5)
         
     | 
| 
       199 
     | 
    
         
            -
                  df 
     | 
| 
       200 
     | 
    
         
            -
                     
     | 
| 
       201 
     | 
    
         
            -
             
     | 
| 
       202 
     | 
    
         
            -
             
     | 
| 
       203 
     | 
    
         
            -
                      .head(n)
         
     | 
| 
       204 
     | 
    
         
            -
                      .collect(no_optimization: true, string_cache: false)
         
     | 
| 
       205 
     | 
    
         
            -
                  )
         
     | 
| 
       206 
     | 
    
         
            -
                  _dataframe_class._from_rbdf(df._df)
         
     | 
| 
      
 190 
     | 
    
         
            +
                  @df.lazy
         
     | 
| 
      
 191 
     | 
    
         
            +
                    .group_by(@by, maintain_order: @maintain_order)
         
     | 
| 
      
 192 
     | 
    
         
            +
                    .head(n)
         
     | 
| 
      
 193 
     | 
    
         
            +
                    .collect(no_optimization: true)
         
     | 
| 
       207 
194 
     | 
    
         
             
                end
         
     | 
| 
       208 
195 
     | 
    
         | 
| 
       209 
196 
     | 
    
         
             
                # Get the last `n` rows of each group.
         
     | 
| 
         @@ -236,7 +223,7 @@ module Polars 
     | 
|
| 
       236 
223 
     | 
    
         
             
                #   # └─────────┴─────┘
         
     | 
| 
       237 
224 
     | 
    
         
             
                #
         
     | 
| 
       238 
225 
     | 
    
         
             
                # @example
         
     | 
| 
       239 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 226 
     | 
    
         
            +
                #   df.group_by("letters").tail(2).sort("letters")
         
     | 
| 
       240 
227 
     | 
    
         
             
                #   # =>
         
     | 
| 
       241 
228 
     | 
    
         
             
                #   # shape: (5, 2)
         
     | 
| 
       242 
229 
     | 
    
         
             
                #   # ┌─────────┬─────┐
         
     | 
| 
         @@ -251,14 +238,10 @@ module Polars 
     | 
|
| 
       251 
238 
     | 
    
         
             
                #   # │ c       ┆ 4   │
         
     | 
| 
       252 
239 
     | 
    
         
             
                #   # └─────────┴─────┘
         
     | 
| 
       253 
240 
     | 
    
         
             
                def tail(n = 5)
         
     | 
| 
       254 
     | 
    
         
            -
                  df 
     | 
| 
       255 
     | 
    
         
            -
                     
     | 
| 
       256 
     | 
    
         
            -
             
     | 
| 
       257 
     | 
    
         
            -
             
     | 
| 
       258 
     | 
    
         
            -
                      .tail(n)
         
     | 
| 
       259 
     | 
    
         
            -
                      .collect(no_optimization: true, string_cache: false)
         
     | 
| 
       260 
     | 
    
         
            -
                  )
         
     | 
| 
       261 
     | 
    
         
            -
                  _dataframe_class._from_rbdf(df._df)
         
     | 
| 
      
 241 
     | 
    
         
            +
                  @df.lazy
         
     | 
| 
      
 242 
     | 
    
         
            +
                    .group_by(@by, maintain_order: @maintain_order)
         
     | 
| 
      
 243 
     | 
    
         
            +
                    .tail(n)
         
     | 
| 
      
 244 
     | 
    
         
            +
                    .collect(no_optimization: true)
         
     | 
| 
       262 
245 
     | 
    
         
             
                end
         
     | 
| 
       263 
246 
     | 
    
         | 
| 
       264 
247 
     | 
    
         
             
                # Aggregate the first values in the group.
         
     | 
| 
         @@ -274,7 +257,7 @@ module Polars 
     | 
|
| 
       274 
257 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       275 
258 
     | 
    
         
             
                #     }
         
     | 
| 
       276 
259 
     | 
    
         
             
                #   )
         
     | 
| 
       277 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 260 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).first
         
     | 
| 
       278 
261 
     | 
    
         
             
                #   # =>
         
     | 
| 
       279 
262 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       280 
263 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┬───────┐
         
     | 
| 
         @@ -303,7 +286,7 @@ module Polars 
     | 
|
| 
       303 
286 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       304 
287 
     | 
    
         
             
                #     }
         
     | 
| 
       305 
288 
     | 
    
         
             
                #   )
         
     | 
| 
       306 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 289 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).last
         
     | 
| 
       307 
290 
     | 
    
         
             
                #   # =>
         
     | 
| 
       308 
291 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       309 
292 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┬───────┐
         
     | 
| 
         @@ -332,7 +315,7 @@ module Polars 
     | 
|
| 
       332 
315 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       333 
316 
     | 
    
         
             
                #     }
         
     | 
| 
       334 
317 
     | 
    
         
             
                #   )
         
     | 
| 
       335 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 318 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).sum
         
     | 
| 
       336 
319 
     | 
    
         
             
                #   # =>
         
     | 
| 
       337 
320 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       338 
321 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┬─────┐
         
     | 
| 
         @@ -361,7 +344,7 @@ module Polars 
     | 
|
| 
       361 
344 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
         
     | 
| 
       362 
345 
     | 
    
         
             
                #     }
         
     | 
| 
       363 
346 
     | 
    
         
             
                #   )
         
     | 
| 
       364 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 347 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).min
         
     | 
| 
       365 
348 
     | 
    
         
             
                #   # =>
         
     | 
| 
       366 
349 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       367 
350 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┬───────┐
         
     | 
| 
         @@ -390,7 +373,7 @@ module Polars 
     | 
|
| 
       390 
373 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       391 
374 
     | 
    
         
             
                #     }
         
     | 
| 
       392 
375 
     | 
    
         
             
                #   )
         
     | 
| 
       393 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 376 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).max
         
     | 
| 
       394 
377 
     | 
    
         
             
                #   # =>
         
     | 
| 
       395 
378 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       396 
379 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┬──────┐
         
     | 
| 
         @@ -419,7 +402,7 @@ module Polars 
     | 
|
| 
       419 
402 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       420 
403 
     | 
    
         
             
                #     }
         
     | 
| 
       421 
404 
     | 
    
         
             
                #   )
         
     | 
| 
       422 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 405 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).count
         
     | 
| 
       423 
406 
     | 
    
         
             
                #   # =>
         
     | 
| 
       424 
407 
     | 
    
         
             
                #   # shape: (3, 2)
         
     | 
| 
       425 
408 
     | 
    
         
             
                #   # ┌────────┬───────┐
         
     | 
| 
         @@ -448,7 +431,7 @@ module Polars 
     | 
|
| 
       448 
431 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       449 
432 
     | 
    
         
             
                #     }
         
     | 
| 
       450 
433 
     | 
    
         
             
                #   )
         
     | 
| 
       451 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 434 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).mean
         
     | 
| 
       452 
435 
     | 
    
         
             
                #   # =>
         
     | 
| 
       453 
436 
     | 
    
         
             
                #   # shape: (3, 4)
         
     | 
| 
       454 
437 
     | 
    
         
             
                #   # ┌────────┬─────┬──────────┬──────────┐
         
     | 
| 
         @@ -476,7 +459,7 @@ module Polars 
     | 
|
| 
       476 
459 
     | 
    
         
             
                #       "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       477 
460 
     | 
    
         
             
                #     }
         
     | 
| 
       478 
461 
     | 
    
         
             
                #   )
         
     | 
| 
       479 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 462 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).n_unique
         
     | 
| 
       480 
463 
     | 
    
         
             
                #   # =>
         
     | 
| 
       481 
464 
     | 
    
         
             
                #   # shape: (2, 3)
         
     | 
| 
       482 
465 
     | 
    
         
             
                #   # ┌────────┬─────┬─────┐
         
     | 
| 
         @@ -508,7 +491,7 @@ module Polars 
     | 
|
| 
       508 
491 
     | 
    
         
             
                #       "d" => ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       509 
492 
     | 
    
         
             
                #     }
         
     | 
| 
       510 
493 
     | 
    
         
             
                #   )
         
     | 
| 
       511 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 494 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).quantile(1)
         
     | 
| 
       512 
495 
     | 
    
         
             
                #   # =>
         
     | 
| 
       513 
496 
     | 
    
         
             
                #   # shape: (3, 3)
         
     | 
| 
       514 
497 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┐
         
     | 
| 
         @@ -536,7 +519,7 @@ module Polars 
     | 
|
| 
       536 
519 
     | 
    
         
             
                #       "d" => ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"]
         
     | 
| 
       537 
520 
     | 
    
         
             
                #     }
         
     | 
| 
       538 
521 
     | 
    
         
             
                #   )
         
     | 
| 
       539 
     | 
    
         
            -
                #   df. 
     | 
| 
      
 522 
     | 
    
         
            +
                #   df.group_by("d", maintain_order: true).median
         
     | 
| 
       540 
523 
     | 
    
         
             
                #   # =>
         
     | 
| 
       541 
524 
     | 
    
         
             
                #   # shape: (2, 3)
         
     | 
| 
       542 
525 
     | 
    
         
             
                #   # ┌────────┬─────┬──────┐
         
     | 
| 
         @@ -555,11 +538,11 @@ module Polars 
     | 
|
| 
       555 
538 
     | 
    
         
             
                #
         
     | 
| 
       556 
539 
     | 
    
         
             
                # @return [Vega::LiteChart]
         
     | 
| 
       557 
540 
     | 
    
         
             
                def plot(*args, **options)
         
     | 
| 
       558 
     | 
    
         
            -
                  raise ArgumentError, "Multiple groups not supported" if by.is_a?(::Array) && by.size > 1
         
     | 
| 
      
 541 
     | 
    
         
            +
                  raise ArgumentError, "Multiple groups not supported" if @by.is_a?(::Array) && @by.size > 1
         
     | 
| 
       559 
542 
     | 
    
         
             
                  # same message as Ruby
         
     | 
| 
       560 
543 
     | 
    
         
             
                  raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
         
     | 
| 
       561 
544 
     | 
    
         | 
| 
       562 
     | 
    
         
            -
                   
     | 
| 
      
 545 
     | 
    
         
            +
                  @df.plot(*args, **options, group: @by)
         
     | 
| 
       563 
546 
     | 
    
         
             
                end
         
     | 
| 
       564 
547 
     | 
    
         
             
              end
         
     | 
| 
       565 
548 
     | 
    
         
             
            end
         
     | 
    
        data/lib/polars/io.rb
    CHANGED
    
    | 
         @@ -621,11 +621,46 @@ module Polars 
     | 
|
| 
       621 
621 
     | 
    
         
             
                    else
         
     | 
| 
       622 
622 
     | 
    
         
             
                      raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
         
     | 
| 
       623 
623 
     | 
    
         
             
                    end
         
     | 
| 
      
 624 
     | 
    
         
            +
             
     | 
| 
       624 
625 
     | 
    
         
             
                  data = {}
         
     | 
| 
      
 626 
     | 
    
         
            +
                  schema_overrides = {}
         
     | 
| 
      
 627 
     | 
    
         
            +
             
     | 
| 
       625 
628 
     | 
    
         
             
                  result.columns.each_with_index do |k, i|
         
     | 
| 
       626 
     | 
    
         
            -
                     
     | 
| 
      
 629 
     | 
    
         
            +
                    column_type = result.column_types[i]
         
     | 
| 
      
 630 
     | 
    
         
            +
             
     | 
| 
      
 631 
     | 
    
         
            +
                    data[k] =
         
     | 
| 
      
 632 
     | 
    
         
            +
                      if column_type
         
     | 
| 
      
 633 
     | 
    
         
            +
                        result.rows.map { |r| column_type.deserialize(r[i]) }
         
     | 
| 
      
 634 
     | 
    
         
            +
                      else
         
     | 
| 
      
 635 
     | 
    
         
            +
                        result.rows.map { |r| r[i] }
         
     | 
| 
      
 636 
     | 
    
         
            +
                      end
         
     | 
| 
      
 637 
     | 
    
         
            +
             
     | 
| 
      
 638 
     | 
    
         
            +
                    polars_type =
         
     | 
| 
      
 639 
     | 
    
         
            +
                      case column_type&.type
         
     | 
| 
      
 640 
     | 
    
         
            +
                      when :binary
         
     | 
| 
      
 641 
     | 
    
         
            +
                        Binary
         
     | 
| 
      
 642 
     | 
    
         
            +
                      when :boolean
         
     | 
| 
      
 643 
     | 
    
         
            +
                        Boolean
         
     | 
| 
      
 644 
     | 
    
         
            +
                      when :date
         
     | 
| 
      
 645 
     | 
    
         
            +
                        Date
         
     | 
| 
      
 646 
     | 
    
         
            +
                      when :datetime, :timestamp
         
     | 
| 
      
 647 
     | 
    
         
            +
                        Datetime
         
     | 
| 
      
 648 
     | 
    
         
            +
                      when :decimal
         
     | 
| 
      
 649 
     | 
    
         
            +
                        Decimal
         
     | 
| 
      
 650 
     | 
    
         
            +
                      when :float
         
     | 
| 
      
 651 
     | 
    
         
            +
                        Float64
         
     | 
| 
      
 652 
     | 
    
         
            +
                      when :integer
         
     | 
| 
      
 653 
     | 
    
         
            +
                        Int64
         
     | 
| 
      
 654 
     | 
    
         
            +
                      when :string, :text
         
     | 
| 
      
 655 
     | 
    
         
            +
                        Utf8
         
     | 
| 
      
 656 
     | 
    
         
            +
                      when :time
         
     | 
| 
      
 657 
     | 
    
         
            +
                        Time
         
     | 
| 
      
 658 
     | 
    
         
            +
                      end
         
     | 
| 
      
 659 
     | 
    
         
            +
             
     | 
| 
      
 660 
     | 
    
         
            +
                    schema_overrides[k] = polars_type if polars_type
         
     | 
| 
       627 
661 
     | 
    
         
             
                  end
         
     | 
| 
       628 
     | 
    
         
            -
             
     | 
| 
      
 662 
     | 
    
         
            +
             
     | 
| 
      
 663 
     | 
    
         
            +
                  DataFrame.new(data, schema_overrides: schema_overrides)
         
     | 
| 
       629 
664 
     | 
    
         
             
                end
         
     | 
| 
       630 
665 
     | 
    
         
             
                alias_method :read_sql, :read_database
         
     | 
| 
       631 
666 
     | 
    
         |