polars-df 0.11.0-arm64-darwin → 0.13.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/Cargo.lock +428 -450
- data/LICENSE-THIRD-PARTY.txt +2212 -1952
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +35 -7
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +15 -8
- data/lib/polars/lazy_frame.rb +123 -105
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +108 -191
- data/lib/polars/string_expr.rb +51 -76
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
    
        data/lib/polars/lazy_frame.rb
    CHANGED
    
    | @@ -63,7 +63,7 @@ module Polars | |
| 63 63 | 
             
                #   df.columns
         | 
| 64 64 | 
             
                #   # => ["foo", "bar"]
         | 
| 65 65 | 
             
                def columns
         | 
| 66 | 
            -
                  _ldf. | 
| 66 | 
            +
                  _ldf.collect_schema.keys
         | 
| 67 67 | 
             
                end
         | 
| 68 68 |  | 
| 69 69 | 
             
                # Get dtypes of columns in LazyFrame.
         | 
| @@ -81,7 +81,7 @@ module Polars | |
| 81 81 | 
             
                #   lf.dtypes
         | 
| 82 82 | 
             
                #   # => [Polars::Int64, Polars::Float64, Polars::String]
         | 
| 83 83 | 
             
                def dtypes
         | 
| 84 | 
            -
                  _ldf. | 
| 84 | 
            +
                  _ldf.collect_schema.values
         | 
| 85 85 | 
             
                end
         | 
| 86 86 |  | 
| 87 87 | 
             
                # Get the schema.
         | 
| @@ -99,7 +99,7 @@ module Polars | |
| 99 99 | 
             
                #   lf.schema
         | 
| 100 100 | 
             
                #   # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
         | 
| 101 101 | 
             
                def schema
         | 
| 102 | 
            -
                  _ldf. | 
| 102 | 
            +
                  _ldf.collect_schema
         | 
| 103 103 | 
             
                end
         | 
| 104 104 |  | 
| 105 105 | 
             
                # Get the width of the LazyFrame.
         | 
| @@ -111,7 +111,7 @@ module Polars | |
| 111 111 | 
             
                #   lf.width
         | 
| 112 112 | 
             
                #   # => 2
         | 
| 113 113 | 
             
                def width
         | 
| 114 | 
            -
                  _ldf. | 
| 114 | 
            +
                  _ldf.collect_schema.length
         | 
| 115 115 | 
             
                end
         | 
| 116 116 |  | 
| 117 117 | 
             
                # Check if LazyFrame includes key.
         | 
| @@ -261,16 +261,23 @@ module Polars | |
| 261 261 | 
             
                #   # │ 2   ┆ 7.0 ┆ b   │
         | 
| 262 262 | 
             
                #   # │ 1   ┆ 6.0 ┆ a   │
         | 
| 263 263 | 
             
                #   # └─────┴─────┴─────┘
         | 
| 264 | 
            -
                def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
         | 
| 265 | 
            -
                  if by.is_a?(::String)
         | 
| 266 | 
            -
                    return _from_rbldf( | 
| 267 | 
            -
             | 
| 268 | 
            -
             | 
| 269 | 
            -
             | 
| 264 | 
            +
                def sort(by, *more_by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true)
         | 
| 265 | 
            +
                  if by.is_a?(::String) && more_by.empty?
         | 
| 266 | 
            +
                    return _from_rbldf(
         | 
| 267 | 
            +
                      _ldf.sort(
         | 
| 268 | 
            +
                        by, reverse, nulls_last, maintain_order, multithreaded
         | 
| 269 | 
            +
                      )
         | 
| 270 | 
            +
                    )
         | 
| 270 271 | 
             
                  end
         | 
| 271 272 |  | 
| 272 | 
            -
                  by = Utils. | 
| 273 | 
            -
                   | 
| 273 | 
            +
                  by = Utils.parse_into_list_of_expressions(by, *more_by)
         | 
| 274 | 
            +
                  reverse = Utils.extend_bool(reverse, by.length, "reverse", "by")
         | 
| 275 | 
            +
                  nulls_last = Utils.extend_bool(nulls_last, by.length, "nulls_last", "by")
         | 
| 276 | 
            +
                  _from_rbldf(
         | 
| 277 | 
            +
                    _ldf.sort_by_exprs(
         | 
| 278 | 
            +
                      by, reverse, nulls_last, maintain_order, multithreaded
         | 
| 279 | 
            +
                    )
         | 
| 280 | 
            +
                  )
         | 
| 274 281 | 
             
                end
         | 
| 275 282 |  | 
| 276 283 | 
             
                # def profile
         | 
| @@ -415,7 +422,7 @@ module Polars | |
| 415 422 | 
             
                  path,
         | 
| 416 423 | 
             
                  compression: "zstd",
         | 
| 417 424 | 
             
                  compression_level: nil,
         | 
| 418 | 
            -
                  statistics:  | 
| 425 | 
            +
                  statistics: true,
         | 
| 419 426 | 
             
                  row_group_size: nil,
         | 
| 420 427 | 
             
                  data_pagesize_limit: nil,
         | 
| 421 428 | 
             
                  maintain_order: true,
         | 
| @@ -435,6 +442,24 @@ module Polars | |
| 435 442 | 
             
                    no_optimization: no_optimization
         | 
| 436 443 | 
             
                  )
         | 
| 437 444 |  | 
| 445 | 
            +
                  if statistics == true
         | 
| 446 | 
            +
                    statistics = {
         | 
| 447 | 
            +
                      min: true,
         | 
| 448 | 
            +
                      max: true,
         | 
| 449 | 
            +
                      distinct_count: false,
         | 
| 450 | 
            +
                      null_count: true
         | 
| 451 | 
            +
                    }
         | 
| 452 | 
            +
                  elsif statistics == false
         | 
| 453 | 
            +
                    statistics = {}
         | 
| 454 | 
            +
                  elsif statistics == "full"
         | 
| 455 | 
            +
                    statistics = {
         | 
| 456 | 
            +
                      min: true,
         | 
| 457 | 
            +
                      max: true,
         | 
| 458 | 
            +
                      distinct_count: true,
         | 
| 459 | 
            +
                      null_count: true
         | 
| 460 | 
            +
                    }
         | 
| 461 | 
            +
                  end
         | 
| 462 | 
            +
             | 
| 438 463 | 
             
                  lf.sink_parquet(
         | 
| 439 464 | 
             
                    path,
         | 
| 440 465 | 
             
                    compression,
         | 
| @@ -589,6 +614,7 @@ module Polars | |
| 589 614 | 
             
                  datetime_format: nil,
         | 
| 590 615 | 
             
                  date_format: nil,
         | 
| 591 616 | 
             
                  time_format: nil,
         | 
| 617 | 
            +
                  float_scientific: nil,
         | 
| 592 618 | 
             
                  float_precision: nil,
         | 
| 593 619 | 
             
                  null_value: nil,
         | 
| 594 620 | 
             
                  quote_style: nil,
         | 
| @@ -623,6 +649,7 @@ module Polars | |
| 623 649 | 
             
                    datetime_format,
         | 
| 624 650 | 
             
                    date_format,
         | 
| 625 651 | 
             
                    time_format,
         | 
| 652 | 
            +
                    float_scientific,
         | 
| 626 653 | 
             
                    float_precision,
         | 
| 627 654 | 
             
                    null_value,
         | 
| 628 655 | 
             
                    quote_style,
         | 
| @@ -907,7 +934,7 @@ module Polars | |
| 907 934 | 
             
                def filter(predicate)
         | 
| 908 935 | 
             
                  _from_rbldf(
         | 
| 909 936 | 
             
                    _ldf.filter(
         | 
| 910 | 
            -
                      Utils. | 
| 937 | 
            +
                      Utils.parse_into_expression(predicate, str_as_lit: false)
         | 
| 911 938 | 
             
                    )
         | 
| 912 939 | 
             
                  )
         | 
| 913 940 | 
             
                end
         | 
| @@ -1003,7 +1030,7 @@ module Polars | |
| 1003 1030 | 
             
                def select(*exprs, **named_exprs)
         | 
| 1004 1031 | 
             
                  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
         | 
| 1005 1032 |  | 
| 1006 | 
            -
                  rbexprs = Utils. | 
| 1033 | 
            +
                  rbexprs = Utils.parse_into_list_of_expressions(
         | 
| 1007 1034 | 
             
                    *exprs, **named_exprs, __structify: structify
         | 
| 1008 1035 | 
             
                  )
         | 
| 1009 1036 | 
             
                  _from_rbldf(_ldf.select(rbexprs))
         | 
| @@ -1011,12 +1038,14 @@ module Polars | |
| 1011 1038 |  | 
| 1012 1039 | 
             
                # Start a group by operation.
         | 
| 1013 1040 | 
             
                #
         | 
| 1014 | 
            -
                # @param by [ | 
| 1041 | 
            +
                # @param by [Array]
         | 
| 1015 1042 | 
             
                #   Column(s) to group by.
         | 
| 1016 1043 | 
             
                # @param maintain_order [Boolean]
         | 
| 1017 1044 | 
             
                #   Make sure that the order of the groups remain consistent. This is more
         | 
| 1018 1045 | 
             
                #   expensive than a default group by.
         | 
| 1019 | 
            -
                #
         | 
| 1046 | 
            +
                # @param named_by [Hash]
         | 
| 1047 | 
            +
                #   Additional columns to group by, specified as keyword arguments.
         | 
| 1048 | 
            +
                #   The columns will be renamed to the keyword used.
         | 
| 1020 1049 | 
             
                # @return [LazyGroupBy]
         | 
| 1021 1050 | 
             
                #
         | 
| 1022 1051 | 
             
                # @example
         | 
| @@ -1039,9 +1068,9 @@ module Polars | |
| 1039 1068 | 
             
                #   # │ b   ┆ 11  │
         | 
| 1040 1069 | 
             
                #   # │ c   ┆ 6   │
         | 
| 1041 1070 | 
             
                #   # └─────┴─────┘
         | 
| 1042 | 
            -
                def group_by(by, maintain_order: false)
         | 
| 1043 | 
            -
                   | 
| 1044 | 
            -
                  lgb = _ldf.group_by( | 
| 1071 | 
            +
                def group_by(*by, maintain_order: false, **named_by)
         | 
| 1072 | 
            +
                  exprs = Utils.parse_into_list_of_expressions(*by, **named_by)
         | 
| 1073 | 
            +
                  lgb = _ldf.group_by(exprs, maintain_order)
         | 
| 1045 1074 | 
             
                  LazyGroupBy.new(lgb)
         | 
| 1046 1075 | 
             
                end
         | 
| 1047 1076 | 
             
                alias_method :groupby, :group_by
         | 
| @@ -1095,12 +1124,6 @@ module Polars | |
| 1095 1124 | 
             
                #   Define whether the temporal window interval is closed or not.
         | 
| 1096 1125 | 
             
                # @param by [Object]
         | 
| 1097 1126 | 
             
                #   Also group by this column/these columns.
         | 
| 1098 | 
            -
                # @param check_sorted [Boolean]
         | 
| 1099 | 
            -
                #   When the `by` argument is given, polars can not check sortedness
         | 
| 1100 | 
            -
                #   by the metadata and has to do a full scan on the index column to
         | 
| 1101 | 
            -
                #   verify data is sorted. This is expensive. If you are sure the
         | 
| 1102 | 
            -
                #   data within the by groups is sorted, you can set this to `false`.
         | 
| 1103 | 
            -
                #   Doing so incorrectly will lead to incorrect output
         | 
| 1104 1127 | 
             
                #
         | 
| 1105 1128 | 
             
                # @return [LazyFrame]
         | 
| 1106 1129 | 
             
                #
         | 
| @@ -1142,21 +1165,20 @@ module Polars | |
| 1142 1165 | 
             
                  period:,
         | 
| 1143 1166 | 
             
                  offset: nil,
         | 
| 1144 1167 | 
             
                  closed: "right",
         | 
| 1145 | 
            -
                  by: nil | 
| 1146 | 
            -
                  check_sorted: true
         | 
| 1168 | 
            +
                  by: nil
         | 
| 1147 1169 | 
             
                )
         | 
| 1148 | 
            -
                  index_column = Utils. | 
| 1170 | 
            +
                  index_column = Utils.parse_into_expression(index_column)
         | 
| 1149 1171 | 
             
                  if offset.nil?
         | 
| 1150 | 
            -
                    offset =  | 
| 1172 | 
            +
                    offset = Utils.negate_duration_string(Utils.parse_as_duration_string(period))
         | 
| 1151 1173 | 
             
                  end
         | 
| 1152 1174 |  | 
| 1153 | 
            -
                  rbexprs_by =  | 
| 1154 | 
            -
             | 
| 1155 | 
            -
                  offset = Utils._timedelta_to_pl_duration(offset)
         | 
| 1156 | 
            -
             | 
| 1157 | 
            -
                  lgb = _ldf.rolling(
         | 
| 1158 | 
            -
                    index_column, period, offset, closed, rbexprs_by, check_sorted
         | 
| 1175 | 
            +
                  rbexprs_by = (
         | 
| 1176 | 
            +
                    !by.nil? ? Utils.parse_into_list_of_expressions(by) : []
         | 
| 1159 1177 | 
             
                  )
         | 
| 1178 | 
            +
                  period = Utils.parse_as_duration_string(period)
         | 
| 1179 | 
            +
                  offset = Utils.parse_as_duration_string(offset)
         | 
| 1180 | 
            +
             | 
| 1181 | 
            +
                  lgb = _ldf.rolling(index_column, period, offset, closed, rbexprs_by)
         | 
| 1160 1182 | 
             
                  LazyGroupBy.new(lgb)
         | 
| 1161 1183 | 
             
                end
         | 
| 1162 1184 | 
             
                alias_method :group_by_rolling, :rolling
         | 
| @@ -1224,22 +1246,18 @@ module Polars | |
| 1224 1246 | 
             
                #   Define whether the temporal window interval is closed or not.
         | 
| 1225 1247 | 
             
                # @param by [Object]
         | 
| 1226 1248 | 
             
                #   Also group by this column/these columns
         | 
| 1227 | 
            -
                # @param check_sorted [Boolean]
         | 
| 1228 | 
            -
                #   When the `by` argument is given, polars can not check sortedness
         | 
| 1229 | 
            -
                #   by the metadata and has to do a full scan on the index column to
         | 
| 1230 | 
            -
                #   verify data is sorted. This is expensive. If you are sure the
         | 
| 1231 | 
            -
                #   data within the by groups is sorted, you can set this to `false`.
         | 
| 1232 | 
            -
                #   Doing so incorrectly will lead to incorrect output.
         | 
| 1233 1249 | 
             
                #
         | 
| 1234 1250 | 
             
                # @return [DataFrame]
         | 
| 1235 1251 | 
             
                #
         | 
| 1236 1252 | 
             
                # @example
         | 
| 1237 1253 | 
             
                #   df = Polars::DataFrame.new(
         | 
| 1238 1254 | 
             
                #     {
         | 
| 1239 | 
            -
                #       "time" => Polars. | 
| 1255 | 
            +
                #       "time" => Polars.datetime_range(
         | 
| 1240 1256 | 
             
                #         DateTime.new(2021, 12, 16),
         | 
| 1241 1257 | 
             
                #         DateTime.new(2021, 12, 16, 3),
         | 
| 1242 | 
            -
                #         "30m"
         | 
| 1258 | 
            +
                #         "30m",
         | 
| 1259 | 
            +
                #         time_unit: "us",
         | 
| 1260 | 
            +
                #         eager: true
         | 
| 1243 1261 | 
             
                #       ),
         | 
| 1244 1262 | 
             
                #       "n" => 0..6
         | 
| 1245 1263 | 
             
                #     }
         | 
| @@ -1338,10 +1356,12 @@ module Polars | |
| 1338 1356 | 
             
                # @example Dynamic group bys can also be combined with grouping on normal keys.
         | 
| 1339 1357 | 
             
                #   df = Polars::DataFrame.new(
         | 
| 1340 1358 | 
             
                #     {
         | 
| 1341 | 
            -
                #       "time" => Polars. | 
| 1359 | 
            +
                #       "time" => Polars.datetime_range(
         | 
| 1342 1360 | 
             
                #         DateTime.new(2021, 12, 16),
         | 
| 1343 1361 | 
             
                #         DateTime.new(2021, 12, 16, 3),
         | 
| 1344 | 
            -
                #         "30m"
         | 
| 1362 | 
            +
                #         "30m",
         | 
| 1363 | 
            +
                #         time_unit: "us",
         | 
| 1364 | 
            +
                #         eager: true
         | 
| 1345 1365 | 
             
                #       ),
         | 
| 1346 1366 | 
             
                #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
         | 
| 1347 1367 | 
             
                #     }
         | 
| @@ -1405,14 +1425,13 @@ module Polars | |
| 1405 1425 | 
             
                  closed: "left",
         | 
| 1406 1426 | 
             
                  label: "left",
         | 
| 1407 1427 | 
             
                  by: nil,
         | 
| 1408 | 
            -
                  start_by: "window" | 
| 1409 | 
            -
                  check_sorted: true
         | 
| 1428 | 
            +
                  start_by: "window"
         | 
| 1410 1429 | 
             
                )
         | 
| 1411 1430 | 
             
                  if !truncate.nil?
         | 
| 1412 1431 | 
             
                    label = truncate ? "left" : "datapoint"
         | 
| 1413 1432 | 
             
                  end
         | 
| 1414 1433 |  | 
| 1415 | 
            -
                  index_column = Utils. | 
| 1434 | 
            +
                  index_column = Utils.parse_into_expression(index_column, str_as_lit: false)
         | 
| 1416 1435 | 
             
                  if offset.nil?
         | 
| 1417 1436 | 
             
                    offset = period.nil? ? "-#{every}" : "0ns"
         | 
| 1418 1437 | 
             
                  end
         | 
| @@ -1421,13 +1440,13 @@ module Polars | |
| 1421 1440 | 
             
                    period = every
         | 
| 1422 1441 | 
             
                  end
         | 
| 1423 1442 |  | 
| 1424 | 
            -
                  period = Utils. | 
| 1425 | 
            -
                  offset = Utils. | 
| 1426 | 
            -
                  every = Utils. | 
| 1443 | 
            +
                  period = Utils.parse_as_duration_string(period)
         | 
| 1444 | 
            +
                  offset = Utils.parse_as_duration_string(offset)
         | 
| 1445 | 
            +
                  every = Utils.parse_as_duration_string(every)
         | 
| 1427 1446 |  | 
| 1428 | 
            -
                  rbexprs_by = by.nil? ? [] : Utils. | 
| 1447 | 
            +
                  rbexprs_by = by.nil? ? [] : Utils.parse_into_list_of_expressions(by)
         | 
| 1429 1448 | 
             
                  lgb = _ldf.group_by_dynamic(
         | 
| 1430 | 
            -
                    index_column | 
| 1449 | 
            +
                    index_column,
         | 
| 1431 1450 | 
             
                    every,
         | 
| 1432 1451 | 
             
                    period,
         | 
| 1433 1452 | 
             
                    offset,
         | 
| @@ -1435,8 +1454,7 @@ module Polars | |
| 1435 1454 | 
             
                    include_boundaries,
         | 
| 1436 1455 | 
             
                    closed,
         | 
| 1437 1456 | 
             
                    rbexprs_by,
         | 
| 1438 | 
            -
                    start_by | 
| 1439 | 
            -
                    check_sorted
         | 
| 1457 | 
            +
                    start_by
         | 
| 1440 1458 | 
             
                  )
         | 
| 1441 1459 | 
             
                  LazyGroupBy.new(lgb)
         | 
| 1442 1460 | 
             
                end
         | 
| @@ -1587,7 +1605,7 @@ module Polars | |
| 1587 1605 | 
             
                # @param on Object
         | 
| 1588 1606 | 
             
                #   Join column of both DataFrames. If set, `left_on` and `right_on` should be
         | 
| 1589 1607 | 
             
                #   None.
         | 
| 1590 | 
            -
                # @param how ["inner", "left", " | 
| 1608 | 
            +
                # @param how ["inner", "left", "full", "semi", "anti", "cross"]
         | 
| 1591 1609 | 
             
                #   Join strategy.
         | 
| 1592 1610 | 
             
                # @param suffix [String]
         | 
| 1593 1611 | 
             
                #   Suffix to append to columns with a duplicate name.
         | 
| @@ -1629,7 +1647,7 @@ module Polars | |
| 1629 1647 | 
             
                #   # └─────┴─────┴─────┴───────┘
         | 
| 1630 1648 | 
             
                #
         | 
| 1631 1649 | 
             
                # @example
         | 
| 1632 | 
            -
                #   df.join(other_df, on: "ham", how: " | 
| 1650 | 
            +
                #   df.join(other_df, on: "ham", how: "full").collect
         | 
| 1633 1651 | 
             
                #   # =>
         | 
| 1634 1652 | 
             
                #   # shape: (4, 5)
         | 
| 1635 1653 | 
             
                #   # ┌──────┬──────┬──────┬───────┬───────────┐
         | 
| @@ -1696,7 +1714,9 @@ module Polars | |
| 1696 1714 | 
             
                    raise ArgumentError, "Expected a `LazyFrame` as join table, got #{other.class.name}"
         | 
| 1697 1715 | 
             
                  end
         | 
| 1698 1716 |  | 
| 1699 | 
            -
                  if how == " | 
| 1717 | 
            +
                  if how == "outer"
         | 
| 1718 | 
            +
                    how = "full"
         | 
| 1719 | 
            +
                  elsif how == "cross"
         | 
| 1700 1720 | 
             
                    return _from_rbldf(
         | 
| 1701 1721 | 
             
                      _ldf.join(
         | 
| 1702 1722 | 
             
                        other._ldf, [], [], allow_parallel, join_nulls, force_parallel, how, suffix
         | 
| @@ -1705,12 +1725,12 @@ module Polars | |
| 1705 1725 | 
             
                  end
         | 
| 1706 1726 |  | 
| 1707 1727 | 
             
                  if !on.nil?
         | 
| 1708 | 
            -
                    rbexprs = Utils. | 
| 1728 | 
            +
                    rbexprs = Utils.parse_into_list_of_expressions(on)
         | 
| 1709 1729 | 
             
                    rbexprs_left = rbexprs
         | 
| 1710 1730 | 
             
                    rbexprs_right = rbexprs
         | 
| 1711 1731 | 
             
                  elsif !left_on.nil? && !right_on.nil?
         | 
| 1712 | 
            -
                    rbexprs_left = Utils. | 
| 1713 | 
            -
                    rbexprs_right = Utils. | 
| 1732 | 
            +
                    rbexprs_left = Utils.parse_into_list_of_expressions(left_on)
         | 
| 1733 | 
            +
                    rbexprs_right = Utils.parse_into_list_of_expressions(right_on)
         | 
| 1714 1734 | 
             
                  else
         | 
| 1715 1735 | 
             
                    raise ArgumentError, "must specify `on` OR `left_on` and `right_on`"
         | 
| 1716 1736 | 
             
                  end
         | 
| @@ -1765,7 +1785,8 @@ module Polars | |
| 1765 1785 | 
             
                #   # └─────┴──────┴───────┴─────┴──────┴───────┘
         | 
| 1766 1786 | 
             
                def with_columns(*exprs, **named_exprs)
         | 
| 1767 1787 | 
             
                  structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0"
         | 
| 1768 | 
            -
             | 
| 1788 | 
            +
             | 
| 1789 | 
            +
                  rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs, __structify: structify)
         | 
| 1769 1790 |  | 
| 1770 1791 | 
             
                  _from_rbldf(_ldf.with_columns(rbexprs))
         | 
| 1771 1792 | 
             
                end
         | 
| @@ -1926,9 +1947,9 @@ module Polars | |
| 1926 1947 | 
             
                #   # └──────┴──────┘
         | 
| 1927 1948 | 
             
                def shift(n, fill_value: nil)
         | 
| 1928 1949 | 
             
                  if !fill_value.nil?
         | 
| 1929 | 
            -
                    fill_value = Utils. | 
| 1950 | 
            +
                    fill_value = Utils.parse_into_expression(fill_value, str_as_lit: true)
         | 
| 1930 1951 | 
             
                  end
         | 
| 1931 | 
            -
                  n = Utils. | 
| 1952 | 
            +
                  n = Utils.parse_into_expression(n)
         | 
| 1932 1953 | 
             
                  _from_rbldf(_ldf.shift(n, fill_value))
         | 
| 1933 1954 | 
             
                end
         | 
| 1934 1955 |  | 
| @@ -2125,7 +2146,7 @@ module Polars | |
| 2125 2146 | 
             
                #   # │ 3   ┆ 7   │
         | 
| 2126 2147 | 
             
                #   # └─────┴─────┘
         | 
| 2127 2148 | 
             
                def take_every(n)
         | 
| 2128 | 
            -
                  select( | 
| 2149 | 
            +
                  select(F.col("*").take_every(n))
         | 
| 2129 2150 | 
             
                end
         | 
| 2130 2151 |  | 
| 2131 2152 | 
             
                # Fill null values using the specified value or strategy.
         | 
| @@ -2168,7 +2189,7 @@ module Polars | |
| 2168 2189 | 
             
                #   # └──────┴──────┘
         | 
| 2169 2190 | 
             
                def fill_nan(fill_value)
         | 
| 2170 2191 | 
             
                  if !fill_value.is_a?(Expr)
         | 
| 2171 | 
            -
                    fill_value =  | 
| 2192 | 
            +
                    fill_value = F.lit(fill_value)
         | 
| 2172 2193 | 
             
                  end
         | 
| 2173 2194 | 
             
                  _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
         | 
| 2174 2195 | 
             
                end
         | 
| @@ -2359,8 +2380,8 @@ module Polars | |
| 2359 2380 | 
             
                #   # │ 3.0 ┆ 1.0 │
         | 
| 2360 2381 | 
             
                #   # └─────┴─────┘
         | 
| 2361 2382 | 
             
                def quantile(quantile, interpolation: "nearest")
         | 
| 2362 | 
            -
                  quantile = Utils. | 
| 2363 | 
            -
                  _from_rbldf(_ldf.quantile(quantile | 
| 2383 | 
            +
                  quantile = Utils.parse_into_expression(quantile, str_as_lit: false)
         | 
| 2384 | 
            +
                  _from_rbldf(_ldf.quantile(quantile, interpolation))
         | 
| 2364 2385 | 
             
                end
         | 
| 2365 2386 |  | 
| 2366 2387 | 
             
                # Explode lists to long format.
         | 
| @@ -2392,7 +2413,7 @@ module Polars | |
| 2392 2413 | 
             
                #   # │ c       ┆ 8       │
         | 
| 2393 2414 | 
             
                #   # └─────────┴─────────┘
         | 
| 2394 2415 | 
             
                def explode(columns)
         | 
| 2395 | 
            -
                  columns = Utils. | 
| 2416 | 
            +
                  columns = Utils.parse_into_list_of_expressions(columns)
         | 
| 2396 2417 | 
             
                  _from_rbldf(_ldf.explode(columns))
         | 
| 2397 2418 | 
             
                end
         | 
| 2398 2419 |  | 
| @@ -2455,35 +2476,35 @@ module Polars | |
| 2455 2476 | 
             
                # Optionally leaves identifiers set.
         | 
| 2456 2477 | 
             
                #
         | 
| 2457 2478 | 
             
                # This function is useful to massage a DataFrame into a format where one or more
         | 
| 2458 | 
            -
                # columns are identifier variables ( | 
| 2459 | 
            -
                # measured variables ( | 
| 2479 | 
            +
                # columns are identifier variables (index) while all other columns, considered
         | 
| 2480 | 
            +
                # measured variables (on), are "unpivoted" to the row axis leaving just
         | 
| 2460 2481 | 
             
                # two non-identifier columns, 'variable' and 'value'.
         | 
| 2461 2482 | 
             
                #
         | 
| 2462 | 
            -
                # @param  | 
| 2463 | 
            -
                #    | 
| 2464 | 
            -
                #  | 
| 2465 | 
            -
                # | 
| 2466 | 
            -
                #    | 
| 2483 | 
            +
                # @param on [Object]
         | 
| 2484 | 
            +
                #   Column(s) or selector(s) to use as values variables; if `on`
         | 
| 2485 | 
            +
                #   is empty all columns that are not in `index` will be used.
         | 
| 2486 | 
            +
                # @param index [Object]
         | 
| 2487 | 
            +
                #   Column(s) or selector(s) to use as identifier variables.
         | 
| 2467 2488 | 
             
                # @param variable_name [String]
         | 
| 2468 | 
            -
                #   Name to give to the ` | 
| 2489 | 
            +
                #   Name to give to the `variable` column. Defaults to "variable"
         | 
| 2469 2490 | 
             
                # @param value_name [String]
         | 
| 2470 2491 | 
             
                #   Name to give to the `value` column. Defaults to "value"
         | 
| 2471 2492 | 
             
                # @param streamable [Boolean]
         | 
| 2472 2493 | 
             
                #   Allow this node to run in the streaming engine.
         | 
| 2473 | 
            -
                #   If this runs in streaming, the output of the  | 
| 2494 | 
            +
                #   If this runs in streaming, the output of the unpivot operation
         | 
| 2474 2495 | 
             
                #   will not have a stable ordering.
         | 
| 2475 2496 | 
             
                #
         | 
| 2476 2497 | 
             
                # @return [LazyFrame]
         | 
| 2477 2498 | 
             
                #
         | 
| 2478 2499 | 
             
                # @example
         | 
| 2479 | 
            -
                #    | 
| 2500 | 
            +
                #   lf = Polars::LazyFrame.new(
         | 
| 2480 2501 | 
             
                #     {
         | 
| 2481 2502 | 
             
                #       "a" => ["x", "y", "z"],
         | 
| 2482 2503 | 
             
                #       "b" => [1, 3, 5],
         | 
| 2483 2504 | 
             
                #       "c" => [2, 4, 6]
         | 
| 2484 2505 | 
             
                #     }
         | 
| 2485 | 
            -
                #   ) | 
| 2486 | 
            -
                #    | 
| 2506 | 
            +
                #   )
         | 
| 2507 | 
            +
                #   lf.unpivot(Polars::Selectors.numeric, index: "a").collect
         | 
| 2487 2508 | 
             
                #   # =>
         | 
| 2488 2509 | 
             
                #   # shape: (6, 3)
         | 
| 2489 2510 | 
             
                #   # ┌─────┬──────────┬───────┐
         | 
| @@ -2498,23 +2519,25 @@ module Polars | |
| 2498 2519 | 
             
                #   # │ y   ┆ c        ┆ 4     │
         | 
| 2499 2520 | 
             
                #   # │ z   ┆ c        ┆ 6     │
         | 
| 2500 2521 | 
             
                #   # └─────┴──────────┴───────┘
         | 
| 2501 | 
            -
                def  | 
| 2502 | 
            -
                   | 
| 2503 | 
            -
             | 
| 2504 | 
            -
                   | 
| 2505 | 
            -
                   | 
| 2506 | 
            -
             | 
| 2507 | 
            -
             | 
| 2508 | 
            -
                  if  | 
| 2509 | 
            -
                     | 
| 2510 | 
            -
                  end
         | 
| 2511 | 
            -
                  if id_vars.nil?
         | 
| 2512 | 
            -
                    id_vars = []
         | 
| 2522 | 
            +
                def unpivot(
         | 
| 2523 | 
            +
                  on,
         | 
| 2524 | 
            +
                  index: nil,
         | 
| 2525 | 
            +
                  variable_name: nil,
         | 
| 2526 | 
            +
                  value_name: nil,
         | 
| 2527 | 
            +
                  streamable: true
         | 
| 2528 | 
            +
                )
         | 
| 2529 | 
            +
                  if !streamable
         | 
| 2530 | 
            +
                    warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
         | 
| 2513 2531 | 
             
                  end
         | 
| 2532 | 
            +
             | 
| 2533 | 
            +
                  on = on.nil? ? [] : Utils._expand_selectors(self, on)
         | 
| 2534 | 
            +
                  index = index.nil? ? [] : Utils._expand_selectors(self, index)
         | 
| 2535 | 
            +
             | 
| 2514 2536 | 
             
                  _from_rbldf(
         | 
| 2515 | 
            -
                    _ldf. | 
| 2537 | 
            +
                    _ldf.unpivot(on, index, value_name, variable_name)
         | 
| 2516 2538 | 
             
                  )
         | 
| 2517 2539 | 
             
                end
         | 
| 2540 | 
            +
                alias_method :melt, :unpivot
         | 
| 2518 2541 |  | 
| 2519 2542 | 
             
                # def map
         | 
| 2520 2543 | 
             
                # end
         | 
| @@ -2545,7 +2568,7 @@ module Polars | |
| 2545 2568 | 
             
                #   # │ 10.0 ┆ null ┆ 9.0      │
         | 
| 2546 2569 | 
             
                #   # └──────┴──────┴──────────┘
         | 
| 2547 2570 | 
             
                def interpolate
         | 
| 2548 | 
            -
                  select( | 
| 2571 | 
            +
                  select(F.col("*").interpolate)
         | 
| 2549 2572 | 
             
                end
         | 
| 2550 2573 |  | 
| 2551 2574 | 
             
                # Decompose a struct into its fields.
         | 
| @@ -2652,24 +2675,19 @@ module Polars | |
| 2652 2675 | 
             
                #
         | 
| 2653 2676 | 
             
                # @param column [Object]
         | 
| 2654 2677 | 
             
                #   Columns that are sorted
         | 
| 2655 | 
            -
                # @param more_columns [Object]
         | 
| 2656 | 
            -
                #   Additional columns that are sorted, specified as positional arguments.
         | 
| 2657 2678 | 
             
                # @param descending [Boolean]
         | 
| 2658 2679 | 
             
                #   Whether the columns are sorted in descending order.
         | 
| 2659 2680 | 
             
                #
         | 
| 2660 2681 | 
             
                # @return [LazyFrame]
         | 
| 2661 2682 | 
             
                def set_sorted(
         | 
| 2662 2683 | 
             
                  column,
         | 
| 2663 | 
            -
                  *more_columns,
         | 
| 2664 2684 | 
             
                  descending: false
         | 
| 2665 2685 | 
             
                )
         | 
| 2666 | 
            -
                   | 
| 2667 | 
            -
             | 
| 2668 | 
            -
                     | 
| 2686 | 
            +
                  if !Utils.strlike?(column)
         | 
| 2687 | 
            +
                    msg = "expected a 'str' for argument 'column' in 'set_sorted'"
         | 
| 2688 | 
            +
                    raise TypeError, msg
         | 
| 2669 2689 | 
             
                  end
         | 
| 2670 | 
            -
                  with_columns(
         | 
| 2671 | 
            -
                    columns.map { |e| Utils.wrap_expr(e).set_sorted(descending: descending) }
         | 
| 2672 | 
            -
                  )
         | 
| 2690 | 
            +
                  with_columns(F.col(column).set_sorted(descending: descending))
         | 
| 2673 2691 | 
             
                end
         | 
| 2674 2692 |  | 
| 2675 2693 | 
             
                # TODO
         | 
    
        data/lib/polars/lazy_group_by.rb
    CHANGED
    
    | @@ -107,7 +107,7 @@ module Polars | |
| 107 107 | 
             
                #   # │ b   ┆ 5     ┆ 10.0           │
         | 
| 108 108 | 
             
                #   # └─────┴───────┴────────────────┘
         | 
| 109 109 | 
             
                def agg(*aggs, **named_aggs)
         | 
| 110 | 
            -
                  rbexprs = Utils. | 
| 110 | 
            +
                  rbexprs = Utils.parse_into_list_of_expressions(*aggs, **named_aggs)
         | 
| 111 111 | 
             
                  Utils.wrap_ldf(@lgb.agg(rbexprs))
         | 
| 112 112 | 
             
                end
         | 
| 113 113 |  | 
    
        data/lib/polars/list_expr.rb
    CHANGED
    
    | @@ -146,7 +146,7 @@ module Polars | |
| 146 146 | 
             
                  end
         | 
| 147 147 |  | 
| 148 148 | 
             
                  if !fraction.nil?
         | 
| 149 | 
            -
                    fraction = Utils. | 
| 149 | 
            +
                    fraction = Utils.parse_into_expression(fraction)
         | 
| 150 150 | 
             
                    return Utils.wrap_expr(
         | 
| 151 151 | 
             
                      _rbexpr.list_sample_fraction(
         | 
| 152 152 | 
             
                        fraction, with_replacement, shuffle, seed
         | 
| @@ -155,7 +155,7 @@ module Polars | |
| 155 155 | 
             
                  end
         | 
| 156 156 |  | 
| 157 157 | 
             
                  n = 1 if n.nil?
         | 
| 158 | 
            -
                  n = Utils. | 
| 158 | 
            +
                  n = Utils.parse_into_expression(n)
         | 
| 159 159 | 
             
                  Utils.wrap_expr(_rbexpr.list_sample_n(n, with_replacement, shuffle, seed))
         | 
| 160 160 | 
             
                end
         | 
| 161 161 |  | 
| @@ -387,7 +387,7 @@ module Polars | |
| 387 387 | 
             
                #   # │ 1    │
         | 
| 388 388 | 
             
                #   # └──────┘
         | 
| 389 389 | 
             
                def get(index, null_on_oob: true)
         | 
| 390 | 
            -
                  index = Utils. | 
| 390 | 
            +
                  index = Utils.parse_into_expression(index)
         | 
| 391 391 | 
             
                  Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob))
         | 
| 392 392 | 
             
                end
         | 
| 393 393 |  | 
| @@ -431,7 +431,7 @@ module Polars | |
| 431 431 | 
             
                  if index.is_a?(::Array)
         | 
| 432 432 | 
             
                    index = Series.new(index)
         | 
| 433 433 | 
             
                  end
         | 
| 434 | 
            -
                  index = Utils. | 
| 434 | 
            +
                  index = Utils.parse_into_expression(index, str_as_lit: false)
         | 
| 435 435 | 
             
                  Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
         | 
| 436 436 | 
             
                end
         | 
| 437 437 | 
             
                alias_method :take, :gather
         | 
| @@ -502,7 +502,7 @@ module Polars | |
| 502 502 | 
             
                #   # │ true  │
         | 
| 503 503 | 
             
                #   # └───────┘
         | 
| 504 504 | 
             
                def contains(item)
         | 
| 505 | 
            -
                  Utils.wrap_expr(_rbexpr.list_contains(Utils. | 
| 505 | 
            +
                  Utils.wrap_expr(_rbexpr.list_contains(Utils.parse_into_expression(item)))
         | 
| 506 506 | 
             
                end
         | 
| 507 507 |  | 
| 508 508 | 
             
                # Join all string items in a sublist and place a separator between them.
         | 
| @@ -530,7 +530,7 @@ module Polars | |
| 530 530 | 
             
                #   # │ x y   │
         | 
| 531 531 | 
             
                #   # └───────┘
         | 
| 532 532 | 
             
                def join(separator, ignore_nulls: true)
         | 
| 533 | 
            -
                  separator = Utils. | 
| 533 | 
            +
                  separator = Utils.parse_into_expression(separator, str_as_lit: true)
         | 
| 534 534 | 
             
                  Utils.wrap_expr(_rbexpr.list_join(separator, ignore_nulls))
         | 
| 535 535 | 
             
                end
         | 
| 536 536 |  | 
| @@ -625,7 +625,7 @@ module Polars | |
| 625 625 | 
             
                #   #         [null, 10, 2]
         | 
| 626 626 | 
             
                #   # ]
         | 
| 627 627 | 
             
                def shift(n = 1)
         | 
| 628 | 
            -
                  n = Utils. | 
| 628 | 
            +
                  n = Utils.parse_into_expression(n)
         | 
| 629 629 | 
             
                  Utils.wrap_expr(_rbexpr.list_shift(n))
         | 
| 630 630 | 
             
                end
         | 
| 631 631 |  | 
| @@ -650,8 +650,8 @@ module Polars | |
| 650 650 | 
             
                #   #         [2, 1]
         | 
| 651 651 | 
             
                #   # ]
         | 
| 652 652 | 
             
                def slice(offset, length = nil)
         | 
| 653 | 
            -
                  offset = Utils. | 
| 654 | 
            -
                  length = Utils. | 
| 653 | 
            +
                  offset = Utils.parse_into_expression(offset, str_as_lit: false)
         | 
| 654 | 
            +
                  length = Utils.parse_into_expression(length, str_as_lit: false)
         | 
| 655 655 | 
             
                  Utils.wrap_expr(_rbexpr.list_slice(offset, length))
         | 
| 656 656 | 
             
                end
         | 
| 657 657 |  | 
| @@ -694,7 +694,7 @@ module Polars | |
| 694 694 | 
             
                #   #         [2, 1]
         | 
| 695 695 | 
             
                #   # ]
         | 
| 696 696 | 
             
                def tail(n = 5)
         | 
| 697 | 
            -
                  n = Utils. | 
| 697 | 
            +
                  n = Utils.parse_into_expression(n)
         | 
| 698 698 | 
             
                  Utils.wrap_expr(_rbexpr.list_tail(n))
         | 
| 699 699 | 
             
                end
         | 
| 700 700 |  | 
| @@ -722,7 +722,7 @@ module Polars | |
| 722 722 | 
             
                #   # │ 0              │
         | 
| 723 723 | 
             
                #   # └────────────────┘
         | 
| 724 724 | 
             
                def count_matches(element)
         | 
| 725 | 
            -
                  Utils.wrap_expr(_rbexpr.list_count_matches(Utils. | 
| 725 | 
            +
                  Utils.wrap_expr(_rbexpr.list_count_matches(Utils.parse_into_expression(element)))
         | 
| 726 726 | 
             
                end
         | 
| 727 727 | 
             
                alias_method :count_match, :count_matches
         | 
| 728 728 |  | 
| @@ -197,9 +197,13 @@ module Polars | |
| 197 197 | 
             
                #
         | 
| 198 198 | 
             
                # @param index [Integer]
         | 
| 199 199 | 
             
                #   Index to return per sublist
         | 
| 200 | 
            +
                # @param null_on_oob [Boolean]
         | 
| 201 | 
            +
                #   Behavior if an index is out of bounds:
         | 
| 202 | 
            +
                #   true -> set as null
         | 
| 203 | 
            +
                #   false -> raise an error
         | 
| 200 204 | 
             
                #
         | 
| 201 205 | 
             
                # @return [Series]
         | 
| 202 | 
            -
                def get(index)
         | 
| 206 | 
            +
                def get(index, null_on_oob: false)
         | 
| 203 207 | 
             
                  super
         | 
| 204 208 | 
             
                end
         | 
| 205 209 |  | 
| @@ -10,25 +10,23 @@ module Polars | |
| 10 10 | 
             
                  period,
         | 
| 11 11 | 
             
                  offset,
         | 
| 12 12 | 
             
                  closed,
         | 
| 13 | 
            -
                   | 
| 14 | 
            -
                  check_sorted
         | 
| 13 | 
            +
                  group_by
         | 
| 15 14 | 
             
                )
         | 
| 16 | 
            -
                  period = Utils. | 
| 17 | 
            -
                  offset = Utils. | 
| 15 | 
            +
                  period = Utils.parse_as_duration_string(period)
         | 
| 16 | 
            +
                  offset = Utils.parse_as_duration_string(offset)
         | 
| 18 17 |  | 
| 19 18 | 
             
                  @df = df
         | 
| 20 19 | 
             
                  @time_column = index_column
         | 
| 21 20 | 
             
                  @period = period
         | 
| 22 21 | 
             
                  @offset = offset
         | 
| 23 22 | 
             
                  @closed = closed
         | 
| 24 | 
            -
                  @ | 
| 25 | 
            -
                  @check_sorted = check_sorted
         | 
| 23 | 
            +
                  @group_by = group_by
         | 
| 26 24 | 
             
                end
         | 
| 27 25 |  | 
| 28 26 | 
             
                def agg(*aggs, **named_aggs)
         | 
| 29 27 | 
             
                  @df.lazy
         | 
| 30 28 | 
             
                    .group_by_rolling(
         | 
| 31 | 
            -
                      index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @ | 
| 29 | 
            +
                      index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
         | 
| 32 30 | 
             
                    )
         | 
| 33 31 | 
             
                    .agg(*aggs, **named_aggs)
         | 
| 34 32 | 
             
                    .collect(no_optimization: true, string_cache: false)
         |