polars-df 0.3.1-arm64-darwin → 0.4.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/polars/io.rb CHANGED
@@ -2,7 +2,7 @@ module Polars
   module IO
     # Read a CSV file into a DataFrame.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     # @param has_header [Boolean]
     #   Indicate if the first row of dataset is a header or not.
@@ -89,7 +89,7 @@ module Polars
     # Set `rechunk: false` if you are benchmarking the csv-reader. A `rechunk` is
     # an expensive operation.
     def read_csv(
-      file,
+      source,
       has_header: true,
       columns: nil,
       new_columns: nil,
@@ -137,7 +137,7 @@ module Polars
       end
 
       df = nil
-      _prepare_file_arg(file) do |data|
+      _prepare_file_arg(source) do |data|
        df = DataFrame._read_csv(
          data,
          has_header: has_header,
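Across this file, the first positional parameter of the IO readers is renamed from `file` to `source`, matching py-polars. Because the argument is positional, existing call sites are unaffected; a minimal sketch (hypothetical `data.csv`):

    df = Polars.read_csv("data.csv", has_header: true)
    # per the docstring, file-like objects are accepted too:
    df = Polars.read_csv(File.open("data.csv"))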
@@ -178,7 +178,7 @@ module Polars
     # projections to the scan level, thereby potentially reducing
     # memory overhead.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file.
     # @param has_header [Boolean]
     #   Indicate if the first row of dataset is a header or not.
@@ -242,7 +242,7 @@ module Polars
     #
     # @return [LazyFrame]
     def scan_csv(
-      file,
+      source,
       has_header: true,
       sep: ",",
       comment_char: nil,
@@ -268,12 +268,12 @@ module Polars
       _check_arg_is_1byte("comment_char", comment_char, false)
       _check_arg_is_1byte("quote_char", quote_char, true)
 
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
       LazyFrame._scan_csv(
-        file,
+        source,
        has_header: has_header,
        sep: sep,
        comment_char: comment_char,
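Note that `scan_csv` now normalises path-like input with `Utils.normalise_filepath`, which also rejects directories (see the `utils.rb` hunk near the end of this diff). A lazy-scan sketch with a hypothetical path:

    lf = Polars.scan_csv("data.csv")
    df = lf.filter(Polars.col("a") > 1).select(["a", "b"]).collect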
@@ -302,7 +302,7 @@ module Polars
     # This allows the query optimizer to push down predicates and projections to the scan
     # level, thereby potentially reducing memory overhead.
     #
-    # @param file [String]
+    # @param source [String]
     #   Path to a IPC file.
     # @param n_rows [Integer]
     #   Stop reading from IPC file after reading `n_rows`.
@@ -324,7 +324,7 @@ module Polars
     #
     # @return [LazyFrame]
     def scan_ipc(
-      file,
+      source,
       n_rows: nil,
       cache: true,
       rechunk: true,
@@ -334,7 +334,7 @@ module Polars
       memory_map: true
     )
       LazyFrame._scan_ipc(
-        file,
+        source,
        n_rows: n_rows,
        cache: cache,
        rechunk: rechunk,
@@ -350,7 +350,7 @@ module Polars
     # This allows the query optimizer to push down predicates and projections to the scan
     # level, thereby potentially reducing memory overhead.
     #
-    # @param file [String]
+    # @param source [String]
     #   Path to a file.
     # @param n_rows [Integer]
     #   Stop reading from parquet file after reading `n_rows`.
@@ -374,7 +374,7 @@ module Polars
     #
     # @return [LazyFrame]
     def scan_parquet(
-      file,
+      source,
       n_rows: nil,
       cache: true,
       parallel: "auto",
@@ -384,12 +384,12 @@ module Polars
       storage_options: nil,
       low_memory: false
     )
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
       LazyFrame._scan_parquet(
-        file,
+        source,
        n_rows: n_rows,
        cache: cache,
        parallel: parallel,
@@ -406,7 +406,7 @@ module Polars
     # This allows the query optimizer to push down predicates and projections to the scan
     # level, thereby potentially reducing memory overhead.
     #
-    # @param file [String]
+    # @param source [String]
     #   Path to a file.
     # @param infer_schema_length [Integer]
     #   Infer the schema length from the first `infer_schema_length` rows.
@@ -426,7 +426,7 @@ module Polars
     #
     # @return [LazyFrame]
     def scan_ndjson(
-      file,
+      source,
       infer_schema_length: 100,
       batch_size: 1024,
       n_rows: nil,
@@ -435,12 +435,12 @@ module Polars
       row_count_name: nil,
       row_count_offset: 0
     )
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
       LazyFrame._scan_ndjson(
-        file,
+        source,
        infer_schema_length: infer_schema_length,
        batch_size: batch_size,
        n_rows: n_rows,
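The same `source` rename and `normalise_filepath` treatment is applied to `scan_ipc`, `scan_parquet`, and `scan_ndjson` as well. For example (hypothetical file name):

    lf = Polars.scan_parquet("events.parquet", n_rows: 1_000)
    lf.select([Polars.col("user_id")]).collect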
@@ -453,7 +453,7 @@ module Polars
 
     # Read into a DataFrame from Apache Avro format.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     # @param columns [Object]
     #   Columns to select. Accepts a list of column indices (starting at zero) or a list
@@ -462,17 +462,17 @@ module Polars
     #   Stop reading from Apache Avro file after reading ``n_rows``.
     #
     # @return [DataFrame]
-    def read_avro(file, columns: nil, n_rows: nil)
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+    def read_avro(source, columns: nil, n_rows: nil)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
-      DataFrame._read_avro(file, n_rows: n_rows, columns: columns)
+      DataFrame._read_avro(source, n_rows: n_rows, columns: columns)
     end
 
     # Read into a DataFrame from Arrow IPC (Feather v2) file.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     # @param columns [Object]
     #   Columns to select. Accepts a list of column indices (starting at zero) or a list
@@ -495,7 +495,7 @@ module Polars
     #
     # @return [DataFrame]
     def read_ipc(
-      file,
+      source,
       columns: nil,
       n_rows: nil,
       memory_map: true,
@@ -505,7 +505,7 @@ module Polars
       rechunk: true
     )
       storage_options ||= {}
-      _prepare_file_arg(file, **storage_options) do |data|
+      _prepare_file_arg(source, **storage_options) do |data|
        DataFrame._read_ipc(
          data,
          columns: columns,
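`read_avro` and `read_ipc` get the same parameter rename; behavior is otherwise unchanged. A sketch with hypothetical paths:

    df = Polars.read_avro("data.avro", n_rows: 100)
    df = Polars.read_ipc("data.arrow", memory_map: true)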
@@ -520,8 +520,8 @@ module Polars
 
     # Read into a DataFrame from a parquet file.
     #
-    # @param file [Object]
-    #   Path to a file, or a file-like object.
+    # @param source [Object]
+    #   Path to a file or a file-like object.
     # @param columns [Object]
     #   Columns to select. Accepts a list of column indices (starting at zero) or a list
     #   of column names.
@@ -539,6 +539,12 @@ module Polars
     #   Offset to start the row_count column (only use if the name is set).
     # @param low_memory [Boolean]
     #   Reduce memory pressure at the expense of performance.
+    # @param use_statistics [Boolean]
+    #   Use statistics in the parquet to determine if pages
+    #   can be skipped from reading.
+    # @param rechunk [Boolean]
+    #   Make sure that all columns are contiguous in memory by
+    #   aggregating the chunks into a single array.
     #
     # @return [DataFrame]
     #
@@ -548,16 +554,18 @@ module Polars
     # Set `rechunk: false` if you are benchmarking the parquet-reader. A `rechunk` is
     # an expensive operation.
     def read_parquet(
-      file,
+      source,
       columns: nil,
       n_rows: nil,
       storage_options: nil,
       parallel: "auto",
       row_count_name: nil,
       row_count_offset: 0,
-      low_memory: false
+      low_memory: false,
+      use_statistics: true,
+      rechunk: true
     )
-      _prepare_file_arg(file) do |data|
+      _prepare_file_arg(source) do |data|
        DataFrame._read_parquet(
          data,
          columns: columns,
@@ -565,49 +573,51 @@ module Polars
          parallel: parallel,
          row_count_name: row_count_name,
          row_count_offset: row_count_offset,
-          low_memory: low_memory
+          low_memory: low_memory,
+          use_statistics: use_statistics,
+          rechunk: rechunk
        )
       end
     end
 
     # Read into a DataFrame from a JSON file.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     #
     # @return [DataFrame]
-    def read_json(file)
-      DataFrame._read_json(file)
+    def read_json(source)
+      DataFrame._read_json(source)
     end
 
     # Read into a DataFrame from a newline delimited JSON file.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     #
     # @return [DataFrame]
-    def read_ndjson(file)
-      DataFrame._read_ndjson(file)
+    def read_ndjson(source)
+      DataFrame._read_ndjson(source)
     end
 
     # Read a SQL query into a DataFrame.
     #
-    # @param sql [Object]
+    # @param query [Object]
     #   ActiveRecord::Relation or ActiveRecord::Result.
     #
     # @return [DataFrame]
-    def read_sql(sql)
+    def read_database(query)
       if !defined?(ActiveRecord)
         raise Error, "Active Record not available"
       end
 
       result =
-        if sql.is_a?(ActiveRecord::Result)
-          sql
-        elsif sql.is_a?(ActiveRecord::Relation)
-          sql.connection.select_all(sql.to_sql)
-        elsif sql.is_a?(String)
-          ActiveRecord::Base.connection.select_all(sql)
+        if query.is_a?(ActiveRecord::Result)
+          query
+        elsif query.is_a?(ActiveRecord::Relation)
+          query.connection.select_all(query.to_sql)
+        elsif query.is_a?(String)
+          ActiveRecord::Base.connection.select_all(query)
        else
          raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
        end
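Beyond the rename, `read_parquet` gains two keywords: `use_statistics` (default `true`) lets the reader use parquet page statistics to skip pages, and `rechunk` (default `true`) compacts the result into contiguous memory. For example (hypothetical path):

    df = Polars.read_parquet(
      "data.parquet",
      use_statistics: true,
      rechunk: false  # skip the final rechunk when benchmarking the reader
    )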
@@ -617,6 +627,7 @@ module Polars
       end
       DataFrame.new(data)
     end
+    alias_method :read_sql, :read_database
 
     # def read_excel
     # end
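`read_sql` is renamed to `read_database`, and the alias above keeps the old name working. It accepts an ActiveRecord::Relation, an ActiveRecord::Result, or a SQL string; a sketch assuming a hypothetical ActiveRecord `User` model:

    df = Polars.read_database(User.where(active: true))
    df = Polars.read_database("SELECT id, name FROM users")
    df = Polars.read_sql("SELECT 1")  # legacy spelling, routed through the alias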
@@ -628,7 +639,7 @@ module Polars
     # file chunks. After that work will only be done
     # if `next_batches` is called.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     # @param has_header [Boolean]
     #   Indicate if the first row of dataset is a header or not.
@@ -712,7 +723,7 @@ module Polars
     #   )
     #   reader.next_batches(5)
     def read_csv_batched(
-      file,
+      source,
       has_header: true,
       columns: nil,
       new_columns: nil,
@@ -752,7 +763,7 @@ module Polars
       end
 
       BatchedCsvReader.new(
-        file,
+        source,
        has_header: has_header,
        columns: columns || projection,
        sep: sep,
@@ -781,30 +792,30 @@ module Polars
 
     # Get a schema of the IPC file without reading data.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     #
     # @return [Hash]
-    def read_ipc_schema(file)
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+    def read_ipc_schema(source)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
-      _ipc_schema(file)
+      _ipc_schema(source)
     end
 
     # Get a schema of the Parquet file without reading data.
     #
-    # @param file [Object]
+    # @param source [Object]
     #   Path to a file or a file-like object.
     #
     # @return [Hash]
-    def read_parquet_schema(file)
-      if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+    def read_parquet_schema(source)
+      if Utils.pathlike?(source)
+        source = Utils.normalise_filepath(source)
       end
 
-      _parquet_schema(file)
+      _parquet_schema(source)
     end
 
     private
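`read_ipc_schema` and `read_parquet_schema` follow the same pattern. Both return the schema as a Hash of column names to dtypes without reading any row data, e.g. (hypothetical file):

    schema = Polars.read_parquet_schema("data.parquet")
    # => Hash mapping each column name to its dtype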
data/lib/polars/lazy_frame.rb CHANGED
@@ -80,7 +80,8 @@ module Polars
       row_count_name: nil,
       row_count_offset: 0,
       storage_options: nil,
-      low_memory: false
+      low_memory: false,
+      use_statistics: true
     )
       _from_rbldf(
         RbLazyFrame.new_from_parquet(
@@ -90,7 +91,8 @@ module Polars
          parallel,
          rechunk,
          Utils._prepare_row_count_args(row_count_name, row_count_offset),
-          low_memory
+          low_memory,
+          use_statistics
        )
       )
     end
@@ -107,7 +109,7 @@ module Polars
       memory_map: true
     )
       if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+        file = Utils.normalise_filepath(file)
       end
 
       _from_rbldf(
@@ -157,7 +159,7 @@ module Polars
     # @return [LazyFrame]
     def self.read_json(file)
       if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+        file = Utils.normalise_filepath(file)
       end
 
       Utils.wrap_ldf(RbLazyFrame.read_json(file))
@@ -264,7 +266,7 @@ module Polars
     # @return [nil]
     def write_json(file)
       if Utils.pathlike?(file)
-        file = Utils.format_path(file)
+        file = Utils.normalise_filepath(file)
       end
       _ldf.write_json(file)
       nil
@@ -473,6 +475,96 @@ module Polars
       Utils.wrap_df(ldf.collect)
     end
 
+    # Persists a LazyFrame at the provided path.
+    #
+    # This allows streaming results that are larger than RAM to be written to disk.
+    #
+    # @param path [String]
+    #   File path to which the file should be written.
+    # @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
+    #   Choose "zstd" for good compression performance.
+    #   Choose "lz4" for fast compression/decompression.
+    #   Choose "snappy" for more backwards compatibility guarantees
+    #   when you deal with older parquet readers.
+    # @param compression_level [Integer]
+    #   The level of compression to use. Higher compression means smaller files on
+    #   disk.
+    #
+    #   - "gzip" : min-level: 0, max-level: 10.
+    #   - "brotli" : min-level: 0, max-level: 11.
+    #   - "zstd" : min-level: 1, max-level: 22.
+    # @param statistics [Boolean]
+    #   Write statistics to the parquet headers. This requires extra compute.
+    # @param row_group_size [Integer]
+    #   Size of the row groups in number of rows.
+    #   If `nil` (default), the chunks of the `DataFrame` are
+    #   used. Writing in smaller chunks may reduce memory pressure and improve
+    #   writing speeds.
+    # @param data_pagesize_limit [Integer]
+    #   Size limit of individual data pages.
+    #   If not set defaults to 1024 * 1024 bytes
+    # @param maintain_order [Boolean]
+    #   Maintain the order in which data is processed.
+    #   Setting this to `false` will be slightly faster.
+    # @param type_coercion [Boolean]
+    #   Do type coercion optimization.
+    # @param predicate_pushdown [Boolean]
+    #   Do predicate pushdown optimization.
+    # @param projection_pushdown [Boolean]
+    #   Do projection pushdown optimization.
+    # @param simplify_expression [Boolean]
+    #   Run simplify expressions optimization.
+    # @param no_optimization [Boolean]
+    #   Turn off (certain) optimizations.
+    # @param slice_pushdown [Boolean]
+    #   Slice pushdown optimization.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   lf = Polars.scan_csv("/path/to/my_larger_than_ram_file.csv")
+    #   lf.sink_parquet("out.parquet")
+    def sink_parquet(
+      path,
+      compression: "zstd",
+      compression_level: nil,
+      statistics: false,
+      row_group_size: nil,
+      data_pagesize_limit: nil,
+      maintain_order: true,
+      type_coercion: true,
+      predicate_pushdown: true,
+      projection_pushdown: true,
+      simplify_expression: true,
+      no_optimization: false,
+      slice_pushdown: true
+    )
+      if no_optimization
+        predicate_pushdown = false
+        projection_pushdown = false
+        slice_pushdown = false
+      end
+
+      lf = _ldf.optimization_toggle(
+        type_coercion,
+        predicate_pushdown,
+        projection_pushdown,
+        simplify_expression,
+        slice_pushdown,
+        false,
+        true
+      )
+      lf.sink_parquet(
+        path,
+        compression,
+        compression_level,
+        statistics,
+        row_group_size,
+        data_pagesize_limit,
+        maintain_order
+      )
+    end
+
     # Collect a small number of rows for debugging purposes.
     #
     # Fetch is like a {#collect} operation, but it overwrites the number of rows
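`sink_parquet` is the headline addition in this release: it runs the lazy query on the streaming engine and writes results straight to disk, so outputs larger than RAM never have to be collected into a DataFrame. A sketch with a hypothetical input file:

    lf = Polars.scan_csv("very_large.csv")
    lf.filter(Polars.col("amount") > 0)
      .sink_parquet("filtered.parquet", compression: "zstd", statistics: true)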
@@ -2192,6 +2284,10 @@ module Polars
     #   Name to give to the `value` column. Defaults to "variable"
     # @param value_name [String]
     #   Name to give to the `value` column. Defaults to "value"
+    # @param streamable [Boolean]
+    #   Allow this node to run in the streaming engine.
+    #   If this runs in streaming, the output of the melt operation
+    #   will not have a stable ordering.
     #
     # @return [LazyFrame]
     #
@@ -2218,7 +2314,7 @@ module Polars
     #   # │ y   ┆ c        ┆ 4     │
     #   # │ z   ┆ c        ┆ 6     │
     #   # └─────┴──────────┴───────┘
-    def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
+    def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil, streamable: true)
       if value_vars.is_a?(String)
         value_vars = [value_vars]
       end
@@ -2232,7 +2328,7 @@ module Polars
         id_vars = []
       end
       _from_rbldf(
-        _ldf.melt(id_vars, value_vars, value_name, variable_name)
+        _ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
       )
     end
 
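`melt` now takes a `streamable:` flag (default `true`) that is passed through to the native melt. Letting the node run on the streaming engine trades away a stable output ordering, so pass `streamable: false` when row order matters:

    lf.melt(id_vars: "a", value_vars: ["b", "c"], streamable: false)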
data/lib/polars/lazy_functions.rb CHANGED
@@ -657,7 +657,7 @@ module Polars
     #   Default is ascending.
     #
     # @return [Expr]
-    def argsort_by(exprs, reverse: false)
+    def arg_sort_by(exprs, reverse: false)
       if !exprs.is_a?(Array)
         exprs = [exprs]
       end
@@ -665,8 +665,9 @@ module Polars
        reverse = [reverse] * exprs.length
      end
      exprs = Utils.selection_to_rbexpr_list(exprs)
-      Utils.wrap_expr(RbExpr.argsort_by(exprs, reverse))
+      Utils.wrap_expr(RbExpr.arg_sort_by(exprs, reverse))
     end
+    alias_method :argsort_by, :arg_sort_by
 
     # Create polars `Duration` from distinct time components.
     #
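`argsort_by` becomes `arg_sort_by`, in line with the rest of the `arg_*` functions, and the alias keeps the old spelling working:

    df.select(Polars.arg_sort_by(["a", "b"], reverse: [false, true]))
    Polars.argsort_by("a")  # legacy spelling, still dispatches to arg_sort_by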
data/lib/polars/list_expr.rb CHANGED
@@ -426,7 +426,7 @@ module Polars
     #   # shape: (2,)
     #   # Series: 'a' [list[i64]]
     #   # [
-    #   # [null, 1, ... 1]
+    #   # [null, 1, … 1]
     #   # [null, -8, -1]
     #   # ]
     def diff(n: 1, null_behavior: "ignore")
@@ -447,7 +447,7 @@ module Polars
     #   # shape: (2,)
     #   # Series: 'a' [list[i64]]
     #   # [
-    #   # [null, 1, ... 3]
+    #   # [null, 1, … 3]
     #   # [null, 10, 2]
     #   # ]
     def shift(periods = 1)
data/lib/polars/list_name_space.rb CHANGED
@@ -185,7 +185,7 @@ module Polars
     #   # shape: (2,)
     #   # Series: 'a' [list[i64]]
     #   # [
-    #   # [null, 1, ... 1]
+    #   # [null, 1, … 1]
     #   # [null, -8, -1]
     #   # ]
     def diff(n: 1, null_behavior: "ignore")
@@ -206,7 +206,7 @@ module Polars
     #   # shape: (2,)
     #   # Series: 'a' [list[i64]]
     #   # [
-    #   # [null, 1, ... 3]
+    #   # [null, 1, … 3]
     #   # [null, 10, 2]
     #   # ]
     def shift(periods = 1)
data/lib/polars/series.rb CHANGED
@@ -3531,6 +3531,13 @@ module Polars
       ListNameSpace.new(self)
     end
 
+    # Create an object namespace of all binary related methods.
+    #
+    # @return [BinaryNameSpace]
+    def bin
+      BinaryNameSpace.new(self)
+    end
+
     # Create an object namespace of all categorical related methods.
     #
     # @return [CatNameSpace]
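The new `bin` accessor mirrors the existing `str`, `arr`, and `cat` namespaces and backs onto the `BinaryNameSpace` class required in `polars.rb` at the end of this diff. A sketch, assuming the namespace mirrors py-polars binary methods such as `contains` (method names are not shown in this diff):

    s = Polars::Series.new("data", ["\x00\x01".b, "\xff".b])
    s.bin.contains("\x01".b)  # hypothetical namespace method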
@@ -3795,7 +3802,8 @@ module Polars
       UInt32 => RbSeries.method(:new_opt_u32),
       UInt64 => RbSeries.method(:new_opt_u64),
       Boolean => RbSeries.method(:new_opt_bool),
-      Utf8 => RbSeries.method(:new_str)
+      Utf8 => RbSeries.method(:new_str),
+      Binary => RbSeries.method(:new_binary)
     }
 
     SYM_TYPE_TO_CONSTRUCTOR = {
@@ -332,7 +332,7 @@ module Polars
     #   # │ -0001   │
     #   # │ 00000   │
     #   # │ 00001   │
-    #   # │ ...     │
+    #   # │ …       │
     #   # │ 10000   │
     #   # │ 100000  │
     #   # │ 1000000 │
data/lib/polars/utils.rb CHANGED
@@ -93,8 +93,12 @@ module Polars
       Polars.lit(value)
     end
 
-    def self.format_path(path)
-      File.expand_path(path)
+    def self.normalise_filepath(path, check_not_directory: true)
+      path = File.expand_path(path)
+      if check_not_directory && File.exist?(path) && Dir.exist?(path)
+        raise ArgumentError, "Expected a file path; #{path} is a directory"
+      end
+      path
     end
 
     # TODO fix
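`format_path` is renamed to `normalise_filepath` and gains a guard: when `check_not_directory` is true (the default), an expanded path that points at an existing directory raises `ArgumentError`. For example:

    Polars::Utils.normalise_filepath("~/data.csv")  # => "/home/user/data.csv" (expanded)
    Polars::Utils.normalise_filepath("/tmp")        # raises ArgumentError: ... is a directory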
@@ -216,5 +220,9 @@ module Polars
         val.is_a?(Array) && _is_iterable_of(val, String)
       end
     end
+
+    def self.local_file?(file)
+      Dir.glob(file).any?
+    end
   end
 end
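The new `local_file?` helper treats its argument as a glob and reports whether anything on the local filesystem matches, giving callers a cheap way to distinguish local paths from URLs:

    Polars::Utils.local_file?("data/*.parquet")       # true if any file matches the glob
    Polars::Utils.local_file?("https://example.com")  # false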
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
 module Polars
   # @private
-  VERSION = "0.3.1"
+  VERSION = "0.4.0"
 end
data/lib/polars.rb CHANGED
@@ -12,6 +12,8 @@ require "stringio"
 # modules
 require_relative "polars/expr_dispatch"
 require_relative "polars/batched_csv_reader"
+require_relative "polars/binary_expr"
+require_relative "polars/binary_name_space"
 require_relative "polars/cat_expr"
 require_relative "polars/cat_name_space"
 require_relative "polars/convert"