polars-df 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml CHANGED
@@ -1,5 +1,6 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
+ resolver = "2"
3
4
 
4
5
  [patch.crates-io]
5
6
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
data/README.md CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -359,7 +359,8 @@ Supported types are:
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
360
  - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
- - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
363
364
 
364
365
  Get column types
365
366
 
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
402
403
  Group data
403
404
 
404
405
  ```ruby
405
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
406
407
  ```
407
408
 
408
409
  Stacked columns or bars
409
410
 
410
411
  ```ruby
411
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
412
413
  ```
413
414
 
414
415
  ## History
data/ext/polars/Cargo.toml CHANGED
@@ -1,9 +1,10 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.6.0"
3
+ version = "0.8.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.73.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
@@ -11,14 +12,17 @@ crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
14
  ahash = "0.8"
14
- chrono = "=0.4.24"
15
- magnus = "0.5"
16
- polars-core = "0.31.1"
15
+ chrono = "0.4"
16
+ either = "1.8"
17
+ magnus = "0.6"
18
+ polars-core = "=0.36.2"
19
+ polars-parquet = "=0.36.2"
20
+ polars-utils = "=0.36.2"
17
21
  serde_json = "1"
18
22
  smartstring = "1"
19
23
 
20
24
  [dependencies.polars]
21
- version = "0.31.1"
25
+ version = "=0.36.2"
22
26
  features = [
23
27
  "abs",
24
28
  "approx_unique",
@@ -27,32 +31,35 @@ features = [
27
31
  "avro",
28
32
  "binary_encoding",
29
33
  "concat_str",
34
+ "cov",
30
35
  "cse",
31
36
  "csv",
32
37
  "cum_agg",
33
38
  "cumulative_eval",
39
+ "cutqcut",
34
40
  "dataframe_arithmetic",
35
41
  "date_offset",
36
42
  "diagonal_concat",
37
43
  "diff",
38
44
  "dot_product",
39
45
  "dtype-full",
40
- "dynamic_groupby",
46
+ "dynamic_group_by",
41
47
  "ewma",
42
48
  "extract_jsonpath",
43
49
  "fmt",
44
50
  "horizontal_concat",
45
51
  "interpolate",
46
52
  "ipc",
47
- "is_first",
53
+ "is_first_distinct",
48
54
  "is_in",
55
+ "is_last_distinct",
49
56
  "is_unique",
50
57
  "json",
51
58
  "lazy",
52
59
  "lazy_regex",
53
60
  "list_count",
54
61
  "list_eval",
55
- "list_take",
62
+ "list_gather",
56
63
  "list_to_struct",
57
64
  "log",
58
65
  "meta",
@@ -62,6 +69,7 @@ features = [
62
69
  "parquet",
63
70
  "partition_by",
64
71
  "pct_change",
72
+ "peaks",
65
73
  "performant",
66
74
  "pivot",
67
75
  "product",
@@ -71,6 +79,7 @@ features = [
71
79
  "range",
72
80
  "reinterpret",
73
81
  "repeat_by",
82
+ "rle",
74
83
  "rolling_window",
75
84
  "round_series",
76
85
  "row_hash",
@@ -78,9 +87,10 @@ features = [
78
87
  "semi_anti_join",
79
88
  "serde-lazy",
80
89
  "sign",
90
+ "sql",
81
91
  "string_encoding",
82
- "string_from_radix",
83
- "string_justify",
92
+ "string_pad",
93
+ "string_to_integer",
84
94
  "strings",
85
95
  "timezones",
86
96
  "to_dummies",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, Value};
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -24,35 +24,34 @@ impl RbBatchedCsv {
24
24
  pub fn new(arguments: &[Value]) -> RbResult<Self> {
25
25
  // start arguments
26
26
  // this pattern is needed for more than 16
27
- let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
28
- let chunk_size: usize = arguments[1].try_convert()?;
29
- let has_header: bool = arguments[2].try_convert()?;
30
- let ignore_errors: bool = arguments[3].try_convert()?;
31
- let n_rows: Option<usize> = arguments[4].try_convert()?;
32
- let skip_rows: usize = arguments[5].try_convert()?;
33
- let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
34
- let sep: String = arguments[7].try_convert()?;
35
- let rechunk: bool = arguments[8].try_convert()?;
36
- let columns: Option<Vec<String>> = arguments[9].try_convert()?;
37
- let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
38
- let n_threads: Option<usize> = arguments[11].try_convert()?;
39
- let path: PathBuf = arguments[12].try_convert()?;
40
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
27
+ let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
28
+ let chunk_size = usize::try_convert(arguments[1])?;
29
+ let has_header = bool::try_convert(arguments[2])?;
30
+ let ignore_errors = bool::try_convert(arguments[3])?;
31
+ let n_rows = Option::<usize>::try_convert(arguments[4])?;
32
+ let skip_rows = usize::try_convert(arguments[5])?;
33
+ let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
34
+ let separator = String::try_convert(arguments[7])?;
35
+ let rechunk = bool::try_convert(arguments[8])?;
36
+ let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
37
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
38
+ let n_threads = Option::<usize>::try_convert(arguments[11])?;
39
+ let path = PathBuf::try_convert(arguments[12])?;
40
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
41
41
  // TODO fix
42
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
43
- let low_memory: bool = arguments[15].try_convert()?;
44
- let comment_char: Option<String> = arguments[16].try_convert()?;
45
- let quote_char: Option<String> = arguments[17].try_convert()?;
46
- let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
47
- let try_parse_dates: bool = arguments[19].try_convert()?;
48
- let skip_rows_after_header: usize = arguments[20].try_convert()?;
49
- let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
50
- let sample_size: usize = arguments[22].try_convert()?;
51
- let eol_char: String = arguments[23].try_convert()?;
42
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
43
+ let low_memory = bool::try_convert(arguments[15])?;
44
+ let comment_prefix = Option::<String>::try_convert(arguments[16])?;
45
+ let quote_char = Option::<String>::try_convert(arguments[17])?;
46
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
47
+ let try_parse_dates = bool::try_convert(arguments[19])?;
48
+ let skip_rows_after_header = usize::try_convert(arguments[20])?;
49
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
50
+ let sample_size = usize::try_convert(arguments[22])?;
51
+ let eol_char = String::try_convert(arguments[23])?;
52
52
  // end arguments
53
53
 
54
54
  let null_values = null_values.map(|w| w.0);
55
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
56
55
  let eol_char = eol_char.as_bytes()[0];
57
56
 
58
57
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -90,7 +89,7 @@ impl RbBatchedCsv {
90
89
  .infer_schema(infer_schema_length)
91
90
  .has_header(has_header)
92
91
  .with_n_rows(n_rows)
93
- .with_delimiter(sep.as_bytes()[0])
92
+ .with_separator(separator.as_bytes()[0])
94
93
  .with_skip_rows(skip_rows)
95
94
  .with_ignore_errors(ignore_errors)
96
95
  .with_projection(projection)
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
101
100
  .with_n_threads(n_threads)
102
101
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
103
102
  .low_memory(low_memory)
104
- .with_comment_char(comment_char)
103
+ .with_comment_prefix(comment_prefix.as_deref())
105
104
  .with_null_values(null_values)
106
105
  .with_try_parse_dates(try_parse_dates)
107
106
  .with_quote_char(quote_char)