polars-df 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml CHANGED
@@ -1,5 +1,6 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
+ resolver = "2"
3
4
 
4
5
  [patch.crates-io]
5
6
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
data/README.md CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -359,7 +359,8 @@ Supported types are:
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
360
  - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
- - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
363
364
 
364
365
  Get column types
365
366
 
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
402
403
  Group data
403
404
 
404
405
  ```ruby
405
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
406
407
  ```
407
408
 
408
409
  Stacked columns or bars
409
410
 
410
411
  ```ruby
411
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
412
413
  ```
413
414
 
414
415
  ## History
data/ext/polars/Cargo.toml CHANGED
@@ -1,9 +1,10 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.6.0"
3
+ version = "0.8.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.73.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
@@ -11,14 +12,17 @@ crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
14
  ahash = "0.8"
14
- chrono = "=0.4.24"
15
- magnus = "0.5"
16
- polars-core = "0.31.1"
15
+ chrono = "0.4"
16
+ either = "1.8"
17
+ magnus = "0.6"
18
+ polars-core = "=0.36.2"
19
+ polars-parquet = "=0.36.2"
20
+ polars-utils = "=0.36.2"
17
21
  serde_json = "1"
18
22
  smartstring = "1"
19
23
 
20
24
  [dependencies.polars]
21
- version = "0.31.1"
25
+ version = "=0.36.2"
22
26
  features = [
23
27
  "abs",
24
28
  "approx_unique",
@@ -27,32 +31,35 @@ features = [
27
31
  "avro",
28
32
  "binary_encoding",
29
33
  "concat_str",
34
+ "cov",
30
35
  "cse",
31
36
  "csv",
32
37
  "cum_agg",
33
38
  "cumulative_eval",
39
+ "cutqcut",
34
40
  "dataframe_arithmetic",
35
41
  "date_offset",
36
42
  "diagonal_concat",
37
43
  "diff",
38
44
  "dot_product",
39
45
  "dtype-full",
40
- "dynamic_groupby",
46
+ "dynamic_group_by",
41
47
  "ewma",
42
48
  "extract_jsonpath",
43
49
  "fmt",
44
50
  "horizontal_concat",
45
51
  "interpolate",
46
52
  "ipc",
47
- "is_first",
53
+ "is_first_distinct",
48
54
  "is_in",
55
+ "is_last_distinct",
49
56
  "is_unique",
50
57
  "json",
51
58
  "lazy",
52
59
  "lazy_regex",
53
60
  "list_count",
54
61
  "list_eval",
55
- "list_take",
62
+ "list_gather",
56
63
  "list_to_struct",
57
64
  "log",
58
65
  "meta",
@@ -62,6 +69,7 @@ features = [
62
69
  "parquet",
63
70
  "partition_by",
64
71
  "pct_change",
72
+ "peaks",
65
73
  "performant",
66
74
  "pivot",
67
75
  "product",
@@ -71,6 +79,7 @@ features = [
71
79
  "range",
72
80
  "reinterpret",
73
81
  "repeat_by",
82
+ "rle",
74
83
  "rolling_window",
75
84
  "round_series",
76
85
  "row_hash",
@@ -78,9 +87,10 @@ features = [
78
87
  "semi_anti_join",
79
88
  "serde-lazy",
80
89
  "sign",
90
+ "sql",
81
91
  "string_encoding",
82
- "string_from_radix",
83
- "string_justify",
92
+ "string_pad",
93
+ "string_to_integer",
84
94
  "strings",
85
95
  "timezones",
86
96
  "to_dummies",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, Value};
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -24,35 +24,34 @@ impl RbBatchedCsv {
24
24
  pub fn new(arguments: &[Value]) -> RbResult<Self> {
25
25
  // start arguments
26
26
  // this pattern is needed for more than 16
27
- let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
28
- let chunk_size: usize = arguments[1].try_convert()?;
29
- let has_header: bool = arguments[2].try_convert()?;
30
- let ignore_errors: bool = arguments[3].try_convert()?;
31
- let n_rows: Option<usize> = arguments[4].try_convert()?;
32
- let skip_rows: usize = arguments[5].try_convert()?;
33
- let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
34
- let sep: String = arguments[7].try_convert()?;
35
- let rechunk: bool = arguments[8].try_convert()?;
36
- let columns: Option<Vec<String>> = arguments[9].try_convert()?;
37
- let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
38
- let n_threads: Option<usize> = arguments[11].try_convert()?;
39
- let path: PathBuf = arguments[12].try_convert()?;
40
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
27
+ let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
28
+ let chunk_size = usize::try_convert(arguments[1])?;
29
+ let has_header = bool::try_convert(arguments[2])?;
30
+ let ignore_errors = bool::try_convert(arguments[3])?;
31
+ let n_rows = Option::<usize>::try_convert(arguments[4])?;
32
+ let skip_rows = usize::try_convert(arguments[5])?;
33
+ let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
34
+ let separator = String::try_convert(arguments[7])?;
35
+ let rechunk = bool::try_convert(arguments[8])?;
36
+ let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
37
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
38
+ let n_threads = Option::<usize>::try_convert(arguments[11])?;
39
+ let path = PathBuf::try_convert(arguments[12])?;
40
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
41
41
  // TODO fix
42
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
43
- let low_memory: bool = arguments[15].try_convert()?;
44
- let comment_char: Option<String> = arguments[16].try_convert()?;
45
- let quote_char: Option<String> = arguments[17].try_convert()?;
46
- let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
47
- let try_parse_dates: bool = arguments[19].try_convert()?;
48
- let skip_rows_after_header: usize = arguments[20].try_convert()?;
49
- let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
50
- let sample_size: usize = arguments[22].try_convert()?;
51
- let eol_char: String = arguments[23].try_convert()?;
42
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
43
+ let low_memory = bool::try_convert(arguments[15])?;
44
+ let comment_prefix = Option::<String>::try_convert(arguments[16])?;
45
+ let quote_char = Option::<String>::try_convert(arguments[17])?;
46
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
47
+ let try_parse_dates = bool::try_convert(arguments[19])?;
48
+ let skip_rows_after_header = usize::try_convert(arguments[20])?;
49
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
50
+ let sample_size = usize::try_convert(arguments[22])?;
51
+ let eol_char = String::try_convert(arguments[23])?;
52
52
  // end arguments
53
53
 
54
54
  let null_values = null_values.map(|w| w.0);
55
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
56
55
  let eol_char = eol_char.as_bytes()[0];
57
56
 
58
57
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -90,7 +89,7 @@ impl RbBatchedCsv {
90
89
  .infer_schema(infer_schema_length)
91
90
  .has_header(has_header)
92
91
  .with_n_rows(n_rows)
93
- .with_delimiter(sep.as_bytes()[0])
92
+ .with_separator(separator.as_bytes()[0])
94
93
  .with_skip_rows(skip_rows)
95
94
  .with_ignore_errors(ignore_errors)
96
95
  .with_projection(projection)
@@ -101,7 +100,7 @@ impl RbBatchedCsv {
101
100
  .with_n_threads(n_threads)
102
101
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
103
102
  .low_memory(low_memory)
104
- .with_comment_char(comment_char)
103
+ .with_comment_prefix(comment_prefix.as_deref())
105
104
  .with_null_values(null_values)
106
105
  .with_try_parse_dates(try_parse_dates)
107
106
  .with_quote_char(quote_char)