xcsv 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9a8d5c371cffac1168bab3828f1e7f3853a859d5c4eef6ef8cda2e45aee2ed5
4
- data.tar.gz: a03cd965c98b332e6c4b3dff1181d1b1bb959852958239f5da2389d12fe473db
3
+ metadata.gz: 4ff9165c5564192166cd4dba6c141e7cbec377d7e89b1e83aefeeed16a5f14e1
4
+ data.tar.gz: a8d607ec64710a73488358e07481f99a62d12f2fb09fe9747b44893ebc6c6860
5
5
  SHA512:
6
- metadata.gz: b8069138d85fea38fa096badd6055a37bfc3cc30dcf63d39240918fb65b282506352e1c95b8ae56e2dcb537399a0f9855cf84850680f978c1402e0baa61123c6
7
- data.tar.gz: 5eb46dc0ddef820e462aa2bc0a2f4c8018517426238ecf8d33def906e1ff05e760edf2760be7fd73ee9b752a3c082291bfedffadc59215c29f591f10f7d66ff7
6
+ metadata.gz: 78a1b77ae1f099e37966c93fca1669feb1c02bdb0f8e760bd1b8db36b9484eb3180041f2b704bf7dd5ba948bf6f25683b7924c27bb358911a8bf534463711a93
7
+ data.tar.gz: f3bb8a94f289faf706a65576f7d51b36f2c13504c76dd874813e34ee1572217ab3f5c9be2ba8db9e7381108dfdad95734d93960632dd08c6f4a82f1d39dce5ca
data/Cargo.lock CHANGED
@@ -1,8 +1,39 @@
1
+ [[package]]
2
+ name = "adler32"
3
+ version = "1.0.3"
4
+ source = "registry+https://github.com/rust-lang/crates.io-index"
5
+
6
+ [[package]]
7
+ name = "aho-corasick"
8
+ version = "0.6.9"
9
+ source = "registry+https://github.com/rust-lang/crates.io-index"
10
+ dependencies = [
11
+ "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "build_const"
16
+ version = "0.2.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+
19
+ [[package]]
20
+ name = "cc"
21
+ version = "1.0.25"
22
+ source = "registry+https://github.com/rust-lang/crates.io-index"
23
+
1
24
  [[package]]
2
25
  name = "cfg-if"
3
26
  version = "0.1.6"
4
27
  source = "registry+https://github.com/rust-lang/crates.io-index"
5
28
 
29
+ [[package]]
30
+ name = "crc"
31
+ version = "1.8.1"
32
+ source = "registry+https://github.com/rust-lang/crates.io-index"
33
+ dependencies = [
34
+ "build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
35
+ ]
36
+
6
37
  [[package]]
7
38
  name = "cstr-macro"
8
39
  version = "0.1.0"
@@ -25,6 +56,16 @@ dependencies = [
25
56
  "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
26
57
  ]
27
58
 
59
+ [[package]]
60
+ name = "flate2"
61
+ version = "1.0.4"
62
+ source = "registry+https://github.com/rust-lang/crates.io-index"
63
+ dependencies = [
64
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
65
+ "miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
66
+ "miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
67
+ ]
68
+
28
69
  [[package]]
29
70
  name = "helix"
30
71
  version = "0.7.5"
@@ -35,6 +76,11 @@ dependencies = [
35
76
  "libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
36
77
  ]
37
78
 
79
+ [[package]]
80
+ name = "lazy_static"
81
+ version = "1.2.0"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+
38
84
  [[package]]
39
85
  name = "libc"
40
86
  version = "0.2.43"
@@ -58,11 +104,77 @@ dependencies = [
58
104
  "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
59
105
  ]
60
106
 
107
+ [[package]]
108
+ name = "miniz-sys"
109
+ version = "0.1.11"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ dependencies = [
112
+ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
113
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "miniz_oxide"
118
+ version = "0.2.0"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ dependencies = [
121
+ "adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
122
+ ]
123
+
124
+ [[package]]
125
+ name = "miniz_oxide_c_api"
126
+ version = "0.2.0"
127
+ source = "registry+https://github.com/rust-lang/crates.io-index"
128
+ dependencies = [
129
+ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
130
+ "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
131
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
132
+ "miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
133
+ ]
134
+
135
+ [[package]]
136
+ name = "regex"
137
+ version = "1.0.6"
138
+ source = "registry+https://github.com/rust-lang/crates.io-index"
139
+ dependencies = [
140
+ "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
141
+ "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
142
+ "regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
143
+ "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
144
+ "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
145
+ ]
146
+
147
+ [[package]]
148
+ name = "regex-syntax"
149
+ version = "0.6.3"
150
+ source = "registry+https://github.com/rust-lang/crates.io-index"
151
+ dependencies = [
152
+ "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
153
+ ]
154
+
61
155
  [[package]]
62
156
  name = "serde"
63
157
  version = "1.0.80"
64
158
  source = "registry+https://github.com/rust-lang/crates.io-index"
65
159
 
160
+ [[package]]
161
+ name = "thread_local"
162
+ version = "0.3.6"
163
+ source = "registry+https://github.com/rust-lang/crates.io-index"
164
+ dependencies = [
165
+ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
166
+ ]
167
+
168
+ [[package]]
169
+ name = "ucd-util"
170
+ version = "0.1.3"
171
+ source = "registry+https://github.com/rust-lang/crates.io-index"
172
+
173
+ [[package]]
174
+ name = "utf8-ranges"
175
+ version = "1.0.2"
176
+ source = "registry+https://github.com/rust-lang/crates.io-index"
177
+
66
178
  [[package]]
67
179
  name = "version_check"
68
180
  version = "0.1.5"
@@ -73,17 +185,34 @@ name = "xcsv"
73
185
  version = "0.1.0"
74
186
  dependencies = [
75
187
  "csv 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
188
+ "flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
76
189
  "helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
190
+ "regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
77
191
  ]
78
192
 
79
193
  [metadata]
194
+ "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c"
195
+ "checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
196
+ "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
197
+ "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
80
198
  "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
199
+ "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
81
200
  "checksum cstr-macro 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db53fddba18cdd35477a7213a3ef6acfbfa333c31b42ce019e544c4a1420a06f"
82
201
  "checksum csv 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6d54f6b0fd69128a2894b1a3e57af5849a0963c1cc77b165d30b896e40296452"
83
202
  "checksum csv-core 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4dd8e6d86f7ba48b4276ef1317edc8cc36167546d8972feb4a2b5fec0b374105"
203
+ "checksum flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3b0c7353385f92079524de3b7116cf99d73947c08a7472774e9b3b04bff3b901"
84
204
  "checksum helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "49a017e3e798ad9386e0a0584e66fd6c04a80ccc1242eb8f689c62ce6f408240"
205
+ "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
85
206
  "checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d"
86
207
  "checksum libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fef6028cdce0c8d55676fd1d66bb810facef8cade0dd71d28511d375e84da4c0"
87
208
  "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16"
209
+ "checksum miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0300eafb20369952951699b68243ab4334f4b10a88f411c221d444b36c40e649"
210
+ "checksum miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ad30a47319c16cde58d0314f5d98202a80c9083b5f61178457403dfb14e509c"
211
+ "checksum miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "28edaef377517fd9fe3e085c37d892ce7acd1fbeab9239c5a36eec352d8a8b7e"
212
+ "checksum regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ee84f70c8c08744ea9641a731c7fadb475bf2ecc52d7f627feb833e0b3990467"
213
+ "checksum regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fbc557aac2b708fe84121caf261346cc2eed71978024337e42eb46b8a252ac6e"
88
214
  "checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef"
215
+ "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
216
+ "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
217
+ "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
89
218
  "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
data/Cargo.toml CHANGED
@@ -9,3 +9,5 @@ crate-type = ["cdylib"]
9
9
  [dependencies]
10
10
  helix = "0.7.5"
11
11
  csv = "1.0.2"
12
+ regex = "1.0.6"
13
+ flate2 = "1.0.4"
data/README.md CHANGED
@@ -1,21 +1,81 @@
1
1
  Fast CSV reader based on [Rust CSV crate](https://docs.rs/csv/1.0.2/csv/)
2
2
 
3
- ---
3
+ ### Installation
4
4
 
5
5
  1. Install [Rust](https://www.rust-lang.org/)
6
6
 
7
- `curl https://sh.rustup.rs -sSf | sh`
7
+ `curl https://sh.rustup.rs -sSf | sh`
8
8
 
9
- Don't miss this message:
9
+ Don't miss this message:
10
10
 
11
+ ```
12
+ Rust is installed now. Great!
13
+
14
+ To get started you need Cargo's bin directory ($HOME/.cargo/bin) in your PATH
15
+ environment variable. Next time you log in this will be done automatically.
16
+
17
+ To configure your current shell run source $HOME/.cargo/env
18
+
19
+ ```
20
+
21
+ 2. `gem install xcsv`
22
+
23
+ ### Usage
24
+
25
+ ```ruby
26
+ require 'xcsv'
27
+
28
+ # Enumerable
29
+ csv_reader = XCSV.new("foo.csv")
30
+ csv_reader.each do |rec|
31
+ rec #=> [col1, col2, col3, ...]
32
+ end
33
+
34
+ csv_reader = XCSV.new("foo.csv")
35
+ csv_reader.take(10).to_a #=> [[col1, ...], [col1, ...], ...]
36
+
37
+ # While loop
38
+ csv_reader = XCSV.new("bar.csv")
39
+ while (rec = csv_reader.next) do
40
+ rec #=> [col1, col2, col3, ...]
41
+ end
42
+
43
+ # Both forms will gunzip if file name ends with .gz
44
+ csv_reader = XCSV.new("foo_bar.csv.gz")
45
+ while (rec = csv_reader.next) do
46
+ rec #=> [col1, col2, col3, ...]
47
+ end
11
48
  ```
12
- Rust is installed now. Great!
13
49
 
14
- To get started you need Cargo's bin directory ($HOME/.cargo/bin) in your PATH
15
- environment variable. Next time you log in this will be done automatically.
50
+ ### Benchmarks
51
+
52
+ #### Code
53
+
54
+ ```ruby
55
+ # FastestCSV
56
+ FastestCSV.foreach('sample.csv') do |rec|
57
+ end
16
58
 
17
- To configure your current shell run source $HOME/.cargo/env
59
+ # XCSV
60
+ csv_reader = XCSV.new('sample.csv')
61
+ while (l = csv_reader.next) do
62
+ end
18
63
 
64
+ # CSV
65
+ CSV.foreach('sample.csv') do |rec|
66
+ end
19
67
  ```
20
68
 
21
- 2. `bundle exec rake`
69
+ #### Parameters
70
+
71
+ |Records|File size|CPU|
72
+ |---|---|---|
73
+ |1M|742M (426M .gz)|i7-6600U @ 2.60GHz|
74
+
75
+ #### Elapsed time (secs):
76
+
77
+ |FastestCSV|XCSV|XCSV (.gz)|CSV|
78
+ |---|---|---|---|
79
+ |10.1|12.4|20.7|50.5|
80
+
81
+ **Note**: FastestCSV doesn't decode embedded newlines
data/lib/xcsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class XCSV
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/xcsv.rb CHANGED
@@ -1,2 +1,12 @@
1
1
  require 'helix_runtime'
2
2
  require 'xcsv/native'
3
+
4
+ class XCSV
5
+ include Enumerable
6
+
7
+ def each
8
+ while (r = self.next) do
9
+ yield r
10
+ end
11
+ end
12
+ end
data/src/lib.rs CHANGED
@@ -1,19 +1,25 @@
1
1
  extern crate csv;
2
+ extern crate flate2;
3
+ extern crate regex;
2
4
 
3
5
  #[macro_use]
4
6
  extern crate helix;
5
7
 
6
8
  use std::fs::File;
7
- use std::io::BufReader;
9
+ use std::io::{BufReader, Read};
8
10
  use std::ops::{Deref, DerefMut};
9
11
 
12
+ use flate2::read::GzDecoder;
13
+ use regex::Regex;
14
+
10
15
  use helix::{FromRuby, CheckResult};
11
16
  use helix::sys::{VALUE};
12
17
 
13
18
  type CSVIterType = Iterator<Item=Result<csv::StringRecord, csv::Error>>;
14
19
 
15
20
  struct CSVIter {
16
- iter: Box<CSVIterType>
21
+ iter: Box<CSVIterType>,
22
+ path: String,
17
23
  }
18
24
 
19
25
  impl Deref for CSVIter {
@@ -32,7 +38,7 @@ impl DerefMut for CSVIter {
32
38
 
33
39
  impl std::fmt::Debug for CSVIter {
34
40
  fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
35
- write!(f, "CSVIter")
41
+ write!(f, "CSVIter: {}", self.path)
36
42
  }
37
43
  }
38
44
 
@@ -48,22 +54,29 @@ impl FromRuby for CSVIter {
48
54
  fn from_ruby(value: VALUE) -> CheckResult<CSVIter> {
49
55
  let checked_path = String::from_ruby(value)?;
50
56
 
51
- let csv_reader =
52
- match File::open(String::from_checked(checked_path)) {
53
- Ok(f) =>
54
- BufReader::new(f),
55
- Err(e) =>
56
- raise!(format!("Error while opening file: {}", e)),
57
+ let path = String::from_checked(checked_path);
58
+
59
+ let gz_regex = Regex::new("\\.gz\\z").unwrap();
60
+
61
+ let buf_reader =
62
+ match File::open(path.clone()) {
63
+ Ok(f) => BufReader::new(f),
64
+ Err(e) => raise!(format!("Error while opening file: {}", e)),
65
+ };
66
+
67
+ let gz_reader: Box<Read> =
68
+ if gz_regex.is_match(&path) {
69
+ Box::new(GzDecoder::new(buf_reader))
70
+ } else {
71
+ Box::new(buf_reader)
57
72
  };
58
73
 
59
74
  let csv_reader =
60
75
  csv::ReaderBuilder::new()
61
76
  .has_headers(false)
62
- .from_reader(csv_reader);
63
-
64
- let records = csv_reader.into_records();
77
+ .from_reader(gz_reader);
65
78
 
66
- Ok(CSVIter{iter: Box::new(records)})
79
+ Ok(CSVIter{iter: Box::new(csv_reader.into_records()), path: path})
67
80
  }
68
81
 
69
82
  fn from_checked(checked: CSVIter) -> CSVIter {
@@ -81,7 +94,7 @@ ruby! {
81
94
  XCSV { helix, iter }
82
95
  }
83
96
 
84
- def next_line(&mut self) -> Result<Option<Vec<String>>, helix::Error> {
97
+ def next(&mut self) -> Result<Option<Vec<String>>, helix::Error> {
85
98
  match self.iter.next() {
86
99
  Some(Ok(record)) =>
87
100
  Ok(Some(record.iter().map(|s| s.to_string()).collect())),
data/xcsv.gemspec CHANGED
@@ -22,6 +22,8 @@ Gem::Specification.new do |spec|
22
22
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
23
  spec.require_paths = ["lib"]
24
24
 
25
+ spec.required_ruby_version = '>= 2.0'
26
+
25
27
  spec.add_runtime_dependency "helix_runtime", "= 0.7.5"
26
28
  spec.add_development_dependency "bundler", "~> 1.17"
27
29
  spec.add_development_dependency "rake", "~> 12.3"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Moroz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-16 00:00:00.000000000 Z
11
+ date: 2018-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: helix_runtime
@@ -98,7 +98,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
98
98
  requirements:
99
99
  - - ">="
100
100
  - !ruby/object:Gem::Version
101
- version: '0'
101
+ version: '2.0'
102
102
  required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  requirements:
104
104
  - - ">="