xcsv 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9a8d5c371cffac1168bab3828f1e7f3853a859d5c4eef6ef8cda2e45aee2ed5
4
- data.tar.gz: a03cd965c98b332e6c4b3dff1181d1b1bb959852958239f5da2389d12fe473db
3
+ metadata.gz: 4ff9165c5564192166cd4dba6c141e7cbec377d7e89b1e83aefeeed16a5f14e1
4
+ data.tar.gz: a8d607ec64710a73488358e07481f99a62d12f2fb09fe9747b44893ebc6c6860
5
5
  SHA512:
6
- metadata.gz: b8069138d85fea38fa096badd6055a37bfc3cc30dcf63d39240918fb65b282506352e1c95b8ae56e2dcb537399a0f9855cf84850680f978c1402e0baa61123c6
7
- data.tar.gz: 5eb46dc0ddef820e462aa2bc0a2f4c8018517426238ecf8d33def906e1ff05e760edf2760be7fd73ee9b752a3c082291bfedffadc59215c29f591f10f7d66ff7
6
+ metadata.gz: 78a1b77ae1f099e37966c93fca1669feb1c02bdb0f8e760bd1b8db36b9484eb3180041f2b704bf7dd5ba948bf6f25683b7924c27bb358911a8bf534463711a93
7
+ data.tar.gz: f3bb8a94f289faf706a65576f7d51b36f2c13504c76dd874813e34ee1572217ab3f5c9be2ba8db9e7381108dfdad95734d93960632dd08c6f4a82f1d39dce5ca
data/Cargo.lock CHANGED
@@ -1,8 +1,39 @@
1
+ [[package]]
2
+ name = "adler32"
3
+ version = "1.0.3"
4
+ source = "registry+https://github.com/rust-lang/crates.io-index"
5
+
6
+ [[package]]
7
+ name = "aho-corasick"
8
+ version = "0.6.9"
9
+ source = "registry+https://github.com/rust-lang/crates.io-index"
10
+ dependencies = [
11
+ "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "build_const"
16
+ version = "0.2.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+
19
+ [[package]]
20
+ name = "cc"
21
+ version = "1.0.25"
22
+ source = "registry+https://github.com/rust-lang/crates.io-index"
23
+
1
24
  [[package]]
2
25
  name = "cfg-if"
3
26
  version = "0.1.6"
4
27
  source = "registry+https://github.com/rust-lang/crates.io-index"
5
28
 
29
+ [[package]]
30
+ name = "crc"
31
+ version = "1.8.1"
32
+ source = "registry+https://github.com/rust-lang/crates.io-index"
33
+ dependencies = [
34
+ "build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
35
+ ]
36
+
6
37
  [[package]]
7
38
  name = "cstr-macro"
8
39
  version = "0.1.0"
@@ -25,6 +56,16 @@ dependencies = [
25
56
  "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
26
57
  ]
27
58
 
59
+ [[package]]
60
+ name = "flate2"
61
+ version = "1.0.4"
62
+ source = "registry+https://github.com/rust-lang/crates.io-index"
63
+ dependencies = [
64
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
65
+ "miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
66
+ "miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
67
+ ]
68
+
28
69
  [[package]]
29
70
  name = "helix"
30
71
  version = "0.7.5"
@@ -35,6 +76,11 @@ dependencies = [
35
76
  "libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
36
77
  ]
37
78
 
79
+ [[package]]
80
+ name = "lazy_static"
81
+ version = "1.2.0"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+
38
84
  [[package]]
39
85
  name = "libc"
40
86
  version = "0.2.43"
@@ -58,11 +104,77 @@ dependencies = [
58
104
  "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
59
105
  ]
60
106
 
107
+ [[package]]
108
+ name = "miniz-sys"
109
+ version = "0.1.11"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ dependencies = [
112
+ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
113
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "miniz_oxide"
118
+ version = "0.2.0"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ dependencies = [
121
+ "adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
122
+ ]
123
+
124
+ [[package]]
125
+ name = "miniz_oxide_c_api"
126
+ version = "0.2.0"
127
+ source = "registry+https://github.com/rust-lang/crates.io-index"
128
+ dependencies = [
129
+ "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
130
+ "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
131
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
132
+ "miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
133
+ ]
134
+
135
+ [[package]]
136
+ name = "regex"
137
+ version = "1.0.6"
138
+ source = "registry+https://github.com/rust-lang/crates.io-index"
139
+ dependencies = [
140
+ "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
141
+ "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
142
+ "regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
143
+ "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
144
+ "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
145
+ ]
146
+
147
+ [[package]]
148
+ name = "regex-syntax"
149
+ version = "0.6.3"
150
+ source = "registry+https://github.com/rust-lang/crates.io-index"
151
+ dependencies = [
152
+ "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
153
+ ]
154
+
61
155
  [[package]]
62
156
  name = "serde"
63
157
  version = "1.0.80"
64
158
  source = "registry+https://github.com/rust-lang/crates.io-index"
65
159
 
160
+ [[package]]
161
+ name = "thread_local"
162
+ version = "0.3.6"
163
+ source = "registry+https://github.com/rust-lang/crates.io-index"
164
+ dependencies = [
165
+ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
166
+ ]
167
+
168
+ [[package]]
169
+ name = "ucd-util"
170
+ version = "0.1.3"
171
+ source = "registry+https://github.com/rust-lang/crates.io-index"
172
+
173
+ [[package]]
174
+ name = "utf8-ranges"
175
+ version = "1.0.2"
176
+ source = "registry+https://github.com/rust-lang/crates.io-index"
177
+
66
178
  [[package]]
67
179
  name = "version_check"
68
180
  version = "0.1.5"
@@ -73,17 +185,34 @@ name = "xcsv"
73
185
  version = "0.1.0"
74
186
  dependencies = [
75
187
  "csv 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
188
+ "flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
76
189
  "helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
190
+ "regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
77
191
  ]
78
192
 
79
193
  [metadata]
194
+ "checksum adler32 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7e522997b529f05601e05166c07ed17789691f562762c7f3b987263d2dedee5c"
195
+ "checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
196
+ "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
197
+ "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
80
198
  "checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
199
+ "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
81
200
  "checksum cstr-macro 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db53fddba18cdd35477a7213a3ef6acfbfa333c31b42ce019e544c4a1420a06f"
82
201
  "checksum csv 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6d54f6b0fd69128a2894b1a3e57af5849a0963c1cc77b165d30b896e40296452"
83
202
  "checksum csv-core 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4dd8e6d86f7ba48b4276ef1317edc8cc36167546d8972feb4a2b5fec0b374105"
203
+ "checksum flate2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3b0c7353385f92079524de3b7116cf99d73947c08a7472774e9b3b04bff3b901"
84
204
  "checksum helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "49a017e3e798ad9386e0a0584e66fd6c04a80ccc1242eb8f689c62ce6f408240"
205
+ "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
85
206
  "checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d"
86
207
  "checksum libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fef6028cdce0c8d55676fd1d66bb810facef8cade0dd71d28511d375e84da4c0"
87
208
  "checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16"
209
+ "checksum miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0300eafb20369952951699b68243ab4334f4b10a88f411c221d444b36c40e649"
210
+ "checksum miniz_oxide 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ad30a47319c16cde58d0314f5d98202a80c9083b5f61178457403dfb14e509c"
211
+ "checksum miniz_oxide_c_api 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "28edaef377517fd9fe3e085c37d892ce7acd1fbeab9239c5a36eec352d8a8b7e"
212
+ "checksum regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ee84f70c8c08744ea9641a731c7fadb475bf2ecc52d7f627feb833e0b3990467"
213
+ "checksum regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fbc557aac2b708fe84121caf261346cc2eed71978024337e42eb46b8a252ac6e"
88
214
  "checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef"
215
+ "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
216
+ "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
217
+ "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
89
218
  "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
data/Cargo.toml CHANGED
@@ -9,3 +9,5 @@ crate-type = ["cdylib"]
9
9
  [dependencies]
10
10
  helix = "0.7.5"
11
11
  csv = "1.0.2"
12
+ regex = "1.0.6"
13
+ flate2 = "1.0.4"
data/README.md CHANGED
@@ -1,21 +1,81 @@
1
1
  Fast CSV reader based on [Rust CSV crate](https://docs.rs/csv/1.0.2/csv/)
2
2
 
3
- ---
3
+ ### Installation
4
4
 
5
5
  1. Install [Rust](https://www.rust-lang.org/)
6
6
 
7
- `curl https://sh.rustup.rs -sSf | sh`
7
+ `curl https://sh.rustup.rs -sSf | sh`
8
8
 
9
- Don't miss this message:
9
+ Don't miss this message:
10
10
 
11
+ ```
12
+ Rust is installed now. Great!
13
+
14
+ To get started you need Cargo's bin directory ($HOME/.cargo/bin) in your PATH
15
+ environment variable. Next time you log in this will be done automatically.
16
+
17
+ To configure your current shell run source $HOME/.cargo/env
18
+
19
+ ```
20
+
21
+ 2. `gem install xcsv`
22
+
23
+ ### Usage
24
+
25
+ ```ruby
26
+ require 'xcsv'
27
+
28
+ # Enumerable
29
+ csv_reader = XCSV.new("foo.csv")
30
+ csv_reader.each do |rec|
31
+ rec #=> [col1, col2, col3, ...]
32
+ end
33
+
34
+ csv_reader = XCSV.new("foo.csv")
35
+ csv_reader.take(10).to_a #=> [[col1, ...], [col1, ...], ...]
36
+
37
+ # While loop
38
+ csv_reader = XCSV.new("bar.csv")
39
+ while (rec = csv_reader.next) do
40
+ rec #=> [col1, col2, col3, ...]
41
+ end
42
+
43
+ # Both forms will gunzip if file name ends with .gz
44
+ csv_reader = XCSV.new("foo_bar.csv.gz")
45
+ while (rec = csv_reader.next) do
46
+ rec #=> [col1, col2, col3, ...]
47
+ end
11
48
  ```
12
- Rust is installed now. Great!
13
49
 
14
- To get started you need Cargo's bin directory ($HOME/.cargo/bin) in your PATH
15
- environment variable. Next time you log in this will be done automatically.
50
+ ### Benchmarks
51
+
52
+ #### Code
53
+
54
+ ```ruby
55
+ # FastestCSV
56
+ FastestCSV.foreach('sample.csv') do |rec|
57
+ end
16
58
 
17
- To configure your current shell run source $HOME/.cargo/env
59
+ # XCSV
60
+ csv_reader = XCSV.new('sample.csv')
61
+ while (l = csv_reader.next) do
62
+ end
18
63
 
64
+ # CSV
65
+ CSV.foreach('sample.csv') do |rec|
66
+ end
19
67
  ```
20
68
 
21
- 2. `bundle exec rake`
69
+ #### Parameters
70
+
71
+ |Records|File size|CPU|
72
+ |---|---|---|
73
+ |1M|742M (426M .gz)|i7-6600U @ 2.60GHz|
74
+
75
+ #### Elapsed time (secs):
76
+
77
+ |FastestCSV|XCSV|XCSV (.gz)|CSV|
78
+ |---|---|---|---|
79
+ |10.1|12.4|20.7|50.5|
80
+
81
+ **Note**: FastestCSV doesn't decode embedded newlines
data/lib/xcsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class XCSV
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/xcsv.rb CHANGED
@@ -1,2 +1,12 @@
1
1
  require 'helix_runtime'
2
2
  require 'xcsv/native'
3
+
4
+ class XCSV
5
+ include Enumerable
6
+
7
+ def each
8
+ while (r = self.next) do
9
+ yield r
10
+ end
11
+ end
12
+ end
data/src/lib.rs CHANGED
@@ -1,19 +1,25 @@
1
1
  extern crate csv;
2
+ extern crate flate2;
3
+ extern crate regex;
2
4
 
3
5
  #[macro_use]
4
6
  extern crate helix;
5
7
 
6
8
  use std::fs::File;
7
- use std::io::BufReader;
9
+ use std::io::{BufReader, Read};
8
10
  use std::ops::{Deref, DerefMut};
9
11
 
12
+ use flate2::read::GzDecoder;
13
+ use regex::Regex;
14
+
10
15
  use helix::{FromRuby, CheckResult};
11
16
  use helix::sys::{VALUE};
12
17
 
13
18
  type CSVIterType = Iterator<Item=Result<csv::StringRecord, csv::Error>>;
14
19
 
15
20
  struct CSVIter {
16
- iter: Box<CSVIterType>
21
+ iter: Box<CSVIterType>,
22
+ path: String,
17
23
  }
18
24
 
19
25
  impl Deref for CSVIter {
@@ -32,7 +38,7 @@ impl DerefMut for CSVIter {
32
38
 
33
39
  impl std::fmt::Debug for CSVIter {
34
40
  fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
35
- write!(f, "CSVIter")
41
+ write!(f, "CSVIter: {}", self.path)
36
42
  }
37
43
  }
38
44
 
@@ -48,22 +54,29 @@ impl FromRuby for CSVIter {
48
54
  fn from_ruby(value: VALUE) -> CheckResult<CSVIter> {
49
55
  let checked_path = String::from_ruby(value)?;
50
56
 
51
- let csv_reader =
52
- match File::open(String::from_checked(checked_path)) {
53
- Ok(f) =>
54
- BufReader::new(f),
55
- Err(e) =>
56
- raise!(format!("Error while opening file: {}", e)),
57
+ let path = String::from_checked(checked_path);
58
+
59
+ let gz_regex = Regex::new("\\.gz\\z").unwrap();
60
+
61
+ let buf_reader =
62
+ match File::open(path.clone()) {
63
+ Ok(f) => BufReader::new(f),
64
+ Err(e) => raise!(format!("Error while opening file: {}", e)),
65
+ };
66
+
67
+ let gz_reader: Box<Read> =
68
+ if gz_regex.is_match(&path) {
69
+ Box::new(GzDecoder::new(buf_reader))
70
+ } else {
71
+ Box::new(buf_reader)
57
72
  };
58
73
 
59
74
  let csv_reader =
60
75
  csv::ReaderBuilder::new()
61
76
  .has_headers(false)
62
- .from_reader(csv_reader);
63
-
64
- let records = csv_reader.into_records();
77
+ .from_reader(gz_reader);
65
78
 
66
- Ok(CSVIter{iter: Box::new(records)})
79
+ Ok(CSVIter{iter: Box::new(csv_reader.into_records()), path: path})
67
80
  }
68
81
 
69
82
  fn from_checked(checked: CSVIter) -> CSVIter {
@@ -81,7 +94,7 @@ ruby! {
81
94
  XCSV { helix, iter }
82
95
  }
83
96
 
84
- def next_line(&mut self) -> Result<Option<Vec<String>>, helix::Error> {
97
+ def next(&mut self) -> Result<Option<Vec<String>>, helix::Error> {
85
98
  match self.iter.next() {
86
99
  Some(Ok(record)) =>
87
100
  Ok(Some(record.iter().map(|s| s.to_string()).collect())),
data/xcsv.gemspec CHANGED
@@ -22,6 +22,8 @@ Gem::Specification.new do |spec|
22
22
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
23
  spec.require_paths = ["lib"]
24
24
 
25
+ spec.required_ruby_version = '>= 2.0'
26
+
25
27
  spec.add_runtime_dependency "helix_runtime", "= 0.7.5"
26
28
  spec.add_development_dependency "bundler", "~> 1.17"
27
29
  spec.add_development_dependency "rake", "~> 12.3"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Moroz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-16 00:00:00.000000000 Z
11
+ date: 2018-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: helix_runtime
@@ -98,7 +98,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
98
98
  requirements:
99
99
  - - ">="
100
100
  - !ruby/object:Gem::Version
101
- version: '0'
101
+ version: '2.0'
102
102
  required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  requirements:
104
104
  - - ">="