dedup_csv 0.1.0-x86_64-linux → 0.1.1-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +13 -0
- data/Rakefile +5 -5
- data/ext/dedup_csv/Cargo.toml +14 -0
- data/ext/dedup_csv/extconf.rb +6 -0
- data/ext/dedup_csv/src/lib.rs +77 -0
- data/lib/dedup_csv/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45c03b08fa55283d608e1de618ba4b2e2ab57cfa8ea2d5b66f3ad3c423aef642
|
4
|
+
data.tar.gz: f88169135fbe5ab18388300b7ca28e0492cde3e9a0e0da262c916b7b999f837b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16bddd21882ea9cc516ac45f479cbcdecd33641b441fa2c2b81999a11ba2723faa63c2f4858c5af5111f5695ba1cf134b238a3a88ed3f843d4b4997ca1e5b96e
|
7
|
+
data.tar.gz: b5418d9534185df0182a9d5a841efe2c46da0858a272724e1c1ad923295eb69a9a8f386825ee1449d0f21dd10527fd00f9508846a8891bca79dd8fb4575fc9b3
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
|
3
3
|
Given 2 CSV files, this gem will create a third CSV file that contains the rows from the second CSV file that are not present in the first CSV file.
|
4
4
|
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
gem install dedup_csv
|
9
|
+
```
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
```irb
|
14
|
+
require 'dedup_csv/3.2/dedup_csv'
|
15
|
+
DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
|
16
|
+
```
|
17
|
+
|
5
18
|
## Contributing
|
6
19
|
|
7
20
|
Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
|
data/Rakefile
CHANGED
@@ -14,11 +14,11 @@ require 'rb_sys/extensiontask'
|
|
14
14
|
task build: :compile
|
15
15
|
|
16
16
|
spec = Bundler.load_gemspec('dedup_csv.gemspec')
|
17
|
-
spec.requirements.clear
|
18
|
-
spec.required_ruby_version = nil
|
19
|
-
spec.required_rubygems_version = nil
|
20
|
-
spec.extensions.clear
|
21
|
-
spec.files -= Dir['ext/**/*']
|
17
|
+
# spec.requirements.clear
|
18
|
+
# spec.required_ruby_version = nil
|
19
|
+
# spec.required_rubygems_version = nil
|
20
|
+
# spec.extensions.clear
|
21
|
+
# spec.files -= Dir['ext/**/*']
|
22
22
|
|
23
23
|
Rake::ExtensionTask.new('dedup_csv', spec) do |c|
|
24
24
|
c.lib_dir = 'lib/dedup_csv'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "dedup_csv"
|
3
|
+
version = "0.1.0"
|
4
|
+
edition = "2021"
|
5
|
+
authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
|
6
|
+
publish = false
|
7
|
+
|
8
|
+
[lib]
|
9
|
+
crate-type = ["cdylib"]
|
10
|
+
|
11
|
+
[dependencies]
|
12
|
+
magnus = { version = "0.7.1" }
|
13
|
+
csv = "1.3.0"
|
14
|
+
eyre = "0.6.12"
|
@@ -0,0 +1,77 @@
|
|
1
|
+
use std::error::Error;
|
2
|
+
use std::ffi::OsStr;
|
3
|
+
use std::fs::File;
|
4
|
+
use std::path::Path;
|
5
|
+
use csv::{StringRecord, Writer};
|
6
|
+
use magnus::{define_module, function, prelude::*, Ruby};
|
7
|
+
|
8
|
+
fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
|
9
|
+
if !previous_csv_path.has_extension(&["csv"]) {
|
10
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
|
11
|
+
}
|
12
|
+
if !new_csv_path.has_extension(&["csv"]) {
|
13
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
|
14
|
+
}
|
15
|
+
|
16
|
+
let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
|
17
|
+
let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
|
18
|
+
|
19
|
+
let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
|
20
|
+
let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
|
21
|
+
|
22
|
+
let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
|
23
|
+
|
24
|
+
let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
|
25
|
+
let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
|
26
|
+
|
27
|
+
if previous_headers != new_headers {
|
28
|
+
return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
|
29
|
+
}
|
30
|
+
|
31
|
+
wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
|
32
|
+
|
33
|
+
let mut previous_records = vec![];
|
34
|
+
for previous_record in previous_csv.records() {
|
35
|
+
let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
|
36
|
+
previous_records.push(previous_record)
|
37
|
+
}
|
38
|
+
|
39
|
+
for new_record in new_csv.records() {
|
40
|
+
let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
|
41
|
+
if !previous_records.contains(&new_record) {
|
42
|
+
let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
|
43
|
+
wtr.write_byte_record(new_record.as_byte_record()).unwrap();
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
wtr.flush().unwrap();
|
48
|
+
|
49
|
+
Ok(())
|
50
|
+
}
|
51
|
+
|
52
|
+
fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
|
53
|
+
magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
|
54
|
+
}
|
55
|
+
|
56
|
+
#[magnus::init]
|
57
|
+
fn init() -> Result<(), magnus::Error> {
|
58
|
+
let module = define_module("DedupCsv")?;
|
59
|
+
module.define_singleton_method("dedup", function!(dedup, 3))?;
|
60
|
+
Ok(())
|
61
|
+
}
|
62
|
+
|
63
|
+
/// Extension check for anything that can be viewed as a [`Path`].
pub trait FileExtension {
    /// Returns `true` when the path's extension equals, ignoring ASCII case, at least
    /// one entry of `extensions` (given without a leading dot, e.g. `"csv"`).
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    /// A path with no extension, or whose extension is not valid UTF-8, never matches.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        self.as_ref()
            .extension()
            .and_then(OsStr::to_str)
            .map_or(false, |extension| {
                extensions
                    .iter()
                    .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(extension))
            })
    }
}
|
data/lib/dedup_csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dedup_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- kingsley.hendrickse
|
@@ -25,6 +25,9 @@ files:
|
|
25
25
|
- Gemfile.lock
|
26
26
|
- README.md
|
27
27
|
- Rakefile
|
28
|
+
- ext/dedup_csv/Cargo.toml
|
29
|
+
- ext/dedup_csv/extconf.rb
|
30
|
+
- ext/dedup_csv/src/lib.rs
|
28
31
|
- lib/dedup_csv.rb
|
29
32
|
- lib/dedup_csv/2.7/dedup_csv.so
|
30
33
|
- lib/dedup_csv/3.0/dedup_csv.so
|
@@ -53,7 +56,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
56
|
requirements:
|
54
57
|
- - ">="
|
55
58
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
59
|
+
version: 3.0.0
|
57
60
|
requirements: []
|
58
61
|
rubygems_version: 3.4.4
|
59
62
|
signing_key:
|