dedup_csv 0.1.1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1dcbe3b008e5b30deb07a50b0b705bbbaed20483986584883c7172f35886e277
4
+ data.tar.gz: c99bbe5a65cde9b55dd214a3b9e51f7d67309a3e39a1f7ad7a84faddc4efd9f1
5
+ SHA512:
6
+ metadata.gz: 3a8d7b974095956d59f6dc33120cb9cd8b70c67b01a39878094a607f0c9f1e3054a6ce9f3f1e36efbc32dc6bb8a1572f9057c1b3b32b6eaaf8ba36539b080ccb
7
+ data.tar.gz: 3119e73b752f6cdcff11d1bbfc4ac8e0670b98e6f109450a828152db4753da946a566a428e6136d0829671c0ea648445e9edad494041d8d661817c50546707f4
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,24 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+ SuggestExtensions: false
4
+ NewCops: enable
5
+
6
+ Style/StringLiterals:
7
+ Enabled: true
8
+ EnforcedStyle: single_quotes
9
+
10
+ Style/StringLiteralsInInterpolation:
11
+ Enabled: true
12
+ EnforcedStyle: double_quotes
13
+
14
+ Layout/LineLength:
15
+ Max: 120
16
+
17
+ Metrics/BlockLength:
18
+ Max: 120
19
+
20
+
21
+
22
+
23
+
24
+
data/Cargo.lock ADDED
@@ -0,0 +1,424 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 3
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.3"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.69.4"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "lazy_static",
25
+ "lazycell",
26
+ "proc-macro2",
27
+ "quote",
28
+ "regex",
29
+ "rustc-hash",
30
+ "shlex",
31
+ "syn",
32
+ ]
33
+
34
+ [[package]]
35
+ name = "bitflags"
36
+ version = "2.6.0"
37
+ source = "registry+https://github.com/rust-lang/crates.io-index"
38
+ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
39
+
40
+ [[package]]
41
+ name = "cexpr"
42
+ version = "0.6.0"
43
+ source = "registry+https://github.com/rust-lang/crates.io-index"
44
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
45
+ dependencies = [
46
+ "nom",
47
+ ]
48
+
49
+ [[package]]
50
+ name = "cfg-if"
51
+ version = "1.0.0"
52
+ source = "registry+https://github.com/rust-lang/crates.io-index"
53
+ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
54
+
55
+ [[package]]
56
+ name = "clang-sys"
57
+ version = "1.8.1"
58
+ source = "registry+https://github.com/rust-lang/crates.io-index"
59
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
60
+ dependencies = [
61
+ "glob",
62
+ "libc",
63
+ "libloading",
64
+ ]
65
+
66
+ [[package]]
67
+ name = "csv"
68
+ version = "1.3.0"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
71
+ dependencies = [
72
+ "csv-core",
73
+ "itoa",
74
+ "ryu",
75
+ "serde",
76
+ ]
77
+
78
+ [[package]]
79
+ name = "csv-core"
80
+ version = "0.1.11"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
83
+ dependencies = [
84
+ "memchr",
85
+ ]
86
+
87
+ [[package]]
88
+ name = "dedup_csv"
89
+ version = "0.1.0"
90
+ dependencies = [
91
+ "csv",
92
+ "eyre",
93
+ "magnus",
94
+ ]
95
+
96
+ [[package]]
97
+ name = "either"
98
+ version = "1.13.0"
99
+ source = "registry+https://github.com/rust-lang/crates.io-index"
100
+ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
101
+
102
+ [[package]]
103
+ name = "eyre"
104
+ version = "0.6.12"
105
+ source = "registry+https://github.com/rust-lang/crates.io-index"
106
+ checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec"
107
+ dependencies = [
108
+ "indenter",
109
+ "once_cell",
110
+ ]
111
+
112
+ [[package]]
113
+ name = "glob"
114
+ version = "0.3.1"
115
+ source = "registry+https://github.com/rust-lang/crates.io-index"
116
+ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
117
+
118
+ [[package]]
119
+ name = "indenter"
120
+ version = "0.3.3"
121
+ source = "registry+https://github.com/rust-lang/crates.io-index"
122
+ checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
123
+
124
+ [[package]]
125
+ name = "itertools"
126
+ version = "0.12.1"
127
+ source = "registry+https://github.com/rust-lang/crates.io-index"
128
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
129
+ dependencies = [
130
+ "either",
131
+ ]
132
+
133
+ [[package]]
134
+ name = "itoa"
135
+ version = "1.0.11"
136
+ source = "registry+https://github.com/rust-lang/crates.io-index"
137
+ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
138
+
139
+ [[package]]
140
+ name = "lazy_static"
141
+ version = "1.5.0"
142
+ source = "registry+https://github.com/rust-lang/crates.io-index"
143
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
144
+
145
+ [[package]]
146
+ name = "lazycell"
147
+ version = "1.3.0"
148
+ source = "registry+https://github.com/rust-lang/crates.io-index"
149
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
150
+
151
+ [[package]]
152
+ name = "libc"
153
+ version = "0.2.155"
154
+ source = "registry+https://github.com/rust-lang/crates.io-index"
155
+ checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
156
+
157
+ [[package]]
158
+ name = "libloading"
159
+ version = "0.8.4"
160
+ source = "registry+https://github.com/rust-lang/crates.io-index"
161
+ checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
162
+ dependencies = [
163
+ "cfg-if",
164
+ "windows-targets",
165
+ ]
166
+
167
+ [[package]]
168
+ name = "magnus"
169
+ version = "0.7.1"
170
+ source = "registry+https://github.com/rust-lang/crates.io-index"
171
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
172
+ dependencies = [
173
+ "magnus-macros",
174
+ "rb-sys",
175
+ "rb-sys-env",
176
+ "seq-macro",
177
+ ]
178
+
179
+ [[package]]
180
+ name = "magnus-macros"
181
+ version = "0.6.0"
182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
183
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
184
+ dependencies = [
185
+ "proc-macro2",
186
+ "quote",
187
+ "syn",
188
+ ]
189
+
190
+ [[package]]
191
+ name = "memchr"
192
+ version = "2.7.4"
193
+ source = "registry+https://github.com/rust-lang/crates.io-index"
194
+ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
195
+
196
+ [[package]]
197
+ name = "minimal-lexical"
198
+ version = "0.2.1"
199
+ source = "registry+https://github.com/rust-lang/crates.io-index"
200
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
201
+
202
+ [[package]]
203
+ name = "nom"
204
+ version = "7.1.3"
205
+ source = "registry+https://github.com/rust-lang/crates.io-index"
206
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
207
+ dependencies = [
208
+ "memchr",
209
+ "minimal-lexical",
210
+ ]
211
+
212
+ [[package]]
213
+ name = "once_cell"
214
+ version = "1.19.0"
215
+ source = "registry+https://github.com/rust-lang/crates.io-index"
216
+ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
217
+
218
+ [[package]]
219
+ name = "proc-macro2"
220
+ version = "1.0.86"
221
+ source = "registry+https://github.com/rust-lang/crates.io-index"
222
+ checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
223
+ dependencies = [
224
+ "unicode-ident",
225
+ ]
226
+
227
+ [[package]]
228
+ name = "quote"
229
+ version = "1.0.36"
230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
231
+ checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
232
+ dependencies = [
233
+ "proc-macro2",
234
+ ]
235
+
236
+ [[package]]
237
+ name = "rb-sys"
238
+ version = "0.9.98"
239
+ source = "registry+https://github.com/rust-lang/crates.io-index"
240
+ checksum = "8914b2e6af10bd50dd7aaac8c5146872d3924d6012929b4ff504e988f6badd24"
241
+ dependencies = [
242
+ "rb-sys-build",
243
+ ]
244
+
245
+ [[package]]
246
+ name = "rb-sys-build"
247
+ version = "0.9.98"
248
+ source = "registry+https://github.com/rust-lang/crates.io-index"
249
+ checksum = "12af68c9757d419b82d65a12b5db538990dfe9416049fea3f0ba4b9a8ca108cd"
250
+ dependencies = [
251
+ "bindgen",
252
+ "lazy_static",
253
+ "proc-macro2",
254
+ "quote",
255
+ "regex",
256
+ "shell-words",
257
+ "syn",
258
+ ]
259
+
260
+ [[package]]
261
+ name = "rb-sys-env"
262
+ version = "0.1.2"
263
+ source = "registry+https://github.com/rust-lang/crates.io-index"
264
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
265
+
266
+ [[package]]
267
+ name = "regex"
268
+ version = "1.10.5"
269
+ source = "registry+https://github.com/rust-lang/crates.io-index"
270
+ checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
271
+ dependencies = [
272
+ "aho-corasick",
273
+ "memchr",
274
+ "regex-automata",
275
+ "regex-syntax",
276
+ ]
277
+
278
+ [[package]]
279
+ name = "regex-automata"
280
+ version = "0.4.7"
281
+ source = "registry+https://github.com/rust-lang/crates.io-index"
282
+ checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
283
+ dependencies = [
284
+ "aho-corasick",
285
+ "memchr",
286
+ "regex-syntax",
287
+ ]
288
+
289
+ [[package]]
290
+ name = "regex-syntax"
291
+ version = "0.8.4"
292
+ source = "registry+https://github.com/rust-lang/crates.io-index"
293
+ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
294
+
295
+ [[package]]
296
+ name = "rustc-hash"
297
+ version = "1.1.0"
298
+ source = "registry+https://github.com/rust-lang/crates.io-index"
299
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
300
+
301
+ [[package]]
302
+ name = "ryu"
303
+ version = "1.0.18"
304
+ source = "registry+https://github.com/rust-lang/crates.io-index"
305
+ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
306
+
307
+ [[package]]
308
+ name = "seq-macro"
309
+ version = "0.3.5"
310
+ source = "registry+https://github.com/rust-lang/crates.io-index"
311
+ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
312
+
313
+ [[package]]
314
+ name = "serde"
315
+ version = "1.0.204"
316
+ source = "registry+https://github.com/rust-lang/crates.io-index"
317
+ checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
318
+ dependencies = [
319
+ "serde_derive",
320
+ ]
321
+
322
+ [[package]]
323
+ name = "serde_derive"
324
+ version = "1.0.204"
325
+ source = "registry+https://github.com/rust-lang/crates.io-index"
326
+ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
327
+ dependencies = [
328
+ "proc-macro2",
329
+ "quote",
330
+ "syn",
331
+ ]
332
+
333
+ [[package]]
334
+ name = "shell-words"
335
+ version = "1.1.0"
336
+ source = "registry+https://github.com/rust-lang/crates.io-index"
337
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
338
+
339
+ [[package]]
340
+ name = "shlex"
341
+ version = "1.3.0"
342
+ source = "registry+https://github.com/rust-lang/crates.io-index"
343
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
344
+
345
+ [[package]]
346
+ name = "syn"
347
+ version = "2.0.71"
348
+ source = "registry+https://github.com/rust-lang/crates.io-index"
349
+ checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
350
+ dependencies = [
351
+ "proc-macro2",
352
+ "quote",
353
+ "unicode-ident",
354
+ ]
355
+
356
+ [[package]]
357
+ name = "unicode-ident"
358
+ version = "1.0.12"
359
+ source = "registry+https://github.com/rust-lang/crates.io-index"
360
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
361
+
362
+ [[package]]
363
+ name = "windows-targets"
364
+ version = "0.52.6"
365
+ source = "registry+https://github.com/rust-lang/crates.io-index"
366
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
367
+ dependencies = [
368
+ "windows_aarch64_gnullvm",
369
+ "windows_aarch64_msvc",
370
+ "windows_i686_gnu",
371
+ "windows_i686_gnullvm",
372
+ "windows_i686_msvc",
373
+ "windows_x86_64_gnu",
374
+ "windows_x86_64_gnullvm",
375
+ "windows_x86_64_msvc",
376
+ ]
377
+
378
+ [[package]]
379
+ name = "windows_aarch64_gnullvm"
380
+ version = "0.52.6"
381
+ source = "registry+https://github.com/rust-lang/crates.io-index"
382
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
383
+
384
+ [[package]]
385
+ name = "windows_aarch64_msvc"
386
+ version = "0.52.6"
387
+ source = "registry+https://github.com/rust-lang/crates.io-index"
388
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
389
+
390
+ [[package]]
391
+ name = "windows_i686_gnu"
392
+ version = "0.52.6"
393
+ source = "registry+https://github.com/rust-lang/crates.io-index"
394
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
395
+
396
+ [[package]]
397
+ name = "windows_i686_gnullvm"
398
+ version = "0.52.6"
399
+ source = "registry+https://github.com/rust-lang/crates.io-index"
400
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
401
+
402
+ [[package]]
403
+ name = "windows_i686_msvc"
404
+ version = "0.52.6"
405
+ source = "registry+https://github.com/rust-lang/crates.io-index"
406
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
407
+
408
+ [[package]]
409
+ name = "windows_x86_64_gnu"
410
+ version = "0.52.6"
411
+ source = "registry+https://github.com/rust-lang/crates.io-index"
412
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
413
+
414
+ [[package]]
415
+ name = "windows_x86_64_gnullvm"
416
+ version = "0.52.6"
417
+ source = "registry+https://github.com/rust-lang/crates.io-index"
418
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
419
+
420
+ [[package]]
421
+ name = "windows_x86_64_msvc"
422
+ version = "0.52.6"
423
+ source = "registry+https://github.com/rust-lang/crates.io-index"
424
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
data/Cargo.toml ADDED
@@ -0,0 +1,7 @@
1
+ # This Cargo.toml is here to let external tools (IDEs, etc.) know that this is
2
+ # a Rust project. Your extension's dependencies should be added to the Cargo.toml
3
+ # in the ext/ directory.
4
+
5
+ [workspace]
6
+ members = ["./ext/dedup_csv"]
7
+ resolver = "2"
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in dedup_csv.gemspec
6
+ gemspec
7
+
8
+ gem 'rake', '~> 13.0'
9
+
10
+ gem 'rake-compiler'
11
+ gem 'rb_sys', '~> 0.9.63'
12
+
13
+ gem 'rspec', '~> 3.0'
14
+
15
+ gem 'rubocop', '~> 1.21'
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ dedup_csv (0.1.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.2)
10
+ diff-lcs (1.5.1)
11
+ json (2.7.2)
12
+ language_server-protocol (3.17.0.3)
13
+ parallel (1.25.1)
14
+ parser (3.3.4.0)
15
+ ast (~> 2.4.1)
16
+ racc
17
+ racc (1.8.0)
18
+ rainbow (3.1.1)
19
+ rake (13.2.1)
20
+ rake-compiler (1.2.7)
21
+ rake
22
+ rb_sys (0.9.98)
23
+ regexp_parser (2.9.2)
24
+ rexml (3.3.2)
25
+ strscan
26
+ rspec (3.13.0)
27
+ rspec-core (~> 3.13.0)
28
+ rspec-expectations (~> 3.13.0)
29
+ rspec-mocks (~> 3.13.0)
30
+ rspec-core (3.13.0)
31
+ rspec-support (~> 3.13.0)
32
+ rspec-expectations (3.13.1)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.13.0)
35
+ rspec-mocks (3.13.1)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.13.0)
38
+ rspec-support (3.13.1)
39
+ rubocop (1.65.0)
40
+ json (~> 2.3)
41
+ language_server-protocol (>= 3.17.0)
42
+ parallel (~> 1.10)
43
+ parser (>= 3.3.0.2)
44
+ rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 2.4, < 3.0)
46
+ rexml (>= 3.2.5, < 4.0)
47
+ rubocop-ast (>= 1.31.1, < 2.0)
48
+ ruby-progressbar (~> 1.7)
49
+ unicode-display_width (>= 2.4.0, < 3.0)
50
+ rubocop-ast (1.31.3)
51
+ parser (>= 3.3.1.0)
52
+ ruby-progressbar (1.13.0)
53
+ strscan (3.1.0)
54
+ unicode-display_width (2.5.0)
55
+
56
+ PLATFORMS
57
+ arm64-darwin-22
58
+ arm64-darwin-23
59
+ x86_64-linux
60
+
61
+ DEPENDENCIES
62
+ dedup_csv!
63
+ rake (~> 13.0)
64
+ rake-compiler
65
+ rb_sys (~> 0.9.63)
66
+ rspec (~> 3.0)
67
+ rubocop (~> 1.21)
68
+
69
+ BUNDLED WITH
70
+ 2.4.4
data/README.md ADDED
@@ -0,0 +1,20 @@
1
+ # DedupCsv
2
+
3
+ Given 2 CSV files with identical headers, this gem will create a third CSV file that contains the rows from the second CSV file that are not present in the first CSV file.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ gem install dedup_csv
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```irb
14
+ require 'dedup_csv/3.2/dedup_csv'
15
+ DedupCsv.dedup('file1.csv', 'file2.csv', 'output.csv')
16
+ ```
17
+
18
+ ## Contributing
19
+
20
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kingsleyh/dedup_csv.
data/Rakefile ADDED
@@ -0,0 +1,41 @@
1
# frozen_string_literal: true

require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rubocop/rake_task'
require 'rb_sys/extensiontask'

# Test and lint tasks (`rake spec`, `rake rubocop`).
RSpec::Core::RakeTask.new(:spec)
RuboCop::RakeTask.new

# Packaging the gem depends on the native extension having been compiled.
task build: :compile

spec = Bundler.load_gemspec('dedup_csv.gemspec')
# spec.requirements.clear
# spec.required_ruby_version = nil
# spec.required_rubygems_version = nil
# spec.extensions.clear
# spec.files -= Dir['ext/**/*']

# Cross-compilation task covering every platform a precompiled gem is built for.
Rake::ExtensionTask.new('dedup_csv', spec) do |ext|
  ext.lib_dir = 'lib/dedup_csv'
  ext.cross_compile = true
  ext.cross_platform = %w[
    aarch64-linux arm64-darwin x64-mingw-ucrt x64-mingw32
    x86_64-darwin x86_64-linux x86_64-linux-musl
  ]
end

# rb_sys task for compiling the Rust extension into lib/dedup_csv.
RbSys::ExtensionTask.new('dedup_csv') { |ext| ext.lib_dir = 'lib/dedup_csv' }

task default: %i[compile spec rubocop]
@@ -0,0 +1,14 @@
1
+ [package]
2
+ name = "dedup_csv"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["kingsley.hendrickse <kingsley.hendrickse@patchwork.health>"]
6
+ publish = false
7
+
8
+ [lib]
9
+ crate-type = ["cdylib"]
10
+
11
+ [dependencies]
12
+ magnus = { version = "0.7.1" }
13
+ csv = "1.3.0"
14
+ eyre = "0.6.12"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('dedup_csv/dedup_csv')
@@ -0,0 +1,77 @@
1
+ use std::error::Error;
2
+ use std::ffi::OsStr;
3
+ use std::fs::File;
4
+ use std::path::Path;
5
+ use csv::{StringRecord, Writer};
6
+ use magnus::{define_module, function, prelude::*, Ruby};
7
+
8
+ fn dedup(ruby: &Ruby, previous_csv_path: String, new_csv_path: String, target_path: String) -> magnus::error::Result<()> {
9
+ if !previous_csv_path.has_extension(&["csv"]) {
10
+ return Err(magnus::Error::new(ruby.exception_exception(), "previous_csv_path must be a csv file".to_string()));
11
+ }
12
+ if !new_csv_path.has_extension(&["csv"]) {
13
+ return Err(magnus::Error::new(ruby.exception_exception(), "new_csv_path must be a csv file".to_string()));
14
+ }
15
+
16
+ let csv1 = File::open(previous_csv_path).map_err(|e| magnus_err(ruby, e, "previous_csv_path"))?;
17
+ let csv2 = File::open(new_csv_path).map_err(|e| magnus_err(ruby, e, "new_csv_path"))?;
18
+
19
+ let mut previous_csv: csv::Reader<File> = csv::Reader::from_reader(csv1);
20
+ let mut new_csv: csv::Reader<File> = csv::Reader::from_reader(csv2);
21
+
22
+ let mut wtr = Writer::from_path(target_path).map_err(|e| magnus_err(ruby, e, "target_path"))?;
23
+
24
+ let previous_headers = previous_csv.headers().map_err(|e| magnus_err(ruby, e, "previous_csv_path headers"))?;
25
+ let new_headers = new_csv.headers().map_err(|e| magnus_err(ruby, e, "new_csv_path headers"))?;
26
+
27
+ if previous_headers != new_headers {
28
+ return Err(magnus::Error::new(ruby.exception_exception(), "headers of both csv files must be the same".to_string()));
29
+ }
30
+
31
+ wtr.write_byte_record(previous_headers.as_byte_record()).unwrap();
32
+
33
+ let mut previous_records = vec![];
34
+ for previous_record in previous_csv.records() {
35
+ let previous_record = previous_record.map_err(|e| magnus_err(ruby, e, "previous_record"))?;
36
+ previous_records.push(previous_record)
37
+ }
38
+
39
+ for new_record in new_csv.records() {
40
+ let new_record = new_record.map_err(|e| magnus_err(ruby, e, "new_record"))?;
41
+ if !previous_records.contains(&new_record) {
42
+ let new_record = new_record.into_iter().map(|r| r.trim_end()).collect::<StringRecord>();
43
+ wtr.write_byte_record(new_record.as_byte_record()).unwrap();
44
+ }
45
+ }
46
+
47
+ wtr.flush().unwrap();
48
+
49
+ Ok(())
50
+ }
51
+
52
+ fn magnus_err<E: Error>(ruby: &Ruby, e: E, msg: &str) -> magnus::Error {
53
+ magnus::Error::new(ruby.exception_exception(), format!("{}: {}", msg, e.to_string()))
54
+ }
55
+
56
+ #[magnus::init]
57
+ fn init() -> Result<(), magnus::Error> {
58
+ let module = define_module("DedupCsv")?;
59
+ module.define_singleton_method("dedup", function!(dedup, 3))?;
60
+ Ok(())
61
+ }
62
+
63
/// Extension check for anything that can be viewed as a filesystem path.
pub trait FileExtension {
    /// Returns `true` when the path's extension equals one of `extensions`,
    /// ignoring ASCII case. Paths without a (UTF-8) extension yield `false`.
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool;
}

impl<P: AsRef<Path>> FileExtension for P {
    fn has_extension<S: AsRef<str>>(&self, extensions: &[S]) -> bool {
        match self.as_ref().extension().and_then(OsStr::to_str) {
            Some(actual) => extensions
                .iter()
                .any(|candidate| candidate.as_ref().eq_ignore_ascii_case(actual)),
            None => false,
        }
    }
}
Binary file
Binary file
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

# Namespace of the dedup_csv gem.
module DedupCsv
  # Version of the gem release.
  VERSION = '0.1.1'
end
data/lib/dedup_csv.rb ADDED
@@ -0,0 +1,9 @@
1
# frozen_string_literal: true

require_relative 'dedup_csv/version'

# Precompiled platform gems ship the native extension in a directory named after the
# Ruby minor version (e.g. lib/dedup_csv/3.0/dedup_csv.so); gems compiled at install
# time place it directly in lib/dedup_csv. Try the versioned location first and fall
# back to the unversioned one.
begin
  require_relative "dedup_csv/#{RUBY_VERSION[/\d+\.\d+/]}/dedup_csv"
rescue LoadError
  require_relative 'dedup_csv/dedup_csv'
end

# Namespace of the dedup_csv gem; the native extension defines `DedupCsv.dedup` on it.
module DedupCsv
  # Base error class for this gem.
  class Error < StandardError; end
end
data/sig/dedup_csv.rbs ADDED
@@ -0,0 +1,4 @@
1
# See the writing guide of rbs: https://github.com/ruby/rbs#guides
module DedupCsv
  VERSION: String

  # Base error class declared in lib/dedup_csv.rb.
  class Error < StandardError
  end

  # Implemented by the native extension: writes to target_path the rows of
  # new_csv_path that are not present in previous_csv_path.
  def self.dedup: (String previous_csv_path, String new_csv_path, String target_path) -> void
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dedup_csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: x64-mingw32
6
+ authors:
7
+ - kingsley.hendrickse
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-07-18 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Given 2 csv files of the same shape will find a delta
14
+ email:
15
+ - kingsley.hendrickse@patchwork.health
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".rspec"
21
+ - ".rubocop.yml"
22
+ - Cargo.lock
23
+ - Cargo.toml
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - README.md
27
+ - Rakefile
28
+ - ext/dedup_csv/Cargo.toml
29
+ - ext/dedup_csv/extconf.rb
30
+ - ext/dedup_csv/src/lib.rs
31
+ - lib/dedup_csv.rb
32
+ - lib/dedup_csv/2.7/dedup_csv.so
33
+ - lib/dedup_csv/3.0/dedup_csv.so
34
+ - lib/dedup_csv/version.rb
35
+ - sig/dedup_csv.rbs
36
+ homepage: http://github.com
37
+ licenses: []
38
+ metadata:
39
+ homepage_uri: http://github.com
40
+ rubygems_mfa_required: 'true'
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '2.7'
50
+ - - "<"
51
+ - !ruby/object:Gem::Version
52
+ version: 3.1.dev
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 3.0.0
58
+ requirements: []
59
+ rubygems_version: 3.4.4
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Fast CSV deduplicator
63
+ test_files: []