cevennes 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +18 -1
- data/LICENSE.txt +1 -1
- data/Makefile +1 -1
- data/README.md +43 -2
- data/cevennes.gemspec +1 -1
- data/lib/cevennes.rb +32 -11
- metadata +13 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 598f7e128effec146dbe0acc01bd26101b689a6efdcf647c061416aa43accb53
|
4
|
+
data.tar.gz: b1cf17c58da5e0562100feff9fe40c1eee3cf07a2fc6fab0b724fbd792a240f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9686e580322ab076d7e85506d716aaac8e2d131d9af350b31b2985539bfc8d7a01f2b40dfd23e285e78e620e82dbc2cede2d1840c5d60f72d130b22f16cf3502
|
7
|
+
data.tar.gz: 5bcef831927839276c9ca7f4304d5c6904f37587d4f4654ba88962250eafd59bfc7036ba1ff9f25824e898a2e8f5f87a250f3aa0317a7e1fadd696c4efe97453
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,24 @@
|
|
2
2
|
# CHANGELOG.md
|
3
3
|
|
4
4
|
|
5
|
-
## cevennes 1.
|
5
|
+
## cevennes 1.2.0 released 2022-07-05
|
6
|
+
|
7
|
+
- Introduce `drop_equals: true`
|
8
|
+
|
9
|
+
|
10
|
+
## cevennes 1.1.1 released 2021-02-25
|
11
|
+
|
12
|
+
- Refine UTF-8 re-encoding
|
13
|
+
- Fix `ignore_key_case: true` :-(
|
14
|
+
- Clarify reencode
|
15
|
+
|
16
|
+
|
17
|
+
## cevennes 1.1.0 released 2021-02-24
|
18
|
+
|
19
|
+
- Introduce `ignore_key_case: true`
|
20
|
+
|
21
|
+
|
22
|
+
## cevennes 1.0.0 released 2018-09-07
|
6
23
|
|
7
24
|
- Align the "keys" entry on the =+-! entries
|
8
25
|
|
data/LICENSE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
|
2
|
-
Copyright (c) 2018-
|
2
|
+
Copyright (c) 2018-2022, John Mettraux, jmettraux@gmail.com
|
3
3
|
|
4
4
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
of this software and associated documentation files (the "Software"), to deal
|
data/Makefile
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
# cevennes
|
3
3
|
|
4
|
-
[tests](https://github.com/jmettraux/cevennes/actions)
|
5
5
|
[Gem Version](http://badge.fury.io/rb/cevennes)
|
6
6
|
|
7
7
|
Diffs CSVs by lines, focusing on a single ID
|
@@ -9,7 +9,7 @@ Diffs CSVs by lines, focusing on a single ID
|
|
9
9
|
|
10
10
|
## usage
|
11
11
|
|
12
|
-
Given two CSV strings and an identifier name, cevennes may compute a diff:
|
12
|
+
Given two CSV strings and an identifier name (a column name), cevennes may compute a diff:
|
13
13
|
```ruby
|
14
14
|
require 'cevennes'
|
15
15
|
|
@@ -27,6 +27,9 @@ cvs1 = %{
|
|
27
27
|
}.strip + "\n"
|
28
28
|
|
29
29
|
d = Cevennes.diff('id', cvs0, cvs1)
|
30
|
+
|
31
|
+
#d = Cevennes.diff('id', cvs0, cvs1, ignore_key_case: true)
|
32
|
+
# when the key case should be ignored ("Id" == "id")
|
30
33
|
```
|
31
34
|
|
32
35
|
`d` will yield:
|
@@ -54,6 +57,44 @@ The second entry is a summary of the changes, altered `!` line count, removed `-
|
|
54
57
|
The remaining entries are the (non-)changes themselves, from line 1 to the end.
|
55
58
|
|
56
59
|
|
60
|
+
### drop_equals: true
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
require 'cevennes'
|
64
|
+
|
65
|
+
cvs0 = %{
|
66
|
+
id,name,age
|
67
|
+
0,John,33
|
68
|
+
1,Jean-Baptiste,43
|
69
|
+
3,Luke,21
|
70
|
+
}.strip + "\n"
|
71
|
+
cvs1 = %{
|
72
|
+
id,name,age
|
73
|
+
0,John,33
|
74
|
+
1,Jean-Baptiste,44
|
75
|
+
4,Matthew,20
|
76
|
+
}.strip + "\n"
|
77
|
+
|
78
|
+
d = Cevennes.diff('id', cvs0, cvs1, drop_equals: true)
|
79
|
+
# ==>
|
80
|
+
[
|
81
|
+
[ 'keys', 1, [ 'id', 'name', 'age' ],
|
82
|
+
1, [ 'id', 'name', 'age' ] ],
|
83
|
+
[ 'stats',
|
84
|
+
{ '=' => 1, '!' => 1, '-' => 1, '+' => 1,
|
85
|
+
'l0' => 3, 'l1' => 3 } ],
|
86
|
+
[ '!', 3, [ '1', 'Jean-Baptiste', '43' ],
|
87
|
+
3, [ '1', 'Jean-Baptiste', '44' ] ],
|
88
|
+
[ '-', 4, [ '3', 'Luke', '21'],
|
89
|
+
-1, nil ],
|
90
|
+
[ '+', -1, nil,
|
91
|
+
4, [ '4', 'Matthew', '20' ] ]
|
92
|
+
]
|
93
|
+
#
|
94
|
+
# the "=" entries are not included
|
95
|
+
```
|
96
|
+
|
97
|
+
|
57
98
|
## LICENSE
|
58
99
|
|
59
100
|
MIT, see [LICENSE.txt](LICENSE.txt)
|
data/cevennes.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.authors = [ 'John Mettraux' ]
|
12
12
|
s.email = [ 'jmettraux@gmail.com' ]
|
13
|
-
s.homepage = '
|
13
|
+
s.homepage = 'https://github.com/jmettraux/cevennes'
|
14
14
|
s.license = 'MIT'
|
15
15
|
s.summary = 'CSV diff library'
|
16
16
|
|
data/lib/cevennes.rb
CHANGED
@@ -1,17 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
3
|
require 'csv'
|
3
4
|
|
4
5
|
|
5
6
|
module Cevennes
|
6
7
|
|
7
|
-
VERSION = '1.0.0'
|
8
|
+
VERSION = '1.2.0'
|
8
9
|
|
9
10
|
class << self
|
10
11
|
|
11
|
-
def diff(id, csv0, csv1)
|
12
|
+
def diff(id, csv0, csv1, opts={})
|
12
13
|
|
13
|
-
h0 = hash('old', id, csv0)
|
14
|
-
h1 = hash('new', id, csv1)
|
14
|
+
h0 = hash('old', id, csv0, opts)
|
15
|
+
h1 = hash('new', id, csv1, opts)
|
15
16
|
|
16
17
|
ks0 = h0.delete(:keys)
|
17
18
|
ks1 = h1.delete(:keys)
|
@@ -39,6 +40,8 @@ module Cevennes
|
|
39
40
|
s['l0'] = h0.length
|
40
41
|
s['l1'] = h1.length
|
41
42
|
|
43
|
+
d = d.reject { |e| e[0] == '=' } if opts[:drop_equals]
|
44
|
+
|
42
45
|
[ [ 'keys', *ks0, *ks1 ], [ 'stats', s ] ] + d
|
43
46
|
end
|
44
47
|
|
@@ -49,17 +52,28 @@ module Cevennes
|
|
49
52
|
row.collect { |cell| cell.is_a?(String) ? cell.strip : cell }
|
50
53
|
end
|
51
54
|
|
52
|
-
|
55
|
+
DOWNCASE = lambda { |x| x.respond_to?(:downcase) ? x.downcase : x }
|
56
|
+
IDENTITY = lambda { |x| x }
|
57
|
+
|
58
|
+
def hash(version, id, csv, opts)
|
59
|
+
|
60
|
+
d = opts[:ignore_key_case] ? DOWNCASE : IDENTITY
|
61
|
+
did = d[id]
|
53
62
|
|
54
63
|
csva = ::CSV.parse(reencode(csv))
|
55
64
|
.each_with_index.collect { |row, i| [ 1 + i, strip(row) ] }
|
56
65
|
.reject { |i, row| row.compact.empty? }
|
57
|
-
.drop_while { |i, row| ! row.
|
66
|
+
.drop_while { |i, row| ! row.find { |cell| d[cell] == did } }
|
58
67
|
|
59
68
|
fail ::IndexError.new("id #{id.inspect} not found in #{version} CSV") \
|
60
69
|
if csva.empty?
|
61
70
|
|
62
|
-
|
71
|
+
csva[0][1] =
|
72
|
+
opts[:ignore_key_case] ?
|
73
|
+
csva[0][1].collect { |c| DOWNCASE[c] } :
|
74
|
+
csva[0][1]
|
75
|
+
|
76
|
+
idi = csva[0][1].index(did)
|
63
77
|
|
64
78
|
csva[1..-1]
|
65
79
|
.inject({ keys: csva[0] }) { |h, (i, row)|
|
@@ -74,16 +88,23 @@ module Cevennes
|
|
74
88
|
# ::CSV.generate(encoding: 'UTF-8') { |csv| csv << row }.strip
|
75
89
|
#end
|
76
90
|
|
91
|
+
ENCODINGS = %w[ Windows-1252 ISO-8859-1 UTF-8 ].freeze
|
92
|
+
|
77
93
|
def reencode(s)
|
78
94
|
|
79
95
|
#s = unzip(s) if s[0, 2] == 'PK'
|
80
96
|
# no dependency on rubyzip
|
81
97
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
98
|
+
#return s if s.encoding == Encoding::UTF_8
|
99
|
+
# NO! have to force_encoding for UTF-8 as well!
|
100
|
+
|
101
|
+
s = s.dup if s.frozen?
|
102
|
+
|
103
|
+
ENCODINGS.each do |e|
|
104
|
+
(return s.force_encoding(e).encode('UTF-8')) rescue nil
|
86
105
|
end
|
106
|
+
|
107
|
+
nil
|
87
108
|
end
|
88
109
|
end
|
89
110
|
end
|
metadata
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cevennes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mettraux
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: rspec
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - "~>"
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '3.7'
|
19
|
+
name: rspec
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -38,16 +38,16 @@ files:
|
|
38
38
|
- README.md
|
39
39
|
- cevennes.gemspec
|
40
40
|
- lib/cevennes.rb
|
41
|
-
homepage:
|
41
|
+
homepage: https://github.com/jmettraux/cevennes
|
42
42
|
licenses:
|
43
43
|
- MIT
|
44
44
|
metadata:
|
45
|
-
changelog_uri:
|
46
|
-
documentation_uri:
|
47
|
-
bug_tracker_uri:
|
48
|
-
homepage_uri:
|
49
|
-
source_code_uri:
|
50
|
-
post_install_message:
|
45
|
+
changelog_uri: https://github.com/jmettraux/cevennes/blob/master/CHANGELOG.md
|
46
|
+
documentation_uri: https://github.com/jmettraux/cevennes
|
47
|
+
bug_tracker_uri: https://github.com/jmettraux/cevennes/issues
|
48
|
+
homepage_uri: https://github.com/jmettraux/cevennes
|
49
|
+
source_code_uri: https://github.com/jmettraux/cevennes
|
50
|
+
post_install_message:
|
51
51
|
rdoc_options: []
|
52
52
|
require_paths:
|
53
53
|
- lib
|
@@ -62,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
62
|
- !ruby/object:Gem::Version
|
63
63
|
version: '0'
|
64
64
|
requirements: []
|
65
|
-
|
66
|
-
|
67
|
-
signing_key:
|
65
|
+
rubygems_version: 3.0.6
|
66
|
+
signing_key:
|
68
67
|
specification_version: 4
|
69
68
|
summary: CSV diff library
|
70
69
|
test_files: []
|