cevennes 0.11.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 1d93b3bdf44f0c33cf0e90d02a2f6021c06abb58
4
- data.tar.gz: c038beab1cc0a2e93079bfd27e861c78b5d50d21
2
+ SHA256:
3
+ metadata.gz: f6b604ac611ad1d24fe34ce9769c5d5b34ca1f0cdbf005ff08bce1152f24976e
4
+ data.tar.gz: e2925852747049479ad3172780c62e1c32ec9d04958d6de7c82049a7aa8103ae
5
5
  SHA512:
6
- metadata.gz: d82a5c40592be7eb8ef83df1fda403dae71fc172bd815882c1c68f8feed32e842eedb7026433e68068d2d216aa3f0a246dec790a90ebce447658cdaa9d69bb2e
7
- data.tar.gz: 6b6e4edb32644166b67cef3d4d2bea2bfb57dda859b37013b132482c37f49f569047e8fcd87507cbe0a03ecd968cc88dceedfb81fb21d5d0996d7eb9787a6a0e
6
+ metadata.gz: 7f77f3878e80646e0ea1d0b2c67bd6927cd5c60ab7b08acdb78747c83e14c6c38752753d4a90f055fdfc797a72f675dbe069240b278110f7b402dc2992d88c42
7
+ data.tar.gz: b6ad6a2ab702e874bc03a6ca11c265fcd3157f77a556482a447e8d579381be5a50812e23ff1b6253032a50f9a5347af1ef849f5a37128ce7c20cc6c5d5c23e78
data/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
  # CHANGELOG.md
3
3
 
4
4
 
5
+ ## cevennes 1.1.1 released 2021-02-25
6
+
7
+ - Refine UTF-8 re-encoding
8
+ - Fix `ignore_key_case: true` :-(
9
+ - Clarify reencode
10
+
11
+
12
+ ## cevennes 1.1.0 released 2021-02-24
13
+
14
+ - Introduce `ignore_key_case: true`
15
+
16
+
17
+ ## cevennes 1.0.0 released 2018-09-07
18
+
19
+ - Align the "keys" entry on the =+-! entries
20
+
21
+
22
+ ## cevennes 0.13.0 released 2018-09-07
23
+
24
+ - Trim cells and keys
25
+
26
+
27
+ ## cevennes 0.12.0 released 2018-09-06
28
+
29
+ - Fail with IndexError on missing id in CSV old or new
30
+
31
+
5
32
  ## cevennes 0.11.0 released 2018-09-06
6
33
 
7
34
  - Fix l0 and l1 stats
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- Copyright (c) 2018-2018, John Mettraux, jmettraux@gmail.com
2
+ Copyright (c) 2018-2021, John Mettraux, jmettraux@gmail.com
3
3
 
4
4
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -6,16 +6,56 @@
6
6
 
7
7
  Diffs CSVs by lines, focusing on a single ID
8
8
 
9
+
9
10
  ## usage
10
11
 
11
- TODO
12
+ Given two CSV strings and an identifier name (a column name), cevennes may compute a diff:
13
+ ```ruby
14
+ require 'cevennes'
15
+
16
+ cvs0 = %{
17
+ id,name,age
18
+ 0,John,33
19
+ 1,Jean-Baptiste,43
20
+ 3,Luke,21
21
+ }.strip + "\n"
22
+ cvs1 = %{
23
+ id,name,age
24
+ 0,John,33
25
+ 1,Jean-Baptiste,44
26
+ 4,Matthew,20
27
+ }.strip + "\n"
28
+
29
+ d = Cevennes.diff('id', cvs0, cvs1)
30
+
31
+ #d = Cevennes.diff('id', cvs0, cvs1, ignore_key_case: true)
32
+ # when the key case should be ignored ("Id" == "id")
12
33
  ```
13
- csv0 = %{
14
- }
15
- csv1 = %{
16
- }
17
- pp Cevennes.diff('ISIN', csv0, csv1)
34
+
35
+ `d` will yield:
36
+ ```ruby
37
+ [
38
+ [ 'keys', 1, [ 'id', 'name', 'age' ],
39
+ 1, [ 'id', 'name', 'age' ] ],
40
+ [ 'stats',
41
+ { '=' => 1, '!' => 1, '-' => 1, '+' => 1,
42
+ 'l0' => 3, 'l1' => 3 } ],
43
+ [ '=', 2, [ '0', 'John', '33'],
44
+ 2, nil ],
45
+ [ '!', 3, [ '1', 'Jean-Baptiste', '43' ],
46
+ 3, [ '1', 'Jean-Baptiste', '44' ] ],
47
+ [ '-', 4, [ '3', 'Luke', '21'],
48
+ -1, nil ],
49
+ [ '+', -1, nil,
50
+ 4, [ '4', 'Matthew', '20' ] ]
51
+ ]
18
52
  ```
53
+ It's a list where the first entry is a recap of the keys used in the old and the new CSV strings (each integer is the line number, starting at 1, where the keys were found).
54
+
55
+ The second entry is a summary of the changes: altered `!` line count, removed `-` line count, added `+` line count, old length `l0`, new length `l1`, and unchanged `=` line count.
56
+
57
+ The remaining entries are the (non-)changes themselves, from line 1 to the end.
58
+
19
59
 
20
60
  ## LICENSE
21
61
 
data/cevennes.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.platform = Gem::Platform::RUBY
11
11
  s.authors = [ 'John Mettraux' ]
12
12
  s.email = [ 'jmettraux@gmail.com' ]
13
- s.homepage = 'http://github.com/jmettraux/cevennes'
13
+ s.homepage = 'https://github.com/jmettraux/cevennes'
14
14
  s.license = 'MIT'
15
15
  s.summary = 'CSV diff library'
16
16
 
@@ -19,7 +19,13 @@ Diffs CSVs by lines, focusing on a single ID
19
19
  }.strip
20
20
 
21
21
  s.metadata = {
22
- 'changelog_uri' => s.homepage + '/blob/master/CHANGELOG.md'
22
+ 'changelog_uri' => s.homepage + '/blob/master/CHANGELOG.md',
23
+ 'documentation_uri' => s.homepage,
24
+ 'bug_tracker_uri' => s.homepage + '/issues',
25
+ #'mailing_list_uri' => 'https://groups.google.com/forum/#!forum/floraison',
26
+ 'homepage_uri' => s.homepage,
27
+ 'source_code_uri' => s.homepage,
28
+ #'wiki_uri' => s.homepage + '/wiki',
23
29
  }
24
30
 
25
31
  #s.files = `git ls-files`.split("\n")
data/lib/cevennes.rb CHANGED
@@ -1,17 +1,18 @@
1
+ # frozen_string_literal: true
1
2
 
2
3
  require 'csv'
3
4
 
4
5
 
5
6
  module Cevennes
6
7
 
7
- VERSION = '0.11.0'
8
+ VERSION = '1.1.1'
8
9
 
9
10
  class << self
10
11
 
11
- def diff(id, csv0, csv1)
12
+ def diff(id, csv0, csv1, opts={})
12
13
 
13
- h0 = hash(id, csv0)
14
- h1 = hash(id, csv1)
14
+ h0 = hash('old', id, csv0, opts)
15
+ h1 = hash('new', id, csv1, opts)
15
16
 
16
17
  ks0 = h0.delete(:keys)
17
18
  ks1 = h1.delete(:keys)
@@ -38,22 +39,39 @@ module Cevennes
38
39
  s = d.inject({}) { |h, (a, _, _)| h[a] = (h[a] || 0) + 1; h }
39
40
  s['l0'] = h0.length
40
41
  s['l1'] = h1.length
41
- #s['ll0'] = s['='] + s['!'] + s['-']
42
- #s['ll1'] = s['='] + s['!'] + s['+']
43
42
 
44
- [ [ 'keys', ks0, ks1 ], [ 'stats', s ] ] + d
43
+ [ [ 'keys', *ks0, *ks1 ], [ 'stats', s ] ] + d
45
44
  end
46
45
 
47
46
  protected
48
47
 
49
- def hash(id, csv)
48
+ def strip(row)
49
+
50
+ row.collect { |cell| cell.is_a?(String) ? cell.strip : cell }
51
+ end
52
+
53
+ DOWNCASE = lambda { |x| x.respond_to?(:downcase) ? x.downcase : x }
54
+ IDENTITY = lambda { |x| x }
55
+
56
+ def hash(version, id, csv, opts)
57
+
58
+ d = opts[:ignore_key_case] ? DOWNCASE : IDENTITY
59
+ did = d[id]
50
60
 
51
61
  csva = ::CSV.parse(reencode(csv))
52
- .each_with_index.collect { |row, i| [ 1 + i, row ] }
62
+ .each_with_index.collect { |row, i| [ 1 + i, strip(row) ] }
53
63
  .reject { |i, row| row.compact.empty? }
54
- .drop_while { |i, row| ! row.include?(id) }
64
+ .drop_while { |i, row| ! row.find { |cell| d[cell] == did } }
65
+
66
+ fail ::IndexError.new("id #{id.inspect} not found in #{version} CSV") \
67
+ if csva.empty?
55
68
 
56
- idi = csva[0][1].index(id)
69
+ csva[0][1] =
70
+ opts[:ignore_key_case] ?
71
+ csva[0][1].collect { |c| DOWNCASE[c] } :
72
+ csva[0][1]
73
+
74
+ idi = csva[0][1].index(did)
57
75
 
58
76
  csva[1..-1]
59
77
  .inject({ keys: csva[0] }) { |h, (i, row)|
@@ -68,16 +86,23 @@ module Cevennes
68
86
  # ::CSV.generate(encoding: 'UTF-8') { |csv| csv << row }.strip
69
87
  #end
70
88
 
89
+ ENCODINGS = %w[ Windows-1252 ISO-8859-1 UTF-8 ].freeze
90
+
71
91
  def reencode(s)
72
92
 
73
93
  #s = unzip(s) if s[0, 2] == 'PK'
74
94
  # no dependency on rubyzip
75
95
 
76
- %w[ Windows-1252 ISO-8859-1 UTF-8 ].each do |e|
77
- ss = s.force_encoding(e).encode('UTF-8') rescue nil
78
- break ss if ss
79
- nil
96
+ #return s if s.encoding == Encoding::UTF_8
97
+ # NO! have to force_encoding for UTF-8 as well!
98
+
99
+ s = s.dup if s.frozen?
100
+
101
+ ENCODINGS.each do |e|
102
+ (return s.force_encoding(e).encode('UTF-8')) rescue nil
80
103
  end
104
+
105
+ nil
81
106
  end
82
107
  end
83
108
  end
metadata CHANGED
@@ -1,22 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cevennes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mettraux
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-06 00:00:00.000000000 Z
11
+ date: 2021-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
18
  version: '3.7'
19
+ name: rspec
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
@@ -38,12 +38,16 @@ files:
38
38
  - README.md
39
39
  - cevennes.gemspec
40
40
  - lib/cevennes.rb
41
- homepage: http://github.com/jmettraux/cevennes
41
+ homepage: https://github.com/jmettraux/cevennes
42
42
  licenses:
43
43
  - MIT
44
44
  metadata:
45
- changelog_uri: http://github.com/jmettraux/cevennes/blob/master/CHANGELOG.md
46
- post_install_message:
45
+ changelog_uri: https://github.com/jmettraux/cevennes/blob/master/CHANGELOG.md
46
+ documentation_uri: https://github.com/jmettraux/cevennes
47
+ bug_tracker_uri: https://github.com/jmettraux/cevennes/issues
48
+ homepage_uri: https://github.com/jmettraux/cevennes
49
+ source_code_uri: https://github.com/jmettraux/cevennes
50
+ post_install_message:
47
51
  rdoc_options: []
48
52
  require_paths:
49
53
  - lib
@@ -58,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
62
  - !ruby/object:Gem::Version
59
63
  version: '0'
60
64
  requirements: []
61
- rubyforge_project:
62
- rubygems_version: 2.6.14.1
63
- signing_key:
65
+ rubygems_version: 3.0.6
66
+ signing_key:
64
67
  specification_version: 4
65
68
  summary: CSV diff library
66
69
  test_files: []