csv-diff 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/csv-diff/algorithm.rb +24 -21
- data/lib/csv-diff/csv_diff.rb +7 -5
- data/lib/csv-diff/csv_source.rb +5 -0
- metadata +9 -7
- checksums.yaml +0 -15
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -41,8 +41,31 @@ class CSVDiff
|
|
41
41
|
include_deletes = !options[:ignore_deletes]
|
42
42
|
|
43
43
|
diffs = Hash.new{ |h, k| h[k] = {} }
|
44
|
+
|
45
|
+
# First identify deletions
|
46
|
+
if include_deletes
|
47
|
+
(left_keys - right_keys).each do |key|
|
48
|
+
# Delete
|
49
|
+
key_vals = key.split('~', -1)
|
50
|
+
parent = key_vals[0...parent_fields].join('~')
|
51
|
+
left_parent = left_index[parent]
|
52
|
+
left_value = left_values[key]
|
53
|
+
left_idx = left_parent.index(key)
|
54
|
+
next unless left_idx
|
55
|
+
id = {}
|
56
|
+
id[:row] = left_keys.index(key) + 1
|
57
|
+
id[:sibling_position] = left_idx + 1
|
58
|
+
key_fields.each do |field_name|
|
59
|
+
id[field_name] = left_value[field_name]
|
60
|
+
end
|
61
|
+
diffs[key].merge!(id.merge(left_values[key].merge(:action => 'Delete')))
|
62
|
+
#puts "Delete: #{key}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Now identify adds/updates
|
44
67
|
right_keys.each_with_index do |key, right_row_id|
|
45
|
-
key_vals = key.split('~')
|
68
|
+
key_vals = key.split('~', -1)
|
46
69
|
parent = key_vals[0...parent_fields].join('~')
|
47
70
|
left_parent = left_index[parent]
|
48
71
|
right_parent = right_index[parent]
|
@@ -81,26 +104,6 @@ class CSVDiff
|
|
81
104
|
end
|
82
105
|
end
|
83
106
|
|
84
|
-
# Now identify deletions
|
85
|
-
if include_deletes
|
86
|
-
(left_keys - right_keys).each do |key|
|
87
|
-
# Delete
|
88
|
-
key_vals = key.split('~')
|
89
|
-
parent = key_vals[0...parent_fields].join('~')
|
90
|
-
left_parent = left_index[parent]
|
91
|
-
left_value = left_values[key]
|
92
|
-
left_idx = left_parent.index(key)
|
93
|
-
next unless left_idx
|
94
|
-
id = {}
|
95
|
-
id[:row] = left_keys.index(key) + 1
|
96
|
-
id[:sibling_position] = left_idx + 1
|
97
|
-
key_fields.each do |field_name|
|
98
|
-
id[field_name] = left_value[field_name]
|
99
|
-
end
|
100
|
-
diffs[key].merge!(id.merge(left_values[key].merge(:action => 'Delete')))
|
101
|
-
#puts "Delete: #{key}"
|
102
|
-
end
|
103
|
-
end
|
104
107
|
diffs
|
105
108
|
end
|
106
109
|
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -95,8 +95,8 @@ class CSVDiff
|
|
95
95
|
# Performs a diff with the specified +options+.
|
96
96
|
def diff(options = {})
|
97
97
|
@summary = nil
|
98
|
-
@diffs = diff_sources(@left, @right, @key_fields, @diff_fields, options)
|
99
98
|
@options = options
|
99
|
+
@diffs = diff_sources(@left, @right, @key_fields, @diff_fields, options)
|
100
100
|
end
|
101
101
|
|
102
102
|
|
@@ -138,20 +138,22 @@ class CSVDiff
|
|
138
138
|
# Given two sets of field names, determines the common set of fields present
|
139
139
|
# in both, on which members can be diffed.
|
140
140
|
def get_diff_fields(left_fields, right_fields, options)
|
141
|
-
ignore_fields = (
|
142
|
-
|
141
|
+
ignore_fields = options.fetch(:ignore_fields, [])
|
142
|
+
ignore_fields = [ignore_fields] unless ignore_fields.is_a?(Array)
|
143
|
+
ignore_fields.map! do |f|
|
144
|
+
(f.is_a?(Fixnum) ? right_fields[f] : f).upcase
|
143
145
|
end
|
144
146
|
diff_fields = []
|
145
147
|
if options[:diff_common_fields_only]
|
146
148
|
right_fields.each_with_index do |fld, i|
|
147
149
|
if left_fields.include?(fld)
|
148
|
-
diff_fields << fld unless ignore_fields.include?(fld)
|
150
|
+
diff_fields << fld unless ignore_fields.include?(fld.upcase)
|
149
151
|
else
|
150
152
|
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
151
153
|
end
|
152
154
|
end
|
153
155
|
else
|
154
|
-
diff_fields = (right_fields + left_fields).uniq.reject{ |fld| ignore_fields.include?(fld) }
|
156
|
+
diff_fields = (right_fields + left_fields).uniq.reject{ |fld| ignore_fields.include?(fld.upcase) }
|
155
157
|
end
|
156
158
|
diff_fields
|
157
159
|
end
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -22,6 +22,9 @@ class CSVDiff
|
|
22
22
|
# values.
|
23
23
|
attr_reader :case_sensitive
|
24
24
|
alias_method :case_sensitive?, :case_sensitive
|
25
|
+
# @return [Boolean] True if leading/trailing whitespace should be stripped
|
26
|
+
# from fields
|
27
|
+
attr_reader :trim_whitespace
|
25
28
|
# @return [Hash<String,Hash>] A hash containing each line of the source,
|
26
29
|
# keyed on the values of the +key_fields+.
|
27
30
|
attr_reader :lines
|
@@ -113,6 +116,7 @@ class CSVDiff
|
|
113
116
|
@index = Hash.new{ |h, k| h[k] = [] }
|
114
117
|
@key_fields = find_field_indexes(@key_fields, @field_names) if @field_names
|
115
118
|
@case_sensitive = options.fetch(:case_sensitive, true)
|
119
|
+
@trim_whitespace = options.fetch(:trim_whitespace, false)
|
116
120
|
line_num = 0
|
117
121
|
lines.each do |row|
|
118
122
|
line_num += 1
|
@@ -126,6 +130,7 @@ class CSVDiff
|
|
126
130
|
line = {}
|
127
131
|
@field_names.each_with_index do |field, i|
|
128
132
|
line[field] = field_vals[i]
|
133
|
+
line[field].strip! if @trim_whitespace && line[field]
|
129
134
|
end
|
130
135
|
key_values = @key_fields.map{ |kf| field_vals[kf].to_s.upcase }
|
131
136
|
key = key_values.join('~')
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Adam Gardiner
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2016-01-26 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: ! " This library performs diffs of CSV files.\n\n Unlike
|
14
15
|
a standard diff that compares line by line, and is sensitive to the\n ordering
|
@@ -34,34 +35,35 @@ executables: []
|
|
34
35
|
extensions: []
|
35
36
|
extra_rdoc_files: []
|
36
37
|
files:
|
37
|
-
- LICENSE
|
38
38
|
- README.md
|
39
|
-
-
|
39
|
+
- LICENSE
|
40
40
|
- lib/csv-diff/algorithm.rb
|
41
41
|
- lib/csv-diff/csv_diff.rb
|
42
42
|
- lib/csv-diff/csv_source.rb
|
43
|
+
- lib/csv-diff.rb
|
43
44
|
- lib/csv_diff.rb
|
44
45
|
homepage: https://github.com/agardiner/csv-diff
|
45
46
|
licenses: []
|
46
|
-
metadata: {}
|
47
47
|
post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
|
48
48
|
rdoc_options: []
|
49
49
|
require_paths:
|
50
50
|
- lib
|
51
51
|
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
52
53
|
requirements:
|
53
54
|
- - ! '>='
|
54
55
|
- !ruby/object:Gem::Version
|
55
56
|
version: '0'
|
56
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
57
59
|
requirements:
|
58
60
|
- - ! '>='
|
59
61
|
- !ruby/object:Gem::Version
|
60
62
|
version: '0'
|
61
63
|
requirements: []
|
62
64
|
rubyforge_project:
|
63
|
-
rubygems_version:
|
65
|
+
rubygems_version: 1.8.21
|
64
66
|
signing_key:
|
65
|
-
specification_version:
|
67
|
+
specification_version: 3
|
66
68
|
summary: CSV Diff is a library for generating diffs from data in CSV format
|
67
69
|
test_files: []
|
checksums.yaml
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
---
|
2
|
-
!binary "U0hBMQ==":
|
3
|
-
metadata.gz: !binary |-
|
4
|
-
MzM3MmMzNzU5ZDE5ZTA5MGI4OGQxNTBkMTE0NWM1MzYzNDVkYTFjYQ==
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
MDQ3Yzk3ZDc4ZTZiNTMwMzc1NGMxMTU5ZTBkMzdjMTMyNzE5OTYwMg==
|
7
|
-
SHA512:
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
YTYxMDM1MjUxNDk3ZjE3YWUxZWJjNGFmMzQzMDUyNGJlOGUzMmI2MDVlNjg4
|
10
|
-
OWJiYmQxNTI2MjBlMmQzNGFkZTk0ZGY1Y2I0ZDBkODljYjc4NDQ1ZDY1ODky
|
11
|
-
ZDU4MDUwZjVjYWU0MTE5NTExMmExYTM0NTY1ZGMzYzc5NDkxYmE=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
YzU4YWM2MmZjMjE4MjlhZDgxN2IxNmI1NmU2YjFiZTcwN2ZlNTZlYzE5MzZm
|
14
|
-
NGJkZDNkODUzNGNlNjA2NGZlZWIyMWFmMjljOTQyNjE3OGU4YWFmY2UyZWE2
|
15
|
-
YzhlZmI0YTcxMGY3N2Y2ODQ5NTM3ODcyMTUzOTJkODhjNjY0YWM=
|