csv-diff 0.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/csv-diff/algorithm.rb +20 -3
- data/lib/csv-diff/csv_diff.rb +19 -10
- data/lib/csv-diff/csv_source.rb +14 -4
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzM3MmMzNzU5ZDE5ZTA5MGI4OGQxNTBkMTE0NWM1MzYzNDVkYTFjYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDQ3Yzk3ZDc4ZTZiNTMwMzc1NGMxMTU5ZTBkMzdjMTMyNzE5OTYwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YTYxMDM1MjUxNDk3ZjE3YWUxZWJjNGFmMzQzMDUyNGJlOGUzMmI2MDVlNjg4
|
10
|
+
OWJiYmQxNTI2MjBlMmQzNGFkZTk0ZGY1Y2I0ZDBkODljYjc4NDQ1ZDY1ODky
|
11
|
+
ZDU4MDUwZjVjYWU0MTE5NTExMmExYTM0NTY1ZGMzYzc5NDkxYmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzU4YWM2MmZjMjE4MjlhZDgxN2IxNmI1NmU2YjFiZTcwN2ZlNTZlYzE5MzZm
|
14
|
+
NGJkZDNkODUzNGNlNjA2NGZlZWIyMWFmMjljOTQyNjE3OGU4YWFmY2UyZWE2
|
15
|
+
YzhlZmI0YTcxMGY3N2Y2ODQ5NTM3ODcyMTUzOTJkODhjNjY0YWM=
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -13,7 +13,20 @@ class CSVDiff
|
|
13
13
|
# that uniquely identify each row.
|
14
14
|
# @param diff_fields [Array] An array containing the names of the fields
|
15
15
|
# to be diff-ed.
|
16
|
+
# @param options [Hash] An options hash.
|
17
|
+
# @option options [Boolean] :ignore_adds If set to true, we ignore any
|
18
|
+
# new items that appear only in +right+.
|
19
|
+
# @option options [Boolean] :ignore_moves If set to true, we ignore any
|
20
|
+
# changes in sibling order.
|
21
|
+
# @option options [Boolean] :ignore_updates If set to true, we ignore any
|
22
|
+
# changes to items that exist in both +left+ and +right+.
|
23
|
+
# @option options [Boolean] :ignore_deletes If set to true, we ignore any
|
24
|
+
# items that appear only in +left+ (i.e. deleted from +right+).
|
16
25
|
def diff_sources(left, right, key_fields, diff_fields, options = {})
|
26
|
+
unless left.case_sensitive? == right.case_sensitive?
|
27
|
+
raise ArgumentError, "Left and right must have same settings for case-sensitivity"
|
28
|
+
end
|
29
|
+
case_sensitive = left.case_sensitive?
|
17
30
|
left_index = left.index
|
18
31
|
left_values = left.lines
|
19
32
|
left_keys = left_values.keys
|
@@ -57,7 +70,7 @@ class CSVDiff
|
|
57
70
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
58
71
|
end
|
59
72
|
end
|
60
|
-
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
73
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields, case_sensitive))
|
61
74
|
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
62
75
|
#puts "Change: #{key}"
|
63
76
|
end
|
@@ -99,17 +112,21 @@ class CSVDiff
|
|
99
112
|
# file.
|
100
113
|
# @param right_row [Hash] The version of the CSV row from the right/to
|
101
114
|
# file.
|
115
|
+
# @param fields [Array<String>] An array of field names to compare.
|
116
|
+
# @param case_sensitive [Boolean] Whether field comparisons should be
|
117
|
+
# case sensitive or not.
|
102
118
|
# @return [Hash<String, Array>] A Hash whose keys are the fields that
|
103
119
|
# contain differences, and whose values are a two-element array of
|
104
120
|
# [left/from, right/to] values.
|
105
|
-
def diff_row(left_row, right_row, fields)
|
121
|
+
def diff_row(left_row, right_row, fields, case_sensitive)
|
106
122
|
diffs = {}
|
107
123
|
fields.each do |attr|
|
108
124
|
right_val = right_row[attr]
|
109
125
|
right_val = nil if right_val == ""
|
110
126
|
left_val = left_row[attr]
|
111
127
|
left_val = nil if left_val == ""
|
112
|
-
if left_val != right_val
|
128
|
+
if (case_sensitive && left_val != right_val) ||
|
129
|
+
(left_val.to_s.upcase != right_val.to_s.upcase)
|
113
130
|
diffs[attr] = [left_val, right_val]
|
114
131
|
#puts "#{attr}: #{left_val} -> #{right_val}"
|
115
132
|
end
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -28,13 +28,15 @@ class CSVDiff
|
|
28
28
|
# @return [Array<String>] An array of field names that are compared in the
|
29
29
|
# diff process.
|
30
30
|
attr_reader :diff_fields
|
31
|
-
# @return [Array<
|
32
|
-
#
|
31
|
+
# @return [Array<String>] An array of field names of the key fields that
|
32
|
+
# uniquely identify each row.
|
33
33
|
attr_reader :key_fields
|
34
34
|
# @return [Array<String>] An array of field names for the parent field(s).
|
35
35
|
attr_reader :parent_fields
|
36
36
|
# @return [Array<String>] An array of field names for the child field(s).
|
37
37
|
attr_reader :child_fields
|
38
|
+
# @return [Hash] The options hash used for the diff.
|
39
|
+
attr_reader :options
|
38
40
|
|
39
41
|
|
40
42
|
# Generates a diff between two hierarchical tree structures, provided
|
@@ -84,7 +86,7 @@ class CSVDiff
|
|
84
86
|
@right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
|
85
87
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
86
88
|
@warnings = []
|
87
|
-
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options
|
89
|
+
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options)
|
88
90
|
@key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
|
89
91
|
diff(options)
|
90
92
|
end
|
@@ -94,6 +96,7 @@ class CSVDiff
|
|
94
96
|
def diff(options = {})
|
95
97
|
@summary = nil
|
96
98
|
@diffs = diff_sources(@left, @right, @key_fields, @diff_fields, options)
|
99
|
+
@options = options
|
97
100
|
end
|
98
101
|
|
99
102
|
|
@@ -134,15 +137,21 @@ class CSVDiff
|
|
134
137
|
|
135
138
|
# Given two sets of field names, determines the common set of fields present
|
136
139
|
# in both, on which members can be diffed.
|
137
|
-
def get_diff_fields(left_fields, right_fields,
|
140
|
+
def get_diff_fields(left_fields, right_fields, options)
|
141
|
+
ignore_fields = (options[:ignore_fields] || []).map do |f|
|
142
|
+
f.is_a?(Fixnum) ? right_fields[f] : f
|
143
|
+
end
|
138
144
|
diff_fields = []
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
+
if options[:diff_common_fields_only]
|
146
|
+
right_fields.each_with_index do |fld, i|
|
147
|
+
if left_fields.include?(fld)
|
148
|
+
diff_fields << fld unless ignore_fields.include?(fld)
|
149
|
+
else
|
150
|
+
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
151
|
+
end
|
145
152
|
end
|
153
|
+
else
|
154
|
+
diff_fields = (right_fields + left_fields).uniq.reject{ |fld| ignore_fields.include?(fld) }
|
146
155
|
end
|
147
156
|
diff_fields
|
148
157
|
end
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -17,6 +17,11 @@ class CSVDiff
|
|
17
17
|
# @return [Array<String>] The names of the field(s) that distinguish a
|
18
18
|
# child of a parent record.
|
19
19
|
attr_reader :child_fields
|
20
|
+
# @return [Boolean] True if the source has been indexed with case-
|
21
|
+
# sensitive keys, or false if it has been indexed using upper-case key
|
22
|
+
# values.
|
23
|
+
attr_reader :case_sensitive
|
24
|
+
alias_method :case_sensitive?, :case_sensitive
|
20
25
|
# @return [Hash<String,Hash>] A hash containing each line of the source,
|
21
26
|
# keyed on the values of the +key_fields+.
|
22
27
|
attr_reader :lines
|
@@ -59,10 +64,13 @@ class CSVDiff
|
|
59
64
|
# identifies each row.
|
60
65
|
# @option options [Array<String>] :key_fields The names of the fields
|
61
66
|
# that uniquely identify each row.
|
62
|
-
# @option options [String] :parent_field The name of the field that
|
63
|
-
#
|
64
|
-
# @option options [String] :child_field The name of the field that
|
65
|
-
# uniquely
|
67
|
+
# @option options [String] :parent_field The name of the field(s) that
|
68
|
+
# identify a parent within which sibling order should be checked.
|
69
|
+
# @option options [String] :child_field The name of the field(s) that
|
70
|
+
# uniquely identify a child of a parent.
|
71
|
+
# @option options [Boolean] :case_sensitive If true (the default), keys
|
72
|
+
# are indexed as-is; if false, the index is built in upper-case for
|
73
|
+
# case-insensitive comparisons.
|
66
74
|
def initialize(source, options = {})
|
67
75
|
if source.is_a?(String)
|
68
76
|
require 'csv'
|
@@ -104,6 +112,7 @@ class CSVDiff
|
|
104
112
|
@lines = {}
|
105
113
|
@index = Hash.new{ |h, k| h[k] = [] }
|
106
114
|
@key_fields = find_field_indexes(@key_fields, @field_names) if @field_names
|
115
|
+
@case_sensitive = options.fetch(:case_sensitive, true)
|
107
116
|
line_num = 0
|
108
117
|
lines.each do |row|
|
109
118
|
line_num += 1
|
@@ -121,6 +130,7 @@ class CSVDiff
|
|
121
130
|
key_values = @key_fields.map{ |kf| field_vals[kf].to_s.upcase }
|
122
131
|
key = key_values.join('~')
|
123
132
|
parent_key = key_values[0...(@parent_fields.length)].join('~')
|
133
|
+
parent_key.upcase! unless @case_sensitive
|
124
134
|
if @lines[key]
|
125
135
|
@warnings << "Duplicate key '#{key}' encountered and ignored at line #{line_num}"
|
126
136
|
else
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Gardiner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! " This library performs diffs of CSV files.\n\n Unlike
|
14
14
|
a standard diff that compares line by line, and is sensitive to the\n ordering
|
@@ -21,7 +21,14 @@ description: ! " This library performs diffs of CSV files.\n\n Unl
|
|
21
21
|
lend itself well to standard text diffs, as small changes\n in the organisation
|
22
22
|
of the tree at an upper level can lead to big movements\n in the position
|
23
23
|
of descendant records. By instead matching records by key,\n CSV-Diff avoids
|
24
|
-
this issue, while still being able to detect changes in\n sibling order.\n
|
24
|
+
this issue, while still being able to detect changes in\n sibling order.\n\n
|
25
|
+
\ This gem implements the core diff algorithm, and handles the loading and\n
|
26
|
+
\ diffing of CSV files. It returns a CSVDiff object, that contains the details\n
|
27
|
+
\ of differences in object form. This is useful for projects that need diff\n
|
28
|
+
\ capability, but want to handle the reporting of differences themselves.
|
29
|
+
For\n a pre-built diff reporting capability, see the csv-diff-report gem,
|
30
|
+
which\n provides a command-line tool for generating diff reports in HTML
|
31
|
+
or Excel\n format.\n"
|
25
32
|
email: adam.b.gardiner@gmail.com
|
26
33
|
executables: []
|
27
34
|
extensions: []
|