csv-diff 0.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/csv-diff/algorithm.rb +20 -3
- data/lib/csv-diff/csv_diff.rb +19 -10
- data/lib/csv-diff/csv_source.rb +14 -4
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzM3MmMzNzU5ZDE5ZTA5MGI4OGQxNTBkMTE0NWM1MzYzNDVkYTFjYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MDQ3Yzk3ZDc4ZTZiNTMwMzc1NGMxMTU5ZTBkMzdjMTMyNzE5OTYwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YTYxMDM1MjUxNDk3ZjE3YWUxZWJjNGFmMzQzMDUyNGJlOGUzMmI2MDVlNjg4
|
10
|
+
OWJiYmQxNTI2MjBlMmQzNGFkZTk0ZGY1Y2I0ZDBkODljYjc4NDQ1ZDY1ODky
|
11
|
+
ZDU4MDUwZjVjYWU0MTE5NTExMmExYTM0NTY1ZGMzYzc5NDkxYmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzU4YWM2MmZjMjE4MjlhZDgxN2IxNmI1NmU2YjFiZTcwN2ZlNTZlYzE5MzZm
|
14
|
+
NGJkZDNkODUzNGNlNjA2NGZlZWIyMWFmMjljOTQyNjE3OGU4YWFmY2UyZWE2
|
15
|
+
YzhlZmI0YTcxMGY3N2Y2ODQ5NTM3ODcyMTUzOTJkODhjNjY0YWM=
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -13,7 +13,20 @@ class CSVDiff
|
|
13
13
|
# that uniquely identify each row.
|
14
14
|
# @param diff_fields [Array] An array containing the names of the fields
|
15
15
|
# to be diff-ed.
|
16
|
+
# @param options [Hash] An options hash.
|
17
|
+
# @option options [Boolean] :ignore_adds If set to true, we ignore any
|
18
|
+
# new items that appear only in +right+.
|
19
|
+
# @option options [Boolean] :ignore_moves If set to true, we ignore any
|
20
|
+
# changes in sibling order.
|
21
|
+
# @option options [Boolean] :ignore_updates If set to true, we ignore any
|
22
|
+
# changes to items that exist in both +left+ and +right+.
|
23
|
+
# @option options [Boolean] :ignore_deletes If set to true, we ignore any
|
24
|
+
# items that appear only in +left+.
|
16
25
|
def diff_sources(left, right, key_fields, diff_fields, options = {})
|
26
|
+
unless left.case_sensitive? == right.case_sensitive?
|
27
|
+
raise ArgumentError, "Left and right must have same settings for case-sensitivity"
|
28
|
+
end
|
29
|
+
case_sensitive = left.case_sensitive?
|
17
30
|
left_index = left.index
|
18
31
|
left_values = left.lines
|
19
32
|
left_keys = left_values.keys
|
@@ -57,7 +70,7 @@ class CSVDiff
|
|
57
70
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
58
71
|
end
|
59
72
|
end
|
60
|
-
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
73
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields, case_sensitive))
|
61
74
|
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
62
75
|
#puts "Change: #{key}"
|
63
76
|
end
|
@@ -99,17 +112,21 @@ class CSVDiff
|
|
99
112
|
# file.
|
100
113
|
# @param right_row [Hash] The version of the CSV row from the right/to
|
101
114
|
# file.
|
115
|
+
# @param fields [Array<String>] An array of field names to compare.
|
116
|
+
# @param case_sensitive [Boolean] Whether field comparisons should be
|
117
|
+
# case sensitive or not.
|
102
118
|
# @return [Hash<String, Array>] A Hash whose keys are the fields that
|
103
119
|
# contain differences, and whose values are a two-element array of
|
104
120
|
# [left/from, right/to] values.
|
105
|
-
def diff_row(left_row, right_row, fields)
|
121
|
+
def diff_row(left_row, right_row, fields, case_sensitive)
|
106
122
|
diffs = {}
|
107
123
|
fields.each do |attr|
|
108
124
|
right_val = right_row[attr]
|
109
125
|
right_val = nil if right_val == ""
|
110
126
|
left_val = left_row[attr]
|
111
127
|
left_val = nil if left_val == ""
|
112
|
-
if left_val != right_val
|
128
|
+
if (case_sensitive && left_val != right_val) ||
|
129
|
+
(left_val.to_s.upcase != right_val.to_s.upcase)
|
113
130
|
diffs[attr] = [left_val, right_val]
|
114
131
|
#puts "#{attr}: #{left_val} -> #{right_val}"
|
115
132
|
end
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -28,13 +28,15 @@ class CSVDiff
|
|
28
28
|
# @return [Array<String>] An array of field names that are compared in the
|
29
29
|
# diff process.
|
30
30
|
attr_reader :diff_fields
|
31
|
-
# @return [Array<
|
32
|
-
#
|
31
|
+
# @return [Array<String>] An array of field names of the key fields that
|
32
|
+
# uniquely identify each row.
|
33
33
|
attr_reader :key_fields
|
34
34
|
# @return [Array<String>] An array of field names for the parent field(s).
|
35
35
|
attr_reader :parent_fields
|
36
36
|
# @return [Array<String>] An array of field names for the child field(s).
|
37
37
|
attr_reader :child_fields
|
38
|
+
# @return [Hash] The options hash used for the diff.
|
39
|
+
attr_reader :options
|
38
40
|
|
39
41
|
|
40
42
|
# Generates a diff between two hierarchical tree structures, provided
|
@@ -84,7 +86,7 @@ class CSVDiff
|
|
84
86
|
@right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
|
85
87
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
86
88
|
@warnings = []
|
87
|
-
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options
|
89
|
+
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options)
|
88
90
|
@key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
|
89
91
|
diff(options)
|
90
92
|
end
|
@@ -94,6 +96,7 @@ class CSVDiff
|
|
94
96
|
def diff(options = {})
|
95
97
|
@summary = nil
|
96
98
|
@diffs = diff_sources(@left, @right, @key_fields, @diff_fields, options)
|
99
|
+
@options = options
|
97
100
|
end
|
98
101
|
|
99
102
|
|
@@ -134,15 +137,21 @@ class CSVDiff
|
|
134
137
|
|
135
138
|
# Given two sets of field names, determines the common set of fields present
|
136
139
|
# in both, on which members can be diffed.
|
137
|
-
def get_diff_fields(left_fields, right_fields,
|
140
|
+
def get_diff_fields(left_fields, right_fields, options)
|
141
|
+
ignore_fields = (options[:ignore_fields] || []).map do |f|
|
142
|
+
f.is_a?(Fixnum) ? right_fields[f] : f
|
143
|
+
end
|
138
144
|
diff_fields = []
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
+
if options[:diff_common_fields_only]
|
146
|
+
right_fields.each_with_index do |fld, i|
|
147
|
+
if left_fields.include?(fld)
|
148
|
+
diff_fields << fld unless ignore_fields.include?(fld)
|
149
|
+
else
|
150
|
+
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
151
|
+
end
|
145
152
|
end
|
153
|
+
else
|
154
|
+
diff_fields = (right_fields + left_fields).uniq.reject{ |fld| ignore_fields.include?(fld) }
|
146
155
|
end
|
147
156
|
diff_fields
|
148
157
|
end
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -17,6 +17,11 @@ class CSVDiff
|
|
17
17
|
# @return [Array<String>] The names of the field(s) that distinguish a
|
18
18
|
# child of a parent record.
|
19
19
|
attr_reader :child_fields
|
20
|
+
# @return [Boolean] True if the source has been indexed with case-
|
21
|
+
# sensitive keys, or false if it has been indexed using upper-case key
|
22
|
+
# values.
|
23
|
+
attr_reader :case_sensitive
|
24
|
+
alias_method :case_sensitive?, :case_sensitive
|
20
25
|
# @return [Hash<String,Hash>] A hash containing each line of the source,
|
21
26
|
# keyed on the values of the +key_fields+.
|
22
27
|
attr_reader :lines
|
@@ -59,10 +64,13 @@ class CSVDiff
|
|
59
64
|
# identifies each row.
|
60
65
|
# @option options [Array<String>] :key_fields The names of the fields
|
61
66
|
# that uniquely identify each row.
|
62
|
-
# @option options [String] :parent_field The name of the field that
|
63
|
-
#
|
64
|
-
# @option options [String] :child_field The name of the field that
|
65
|
-
# uniquely
|
67
|
+
# @option options [String] :parent_field The name of the field(s) that
|
68
|
+
# identify a parent within which sibling order should be checked.
|
69
|
+
# @option options [String] :child_field The name of the field(s) that
|
70
|
+
# uniquely identify a child of a parent.
|
71
|
+
# @option options [Boolean] :case_sensitive If true (the default), keys
|
72
|
+
# are indexed as-is; if false, the index is built in upper-case for
|
73
|
+
# case-insensitive comparisons.
|
66
74
|
def initialize(source, options = {})
|
67
75
|
if source.is_a?(String)
|
68
76
|
require 'csv'
|
@@ -104,6 +112,7 @@ class CSVDiff
|
|
104
112
|
@lines = {}
|
105
113
|
@index = Hash.new{ |h, k| h[k] = [] }
|
106
114
|
@key_fields = find_field_indexes(@key_fields, @field_names) if @field_names
|
115
|
+
@case_sensitive = options.fetch(:case_sensitive, true)
|
107
116
|
line_num = 0
|
108
117
|
lines.each do |row|
|
109
118
|
line_num += 1
|
@@ -121,6 +130,7 @@ class CSVDiff
|
|
121
130
|
key_values = @key_fields.map{ |kf| field_vals[kf].to_s.upcase }
|
122
131
|
key = key_values.join('~')
|
123
132
|
parent_key = key_values[0...(@parent_fields.length)].join('~')
|
133
|
+
parent_key.upcase! unless @case_sensitive
|
124
134
|
if @lines[key]
|
125
135
|
@warnings << "Duplicate key '#{key}' encountered and ignored at line #{line_num}"
|
126
136
|
else
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Gardiner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: ! " This library performs diffs of CSV files.\n\n Unlike
|
14
14
|
a standard diff that compares line by line, and is sensitive to the\n ordering
|
@@ -21,7 +21,14 @@ description: ! " This library performs diffs of CSV files.\n\n Unl
|
|
21
21
|
lend itself well to standard text diffs, as small changes\n in the organisation
|
22
22
|
of the tree at an upper level can lead to big movements\n in the position
|
23
23
|
of descendant records. By instead matching records by key,\n CSV-Diff avoids
|
24
|
-
this issue, while still being able to detect changes in\n sibling order.\n
|
24
|
+
this issue, while still being able to detect changes in\n sibling order.\n\n
|
25
|
+
\ This gem implements the core diff algorithm, and handles the loading and\n
|
26
|
+
\ diffing of CSV files. It returns a CSVDiff object, that contains the details\n
|
27
|
+
\ of differences in object form. This is useful for projects that need diff\n
|
28
|
+
\ capability, but want to handle the reporting of differences themselves.
|
29
|
+
For\n a pre-built diff reporting capability, see the csv-diff-report gem,
|
30
|
+
which\n provides a command-line tool for generating diff reports in HTML
|
31
|
+
or Excel\n format.\n"
|
25
32
|
email: adam.b.gardiner@gmail.com
|
26
33
|
executables: []
|
27
34
|
extensions: []
|