csv-diff 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +1 -1
- data/README.md +18 -1
- data/lib/csv-diff.rb +0 -0
- data/lib/csv-diff/algorithm.rb +116 -35
- data/lib/csv-diff/csv_diff.rb +1 -3
- data/lib/csv-diff/csv_source.rb +94 -6
- data/lib/csv_diff.rb +0 -0
- metadata +36 -29
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 877e2055af0b258a897c546c7d053d95f3ce3a75
|
4
|
+
data.tar.gz: 6c7f2c56091beff466c09d221206e3569ca2c912
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8074e3349c0017154222c243c8f995b4354a656cd913b1786b632103c5224ace09f8b75180e497f441c140ead9ae96f700fc8e918805cd880045b0fd335f1e58
|
7
|
+
data.tar.gz: 295ca04fb380e30b22520ea7c7f6d28fbe7eeb4324cf8c69f476c576572c1b99f1269101b2fe46aa9a2c19a5d9584a974fe0a7254e042961fc03750b1c8b9762
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -125,7 +125,7 @@ column in the data. In this case, a diff can be created simply via:
|
|
125
125
|
diff = CSVDiff.new(file1, file2)
|
126
126
|
```
|
127
127
|
|
128
|
-
###
|
128
|
+
### Specifying Unique Row Identifiers
|
129
129
|
|
130
130
|
Often however, rows are not uniquely identifiable via the first column in the file.
|
131
131
|
In a parent-child hierarchy, for example, combinations of parent and child may be
|
@@ -211,6 +211,23 @@ diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam'
|
|
211
211
|
ignore_fields: ['CreatedAt', 'UpdatedAt'])
|
212
212
|
```
|
213
213
|
|
214
|
+
### Filtering Rows
|
215
|
+
|
216
|
+
If you need to filter source data before running the diff process, you can use the :include
|
217
|
+
and :exclude options to do so. Both options take a Hash as their value; the hash should have
|
218
|
+
keys that are the field names or indexes (0-based) on which to filter, and whose values are
|
219
|
+
regular expressions or lambdas to be applied to values of the corresponding field. Rows will
|
220
|
+
only be diffed if they satisfy :include conditions, and do not satisfy :exclude conditions.
|
221
|
+
```ruby
|
222
|
+
# Generate a diff of Arsenal home games not refereed by Clattenburg
|
223
|
+
diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
|
224
|
+
include: {HomeTeam: 'Arsenal'}, exclude: {Referee: /Clattenburg/})
|
225
|
+
|
226
|
+
# Generate a diff of games played over the Xmas/New Year period
|
227
|
+
diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
|
228
|
+
include: {Date: lambda{ |d| holiday_period.include?(Date.strptime(d, '%y/%m/%d')) } })
|
229
|
+
```
|
230
|
+
|
214
231
|
### Ignoring Certain Changes
|
215
232
|
|
216
233
|
CSVDiff identifies Adds, Updates, Moves and Deletes; any of these changes can be selectively
|
data/lib/csv-diff.rb
CHANGED
File without changes
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -3,6 +3,55 @@ class CSVDiff
|
|
3
3
|
# Implements the CSV diff algorithm.
|
4
4
|
module Algorithm
|
5
5
|
|
6
|
+
# Holds the details of a single difference
|
7
|
+
class Diff
|
8
|
+
|
9
|
+
attr_accessor :diff_type
|
10
|
+
attr_reader :fields
|
11
|
+
attr_reader :row
|
12
|
+
attr_reader :sibling_position
|
13
|
+
|
14
|
+
def initialize(diff_type, fields, row_idx, pos_idx)
|
15
|
+
@diff_type = diff_type
|
16
|
+
@fields = fields
|
17
|
+
@row = row_idx + 1
|
18
|
+
self.sibling_position = pos_idx
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def sibling_position=(pos_idx)
|
23
|
+
if pos_idx.is_a?(Array)
|
24
|
+
pos_idx.compact!
|
25
|
+
if pos_idx.first != pos_idx.last
|
26
|
+
@sibling_position = pos_idx.map{ |pos| pos + 1 }
|
27
|
+
else
|
28
|
+
@sibling_position = pos_idx.first + 1
|
29
|
+
end
|
30
|
+
else
|
31
|
+
@sibling_position = pos_idx + 1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
# For backwards compatibility and access to fields with differences
|
37
|
+
def [](key)
|
38
|
+
case key
|
39
|
+
when String
|
40
|
+
@fields[key]
|
41
|
+
when :action
|
42
|
+
a = diff_type.to_s
|
43
|
+
a[0] = a[0].upcase
|
44
|
+
a
|
45
|
+
when :row
|
46
|
+
@row
|
47
|
+
when :sibling_position
|
48
|
+
@sibling_position
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
|
6
55
|
# Diffs two CSVSource structures.
|
7
56
|
#
|
8
57
|
# @param left [CSVSource] A CSVSource object containing the contents of
|
@@ -22,43 +71,50 @@ class CSVDiff
|
|
22
71
|
# items that exist in both +left+ and +right+.
|
23
72
|
# @option options [Boolean] :ignore_deletes If set to true, we ignore any
|
24
73
|
# new items that appear only in +left+.
|
74
|
+
# @option options [Hash<Object,Proc>] :equality_procs A Hash mapping fields
|
75
|
+
# to a 2-arg Proc that should be used to compare values in that field for
|
76
|
+
# equality.
|
25
77
|
def diff_sources(left, right, key_fields, diff_fields, options = {})
|
26
78
|
unless left.case_sensitive? == right.case_sensitive?
|
27
79
|
raise ArgumentError, "Left and right must have same settings for case-sensitivity"
|
28
80
|
end
|
29
|
-
|
81
|
+
|
82
|
+
# Ensure key fields are not also in the diff_fields
|
83
|
+
diff_fields = diff_fields - key_fields
|
84
|
+
|
30
85
|
left_index = left.index
|
31
86
|
left_values = left.lines
|
32
87
|
left_keys = left_values.keys
|
33
88
|
right_index = right.index
|
34
89
|
right_values = right.lines
|
35
90
|
right_keys = right_values.keys
|
36
|
-
|
91
|
+
parent_field_count = left.parent_fields.length
|
37
92
|
|
38
93
|
include_adds = !options[:ignore_adds]
|
39
94
|
include_moves = !options[:ignore_moves]
|
40
95
|
include_updates = !options[:ignore_updates]
|
41
96
|
include_deletes = !options[:ignore_deletes]
|
42
97
|
|
43
|
-
|
98
|
+
@case_sensitive = left.case_sensitive?
|
99
|
+
@equality_procs = options.fetch(:equality_procs, {})
|
100
|
+
|
101
|
+
diffs = {}
|
102
|
+
potential_moves = Hash.new{ |h, k| h[k] = [] }
|
44
103
|
|
45
104
|
# First identify deletions
|
46
105
|
if include_deletes
|
47
106
|
(left_keys - right_keys).each do |key|
|
48
107
|
# Delete
|
49
108
|
key_vals = key.split('~', -1)
|
50
|
-
parent = key_vals[0...
|
109
|
+
parent = key_vals[0...parent_field_count].join('~')
|
110
|
+
child = key_vals[parent_field_count..-1].join('~')
|
51
111
|
left_parent = left_index[parent]
|
52
112
|
left_value = left_values[key]
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
key_fields.each do |field_name|
|
59
|
-
id[field_name] = left_value[field_name]
|
60
|
-
end
|
61
|
-
diffs[key].merge!(id.merge(left_values[key].merge(:action => 'Delete')))
|
113
|
+
row_idx = left_keys.index(key)
|
114
|
+
sib_idx = left_parent.index(key)
|
115
|
+
raise "Can't locate key #{key} in parent #{parent}" unless sib_idx
|
116
|
+
diffs[key] = Diff.new(:delete, left_value, row_idx, sib_idx)
|
117
|
+
potential_moves[child] << key
|
62
118
|
#puts "Delete: #{key}"
|
63
119
|
end
|
64
120
|
end
|
@@ -66,7 +122,7 @@ class CSVDiff
|
|
66
122
|
# Now identify adds/updates
|
67
123
|
right_keys.each_with_index do |key, right_row_id|
|
68
124
|
key_vals = key.split('~', -1)
|
69
|
-
parent = key_vals[0...
|
125
|
+
parent = key_vals[0...parent_field_count].join('~')
|
70
126
|
left_parent = left_index[parent]
|
71
127
|
right_parent = right_index[parent]
|
72
128
|
left_value = left_values[key]
|
@@ -74,13 +130,12 @@ class CSVDiff
|
|
74
130
|
left_idx = left_parent && left_parent.index(key)
|
75
131
|
right_idx = right_parent && right_parent.index(key)
|
76
132
|
|
77
|
-
id = {}
|
78
|
-
id[:row] = right_row_id + 1
|
79
|
-
id[:sibling_position] = right_idx + 1
|
80
|
-
key_fields.each do |field_name|
|
81
|
-
id[field_name] = right_value[field_name]
|
82
|
-
end
|
83
133
|
if left_idx && right_idx
|
134
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
135
|
+
id = id_fields(key_fields, right_value)
|
136
|
+
diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
|
137
|
+
#puts "Change: #{key}"
|
138
|
+
end
|
84
139
|
if include_moves
|
85
140
|
left_common = left_parent & right_parent
|
86
141
|
right_common = right_parent & left_parent
|
@@ -88,19 +143,31 @@ class CSVDiff
|
|
88
143
|
right_pos = right_common.index(key)
|
89
144
|
if left_pos != right_pos
|
90
145
|
# Move
|
91
|
-
diffs[key]
|
92
|
-
|
146
|
+
if d = diffs[key]
|
147
|
+
d.sibling_position = [left_idx, right_idx]
|
148
|
+
else
|
149
|
+
id = id_fields(key_fields, right_value)
|
150
|
+
diffs[key] = Diff.new(:move, id, right_row_id, [left_idx, right_idx])
|
151
|
+
end
|
93
152
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
94
153
|
end
|
95
154
|
end
|
96
|
-
|
97
|
-
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
98
|
-
#puts "Change: #{key}"
|
99
|
-
end
|
100
|
-
elsif include_adds && right_idx
|
155
|
+
elsif right_idx
|
101
156
|
# Add
|
102
|
-
|
103
|
-
|
157
|
+
child = key_vals[parent_field_count..-1].join('~')
|
158
|
+
if potential_moves.has_key?(child) && old_key = potential_moves[child].pop
|
159
|
+
diffs.delete(old_key)
|
160
|
+
if include_updates
|
161
|
+
left_value = left_values[old_key]
|
162
|
+
id = id_fields(right.child_fields, right_value)
|
163
|
+
changes = diff_row(left_value, right_value, left.parent_fields + diff_fields)
|
164
|
+
diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
|
165
|
+
#puts "Update Parent: #{key}"
|
166
|
+
end
|
167
|
+
elsif include_adds
|
168
|
+
diffs[key] = Diff.new(:add, right_value, right_row_id, right_idx)
|
169
|
+
#puts "Add: #{key}"
|
170
|
+
end
|
104
171
|
end
|
105
172
|
end
|
106
173
|
|
@@ -116,27 +183,41 @@ class CSVDiff
|
|
116
183
|
# @param right_row [Hash] The version of the CSV row from the right/to
|
117
184
|
# file.
|
118
185
|
# @param fields [Array<String>] An array of field names to compare.
|
119
|
-
# @param case_sensitive [Boolean] Whether field comparisons should be
|
120
|
-
# case sensitive or not.
|
121
186
|
# @return [Hash<String, Array>] A Hash whose keys are the fields that
|
122
187
|
# contain differences, and whose values are a two-element array of
|
123
188
|
# [left/from, right/to] values.
|
124
|
-
def diff_row(left_row, right_row, fields
|
189
|
+
def diff_row(left_row, right_row, fields)
|
125
190
|
diffs = {}
|
126
191
|
fields.each do |attr|
|
192
|
+
eq_proc = @equality_procs[attr]
|
127
193
|
right_val = right_row[attr]
|
128
194
|
right_val = nil if right_val == ""
|
129
195
|
left_val = left_row[attr]
|
130
196
|
left_val = nil if left_val == ""
|
131
|
-
if
|
132
|
-
|
197
|
+
if eq_proc
|
198
|
+
diffs[attr] = [left_val, right_val] unless eq_proc.call(left_val, right_val)
|
199
|
+
elsif @case_sensitive
|
200
|
+
diffs[attr] = [left_val, right_val] unless left_val == right_val
|
201
|
+
elsif (left_val.to_s.upcase != right_val.to_s.upcase)
|
133
202
|
diffs[attr] = [left_val, right_val]
|
134
|
-
#puts "#{attr}: #{left_val} -> #{right_val}"
|
135
203
|
end
|
136
204
|
end
|
137
205
|
diffs if diffs.size > 0
|
138
206
|
end
|
139
207
|
|
208
|
+
|
209
|
+
private
|
210
|
+
|
211
|
+
|
212
|
+
# Return a hash containing just the key field values
|
213
|
+
def id_fields(key_fields, fields)
|
214
|
+
id = {}
|
215
|
+
key_fields.each do |field_name|
|
216
|
+
id[field_name] = fields[field_name]
|
217
|
+
end
|
218
|
+
id
|
219
|
+
end
|
220
|
+
|
140
221
|
end
|
141
222
|
|
142
223
|
end
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -87,7 +87,7 @@ class CSVDiff
|
|
87
87
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
88
88
|
@warnings = []
|
89
89
|
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options)
|
90
|
-
@key_fields = @left.key_fields
|
90
|
+
@key_fields = @left.key_fields
|
91
91
|
diff(options)
|
92
92
|
end
|
93
93
|
|
@@ -148,8 +148,6 @@ class CSVDiff
|
|
148
148
|
right_fields.each_with_index do |fld, i|
|
149
149
|
if left_fields.include?(fld)
|
150
150
|
diff_fields << fld unless ignore_fields.include?(fld.upcase)
|
151
|
-
else
|
152
|
-
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
153
151
|
end
|
154
152
|
end
|
155
153
|
else
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -6,6 +6,7 @@ class CSVDiff
|
|
6
6
|
|
7
7
|
# @return [String] the path to the source file
|
8
8
|
attr_accessor :path
|
9
|
+
|
9
10
|
# @return [Array<String>] The names of the fields in the source file
|
10
11
|
attr_reader :field_names
|
11
12
|
# @return [Array<String>] The names of the field(s) that uniquely
|
@@ -17,6 +18,17 @@ class CSVDiff
|
|
17
18
|
# @return [Array<String>] The names of the field(s) that distinguish a
|
18
19
|
# child of a parent record.
|
19
20
|
attr_reader :child_fields
|
21
|
+
|
22
|
+
# @return [Array<Fixnum>] The indexes of the key fields in the source
|
23
|
+
# file.
|
24
|
+
attr_reader :key_field_indexes
|
25
|
+
# @return [Array<Fixnum>] The indexes of the parent fields in the source
|
26
|
+
# file.
|
27
|
+
attr_reader :parent_field_indexes
|
28
|
+
# @return [Array<Fixnum>] The indexes of the child fields in the source
|
29
|
+
# file.
|
30
|
+
attr_reader :child_field_indexes
|
31
|
+
|
20
32
|
# @return [Boolean] True if the source has been indexed with case-
|
21
33
|
# sensitive keys, or false if it has been indexed using upper-case key
|
22
34
|
# values.
|
@@ -34,6 +46,12 @@ class CSVDiff
|
|
34
46
|
# @return [Array<String>] An array of any warnings encountered while
|
35
47
|
# processing the source.
|
36
48
|
attr_reader :warnings
|
49
|
+
# @return [Fixnum] A count of the lines processed from this source.
|
50
|
+
# Excludes any header and duplicate records identified during indexing.
|
51
|
+
attr_reader :line_count
|
52
|
+
# @return [Fixnum] A count of the lines from this source that were skipped,
|
53
|
+
# due either to duplicate keys or filter conditions.
|
54
|
+
attr_reader :skip_count
|
37
55
|
|
38
56
|
|
39
57
|
# Creates a new diff source.
|
@@ -72,8 +90,14 @@ class CSVDiff
|
|
72
90
|
# @option options [String] :child_field The name of the field(s) that
|
73
91
|
# uniquely identify a child of a parent.
|
74
92
|
# @option options [Boolean] :case_sensitive If true (the default), keys
|
75
|
-
#
|
76
|
-
#
|
93
|
+
# are indexed as-is; if false, the index is built in upper-case for
|
94
|
+
# case-insensitive comparisons.
|
95
|
+
# @option options [Hash] :include A hash of field name(s) or index(es) to
|
96
|
+
# regular expression(s). Only source rows whose field values satisfy the
|
97
|
+
# regular expressions will be indexed and included in the diff process.
|
98
|
+
# @option options [Hash] :exclude A hash of field name(s) or index(es) to
|
99
|
+
# regular expression(s). Source rows with a field value that satisfies
|
100
|
+
# the regular expressions will be excluded from the diff process.
|
77
101
|
def initialize(source, options = {})
|
78
102
|
if source.is_a?(String)
|
79
103
|
require 'csv'
|
@@ -81,8 +105,12 @@ class CSVDiff
|
|
81
105
|
csv_options = options.fetch(:csv_options, {})
|
82
106
|
@path = source
|
83
107
|
source = CSV.open(@path, mode_string, csv_options).readlines
|
108
|
+
elsif !source.is_a?(Enumerable) || (source.is_a?(Enumerable) && source.size > 0 &&
|
109
|
+
!source.first.is_a?(Enumerable))
|
110
|
+
raise ArgumentError, "source must be a path to a file or an Enumerable<Enumerable>"
|
84
111
|
end
|
85
|
-
if
|
112
|
+
if (options.keys & [:parent_field, :parent_fields, :child_field, :child_fields]).empty? &&
|
113
|
+
(kf = options.fetch(:key_field, options[:key_fields]))
|
86
114
|
@key_fields = [kf].flatten
|
87
115
|
@parent_fields = @key_fields[0...-1]
|
88
116
|
@child_fields = @key_fields[-1..-1]
|
@@ -114,38 +142,69 @@ class CSVDiff
|
|
114
142
|
def index_source(lines, options)
|
115
143
|
@lines = {}
|
116
144
|
@index = Hash.new{ |h, k| h[k] = [] }
|
117
|
-
|
145
|
+
if @field_names
|
146
|
+
index_fields(options)
|
147
|
+
end
|
118
148
|
@case_sensitive = options.fetch(:case_sensitive, true)
|
119
149
|
@trim_whitespace = options.fetch(:trim_whitespace, false)
|
150
|
+
@line_count = 0
|
151
|
+
@skip_count = 0
|
120
152
|
line_num = 0
|
121
153
|
lines.each do |row|
|
122
154
|
line_num += 1
|
123
155
|
next if line_num == 1 && @field_names && options[:ignore_header]
|
124
156
|
unless @field_names
|
125
157
|
@field_names = row
|
126
|
-
|
158
|
+
index_fields(options)
|
127
159
|
next
|
128
160
|
end
|
129
161
|
field_vals = row
|
130
162
|
line = {}
|
163
|
+
filter = false
|
131
164
|
@field_names.each_with_index do |field, i|
|
132
165
|
line[field] = field_vals[i]
|
133
166
|
line[field].strip! if @trim_whitespace && line[field]
|
167
|
+
if @include_filter && f = @include_filter[i]
|
168
|
+
filter = !check_filter(f, line[field])
|
169
|
+
end
|
170
|
+
if @exclude_filter && f = @exclude_filter[i]
|
171
|
+
filter = check_filter(f, line[field])
|
172
|
+
end
|
173
|
+
break if filter
|
174
|
+
end
|
175
|
+
if filter
|
176
|
+
@skip_count += 1
|
177
|
+
next
|
134
178
|
end
|
135
|
-
key_values = @
|
179
|
+
key_values = @key_field_indexes.map{ |kf| field_vals[kf].to_s.upcase }
|
136
180
|
key = key_values.join('~')
|
137
181
|
parent_key = key_values[0...(@parent_fields.length)].join('~')
|
138
182
|
parent_key.upcase! unless @case_sensitive
|
139
183
|
if @lines[key]
|
140
184
|
@warnings << "Duplicate key '#{key}' encountered and ignored at line #{line_num}"
|
185
|
+
@skip_count += 1
|
141
186
|
else
|
142
187
|
@index[parent_key] << key
|
143
188
|
@lines[key] = line
|
189
|
+
@line_count += 1
|
144
190
|
end
|
145
191
|
end
|
146
192
|
end
|
147
193
|
|
148
194
|
|
195
|
+
def index_fields(options)
|
196
|
+
@key_field_indexes = find_field_indexes(@key_fields, @field_names)
|
197
|
+
@parent_field_indexes = find_field_indexes(@parent_fields, @field_names)
|
198
|
+
@child_field_indexes = find_field_indexes(@child_fields, @field_names)
|
199
|
+
@key_fields = @key_field_indexes.map{ |i| @field_names[i] }
|
200
|
+
@parent_fields = @parent_field_indexes.map{ |i| @field_names[i] }
|
201
|
+
@child_fields = @child_field_indexes.map{ |i| @field_names[i] }
|
202
|
+
|
203
|
+
@include_filter = convert_filter(options, :include, @field_names)
|
204
|
+
@exclude_filter = convert_filter(options, :exclude, @field_names)
|
205
|
+
end
|
206
|
+
|
207
|
+
|
149
208
|
# Converts an array of field names to an array of indexes of the fields
|
150
209
|
# matching those names.
|
151
210
|
def find_field_indexes(key_fields, field_names)
|
@@ -160,6 +219,35 @@ class CSVDiff
|
|
160
219
|
end
|
161
220
|
end
|
162
221
|
|
222
|
+
|
223
|
+
def convert_filter(options, key, field_names)
|
224
|
+
return unless hsh = options[key]
|
225
|
+
if !hsh.is_a?(Hash)
|
226
|
+
raise ArgumentError, ":#{key} option must be a Hash of field name(s)/index(es) to RegExp(s)"
|
227
|
+
end
|
228
|
+
keys = hsh.keys
|
229
|
+
idxs = find_field_indexes(keys, @field_names)
|
230
|
+
Hash[keys.each_with_index.map{ |k, i| [idxs[i], hsh[k]] }]
|
231
|
+
end
|
232
|
+
|
233
|
+
|
234
|
+
def check_filter(filter, field_val)
|
235
|
+
case filter
|
236
|
+
when String
|
237
|
+
if @case_sensitive
|
238
|
+
filter == field_val
|
239
|
+
else
|
240
|
+
filter.downcase == field_val.to_s.downcase
|
241
|
+
end
|
242
|
+
when Regexp
|
243
|
+
filter.match(field_val)
|
244
|
+
when Proc
|
245
|
+
filter.call(field_val)
|
246
|
+
else
|
247
|
+
raise ArgumentError, "Unsupported filter expression: #{filter.inspect}"
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
163
251
|
end
|
164
252
|
|
165
253
|
end
|
data/lib/csv_diff.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,35 +1,42 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Adam Gardiner
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-05-17 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
|
-
description:
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
description: |2
|
14
|
+
This library performs diffs of CSV files (or table-like sources).
|
15
|
+
|
16
|
+
Unlike a standard diff that compares line by line, and is sensitive to the
|
17
|
+
ordering of records, CSV-Diff identifies common lines by key field(s), and
|
18
|
+
then compares the contents of the fields in each line.
|
19
|
+
|
20
|
+
Data may be supplied in the form of CSV files, or as an array of arrays. The
|
21
|
+
diff process provides a fine level of control over what to diff, and can
|
22
|
+
optionally ignore certain types of changes (e.g. changes in position).
|
23
|
+
|
24
|
+
CSV-Diff is particularly well suited to data in parent-child format. Parent-
|
25
|
+
child data does not lend itself well to standard text diffs, as small changes
|
26
|
+
in the organisation of the tree at an upper level can lead to big movements
|
27
|
+
in the position of descendant records. By instead matching records by key,
|
28
|
+
CSV-Diff avoids this issue, while still being able to detect changes in
|
29
|
+
sibling order.
|
30
|
+
|
31
|
+
This gem implements the core diff algorithm, and handles the loading and
|
32
|
+
diffing of CSV files (or Arrays of Arrays). It returns a CSVDiff object
|
33
|
+
containing the details of differences in object form. This is useful for
|
34
|
+
projects that need diff capability, but want to handle the reporting or
|
35
|
+
actioning of differences themselves.
|
36
|
+
|
37
|
+
For a pre-built diff reporting capability, see the csv-diff-report gem,
|
38
|
+
which provides a command-line tool for generating diff reports in HTML,
|
39
|
+
Excel, or text formats.
|
33
40
|
email: adam.b.gardiner@gmail.com
|
34
41
|
executables: []
|
35
42
|
extensions: []
|
@@ -43,27 +50,27 @@ files:
|
|
43
50
|
- lib/csv-diff.rb
|
44
51
|
- lib/csv_diff.rb
|
45
52
|
homepage: https://github.com/agardiner/csv-diff
|
46
|
-
licenses:
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
47
56
|
post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
|
48
57
|
rdoc_options: []
|
49
58
|
require_paths:
|
50
59
|
- lib
|
51
60
|
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
none: false
|
53
61
|
requirements:
|
54
|
-
- -
|
62
|
+
- - '>='
|
55
63
|
- !ruby/object:Gem::Version
|
56
64
|
version: '0'
|
57
65
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
66
|
requirements:
|
60
|
-
- -
|
67
|
+
- - '>='
|
61
68
|
- !ruby/object:Gem::Version
|
62
69
|
version: '0'
|
63
70
|
requirements: []
|
64
71
|
rubyforge_project:
|
65
|
-
rubygems_version:
|
72
|
+
rubygems_version: 2.0.14.1
|
66
73
|
signing_key:
|
67
|
-
specification_version:
|
74
|
+
specification_version: 4
|
68
75
|
summary: CSV Diff is a library for generating diffs from data in CSV format
|
69
76
|
test_files: []
|