csv-diff 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +1 -1
- data/README.md +18 -1
- data/lib/csv-diff.rb +0 -0
- data/lib/csv-diff/algorithm.rb +116 -35
- data/lib/csv-diff/csv_diff.rb +1 -3
- data/lib/csv-diff/csv_source.rb +94 -6
- data/lib/csv_diff.rb +0 -0
- metadata +36 -29
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 877e2055af0b258a897c546c7d053d95f3ce3a75
|
4
|
+
data.tar.gz: 6c7f2c56091beff466c09d221206e3569ca2c912
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8074e3349c0017154222c243c8f995b4354a656cd913b1786b632103c5224ace09f8b75180e497f441c140ead9ae96f700fc8e918805cd880045b0fd335f1e58
|
7
|
+
data.tar.gz: 295ca04fb380e30b22520ea7c7f6d28fbe7eeb4324cf8c69f476c576572c1b99f1269101b2fe46aa9a2c19a5d9584a974fe0a7254e042961fc03750b1c8b9762
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -125,7 +125,7 @@ column in the data. In this case, a diff can be created simply via:
|
|
125
125
|
diff = CSVDiff.new(file1, file2)
|
126
126
|
```
|
127
127
|
|
128
|
-
###
|
128
|
+
### Specifying Unique Row Identifiers
|
129
129
|
|
130
130
|
Often however, rows are not uniquely identifiable via the first column in the file.
|
131
131
|
In a parent-child hierarchy, for example, combinations of parent and child may be
|
@@ -211,6 +211,23 @@ diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam'
|
|
211
211
|
ignore_fields: ['CreatedAt', 'UpdatedAt'])
|
212
212
|
```
|
213
213
|
|
214
|
+
### Filtering Rows
|
215
|
+
|
216
|
+
If you need to filter source data before running the diff process, you can use the :include
|
217
|
+
and :exclude options to do so. Both options take a Hash as their value; the hash should have
|
218
|
+
keys that are the field names or indexes (0-based) on which to filter, and whose values are
|
219
|
+
strings, regular expressions or lambdas to be applied to values of the corresponding field. Rows will
|
220
|
+
only be diffed if they satisfy :include conditions, and do not satisfy :exclude conditions.
|
221
|
+
```ruby
|
222
|
+
# Generate a diff of Arsenal home games not refereed by Clattenburg
|
223
|
+
diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
|
224
|
+
include: {HomeTeam: 'Arsenal'}, exclude: {Referee: /Clattenburg/})
|
225
|
+
|
226
|
+
# Generate a diff of games played over the Xmas/New Year period
|
227
|
+
diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
|
228
|
+
include: {Date: lambda{ |d| holiday_period.include?(Date.strptime(d, '%y/%m/%d')) } })
|
229
|
+
```
|
230
|
+
|
214
231
|
### Ignoring Certain Changes
|
215
232
|
|
216
233
|
CSVDiff identifies Adds, Updates, Moves and Deletes; any of these changes can be selectively
|
data/lib/csv-diff.rb
CHANGED
File without changes
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -3,6 +3,55 @@ class CSVDiff
|
|
3
3
|
# Implements the CSV diff algorithm.
|
4
4
|
module Algorithm
|
5
5
|
|
6
|
+
# Holds the details of a single difference
|
7
|
+
class Diff
|
8
|
+
|
9
|
+
attr_accessor :diff_type
|
10
|
+
attr_reader :fields
|
11
|
+
attr_reader :row
|
12
|
+
attr_reader :sibling_position
|
13
|
+
|
14
|
+
def initialize(diff_type, fields, row_idx, pos_idx)
|
15
|
+
@diff_type = diff_type
|
16
|
+
@fields = fields
|
17
|
+
@row = row_idx + 1
|
18
|
+
self.sibling_position = pos_idx
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def sibling_position=(pos_idx)
|
23
|
+
if pos_idx.is_a?(Array)
|
24
|
+
pos_idx.compact!
|
25
|
+
if pos_idx.first != pos_idx.last
|
26
|
+
@sibling_position = pos_idx.map{ |pos| pos + 1 }
|
27
|
+
else
|
28
|
+
@sibling_position = pos_idx.first + 1
|
29
|
+
end
|
30
|
+
else
|
31
|
+
@sibling_position = pos_idx + 1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
# For backwards compatibility and access to fields with differences
|
37
|
+
def [](key)
|
38
|
+
case key
|
39
|
+
when String
|
40
|
+
@fields[key]
|
41
|
+
when :action
|
42
|
+
a = diff_type.to_s
|
43
|
+
a[0] = a[0].upcase
|
44
|
+
a
|
45
|
+
when :row
|
46
|
+
@row
|
47
|
+
when :sibling_position
|
48
|
+
@sibling_position
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
|
6
55
|
# Diffs two CSVSource structures.
|
7
56
|
#
|
8
57
|
# @param left [CSVSource] A CSVSource object containing the contents of
|
@@ -22,43 +71,50 @@ class CSVDiff
|
|
22
71
|
# items that exist in both +left+ and +right+.
|
23
72
|
# @option options [Boolean] :ignore_deletes If set to true, we ignore any
|
24
73
|
# items that appear only in +left+ (i.e. deletions).
|
74
|
+
# @option options [Hash<Object,Proc>] :equality_procs A Hash mapping fields
|
75
|
+
# to a 2-arg Proc that should be used to compare values in that field for
|
76
|
+
# equality.
|
25
77
|
def diff_sources(left, right, key_fields, diff_fields, options = {})
|
26
78
|
unless left.case_sensitive? == right.case_sensitive?
|
27
79
|
raise ArgumentError, "Left and right must have same settings for case-sensitivity"
|
28
80
|
end
|
29
|
-
|
81
|
+
|
82
|
+
# Ensure key fields are not also in the diff_fields
|
83
|
+
diff_fields = diff_fields - key_fields
|
84
|
+
|
30
85
|
left_index = left.index
|
31
86
|
left_values = left.lines
|
32
87
|
left_keys = left_values.keys
|
33
88
|
right_index = right.index
|
34
89
|
right_values = right.lines
|
35
90
|
right_keys = right_values.keys
|
36
|
-
|
91
|
+
parent_field_count = left.parent_fields.length
|
37
92
|
|
38
93
|
include_adds = !options[:ignore_adds]
|
39
94
|
include_moves = !options[:ignore_moves]
|
40
95
|
include_updates = !options[:ignore_updates]
|
41
96
|
include_deletes = !options[:ignore_deletes]
|
42
97
|
|
43
|
-
|
98
|
+
@case_sensitive = left.case_sensitive?
|
99
|
+
@equality_procs = options.fetch(:equality_procs, {})
|
100
|
+
|
101
|
+
diffs = {}
|
102
|
+
potential_moves = Hash.new{ |h, k| h[k] = [] }
|
44
103
|
|
45
104
|
# First identify deletions
|
46
105
|
if include_deletes
|
47
106
|
(left_keys - right_keys).each do |key|
|
48
107
|
# Delete
|
49
108
|
key_vals = key.split('~', -1)
|
50
|
-
parent = key_vals[0...
|
109
|
+
parent = key_vals[0...parent_field_count].join('~')
|
110
|
+
child = key_vals[parent_field_count..-1].join('~')
|
51
111
|
left_parent = left_index[parent]
|
52
112
|
left_value = left_values[key]
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
key_fields.each do |field_name|
|
59
|
-
id[field_name] = left_value[field_name]
|
60
|
-
end
|
61
|
-
diffs[key].merge!(id.merge(left_values[key].merge(:action => 'Delete')))
|
113
|
+
row_idx = left_keys.index(key)
|
114
|
+
sib_idx = left_parent.index(key)
|
115
|
+
raise "Can't locate key #{key} in parent #{parent}" unless sib_idx
|
116
|
+
diffs[key] = Diff.new(:delete, left_value, row_idx, sib_idx)
|
117
|
+
potential_moves[child] << key
|
62
118
|
#puts "Delete: #{key}"
|
63
119
|
end
|
64
120
|
end
|
@@ -66,7 +122,7 @@ class CSVDiff
|
|
66
122
|
# Now identify adds/updates
|
67
123
|
right_keys.each_with_index do |key, right_row_id|
|
68
124
|
key_vals = key.split('~', -1)
|
69
|
-
parent = key_vals[0...
|
125
|
+
parent = key_vals[0...parent_field_count].join('~')
|
70
126
|
left_parent = left_index[parent]
|
71
127
|
right_parent = right_index[parent]
|
72
128
|
left_value = left_values[key]
|
@@ -74,13 +130,12 @@ class CSVDiff
|
|
74
130
|
left_idx = left_parent && left_parent.index(key)
|
75
131
|
right_idx = right_parent && right_parent.index(key)
|
76
132
|
|
77
|
-
id = {}
|
78
|
-
id[:row] = right_row_id + 1
|
79
|
-
id[:sibling_position] = right_idx + 1
|
80
|
-
key_fields.each do |field_name|
|
81
|
-
id[field_name] = right_value[field_name]
|
82
|
-
end
|
83
133
|
if left_idx && right_idx
|
134
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
135
|
+
id = id_fields(key_fields, right_value)
|
136
|
+
diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
|
137
|
+
#puts "Change: #{key}"
|
138
|
+
end
|
84
139
|
if include_moves
|
85
140
|
left_common = left_parent & right_parent
|
86
141
|
right_common = right_parent & left_parent
|
@@ -88,19 +143,31 @@ class CSVDiff
|
|
88
143
|
right_pos = right_common.index(key)
|
89
144
|
if left_pos != right_pos
|
90
145
|
# Move
|
91
|
-
diffs[key]
|
92
|
-
|
146
|
+
if d = diffs[key]
|
147
|
+
d.sibling_position = [left_idx, right_idx]
|
148
|
+
else
|
149
|
+
id = id_fields(key_fields, right_value)
|
150
|
+
diffs[key] = Diff.new(:move, id, right_row_id, [left_idx, right_idx])
|
151
|
+
end
|
93
152
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
94
153
|
end
|
95
154
|
end
|
96
|
-
|
97
|
-
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
98
|
-
#puts "Change: #{key}"
|
99
|
-
end
|
100
|
-
elsif include_adds && right_idx
|
155
|
+
elsif right_idx
|
101
156
|
# Add
|
102
|
-
|
103
|
-
|
157
|
+
child = key_vals[parent_field_count..-1].join('~')
|
158
|
+
if potential_moves.has_key?(child) && old_key = potential_moves[child].pop
|
159
|
+
diffs.delete(old_key)
|
160
|
+
if include_updates
|
161
|
+
left_value = left_values[old_key]
|
162
|
+
id = id_fields(right.child_fields, right_value)
|
163
|
+
changes = diff_row(left_value, right_value, left.parent_fields + diff_fields)
|
164
|
+
diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
|
165
|
+
#puts "Update Parent: #{key}"
|
166
|
+
end
|
167
|
+
elsif include_adds
|
168
|
+
diffs[key] = Diff.new(:add, right_value, right_row_id, right_idx)
|
169
|
+
#puts "Add: #{key}"
|
170
|
+
end
|
104
171
|
end
|
105
172
|
end
|
106
173
|
|
@@ -116,27 +183,41 @@ class CSVDiff
|
|
116
183
|
# @param right_row [Hash] The version of the CSV row from the right/to
|
117
184
|
# file.
|
118
185
|
# @param fields [Array<String>] An array of field names to compare.
|
119
|
-
# @param case_sensitive [Boolean] Whether field comparisons should be
|
120
|
-
# case sensitive or not.
|
121
186
|
# @return [Hash<String, Array>] A Hash whose keys are the fields that
|
122
187
|
# contain differences, and whose values are a two-element array of
|
123
188
|
# [left/from, right/to] values.
|
124
|
-
def diff_row(left_row, right_row, fields
|
189
|
+
def diff_row(left_row, right_row, fields)
|
125
190
|
diffs = {}
|
126
191
|
fields.each do |attr|
|
192
|
+
eq_proc = @equality_procs[attr]
|
127
193
|
right_val = right_row[attr]
|
128
194
|
right_val = nil if right_val == ""
|
129
195
|
left_val = left_row[attr]
|
130
196
|
left_val = nil if left_val == ""
|
131
|
-
if
|
132
|
-
|
197
|
+
if eq_proc
|
198
|
+
diffs[attr] = [left_val, right_val] unless eq_proc.call(left_val, right_val)
|
199
|
+
elsif @case_sensitive
|
200
|
+
diffs[attr] = [left_val, right_val] unless left_val == right_val
|
201
|
+
elsif (left_val.to_s.upcase != right_val.to_s.upcase)
|
133
202
|
diffs[attr] = [left_val, right_val]
|
134
|
-
#puts "#{attr}: #{left_val} -> #{right_val}"
|
135
203
|
end
|
136
204
|
end
|
137
205
|
diffs if diffs.size > 0
|
138
206
|
end
|
139
207
|
|
208
|
+
|
209
|
+
private
|
210
|
+
|
211
|
+
|
212
|
+
# Return a hash containing just the key field values
|
213
|
+
def id_fields(key_fields, fields)
|
214
|
+
id = {}
|
215
|
+
key_fields.each do |field_name|
|
216
|
+
id[field_name] = fields[field_name]
|
217
|
+
end
|
218
|
+
id
|
219
|
+
end
|
220
|
+
|
140
221
|
end
|
141
222
|
|
142
223
|
end
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -87,7 +87,7 @@ class CSVDiff
|
|
87
87
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
88
88
|
@warnings = []
|
89
89
|
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options)
|
90
|
-
@key_fields = @left.key_fields
|
90
|
+
@key_fields = @left.key_fields
|
91
91
|
diff(options)
|
92
92
|
end
|
93
93
|
|
@@ -148,8 +148,6 @@ class CSVDiff
|
|
148
148
|
right_fields.each_with_index do |fld, i|
|
149
149
|
if left_fields.include?(fld)
|
150
150
|
diff_fields << fld unless ignore_fields.include?(fld.upcase)
|
151
|
-
else
|
152
|
-
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
153
151
|
end
|
154
152
|
end
|
155
153
|
else
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -6,6 +6,7 @@ class CSVDiff
|
|
6
6
|
|
7
7
|
# @return [String] the path to the source file
|
8
8
|
attr_accessor :path
|
9
|
+
|
9
10
|
# @return [Array<String>] The names of the fields in the source file
|
10
11
|
attr_reader :field_names
|
11
12
|
# @return [Array<String>] The names of the field(s) that uniquely
|
@@ -17,6 +18,17 @@ class CSVDiff
|
|
17
18
|
# @return [Array<String>] The names of the field(s) that distinguish a
|
18
19
|
# child of a parent record.
|
19
20
|
attr_reader :child_fields
|
21
|
+
|
22
|
+
# @return [Array<Fixnum>] The indexes of the key fields in the source
|
23
|
+
# file.
|
24
|
+
attr_reader :key_field_indexes
|
25
|
+
# @return [Array<Fixnum>] The indexes of the parent fields in the source
|
26
|
+
# file.
|
27
|
+
attr_reader :parent_field_indexes
|
28
|
+
# @return [Array<Fixnum>] The indexes of the child fields in the source
|
29
|
+
# file.
|
30
|
+
attr_reader :child_field_indexes
|
31
|
+
|
20
32
|
# @return [Boolean] True if the source has been indexed with case-
|
21
33
|
# sensitive keys, or false if it has been indexed using upper-case key
|
22
34
|
# values.
|
@@ -34,6 +46,12 @@ class CSVDiff
|
|
34
46
|
# @return [Array<String>] An array of any warnings encountered while
|
35
47
|
# processing the source.
|
36
48
|
attr_reader :warnings
|
49
|
+
# @return [Fixnum] A count of the lines processed from this source.
|
50
|
+
# Excludes any header, and any filtered or duplicate records identified during indexing.
|
51
|
+
attr_reader :line_count
|
52
|
+
# @return [Fixnum] A count of the lines from this source that were skipped,
|
53
|
+
# due either to duplicate keys or filter conditions.
|
54
|
+
attr_reader :skip_count
|
37
55
|
|
38
56
|
|
39
57
|
# Creates a new diff source.
|
@@ -72,8 +90,14 @@ class CSVDiff
|
|
72
90
|
# @option options [String] :child_field The name of the field(s) that
|
73
91
|
# uniquely identify a child of a parent.
|
74
92
|
# @option options [Boolean] :case_sensitive If true (the default), keys
|
75
|
-
#
|
76
|
-
#
|
93
|
+
# are indexed as-is; if false, the index is built in upper-case for
|
94
|
+
# case-insensitive comparisons.
|
95
|
+
# @option options [Hash] :include A hash of field name(s) or index(es) to
|
96
|
+
# filter(s): a String, Regexp, or Proc. Only source rows whose field values
|
97
|
+
# satisfy the filters will be indexed and included in the diff process.
|
98
|
+
# @option options [Hash] :exclude A hash of field name(s) or index(es) to
|
99
|
+
# filter(s): a String, Regexp, or Proc. Source rows with a field value that
|
99
|
+
# satisfies the filters will be excluded from the diff process.
|
77
101
|
def initialize(source, options = {})
|
78
102
|
if source.is_a?(String)
|
79
103
|
require 'csv'
|
@@ -81,8 +105,12 @@ class CSVDiff
|
|
81
105
|
csv_options = options.fetch(:csv_options, {})
|
82
106
|
@path = source
|
83
107
|
source = CSV.open(@path, mode_string, csv_options).readlines
|
108
|
+
elsif !source.is_a?(Enumerable) || (source.is_a?(Enumerable) && source.size > 0 &&
|
109
|
+
!source.first.is_a?(Enumerable))
|
110
|
+
raise ArgumentError, "source must be a path to a file or an Enumerable<Enumerable>"
|
84
111
|
end
|
85
|
-
if
|
112
|
+
if (options.keys & [:parent_field, :parent_fields, :child_field, :child_fields]).empty? &&
|
113
|
+
(kf = options.fetch(:key_field, options[:key_fields]))
|
86
114
|
@key_fields = [kf].flatten
|
87
115
|
@parent_fields = @key_fields[0...-1]
|
88
116
|
@child_fields = @key_fields[-1..-1]
|
@@ -114,38 +142,69 @@ class CSVDiff
|
|
114
142
|
def index_source(lines, options)
|
115
143
|
@lines = {}
|
116
144
|
@index = Hash.new{ |h, k| h[k] = [] }
|
117
|
-
|
145
|
+
if @field_names
|
146
|
+
index_fields(options)
|
147
|
+
end
|
118
148
|
@case_sensitive = options.fetch(:case_sensitive, true)
|
119
149
|
@trim_whitespace = options.fetch(:trim_whitespace, false)
|
150
|
+
@line_count = 0
|
151
|
+
@skip_count = 0
|
120
152
|
line_num = 0
|
121
153
|
lines.each do |row|
|
122
154
|
line_num += 1
|
123
155
|
next if line_num == 1 && @field_names && options[:ignore_header]
|
124
156
|
unless @field_names
|
125
157
|
@field_names = row
|
126
|
-
|
158
|
+
index_fields(options)
|
127
159
|
next
|
128
160
|
end
|
129
161
|
field_vals = row
|
130
162
|
line = {}
|
163
|
+
filter = false
|
131
164
|
@field_names.each_with_index do |field, i|
|
132
165
|
line[field] = field_vals[i]
|
133
166
|
line[field].strip! if @trim_whitespace && line[field]
|
167
|
+
if @include_filter && f = @include_filter[i]
|
168
|
+
filter = !check_filter(f, line[field])
|
169
|
+
end
|
170
|
+
if @exclude_filter && f = @exclude_filter[i]
|
171
|
+
filter = check_filter(f, line[field])
|
172
|
+
end
|
173
|
+
break if filter
|
174
|
+
end
|
175
|
+
if filter
|
176
|
+
@skip_count += 1
|
177
|
+
next
|
134
178
|
end
|
135
|
-
key_values = @
|
179
|
+
key_values = @key_field_indexes.map{ |kf| field_vals[kf].to_s.upcase }
|
136
180
|
key = key_values.join('~')
|
137
181
|
parent_key = key_values[0...(@parent_fields.length)].join('~')
|
138
182
|
parent_key.upcase! unless @case_sensitive
|
139
183
|
if @lines[key]
|
140
184
|
@warnings << "Duplicate key '#{key}' encountered and ignored at line #{line_num}"
|
185
|
+
@skip_count += 1
|
141
186
|
else
|
142
187
|
@index[parent_key] << key
|
143
188
|
@lines[key] = line
|
189
|
+
@line_count += 1
|
144
190
|
end
|
145
191
|
end
|
146
192
|
end
|
147
193
|
|
148
194
|
|
195
|
+
def index_fields(options)
|
196
|
+
@key_field_indexes = find_field_indexes(@key_fields, @field_names)
|
197
|
+
@parent_field_indexes = find_field_indexes(@parent_fields, @field_names)
|
198
|
+
@child_field_indexes = find_field_indexes(@child_fields, @field_names)
|
199
|
+
@key_fields = @key_field_indexes.map{ |i| @field_names[i] }
|
200
|
+
@parent_fields = @parent_field_indexes.map{ |i| @field_names[i] }
|
201
|
+
@child_fields = @child_field_indexes.map{ |i| @field_names[i] }
|
202
|
+
|
203
|
+
@include_filter = convert_filter(options, :include, @field_names)
|
204
|
+
@exclude_filter = convert_filter(options, :exclude, @field_names)
|
205
|
+
end
|
206
|
+
|
207
|
+
|
149
208
|
# Converts an array of field names to an array of indexes of the fields
|
150
209
|
# matching those names.
|
151
210
|
def find_field_indexes(key_fields, field_names)
|
@@ -160,6 +219,35 @@ class CSVDiff
|
|
160
219
|
end
|
161
220
|
end
|
162
221
|
|
222
|
+
|
223
|
+
def convert_filter(options, key, field_names)
|
224
|
+
return unless hsh = options[key]
|
225
|
+
if !hsh.is_a?(Hash)
|
226
|
+
raise ArgumentError, ":#{key} option must be a Hash of field name(s)/index(es) to RegExp(s)"
|
227
|
+
end
|
228
|
+
keys = hsh.keys
|
229
|
+
idxs = find_field_indexes(keys, @field_names)
|
230
|
+
Hash[keys.each_with_index.map{ |k, i| [idxs[i], hsh[k]] }]
|
231
|
+
end
|
232
|
+
|
233
|
+
|
234
|
+
def check_filter(filter, field_val)
|
235
|
+
case filter
|
236
|
+
when String
|
237
|
+
if @case_sensitive
|
238
|
+
filter == field_val
|
239
|
+
else
|
240
|
+
filter.downcase == field_val.to_s.downcase
|
241
|
+
end
|
242
|
+
when Regexp
|
243
|
+
filter.match(field_val)
|
244
|
+
when Proc
|
245
|
+
filter.call(field_val)
|
246
|
+
else
|
247
|
+
raise ArgumentError, "Unsupported filter expression: #{filter.inspect}"
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
163
251
|
end
|
164
252
|
|
165
253
|
end
|
data/lib/csv_diff.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,35 +1,42 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Adam Gardiner
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2017-05-17 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
|
-
description:
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
13
|
+
description: |2
|
14
|
+
This library performs diffs of CSV files (or table-like sources).
|
15
|
+
|
16
|
+
Unlike a standard diff that compares line by line, and is sensitive to the
|
17
|
+
ordering of records, CSV-Diff identifies common lines by key field(s), and
|
18
|
+
then compares the contents of the fields in each line.
|
19
|
+
|
20
|
+
Data may be supplied in the form of CSV files, or as an array of arrays. The
|
21
|
+
diff process provides a fine level of control over what to diff, and can
|
22
|
+
optionally ignore certain types of changes (e.g. changes in position).
|
23
|
+
|
24
|
+
CSV-Diff is particularly well suited to data in parent-child format. Parent-
|
25
|
+
child data does not lend itself well to standard text diffs, as small changes
|
26
|
+
in the organisation of the tree at an upper level can lead to big movements
|
27
|
+
in the position of descendant records. By instead matching records by key,
|
28
|
+
CSV-Diff avoids this issue, while still being able to detect changes in
|
29
|
+
sibling order.
|
30
|
+
|
31
|
+
This gem implements the core diff algorithm, and handles the loading and
|
32
|
+
diffing of CSV files (or Arrays of Arrays). It returns a CSVDiff object
|
33
|
+
containing the details of differences in object form. This is useful for
|
34
|
+
projects that need diff capability, but want to handle the reporting or
|
35
|
+
actioning of differences themselves.
|
36
|
+
|
37
|
+
For a pre-built diff reporting capability, see the csv-diff-report gem,
|
38
|
+
which provides a command-line tool for generating diff reports in HTML,
|
39
|
+
Excel, or text formats.
|
33
40
|
email: adam.b.gardiner@gmail.com
|
34
41
|
executables: []
|
35
42
|
extensions: []
|
@@ -43,27 +50,27 @@ files:
|
|
43
50
|
- lib/csv-diff.rb
|
44
51
|
- lib/csv_diff.rb
|
45
52
|
homepage: https://github.com/agardiner/csv-diff
|
46
|
-
licenses:
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
47
56
|
post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
|
48
57
|
rdoc_options: []
|
49
58
|
require_paths:
|
50
59
|
- lib
|
51
60
|
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
-
none: false
|
53
61
|
requirements:
|
54
|
-
- -
|
62
|
+
- - '>='
|
55
63
|
- !ruby/object:Gem::Version
|
56
64
|
version: '0'
|
57
65
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
66
|
requirements:
|
60
|
-
- -
|
67
|
+
- - '>='
|
61
68
|
- !ruby/object:Gem::Version
|
62
69
|
version: '0'
|
63
70
|
requirements: []
|
64
71
|
rubyforge_project:
|
65
|
-
rubygems_version:
|
72
|
+
rubygems_version: 2.0.14.1
|
66
73
|
signing_key:
|
67
|
-
specification_version:
|
74
|
+
specification_version: 4
|
68
75
|
summary: CSV Diff is a library for generating diffs from data in CSV format
|
69
76
|
test_files: []
|