csv-diff 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/csv-diff/algorithm.rb +1 -3
- data/lib/csv-diff/csv_diff.rb +23 -12
- data/lib/csv-diff/csv_source.rb +5 -5
- metadata +8 -10
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZGY3NTBiZGFiNTQxMmZiZmMwOTE2MDIzYmEwNWQ3NTY0MzZmNjRkMg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NzAyNTY1OTk3MTA3Y2ZhNjk2YWRmNTJkYTljNGZhZDY2YjQ1OTg2ZQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDRmODg2NDlhYTY4NDM1MWYxNjgyZGQ0MGRiZDNkYmI3MDkwYjFkZWVhOWYw
|
10
|
+
YmVkNDVhMjk1M2EyNjFkZGIxOGE0Y2MwOWQwMWRhNzhjZDk2N2RhZmEyZGRm
|
11
|
+
MjliNmM4Y2ZmNzY4ZTJkY2EzZWY4Mjg3NmU3ZjQxM2RkYTBjODE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YTljOGZhNDY0YzdjZGYzMTA2NTM3MzIwNDg4MTcwYWEyM2IyZTc1YWYxMjFm
|
14
|
+
YjMwYWU1NWMzNGVkZGRkYWYyZjUwMTQ2MWZlMjdkNjQwMjIwYWUwNmNlYjM3
|
15
|
+
NDlmZDk5MGNlMTk4ZDhlMzFiOGUyZTIwY2EyZTY3MjUwYjc2NWY=
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -31,7 +31,6 @@ class CSVDiff
|
|
31
31
|
right_keys.each_with_index do |key, right_row_id|
|
32
32
|
key_vals = key.split('~')
|
33
33
|
parent = key_vals[0...parent_fields].join('~')
|
34
|
-
child = key_vals[parent_fields..-1].join('~')
|
35
34
|
left_parent = left_index[parent]
|
36
35
|
right_parent = right_index[parent]
|
37
36
|
left_value = left_values[key]
|
@@ -58,7 +57,7 @@ class CSVDiff
|
|
58
57
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
59
58
|
end
|
60
59
|
end
|
61
|
-
if include_updates && (changes = diff_row(
|
60
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
62
61
|
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
63
62
|
#puts "Change: #{key}"
|
64
63
|
end
|
@@ -75,7 +74,6 @@ class CSVDiff
|
|
75
74
|
# Delete
|
76
75
|
key_vals = key.split('~')
|
77
76
|
parent = key_vals[0...parent_fields].join('~')
|
78
|
-
child = key_vals[parent_fields..-1].join('~')
|
79
77
|
left_parent = left_index[parent]
|
80
78
|
left_value = left_values[key]
|
81
79
|
left_idx = left_parent.index(key)
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -45,11 +45,15 @@ class CSVDiff
|
|
45
45
|
# child id, and the remaining fields (if any) are the parent field(s) that
|
46
46
|
# uniquely qualify the child instance.
|
47
47
|
#
|
48
|
-
# @param left [Array
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
48
|
+
# @param left [Array|String|CSVSource] An Array of lines, each of which is
|
49
|
+
# an Array of fields, or a String specifying a path to a CSV file, or a
|
50
|
+
# CSVSource object.
|
51
|
+
# @param right [Array|String|CSVSource] An Array of lines, each of which is
|
52
|
+
# an Array of fields, or a String specifying a path to a CSV file, or a
|
53
|
+
# CSVSource object.
|
52
54
|
# @param options [Hash] A hash containing options.
|
55
|
+
# @option options [String] :encoding The encoding to use when opening the
|
56
|
+
# CSV files.
|
53
57
|
# @option options [Array<String>] :field_names An Array of field names for
|
54
58
|
# each field in +left+ and +right+. If not provided, the first row is
|
55
59
|
# assumed to contain field names.
|
@@ -75,12 +79,12 @@ class CSVDiff
|
|
75
79
|
# @option options [Boolean] :ignore_deletes If true, records that appear
|
76
80
|
# in the left/from file but not in the right/to file are not reported.
|
77
81
|
def initialize(left, right, options = {})
|
78
|
-
@left = CSVSource.new(left, options)
|
82
|
+
@left = left.is_a?(CSVSource) ? left : CSVSource.new(left, options)
|
79
83
|
raise "No field names found in left (from) source" unless @left.field_names && @left.field_names.size > 0
|
80
|
-
@right = CSVSource.new(right, options)
|
84
|
+
@right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
|
81
85
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
82
86
|
@warnings = []
|
83
|
-
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options
|
87
|
+
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options[:ignore_fields])
|
84
88
|
@key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
|
85
89
|
diff(options)
|
86
90
|
end
|
@@ -98,7 +102,7 @@ class CSVDiff
|
|
98
102
|
unless @summary
|
99
103
|
@summary = Hash.new{ |h, k| h[k] = 0 }
|
100
104
|
@diffs.each{ |k, v| @summary[v[:action]] += 1 }
|
101
|
-
@summary['
|
105
|
+
@summary['Warning'] = warnings.size if warnings.size > 0
|
102
106
|
end
|
103
107
|
@summary
|
104
108
|
end
|
@@ -112,13 +116,19 @@ class CSVDiff
|
|
112
116
|
end
|
113
117
|
|
114
118
|
|
115
|
-
# @return [Array<String>] an array of warning messages generated
|
116
|
-
# diff process.
|
119
|
+
# @return [Array<String>] an array of warning messages generated from the
|
120
|
+
# sources and the diff process.
|
117
121
|
def warnings
|
118
122
|
@left.warnings + @right.warnings + @warnings
|
119
123
|
end
|
120
124
|
|
121
125
|
|
126
|
+
# @return [Array<String>] an array of warning messages from the diff process.
|
127
|
+
def diff_warnings
|
128
|
+
@warnings
|
129
|
+
end
|
130
|
+
|
131
|
+
|
122
132
|
private
|
123
133
|
|
124
134
|
|
@@ -126,9 +136,10 @@ class CSVDiff
|
|
126
136
|
# in both, on which members can be diffed.
|
127
137
|
def get_diff_fields(left_fields, right_fields, ignore_fields)
|
128
138
|
diff_fields = []
|
129
|
-
right_fields.
|
139
|
+
right_fields.each_with_index do |fld, i|
|
130
140
|
if left_fields.include?(fld)
|
131
|
-
diff_fields << fld unless ignore_fields.include?(fld)
|
141
|
+
diff_fields << fld unless ignore_fields && (ignore_fields.include?(fld) ||
|
142
|
+
ignore_fields.include?(i))
|
132
143
|
else
|
133
144
|
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
134
145
|
end
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -47,8 +47,8 @@ class CSVDiff
|
|
47
47
|
# Array of Arrays containing CSV data. If the :field_names option is
|
48
48
|
# not specified, the first line must contain the names of the fields.
|
49
49
|
# @param options [Hash] An options hash.
|
50
|
-
# @option options [String] :
|
51
|
-
# CSV file.
|
50
|
+
# @option options [String] :encoding The encoding to use when opening the
|
51
|
+
# CSV file.
|
52
52
|
# @option options [Hash] :csv_options Any options you wish to pass to
|
53
53
|
# CSV.open, e.g. :col_sep.
|
54
54
|
# @option options [Array<String>] :field_names The names of each of the
|
@@ -66,7 +66,7 @@ class CSVDiff
|
|
66
66
|
def initialize(source, options = {})
|
67
67
|
if source.is_a?(String)
|
68
68
|
require 'csv'
|
69
|
-
mode_string = options
|
69
|
+
mode_string = options[:encoding] ? "r:#{options[:encoding]}" : 'r'
|
70
70
|
csv_options = options.fetch(:csv_options, {})
|
71
71
|
@path = source
|
72
72
|
source = CSV.open(@path, mode_string, csv_options).readlines
|
@@ -76,8 +76,8 @@ class CSVDiff
|
|
76
76
|
@parent_fields = @key_fields[0...-1]
|
77
77
|
@child_fields = @key_fields[-1..-1]
|
78
78
|
else
|
79
|
-
@parent_fields = [options.fetch(:parent_field, options
|
80
|
-
@child_fields = [options.fetch(:child_field, options
|
79
|
+
@parent_fields = [options.fetch(:parent_field, options[:parent_fields]) || []].flatten
|
80
|
+
@child_fields = [options.fetch(:child_field, options[:child_fields]) || [0]].flatten
|
81
81
|
@key_fields = @parent_fields + @child_fields
|
82
82
|
end
|
83
83
|
@field_names = options[:field_names]
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
5
|
-
prerelease:
|
4
|
+
version: '0.2'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Adam Gardiner
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-08-11 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: ! " This library performs diffs of CSV files.\n\n Unlike
|
15
14
|
a standard diff that compares line by line, and is sensitive to the\n ordering
|
@@ -28,35 +27,34 @@ executables: []
|
|
28
27
|
extensions: []
|
29
28
|
extra_rdoc_files: []
|
30
29
|
files:
|
31
|
-
- README.md
|
32
30
|
- LICENSE
|
31
|
+
- README.md
|
32
|
+
- lib/csv-diff.rb
|
33
33
|
- lib/csv-diff/algorithm.rb
|
34
34
|
- lib/csv-diff/csv_diff.rb
|
35
35
|
- lib/csv-diff/csv_source.rb
|
36
|
-
- lib/csv-diff.rb
|
37
36
|
- lib/csv_diff.rb
|
38
37
|
homepage: https://github.com/agardiner/csv-diff
|
39
38
|
licenses: []
|
40
|
-
|
39
|
+
metadata: {}
|
40
|
+
post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
|
41
41
|
rdoc_options: []
|
42
42
|
require_paths:
|
43
43
|
- lib
|
44
44
|
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
45
|
requirements:
|
47
46
|
- - ! '>='
|
48
47
|
- !ruby/object:Gem::Version
|
49
48
|
version: '0'
|
50
49
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
50
|
requirements:
|
53
51
|
- - ! '>='
|
54
52
|
- !ruby/object:Gem::Version
|
55
53
|
version: '0'
|
56
54
|
requirements: []
|
57
55
|
rubyforge_project:
|
58
|
-
rubygems_version:
|
56
|
+
rubygems_version: 2.4.1
|
59
57
|
signing_key:
|
60
|
-
specification_version:
|
58
|
+
specification_version: 4
|
61
59
|
summary: CSV Diff is a library for generating diffs from data in CSV format
|
62
60
|
test_files: []
|