csv-diff 0.1 → 0.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/csv-diff/algorithm.rb +1 -3
- data/lib/csv-diff/csv_diff.rb +23 -12
- data/lib/csv-diff/csv_source.rb +5 -5
- metadata +8 -10
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZGY3NTBiZGFiNTQxMmZiZmMwOTE2MDIzYmEwNWQ3NTY0MzZmNjRkMg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NzAyNTY1OTk3MTA3Y2ZhNjk2YWRmNTJkYTljNGZhZDY2YjQ1OTg2ZQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDRmODg2NDlhYTY4NDM1MWYxNjgyZGQ0MGRiZDNkYmI3MDkwYjFkZWVhOWYw
|
10
|
+
YmVkNDVhMjk1M2EyNjFkZGIxOGE0Y2MwOWQwMWRhNzhjZDk2N2RhZmEyZGRm
|
11
|
+
MjliNmM4Y2ZmNzY4ZTJkY2EzZWY4Mjg3NmU3ZjQxM2RkYTBjODE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YTljOGZhNDY0YzdjZGYzMTA2NTM3MzIwNDg4MTcwYWEyM2IyZTc1YWYxMjFm
|
14
|
+
YjMwYWU1NWMzNGVkZGRkYWYyZjUwMTQ2MWZlMjdkNjQwMjIwYWUwNmNlYjM3
|
15
|
+
NDlmZDk5MGNlMTk4ZDhlMzFiOGUyZTIwY2EyZTY3MjUwYjc2NWY=
|
data/lib/csv-diff/algorithm.rb
CHANGED
@@ -31,7 +31,6 @@ class CSVDiff
|
|
31
31
|
right_keys.each_with_index do |key, right_row_id|
|
32
32
|
key_vals = key.split('~')
|
33
33
|
parent = key_vals[0...parent_fields].join('~')
|
34
|
-
child = key_vals[parent_fields..-1].join('~')
|
35
34
|
left_parent = left_index[parent]
|
36
35
|
right_parent = right_index[parent]
|
37
36
|
left_value = left_values[key]
|
@@ -58,7 +57,7 @@ class CSVDiff
|
|
58
57
|
#puts "Move #{left_idx} -> #{right_idx}: #{key}"
|
59
58
|
end
|
60
59
|
end
|
61
|
-
if include_updates && (changes = diff_row(
|
60
|
+
if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
|
62
61
|
diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
|
63
62
|
#puts "Change: #{key}"
|
64
63
|
end
|
@@ -75,7 +74,6 @@ class CSVDiff
|
|
75
74
|
# Delete
|
76
75
|
key_vals = key.split('~')
|
77
76
|
parent = key_vals[0...parent_fields].join('~')
|
78
|
-
child = key_vals[parent_fields..-1].join('~')
|
79
77
|
left_parent = left_index[parent]
|
80
78
|
left_value = left_values[key]
|
81
79
|
left_idx = left_parent.index(key)
|
data/lib/csv-diff/csv_diff.rb
CHANGED
@@ -45,11 +45,15 @@ class CSVDiff
|
|
45
45
|
# child id, and the remaining fields (if any) are the parent field(s) that
|
46
46
|
# uniquely qualify the child instance.
|
47
47
|
#
|
48
|
-
# @param left [Array
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
48
|
+
# @param left [Array|String|CSVSource] An Array of lines, each of which is in
|
49
|
+
# an Array of fields, or a String specifying a path to a CSV file, or a
|
50
|
+
# CSVSource object.
|
51
|
+
# @param right [Array|String|CSVSource] An Array of lines, each of which is
|
52
|
+
# an Array of fields, or a String specifying a path to a CSV file, or a
|
53
|
+
# CSVSource object.
|
52
54
|
# @param options [Hash] A hash containing options.
|
55
|
+
# @option options [String] :encoding The encoding to use when opening the
|
56
|
+
# CSV files.
|
53
57
|
# @option options [Array<String>] :field_names An Array of field names for
|
54
58
|
# each field in +left+ and +right+. If not provided, the first row is
|
55
59
|
# assumed to contain field names.
|
@@ -75,12 +79,12 @@ class CSVDiff
|
|
75
79
|
# @option options [Boolean] :ignore_deletes If true, records that appear
|
76
80
|
# in the left/from file but not in the right/to file are not reported.
|
77
81
|
def initialize(left, right, options = {})
|
78
|
-
@left = CSVSource.new(left, options)
|
82
|
+
@left = left.is_a?(CSVSource) ? left : CSVSource.new(left, options)
|
79
83
|
raise "No field names found in left (from) source" unless @left.field_names && @left.field_names.size > 0
|
80
|
-
@right = CSVSource.new(right, options)
|
84
|
+
@right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
|
81
85
|
raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
|
82
86
|
@warnings = []
|
83
|
-
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options
|
87
|
+
@diff_fields = get_diff_fields(@left.field_names, @right.field_names, options[:ignore_fields])
|
84
88
|
@key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
|
85
89
|
diff(options)
|
86
90
|
end
|
@@ -98,7 +102,7 @@ class CSVDiff
|
|
98
102
|
unless @summary
|
99
103
|
@summary = Hash.new{ |h, k| h[k] = 0 }
|
100
104
|
@diffs.each{ |k, v| @summary[v[:action]] += 1 }
|
101
|
-
@summary['
|
105
|
+
@summary['Warning'] = warnings.size if warnings.size > 0
|
102
106
|
end
|
103
107
|
@summary
|
104
108
|
end
|
@@ -112,13 +116,19 @@ class CSVDiff
|
|
112
116
|
end
|
113
117
|
|
114
118
|
|
115
|
-
# @return [Array<String>] an array of warning messages generated
|
116
|
-
# diff process.
|
119
|
+
# @return [Array<String>] an array of warning messages generated from the
|
120
|
+
# sources and the diff process.
|
117
121
|
def warnings
|
118
122
|
@left.warnings + @right.warnings + @warnings
|
119
123
|
end
|
120
124
|
|
121
125
|
|
126
|
+
# @return [Array<String>] an array of warning messages from the diff process.
|
127
|
+
def diff_warnings
|
128
|
+
@warnings
|
129
|
+
end
|
130
|
+
|
131
|
+
|
122
132
|
private
|
123
133
|
|
124
134
|
|
@@ -126,9 +136,10 @@ class CSVDiff
|
|
126
136
|
# in both, on which members can be diffed.
|
127
137
|
def get_diff_fields(left_fields, right_fields, ignore_fields)
|
128
138
|
diff_fields = []
|
129
|
-
right_fields.
|
139
|
+
right_fields.each_with_index do |fld, i|
|
130
140
|
if left_fields.include?(fld)
|
131
|
-
diff_fields << fld unless ignore_fields.include?(fld)
|
141
|
+
diff_fields << fld unless ignore_fields && (ignore_fields.include?(fld) ||
|
142
|
+
ignore_fields.include?(i))
|
132
143
|
else
|
133
144
|
@warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
|
134
145
|
end
|
data/lib/csv-diff/csv_source.rb
CHANGED
@@ -47,8 +47,8 @@ class CSVDiff
|
|
47
47
|
# Array of Arrays containing CSV data. If the :field_names option is
|
48
48
|
# not specified, the first line must contain the names of the fields.
|
49
49
|
# @param options [Hash] An options hash.
|
50
|
-
# @option options [String] :
|
51
|
-
# CSV file.
|
50
|
+
# @option options [String] :encoding The encoding to use when opening the
|
51
|
+
# CSV file.
|
52
52
|
# @option options [Hash] :csv_options Any options you wish to pass to
|
53
53
|
# CSV.open, e.g. :col_sep.
|
54
54
|
# @option options [Array<String>] :field_names The names of each of the
|
@@ -66,7 +66,7 @@ class CSVDiff
|
|
66
66
|
def initialize(source, options = {})
|
67
67
|
if source.is_a?(String)
|
68
68
|
require 'csv'
|
69
|
-
mode_string = options
|
69
|
+
mode_string = options[:encoding] ? "r:#{options[:encoding]}" : 'r'
|
70
70
|
csv_options = options.fetch(:csv_options, {})
|
71
71
|
@path = source
|
72
72
|
source = CSV.open(@path, mode_string, csv_options).readlines
|
@@ -76,8 +76,8 @@ class CSVDiff
|
|
76
76
|
@parent_fields = @key_fields[0...-1]
|
77
77
|
@child_fields = @key_fields[-1..-1]
|
78
78
|
else
|
79
|
-
@parent_fields = [options.fetch(:parent_field, options
|
80
|
-
@child_fields = [options.fetch(:child_field, options
|
79
|
+
@parent_fields = [options.fetch(:parent_field, options[:parent_fields]) || []].flatten
|
80
|
+
@child_fields = [options.fetch(:child_field, options[:child_fields]) || [0]].flatten
|
81
81
|
@key_fields = @parent_fields + @child_fields
|
82
82
|
end
|
83
83
|
@field_names = options[:field_names]
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
5
|
-
prerelease:
|
4
|
+
version: '0.2'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Adam Gardiner
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-08-11 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: ! " This library performs diffs of CSV files.\n\n Unlike
|
15
14
|
a standard diff that compares line by line, and is sensitive to the\n ordering
|
@@ -28,35 +27,34 @@ executables: []
|
|
28
27
|
extensions: []
|
29
28
|
extra_rdoc_files: []
|
30
29
|
files:
|
31
|
-
- README.md
|
32
30
|
- LICENSE
|
31
|
+
- README.md
|
32
|
+
- lib/csv-diff.rb
|
33
33
|
- lib/csv-diff/algorithm.rb
|
34
34
|
- lib/csv-diff/csv_diff.rb
|
35
35
|
- lib/csv-diff/csv_source.rb
|
36
|
-
- lib/csv-diff.rb
|
37
36
|
- lib/csv_diff.rb
|
38
37
|
homepage: https://github.com/agardiner/csv-diff
|
39
38
|
licenses: []
|
40
|
-
|
39
|
+
metadata: {}
|
40
|
+
post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
|
41
41
|
rdoc_options: []
|
42
42
|
require_paths:
|
43
43
|
- lib
|
44
44
|
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
45
|
requirements:
|
47
46
|
- - ! '>='
|
48
47
|
- !ruby/object:Gem::Version
|
49
48
|
version: '0'
|
50
49
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
50
|
requirements:
|
53
51
|
- - ! '>='
|
54
52
|
- !ruby/object:Gem::Version
|
55
53
|
version: '0'
|
56
54
|
requirements: []
|
57
55
|
rubyforge_project:
|
58
|
-
rubygems_version:
|
56
|
+
rubygems_version: 2.4.1
|
59
57
|
signing_key:
|
60
|
-
specification_version:
|
58
|
+
specification_version: 4
|
61
59
|
summary: CSV Diff is a library for generating diffs from data in CSV format
|
62
60
|
test_files: []
|