csv-diff 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZGY3NTBiZGFiNTQxMmZiZmMwOTE2MDIzYmEwNWQ3NTY0MzZmNjRkMg==
5
+ data.tar.gz: !binary |-
6
+ NzAyNTY1OTk3MTA3Y2ZhNjk2YWRmNTJkYTljNGZhZDY2YjQ1OTg2ZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZDRmODg2NDlhYTY4NDM1MWYxNjgyZGQ0MGRiZDNkYmI3MDkwYjFkZWVhOWYw
10
+ YmVkNDVhMjk1M2EyNjFkZGIxOGE0Y2MwOWQwMWRhNzhjZDk2N2RhZmEyZGRm
11
+ MjliNmM4Y2ZmNzY4ZTJkY2EzZWY4Mjg3NmU3ZjQxM2RkYTBjODE=
12
+ data.tar.gz: !binary |-
13
+ YTljOGZhNDY0YzdjZGYzMTA2NTM3MzIwNDg4MTcwYWEyM2IyZTc1YWYxMjFm
14
+ YjMwYWU1NWMzNGVkZGRkYWYyZjUwMTQ2MWZlMjdkNjQwMjIwYWUwNmNlYjM3
15
+ NDlmZDk5MGNlMTk4ZDhlMzFiOGUyZTIwY2EyZTY3MjUwYjc2NWY=
@@ -31,7 +31,6 @@ class CSVDiff
31
31
  right_keys.each_with_index do |key, right_row_id|
32
32
  key_vals = key.split('~')
33
33
  parent = key_vals[0...parent_fields].join('~')
34
- child = key_vals[parent_fields..-1].join('~')
35
34
  left_parent = left_index[parent]
36
35
  right_parent = right_index[parent]
37
36
  left_value = left_values[key]
@@ -58,7 +57,7 @@ class CSVDiff
58
57
  #puts "Move #{left_idx} -> #{right_idx}: #{key}"
59
58
  end
60
59
  end
61
- if include_updates && (changes = diff_row(left_values[key], right_values[key], diff_fields))
60
+ if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
62
61
  diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
63
62
  #puts "Change: #{key}"
64
63
  end
@@ -75,7 +74,6 @@ class CSVDiff
75
74
  # Delete
76
75
  key_vals = key.split('~')
77
76
  parent = key_vals[0...parent_fields].join('~')
78
- child = key_vals[parent_fields..-1].join('~')
79
77
  left_parent = left_index[parent]
80
78
  left_value = left_values[key]
81
79
  left_idx = left_parent.index(key)
@@ -45,11 +45,15 @@ class CSVDiff
45
45
  # child id, and the remaining fields (if any) are the parent field(s) that
46
46
  # uniquely qualify the child instance.
47
47
  #
48
- # @param left [Array<Array<String>>] An Array of lines, each of which is in
49
- # turn an Array containing fields.
50
- # @param right [Array<Array<String>>] An Array of lines, each of which is in
51
- # turn an Array containing fields.
48
+ # @param left [Array|String|CSVSource] An Array of lines, each of which is in
49
+ # an Array of fields, or a String specifying a path to a CSV file, or a
50
+ # CSVSource object.
51
+ # @param right [Array|String|CSVSource] An Array of lines, each of which is
52
+ # an Array of fields, or a String specifying a path to a CSV file, or a
53
+ # CSVSource object.
52
54
  # @param options [Hash] A hash containing options.
55
+ # @option options [String] :encoding The encoding to use when opening the
56
+ # CSV files.
53
57
  # @option options [Array<String>] :field_names An Array of field names for
54
58
  # each field in +left+ and +right+. If not provided, the first row is
55
59
  # assumed to contain field names.
@@ -75,12 +79,12 @@ class CSVDiff
75
79
  # @option options [Boolean] :ignore_deletes If true, records that appear
76
80
  # in the left/from file but not in the right/to file are not reported.
77
81
  def initialize(left, right, options = {})
78
- @left = CSVSource.new(left, options)
82
+ @left = left.is_a?(CSVSource) ? left : CSVSource.new(left, options)
79
83
  raise "No field names found in left (from) source" unless @left.field_names && @left.field_names.size > 0
80
- @right = CSVSource.new(right, options)
84
+ @right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
81
85
  raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
82
86
  @warnings = []
83
- @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options.fetch(:ignore_fields, []))
87
+ @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options[:ignore_fields])
84
88
  @key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
85
89
  diff(options)
86
90
  end
@@ -98,7 +102,7 @@ class CSVDiff
98
102
  unless @summary
99
103
  @summary = Hash.new{ |h, k| h[k] = 0 }
100
104
  @diffs.each{ |k, v| @summary[v[:action]] += 1 }
101
- @summary['Warnings'] = warnings.size if warnings.size > 0
105
+ @summary['Warning'] = warnings.size if warnings.size > 0
102
106
  end
103
107
  @summary
104
108
  end
@@ -112,13 +116,19 @@ class CSVDiff
112
116
  end
113
117
 
114
118
 
115
- # @return [Array<String>] an array of warning messages generated during the
116
- # diff process.
119
+ # @return [Array<String>] an array of warning messages generated from the
120
+ # sources and the diff process.
117
121
  def warnings
118
122
  @left.warnings + @right.warnings + @warnings
119
123
  end
120
124
 
121
125
 
126
+ # @return [Array<String>] an array of warning messages from the diff process.
127
+ def diff_warnings
128
+ @warnings
129
+ end
130
+
131
+
122
132
  private
123
133
 
124
134
 
@@ -126,9 +136,10 @@ class CSVDiff
126
136
  # in both, on which members can be diffed.
127
137
  def get_diff_fields(left_fields, right_fields, ignore_fields)
128
138
  diff_fields = []
129
- right_fields.each do |fld|
139
+ right_fields.each_with_index do |fld, i|
130
140
  if left_fields.include?(fld)
131
- diff_fields << fld unless ignore_fields.include?(fld)
141
+ diff_fields << fld unless ignore_fields && (ignore_fields.include?(fld) ||
142
+ ignore_fields.include?(i))
132
143
  else
133
144
  @warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
134
145
  end
@@ -47,8 +47,8 @@ class CSVDiff
47
47
  # Array of Arrays containing CSV data. If the :field_names option is
48
48
  # not specified, the first line must contain the names of the fields.
49
49
  # @param options [Hash] An options hash.
50
- # @option options [String] :mode_string The mode to use when opening the
51
- # CSV file. Defaults to 'r'.
50
+ # @option options [String] :encoding The encoding to use when opening the
51
+ # CSV file.
52
52
  # @option options [Hash] :csv_options Any options you wish to pass to
53
53
  # CSV.open, e.g. :col_sep.
54
54
  # @option options [Array<String>] :field_names The names of each of the
@@ -66,7 +66,7 @@ class CSVDiff
66
66
  def initialize(source, options = {})
67
67
  if source.is_a?(String)
68
68
  require 'csv'
69
- mode_string = options.fetch(:mode_string, 'r')
69
+ mode_string = options[:encoding] ? "r:#{options[:encoding]}" : 'r'
70
70
  csv_options = options.fetch(:csv_options, {})
71
71
  @path = source
72
72
  source = CSV.open(@path, mode_string, csv_options).readlines
@@ -76,8 +76,8 @@ class CSVDiff
76
76
  @parent_fields = @key_fields[0...-1]
77
77
  @child_fields = @key_fields[-1..-1]
78
78
  else
79
- @parent_fields = [options.fetch(:parent_field, options.fetch(:parent_fields, []))].flatten
80
- @child_fields = [options.fetch(:child_field, options.fetch(:child_fields, [0]))].flatten
79
+ @parent_fields = [options.fetch(:parent_field, options[:parent_fields]) || []].flatten
80
+ @child_fields = [options.fetch(:child_field, options[:child_fields]) || [0]].flatten
81
81
  @key_fields = @parent_fields + @child_fields
82
82
  end
83
83
  @field_names = options[:field_names]
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
5
- prerelease:
4
+ version: '0.2'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Adam Gardiner
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-05-30 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: ! " This library performs diffs of CSV files.\n\n Unlike
15
14
  a standard diff that compares line by line, and is sensitive to the\n ordering
@@ -28,35 +27,34 @@ executables: []
28
27
  extensions: []
29
28
  extra_rdoc_files: []
30
29
  files:
31
- - README.md
32
30
  - LICENSE
31
+ - README.md
32
+ - lib/csv-diff.rb
33
33
  - lib/csv-diff/algorithm.rb
34
34
  - lib/csv-diff/csv_diff.rb
35
35
  - lib/csv-diff/csv_source.rb
36
- - lib/csv-diff.rb
37
36
  - lib/csv_diff.rb
38
37
  homepage: https://github.com/agardiner/csv-diff
39
38
  licenses: []
40
- post_install_message:
39
+ metadata: {}
40
+ post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
41
41
  rdoc_options: []
42
42
  require_paths:
43
43
  - lib
44
44
  required_ruby_version: !ruby/object:Gem::Requirement
45
- none: false
46
45
  requirements:
47
46
  - - ! '>='
48
47
  - !ruby/object:Gem::Version
49
48
  version: '0'
50
49
  required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
50
  requirements:
53
51
  - - ! '>='
54
52
  - !ruby/object:Gem::Version
55
53
  version: '0'
56
54
  requirements: []
57
55
  rubyforge_project:
58
- rubygems_version: 1.8.23
56
+ rubygems_version: 2.4.1
59
57
  signing_key:
60
- specification_version: 3
58
+ specification_version: 4
61
59
  summary: CSV Diff is a library for generating diffs from data in CSV format
62
60
  test_files: []