csv-diff 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZGY3NTBiZGFiNTQxMmZiZmMwOTE2MDIzYmEwNWQ3NTY0MzZmNjRkMg==
5
+ data.tar.gz: !binary |-
6
+ NzAyNTY1OTk3MTA3Y2ZhNjk2YWRmNTJkYTljNGZhZDY2YjQ1OTg2ZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZDRmODg2NDlhYTY4NDM1MWYxNjgyZGQ0MGRiZDNkYmI3MDkwYjFkZWVhOWYw
10
+ YmVkNDVhMjk1M2EyNjFkZGIxOGE0Y2MwOWQwMWRhNzhjZDk2N2RhZmEyZGRm
11
+ MjliNmM4Y2ZmNzY4ZTJkY2EzZWY4Mjg3NmU3ZjQxM2RkYTBjODE=
12
+ data.tar.gz: !binary |-
13
+ YTljOGZhNDY0YzdjZGYzMTA2NTM3MzIwNDg4MTcwYWEyM2IyZTc1YWYxMjFm
14
+ YjMwYWU1NWMzNGVkZGRkYWYyZjUwMTQ2MWZlMjdkNjQwMjIwYWUwNmNlYjM3
15
+ NDlmZDk5MGNlMTk4ZDhlMzFiOGUyZTIwY2EyZTY3MjUwYjc2NWY=
@@ -31,7 +31,6 @@ class CSVDiff
31
31
  right_keys.each_with_index do |key, right_row_id|
32
32
  key_vals = key.split('~')
33
33
  parent = key_vals[0...parent_fields].join('~')
34
- child = key_vals[parent_fields..-1].join('~')
35
34
  left_parent = left_index[parent]
36
35
  right_parent = right_index[parent]
37
36
  left_value = left_values[key]
@@ -58,7 +57,7 @@ class CSVDiff
58
57
  #puts "Move #{left_idx} -> #{right_idx}: #{key}"
59
58
  end
60
59
  end
61
- if include_updates && (changes = diff_row(left_values[key], right_values[key], diff_fields))
60
+ if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
62
61
  diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
63
62
  #puts "Change: #{key}"
64
63
  end
@@ -75,7 +74,6 @@ class CSVDiff
75
74
  # Delete
76
75
  key_vals = key.split('~')
77
76
  parent = key_vals[0...parent_fields].join('~')
78
- child = key_vals[parent_fields..-1].join('~')
79
77
  left_parent = left_index[parent]
80
78
  left_value = left_values[key]
81
79
  left_idx = left_parent.index(key)
@@ -45,11 +45,15 @@ class CSVDiff
45
45
  # child id, and the remaining fields (if any) are the parent field(s) that
46
46
  # uniquely qualify the child instance.
47
47
  #
48
- # @param left [Array<Array<String>>] An Array of lines, each of which is in
49
- # turn an Array containing fields.
50
- # @param right [Array<Array<String>>] An Array of lines, each of which is in
51
- # turn an Array containing fields.
48
+ # @param left [Array|String|CSVSource] An Array of lines, each of which is
49
+ # an Array of fields, or a String specifying a path to a CSV file, or a
50
+ # CSVSource object.
51
+ # @param right [Array|String|CSVSource] An Array of lines, each of which is
52
+ # an Array of fields, or a String specifying a path to a CSV file, or a
53
+ # CSVSource object.
52
54
  # @param options [Hash] A hash containing options.
55
+ # @option options [String] :encoding The encoding to use when opening the
56
+ # CSV files.
53
57
  # @option options [Array<String>] :field_names An Array of field names for
54
58
  # each field in +left+ and +right+. If not provided, the first row is
55
59
  # assumed to contain field names.
@@ -75,12 +79,12 @@ class CSVDiff
75
79
  # @option options [Boolean] :ignore_deletes If true, records that appear
76
80
  # in the left/from file but not in the right/to file are not reported.
77
81
  def initialize(left, right, options = {})
78
- @left = CSVSource.new(left, options)
82
+ @left = left.is_a?(CSVSource) ? left : CSVSource.new(left, options)
79
83
  raise "No field names found in left (from) source" unless @left.field_names && @left.field_names.size > 0
80
- @right = CSVSource.new(right, options)
84
+ @right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
81
85
  raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
82
86
  @warnings = []
83
- @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options.fetch(:ignore_fields, []))
87
+ @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options[:ignore_fields])
84
88
  @key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
85
89
  diff(options)
86
90
  end
@@ -98,7 +102,7 @@ class CSVDiff
98
102
  unless @summary
99
103
  @summary = Hash.new{ |h, k| h[k] = 0 }
100
104
  @diffs.each{ |k, v| @summary[v[:action]] += 1 }
101
- @summary['Warnings'] = warnings.size if warnings.size > 0
105
+ @summary['Warning'] = warnings.size if warnings.size > 0
102
106
  end
103
107
  @summary
104
108
  end
@@ -112,13 +116,19 @@ class CSVDiff
112
116
  end
113
117
 
114
118
 
115
- # @return [Array<String>] an array of warning messages generated during the
116
- # diff process.
119
+ # @return [Array<String>] an array of warning messages generated from the
120
+ # sources and the diff process.
117
121
  def warnings
118
122
  @left.warnings + @right.warnings + @warnings
119
123
  end
120
124
 
121
125
 
126
+ # @return [Array<String>] an array of warning messages from the diff process.
127
+ def diff_warnings
128
+ @warnings
129
+ end
130
+
131
+
122
132
  private
123
133
 
124
134
 
@@ -126,9 +136,10 @@ class CSVDiff
126
136
  # in both, on which members can be diffed.
127
137
  def get_diff_fields(left_fields, right_fields, ignore_fields)
128
138
  diff_fields = []
129
- right_fields.each do |fld|
139
+ right_fields.each_with_index do |fld, i|
130
140
  if left_fields.include?(fld)
131
- diff_fields << fld unless ignore_fields.include?(fld)
141
+ diff_fields << fld unless ignore_fields && (ignore_fields.include?(fld) ||
142
+ ignore_fields.include?(i))
132
143
  else
133
144
  @warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
134
145
  end
@@ -47,8 +47,8 @@ class CSVDiff
47
47
  # Array of Arrays containing CSV data. If the :field_names option is
48
48
  # not specified, the first line must contain the names of the fields.
49
49
  # @param options [Hash] An options hash.
50
- # @option options [String] :mode_string The mode to use when opening the
51
- # CSV file. Defaults to 'r'.
50
+ # @option options [String] :encoding The encoding to use when opening the
51
+ # CSV file.
52
52
  # @option options [Hash] :csv_options Any options you wish to pass to
53
53
  # CSV.open, e.g. :col_sep.
54
54
  # @option options [Array<String>] :field_names The names of each of the
@@ -66,7 +66,7 @@ class CSVDiff
66
66
  def initialize(source, options = {})
67
67
  if source.is_a?(String)
68
68
  require 'csv'
69
- mode_string = options.fetch(:mode_string, 'r')
69
+ mode_string = options[:encoding] ? "r:#{options[:encoding]}" : 'r'
70
70
  csv_options = options.fetch(:csv_options, {})
71
71
  @path = source
72
72
  source = CSV.open(@path, mode_string, csv_options).readlines
@@ -76,8 +76,8 @@ class CSVDiff
76
76
  @parent_fields = @key_fields[0...-1]
77
77
  @child_fields = @key_fields[-1..-1]
78
78
  else
79
- @parent_fields = [options.fetch(:parent_field, options.fetch(:parent_fields, []))].flatten
80
- @child_fields = [options.fetch(:child_field, options.fetch(:child_fields, [0]))].flatten
79
+ @parent_fields = [options.fetch(:parent_field, options[:parent_fields]) || []].flatten
80
+ @child_fields = [options.fetch(:child_field, options[:child_fields]) || [0]].flatten
81
81
  @key_fields = @parent_fields + @child_fields
82
82
  end
83
83
  @field_names = options[:field_names]
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
5
- prerelease:
4
+ version: '0.2'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Adam Gardiner
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-05-30 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: ! " This library performs diffs of CSV files.\n\n Unlike
15
14
  a standard diff that compares line by line, and is sensitive to the\n ordering
@@ -28,35 +27,34 @@ executables: []
28
27
  extensions: []
29
28
  extra_rdoc_files: []
30
29
  files:
31
- - README.md
32
30
  - LICENSE
31
+ - README.md
32
+ - lib/csv-diff.rb
33
33
  - lib/csv-diff/algorithm.rb
34
34
  - lib/csv-diff/csv_diff.rb
35
35
  - lib/csv-diff/csv_source.rb
36
- - lib/csv-diff.rb
37
36
  - lib/csv_diff.rb
38
37
  homepage: https://github.com/agardiner/csv-diff
39
38
  licenses: []
40
- post_install_message:
39
+ metadata: {}
40
+ post_install_message: For command-line tools and diff reports, 'gem install csv-diff-report'
41
41
  rdoc_options: []
42
42
  require_paths:
43
43
  - lib
44
44
  required_ruby_version: !ruby/object:Gem::Requirement
45
- none: false
46
45
  requirements:
47
46
  - - ! '>='
48
47
  - !ruby/object:Gem::Version
49
48
  version: '0'
50
49
  required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
50
  requirements:
53
51
  - - ! '>='
54
52
  - !ruby/object:Gem::Version
55
53
  version: '0'
56
54
  requirements: []
57
55
  rubyforge_project:
58
- rubygems_version: 1.8.23
56
+ rubygems_version: 2.4.1
59
57
  signing_key:
60
- specification_version: 3
58
+ specification_version: 4
61
59
  summary: CSV Diff is a library for generating diffs from data in CSV format
62
60
  test_files: []