csv-diff 0.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZGY3NTBiZGFiNTQxMmZiZmMwOTE2MDIzYmEwNWQ3NTY0MzZmNjRkMg==
4
+ MzM3MmMzNzU5ZDE5ZTA5MGI4OGQxNTBkMTE0NWM1MzYzNDVkYTFjYQ==
5
5
  data.tar.gz: !binary |-
6
- NzAyNTY1OTk3MTA3Y2ZhNjk2YWRmNTJkYTljNGZhZDY2YjQ1OTg2ZQ==
6
+ MDQ3Yzk3ZDc4ZTZiNTMwMzc1NGMxMTU5ZTBkMzdjMTMyNzE5OTYwMg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZDRmODg2NDlhYTY4NDM1MWYxNjgyZGQ0MGRiZDNkYmI3MDkwYjFkZWVhOWYw
10
- YmVkNDVhMjk1M2EyNjFkZGIxOGE0Y2MwOWQwMWRhNzhjZDk2N2RhZmEyZGRm
11
- MjliNmM4Y2ZmNzY4ZTJkY2EzZWY4Mjg3NmU3ZjQxM2RkYTBjODE=
9
+ YTYxMDM1MjUxNDk3ZjE3YWUxZWJjNGFmMzQzMDUyNGJlOGUzMmI2MDVlNjg4
10
+ OWJiYmQxNTI2MjBlMmQzNGFkZTk0ZGY1Y2I0ZDBkODljYjc4NDQ1ZDY1ODky
11
+ ZDU4MDUwZjVjYWU0MTE5NTExMmExYTM0NTY1ZGMzYzc5NDkxYmE=
12
12
  data.tar.gz: !binary |-
13
- YTljOGZhNDY0YzdjZGYzMTA2NTM3MzIwNDg4MTcwYWEyM2IyZTc1YWYxMjFm
14
- YjMwYWU1NWMzNGVkZGRkYWYyZjUwMTQ2MWZlMjdkNjQwMjIwYWUwNmNlYjM3
15
- NDlmZDk5MGNlMTk4ZDhlMzFiOGUyZTIwY2EyZTY3MjUwYjc2NWY=
13
+ YzU4YWM2MmZjMjE4MjlhZDgxN2IxNmI1NmU2YjFiZTcwN2ZlNTZlYzE5MzZm
14
+ NGJkZDNkODUzNGNlNjA2NGZlZWIyMWFmMjljOTQyNjE3OGU4YWFmY2UyZWE2
15
+ YzhlZmI0YTcxMGY3N2Y2ODQ5NTM3ODcyMTUzOTJkODhjNjY0YWM=
@@ -13,7 +13,20 @@ class CSVDiff
13
13
  # that uniquely identify each row.
14
14
  # @param diff_fields [Array] An array containing the names of the fields
15
15
  # to be diff-ed.
16
+ # @param options [Hash] An options hash.
17
+ # @option options [Boolean] :ignore_adds If set to true, we ignore any
18
+ # new items that appear only in +right+.
19
+ # @option options [Boolean] :ignore_moves If set to true, we ignore any
20
+ # changes in sibling order.
21
+ # @option options [Boolean] :ignore_updates If set to true, we ignore any
22
+ # items that exist in both +left+ and +right+.
23
+ # @option options [Boolean] :ignore_deletes If set to true, we ignore any
24
+ # items that appear only in +left+.
16
25
  def diff_sources(left, right, key_fields, diff_fields, options = {})
26
+ unless left.case_sensitive? == right.case_sensitive?
27
+ raise ArgumentError, "Left and right must have same settings for case-sensitivity"
28
+ end
29
+ case_sensitive = left.case_sensitive?
17
30
  left_index = left.index
18
31
  left_values = left.lines
19
32
  left_keys = left_values.keys
@@ -57,7 +70,7 @@ class CSVDiff
57
70
  #puts "Move #{left_idx} -> #{right_idx}: #{key}"
58
71
  end
59
72
  end
60
- if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
73
+ if include_updates && (changes = diff_row(left_value, right_value, diff_fields, case_sensitive))
61
74
  diffs[key].merge!(id.merge(changes.merge(:action => 'Update')))
62
75
  #puts "Change: #{key}"
63
76
  end
@@ -99,17 +112,21 @@ class CSVDiff
99
112
  # file.
100
113
  # @param right_row [Hash] The version of the CSV row from the right/to
101
114
  # file.
115
+ # @param fields [Array<String>] An array of field names to compare.
116
+ # @param case_sensitive [Boolean] Whether field comparisons should be
117
+ # case sensitive or not.
102
118
  # @return [Hash<String, Array>] A Hash whose keys are the fields that
103
119
  # contain differences, and whose values are a two-element array of
104
120
  # [left/from, right/to] values.
105
- def diff_row(left_row, right_row, fields)
121
+ def diff_row(left_row, right_row, fields, case_sensitive)
106
122
  diffs = {}
107
123
  fields.each do |attr|
108
124
  right_val = right_row[attr]
109
125
  right_val = nil if right_val == ""
110
126
  left_val = left_row[attr]
111
127
  left_val = nil if left_val == ""
112
- if left_val != right_val
128
+ if (case_sensitive && left_val != right_val) ||
129
+ (left_val.to_s.upcase != right_val.to_s.upcase)
113
130
  diffs[attr] = [left_val, right_val]
114
131
  #puts "#{attr}: #{left_val} -> #{right_val}"
115
132
  end
@@ -28,13 +28,15 @@ class CSVDiff
28
28
  # @return [Array<String>] An array of field names that are compared in the
29
29
  # diff process.
30
30
  attr_reader :diff_fields
31
- # @return [Array<Fixnum>] An array of field indexes identifying the key
32
- # fields that uniquely identify each row.
31
+ # @return [Array<String>] An array of field names of the key fields that
32
+ # uniquely identify each row.
33
33
  attr_reader :key_fields
34
34
  # @return [Array<String>] An array of field names for the parent field(s).
35
35
  attr_reader :parent_fields
36
36
  # @return [Array<String>] An array of field names for the child field(s).
37
37
  attr_reader :child_fields
38
+ # @return [Hash] The options hash used for the diff.
39
+ attr_reader :options
38
40
 
39
41
 
40
42
  # Generates a diff between two hierarchical tree structures, provided
@@ -84,7 +86,7 @@ class CSVDiff
84
86
  @right = right.is_a?(CSVSource) ? right : CSVSource.new(right, options)
85
87
  raise "No field names found in right (to) source" unless @right.field_names && @right.field_names.size > 0
86
88
  @warnings = []
87
- @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options[:ignore_fields])
89
+ @diff_fields = get_diff_fields(@left.field_names, @right.field_names, options)
88
90
  @key_fields = @left.key_fields.map{ |kf| @diff_fields[kf] }
89
91
  diff(options)
90
92
  end
@@ -94,6 +96,7 @@ class CSVDiff
94
96
  def diff(options = {})
95
97
  @summary = nil
96
98
  @diffs = diff_sources(@left, @right, @key_fields, @diff_fields, options)
99
+ @options = options
97
100
  end
98
101
 
99
102
 
@@ -134,15 +137,21 @@ class CSVDiff
134
137
 
135
138
  # Given two sets of field names, determines the common set of fields present
136
139
  # in both, on which members can be diffed.
137
- def get_diff_fields(left_fields, right_fields, ignore_fields)
140
+ def get_diff_fields(left_fields, right_fields, options)
141
+ ignore_fields = (options[:ignore_fields] || []).map do |f|
142
+ f.is_a?(Fixnum) ? right_fields[f] : f
143
+ end
138
144
  diff_fields = []
139
- right_fields.each_with_index do |fld, i|
140
- if left_fields.include?(fld)
141
- diff_fields << fld unless ignore_fields && (ignore_fields.include?(fld) ||
142
- ignore_fields.include?(i))
143
- else
144
- @warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
145
+ if options[:diff_common_fields_only]
146
+ right_fields.each_with_index do |fld, i|
147
+ if left_fields.include?(fld)
148
+ diff_fields << fld unless ignore_fields.include?(fld)
149
+ else
150
+ @warnings << "Field '#{fld}' is missing from the left (from) file, and won't be diffed"
151
+ end
145
152
  end
153
+ else
154
+ diff_fields = (right_fields + left_fields).uniq.reject{ |fld| ignore_fields.include?(fld) }
146
155
  end
147
156
  diff_fields
148
157
  end
@@ -17,6 +17,11 @@ class CSVDiff
17
17
  # @return [Array<String>] The names of the field(s) that distinguish a
18
18
  # child of a parent record.
19
19
  attr_reader :child_fields
20
+ # @return [Boolean] True if the source has been indexed with case-
21
+ # sensitive keys, or false if it has been indexed using upper-case key
22
+ # values.
23
+ attr_reader :case_sensitive
24
+ alias_method :case_sensitive?, :case_sensitive
20
25
  # @return [Hash<String,Hash>] A hash containing each line of the source,
21
26
  # keyed on the values of the +key_fields+.
22
27
  attr_reader :lines
@@ -59,10 +64,13 @@ class CSVDiff
59
64
  # identifies each row.
60
65
  # @option options [Array<String>] :key_fields The names of the fields
61
66
  # that uniquely identify each row.
62
- # @option options [String] :parent_field The name of the field that
63
- # identifies a parent within which sibling order should be checked.
64
- # @option options [String] :child_field The name of the field that
65
- # uniquely identifies a child of a parent.
67
+ # @option options [String] :parent_field The name of the field(s) that
68
+ # identify a parent within which sibling order should be checked.
69
+ # @option options [String] :child_field The name of the field(s) that
70
+ # uniquely identify a child of a parent.
71
+ # @option options [Boolean] :case_sensitive If true (the default), keys
72
+ # are indexed as-is; if false, the index is built in upper-case for
73
+ # case-insensitive comparisons.
66
74
  def initialize(source, options = {})
67
75
  if source.is_a?(String)
68
76
  require 'csv'
@@ -104,6 +112,7 @@ class CSVDiff
104
112
  @lines = {}
105
113
  @index = Hash.new{ |h, k| h[k] = [] }
106
114
  @key_fields = find_field_indexes(@key_fields, @field_names) if @field_names
115
+ @case_sensitive = options.fetch(:case_sensitive, true)
107
116
  line_num = 0
108
117
  lines.each do |row|
109
118
  line_num += 1
@@ -121,6 +130,7 @@ class CSVDiff
121
130
  key_values = @key_fields.map{ |kf| field_vals[kf].to_s.upcase }
122
131
  key = key_values.join('~')
123
132
  parent_key = key_values[0...(@parent_fields.length)].join('~')
133
+ parent_key.upcase! unless @case_sensitive
124
134
  if @lines[key]
125
135
  @warnings << "Duplicate key '#{key}' encountered and ignored at line #{line_num}"
126
136
  else
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-diff
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Gardiner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-11 00:00:00.000000000 Z
11
+ date: 2014-08-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: ! " This library performs diffs of CSV files.\n\n Unlike
14
14
  a standard diff that compares line by line, and is sensitive to the\n ordering
@@ -21,7 +21,14 @@ description: ! " This library performs diffs of CSV files.\n\n Unl
21
21
  lend itself well to standard text diffs, as small changes\n in the organisation
22
22
  of the tree at an upper level can lead to big movements\n in the position
23
23
  of descendant records. By instead matching records by key,\n CSV-Diff avoids
24
- this issue, while still being able to detect changes in\n sibling order.\n"
24
+ this issue, while still being able to detect changes in\n sibling order.\n\n
25
+ \ This gem implements the core diff algorithm, and handles the loading and\n
26
+ \ diffing of CSV files. It returns a CSVDiff object, that contains the details\n
27
+ \ of differences in object form. This is useful for projects that need diff\n
28
+ \ capability, but want to handle the reporting of differences themselves.
29
+ For\n a pre-built diff reporting capability, see the csv-diff-report gem,
30
+ which\n provides a command-line tool for generating diff reports in HTML
31
+ or Excel\n format.\n"
25
32
  email: adam.b.gardiner@gmail.com
26
33
  executables: []
27
34
  extensions: []