csv-diff-report 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZTAxMGE1NTA2MjRhMjkzOGUyNThmMGQ1MjJmMGUwY2FjOWUzNzg2OQ==
5
+ data.tar.gz: !binary |-
6
+ NDdhNTE0M2M5MWFmNDUyYTE4NTgyMWIzMWY0NWNkOTYwMGFhM2FmNA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YjU2ZGZjMTJlOTUzYzE1YTE5MmZiOTZkYTRhZjBhOWVmODRiMDlhMDI5ZDZh
10
+ ZTUzYjUxYzNlOGYzMDZiYTgzNjU1YTE2ZDkzZTk4Mjk1ZTQxMmI2MzAyYzM1
11
+ OGFmOWEzMTVjZDM0NThlYmExMWJlZjQwZmZkNWVkYWU1Nzk3ZTQ=
12
+ data.tar.gz: !binary |-
13
+ N2I3NjgzYjQ0YTc5OTJkOTY2YWM3MTQyMDdiZjJiODdmNTQ5MDQ2MjBjZTYx
14
+ MjRhMDBiMTY1NDE0ZjI1YTE2NTUwZWFjZDcyMDFiMDg1MzNkMmU3MjVkNDBi
15
+ MDZkMWY2Yzk4YmVlYjZlMWNiZDQyYzIyMTg5MjA3MzJlODYxZmQ=
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013, Adam Gardiner
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
17
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
20
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
21
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,264 @@
1
+ # CSV-Diff Report
2
+
3
+ CSV-Diff Report is a command-line tool for generating diff reports in Excel or
4
+ HTML format from CSV files. It uses the CSV-Diff gem to perform diffs, and adds
5
+ to that library the ability to generate formatted reports, and a command-line
6
+ tool `csvdiff` for running diffs between files or directories.
7
+
8
+
9
+ ## CSV-Diff
10
+
11
+ Unlike a standard diff that compares line by line, and is sensitive to the
12
+ ordering of records, CSV-Diff identifies common lines by key field(s), and
13
+ then compares the contents of the fields in each line.
14
+
15
+ CSV-Diff is particularly well suited to data in parent-child format. Parent-
16
+ child data does not lend itself well to standard text diffs, as small changes
17
+ in the organisation of the tree at an upper level can lead to big movements
18
+ in the position of descendant records. By instead matching records by key,
19
+ CSV-Diff avoids this issue, while still being able to detect changes in
20
+ sibling order.
21
+
22
+
23
+ ## Usage
24
+
25
+ CSV-Diff Report is supplied as a gem, and has dependencies on a few small libraries.
26
+ To install it, simply:
27
+ ```
28
+ gem install csv-diff-report
29
+ ```
30
+
31
+ To compare two CSV files where the field names are in the first row of the file,
32
+ and the first field contains the unique key for each record, simply use:
33
+ ```
34
+ csvdiff <file1> <file2>
35
+ ```
36
+
37
+ The `csvdiff` command-line tool provides many options to control behaviour of
38
+ the diff and reporting process. To see all available options, run:
39
+ ```
40
+ csvdiff --help
41
+ ```
42
+
43
+ This will display a help screen like the following:
44
+ ```
45
+ CSV-Diff
46
+ ========
47
+
48
+ Generate a diff report between two files using the CSV-Diff algorithm.
49
+
50
+
51
+ USAGE
52
+ -----
53
+ ruby /usr/local/opt/ruby/bin/csvdiff FROM TO [OPTIONS]
54
+
55
+ FROM The file or dir to use as the left or from source in the diff
56
+ TO The file or dir to use as the right or to source in the diff
57
+
58
+
59
+ OPTIONS
60
+ -------
61
+
62
+ Source Options
63
+ --pattern PATTERN A file name pattern to use to filter matching files if a directory diff is
64
+ being performed
65
+ [Default: *]
66
+ --field-names FIELD-NAMES A comma-separated list of field names for each field in the source files
67
+ --parent-fields PARENT-FIELDS The parent field name(s) or index(es)
68
+ --child-fields CHILD-FIELDS The child field name(s) or index(es)
69
+ --key-fields KEY-FIELDS The key field name(s) or index(es)
70
+ --encoding ENCODING The encoding to use when opening the CSV files
71
+ --ignore-header If true, the first line in each source file is ignored; requires the use of
72
+ the --field-names option to name the fields
73
+
74
+ Diff Options
75
+ --ignore-fields IGNORE-FIELDS The names or indexes of any fields to be ignored during the diff
76
+ --ignore-adds If true, items in TO that are not in FROM are ignored
77
+ --ignore-deletes If true, items in FROM that are not in TO are ignored
78
+ --ignore-updates If true, changes to non-key properties are ignored
79
+ --ignore-moves If true, changes in an item's position are ignored
80
+
81
+ Output Options
82
+ --format FORMAT The format in which to produce the diff report
83
+ [Default: HTML]
84
+ --output OUTPUT The path to save the diff report to. If not specified, the diff report will
85
+ be placed in the same directory as the FROM file, and will be named
86
+ Diff_<FROM>_to_<TO>.<FORMAT>
87
+
88
+ ```
89
+
90
+ ## Unique Row Identifiers
91
+
92
+ CSVDiff is preferable over a standard line-by-line diff when row order is
93
+ significantly impacted by small changes. The classic example is a parent-child
94
+ file generated by a hierarchy traversal. A simple change in position of a parent
95
+ member near the root of the hierarchy will have a large impact on the positions
96
+ of all descendant rows. Consider the following example:
97
+ ```
98
+ Root
99
+ |- A
100
+ | |- A1
101
+ | |- A2
102
+ |
103
+ |- B
104
+ |- B1
105
+ |- B2
106
+ ```
107
+
108
+ A hierarchy traversal of this tree into a parent-child format would generate a CSV
109
+ as follows:
110
+ ```
111
+ Root,A
112
+ A,A1
113
+ A,A2
114
+ Root,B
115
+ B,B1
116
+ B,B2
117
+ ```
118
+
119
+ If the positions of A and B were swapped, a hierarchy traversal would now produce a CSV
120
+ as follows:
121
+ ```
122
+ Root,B
123
+ B,B1
124
+ B,B2
125
+ Root,A
126
+ A,A1
127
+ A,A2
128
+ ```
129
+
130
+ A simple diff using a diff utility would highlight this as 3 additions and 3 deletions.
131
+ CSVDiff, however, would classify this as 2 moves (a change in sibling position for A and B).
132
+
133
+ In order to do this, CSVDiff needs to know what field(s) confer uniqueness on each row.
134
+ In this example, we could use the child field alone (since each member name only appears
135
+ once); however, this would imply a flat structure, where all rows are children of a single
136
+ parent. This in turn would cause CSVDiff to classify the above change as a Move (i.e. a
137
+ change in order) of all 6 rows.
138
+
139
+ The more correct specification of this file is that column 0 contains a unique parent
140
+ identifier, and column 1 contains a unique child identifier. CSVDiff can then correctly
141
+ deduce that there are in fact only two changes in order - the swap in positions of A and
142
+ B below Root.
143
+
144
+ Note: If you aren't interested in changes in the order of siblings, then you could use
145
+ CSVDiff with a :key_field option of column 1, and specify the :ignore_moves option.
146
+
147
+ ## Warnings
148
+
149
+ When processing and diffing files, CSVDiff may encounter problems with the data or
150
+ the specifications it has been given. It will continue even in the face of problems,
151
+ but will log details of the problems in a #warnings Array. The number of warnings
152
+ will also be included in the Hash returned by the #summary method.
153
+
154
+ Warnings may be raised for any of the following:
155
+ * Missing fields: If the right/to file contains fields that are not present in the
156
+ left/from file, a warning is raised and the field is ignored for diff purposes.
157
+ * Duplicate keys: If two rows are found that have the same values for the key field(s),
158
+ a warning is raised, and the duplicate values are ignored.
159
+
160
+
161
+ ## Examples
162
+
163
+ The simplest use case is as shown above, where the data to be diffed is in CSV files
164
+ with the column names as the first record, and where the unique key is the first
165
+ column in the data. In this case, a diff can be created simply via:
166
+ ```ruby
167
+ diff = CSVDiff.new(file1, file2)
168
+ ```
169
+
170
+ ### Specifying Unique Row Identifiers
171
+
172
+ Often however, rows are not uniquely identifiable via the first column in the file.
173
+ In a parent-child hierarchy, for example, combinations of parent and child may be
174
+ necessary to uniquely identify a row. In these cases, it is necessary to indicate
175
+ which fields are used to uniquely identify common rows across the two files. This
176
+ can be done in several different ways.
177
+
178
+ 1. Using the :key_fields option with field numbers (these are 0-based):
179
+
180
+ ```ruby
181
+ diff = CSVDiff.new(file1, file2, key_fields: [0, 1])
182
+ ```
183
+
184
+ 2. Using the :key_fields option with column names:
185
+
186
+ ```ruby
187
+ diff = CSVDiff.new(file1, file2, key_fields: ['Parent', 'Child'])
188
+ ```
189
+
190
+ 3. Using the :parent_fields and :child_fields with field numbers:
191
+
192
+ ```ruby
193
+ diff = CSVDiff.new(file1, file2, parent_field: 1, child_fields: [2, 3])
194
+ ```
195
+
196
+ 4. Using the :parent_fields and :child_fields with column names:
197
+
198
+ ```ruby
199
+ diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'])
200
+ ```
201
+
202
+ ### Using Non-CSV File Sources
203
+
204
+ Data from non-CSV sources can be diffed, as long as it can be supplied as an Array
205
+ of Arrays:
206
+ ```ruby
207
+ DATA1 = [
208
+ ['Parent', 'Child', 'Description'],
209
+ ['A', 'A1', 'Account 1'],
210
+ ['A', 'A2', 'Account 2']
211
+ ]
212
+
213
+ DATA2 = [
214
+ ['Parent', 'Child', 'Description'],
215
+ ['A', 'A1', 'Account1'],
216
+ ['A', 'A2', 'Account2']
217
+ ]
218
+
219
+ diff = CSVDiff.new(DATA1, DATA2, key_fields: [1, 0])
220
+ ```
221
+
222
+ ### Specifying Column Names
223
+
224
+ If your data file does not include column headers, you can specify the names of
225
+ each column when creating the diff. The names supplied are the keys used in the
226
+ diff results:
227
+
228
+ ```ruby
229
+ DATA1 = [
230
+ ['A', 'A1', 'Account 1'],
231
+ ['A', 'A2', 'Account 2']
232
+ ]
233
+
234
+ DATA2 = [
235
+ ['A', 'A1', 'Account1'],
236
+ ['A', 'A2', 'Account2']
237
+ ]
238
+
239
+ diff = CSVDiff.new(DATA1, DATA2, key_fields: [1, 0], field_names: ['Parent', 'Child', 'Description'])
240
+ ```
241
+
242
+ If your data file does contain a header row, but you wish to use your own column
243
+ names, you can specify the :field_names option and the :ignore_header option to
244
+ ignore the first row.
245
+
246
+
247
+ ### Ignoring Fields
248
+
249
+ If your data contains fields that you aren't interested in, these can be excluded
250
+ from the diff process using the :ignore_fields option:
251
+ ```ruby
252
+ diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
253
+ ignore_fields: ['CreatedAt', 'UpdatedAt'])
254
+ ```
255
+
256
+ ### Ignoring Certain Changes
257
+
258
+ CSVDiff identifies Adds, Updates, Moves and Deletes; any of these changes can be selectively
259
+ ignored, e.g. if you are not interested in Deletes, you can pass the :ignore_deletes option:
260
+ ```ruby
261
+ diff = CSVDiff.new(file1, file2, parent_field: 'Date', child_fields: ['HomeTeam', 'AwayTeam'],
262
+ ignore_fields: ['CreatedAt', 'UpdatedAt'],
263
+ ignore_deletes: true, ignore_moves: true)
264
+ ```
@@ -0,0 +1,8 @@
1
+ #!ruby -w
2
+
3
+ require 'csv-diff-report'
4
+ require 'csv-diff-report/cli'
5
+
6
+
7
+ CSVDiff::CLI.new.run
8
+
@@ -0,0 +1,6 @@
1
+ require 'pathname'
2
+ require 'yaml'
3
+ require 'csv-diff'
4
+ require 'color-console'
5
+ require 'csv-diff-report/report'
6
+
@@ -0,0 +1,122 @@
1
+ require 'arg-parser'
2
+ require 'csv-diff-report'
3
+
4
+
5
+ class CSVDiff
6
+
7
+ class CLI
8
+
9
+ include ArgParser::DSL
10
+
11
+ # Define an on_parse handler for field names or indexes. Splits the
12
+ # supplied argument value on commas, and converts numbers to Fixnums.
13
+ register_parse_handler(:parse_fields) do |val, arg, hsh|
14
+ val.split(',').map{ |fld| fld =~ /^\d+$/ ? fld.to_i : fld }
15
+ end
16
+
17
+ title 'CSV-Diff'
18
+
19
+ purpose <<-EOT
20
+ Generate a diff report between two files using the CSV-Diff algorithm.
21
+ EOT
22
+
23
+ positional_arg :from, 'The file or dir to use as the left or from source in the diff'
24
+ positional_arg :to, 'The file or dir to use as the right or to source in the diff'
25
+ positional_arg :pattern, 'A file name pattern to use to filter matching files if a directory ' +
26
+ 'diff is being performed', default: '*'
27
+
28
+ usage_break 'Source Options'
29
+ keyword_arg :file_types, 'A comma-separated list of file-type names (supports wildcards) to process. ' +
30
+ 'Requires the presence of a .csvdiff file in the FROM or current directory to define ' +
31
+ 'the file type patterns',
32
+ short_key: 't', on_parse: :split_to_array
33
+ keyword_arg :exclude, 'A file name pattern of files to exclude from the diff if a directory ' +
34
+ 'diff is being performed',
35
+ short_key: 'x'
36
+ keyword_arg :field_names, 'A comma-separated list of field names for each ' +
37
+ 'field in the source files',
38
+ short_key: 'f', on_parse: :split_to_array
39
+ keyword_arg :parent_fields, 'The parent field name(s) or index(es)',
40
+ short_key: 'p', on_parse: :parse_fields
41
+ keyword_arg :child_fields, 'The child field name(s) or index(es)',
42
+ short_key: 'c', on_parse: :parse_fields
43
+ keyword_arg :key_fields, 'The key field name(s) or index(es)',
44
+ short_key: 'k', on_parse: :parse_fields
45
+ keyword_arg :encoding, 'The encoding to use when opening the CSV files',
46
+ short_key: 'e'
47
+ flag_arg :tab_delimited, 'If true, the file is assumed to be tab-delimited rather than comma-delimited'
48
+ flag_arg :ignore_header, 'If true, the first line in each source file is ignored; ' +
49
+ 'requires the use of the --field-names option to name the fields'
50
+
51
+ usage_break 'Diff Options'
52
+ keyword_arg :ignore_fields, 'The names or indexes of any fields to be ignored during the diff',
53
+ short_key: 'i', on_parse: :parse_fields
54
+ flag_arg :ignore_adds, "If true, items in TO that are not in FROM are ignored",
55
+ short_key: 'A'
56
+ flag_arg :ignore_deletes, "If true, items in FROM that are not in TO are ignored",
57
+ short_key: 'D'
58
+ flag_arg :ignore_updates, "If true, changes to non-key properties are ignored",
59
+ short_key: 'U'
60
+ flag_arg :ignore_moves, "If true, changes in an item's position are ignored",
61
+ short_key: 'M'
62
+
63
+ usage_break 'Output Options'
64
+ keyword_arg :format, 'The format in which to produce the diff report',
65
+ default: 'HTML', validation: /^(html|xls(x)?)$/i
66
+ keyword_arg :output, 'The path to save the diff report to. If not specified, the diff ' +
67
+ 'report will be placed in the same directory as the FROM file, and will be named ' +
68
+ 'Diff_<FROM>_to_<TO>.<FORMAT>'
69
+
70
+
71
# Parses command-line options, and then performs the diff.
#
# Exits with status 1 if processing fails with a reportable error, or with
# status 2 (after printing the help or usage text) if the command-line
# arguments could not be parsed.
def run
  if arguments = parse_arguments
    begin
      process(arguments)
    rescue RuntimeError, ArgumentError => ex
      # CSVDiff::Report raises ArgumentError for missing/mismatched paths and
      # for unrecognised output formats; report those in the same way as any
      # other failure instead of letting them escape as a stack trace.
      Console.puts ex.message, :red
      exit 1
    end
  else
    if show_help?
      show_help(nil, Console.width).each do |line|
        Console.puts line, :cyan
      end
    else
      show_usage(nil, Console.width).each do |line|
        Console.puts line, :yellow
      end
    end
    exit 2
  end
end
93
+
94
+
95
+ # Process a CSVDiffReport using +arguments+ to determine all options.
96
+ def process(arguments)
97
+ options = {}
98
+ exclude_args = [:from, :to, :tab_delimited]
99
+ arguments.each_pair do |arg, val|
100
+ options[arg] = val if val && !exclude_args.include?(arg)
101
+ end
102
+ options[:csv_options] = {:col_sep => "\t"} if arguments.tab_delimited
103
+ rep = CSVDiff::Report.new
104
+ rep.diff(arguments.from, arguments.to, options)
105
+
106
+ output_dir = FileTest.directory?(arguments.from) ?
107
+ arguments.from : File.dirname(arguments.from)
108
+ left_name = File.basename(arguments.from, File.extname(arguments.from))
109
+ right_name = File.basename(arguments.to, File.extname(arguments.to))
110
+ output = arguments.output ||
111
+ "#{output_dir}/Diff_#{left_name}_to_#{right_name}.diff"
112
+ rep.output(output, arguments.format)
113
+ end
114
+
115
+ end
116
+
117
+ end
118
+
119
+
120
+ if __FILE__ == $0
121
+ CSVDiff::CLI.new.run
122
+ end
@@ -0,0 +1,154 @@
1
+
2
+ class CSVDiff
3
+
4
+ # Defines functionality for exporting a Diff report to Excel in XLSX format
5
+ # using the Axlsx library.
6
+ module Excel
7
+
8
+ private
9
+
10
+ # Generate a diff report in XLSX format.
11
+ def xl_output(output)
12
+ require 'axlsx'
13
+
14
+ # Create workbook
15
+ xl = xl_new
16
+
17
+ # Add a summary sheet and diff sheets for each diff
18
+ xl_summary_sheet(xl)
19
+
20
+ # Save workbook
21
+ path = "#{File.dirname(output)}/#{File.basename(output, File.extname(output))}.xlsx"
22
+ xl_save(xl, path)
23
+ end
24
+
25
+
26
+ # Create a new XL package object
27
+ def xl_new
28
+ @xl_styles = {}
29
+ xl = Axlsx::Package.new
30
+ xl.use_shared_strings = true
31
+ xl.workbook.styles do |s|
32
+ s.fonts[0].sz = 9
33
+ @xl_styles['Title'] = s.add_style(:b => true)
34
+ @xl_styles['Comma'] = s.add_style(:format_code => '#,##0')
35
+ @xl_styles['Right'] = s.add_style(:alignment => {:horizontal => :right})
36
+ @xl_styles['Add'] = s.add_style :fg_color => '00A000'
37
+ @xl_styles['Update'] = s.add_style :fg_color => '0000A0', :bg_color => 'F0F0FF'
38
+ @xl_styles['Move'] = s.add_style :fg_color => '4040FF'
39
+ @xl_styles['Delete'] = s.add_style :fg_color => 'FF0000', :strike => true
40
+ end
41
+ xl
42
+ end
43
+
44
+
45
+ # Add summary sheet
46
+ def xl_summary_sheet(xl)
47
+ compare_from = @left
48
+ compare_to = @right
49
+
50
+ xl.workbook.add_worksheet(name: 'Summary') do |sheet|
51
+ sheet.add_row do |row|
52
+ row.add_cell 'From:', :style => @xl_styles['Title']
53
+ row.add_cell compare_from
54
+ end
55
+ sheet.add_row do |row|
56
+ row.add_cell 'To:', :style => @xl_styles['Title']
57
+ row.add_cell compare_to
58
+ end
59
+ sheet.add_row
60
+ sheet.add_row ['Sheet', 'Adds', 'Deletes', 'Updates', 'Moves'], :style => @xl_styles['Title']
61
+ sheet.column_info.each do |ci|
62
+ ci.width = 10
63
+ end
64
+ sheet.column_info.first.width = 20
65
+
66
+ @diffs.each do |file_diff|
67
+ sheet.add_row([File.basename(file_diff.left.path, File.extname(file_diff.left.path)),
68
+ file_diff.summary['Add'], file_diff.summary['Delete'],
69
+ file_diff.summary['Update'], file_diff.summary['Move']])
70
+ xl_diff_sheet(xl, file_diff) if file_diff.diffs.size > 0
71
+ end
72
+ end
73
+
74
+ end
75
+
76
+
77
+ # Add diff sheet
78
+ def xl_diff_sheet(xl, file_diff)
79
+ sheet_name = File.basename(file_diff.left.path, File.extname(file_diff.left.path))
80
+ all_fields = [:row, :action, :sibling_position] + file_diff.diff_fields
81
+ xl.workbook.add_worksheet(name: sheet_name) do |sheet|
82
+ sheet.add_row(all_fields.map{ |f| f.to_s }, :style => @xl_styles['Title'])
83
+ file_diff.diffs.sort_by{|k, v| v[:row] }.each do |key, diff|
84
+ sheet.add_row do |row|
85
+ chg = diff[:action]
86
+ all_fields.each_with_index do |field, i|
87
+ cell = nil
88
+ comment = nil
89
+ old = nil
90
+ style = case chg
91
+ when 'Add', 'Delete' then @xl_styles[chg]
92
+ else 0
93
+ end
94
+ d = diff[field]
95
+ if d.is_a?(Array)
96
+ old = d.first
97
+ new = d.last
98
+ if old.nil?
99
+ style = @xl_styles['Add']
100
+ else
101
+ style = @xl_styles[chg]
102
+ comment = old
103
+ end
104
+ else
105
+ new = d
106
+ style = @xl_styles[chg] if i == 1
107
+ end
108
+ case new
109
+ when String
110
+ cell = row.add_cell(new.encode('utf-8'), :style => style) #, :type => :string)
111
+ # cell = row.add_cell(new, :style => style)
112
+ else
113
+ cell = row.add_cell(new, :style => style)
114
+ end
115
+ sheet.add_comment(:ref => cell.r, :author => 'Current', :visible => false,
116
+ :text => old.to_s.encode('utf-8')) if comment
117
+ end
118
+ end
119
+ end
120
+ sheet.column_info.each do |ci|
121
+ ci.width = 80 if ci.width > 80
122
+ end
123
+ xl_filter_and_freeze(sheet, 5)
124
+ end
125
+ end
126
+
127
+
128
+ # Freeze the top row and +freeze_cols+ of +sheet+.
129
+ def xl_filter_and_freeze(sheet, freeze_cols = 0)
130
+ sheet.auto_filter = "A1:#{Axlsx::cell_r(sheet.rows.first.cells.size - 1, sheet.rows.size - 1)}"
131
+ sheet.sheet_view do |sv|
132
+ sv.pane do |p|
133
+ p.state = :frozen
134
+ p.x_split = freeze_cols
135
+ p.y_split = 1
136
+ end
137
+ end
138
+ end
139
+
140
+
141
+ # Save +xl+ package to +path+
142
+ def xl_save(xl, path)
143
+ begin
144
+ xl.serialize(path)
145
+ path
146
+ rescue RuntimeError => ex
147
+ Console.puts ex.message, :red
148
+ raise "Unable to replace existing Excel file #{path} - is it already open in Excel?"
149
+ end
150
+ end
151
+
152
+ end
153
+
154
+ end
@@ -0,0 +1,170 @@
1
+
2
+ class CSVDiff
3
+
4
+ # Defines functionality for exporting a Diff report in HTML format.
5
+ module Html
6
+
7
+ private
8
+
9
+ # Generate a diff report in HTML format.
10
+ def html_output(output)
11
+ content = []
12
+ content << '<html>'
13
+ content << '<head>'
14
+ content << '<title>Diff Report</title>'
15
+ content << '<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">'
16
+ content << html_styles
17
+ content << '</head>'
18
+ content << '<body>'
19
+
20
+ html_summary(content)
21
+ @diffs.each do |file_diff|
22
+ html_diff(content, file_diff) if file_diff.diffs.size > 0
23
+ end
24
+
25
+ content << '</body>'
26
+ content << '</html>'
27
+
28
+ # Save HTML file
29
+ path = "#{File.dirname(output)}/#{File.basename(output, File.extname(output))}.html"
30
+ File.open(path, 'w'){ |f| f.write(content.join("\n")) }
31
+ path
32
+ end
33
+
34
+
35
+ # Returns the HTML head content, which contains the styles used for diffing.
36
+ def html_styles
37
+ style = <<-EOT
38
+ <style>
39
+ @font-face {font-family: Calibri;}
40
+
41
+ h1 {font-family: Calibri; font-size: 16pt;}
42
+ h2 {font-family: Calibri; font-size: 14pt; margin: 1em 0em .2em;}
43
+ h3 {font-family: Calibri; font-size: 12pt; margin: 1em 0em .2em;}
44
+ body {font-family: Calibri; font-size: 11pt;}
45
+ p {margin: .2em 0em;}
46
+ table {font-family: Calibri; font-size: 10pt; line-height: 12pt; border-collapse: collapse;}
47
+ th {background-color: #00205B; color: white; font-size: 11pt; font-weight: bold; text-align: left;
48
+ border: 1px solid #DDDDFF; padding: 1px 5px;}
49
+ td {border: 1px solid #DDDDFF; padding: 1px 5px;}
50
+
51
+ .summary {font-size: 13pt;}
52
+ .add {background-color: white; color: #33A000;}
53
+ .delete {background-color: white; color: #FF0000; text-decoration: line-through;}
54
+ .update {background-color: white; color: #0000A0;}
55
+ .move {background-color: white; color: #0000A0;}
56
+ .bold {font-weight: bold;}
57
+ .center {text-align: center;}
58
+ .right {text-align: right;}
59
+ .separator {width: 200px; border-bottom: 1px gray solid;}
60
+ </style>
61
+ EOT
62
+ style
63
+ end
64
+
65
+
66
# Appends the summary section of the report to +body+: a table naming the
# from/to source locations, followed by a table with one row per file diff
# giving its Add, Delete, Update and Move counts. Files that have
# differences are linked to the anchor of their detail section.
#
# All interpolated paths and labels are HTML-escaped so that characters such
# as &, <, > and quotes cannot break the generated markup.
#
# @param body [Array<String>] The list of HTML fragments being built up.
# @return [Array<String>] +body+, with the summary fragments appended.
def html_summary(body)
  require 'erb'

  body << '<h2>Summary</h2>'

  body << '<p>Source Locations:</p>'
  body << '<table>'
  body << '<tbody>'
  body << "<tr><th>From:</th><td>#{ERB::Util.html_escape(@left)}</td></tr>"
  body << "<tr><th>To:</th><td>#{ERB::Util.html_escape(@right)}</td></tr>"
  body << '</tbody>'
  body << '</table>'
  body << '<br>'
  body << '<p>Differences:</p>'
  body << '<table>'
  body << '<thead><tr>'
  body << '<th>File</th><th>Adds</th><th>Deletes</th><th>Updates</th><th>Moves</th>'
  body << '</tr></thead>'
  body << '<tbody>'
  @diffs.each do |file_diff|
    label = ERB::Util.html_escape(File.basename(file_diff.left.path))
    body << '<tr>'
    if file_diff.diffs.size > 0
      # Only files with differences get a detail section to link to
      body << "<td><a href='##{label}'>#{label}</a></td>"
    else
      body << "<td>#{label}</td>"
    end
    body << "<td class='right'>#{file_diff.summary['Add']}</td>"
    body << "<td class='right'>#{file_diff.summary['Delete']}</td>"
    body << "<td class='right'>#{file_diff.summary['Update']}</td>"
    body << "<td class='right'>#{file_diff.summary['Move']}</td>"
    body << '</tr>'
  end
  body << '</tbody>'
  body << '</table>'
end
100
+
101
+
102
# Appends the detail section for a single file diff to +body+: a heading
# (which is the anchor target used by the summary table), a one-line count
# of each kind of change, and a table with one row per difference.
#
# For fields whose diff value is an Array, the first element is shown struck
# through as the old value and the last element as the new value.
#
# The heading id is quoted, and all file names, field names and field values
# are HTML-escaped, so that data containing spaces, &, <, > or quotes cannot
# break the generated markup.
#
# @param body [Array<String>] The list of HTML fragments being built up.
# @param file_diff [CSVDiff] The diff to be rendered; read via #left, #summary,
#   #diff_fields and #diffs.
# @return [Array<String>] +body+, with the detail fragments appended.
def html_diff(body, file_diff)
  require 'erb'

  label = ERB::Util.html_escape(File.basename(file_diff.left.path))
  body << "<h2 id='#{label}'>#{label}</h2>"

  # Comma-separated counts of each change type that actually occurred
  body << '<p>'
  count = 0
  %w{Add Delete Update Move}.each do |action|
    next unless file_diff.summary[action] > 0
    body << ', ' if count > 0
    body << "<span class='#{action.downcase}'>#{file_diff.summary[action]} #{action}s</span>"
    count += 1
  end
  body << '</p>'

  all_fields = [:row, :action, :sibling_position] + file_diff.diff_fields
  body << '<table>'
  body << '<thead><tr>'
  all_fields.each do |fld|
    body << "<th>#{ERB::Util.html_escape(fld.to_s)}</th>"
  end
  body << '</tr></thead>'
  body << '<tbody>'
  file_diff.diffs.sort_by{ |k, v| v[:row] }.each do |key, diff|
    body << '<tr>'
    chg = diff[:action]
    all_fields.each_with_index do |field, i|
      old = nil
      # Added and deleted rows are styled in every cell; other rows only
      # where a value changed (and in the action column, index 1)
      style = case chg
              when 'Add', 'Delete' then chg.downcase
              end
      d = diff[field]
      if d.is_a?(Array)
        old = d.first
        new = d.last
        style = old.nil? ? 'add' : chg.downcase
      else
        new = d
        style = chg.downcase if i == 1
      end
      body << '<td>'
      body << "<span class='delete'>#{ERB::Util.html_escape(old)}</span>" if old
      # Put longer old values on their own line above the new value
      body << '<br>' if old && old.to_s.length > 10
      body << "<span#{style ? " class='#{style}'" : ''}>#{ERB::Util.html_escape(new)}</span>"
      body << '</td>'
    end
    body << '</tr>'
  end
  body << '</tbody>'
  body << '</table>'
end
167
+
168
+ end
169
+
170
+ end
@@ -0,0 +1,226 @@
1
+ require 'csv-diff-report/excel'
2
+ require 'csv-diff-report/html'
3
+
4
+
5
+ class CSVDiff
6
+
7
+ # Defines a class for generating diff reports using CSVDiff.
8
+ #
9
+ # A diff report may contain multiple file diffs, and can be output as either an
10
+ # XLSX spreadsheet document, or an HTML file.
11
+ class Report
12
+
13
+ include Excel
14
+ include Html
15
+
16
+
17
+ # Instantiate a new diff report object.
18
+ def initialize
19
+ @diffs = []
20
+ end
21
+
22
+
23
+ # Add a CSVDiff object to this report.
24
+ def <<(diff)
25
+ if diff.is_a?(CSVDiff)
26
+ @diffs << diff
27
+ else
28
+ raise ArgumentError, "Only CSVDiff objects can be added to a CSVDiff::Report"
29
+ end
30
+ end
31
+
32
+
33
+ # Add a diff to the diff report.
34
+ #
35
+ # @param options [Hash] Options to be passed to the diff process.
36
+ def diff(left, right, options = {})
37
+ @left = Pathname.new(left)
38
+ @right = Pathname.new(right)
39
+ if @left.file? && @right.file?
40
+ Console.puts "Performing file diff:"
41
+ Console.puts " From File: #{@left}"
42
+ Console.puts " To File: #{@right}"
43
+ opt_file = load_opt_file(@left.dirname)
44
+ diff_file(@left.to_s, @right.to_s, options, opt_file)
45
+ elsif @left.directory? && @right.directory?
46
+ Console.puts "Performing directory diff:"
47
+ Console.puts " From directory: #{@left}"
48
+ Console.puts " To directory: #{@right}"
49
+ opt_file = load_opt_file(@left)
50
+ if fts = options[:file_types]
51
+ file_types = find_matching_file_types(fts, opt_file)
52
+ file_types.each do |file_type|
53
+ hsh = opt_file[:file_types][file_type]
54
+ ft_opts = options.merge(hsh)
55
+ diff_dir(@left, @right, ft_opts, opt_file)
56
+ end
57
+ else
58
+ diff_dir(@left, @right, options, opt_file)
59
+ end
60
+ else
61
+ raise ArgumentError, "Left and right must both exist and be files or directories"
62
+ end
63
+ end
64
+
65
+
66
+ # Saves a diff report to +path+ in +format+.
67
+ #
68
+ # @param path [String] The path to the output report.
69
+ # @param format [Symbol] The output format for the report; one of :html or
70
+ # :xlsx.
71
+ def output(path, format = :html)
72
+ path = case format.to_s
73
+ when /^html$/i
74
+ html_output(path)
75
+ when /^xls(x)?$/i
76
+ xl_output(path)
77
+ else
78
+ raise ArgumentError, "Unrecognised output format: #{format}"
79
+ end
80
+ Console.puts "Diff report saved to '#{path}'"
81
+ end
82
+
83
+
84
+ private
85
+
86
+
87
+ # Loads an options file from +dir+
88
+ def load_opt_file(dir)
89
+ opt_path = Pathname(dir + '.csvdiff')
90
+ opt_path = Pathname('.csvdiff') unless opt_path.exist?
91
+ if opt_path.exist?
92
+ Console.puts "Loading options from .csvdiff at '#{dir}'"
93
+ opt_file = YAML.load(IO.read(opt_path))
94
+ symbolize_keys(opt_file)
95
+ end
96
+ end
97
+
98
+
99
+ # Convert keys in hashes to lower-case symbols for consistency
100
+ def symbolize_keys(hsh)
101
+ Hash[hsh.map{ |k, v| [k.to_s.downcase.intern, v.is_a?(Hash) ?
102
+ symbolize_keys(v) : v] }]
103
+ end
104
+
105
+
106
+ # Locates the file types in +opt_file+ that match the +file_types+ list of
107
+ # file type names or patterns
108
+ def find_matching_file_types(file_types, opt_file)
109
+ matched_fts = []
110
+ if known_fts = opt_file && opt_file[:file_types] && opt_file[:file_types].keys
111
+ file_types.each do |ft|
112
+ re = Regexp.new(ft.gsub('.', '\.').gsub('?', '.').gsub('*', '.*'), true)
113
+ matches = known_fts.select{ |file_type| file_type.to_s =~ re }
114
+ if matches.size > 0
115
+ matched_fts.concat(matches)
116
+ else
117
+ Console.puts "No file type matching '#{ft}' defined in .csvdiff", :yellow
118
+ Console.puts "Known file types are: #{opt_file[:file_types].keys.join(', ')}", :yellow
119
+ end
120
+ end
121
+ else
122
+ if opt_file
123
+ Console.puts "No file types are defined in .csvdiff", :yellow
124
+ else
125
+ Console.puts "The file_types option can only be used when a " +
126
+ ".csvdiff is present in the LEFT or current directory", :yellow
127
+ end
128
+ end
129
+ matched_fts.uniq
130
+ end
131
+
132
+
133
+ # Diff files that exist in both +left+ and +right+ directories.
134
+ def diff_dir(left, right, options, opt_file)
135
+ pattern = Pathname(options[:pattern] || '*')
136
+ exclude = options[:exclude]
137
+
138
+ Console.puts " Include Pattern: #{pattern}"
139
+ Console.puts " Exclude Pattern: #{exclude}" if exclude
140
+
141
+
142
+ left_files = Dir[left + pattern].sort
143
+ excludes = exclude ? Dir[left + exclude] : []
144
+ (left_files - excludes).each_with_index do |file, i|
145
+ right_file = right + File.basename(file)
146
+ if right_file.file?
147
+ diff_file(file, right_file.to_s, options, opt_file)
148
+ else
149
+ Console.puts "Skipping file '#{File.basename(file)}', as there is " +
150
+ "no corresponding TO file", :yellow
151
+ end
152
+ end
153
+ end
154
+
155
+
156
+ # Diff two files, and add the results to the diff report.
157
+ #
158
+ # @param left [String] The path to the left file
159
+ # @param right [String] The path to the right file
160
+ # @param options [Hash] The options to be passed to CSVDiff.
161
+ def diff_file(left, right, options, opt_file)
162
+ settings = find_file_type_settings(left, opt_file)
163
+ return if settings[:ignore]
164
+ options = settings.merge(options)
165
+ from = open_source(left, :from, options)
166
+ to = open_source(right, :to, options)
167
+ diff = CSVDiff.new(left, right, options)
168
+ diff.diff_warnings.each{ |warn| Console.puts warn, :yellow }
169
+ Console.write "Found #{diff.diffs.size} differences"
170
+ diff.summary.each_with_index.map do |pair, i|
171
+ Console.write i == 0 ? ": " : ", "
172
+ k, v = pair
173
+ color = case k
174
+ when 'Add' then :light_green
175
+ when 'Delete' then :red
176
+ when 'Update' then :cyan
177
+ when 'Move' then :light_magenta
178
+ when 'Warning' then :yellow
179
+ end
180
+ Console.write "#{v} #{k}s", color
181
+ end
182
+ Console.puts
183
+ self << diff
184
+ end
185
+
186
+
187
+ # Locates any file type settings for +left+ in the +opt_file+ hash.
188
+ def find_file_type_settings(left, opt_file)
189
+ left = Pathname(left.gsub('\\', '/'))
190
+ settings = opt_file && opt_file[:defaults] || {}
191
+ opt_file && opt_file[:file_types] && opt_file[:file_types].each do |file_type, hsh|
192
+ unless hsh[:pattern]
193
+ Console.puts "Invalid setting for file_type #{file_type} in .csvdiff; " +
194
+ "missing a 'pattern' key to use to match files", :yellow
195
+ hsh[:pattern] = '-'
196
+ end
197
+ next if hsh[:pattern] == '-'
198
+ unless hsh[:matched_files]
199
+ hsh[:matched_files] = Dir[(left.dirname + hsh[:pattern]).to_s]
200
+ hsh[:matched_files] -= Dir[(left.dirname + hsh[:exclude]).to_s] if hsh[:exclude]
201
+ end
202
+ if hsh[:matched_files].include?(left.to_s)
203
+ settings.merge!(hsh)
204
+ [:pattern, :exclude, :matched_files].each{ |k| settings.delete(k) }
205
+ break
206
+ end
207
+ end
208
+ settings
209
+ end
210
+
211
+
212
+ # Opens a source file.
213
+ #
214
+ # @param src [String] A path to the file to be opened.
215
+ # @param options [Hash] An options hash to be passed to CSVSource.
216
+ def open_source(src, left_right, options)
217
+ Console.write "Opening #{left_right.to_s.upcase} file '#{File.basename(src)}'..."
218
+ csv_src = CSVDiff::CSVSource.new(src.to_s, options)
219
+ Console.puts " #{csv_src.lines.size} lines read", :white
220
+ csv_src.warnings.each{ |warn| Console.puts warn, :yellow }
221
+ csv_src
222
+ end
223
+
224
+ end
225
+
226
+ end
@@ -0,0 +1,2 @@
1
+ require 'csv-diff-report'
2
+
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv-diff-report
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.2'
5
+ platform: ruby
6
+ authors:
7
+ - Adam Gardiner
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv-diff
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: arg-parser
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: color-console
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0.1'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: axlsx
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.3'
69
+ description: ! " This library generates diff reports of CSV files, using the
70
+ diff capabilities\n of the CSV Diff gem.\n\n Unlike a standard diff
71
+ that compares line by line, and is sensitive to the\n ordering of records,
72
+ CSV Diff identifies common lines by key field(s), and\n then compares the
73
+ contents of the fields in each line.\n\n CSV Diff Report takes the diff information
74
+ calculated by CSV Diff, and uses it to produce\n Excel or HTML-based diff
75
+ reports. It also provides a command-line tool for generating\n these diff
76
+ reports from CSV files.\n"
77
+ email: adam.b.gardiner@gmail.com
78
+ executables:
79
+ - csvdiff
80
+ extensions: []
81
+ extra_rdoc_files: []
82
+ files:
83
+ - LICENSE
84
+ - README.md
85
+ - bin/csvdiff
86
+ - lib/csv-diff-report.rb
87
+ - lib/csv-diff-report/cli.rb
88
+ - lib/csv-diff-report/excel.rb
89
+ - lib/csv-diff-report/html.rb
90
+ - lib/csv-diff-report/report.rb
91
+ - lib/csv_diff_report.rb
92
+ homepage: https://github.com/agardiner/csv-diff-report
93
+ licenses: []
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.4.1
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: CSV Diff Report is a library for generating diff reports using the CSV Diff
115
+ gem
116
+ test_files: []