spout 0.6.0.beta4 → 0.6.0.beta5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68fc0a3c4fab7e033261d72e655de1bdfbc2754e
4
- data.tar.gz: eb62cad1ad96fc30c3ce3b3dc2a331d59ef3cb26
3
+ metadata.gz: 334f5a8e2330e851b8334cf6109d73dfef8486f6
4
+ data.tar.gz: 95347c28437cbe8866dbf11a903c0e877ce7bb48
5
5
  SHA512:
6
- metadata.gz: 75387420b459a3d3b57aae192ebf9f0a0a7607bac20c41f72a094a5cb1ebd40726d71d8844efbb5631832796b6d642c6777cd78c8685a237c2544f176508c33f
7
- data.tar.gz: f10ca5e16cb21f4d2ab94e55aa56ef92df57cecce2affd6c7a878ce8b8afca9b995eb0b68c1f626c3febd1dac2de5410f9fdecaa437f1e636b5f03958b1c7320
6
+ metadata.gz: 3797558300ae6080c8371ab11d9cbbf92c7174dd1c2bd59341d3c1b84fb88fed299b0409cb6b12018032694ecd06b2e265621e5a77948fdfaecc3f25d795d9e2
7
+ data.tar.gz: 0f139f427db50c0ec1019ecc62bdc881c87d71e16d637d7788340dac4df0a42ecea2c15fa5020d1b8c72a8412b0bdaf24153b52a8a5f856b3780275f46cbac75
@@ -46,66 +46,51 @@ namespace :dd do
46
46
 
47
47
  desc 'Match CSV dataset with JSON repository'
48
48
  task :coverage do
49
- puts csvs = Dir.glob("dd/csvs/*.csv")
49
+ require 'spout/tests/variable_type_validation'
50
50
 
51
- all_column_headers = []
52
-
53
- variable_json_ids = []
54
51
  choice_variables = []
55
- variable_file_names = []
56
52
 
57
53
  Dir.glob("variables/**/*.json").each do |file|
58
54
  if json = JSON.parse(File.read(file)) rescue false
59
- variable_json_ids << json['id']
60
55
  choice_variables << json['id'] if json['type'] == 'choices'
61
56
  end
62
- variable_file_names << file.split('/').last.to_s.split('.json').first.to_s
63
57
  end
64
58
 
65
- csvs.each do |csv_file|
66
- csv_name = csv_file.split('/').last.to_s
67
- column_headers = []
68
-
69
- CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read} ) do |line|
70
- column_headers = line.collect{|l| [csv_name, l.to_s.downcase]}
71
- break # Only read first line
72
- end
59
+ all_column_headers = []
60
+ value_hash = {}
61
+ csv_names = []
73
62
 
74
- all_column_headers += column_headers
75
- end
63
+ Dir.glob("dd/csvs/*.csv").each do |csv_file|
64
+ csv_name = csv_file.split('/').last.to_s
65
+ csv_names << csv_name
66
+ puts "\nParsing: #{csv_name}"
76
67
 
77
- value_hash = {}
78
- row_count = 0
68
+ column_headers = []
69
+ row_count = 0
79
70
 
80
- csvs.each do |csv_file|
81
- total_row_count = CSV.readlines(csv_file, 'r:iso-8859-1:utf-8').size - 1
82
71
  CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
83
- puts "ROW: #{row_count+1} of #{total_row_count}"
84
72
  row = line.to_hash
73
+ column_headers = row.collect{|key, val| [csv_name, key.to_s.downcase]} if row_count == 0
74
+
75
+ print "." if row_count % 100 == 0
76
+
85
77
  choice_variables.each do |column_name|
86
78
  value_hash[column_name] ||= []
87
79
  value_hash[column_name] = value_hash[column_name] | [row[column_name]] if row[column_name]
88
80
  end
89
81
 
90
82
  row_count += 1
91
- # break if row_count > 10
92
83
  end
84
+
85
+ print "done\n"
86
+
87
+ all_column_headers += column_headers
93
88
  end
94
89
 
95
90
  @matching_results = []
96
91
 
97
92
  all_column_headers.each do |csv, column|
98
- file = Dir.glob("variables/**/#{column}.json").first
99
- valid_values = []
100
- variable_type = ''
101
- if json = JSON.parse(File.read(file)) rescue false
102
- variable_type = json['type']
103
- if variable_type == 'choices'
104
- valid_values = load_valid_domain_values(json['domain'])
105
- end
106
- end
107
-
108
- scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
93
+ scr = SpoutCoverageResult.new(csv, column, value_hash[column])
109
94
  @matching_results << [ csv, column, scr ]
110
95
  end
111
96
 
@@ -113,16 +98,14 @@ namespace :dd do
113
98
 
114
99
  @coverage_results = []
115
100
 
116
- csvs.each do |csv_file|
117
- csv_name = csv_file.split('/').last.to_s
118
-
101
+ csv_names.each do |csv_name|
119
102
  total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
120
103
  mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
121
-
122
104
  @coverage_results << [ csv_name, total_column_count, mapped_column_count ]
123
105
  end
124
106
 
125
107
  coverage_file = File.join(Dir.pwd, 'dd', 'index.html')
108
+ puts "\nGenerating: index.html\n\n"
126
109
 
127
110
  File.open(coverage_file, 'w+') do |file|
128
111
  name = 'index.html'
@@ -134,39 +117,64 @@ namespace :dd do
134
117
  open_command = 'start' if RUBY_PLATFORM.match(/mingw/) != nil
135
118
 
136
119
  system "#{open_command} #{coverage_file}" if ['start', 'open'].include?(open_command)
137
- puts coverage_file
120
+ puts "#{coverage_file}\n\n"
138
121
  end
139
122
 
140
123
  end
141
124
 
142
- def load_valid_domain_values(domain_name)
143
- values = []
144
- file = Dir.glob("domains/**/#{domain_name}.json").first
145
- if json = JSON.parse(File.read(file)) rescue false
146
- values = json.collect{|hash| hash['value']}
147
- end
148
- values
149
- end
150
-
151
125
  class SpoutCoverageResult
152
- attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type
126
+ attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type_test, :json, :domain_test
127
+
128
+ def initialize(csv, column, csv_values)
129
+ load_json(column)
130
+ load_valid_values
153
131
 
154
- def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
155
- # puts "Initialize"
156
- @file_name_test = variable_file_names.include?(column)
157
- @json_id_test = variable_json_ids.include?(column)
158
- @variable_type = variable_type
159
- @valid_values = valid_values
160
132
  @csv_values = csv_values
161
133
  @values_test = check_values
134
+ @variable_type_test = check_variable_type
135
+ @domain_test = check_domain_specified
136
+ end
137
+
138
+ def load_json(column)
139
+ file = Dir.glob("variables/**/#{column}.json").first
140
+ @file_name_test = (file != nil)
141
+ @json = JSON.parse(File.read(file)) rescue @json = {}
142
+ @json_id_test = (@json['id'].downcase == column)
143
+ end
144
+
145
+ def load_valid_values
146
+ valid_values = []
147
+ if @json['type'] == 'choices'
148
+ file = Dir.glob("domains/**/#{@json['domain']}.json").first
149
+ if json = JSON.parse(File.read(file)) rescue false
150
+ valid_values = json.collect{|hash| hash['value']}
151
+ end
152
+ end
153
+ @valid_values = valid_values
162
154
  end
163
155
 
164
156
  def number_of_errors
165
- @file_name_test && @json_id_test && @values_test ? 0 : 1
157
+ @file_name_test && @json_id_test && @values_test && @variable_type_test && @domain_test ? 0 : 1
166
158
  end
167
159
 
168
160
  def check_values
169
- variable_type != 'choices' || (valid_values | csv_values.compact).size == valid_values.size
161
+ @json['type'] != 'choices' || (@valid_values | @csv_values.compact).size == @valid_values.size
162
+ end
163
+
164
+ def check_variable_type
165
+ Spout::Tests::VariableTypeValidation::VALID_VARIABLE_TYPES.include?(@json['type'])
166
+ end
167
+
168
+ def check_domain_specified
169
+ if @json['type'] != 'choices'
170
+ true
171
+ else
172
+ domain_file = Dir.glob("domains/**/#{@json['domain']}.json").first
173
+ if domain_json = JSON.parse(File.read(domain_file)) rescue false
174
+ return domain_json.kind_of?(Array)
175
+ end
176
+ false
177
+ end
170
178
  end
171
179
 
172
180
  def errored?
data/lib/spout/version.rb CHANGED
@@ -3,7 +3,7 @@ module Spout
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
5
  TINY = 0
6
- BUILD = "beta4" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta5" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
@@ -6,7 +6,7 @@
6
6
  <meta name="author" content="">
7
7
  <link rel="shortcut icon" href="data:image/vnd.microsoft.icon;base64,AAABAAEAEBAAAAEAIABoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAQAABMLAAATCwAAAAAAAAAAAAAAAAAAAAAAIAAAAMIAAAAiAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAcAAAAwQAAACEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAADCAAAAIgAAAAAAAAAAAAAAAAAAAEQAAABBAAAAHAAAAMEAAAAhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAwwAAACIAAAAAAAAAAAAAAAAAAABEAAAAQQAAABwAAADBAAAAIQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHAAAAKoAAAAeAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAYAAAAqwAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAWAAAACAAAAB8AAAAfAAAAAAAAAEQAAABDAAAAAAAAABgAAAAIAAAAHQAAAB0AAAAEAAAAAAAAAAAAAAAAAAAAAAAAABkAAACuAAAArQAAABQAAABCAAAAQwAAAAAAAAAAAAAAGAAAAK0AAACrAAAAFgAAAAAAAAAAAAAAAAAAAAAAAAADAAAAGQAAABkAAAAAAAAARAAAAEMAAAAAAAAAAAAAAAMAAAAZAAAAGQAAAAMAAAAAAAAATAAAAEwAAABOAAAATAAAAEoAAABKAAAASQAAAHwAAAB7AAAASgAAAEwAAABMAAAASgAAAEoAAABMAAAATAAAAFAAAABnAAAA0AAAAGcAAABRAAAAUgAAAE8AAACAAAAAfgAAAE0AAABQAAAAUAAAAFAAAABSAAAAUgAAAFIAAAAAAAAAIAAAAL8AAAAfAAAAAAAAAAAAAAAAAAAARQAAAFkAAAAbAAAAHgAAAB8AAAAbAAAABAAAAAAAAAAAAAAAAAAAACAAAAC/AAAAHwAAAAIAAAAXAAAAAAAAAEgAAADPAAAAwwAAAMMAAADFAAAArAAAABgAAAAAAAAAAAAAAAAAAAAgAAAAvwAAAB4AAAAcAAAAqwAAABkAAABDAAAAWgAAAB0AAAAgAAAAHwAAABsAAAAEAAAAAAAAAAAAAAAAAAAAHAAAAKgAAAAaAAAAIAAAAMMAAAAdAAAAQgAAAEMAAAAAAAAABAAAABsAAAAfAAAAIAAAACEAAAAhAAAAAAAAAAQAAAAXAAAAAgAAACIAAADDAAAAHQAAAEIAAABDAAAAAAAAABkAAACrAAAAwwAAAMIAAADCAAAAwQAAAAAAAAAAAAAAAAAAAAAAAAAiAAAAwwAAAB0AAABCAAAAQwAAAAAAAAAEAAAAHQAAACEAAAAgAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAIgAAAMMAAAAdAAAAQgAAAEMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjg8AAI4PAACODwAAjg8AAIABAADgYQAA4mEAAAAAAAAAAAAAjgMAAIIDAACAAwAAgEAAAIBAAADwQAAA8H8AAA==">
8
8
 
9
- <title>Spout Dataset and Data Dictionary Coverage</title>
9
+ <title>Spout Coverage</title>
10
10
 
11
11
  <!-- Bootstrap core CSS -->
12
12
  <link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
@@ -36,6 +36,10 @@ body {
36
36
  /* Set the fixed height of the footer here */
37
37
  height: 60px;
38
38
  background-color: #f5f5f5;
39
+ }
40
+ code.success {
41
+ color: #468847;
42
+ background-color: #dff0d8;
39
43
  }
40
44
  </style>
41
45
  </head>
@@ -45,7 +49,7 @@ body {
45
49
  <div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
46
50
  <div class="container">
47
51
  <div class="navbar-header">
48
- <a class="navbar-brand" href="#">Spout Dataset and Data Dictionary Coverage</a>
52
+ <a class="navbar-brand" href="#">Spout Coverage</a>
49
53
  </div>
50
54
  </div>
51
55
  </div>
@@ -83,44 +87,57 @@ body {
83
87
  <thead>
84
88
  <tr>
85
89
  <th>CSV</th>
86
- <th>Dataset Column Header</th>
90
+ <th>Column</th>
87
91
  <th>JSON File</th>
88
- <th>Found in Data Dictionary</th>
92
+ <th>Variable ID</th>
89
93
  <th>Variable Type</th>
94
+ <th>Variable Domain</th>
90
95
  <th>Values</th>
91
96
  </tr>
92
97
  </thead>
93
98
  <tbody>
94
99
  <% @matching_results.each do |csv, column, scr| %>
95
100
  <tr>
96
- <td><code><%= csv %></code></td>
101
+ <td><code class="<%= 'success' if scr.number_of_errors == 0 %>"><%= csv %></code></td>
97
102
  <td><%= column %></td>
98
103
  <td>
99
104
  <% if scr.file_name_test %>
100
- <span class="text-success">File Found</span>
105
+ <div class="text-success" style="text-align:center"><span class="glyphicon glyphicon-ok"></span></div>
101
106
  <% else %>
102
- <span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
107
+ <span class="text-danger"><code><%= column %>.json</code> missing</span>
103
108
  <% end %>
104
109
  </td>
105
110
  <td>
106
- <% if scr.json_id_test %>
107
- <span class="text-success">JSON ID Found</span>
108
- <% else %>
109
- <span class="text-danger">No Matching JSON ID</span>
110
- <pre>{
111
- "id": "---", ...
112
- }</pre>
111
+ <% if scr.file_name_test %>
112
+ <% if scr.json_id_test %>
113
+ <code class="success">"id": "<%= column %>"</code>
114
+ <% else %>
115
+ <code>"id": <%= scr.json['id'].inspect %></code>
116
+ <% end %>
113
117
  <% end %>
114
118
  </td>
115
- <td><%= scr.variable_type %></td>
116
119
  <td>
117
- <% if scr.values_test %>
118
- <span class="text-success">Valid Values</span>
119
- <% else %>
120
- VALID: <%= scr.valid_values %><br /><br />
121
- Bad Values:
122
- <% (scr.csv_values.compact - scr.valid_values).each do |value| %>
123
- <code><%= value %></code>
120
+ <% if scr.file_name_test %>
121
+ <code class="<%= 'success' if scr.variable_type_test %>">"type": <%= scr.json['type'].inspect %></code>
122
+ <% end %>
123
+ </td>
124
+ <td>
125
+ <% if scr.json['type'] == 'choices' and scr.file_name_test %>
126
+ <% if scr.domain_test or scr.json['domain'].to_s.strip == '' %>
127
+ <code class="<%= 'success' if scr.domain_test %>">"domain": <%= scr.json['domain'].inspect %></code>
128
+ <% else %>
129
+ <span class="text-danger"><code><%= scr.json['domain'] %>.json</code> missing</span>
130
+ <% end %>
131
+ <% end %>
132
+ </td>
133
+ <td style="white-space:nowrap">
134
+ <% if scr.json['type'] == 'choices' %>
135
+ <% if scr.values_test %>
136
+ <div class="text-success" style="text-align:center"><span class="glyphicon glyphicon-ok"></span></div>
137
+ <% else %>
138
+ <% (scr.valid_values + scr.csv_values.compact.sort).uniq.each do |value| %>
139
+ <code class="<%= 'success' if scr.valid_values.include?(value) %>"><%= value %></code>
140
+ <% end %>
124
141
  <% end %>
125
142
  <% end %>
126
143
  </td>
@@ -133,7 +150,7 @@ body {
133
150
 
134
151
  <div id="footer">
135
152
  <div class="container">
136
- <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
153
+ <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout</a> v<%= Spout::VERSION::STRING %></p>
137
154
  </div>
138
155
  </div>
139
156
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spout
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.beta4
4
+ version: 0.6.0.beta5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller