spout 0.6.0.beta4 → 0.6.0.beta5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68fc0a3c4fab7e033261d72e655de1bdfbc2754e
4
- data.tar.gz: eb62cad1ad96fc30c3ce3b3dc2a331d59ef3cb26
3
+ metadata.gz: 334f5a8e2330e851b8334cf6109d73dfef8486f6
4
+ data.tar.gz: 95347c28437cbe8866dbf11a903c0e877ce7bb48
5
5
  SHA512:
6
- metadata.gz: 75387420b459a3d3b57aae192ebf9f0a0a7607bac20c41f72a094a5cb1ebd40726d71d8844efbb5631832796b6d642c6777cd78c8685a237c2544f176508c33f
7
- data.tar.gz: f10ca5e16cb21f4d2ab94e55aa56ef92df57cecce2affd6c7a878ce8b8afca9b995eb0b68c1f626c3febd1dac2de5410f9fdecaa437f1e636b5f03958b1c7320
6
+ metadata.gz: 3797558300ae6080c8371ab11d9cbbf92c7174dd1c2bd59341d3c1b84fb88fed299b0409cb6b12018032694ecd06b2e265621e5a77948fdfaecc3f25d795d9e2
7
+ data.tar.gz: 0f139f427db50c0ec1019ecc62bdc881c87d71e16d637d7788340dac4df0a42ecea2c15fa5020d1b8c72a8412b0bdaf24153b52a8a5f856b3780275f46cbac75
@@ -46,66 +46,51 @@ namespace :dd do
46
46
 
47
47
  desc 'Match CSV dataset with JSON repository'
48
48
  task :coverage do
49
- puts csvs = Dir.glob("dd/csvs/*.csv")
49
+ require 'spout/tests/variable_type_validation'
50
50
 
51
- all_column_headers = []
52
-
53
- variable_json_ids = []
54
51
  choice_variables = []
55
- variable_file_names = []
56
52
 
57
53
  Dir.glob("variables/**/*.json").each do |file|
58
54
  if json = JSON.parse(File.read(file)) rescue false
59
- variable_json_ids << json['id']
60
55
  choice_variables << json['id'] if json['type'] == 'choices'
61
56
  end
62
- variable_file_names << file.split('/').last.to_s.split('.json').first.to_s
63
57
  end
64
58
 
65
- csvs.each do |csv_file|
66
- csv_name = csv_file.split('/').last.to_s
67
- column_headers = []
68
-
69
- CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read} ) do |line|
70
- column_headers = line.collect{|l| [csv_name, l.to_s.downcase]}
71
- break # Only read first line
72
- end
59
+ all_column_headers = []
60
+ value_hash = {}
61
+ csv_names = []
73
62
 
74
- all_column_headers += column_headers
75
- end
63
+ Dir.glob("dd/csvs/*.csv").each do |csv_file|
64
+ csv_name = csv_file.split('/').last.to_s
65
+ csv_names << csv_name
66
+ puts "\nParsing: #{csv_name}"
76
67
 
77
- value_hash = {}
78
- row_count = 0
68
+ column_headers = []
69
+ row_count = 0
79
70
 
80
- csvs.each do |csv_file|
81
- total_row_count = CSV.readlines(csv_file, 'r:iso-8859-1:utf-8').size - 1
82
71
  CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
83
- puts "ROW: #{row_count+1} of #{total_row_count}"
84
72
  row = line.to_hash
73
+ column_headers = row.collect{|key, val| [csv_name, key.to_s.downcase]} if row_count == 0
74
+
75
+ print "." if row_count % 100 == 0
76
+
85
77
  choice_variables.each do |column_name|
86
78
  value_hash[column_name] ||= []
87
79
  value_hash[column_name] = value_hash[column_name] | [row[column_name]] if row[column_name]
88
80
  end
89
81
 
90
82
  row_count += 1
91
- # break if row_count > 10
92
83
  end
84
+
85
+ print "done\n"
86
+
87
+ all_column_headers += column_headers
93
88
  end
94
89
 
95
90
  @matching_results = []
96
91
 
97
92
  all_column_headers.each do |csv, column|
98
- file = Dir.glob("variables/**/#{column}.json").first
99
- valid_values = []
100
- variable_type = ''
101
- if json = JSON.parse(File.read(file)) rescue false
102
- variable_type = json['type']
103
- if variable_type == 'choices'
104
- valid_values = load_valid_domain_values(json['domain'])
105
- end
106
- end
107
-
108
- scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
93
+ scr = SpoutCoverageResult.new(csv, column, value_hash[column])
109
94
  @matching_results << [ csv, column, scr ]
110
95
  end
111
96
 
@@ -113,16 +98,14 @@ namespace :dd do
113
98
 
114
99
  @coverage_results = []
115
100
 
116
- csvs.each do |csv_file|
117
- csv_name = csv_file.split('/').last.to_s
118
-
101
+ csv_names.each do |csv_name|
119
102
  total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
120
103
  mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
121
-
122
104
  @coverage_results << [ csv_name, total_column_count, mapped_column_count ]
123
105
  end
124
106
 
125
107
  coverage_file = File.join(Dir.pwd, 'dd', 'index.html')
108
+ puts "\nGenerating: index.html\n\n"
126
109
 
127
110
  File.open(coverage_file, 'w+') do |file|
128
111
  name = 'index.html'
@@ -134,39 +117,64 @@ namespace :dd do
134
117
  open_command = 'start' if RUBY_PLATFORM.match(/mingw/) != nil
135
118
 
136
119
  system "#{open_command} #{coverage_file}" if ['start', 'open'].include?(open_command)
137
- puts coverage_file
120
+ puts "#{coverage_file}\n\n"
138
121
  end
139
122
 
140
123
  end
141
124
 
142
- def load_valid_domain_values(domain_name)
143
- values = []
144
- file = Dir.glob("domains/**/#{domain_name}.json").first
145
- if json = JSON.parse(File.read(file)) rescue false
146
- values = json.collect{|hash| hash['value']}
147
- end
148
- values
149
- end
150
-
151
125
  class SpoutCoverageResult
152
- attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type
126
+ attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type_test, :json, :domain_test
127
+
128
+ def initialize(csv, column, csv_values)
129
+ load_json(column)
130
+ load_valid_values
153
131
 
154
- def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
155
- # puts "Initialize"
156
- @file_name_test = variable_file_names.include?(column)
157
- @json_id_test = variable_json_ids.include?(column)
158
- @variable_type = variable_type
159
- @valid_values = valid_values
160
132
  @csv_values = csv_values
161
133
  @values_test = check_values
134
+ @variable_type_test = check_variable_type
135
+ @domain_test = check_domain_specified
136
+ end
137
+
138
+ def load_json(column)
139
+ file = Dir.glob("variables/**/#{column}.json").first
140
+ @file_name_test = (file != nil)
141
+ @json = JSON.parse(File.read(file)) rescue @json = {}
142
+ @json_id_test = (@json['id'].downcase == column)
143
+ end
144
+
145
+ def load_valid_values
146
+ valid_values = []
147
+ if @json['type'] == 'choices'
148
+ file = Dir.glob("domains/**/#{@json['domain']}.json").first
149
+ if json = JSON.parse(File.read(file)) rescue false
150
+ valid_values = json.collect{|hash| hash['value']}
151
+ end
152
+ end
153
+ @valid_values = valid_values
162
154
  end
163
155
 
164
156
  def number_of_errors
165
- @file_name_test && @json_id_test && @values_test ? 0 : 1
157
+ @file_name_test && @json_id_test && @values_test && @variable_type_test && @domain_test ? 0 : 1
166
158
  end
167
159
 
168
160
  def check_values
169
- variable_type != 'choices' || (valid_values | csv_values.compact).size == valid_values.size
161
+ @json['type'] != 'choices' || (@valid_values | @csv_values.compact).size == @valid_values.size
162
+ end
163
+
164
+ def check_variable_type
165
+ Spout::Tests::VariableTypeValidation::VALID_VARIABLE_TYPES.include?(@json['type'])
166
+ end
167
+
168
+ def check_domain_specified
169
+ if @json['type'] != 'choices'
170
+ true
171
+ else
172
+ domain_file = Dir.glob("domains/**/#{@json['domain']}.json").first
173
+ if domain_json = JSON.parse(File.read(domain_file)) rescue false
174
+ return domain_json.kind_of?(Array)
175
+ end
176
+ false
177
+ end
170
178
  end
171
179
 
172
180
  def errored?
data/lib/spout/version.rb CHANGED
@@ -3,7 +3,7 @@ module Spout
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
5
  TINY = 0
6
- BUILD = "beta4" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta5" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
@@ -6,7 +6,7 @@
6
6
  <meta name="author" content="">
7
7
  <link rel="shortcut icon" href="">
8
8
 
9
- <title>Spout Dataset and Data Dictionary Coverage</title>
9
+ <title>Spout Coverage</title>
10
10
 
11
11
  <!-- Bootstrap core CSS -->
12
12
  <link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
@@ -36,6 +36,10 @@ body {
36
36
  /* Set the fixed height of the footer here */
37
37
  height: 60px;
38
38
  background-color: #f5f5f5;
39
+ }
40
+ code.success {
41
+ color: #468847;
42
+ background-color: #dff0d8;
39
43
  }
40
44
  </style>
41
45
  </head>
@@ -45,7 +49,7 @@ body {
45
49
  <div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
46
50
  <div class="container">
47
51
  <div class="navbar-header">
48
- <a class="navbar-brand" href="#">Spout Dataset and Data Dictionary Coverage</a>
52
+ <a class="navbar-brand" href="#">Spout Coverage</a>
49
53
  </div>
50
54
  </div>
51
55
  </div>
@@ -83,44 +87,57 @@ body {
83
87
  <thead>
84
88
  <tr>
85
89
  <th>CSV</th>
86
- <th>Dataset Column Header</th>
90
+ <th>Column</th>
87
91
  <th>JSON File</th>
88
- <th>Found in Data Dictionary</th>
92
+ <th>Variable ID</th>
89
93
  <th>Variable Type</th>
94
+ <th>Variable Domain</th>
90
95
  <th>Values</th>
91
96
  </tr>
92
97
  </thead>
93
98
  <tbody>
94
99
  <% @matching_results.each do |csv, column, scr| %>
95
100
  <tr>
96
- <td><code><%= csv %></code></td>
101
+ <td><code class="<%= 'success' if scr.number_of_errors == 0 %>"><%= csv %></code></td>
97
102
  <td><%= column %></td>
98
103
  <td>
99
104
  <% if scr.file_name_test %>
100
- <span class="text-success">File Found</span>
105
+ <div class="text-success" style="text-align:center"><span class="glyphicon glyphicon-ok"></span></div>
101
106
  <% else %>
102
- <span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
107
+ <span class="text-danger"><code><%= column %>.json</code> missing</span>
103
108
  <% end %>
104
109
  </td>
105
110
  <td>
106
- <% if scr.json_id_test %>
107
- <span class="text-success">JSON ID Found</span>
108
- <% else %>
109
- <span class="text-danger">No Matching JSON ID</span>
110
- <pre>{
111
- "id": "---", ...
112
- }</pre>
111
+ <% if scr.file_name_test %>
112
+ <% if scr.json_id_test %>
113
+ <code class="success">"id": "<%= column %>"</code>
114
+ <% else %>
115
+ <code>"id": <%= scr.json['id'].inspect %></code>
116
+ <% end %>
113
117
  <% end %>
114
118
  </td>
115
- <td><%= scr.variable_type %></td>
116
119
  <td>
117
- <% if scr.values_test %>
118
- <span class="text-success">Valid Values</span>
119
- <% else %>
120
- VALID: <%= scr.valid_values %><br /><br />
121
- Bad Values:
122
- <% (scr.csv_values.compact - scr.valid_values).each do |value| %>
123
- <code><%= value %></code>
120
+ <% if scr.file_name_test %>
121
+ <code class="<%= 'success' if scr.variable_type_test %>">"type": <%= scr.json['type'].inspect %></code>
122
+ <% end %>
123
+ </td>
124
+ <td>
125
+ <% if scr.json['type'] == 'choices' and scr.file_name_test %>
126
+ <% if scr.domain_test or scr.json['domain'].to_s.strip == '' %>
127
+ <code class="<%= 'success' if scr.domain_test %>">"domain": <%= scr.json['domain'].inspect %></code>
128
+ <% else %>
129
+ <span class="text-danger"><code><%= scr.json['domain'] %>.json</code> missing</span>
130
+ <% end %>
131
+ <% end %>
132
+ </td>
133
+ <td style="white-space:nowrap">
134
+ <% if scr.json['type'] == 'choices' %>
135
+ <% if scr.values_test %>
136
+ <div class="text-success" style="text-align:center"><span class="glyphicon glyphicon-ok"></span></div>
137
+ <% else %>
138
+ <% (scr.valid_values + scr.csv_values.compact.sort).uniq.each do |value| %>
139
+ <code class="<%= 'success' if scr.valid_values.include?(value) %>"><%= value %></code>
140
+ <% end %>
124
141
  <% end %>
125
142
  <% end %>
126
143
  </td>
@@ -133,7 +150,7 @@ body {
133
150
 
134
151
  <div id="footer">
135
152
  <div class="container">
136
- <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
153
+ <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout</a> v<%= Spout::VERSION::STRING %></p>
137
154
  </div>
138
155
  </div>
139
156
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spout
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.beta4
4
+ version: 0.6.0.beta5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller