spout 0.6.0.beta2 → 0.6.0.beta3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/lib/spout/tasks/engine.rake +67 -16
- data/lib/spout/version.rb +1 -1
- data/lib/spout/views/index.html.erb +39 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e85fa1a3ea963f131b61ce1952d10f1744c510a1
|
4
|
+
data.tar.gz: 2569c7aa7cbf7ce5bab644c128707e70a6ac659c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7766b6aadc863f2159fa7e8ba33d10be37d97feb2f4a71317049941cd29d3a7bcab1128c3613ed945a4da62ea11aea073870b8461b1dd86330b2d95c83e6f95
|
7
|
+
data.tar.gz: 35005ff394184b10aaad88955b7129401c81ee8ee883b91cf952eb0e4c68a88709c583eacb48e64a1e50785ea0f036e694172871038825533a6ce347297a8fbd
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
### Enhancement
|
4
4
|
- Added `spout match` command that generates a coverage report of how well a dataset matches the data dictionary
|
5
5
|
- Spout Match generates a viewable report in `dd\index.html` that shows which columns are covered in CSVs located in `dd\csvs`
|
6
|
+
- Spout Match checks that all collected values for a variable with a domain exist in the associated domain
|
6
7
|
- **Gem Changes**
|
7
8
|
- Updated to colorize 0.6.0
|
8
9
|
|
data/lib/spout/tasks/engine.rake
CHANGED
@@ -72,19 +72,40 @@ namespace :dd do
|
|
72
72
|
all_column_headers += column_headers
|
73
73
|
end
|
74
74
|
|
75
|
+
value_hash = {}
|
76
|
+
row_count = 0
|
75
77
|
|
76
|
-
|
78
|
+
csvs.each do |csv_file|
|
79
|
+
CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
|
80
|
+
row = line.to_hash
|
81
|
+
row.each do |column_name, value|
|
82
|
+
value_hash[column_name] ||= []
|
83
|
+
value_hash[column_name] = value_hash[column_name] | [value]
|
84
|
+
end
|
85
|
+
|
86
|
+
row_count += 1
|
87
|
+
# break if row_count > 10
|
88
|
+
end
|
89
|
+
end
|
77
90
|
|
78
91
|
@matching_results = []
|
79
92
|
|
80
93
|
all_column_headers.each do |csv, column|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
94
|
+
file = Dir.glob("variables/**/#{column}.json").first
|
95
|
+
valid_values = []
|
96
|
+
variable_type = ''
|
97
|
+
if json = JSON.parse(File.read(file)) rescue false
|
98
|
+
variable_type = json['type']
|
99
|
+
if variable_type == 'choices'
|
100
|
+
valid_values = load_valid_domain_values(json['domain'])
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
|
105
|
+
@matching_results << [ csv, column, scr ]
|
85
106
|
end
|
86
107
|
|
87
|
-
@matching_results.sort!{|a,b| [
|
108
|
+
@matching_results.sort!{|a,b| [b[2].number_of_errors, a[0].to_s, a[1].to_s] <=> [a[2].number_of_errors, b[0].to_s, b[1].to_s]}
|
88
109
|
|
89
110
|
@coverage_results = []
|
90
111
|
|
@@ -92,7 +113,7 @@ namespace :dd do
|
|
92
113
|
csv_name = csv_file.split('/').last.to_s
|
93
114
|
|
94
115
|
total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
|
95
|
-
mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2]
|
116
|
+
mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
|
96
117
|
|
97
118
|
@coverage_results << [ csv_name, total_column_count, mapped_column_count ]
|
98
119
|
end
|
@@ -114,6 +135,45 @@ namespace :dd do
|
|
114
135
|
|
115
136
|
end
|
116
137
|
|
138
|
+
def load_valid_domain_values(domain_name)
|
139
|
+
values = []
|
140
|
+
file = Dir.glob("domains/**/#{domain_name}.json").first
|
141
|
+
if json = JSON.parse(File.read(file)) rescue false
|
142
|
+
values = json.collect{|hash| hash['value']}
|
143
|
+
end
|
144
|
+
values
|
145
|
+
end
|
146
|
+
|
147
|
+
class SpoutCoverageResult
|
148
|
+
attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type
|
149
|
+
|
150
|
+
def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
|
151
|
+
# puts "Initialize"
|
152
|
+
@file_name_test = variable_file_names.include?(column)
|
153
|
+
@json_id_test = variable_json_ids.include?(column)
|
154
|
+
@variable_type = variable_type
|
155
|
+
@valid_values = valid_values
|
156
|
+
@csv_values = csv_values
|
157
|
+
@values_test = check_values
|
158
|
+
end
|
159
|
+
|
160
|
+
def number_of_errors
|
161
|
+
@file_name_test && @json_id_test && @values_test ? 0 : 1
|
162
|
+
end
|
163
|
+
|
164
|
+
def check_values
|
165
|
+
variable_type != 'choices' || (valid_values | csv_values.compact).size == valid_values.size
|
166
|
+
end
|
167
|
+
|
168
|
+
def errored?
|
169
|
+
error == true
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def number_with_delimiter(number, delimiter = ",")
|
174
|
+
number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
|
175
|
+
end
|
176
|
+
|
117
177
|
def standard_version
|
118
178
|
version = File.open('VERSION', &:readline).strip rescue ''
|
119
179
|
version == '' ? '1.0.0' : version
|
@@ -249,12 +309,3 @@ end
|
|
249
309
|
def additional_csv_info
|
250
310
|
"\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n"
|
251
311
|
end
|
252
|
-
|
253
|
-
|
254
|
-
# class SpoutCoverageResult
|
255
|
-
# attr_accessor :error, :error_message
|
256
|
-
|
257
|
-
# def errored?
|
258
|
-
# error == true
|
259
|
-
# end
|
260
|
-
# end
|
data/lib/spout/views/index.html.erb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
<meta name="author" content="">
|
7
7
|
<link rel="shortcut icon" href="data:image/vnd.microsoft.icon;base64,AAABAAEAEBAAAAEAIABoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAQAABMLAAATCwAAAAAAAAAAAAAAAAAAAAAAIAAAAMIAAAAiAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAcAAAAwQAAACEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAADCAAAAIgAAAAAAAAAAAAAAAAAAAEQAAABBAAAAHAAAAMEAAAAhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAwwAAACIAAAAAAAAAAAAAAAAAAABEAAAAQQAAABwAAADBAAAAIQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHAAAAKoAAAAeAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAYAAAAqwAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAWAAAACAAAAB8AAAAfAAAAAAAAAEQAAABDAAAAAAAAABgAAAAIAAAAHQAAAB0AAAAEAAAAAAAAAAAAAAAAAAAAAAAAABkAAACuAAAArQAAABQAAABCAAAAQwAAAAAAAAAAAAAAGAAAAK0AAACrAAAAFgAAAAAAAAAAAAAAAAAAAAAAAAADAAAAGQAAABkAAAAAAAAARAAAAEMAAAAAAAAAAAAAAAMAAAAZAAAAGQAAAAMAAAAAAAAATAAAAEwAAABOAAAATAAAAEoAAABKAAAASQAAAHwAAAB7AAAASgAAAEwAAABMAAAASgAAAEoAAABMAAAATAAAAFAAAABnAAAA0AAAAGcAAABRAAAAUgAAAE8AAACAAAAAfgAAAE0AAABQAAAAUAAAAFAAAABSAAAAUgAAAFIAAAAAAAAAIAAAAL8AAAAfAAAAAAAAAAAAAAAAAAAARQAAAFkAAAAbAAAAHgAAAB8AAAAbAAAABAAAAAAAAAAAAAAAAAAAACAAAAC/AAAAHwAAAAIAAAAXAAAAAAAAAEgAAADPAAAAwwAAAMMAAADFAAAArAAAABgAAAAAAAAAAAAAAAAAAAAgAAAAvwAAAB4AAAAcAAAAqwAAABkAAABDAAAAWgAAAB0AAAAgAAAAHwAAABsAAAAEAAAAAAAAAAAAAAAAAAAAHAAAAKgAAAAaAAAAIAAAAMMAAAAdAAAAQgAAAEMAAAAAAAAABAAAABsAAAAfAAAAIAAAACEAAAAhAAAAAAAAAAQAAAAXAAAAAgAAACIAAADDAAAAHQAAAEIAAABDAAAAAAAAABkAAACrAAAAwwAAAMIAAADCAAAAwQAAAAAAAAAAAAAAAAAAAAAAAAAiAAAAwwAAAB0AAABCAAAAQwAAAAAAAAAEAAAAHQAAACEAAAAgAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAIgAAAMMAAAAdAAAAQgAAAEMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjg8AAI4PAACODwAAjg8AAIABAADgYQAA4mEAAAAAAAAAAAAAjgMAAIIDAACAAwAAgEAAAIBAAADwQAAA8H8AAA==">
|
8
8
|
|
9
|
-
<title>
|
9
|
+
<title>Spout Dataset and Data Dictionary Coverage</title>
|
10
10
|
|
11
11
|
<!-- Bootstrap core CSS -->
|
12
12
|
<link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
|
@@ -20,8 +20,22 @@
|
|
20
20
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
21
21
|
<script src="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
|
22
22
|
<style type="text/css">
|
23
|
+
html {
|
24
|
+
position: relative;
|
25
|
+
min-height: 100%;
|
26
|
+
}
|
23
27
|
body {
|
24
28
|
padding-top: 50px;
|
29
|
+
/* Margin bottom by footer height */
|
30
|
+
margin-bottom: 60px;
|
31
|
+
}
|
32
|
+
#footer {
|
33
|
+
position: absolute;
|
34
|
+
bottom: 0;
|
35
|
+
width: 100%;
|
36
|
+
/* Set the fixed height of the footer here */
|
37
|
+
height: 60px;
|
38
|
+
background-color: #f5f5f5;
|
25
39
|
}
|
26
40
|
</style>
|
27
41
|
</head>
|
@@ -58,8 +72,8 @@ body {
|
|
58
72
|
<span class="text-muted">---</span>
|
59
73
|
<% end %>
|
60
74
|
</td>
|
61
|
-
<td><%= total_column_count %></td>
|
62
|
-
<td><%= total_column_count - mapped_column_count %></td>
|
75
|
+
<td><%= number_with_delimiter( total_column_count ) %></td>
|
76
|
+
<td><%= number_with_delimiter( total_column_count - mapped_column_count ) %></td>
|
63
77
|
</tr>
|
64
78
|
<% end %>
|
65
79
|
</tbody>
|
@@ -72,22 +86,24 @@ body {
|
|
72
86
|
<th>Dataset Column Header</th>
|
73
87
|
<th>JSON File</th>
|
74
88
|
<th>Found in Data Dictionary</th>
|
89
|
+
<th>Variable Type</th>
|
90
|
+
<th>Values</th>
|
75
91
|
</tr>
|
76
92
|
</thead>
|
77
93
|
<tbody>
|
78
|
-
<% @matching_results.each do |csv, column, file_name_test, json_id_test| %>
|
94
|
+
<% @matching_results.each do |csv, column, scr| %>
|
79
95
|
<tr>
|
80
96
|
<td><code><%= csv %></code></td>
|
81
97
|
<td><%= column %></td>
|
82
98
|
<td>
|
83
|
-
<% if file_name_test %>
|
99
|
+
<% if scr.file_name_test %>
|
84
100
|
<span class="text-success">File Found</span>
|
85
101
|
<% else %>
|
86
102
|
<span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
|
87
103
|
<% end %>
|
88
104
|
</td>
|
89
105
|
<td>
|
90
|
-
<% if json_id_test %>
|
106
|
+
<% if scr.json_id_test %>
|
91
107
|
<span class="text-success">JSON ID Found</span>
|
92
108
|
<% else %>
|
93
109
|
<span class="text-danger">No Matching JSON ID</span>
|
@@ -96,6 +112,18 @@ body {
|
|
96
112
|
}</pre>
|
97
113
|
<% end %>
|
98
114
|
</td>
|
115
|
+
<td><%= scr.variable_type %></td>
|
116
|
+
<td>
|
117
|
+
<% if scr.values_test %>
|
118
|
+
<span class="text-success">Valid Values</span>
|
119
|
+
<% else %>
|
120
|
+
VALID: <%= scr.valid_values %><br /><br />
|
121
|
+
Bad Values:
|
122
|
+
<% (scr.csv_values.compact - scr.valid_values).each do |value| %>
|
123
|
+
<code><%= value %></code>
|
124
|
+
<% end %>
|
125
|
+
<% end %>
|
126
|
+
</td>
|
99
127
|
</tr>
|
100
128
|
<% end %>
|
101
129
|
</tbody>
|
@@ -103,5 +131,10 @@ body {
|
|
103
131
|
|
104
132
|
</div><!-- /.container -->
|
105
133
|
|
134
|
+
<div id="footer">
|
135
|
+
<div class="container">
|
136
|
+
<p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
|
137
|
+
</div>
|
138
|
+
</div>
|
106
139
|
|
107
140
|
</body></html>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spout
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.beta2
|
4
|
+
version: 0.6.0.beta3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Remo Mueller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|