spout 0.6.0.beta2 → 0.6.0.beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/lib/spout/tasks/engine.rake +67 -16
- data/lib/spout/version.rb +1 -1
- data/lib/spout/views/index.html.erb +39 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e85fa1a3ea963f131b61ce1952d10f1744c510a1
|
4
|
+
data.tar.gz: 2569c7aa7cbf7ce5bab644c128707e70a6ac659c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7766b6aadc863f2159fa7e8ba33d10be37d97feb2f4a71317049941cd29d3a7bcab1128c3613ed945a4da62ea11aea073870b8461b1dd86330b2d95c83e6f95
|
7
|
+
data.tar.gz: 35005ff394184b10aaad88955b7129401c81ee8ee883b91cf952eb0e4c68a88709c583eacb48e64a1e50785ea0f036e694172871038825533a6ce347297a8fbd
|
data/CHANGELOG.md
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
### Enhancement
|
4
4
|
- Added `spout match` command that generates a coverage report of how well a dataset matches the data dictionary
|
5
5
|
- Spout Match generates a viewable report in `dd\index.html` that shows which columns are covered in CSVs located in `dd\csvs`
|
6
|
+
- Spout Match checks that all collected values for a variable with a domain exist in the associated domain
|
6
7
|
- **Gem Changes**
|
7
8
|
- Updated to colorize 0.6.0
|
8
9
|
|
data/lib/spout/tasks/engine.rake
CHANGED
@@ -72,19 +72,40 @@ namespace :dd do
|
|
72
72
|
all_column_headers += column_headers
|
73
73
|
end
|
74
74
|
|
75
|
+
value_hash = {}
|
76
|
+
row_count = 0
|
75
77
|
|
76
|
-
|
78
|
+
csvs.each do |csv_file|
|
79
|
+
CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
|
80
|
+
row = line.to_hash
|
81
|
+
row.each do |column_name, value|
|
82
|
+
value_hash[column_name] ||= []
|
83
|
+
value_hash[column_name] = value_hash[column_name] | [value]
|
84
|
+
end
|
85
|
+
|
86
|
+
row_count += 1
|
87
|
+
# break if row_count > 10
|
88
|
+
end
|
89
|
+
end
|
77
90
|
|
78
91
|
@matching_results = []
|
79
92
|
|
80
93
|
all_column_headers.each do |csv, column|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
94
|
+
file = Dir.glob("variables/**/#{column}.json").first
|
95
|
+
valid_values = []
|
96
|
+
variable_type = ''
|
97
|
+
if json = JSON.parse(File.read(file)) rescue false
|
98
|
+
variable_type = json['type']
|
99
|
+
if variable_type == 'choices'
|
100
|
+
valid_values = load_valid_domain_values(json['domain'])
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
|
105
|
+
@matching_results << [ csv, column, scr ]
|
85
106
|
end
|
86
107
|
|
87
|
-
@matching_results.sort!{|a,b| [
|
108
|
+
@matching_results.sort!{|a,b| [b[2].number_of_errors, a[0].to_s, a[1].to_s] <=> [a[2].number_of_errors, b[0].to_s, b[1].to_s]}
|
88
109
|
|
89
110
|
@coverage_results = []
|
90
111
|
|
@@ -92,7 +113,7 @@ namespace :dd do
|
|
92
113
|
csv_name = csv_file.split('/').last.to_s
|
93
114
|
|
94
115
|
total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
|
95
|
-
mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2]
|
116
|
+
mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
|
96
117
|
|
97
118
|
@coverage_results << [ csv_name, total_column_count, mapped_column_count ]
|
98
119
|
end
|
@@ -114,6 +135,45 @@ namespace :dd do
|
|
114
135
|
|
115
136
|
end
|
116
137
|
|
138
|
+
def load_valid_domain_values(domain_name)
|
139
|
+
values = []
|
140
|
+
file = Dir.glob("domains/**/#{domain_name}.json").first
|
141
|
+
if json = JSON.parse(File.read(file)) rescue false
|
142
|
+
values = json.collect{|hash| hash['value']}
|
143
|
+
end
|
144
|
+
values
|
145
|
+
end
|
146
|
+
|
147
|
+
class SpoutCoverageResult
|
148
|
+
attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type
|
149
|
+
|
150
|
+
def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
|
151
|
+
# puts "Initialize"
|
152
|
+
@file_name_test = variable_file_names.include?(column)
|
153
|
+
@json_id_test = variable_json_ids.include?(column)
|
154
|
+
@variable_type = variable_type
|
155
|
+
@valid_values = valid_values
|
156
|
+
@csv_values = csv_values
|
157
|
+
@values_test = check_values
|
158
|
+
end
|
159
|
+
|
160
|
+
def number_of_errors
|
161
|
+
@file_name_test && @json_id_test && @values_test ? 0 : 1
|
162
|
+
end
|
163
|
+
|
164
|
+
def check_values
|
165
|
+
variable_type != 'choices' || (valid_values | csv_values.compact).size == valid_values.size
|
166
|
+
end
|
167
|
+
|
168
|
+
def errored?
|
169
|
+
error == true
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def number_with_delimiter(number, delimiter = ",")
|
174
|
+
number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
|
175
|
+
end
|
176
|
+
|
117
177
|
def standard_version
|
118
178
|
version = File.open('VERSION', &:readline).strip rescue ''
|
119
179
|
version == '' ? '1.0.0' : version
|
@@ -249,12 +309,3 @@ end
|
|
249
309
|
def additional_csv_info
|
250
310
|
"\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n"
|
251
311
|
end
|
252
|
-
|
253
|
-
|
254
|
-
# class SpoutCoverageResult
|
255
|
-
# attr_accessor :error, :error_message
|
256
|
-
|
257
|
-
# def errored?
|
258
|
-
# error == true
|
259
|
-
# end
|
260
|
-
# end
|
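data/lib/spout/version.rb
CHANGED
[diff body for this file (+1 -1) is missing from this capture]
data/lib/spout/views/index.html.erb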
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
<meta name="author" content="">
|
7
7
|
<link rel="shortcut icon" href="data:image/vnd.microsoft.icon;base64,AAABAAEAEBAAAAEAIABoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAQAABMLAAATCwAAAAAAAAAAAAAAAAAAAAAAIAAAAMIAAAAiAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAcAAAAwQAAACEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAADCAAAAIgAAAAAAAAAAAAAAAAAAAEQAAABBAAAAHAAAAMEAAAAhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAwwAAACIAAAAAAAAAAAAAAAAAAABEAAAAQQAAABwAAADBAAAAIQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHAAAAKoAAAAeAAAAAAAAAAAAAAAAAAAARAAAAEEAAAAYAAAAqwAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAWAAAACAAAAB8AAAAfAAAAAAAAAEQAAABDAAAAAAAAABgAAAAIAAAAHQAAAB0AAAAEAAAAAAAAAAAAAAAAAAAAAAAAABkAAACuAAAArQAAABQAAABCAAAAQwAAAAAAAAAAAAAAGAAAAK0AAACrAAAAFgAAAAAAAAAAAAAAAAAAAAAAAAADAAAAGQAAABkAAAAAAAAARAAAAEMAAAAAAAAAAAAAAAMAAAAZAAAAGQAAAAMAAAAAAAAATAAAAEwAAABOAAAATAAAAEoAAABKAAAASQAAAHwAAAB7AAAASgAAAEwAAABMAAAASgAAAEoAAABMAAAATAAAAFAAAABnAAAA0AAAAGcAAABRAAAAUgAAAE8AAACAAAAAfgAAAE0AAABQAAAAUAAAAFAAAABSAAAAUgAAAFIAAAAAAAAAIAAAAL8AAAAfAAAAAAAAAAAAAAAAAAAARQAAAFkAAAAbAAAAHgAAAB8AAAAbAAAABAAAAAAAAAAAAAAAAAAAACAAAAC/AAAAHwAAAAIAAAAXAAAAAAAAAEgAAADPAAAAwwAAAMMAAADFAAAArAAAABgAAAAAAAAAAAAAAAAAAAAgAAAAvwAAAB4AAAAcAAAAqwAAABkAAABDAAAAWgAAAB0AAAAgAAAAHwAAABsAAAAEAAAAAAAAAAAAAAAAAAAAHAAAAKgAAAAaAAAAIAAAAMMAAAAdAAAAQgAAAEMAAAAAAAAABAAAABsAAAAfAAAAIAAAACEAAAAhAAAAAAAAAAQAAAAXAAAAAgAAACIAAADDAAAAHQAAAEIAAABDAAAAAAAAABkAAACrAAAAwwAAAMIAAADCAAAAwQAAAAAAAAAAAAAAAAAAAAAAAAAiAAAAwwAAAB0AAABCAAAAQwAAAAAAAAAEAAAAHQAAACEAAAAgAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAIgAAAMMAAAAdAAAAQgAAAEMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjg8AAI4PAACODwAAjg8AAIABAADgYQAA4mEAAAAAAAAAAAAAjgMAAIIDAACAAwAAgEAAAIBAAADwQAAA8H8AAA==">
|
8
8
|
|
9
|
-
<title>
|
9
|
+
<title>Spout Dataset and Data Dictionary Coverage</title>
|
10
10
|
|
11
11
|
<!-- Bootstrap core CSS -->
|
12
12
|
<link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
|
@@ -20,8 +20,22 @@
|
|
20
20
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
21
21
|
<script src="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
|
22
22
|
<style type="text/css">
|
23
|
+
html {
|
24
|
+
position: relative;
|
25
|
+
min-height: 100%;
|
26
|
+
}
|
23
27
|
body {
|
24
28
|
padding-top: 50px;
|
29
|
+
/* Margin bottom by footer height */
|
30
|
+
margin-bottom: 60px;
|
31
|
+
}
|
32
|
+
#footer {
|
33
|
+
position: absolute;
|
34
|
+
bottom: 0;
|
35
|
+
width: 100%;
|
36
|
+
/* Set the fixed height of the footer here */
|
37
|
+
height: 60px;
|
38
|
+
background-color: #f5f5f5;
|
25
39
|
}
|
26
40
|
</style>
|
27
41
|
</head>
|
@@ -58,8 +72,8 @@ body {
|
|
58
72
|
<span class="text-muted">---</span>
|
59
73
|
<% end %>
|
60
74
|
</td>
|
61
|
-
<td><%= total_column_count %></td>
|
62
|
-
<td><%= total_column_count - mapped_column_count
|
75
|
+
<td><%= number_with_delimiter( total_column_count ) %></td>
|
76
|
+
<td><%= number_with_delimiter( total_column_count - mapped_column_count ) %></td>
|
63
77
|
</tr>
|
64
78
|
<% end %>
|
65
79
|
</tbody>
|
@@ -72,22 +86,24 @@ body {
|
|
72
86
|
<th>Dataset Column Header</th>
|
73
87
|
<th>JSON File</th>
|
74
88
|
<th>Found in Data Dictionary</th>
|
89
|
+
<th>Variable Type</th>
|
90
|
+
<th>Values</th>
|
75
91
|
</tr>
|
76
92
|
</thead>
|
77
93
|
<tbody>
|
78
|
-
<% @matching_results.each do |csv, column,
|
94
|
+
<% @matching_results.each do |csv, column, scr| %>
|
79
95
|
<tr>
|
80
96
|
<td><code><%= csv %></code></td>
|
81
97
|
<td><%= column %></td>
|
82
98
|
<td>
|
83
|
-
<% if file_name_test %>
|
99
|
+
<% if scr.file_name_test %>
|
84
100
|
<span class="text-success">File Found</span>
|
85
101
|
<% else %>
|
86
102
|
<span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
|
87
103
|
<% end %>
|
88
104
|
</td>
|
89
105
|
<td>
|
90
|
-
<% if json_id_test %>
|
106
|
+
<% if scr.json_id_test %>
|
91
107
|
<span class="text-success">JSON ID Found</span>
|
92
108
|
<% else %>
|
93
109
|
<span class="text-danger">No Matching JSON ID</span>
|
@@ -96,6 +112,18 @@ body {
|
|
96
112
|
}</pre>
|
97
113
|
<% end %>
|
98
114
|
</td>
|
115
|
+
<td><%= scr.variable_type %></td>
|
116
|
+
<td>
|
117
|
+
<% if scr.values_test %>
|
118
|
+
<span class="text-success">Valid Values</span>
|
119
|
+
<% else %>
|
120
|
+
VALID: <%= scr.valid_values %><br /><br />
|
121
|
+
Bad Values:
|
122
|
+
<% (scr.csv_values.compact - scr.valid_values).each do |value| %>
|
123
|
+
<code><%= value %></code>
|
124
|
+
<% end %>
|
125
|
+
<% end %>
|
126
|
+
</td>
|
99
127
|
</tr>
|
100
128
|
<% end %>
|
101
129
|
</tbody>
|
@@ -103,5 +131,10 @@ body {
|
|
103
131
|
|
104
132
|
</div><!-- /.container -->
|
105
133
|
|
134
|
+
<div id="footer">
|
135
|
+
<div class="container">
|
136
|
+
<p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
|
137
|
+
</div>
|
138
|
+
</div>
|
106
139
|
|
107
140
|
</body></html>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spout
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.beta2
|
4
|
+
version: 0.6.0.beta3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Remo Mueller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|