spout 0.6.0.beta2 → 0.6.0.beta3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4ae8ef2362ef67fc7cb4cb86560fabee408ef7b
4
- data.tar.gz: aa825b0f03d9def1e26e784c09eb205424896258
3
+ metadata.gz: e85fa1a3ea963f131b61ce1952d10f1744c510a1
4
+ data.tar.gz: 2569c7aa7cbf7ce5bab644c128707e70a6ac659c
5
5
  SHA512:
6
- metadata.gz: 815ae970ecfdf3ba9db2aa6f2095eff92608df08312e7903fdaf23e15243aca4a842147ada5031faa7fee594646b6f9632b0bfa1be87de408f7e41fe32d45562
7
- data.tar.gz: 1b55348b328af02eee3e2ee189f88778584f09cc1a82eff5160d4258c8bdf78646facb23c3dd2baa515897260ed223cb2a6d4ec9dce49c1ba23af178051210c3
6
+ metadata.gz: c7766b6aadc863f2159fa7e8ba33d10be37d97feb2f4a71317049941cd29d3a7bcab1128c3613ed945a4da62ea11aea073870b8461b1dd86330b2d95c83e6f95
7
+ data.tar.gz: 35005ff394184b10aaad88955b7129401c81ee8ee883b91cf952eb0e4c68a88709c583eacb48e64a1e50785ea0f036e694172871038825533a6ce347297a8fbd
data/CHANGELOG.md CHANGED
@@ -3,6 +3,7 @@
3
3
  ### Enhancement
4
4
  - Added `spout match` command that generates a coverage report of how well a dataset matches the data dictionary
5
5
  - Spout Match generates a viewable report in `dd\index.html` that shows which columns are covered in CSVs located in `dd\csvs`
6
+ - Spout Match checks that all collected values for a variable with a domain exist in the associated domain
6
7
  - **Gem Changes**
7
8
  - Updated to colorize 0.6.0
8
9
 
@@ -72,19 +72,40 @@ namespace :dd do
72
72
  all_column_headers += column_headers
73
73
  end
74
74
 
75
+ value_hash = {}
76
+ row_count = 0
75
77
 
76
- all_column_headers
78
+ csvs.each do |csv_file|
79
+ CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
80
+ row = line.to_hash
81
+ row.each do |column_name, value|
82
+ value_hash[column_name] ||= []
83
+ value_hash[column_name] = value_hash[column_name] | [value]
84
+ end
85
+
86
+ row_count += 1
87
+ # break if row_count > 10
88
+ end
89
+ end
77
90
 
78
91
  @matching_results = []
79
92
 
80
93
  all_column_headers.each do |csv, column|
81
- file_name_test = variable_file_names.include?(column)
82
- json_id_test = variable_json_ids.include?(column)
83
- # SpoutCoverageResult
84
- @matching_results << [ csv, column, file_name_test, json_id_test ]
94
+ file = Dir.glob("variables/**/#{column}.json").first
95
+ valid_values = []
96
+ variable_type = ''
97
+ if json = JSON.parse(File.read(file)) rescue false
98
+ variable_type = json['type']
99
+ if variable_type == 'choices'
100
+ valid_values = load_valid_domain_values(json['domain'])
101
+ end
102
+ end
103
+
104
+ scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
105
+ @matching_results << [ csv, column, scr ]
85
106
  end
86
107
 
87
- @matching_results.sort!{|a,b| [(a[2] && a[3] ? 1 : 0), a[0].to_s, a[1].to_s] <=> [(b[2] && b[3] ? 1 : 0), b[0].to_s, b[1].to_s]}
108
+ @matching_results.sort!{|a,b| [b[2].number_of_errors, a[0].to_s, a[1].to_s] <=> [a[2].number_of_errors, b[0].to_s, b[1].to_s]}
88
109
 
89
110
  @coverage_results = []
90
111
 
@@ -92,7 +113,7 @@ namespace :dd do
92
113
  csv_name = csv_file.split('/').last.to_s
93
114
 
94
115
  total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
95
- mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2] and mr[3]}.count
116
+ mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
96
117
 
97
118
  @coverage_results << [ csv_name, total_column_count, mapped_column_count ]
98
119
  end
@@ -114,6 +135,45 @@ namespace :dd do
114
135
 
115
136
  end
116
137
 
138
# Loads the permitted values of a domain from its JSON definition.
#
# Uses the first file matching `domains/**/<domain_name>.json` (relative to the
# current working directory) and collects the 'value' entry of every option
# hash found in it.
#
# @param domain_name [String] domain name, i.e. the file name without `.json`
# @return [Array] the 'value' of each option hash; an empty array when the
#   file is missing, unreadable, not valid JSON, or not a JSON array
def load_valid_domain_values(domain_name)
  file = Dir.glob("domains/**/#{domain_name}.json").first
  # Explicit guard: the previous version relied on File.read(nil) raising and
  # an inline `rescue false` swallowing the error.
  return [] unless file

  json = JSON.parse(File.read(file))
  # Anything other than an array of option hashes cannot yield values; iterating
  # a Hash here used to raise a TypeError that nothing rescued.
  return [] unless json.is_a?(Array)

  json.select { |option| option.is_a?(Hash) }.map { |option| option['value'] }
rescue JSON::ParserError, SystemCallError
  # Malformed JSON or an unreadable file is treated as "no known values",
  # matching the best-effort behavior of the original inline rescue.
  []
end
146
+
147
# Result of checking one CSV column against the data dictionary.
#
# Three checks are recorded:
# * file_name_test - the column name is in the list of variable file names
# * json_id_test   - the column name is in the list of variable JSON ids
# * values_test    - for 'choices' variables, every non-nil CSV value is one
#                    of the valid domain values
class SpoutCoverageResult
  attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type

  # @param csv [String] name of the CSV the column came from (not used by the checks)
  # @param column [String] column header being checked
  # @param variable_file_names [Array<String>] names compared against the column for file_name_test
  # @param variable_json_ids [Array<String>] ids compared against the column for json_id_test
  # @param variable_type [String] variable type; only 'choices' triggers value checking
  # @param valid_values [Array, nil] values allowed by the variable's domain
  # @param csv_values [Array, nil] distinct values collected for the column; nil when none were collected
  def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
    @file_name_test = variable_file_names.include?(column)
    @json_id_test = variable_json_ids.include?(column)
    @variable_type = variable_type
    # Normalize nil to an empty list so a column without any collected rows
    # does not raise on `nil.compact`.
    @valid_values = valid_values || []
    @csv_values = csv_values || []
    @values_test = check_values
  end

  # @return [Integer] 0 when all three checks pass, otherwise 1
  def number_of_errors
    @file_name_test && @json_id_test && @values_test ? 0 : 1
  end

  # @return [Boolean] true for non-'choices' variables, or when no non-nil CSV
  #   value falls outside the valid values
  def check_values
    return true unless variable_type == 'choices'

    # Set difference rather than comparing union sizes: the size comparison
    # reported a failure whenever valid_values itself contained duplicates.
    (csv_values.compact - valid_values).empty?
  end

  # @return [Boolean] true only when `error` has been set to exactly true
  def errored?
    error == true
  end
end
172
+
173
# Formats a number with a thousands delimiter, e.g. 1234567 -> "1,234,567".
#
# Only the part before the first decimal point is grouped, so fractional
# digits are left untouched (1234.56789 -> "1,234.56789").
#
# @param number [#to_s] value to format
# @param delimiter [String] separator inserted between groups of three digits
# @return [String] the formatted number
def number_with_delimiter(number, delimiter = ",")
  integer_part, fraction_part = number.to_s.split('.', 2)
  # Insert the delimiter after any digit that is followed by a whole number of
  # three-digit groups.
  grouped = integer_part.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  fraction_part ? "#{grouped}.#{fraction_part}" : grouped
end
176
+
117
177
  def standard_version
118
178
  version = File.open('VERSION', &:readline).strip rescue ''
119
179
  version == '' ? '1.0.0' : version
@@ -249,12 +309,3 @@ end
249
309
  def additional_csv_info
250
310
  "\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n"
251
311
  end
252
-
253
-
254
- # class SpoutCoverageResult
255
- # attr_accessor :error, :error_message
256
-
257
- # def errored?
258
- # error == true
259
- # end
260
- # end
data/lib/spout/version.rb CHANGED
@@ -3,7 +3,7 @@ module Spout
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
5
  TINY = 0
6
- BUILD = "beta2" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta3" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
@@ -6,7 +6,7 @@
6
6
  <meta name="author" content="">
7
7
  <link rel="shortcut icon" href="">
8
8
 
9
- <title>Starter Template for Bootstrap</title>
9
+ <title>Spout Dataset and Data Dictionary Coverage</title>
10
10
 
11
11
  <!-- Bootstrap core CSS -->
12
12
  <link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
@@ -20,8 +20,22 @@
20
20
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
21
21
  <script src="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
22
22
  <style type="text/css">
23
+ html {
24
+ position: relative;
25
+ min-height: 100%;
26
+ }
23
27
  body {
24
28
  padding-top: 50px;
29
+ /* Margin bottom by footer height */
30
+ margin-bottom: 60px;
31
+ }
32
+ #footer {
33
+ position: absolute;
34
+ bottom: 0;
35
+ width: 100%;
36
+ /* Set the fixed height of the footer here */
37
+ height: 60px;
38
+ background-color: #f5f5f5;
25
39
  }
26
40
  </style>
27
41
  </head>
@@ -58,8 +72,8 @@ body {
58
72
  <span class="text-muted">---</span>
59
73
  <% end %>
60
74
  </td>
61
- <td><%= total_column_count %></td>
62
- <td><%= total_column_count - mapped_column_count %> </td>
75
+ <td><%= number_with_delimiter( total_column_count ) %></td>
76
+ <td><%= number_with_delimiter( total_column_count - mapped_column_count ) %></td>
63
77
  </tr>
64
78
  <% end %>
65
79
  </tbody>
@@ -72,22 +86,24 @@ body {
72
86
  <th>Dataset Column Header</th>
73
87
  <th>JSON File</th>
74
88
  <th>Found in Data Dictionary</th>
89
+ <th>Variable Type</th>
90
+ <th>Values</th>
75
91
  </tr>
76
92
  </thead>
77
93
  <tbody>
78
- <% @matching_results.each do |csv, column, file_name_test, json_id_test| %>
94
+ <% @matching_results.each do |csv, column, scr| %>
79
95
  <tr>
80
96
  <td><code><%= csv %></code></td>
81
97
  <td><%= column %></td>
82
98
  <td>
83
- <% if file_name_test %>
99
+ <% if scr.file_name_test %>
84
100
  <span class="text-success">File Found</span>
85
101
  <% else %>
86
102
  <span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
87
103
  <% end %>
88
104
  </td>
89
105
  <td>
90
- <% if json_id_test %>
106
+ <% if scr.json_id_test %>
91
107
  <span class="text-success">JSON ID Found</span>
92
108
  <% else %>
93
109
  <span class="text-danger">No Matching JSON ID</span>
@@ -96,6 +112,18 @@ body {
96
112
  }</pre>
97
113
  <% end %>
98
114
  </td>
115
+ <td><%= scr.variable_type %></td>
116
+ <td>
117
+ <% if scr.values_test %>
118
+ <span class="text-success">Valid Values</span>
119
+ <% else %>
120
+ VALID: <%= scr.valid_values %><br /><br />
121
+ Bad Values:
122
+ <% (scr.csv_values.compact - scr.valid_values).each do |value| %>
123
+ <code><%= value %></code>
124
+ <% end %>
125
+ <% end %>
126
+ </td>
99
127
  </tr>
100
128
  <% end %>
101
129
  </tbody>
@@ -103,5 +131,10 @@ body {
103
131
 
104
132
  </div><!-- /.container -->
105
133
 
134
+ <div id="footer">
135
+ <div class="container">
136
+ <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
137
+ </div>
138
+ </div>
106
139
 
107
140
  </body></html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spout
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.beta2
4
+ version: 0.6.0.beta3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-28 00:00:00.000000000 Z
11
+ date: 2014-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake