spout 0.6.0.beta2 → 0.6.0.beta3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4ae8ef2362ef67fc7cb4cb86560fabee408ef7b
4
- data.tar.gz: aa825b0f03d9def1e26e784c09eb205424896258
3
+ metadata.gz: e85fa1a3ea963f131b61ce1952d10f1744c510a1
4
+ data.tar.gz: 2569c7aa7cbf7ce5bab644c128707e70a6ac659c
5
5
  SHA512:
6
- metadata.gz: 815ae970ecfdf3ba9db2aa6f2095eff92608df08312e7903fdaf23e15243aca4a842147ada5031faa7fee594646b6f9632b0bfa1be87de408f7e41fe32d45562
7
- data.tar.gz: 1b55348b328af02eee3e2ee189f88778584f09cc1a82eff5160d4258c8bdf78646facb23c3dd2baa515897260ed223cb2a6d4ec9dce49c1ba23af178051210c3
6
+ metadata.gz: c7766b6aadc863f2159fa7e8ba33d10be37d97feb2f4a71317049941cd29d3a7bcab1128c3613ed945a4da62ea11aea073870b8461b1dd86330b2d95c83e6f95
7
+ data.tar.gz: 35005ff394184b10aaad88955b7129401c81ee8ee883b91cf952eb0e4c68a88709c583eacb48e64a1e50785ea0f036e694172871038825533a6ce347297a8fbd
data/CHANGELOG.md CHANGED
@@ -3,6 +3,7 @@
3
3
  ### Enhancement
4
4
  - Added `spout match` command that generates a coverage report of how well a dataset matches the data dictionary
5
5
  - Spout Match generates a viewable report in `dd\index.html` that shows which columns are covered in CSVs located in `dd\csvs`
6
+ - Spout Match checks that all collected values for a variable with a domain exist in the associated domain
6
7
  - **Gem Changes**
7
8
  - Updated to colorize 0.6.0
8
9
 
@@ -72,19 +72,40 @@ namespace :dd do
72
72
  all_column_headers += column_headers
73
73
  end
74
74
 
75
+ value_hash = {}
76
+ row_count = 0
75
77
 
76
- all_column_headers
78
+ csvs.each do |csv_file|
79
+ CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line|
80
+ row = line.to_hash
81
+ row.each do |column_name, value|
82
+ value_hash[column_name] ||= []
83
+ value_hash[column_name] = value_hash[column_name] | [value]
84
+ end
85
+
86
+ row_count += 1
87
+ # break if row_count > 10
88
+ end
89
+ end
77
90
 
78
91
  @matching_results = []
79
92
 
80
93
  all_column_headers.each do |csv, column|
81
- file_name_test = variable_file_names.include?(column)
82
- json_id_test = variable_json_ids.include?(column)
83
- # SpoutCoverageResult
84
- @matching_results << [ csv, column, file_name_test, json_id_test ]
94
+ file = Dir.glob("variables/**/#{column}.json").first
95
+ valid_values = []
96
+ variable_type = ''
97
+ if json = JSON.parse(File.read(file)) rescue false
98
+ variable_type = json['type']
99
+ if variable_type == 'choices'
100
+ valid_values = load_valid_domain_values(json['domain'])
101
+ end
102
+ end
103
+
104
+ scr = SpoutCoverageResult.new(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, value_hash[column])
105
+ @matching_results << [ csv, column, scr ]
85
106
  end
86
107
 
87
- @matching_results.sort!{|a,b| [(a[2] && a[3] ? 1 : 0), a[0].to_s, a[1].to_s] <=> [(b[2] && b[3] ? 1 : 0), b[0].to_s, b[1].to_s]}
108
+ @matching_results.sort!{|a,b| [b[2].number_of_errors, a[0].to_s, a[1].to_s] <=> [a[2].number_of_errors, b[0].to_s, b[1].to_s]}
88
109
 
89
110
  @coverage_results = []
90
111
 
@@ -92,7 +113,7 @@ namespace :dd do
92
113
  csv_name = csv_file.split('/').last.to_s
93
114
 
94
115
  total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
95
- mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2] and mr[3]}.count
116
+ mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2].number_of_errors == 0}.count
96
117
 
97
118
  @coverage_results << [ csv_name, total_column_count, mapped_column_count ]
98
119
  end
@@ -114,6 +135,45 @@ namespace :dd do
114
135
 
115
136
  end
116
137
 
138
# Returns the list of allowed values defined by a domain JSON file.
#
# Looks for the first file matching `domains/**/<domain_name>.json` (relative
# to the current working directory), parses it as JSON and collects the
# 'value' entry of every option hash it contains.
#
# domain_name - base name of the domain JSON file (without `.json`).
#
# Returns an Array of values. The Array is empty when the domain name is
# blank, no file matches, the file cannot be read, the file is not valid
# JSON, or the parsed document is not a JSON array.
def load_valid_domain_values(domain_name)
  return [] if domain_name.to_s.empty?

  file = Dir.glob("domains/**/#{domain_name}.json").first
  # No matching domain file: nothing to validate against.
  return [] unless file

  options = begin
    JSON.parse(File.read(file))
  rescue JSON::ParserError, SystemCallError, IOError
    # Best effort: an unreadable or malformed domain file yields no values
    # instead of aborting the whole coverage report.
    nil
  end
  return [] unless options.is_a?(Array)

  # Ignore entries that are not option hashes so a stray scalar in the file
  # cannot raise while indexing by 'value'.
  options.select { |option| option.is_a?(Hash) }.map { |option| option['value'] }
end
146
+
147
# Outcome of matching a single CSV column against the data dictionary.
#
# Three checks are evaluated once, at construction time:
#   file_name_test - the column is included in variable_file_names
#   json_id_test   - the column is included in variable_json_ids
#   values_test    - for variables of type 'choices', every non-nil value
#                    collected from the CSV is one of the valid values
#                    (always true for any other variable type)
class SpoutCoverageResult
  attr_accessor :error, :error_message, :file_name_test, :json_id_test, :values_test, :valid_values, :csv_values, :variable_type

  # csv                 - name of the CSV the column came from (accepted but
  #                       not stored)
  # column              - column header being checked
  # variable_file_names - collection responding to include?, checked for column
  # variable_json_ids   - collection responding to include?, checked for column
  # variable_type       - type string of the variable; only 'choices'
  #                       triggers value validation
  # valid_values        - Array of values allowed for the variable
  # csv_values          - Array of values collected for the column; may be
  #                       nil when no value was collected
  def initialize(csv, column, variable_file_names, variable_json_ids, variable_type, valid_values, csv_values)
    @file_name_test = variable_file_names.include?(column)
    @json_id_test = variable_json_ids.include?(column)
    @variable_type = variable_type
    @valid_values = valid_values
    @csv_values = csv_values
    @values_test = check_values
  end

  # Returns 0 when all three checks pass and 1 otherwise. This is a pass/fail
  # flag rather than a count of the individual failed checks.
  def number_of_errors
    @file_name_test && @json_id_test && @values_test ? 0 : 1
  end

  # Returns true when the variable is not of type 'choices', or when no
  # non-nil CSV value falls outside the valid values.
  #
  # A set difference is used instead of comparing union sizes so duplicate
  # entries in valid_values cannot skew the result, and Array() lets a nil
  # csv_values or valid_values count as "no values" instead of raising.
  def check_values
    return true unless variable_type == 'choices'

    (Array(csv_values).compact - Array(valid_values)).empty?
  end

  # Returns true only when error has been explicitly set to true.
  def errored?
    error == true
  end
end
172
+
173
# Formats a number with a thousands delimiter, e.g. 1234567 => "1,234,567".
#
# Only the part before the first '.' is grouped, so fractional digits are
# left untouched (1234.5678 => "1,234.5678").
#
# number    - any object; its to_s representation is formatted.
# delimiter - String inserted between groups of three digits (default ",").
#
# Returns the formatted String.
def number_with_delimiter(number, delimiter = ",")
  integer_part, fraction_part = number.to_s.split('.', 2)

  # Block form inserts the delimiter literally, so backslash sequences in a
  # custom delimiter are not interpreted as regexp back-references.
  grouped = integer_part.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/) { "#{$1}#{delimiter}" }

  fraction_part ? "#{grouped}.#{fraction_part}" : grouped
end
176
+
117
177
  def standard_version
118
178
  version = File.open('VERSION', &:readline).strip rescue ''
119
179
  version == '' ? '1.0.0' : version
@@ -249,12 +309,3 @@ end
249
309
  def additional_csv_info
250
310
  "\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n"
251
311
  end
252
-
253
-
254
- # class SpoutCoverageResult
255
- # attr_accessor :error, :error_message
256
-
257
- # def errored?
258
- # error == true
259
- # end
260
- # end
data/lib/spout/version.rb CHANGED
@@ -3,7 +3,7 @@ module Spout
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
5
  TINY = 0
6
- BUILD = "beta2" # nil, "pre", "rc", "rc2"
6
+ BUILD = "beta3" # nil, "pre", "rc", "rc2"
7
7
 
8
8
  STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
9
9
  end
@@ -6,7 +6,7 @@
6
6
  <meta name="author" content="">
7
7
  <link rel="shortcut icon" href="">
8
8
 
9
- <title>Starter Template for Bootstrap</title>
9
+ <title>Spout Dataset and Data Dictionary Coverage</title>
10
10
 
11
11
  <!-- Bootstrap core CSS -->
12
12
  <link href="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
@@ -20,8 +20,22 @@
20
20
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
21
21
  <script src="http://netdna.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
22
22
  <style type="text/css">
23
+ html {
24
+ position: relative;
25
+ min-height: 100%;
26
+ }
23
27
  body {
24
28
  padding-top: 50px;
29
+ /* Margin bottom by footer height */
30
+ margin-bottom: 60px;
31
+ }
32
+ #footer {
33
+ position: absolute;
34
+ bottom: 0;
35
+ width: 100%;
36
+ /* Set the fixed height of the footer here */
37
+ height: 60px;
38
+ background-color: #f5f5f5;
25
39
  }
26
40
  </style>
27
41
  </head>
@@ -58,8 +72,8 @@ body {
58
72
  <span class="text-muted">---</span>
59
73
  <% end %>
60
74
  </td>
61
- <td><%= total_column_count %></td>
62
- <td><%= total_column_count - mapped_column_count %> </td>
75
+ <td><%= number_with_delimiter( total_column_count ) %></td>
76
+ <td><%= number_with_delimiter( total_column_count - mapped_column_count ) %></td>
63
77
  </tr>
64
78
  <% end %>
65
79
  </tbody>
@@ -72,22 +86,24 @@ body {
72
86
  <th>Dataset Column Header</th>
73
87
  <th>JSON File</th>
74
88
  <th>Found in Data Dictionary</th>
89
+ <th>Variable Type</th>
90
+ <th>Values</th>
75
91
  </tr>
76
92
  </thead>
77
93
  <tbody>
78
- <% @matching_results.each do |csv, column, file_name_test, json_id_test| %>
94
+ <% @matching_results.each do |csv, column, scr| %>
79
95
  <tr>
80
96
  <td><code><%= csv %></code></td>
81
97
  <td><%= column %></td>
82
98
  <td>
83
- <% if file_name_test %>
99
+ <% if scr.file_name_test %>
84
100
  <span class="text-success">File Found</span>
85
101
  <% else %>
86
102
  <span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
87
103
  <% end %>
88
104
  </td>
89
105
  <td>
90
- <% if json_id_test %>
106
+ <% if scr.json_id_test %>
91
107
  <span class="text-success">JSON ID Found</span>
92
108
  <% else %>
93
109
  <span class="text-danger">No Matching JSON ID</span>
@@ -96,6 +112,18 @@ body {
96
112
  }</pre>
97
113
  <% end %>
98
114
  </td>
115
+ <td><%= scr.variable_type %></td>
116
+ <td>
117
+ <% if scr.values_test %>
118
+ <span class="text-success">Valid Values</span>
119
+ <% else %>
120
+ VALID: <%= scr.valid_values %><br /><br />
121
+ Bad Values:
122
+ <% (scr.csv_values.compact - scr.valid_values).each do |value| %>
123
+ <code><%= value %></code>
124
+ <% end %>
125
+ <% end %>
126
+ </td>
99
127
  </tr>
100
128
  <% end %>
101
129
  </tbody>
@@ -103,5 +131,10 @@ body {
103
131
 
104
132
  </div><!-- /.container -->
105
133
 
134
+ <div id="footer">
135
+ <div class="container">
136
+ <p class="text-muted" style="margin: 20px 0;">Generated by <a href="https://github.com/sleepepi/spout">Spout v<%= Spout::VERSION::STRING %></a></p>
137
+ </div>
138
+ </div>
106
139
 
107
140
  </body></html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spout
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.beta2
4
+ version: 0.6.0.beta3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Remo Mueller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-28 00:00:00.000000000 Z
11
+ date: 2014-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake