spout 0.6.0.beta1 → 0.6.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/spout/actions.rb +1 -1
- data/lib/spout/tasks/engine.rake +37 -14
- data/lib/spout/version.rb +1 -1
- data/lib/spout/views/index.html.erb +32 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4ae8ef2362ef67fc7cb4cb86560fabee408ef7b
|
4
|
+
data.tar.gz: aa825b0f03d9def1e26e784c09eb205424896258
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 815ae970ecfdf3ba9db2aa6f2095eff92608df08312e7903fdaf23e15243aca4a842147ada5031faa7fee594646b6f9632b0bfa1be87de408f7e41fe32d45562
|
7
|
+
data.tar.gz: 1b55348b328af02eee3e2ee189f88778584f09cc1a82eff5160d4258c8bdf78646facb23c3dd2baa515897260ed223cb2a6d4ec9dce49c1ba23af178051210c3
|
data/lib/spout/actions.rb
CHANGED
@@ -19,7 +19,7 @@ module Spout
|
|
19
19
|
new_data_dictionary_export(argv)
|
20
20
|
when 'hybrid', '-hybrid', '--hybrid', 'y', 'hy', '-y', '-hy'
|
21
21
|
new_data_dictionary_export(argv, 'hybrid')
|
22
|
-
when 'dataset', '-dataset', '--dataset', 'd', '-d', 'match', '-match', '--match', 'm', '-m'
|
22
|
+
when 'dataset', '-dataset', '--dataset', 'd', '-d', 'match', '-match', '--match', 'm', '-m', 'coverage', '-coverage', '--coverage', 'c', '-c'
|
23
23
|
match_dataset_report(argv)
|
24
24
|
else
|
25
25
|
help
|
data/lib/spout/tasks/engine.rake
CHANGED
@@ -46,41 +46,56 @@ namespace :dd do
|
|
46
46
|
|
47
47
|
desc 'Match CSV dataset with JSON repository'
|
48
48
|
task :coverage do
|
49
|
-
puts 'MDR'
|
50
|
-
puts Dir.pwd
|
51
49
|
puts csvs = Dir.glob("dd/csvs/*.csv")
|
52
50
|
|
53
|
-
|
51
|
+
all_column_headers = []
|
54
52
|
|
55
|
-
|
56
|
-
|
53
|
+
variable_json_ids = []
|
54
|
+
variable_file_names = []
|
57
55
|
|
58
56
|
Dir.glob("variables/**/*.json").each do |file|
|
59
57
|
if json = JSON.parse(File.read(file)) rescue false
|
60
|
-
|
58
|
+
variable_json_ids << json['id']
|
61
59
|
end
|
62
|
-
|
60
|
+
variable_file_names << file.split('/').last.to_s.split('.json').first.to_s
|
63
61
|
end
|
64
62
|
|
65
63
|
csvs.each do |csv_file|
|
64
|
+
csv_name = csv_file.split('/').last.to_s
|
66
65
|
column_headers = []
|
67
66
|
|
68
67
|
CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read} ) do |line|
|
69
|
-
column_headers = line
|
68
|
+
column_headers = line.collect{|l| [csv_name, l.to_s.downcase]}
|
70
69
|
break # Only read first line
|
71
70
|
end
|
72
71
|
|
73
|
-
|
72
|
+
all_column_headers += column_headers
|
73
|
+
end
|
74
74
|
|
75
|
-
end
|
76
75
|
|
77
|
-
|
76
|
+
all_column_headers
|
77
|
+
|
78
|
+
@matching_results = []
|
79
|
+
|
80
|
+
all_column_headers.each do |csv, column|
|
81
|
+
file_name_test = variable_file_names.include?(column)
|
82
|
+
json_id_test = variable_json_ids.include?(column)
|
83
|
+
# SpoutCoverageResult
|
84
|
+
@matching_results << [ csv, column, file_name_test, json_id_test ]
|
78
85
|
end
|
79
86
|
|
87
|
+
@matching_results.sort!{|a,b| [(a[2] && a[3] ? 1 : 0), a[0].to_s, a[1].to_s] <=> [(b[2] && b[3] ? 1 : 0), b[0].to_s, b[1].to_s]}
|
88
|
+
|
89
|
+
@coverage_results = []
|
90
|
+
|
91
|
+
csvs.each do |csv_file|
|
92
|
+
csv_name = csv_file.split('/').last.to_s
|
80
93
|
|
81
|
-
|
94
|
+
total_column_count = @matching_results.select{|mr| mr[0] == csv_name}.count
|
95
|
+
mapped_column_count = @matching_results.select{|mr| mr[0] == csv_name and mr[2] and mr[3]}.count
|
82
96
|
|
83
|
-
|
97
|
+
@coverage_results << [ csv_name, total_column_count, mapped_column_count ]
|
98
|
+
end
|
84
99
|
|
85
100
|
coverage_file = File.join(Dir.pwd, 'dd', 'index.html')
|
86
101
|
|
@@ -93,7 +108,6 @@ namespace :dd do
|
|
93
108
|
open_command = 'open' if RUBY_PLATFORM.match(/darwin/) != nil
|
94
109
|
open_command = 'start' if RUBY_PLATFORM.match(/mingw/) != nil
|
95
110
|
|
96
|
-
|
97
111
|
system "#{open_command} #{coverage_file}" if ['start', 'open'].include?(open_command)
|
98
112
|
puts coverage_file
|
99
113
|
end
|
@@ -235,3 +249,12 @@ end
|
|
235
249
|
def additional_csv_info
|
236
250
|
"\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n"
|
237
251
|
end
|
252
|
+
|
253
|
+
|
254
|
+
# class SpoutCoverageResult
|
255
|
+
# attr_accessor :error, :error_message
|
256
|
+
|
257
|
+
# def errored?
|
258
|
+
# error == true
|
259
|
+
# end
|
260
|
+
# end
|
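data/lib/spout/version.rb
CHANGED
[hunk missing from this extract: +1 -1 per the file list above]
data/lib/spout/views/index.html.erb
CHANGED
@@ -38,27 +38,56 @@ body {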
|
|
38
38
|
|
39
39
|
<div class="container" style="padding-top:50px">
|
40
40
|
|
41
|
+
<table class="table">
|
42
|
+
<thead>
|
43
|
+
<tr>
|
44
|
+
<th>CSV</th>
|
45
|
+
<th>Coverage</th>
|
46
|
+
<th>Columns</th>
|
47
|
+
<th>Columns Missed</th>
|
48
|
+
</tr>
|
49
|
+
</thead>
|
50
|
+
<tbody>
|
51
|
+
<% @coverage_results.each do |csv, total_column_count, mapped_column_count| %>
|
52
|
+
<tr>
|
53
|
+
<td><%= csv %></td>
|
54
|
+
<td>
|
55
|
+
<% if total_column_count.to_i > 0 %>
|
56
|
+
<%= "%0.02f %" % (mapped_column_count * 100.0 / total_column_count) %>
|
57
|
+
<% else %>
|
58
|
+
<span class="text-muted">---</span>
|
59
|
+
<% end %>
|
60
|
+
</td>
|
61
|
+
<td><%= total_column_count %></td>
|
62
|
+
<td><%= total_column_count - mapped_column_count %> </td>
|
63
|
+
</tr>
|
64
|
+
<% end %>
|
65
|
+
</tbody>
|
66
|
+
</table>
|
67
|
+
|
41
68
|
<table class="table table-bordered table-hover">
|
42
69
|
<thead>
|
43
70
|
<tr>
|
71
|
+
<th>CSV</th>
|
44
72
|
<th>Dataset Column Header</th>
|
45
73
|
<th>JSON File</th>
|
46
74
|
<th>Found in Data Dictionary</th>
|
47
75
|
</tr>
|
48
76
|
</thead>
|
49
77
|
<tbody>
|
50
|
-
<% @
|
78
|
+
<% @matching_results.each do |csv, column, file_name_test, json_id_test| %>
|
51
79
|
<tr>
|
80
|
+
<td><code><%= csv %></code></td>
|
52
81
|
<td><%= column %></td>
|
53
82
|
<td>
|
54
|
-
<% if
|
83
|
+
<% if file_name_test %>
|
55
84
|
<span class="text-success">File Found</span>
|
56
85
|
<% else %>
|
57
86
|
<span class="text-danger">No JSON File Found, expecting: <code><%= column %>.json</code></span>
|
58
87
|
<% end %>
|
59
88
|
</td>
|
60
89
|
<td>
|
61
|
-
<% if
|
90
|
+
<% if json_id_test %>
|
62
91
|
<span class="text-success">JSON ID Found</span>
|
63
92
|
<% else %>
|
64
93
|
<span class="text-danger">No Matching JSON ID</span>
|