spout 0.8.0 → 0.9.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/spout/commands/coverage.rb +1 -2
- data/lib/spout/commands/outliers.rb +1 -2
- data/lib/spout/helpers/semantic.rb +81 -0
- data/lib/spout/helpers/subject_loader.rb +20 -3
- data/lib/spout/version.rb +2 -2
- data/lib/spout/views/index.html.erb +19 -2
- data/lib/spout/views/outliers.html.erb +18 -2
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 92e5535751a5e13e40a0e12f30b5ec2b908406cb
|
4
|
+
data.tar.gz: 58333eadcfc852dd2fbbe059c8b196cc9bbb9167
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbf63c8f770797939e7f952e4a5498dd5f145b5f3cc7b01498e5ade34ffde4b3c05a05b07d7e454805979a3f45829131436fd8e75441aa0238b61c29cc4c29cc
|
7
|
+
data.tar.gz: 4f2e54c50116f2c265e274143b1e704b3211392ca47cac707fe384fb7d44528a431a067ce9677d69df8f8d7d789f1ed410ac61ca0588575a742999085eb4f2c6
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 0.9.0
|
2
|
+
|
3
|
+
### Enhancements
|
4
|
+
- **General Changes**
|
5
|
+
- `spout c`, `spout o`, `spout g`, `spout p`, will now use datasets that are compatible with the data dictionary
|
6
|
+
- The following examples use a Data Dictionary that is currently on version 0.2.1.beta2
|
7
|
+
- Ex: If a dataset exists in folder 0.2.0, then this folder will be used.
|
8
|
+
- Ex: If datasets exist in 0.2.0, 0.2.1.beta2, and 0.2.1, then the exact match, 0.2.1.beta2, will be used.
|
9
|
+
- Ex: If datasets exist in 0.2.0, 0.2.1.beta1, 0.2.1, and 0.3.0, then the highest match on the minor version is used, in this case 0.2.1.
|
10
|
+
|
1
11
|
## 0.8.0 (June 27, 2014)
|
2
12
|
|
3
13
|
### Enhancements
|
@@ -50,8 +50,7 @@ module Spout
|
|
50
50
|
|
51
51
|
@coverage_results = []
|
52
52
|
|
53
|
-
@csv_files
|
54
|
-
@csv_files.each do |csv_file|
|
53
|
+
@subject_loader.csv_files.each do |csv_file|
|
55
54
|
total_column_count = @matching_results.select{|mr| mr[0].include?(csv_file)}.count
|
56
55
|
mapped_column_count = @matching_results.select{|mr| mr[0].include?(csv_file) and mr[2].number_of_errors == 0}.count
|
57
56
|
@coverage_results << [ csv_file, total_column_count, mapped_column_count ]
|
@@ -37,8 +37,7 @@ module Spout
|
|
37
37
|
|
38
38
|
@outlier_results.sort!{|a,b| [a.weight, a.method] <=> [b.weight, b.method]}
|
39
39
|
|
40
|
-
@
|
41
|
-
@overall_results = @csv_files.collect do |csv_file|
|
40
|
+
@overall_results = @subject_loader.csv_files.collect do |csv_file|
|
42
41
|
major_outliers = @outlier_results.select{|outlier_result| outlier_result.csv_files.include?(csv_file) and outlier_result.weight == 0 }.count
|
43
42
|
minor_outliers = @outlier_results.select{|outlier_result| outlier_result.csv_files.include?(csv_file) and outlier_result.weight == 1 }.count
|
44
43
|
total_outliers = major_outliers + minor_outliers
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# def dataset_folders
|
2
|
+
# Dir.entries('csvs').select{|e| File.directory? File.join('csvs', e) }.reject{|e| [".",".."].include?(e)}.sort
|
3
|
+
# end
|
4
|
+
module Spout
  module Helpers

    # A dotted version string (e.g. "0.2.1.beta2") split into up to four
    # components: major, minor, tiny and an optional build label. The
    # components are kept as strings (or nil when absent); the *_number
    # helpers expose integer forms used for ordering.
    class Version
      attr_reader :string, :major, :minor, :tiny, :build

      # @param string [#to_s] the version, e.g. "0.2.1" or "0.2.1.beta2"
      def initialize(string)
        self.string = string
      end

      # Replaces the version string and re-parses it, so that major, minor,
      # tiny and build never go stale after the string is reassigned.
      def string=(value)
        @string = value.to_s
        @major, @minor, @tiny, @build = @string.split('.')
      end

      def major_number
        @major.to_i
      end

      def minor_number
        @minor.to_i
      end

      def tiny_number
        @tiny.to_i
      end

      # 1 when there is no build label, 0 when there is one, so that a
      # version without a build label ranks above the same version with one
      # (e.g. "0.2.1" above "0.2.1.beta1").
      def build_number
        @build.nil? ? 1 : 0
      end

      # @return [Array<Integer>] [major, minor, tiny, build_number]
      def rank
        [major_number, minor_number, tiny_number, build_number]
      end

      # Total ordering key: rank first, then the build label as a tie
      # breaker. rank alone gives every labelled build the same value, which
      # would leave e.g. "beta1" and "beta2" in an unspecified order.
      def sort_key
        [rank, build_segments]
      end

      private

      # Splits the build label into digit and non-digit runs and wraps each in
      # a uniformly shaped triple, so that numeric runs compare as integers
      # ("beta10" after "beta2") and integers are never compared to strings.
      def build_segments
        @build.to_s.scan(/\d+|\D+/).map do |segment|
          segment =~ /\A\d+\z/ ? [0, segment.to_i, ''] : [1, 0, segment]
        end
      end
    end

    # Chooses which dataset folder to use for a given data dictionary
    # version, out of a list of available folder names.
    class Semantic

      attr_accessor :data_dictionary_version

      # @param version [#to_s] the data dictionary version
      # @param version_strings [Array<#to_s>] candidate folder names
      def initialize(version, version_strings)
        @data_dictionary_version = Spout::Helpers::Version.new(version)
        # Sort on the full key (not just rank) so the order among builds
        # that share a rank is deterministic.
        @versions = version_strings.map { |vs| Spout::Helpers::Version.new(vs) }.sort_by(&:sort_key)
      end

      # Candidates whose major and minor components equal the dictionary's
      # (compared as strings), in ascending order.
      def valid_versions
        @versions.select { |v| v.major == major && v.minor == minor }
      end

      # @return [String] the dictionary version itself when there is no
      #   candidate sharing its major and minor, or when a candidate matches
      #   it exactly; otherwise the highest-ordered such candidate.
      def selected_folder
        candidates = valid_versions.map(&:string)
        return version if candidates.empty? || candidates.include?(version)
        candidates.last
      end

      def version
        @data_dictionary_version.string
      end

      def major
        @data_dictionary_version.major
      end

      def minor
        @data_dictionary_version.minor
      end

      def tiny
        @data_dictionary_version.tiny
      end

      def build
        @data_dictionary_version.build
      end

    end

  end
end
|
@@ -1,13 +1,15 @@
|
|
1
|
+
require 'colorize'
|
1
2
|
require 'csv'
|
2
3
|
require 'json'
|
3
4
|
|
4
5
|
require 'spout/models/subject'
|
6
|
+
require 'spout/helpers/semantic'
|
5
7
|
|
6
8
|
module Spout
|
7
9
|
module Helpers
|
8
10
|
class SubjectLoader
|
9
11
|
attr_accessor :subjects
|
10
|
-
attr_reader :all_methods, :all_domains
|
12
|
+
attr_reader :all_methods, :all_domains, :csv_files, :csv_directory
|
11
13
|
|
12
14
|
def initialize(variable_files, valid_ids, standard_version, number_of_rows, visit)
|
13
15
|
@subjects = []
|
@@ -18,6 +20,8 @@ module Spout
|
|
18
20
|
@visit = visit
|
19
21
|
@all_methods = {}
|
20
22
|
@all_domains = []
|
23
|
+
@csv_files = []
|
24
|
+
@csv_directory = ''
|
21
25
|
end
|
22
26
|
|
23
27
|
def load_subjects_from_csvs!
|
@@ -28,8 +32,14 @@ module Spout
|
|
28
32
|
def load_subjects_from_csvs_part_one!
|
29
33
|
@subjects = []
|
30
34
|
|
31
|
-
|
32
|
-
|
35
|
+
available_folders = (Dir.exist?('csvs') ? Dir.entries('csvs').select{|e| File.directory? File.join('csvs', e) }.reject{|e| [".",".."].include?(e)}.sort : [])
|
36
|
+
|
37
|
+
@semantic = Spout::Helpers::Semantic.new(@standard_version, available_folders)
|
38
|
+
|
39
|
+
@csv_directory = @semantic.selected_folder
|
40
|
+
|
41
|
+
@csv_files = Dir.glob("csvs/#{@csv_directory}/*.csv")
|
42
|
+
@csv_files.each_with_index do |csv_file, index|
|
33
43
|
count = 0
|
34
44
|
puts "Parsing: #{csv_file}"
|
35
45
|
CSV.parse( File.open(csv_file, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true, header_converters: lambda { |h| h.to_s.downcase } ) do |line|
|
@@ -60,6 +70,13 @@ module Spout
|
|
60
70
|
end
|
61
71
|
puts "\n\n"
|
62
72
|
end
|
73
|
+
|
74
|
+
if @csv_directory != @standard_version
|
75
|
+
puts "Using dataset in " + "csvs/#{@csv_directory}/".colorize( :green ) + " for dictionary version " + @standard_version.to_s.colorize( :green ) + "\n\n"
|
76
|
+
else
|
77
|
+
puts "Using dataset in " + "csvs/#{@standard_version}/".colorize( :green ) + "\n\n"
|
78
|
+
end
|
79
|
+
|
63
80
|
end
|
64
81
|
|
65
82
|
def load_subjects_from_csvs_part_two!
|
data/lib/spout/version.rb
CHANGED
@@ -65,11 +65,28 @@ tfoot td {
|
|
65
65
|
<div class="container" style="margin-top:30px">
|
66
66
|
<div class="jumbotron">
|
67
67
|
<h1>You made Spout cry... :'-(</h1>
|
68
|
-
<p>No CSVs found in <code><%= Dir.pwd %>/csvs/<%= @
|
68
|
+
<p>No CSVs found in <code><%= Dir.pwd %>/csvs/<%= @subject_loader.csv_directory %>/</code></p>
|
69
69
|
</div>
|
70
70
|
</div>
|
71
71
|
<% else %>
|
72
72
|
<div style="padding: 30px 30px 10px 30px;">
|
73
|
+
|
74
|
+
<div class="row">
|
75
|
+
<div class="col-sm-6">
|
76
|
+
<div class="panel panel-info" style="text-align:center">
|
77
|
+
<div class="panel-heading">Data Dictionary Version</div>
|
78
|
+
<div class="panel-body"><strong><%= @standard_version %></strong></div>
|
79
|
+
</div>
|
80
|
+
</div>
|
81
|
+
|
82
|
+
<div class="col-sm-6">
|
83
|
+
<div class="panel panel-info" style="text-align:center">
|
84
|
+
<div class="panel-heading">Dataset Directory</div>
|
85
|
+
<div class="panel-body"><strong>csvs/<%= @subject_loader.csv_directory %></strong></div>
|
86
|
+
</div>
|
87
|
+
</div>
|
88
|
+
</div>
|
89
|
+
|
73
90
|
<table class="table">
|
74
91
|
<thead>
|
75
92
|
<tr>
|
@@ -219,7 +236,7 @@ tfoot td {
|
|
219
236
|
<tr>
|
220
237
|
<td>
|
221
238
|
<% csv_files.each do |csv_file| %>
|
222
|
-
<code class="<%= 'success' if scr.number_of_errors == 0 %>"><%= csv_file %></code><br />
|
239
|
+
<code class="<%= 'success' if scr.number_of_errors == 0 %>"><%= csv_file.gsub(/^csvs\/#{@subject_loader.csv_directory}\//, '') %></code><br />
|
223
240
|
<% end %>
|
224
241
|
</td>
|
225
242
|
<td><%= column %></td>
|
@@ -77,12 +77,28 @@ tfoot td {
|
|
77
77
|
<div class="container" style="margin-top:30px">
|
78
78
|
<div class="jumbotron">
|
79
79
|
<h1>You made Spout cry... :'-(</h1>
|
80
|
-
<p>No CSVs found in <code><%= Dir.pwd %>/csvs/<%= @
|
80
|
+
<p>No CSVs found in <code><%= Dir.pwd %>/csvs/<%= @subject_loader.csv_directory %>/</code></p>
|
81
81
|
</div>
|
82
82
|
</div>
|
83
83
|
<% else %>
|
84
84
|
<div style="padding: 30px 30px 10px 30px;">
|
85
85
|
|
86
|
+
<div class="row">
|
87
|
+
<div class="col-sm-6">
|
88
|
+
<div class="panel panel-info" style="text-align:center">
|
89
|
+
<div class="panel-heading">Data Dictionary Version</div>
|
90
|
+
<div class="panel-body"><strong><%= @standard_version %></strong></div>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
|
94
|
+
<div class="col-sm-6">
|
95
|
+
<div class="panel panel-info" style="text-align:center">
|
96
|
+
<div class="panel-heading">Dataset Directory</div>
|
97
|
+
<div class="panel-body"><strong>csvs/<%= @subject_loader.csv_directory %></strong></div>
|
98
|
+
</div>
|
99
|
+
</div>
|
100
|
+
</div>
|
101
|
+
|
86
102
|
<table class="table">
|
87
103
|
<thead>
|
88
104
|
<tr>
|
@@ -128,7 +144,7 @@ tfoot td {
|
|
128
144
|
<% outlier_result.csv_files.each do |csv_file| %>
|
129
145
|
<% css_class = 'success' if outlier_result.outliers.size == 0 %>
|
130
146
|
<% css_class = 'warning-pale' if outlier_result.major_outliers.size == 0 and outlier_result.minor_outliers.size > 0 %>
|
131
|
-
<code class="<%= css_class %>"><%= csv_file %></code><br />
|
147
|
+
<code class="<%= css_class %>"><%= csv_file.gsub(/^csvs\/#{@subject_loader.csv_directory}\//, '') %></code><br />
|
132
148
|
<% end %></td>
|
133
149
|
<td><%= outlier_result.method %></td>
|
134
150
|
<td><%= outlier_result.display_name.to_s[0..20] %></td>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spout
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Remo Mueller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/spout/helpers/iterators.rb
|
122
122
|
- lib/spout/helpers/json_loader.rb
|
123
123
|
- lib/spout/helpers/number_helper.rb
|
124
|
+
- lib/spout/helpers/semantic.rb
|
124
125
|
- lib/spout/helpers/subject_loader.rb
|
125
126
|
- lib/spout/helpers/table_formatting.rb
|
126
127
|
- lib/spout/models/coverage_result.rb
|
@@ -179,9 +180,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
179
180
|
version: '0'
|
180
181
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
182
|
requirements:
|
182
|
-
- - "
|
183
|
+
- - ">"
|
183
184
|
- !ruby/object:Gem::Version
|
184
|
-
version:
|
185
|
+
version: 1.3.1
|
185
186
|
requirements: []
|
186
187
|
rubyforge_project:
|
187
188
|
rubygems_version: 2.2.2
|