spout 0.6.0 → 0.7.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +11 -0
- data/lib/spout/actions.rb +9 -0
- data/lib/spout/commands/graphs.rb +178 -0
- data/lib/spout/support/javascripts/data.js +17 -0
- data/lib/spout/support/javascripts/highcharts-convert.js +583 -0
- data/lib/spout/support/javascripts/highcharts-more.js +50 -0
- data/lib/spout/support/javascripts/highstock.js +353 -0
- data/lib/spout/support/javascripts/jquery.1.9.1.min.js +5 -0
- data/lib/spout/tasks/engine.rake +6 -0
- data/lib/spout/templates/gitignore +1 -0
- data/lib/spout/version.rb +2 -2
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0751d0dabac934f14dba3f6a34b7ab0ad181a64
|
4
|
+
data.tar.gz: 5b341a7e4f0c9558c7235a4ed9237ef4815ebe8f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f58f2ad27f7feebeb7815ff2fc612b0c7f8a4eb2c66033282e42a05296f0c68c9a2e6fa00b2f0f4ae5feae3e1ba54f30c9a8c40bc74843f0c2f314868dabffd
|
7
|
+
data.tar.gz: fc3c91243f3a0fc3518db19e2b0fc543702d1da9826774a7ab521d35595bdcb7f2dcb2b4d3b94aa3d5ef65872a571fda73deba79773fc4862e8c9482ae87179d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -146,6 +146,17 @@ You can optionally provide a version string
|
|
146
146
|
spout export [1.0.0]
|
147
147
|
```
|
148
148
|
|
149
|
+
### Generate graphs for data in your dataset
|
150
|
+
|
151
|
+
Spout lets you generate graphs for each variable defined in your dataset. Make sure to run `spout coverage` first to validate that your data dictionary and dataset match.
|
152
|
+
|
153
|
+
This command will take some time, and requires [PhantomJS](http://phantomjs.org/) to be installed on your system.
|
154
|
+
|
155
|
+
```
|
156
|
+
spout graphs
|
157
|
+
```
|
158
|
+
|
159
|
+
Generated graphs will be placed in: `./graphs/`
|
149
160
|
|
150
161
|
### Export to the Hybrid Data Dictionary format from your JSON repository
|
151
162
|
|
data/lib/spout/actions.rb
CHANGED
@@ -21,6 +21,8 @@ module Spout
|
|
21
21
|
new_data_dictionary_export(argv, 'hybrid')
|
22
22
|
when 'coverage', '-coverage', '--coverage', 'c', '-c'
|
23
23
|
coverage_report(argv)
|
24
|
+
when 'graphs', '-graphs', '--graphs', 'g', '-g'
|
25
|
+
generate_graphs(argv)
|
24
26
|
else
|
25
27
|
help
|
26
28
|
end
|
@@ -66,6 +68,9 @@ The most common spout commands are:
|
|
66
68
|
dictionary format
|
67
69
|
[c]overage Coverage report, requires dataset CSVs
|
68
70
|
in `<project_name>/csvs/`
|
71
|
+
[g]raphs Generates graphs for each variable in a
|
72
|
+
dataset and places them
|
73
|
+
in `<project_name>/graphs/`
|
69
74
|
[v]ersion Returns the version of Spout
|
70
75
|
|
71
76
|
Commands can be referenced by the first letter:
|
@@ -120,6 +125,10 @@ EOT
|
|
120
125
|
system "bundle exec rake spout:coverage"
|
121
126
|
end
|
122
127
|
|
128
|
+
# Runs the `spout:graphs` rake task through Bundler in a subshell.
#
# @param argv [Array<String>] command-line arguments; accepted for parity with
#   the other command handlers but not used here
# @return [Boolean, nil] whatever Kernel#system reports for the rake command
def generate_graphs(argv)
  rake_command = "bundle exec rake spout:graphs"
  system(rake_command)
end
|
131
|
+
|
123
132
|
private
|
124
133
|
|
125
134
|
def copy_file(template_file, file_name = '')
|
@@ -0,0 +1,178 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
module Spout
|
7
|
+
module Commands
|
8
|
+
class Graphs
|
9
|
+
|
10
|
+
# Generates graphs for every graphable column found in `csvs/*.csv`.
#
# Each CSV is loaded column-wise; for every header a matching variable
# definition is looked up under `variables/`. Variables of type "choices" get
# a pie chart, "numeric"/"integer" variables get a line chart, anything else
# is skipped. For each graphed variable two PNGs are rendered: a large one
# (`<name>-lg.png`, width 600) and a thumbnail (`<name>.png`, width 75).
#
# The chart options are written to a scratch file (`graphs/options.json`)
# that is removed when the run finishes, even if it finishes with an error.
def initialize
  tmp_options_file = File.join('graphs', 'options.json')
  # The scratch file and the PNGs both live under ./graphs, so make sure the
  # directory is there before anything tries to write into it.
  FileUtils.mkdir_p(File.dirname(tmp_options_file))

  Dir.glob("csvs/*.csv").each do |csv_file|
    puts "Working on: #{csv_file}"
    t = Time.now
    csv_table = CSV.table(csv_file, encoding: 'iso-8859-1').by_col!
    puts "Loaded #{csv_file} in #{Time.now - t} seconds."

    total_header_count = csv_table.headers.count
    csv_table.headers.each_with_index do |header, index|
      puts "Column #{ index + 1 } of #{ total_header_count } for #{header} in #{csv_file}"
      graph_column(csv_table, header, tmp_options_file)
    end
  end
ensure
  # File.exist? (not the removed File.exists?) so this works on Ruby >= 3.2.
  File.delete(tmp_options_file) if tmp_options_file && File.exist?(tmp_options_file)
end

# Internal helper: graphs a single CSV column, or returns without doing
# anything when the column has no usable variable definition.
#
# @param csv_table [CSV::Table] table in column mode
# @param header [Symbol, String, nil] column header as reported by the table
# @param tmp_options_file [String] path of the scratch chart-options file
def graph_column(csv_table, header, tmp_options_file)
  return if header.nil?

  variable_file = Dir.glob("variables/**/#{header.downcase}.json", File::FNM_CASEFOLD).first
  return unless variable_file

  json = parse_json_file(variable_file)
  return unless json.is_a?(Hash)

  basename = File.basename(variable_file).gsub(/\.json$/, '').downcase
  col_data = csv_table[header]

  case json["type"]
  when "choices"
    domain_file = Dir.glob("domains/**/#{json['domain']}.json").first
    domain_json = parse_json_file(domain_file)
    return unless domain_json

    create_pie_chart_options_file(col_data, tmp_options_file, domain_json)
  when "numeric", "integer"
    create_line_chart_options_file(col_data, tmp_options_file, json["units"])
  else
    return
  end

  run_phantom_js("#{basename}-lg.png", 600, tmp_options_file)
  run_phantom_js("#{basename}.png", 75, tmp_options_file)
end

# Internal helper: reads and parses a JSON file.
#
# @param path [String, nil] file to read
# @return [Object, nil] the parsed document, or nil when the path is missing,
#   unreadable, or not valid JSON (a warning is printed in the latter cases)
def parse_json_file(path)
  return nil unless path && File.file?(path)

  JSON.parse(File.read(path))
rescue JSON::ParserError, SystemCallError => e
  warn "Skipping #{path}: #{e.message}"
  nil
end
|
54
|
+
|
55
|
+
# Buckets a column of values into histogram counts for a line chart.
#
# Blank and "null" entries (case-insensitive, surrounding whitespace ignored)
# are dropped; everything else is converted with `to_f`. The range
# `minimum .. maximum + 1` is split into at most 30 equal-width buckets; when
# every value is a whole number and the range is narrower than 30, one bucket
# per integer is used instead. A value falls in a bucket when it is greater
# than or equal to the bucket's lower bound and less than its upper bound.
#
# With no usable values the range defaults to 0..100.
#
# @param col_data [Array] raw column values (numbers, strings, nil)
# @return [Array] a two-element array `[series, categories]` where `series`
#   is `[{ name: '', data: <counts per bucket>, showInLegend: false }]` and
#   `categories` holds one label (the bucket's lower bound) per bucket
def graph_values(col_data)
  numbers = col_data.reject { |v| ['', 'null'].include?(v.to_s.strip.downcase) }.map(&:to_f)

  all_integers = numbers.all? { |n| n.denominator == 1 }

  minimum = numbers.min || 0
  maximum = numbers.max || 100

  default_max_buckets = 30
  # For whole-number data the span is itself a whole number, so to_i is exact
  # and gives an Integer bucket count to iterate over.
  max_buckets = all_integers ? [(maximum - minimum + 1).to_i, default_max_buckets].min : default_max_buckets
  bucket_size = (maximum - minimum + 1).to_f / max_buckets

  # Labels are rounded to whole numbers for integer data or wide ranges;
  # narrow fractional ranges keep two decimals so labels stay distinct.
  round_labels = all_integers || (maximum - minimum) > (default_max_buckets / 2)

  categories = []
  counts = []
  max_buckets.times do |bucket|
    lower = (bucket_size * bucket) + minimum
    upper = bucket_size * (bucket + 1) + minimum
    categories << (round_labels ? lower.round : format('%0.02f', lower)).to_s
    counts << numbers.count { |n| n >= lower && n < upper }
  end

  [[{ name: '', data: counts, showInLegend: false }], categories]
end
|
92
|
+
|
93
|
+
|
94
|
+
# Writes Highcharts pie-chart options for a "choices" column to a file.
#
# Blank and "null" entries (case-insensitive) are ignored. The remaining
# values are grouped, each group is labelled with the `display_name` of the
# domain option whose `value` equals the value's string form (falling back to
# the raw value when the domain has no such option), and each slice is given
# as a percentage of the non-blank total.
#
# The options are serialized with the JSON library so the file is always
# well-formed JSON; the previous hand-written template left a trailing comma
# inside "credits".
#
# @param values [Array] raw column values; not modified
# @param options_file [String] path of the file to (over)write
# @param domain_json [Array<Hash>] domain options with "value" and
#   "display_name" keys
# @return [nil]
def create_pie_chart_options_file(values, options_file, domain_json)
  present = values.reject { |v| ['', 'null'].include?(v.to_s.strip.downcase) }

  counts = present.group_by { |v| v }.map do |value, occurrences|
    option = domain_json.find { |h| h.is_a?(Hash) && h['value'] == value.to_s }
    [option ? option['display_name'] : value, occurrences.count]
  end

  total_count = counts.inject(0) { |sum, (_label, count)| sum + count }
  data = counts.map { |label, count| [label, (count * 100.0 / total_count)] }

  options = {
    'title' => { 'text' => '' },
    'credits' => { 'enabled' => false },
    'series' => [{ 'type' => 'pie', 'name' => '', 'data' => data }]
  }

  File.open(options_file, "w") do |outfile|
    outfile.puts JSON.pretty_generate(options)
  end
end
|
122
|
+
|
123
|
+
|
124
|
+
# Writes Highcharts "areaspline" chart options for a numeric column to a file.
#
# The column is bucketed by `graph_values`; the bucket labels become the
# x-axis categories (showing roughly a dozen labels via the "step" setting),
# the bucket counts become the single series, and `units` is used as the
# x-axis title.
#
# The options are serialized with the JSON library so the file is always
# well-formed JSON; the previous hand-written template left a trailing comma
# inside "credits".
#
# @param values [Array] raw column values
# @param options_file [String] path of the file to (over)write
# @param units [String, nil] x-axis title; nil is written as JSON null
# @return [nil]
def create_line_chart_options_file(values, options_file, units)
  series, categories = graph_values(values)

  options = {
    'chart' => { 'type' => 'areaspline' },
    'title' => { 'text' => '' },
    'credits' => { 'enabled' => false },
    'xAxis' => {
      'categories' => categories,
      'labels' => { 'step' => (categories.size.to_f / 12).ceil },
      'title' => { 'text' => units }
    },
    'yAxis' => {
      'maxPadding' => 0,
      'minPadding' => 0,
      'title' => { 'text' => 'Count' }
    },
    'series' => series
  }

  File.open(options_file, "w") do |outfile|
    outfile.puts JSON.pretty_generate(options)
  end
end
|
160
|
+
|
161
|
+
# Renders the chart described by an options file to a PNG using PhantomJS and
# the bundled `highcharts-convert.js` script.
#
# The command is passed to the operating system as an argument list rather
# than a single interpolated string, so paths containing spaces or shell
# metacharacters (for example in the working directory) are handed over
# intact and are never interpreted by a shell.
#
# @param png_name [String] file name of the image, created under `./graphs`
# @param width [Integer] value passed to the converter's `-width` option
# @param tmp_options_file [String] path of the chart options file to render
# @return [String] whatever the PhantomJS process wrote to standard output
# @raise [Errno::ENOENT] when the PhantomJS executable cannot be found
def run_phantom_js(png_name, width, tmp_options_file)
  graph_path = File.join(Dir.pwd, 'graphs', png_name)
  convert_script = File.join(File.dirname(__FILE__), '..', 'support', 'javascripts', 'highcharts-convert.js')

  # On MinGW builds of Ruby the executable is looked up with its .exe suffix.
  executable = RUBY_PLATFORM =~ /mingw/ ? 'phantomjs.exe' : 'phantomjs'

  phantomjs_command = [
    executable, convert_script,
    '-infile', tmp_options_file.to_s,
    '-outfile', graph_path,
    '-scale', '2.5',
    '-width', width.to_s,
    '-constr', 'Chart'
  ]

  # Array form of IO.popen bypasses the shell; reading to EOF mirrors the
  # capture-stdout behaviour of the backticks this replaces.
  IO.popen(phantomjs_command, &:read)
end
|
175
|
+
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
/*
|
2
|
+
Data plugin for Highcharts
|
3
|
+
|
4
|
+
(c) 2012-2013 Torstein Hønsi
|
5
|
+
Last revision 2013-06-07
|
6
|
+
|
7
|
+
License: www.highcharts.com/license
|
8
|
+
*/
|
9
|
+
/*
 Minified third-party code; kept byte-for-byte.
 Overview of what the code below does (h is the Highcharts object passed in):
 - Defines a constructor (exposed as h.Data, with the factory h.data) whose
   init() takes its columns from options.columns or from options.rows
   (transposed by rowsToColumns); when neither yields columns it calls
   parseCSV(), parseTable() and parseGoogleSpreadsheet() in turn.
 - dataFound() runs parseTypes(), findHeaderRow(), parsed() and complete();
   complete() hands { xAxis, series } to the options.complete callback.
 - Wraps h.Chart.prototype.init so that chart options carrying a `data`
   member are parsed first and the resulting series are merged into the
   chart options before the original init runs.
*/
(function(h){var k=h.each,m=function(b,a){this.init(b,a)};h.extend(m.prototype,{init:function(b,a){this.options=b;this.chartOptions=a;this.columns=b.columns||this.rowsToColumns(b.rows)||[];this.columns.length?this.dataFound():(this.parseCSV(),this.parseTable(),this.parseGoogleSpreadsheet())},getColumnDistribution:function(){var b=this.chartOptions,a=b&&b.chart&&b.chart.type,c=[];k(b&&b.series||[],function(b){c.push((h.seriesTypes[b.type||a||"line"].prototype.pointArrayMap||[0]).length)});this.valueCount=
|
10
|
+
{global:(h.seriesTypes[a||"line"].prototype.pointArrayMap||[0]).length,individual:c}},dataFound:function(){this.parseTypes();this.findHeaderRow();this.parsed();this.complete()},parseCSV:function(){var b=this,a=this.options,c=a.csv,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g=0;c&&(c=c.replace(/\r\n/g,"\n").replace(/\r/g,"\n").split(a.lineDelimiter||"\n"),k(c,function(c,h){var n=b.trim(c),p=n.indexOf("#")===0;h>=f&&h<=i&&!p&&n!==""&&
|
11
|
+
(n=c.split(a.itemDelimiter||","),k(n,function(b,a){a>=j&&a<=e&&(d[a-j]||(d[a-j]=[]),d[a-j][g]=b)}),g+=1)}),this.dataFound())},parseTable:function(){var b=this.options,a=b.table,c=this.columns,d=b.startRow||0,f=b.endRow||Number.MAX_VALUE,i=b.startColumn||0,j=b.endColumn||Number.MAX_VALUE,e;a&&(typeof a==="string"&&(a=document.getElementById(a)),k(a.getElementsByTagName("tr"),function(a,b){e=0;b>=d&&b<=f&&k(a.childNodes,function(a){if((a.tagName==="TD"||a.tagName==="TH")&&e>=i&&e<=j)c[e]||(c[e]=[]),
|
12
|
+
c[e][b-d]=a.innerHTML,e+=1})}),this.dataFound())},parseGoogleSpreadsheet:function(){var b=this,a=this.options,c=a.googleSpreadsheetKey,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g,h;c&&jQuery.getJSON("https://spreadsheets.google.com/feeds/cells/"+c+"/"+(a.googleSpreadsheetWorksheet||"od6")+"/public/values?alt=json-in-script&callback=?",function(a){var a=a.feed.entry,c,k=a.length,m=0,o=0,l;for(l=0;l<k;l++)c=a[l],m=Math.max(m,c.gs$cell.col),
|
13
|
+
o=Math.max(o,c.gs$cell.row);for(l=0;l<m;l++)if(l>=j&&l<=e)d[l-j]=[],d[l-j].length=Math.min(o,i-f);for(l=0;l<k;l++)if(c=a[l],g=c.gs$cell.row-1,h=c.gs$cell.col-1,h>=j&&h<=e&&g>=f&&g<=i)d[h-j][g-f]=c.content.$t;b.dataFound()})},findHeaderRow:function(){k(this.columns,function(){});this.headerRow=0},trim:function(b){return typeof b==="string"?b.replace(/^\s+|\s+$/g,""):b},parseTypes:function(){for(var b=this.columns,a=b.length,c,d,f,i;a--;)for(c=b[a].length;c--;)d=b[a][c],f=parseFloat(d),i=this.trim(d),
|
14
|
+
i==f?(b[a][c]=f,f>31536E6?b[a].isDatetime=!0:b[a].isNumeric=!0):(d=this.parseDate(d),a===0&&typeof d==="number"&&!isNaN(d)?(b[a][c]=d,b[a].isDatetime=!0):b[a][c]=i===""?null:i)},dateFormats:{"YYYY-mm-dd":{regex:"^([0-9]{4})-([0-9]{2})-([0-9]{2})$",parser:function(b){return Date.UTC(+b[1],b[2]-1,+b[3])}}},parseDate:function(b){var a=this.options.parseDate,c,d,f;a&&(c=a(b));if(typeof b==="string")for(d in this.dateFormats)a=this.dateFormats[d],(f=b.match(a.regex))&&(c=a.parser(f));return c},rowsToColumns:function(b){var a,
|
15
|
+
c,d,f,i;if(b){i=[];c=b.length;for(a=0;a<c;a++){f=b[a].length;for(d=0;d<f;d++)i[d]||(i[d]=[]),i[d][a]=b[a][d]}}return i},parsed:function(){this.options.parsed&&this.options.parsed.call(this,this.columns)},complete:function(){var b=this.columns,a,c,d=this.options,f,i,j,e,g,k;if(d.complete){this.getColumnDistribution();b.length>1&&(a=b.shift(),this.headerRow===0&&a.shift(),a.isDatetime?c="datetime":a.isNumeric||(c="category"));for(e=0;e<b.length;e++)if(this.headerRow===0)b[e].name=b[e].shift();i=[];
|
16
|
+
for(e=0,k=0;e<b.length;k++){f=h.pick(this.valueCount.individual[k],this.valueCount.global);j=[];for(g=0;g<b[e].length;g++)j[g]=[a[g],b[e][g]!==void 0?b[e][g]:null],f>1&&j[g].push(b[e+1][g]!==void 0?b[e+1][g]:null),f>2&&j[g].push(b[e+2][g]!==void 0?b[e+2][g]:null),f>3&&j[g].push(b[e+3][g]!==void 0?b[e+3][g]:null),f>4&&j[g].push(b[e+4][g]!==void 0?b[e+4][g]:null);i[k]={name:b[e].name,data:j};e+=f}d.complete({xAxis:{type:c},series:i})}}});h.Data=m;h.data=function(b,a){return new m(b,a)};h.wrap(h.Chart.prototype,
|
17
|
+
"init",function(b,a,c){var d=this;a&&a.data?h.data(h.extend(a.data,{complete:function(f){a.series&&k(a.series,function(b,c){a.series[c]=h.merge(b,f.series[c])});a=h.merge(f,a);b.call(d,a,c)}}),a):b.call(d,a,c)})})(Highcharts);
|