spout 0.6.0 → 0.7.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ced3e644fac755624b158cc68ca0fc305c2ee58
4
- data.tar.gz: 91b6e4f0c6974794664d2a61b32b6c3c0faeff48
3
+ metadata.gz: a0751d0dabac934f14dba3f6a34b7ab0ad181a64
4
+ data.tar.gz: 5b341a7e4f0c9558c7235a4ed9237ef4815ebe8f
5
5
  SHA512:
6
- metadata.gz: b59ef048213f41475dbf1472882b89f6642f98d92b49d9d3fb492bcc68615d4dcaa84fd7487d8d3c97a2d4de397bc41ab8fdeb0825d542b6ce2d247e3ff147e2
7
- data.tar.gz: 8e3e4d94be396306ba4aa40371e204af0ca57825badc3b3f526ef865a66fbcbefe3f337801daa63222d1fc1f9fd23f5748b593f61c03adc345ab3960cf3c0526
6
+ metadata.gz: 5f58f2ad27f7feebeb7815ff2fc612b0c7f8a4eb2c66033282e42a05296f0c68c9a2e6fa00b2f0f4ae5feae3e1ba54f30c9a8c40bc74843f0c2f314868dabffd
7
+ data.tar.gz: fc3c91243f3a0fc3518db19e2b0fc543702d1da9826774a7ab521d35595bdcb7f2dcb2b4d3b94aa3d5ef65872a571fda73deba79773fc4862e8c9482ae87179d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.7.0
2
+
3
+ ### Enhancements
4
+ - Added `spout graphs` command that generates pie charts and histograms of each variable in a dataset
5
+
1
6
  ## 0.6.0 (March 7, 2014)
2
7
 
3
8
  ### Enhancement
data/README.md CHANGED
@@ -146,6 +146,17 @@ You can optionally provide a version string
146
146
  spout export [1.0.0]
147
147
  ```
148
148
 
149
+ ### Generate graphs for data in your dataset
150
+
151
+ Spout lets you generate graphs for each variable defined in your dataset. Make sure to run `spout coverage` first to validate that your data dictionary and dataset match.
152
+
153
+ This command will take some time, and requires [PhantomJS](http://phantomjs.org/) to be installed on your system.
154
+
155
+ ```
156
+ spout graphs
157
+ ```
158
+
159
+ Generated graphs will be placed in: `./graphs/`
149
160
 
150
161
  ### Export to the Hybrid Data Dictionary format from your JSON repository
151
162
 
data/lib/spout/actions.rb CHANGED
@@ -21,6 +21,8 @@ module Spout
21
21
  new_data_dictionary_export(argv, 'hybrid')
22
22
  when 'coverage', '-coverage', '--coverage', 'c', '-c'
23
23
  coverage_report(argv)
24
+ when 'graphs', '-graphs', '--graphs', 'g', '-g'
25
+ generate_graphs(argv)
24
26
  else
25
27
  help
26
28
  end
@@ -66,6 +68,9 @@ The most common spout commands are:
66
68
  dictionary format
67
69
  [c]overage Coverage report, requires dataset CSVs
68
70
  in `<project_name>/csvs/`
71
+ [g]raphs Generates graphs for each variable in a
72
+ dataset and places them
73
+ in `<project_name>/graphs/`
69
74
  [v]ersion Returns the version of Spout
70
75
 
71
76
  Commands can be referenced by the first letter:
@@ -120,6 +125,10 @@ EOT
120
125
  system "bundle exec rake spout:coverage"
121
126
  end
122
127
 
128
# Generates graphs for the dataset by delegating to the `spout:graphs`
# rake task through Bundler.
#
# @param argv [Array<String>] command-line arguments; accepted for parity
#   with the other command handlers but not read by this method
# @return [Boolean, nil] the result of `Kernel#system`
def generate_graphs(argv)
  system('bundle exec rake spout:graphs')
end
131
+
123
132
  private
124
133
 
125
134
  def copy_file(template_file, file_name = '')
@@ -0,0 +1,178 @@
1
+ require 'csv'
2
+ require 'fileutils'
3
+ require 'rubygems'
4
+ require 'json'
5
+
6
module Spout
  module Commands
    # Renders one chart per graphable variable found in `csvs/*.csv`.
    #
    # For every CSV column that has a matching variable definition under
    # `variables/**/<column>.json`:
    #   * "choices" variables become a pie chart of the value distribution,
    #     labelled with the display names from the variable's domain file;
    #   * "numeric" and "integer" variables become a histogram (areaspline).
    #
    # Chart options are written to a temporary `graphs/options.json` file and
    # handed to PhantomJS (via `highcharts-convert.js`), which writes a large
    # and a thumbnail PNG into `graphs/`.
    #
    # All of the work happens in the constructor: `Graphs.new` runs the job.
    class Graphs
      # Variable types that can be graphed.
      GRAPHABLE_TYPES = %w(choices numeric integer).freeze
      # Cell contents (after strip + downcase) that are treated as missing.
      BLANK_VALUES = ['', 'null'].freeze
      # Upper bound on the number of histogram buckets.
      DEFAULT_MAX_BUCKETS = 30

      # Generates graphs for every CSV in `csvs/`, relative to the current
      # working directory. The temporary options file is removed afterwards,
      # even when graph generation raises.
      def initialize
        tmp_options_file = File.join('graphs', 'options.json')

        Dir.glob('csvs/*.csv').each do |csv_file|
          graph_csv(csv_file, tmp_options_file)
        end
      ensure
        File.delete(tmp_options_file) if tmp_options_file && File.exist?(tmp_options_file)
      end

      # Buckets a column of numeric data into a histogram.
      #
      # Blank and "null" cells are ignored; everything else is converted with
      # `to_f` (so non-numeric text counts as 0.0). Integer-only data gets one
      # bucket per integer, capped at DEFAULT_MAX_BUCKETS; other data always
      # gets DEFAULT_MAX_BUCKETS buckets. A bucket contains the values that
      # are >= its lower edge and < its upper edge.
      #
      # @param col_data [Array] raw column values
      # @return [Array] `[series, categories]`, where `series` is a
      #   one-element array holding the Highcharts series hash and
      #   `categories` is the list of bucket labels (lower edges)
      def graph_values(col_data)
        numbers = present_values(col_data).map(&:to_f)

        all_integers = numbers.all? { |number| number.denominator == 1 }

        # Fall back to a 0..100 axis when the column has no usable values.
        minimum = numbers.min || 0
        maximum = numbers.max || 100

        span = maximum - minimum + 1
        bucket_count = all_integers ? [span.to_i, DEFAULT_MAX_BUCKETS].min : DEFAULT_MAX_BUCKETS
        bucket_size = span.to_f / bucket_count

        # Wide or integer-only ranges read better with whole-number labels.
        whole_labels = all_integers || (maximum - minimum) > (DEFAULT_MAX_BUCKETS / 2)

        categories = []
        counts = []
        bucket_count.times do |bucket|
          lower = (bucket_size * bucket) + minimum
          upper = (bucket_size * (bucket + 1)) + minimum
          categories << (whole_labels ? lower.round.to_s : format('%0.02f', lower))
          counts << numbers.count { |number| number >= lower && number < upper }
        end

        [[{ name: '', data: counts, showInLegend: false }], categories]
      end

      # Writes Highcharts pie-chart options for a "choices" column.
      #
      # Each distinct non-blank value becomes a slice sized by its percentage
      # of the non-blank values. Slices are labelled with the matching
      # `display_name` from `domain_json`, or with the raw value when the
      # domain has no entry for it. `values` is not modified.
      #
      # @param values [Array] raw column values
      # @param options_file [String] path of the JSON file to write
      # @param domain_json [Array<Hash>] parsed domain file; entries are
      #   expected to have "value" and "display_name" keys
      # @return [void]
      def create_pie_chart_options_file(values, options_file, domain_json)
        grouped = present_values(values).group_by { |value| value }
        total_count = grouped.values.map(&:size).inject(0, :+)

        data = grouped.map do |value, occurrences|
          [display_name_for(value, domain_json), occurrences.size * 100.0 / total_count]
        end

        write_options_file(
          options_file,
          title: { text: '' },
          credits: { enabled: false },
          series: [{ type: 'pie', name: '', data: data }]
        )
      end

      # Writes Highcharts areaspline (histogram) options for a numeric column.
      #
      # @param values [Array] raw column values
      # @param options_file [String] path of the JSON file to write
      # @param units [String, nil] x-axis title
      # @return [void]
      def create_line_chart_options_file(values, options_file, units)
        series, categories = graph_values(values)

        write_options_file(
          options_file,
          chart: { type: 'areaspline' },
          title: { text: '' },
          credits: { enabled: false },
          xAxis: {
            categories: categories,
            # Show roughly a dozen labels regardless of the bucket count.
            labels: { step: (categories.size.to_f / 12).ceil },
            title: { text: units }
          },
          yAxis: {
            maxPadding: 0,
            minPadding: 0,
            title: { text: 'Count' }
          },
          series: series
        )
      end

      # Renders the chart described by `tmp_options_file` to
      # `<cwd>/graphs/<png_name>` using PhantomJS.
      #
      # The command is passed as an argument vector, so no shell is involved
      # and paths containing spaces or shell metacharacters are safe.
      #
      # @param png_name [String] file name of the PNG to create
      # @param width [Integer] image width handed to the converter
      # @param tmp_options_file [String] path of the chart options JSON
      # @return [String] whatever PhantomJS printed on standard output
      # @raise [Errno::ENOENT] when the phantomjs executable cannot be found
      def run_phantom_js(png_name, width, tmp_options_file)
        graph_path = File.join(Dir.pwd, 'graphs', png_name)
        FileUtils.mkdir_p(File.dirname(graph_path))

        converter = File.join(File.dirname(__FILE__), '..', 'support', 'javascripts', 'highcharts-convert.js')
        executable = (RUBY_PLATFORM =~ /mingw/) ? 'phantomjs.exe' : 'phantomjs'

        phantomjs_command = [
          executable, converter,
          '-infile', tmp_options_file,
          '-outfile', graph_path,
          '-scale', '2.5',
          '-width', width.to_s,
          '-constr', 'Chart'
        ]
        IO.popen(phantomjs_command, &:read)
      end

      private

      # Loads one CSV and graphs each of its columns.
      def graph_csv(csv_file, options_file)
        puts "Working on: #{csv_file}"
        started_at = Time.now
        csv_table = CSV.table(csv_file, encoding: 'iso-8859-1').by_col!
        puts "Loaded #{csv_file} in #{Time.now - started_at} seconds."

        headers = csv_table.headers
        headers.each_with_index do |header, index|
          puts "Column #{index + 1} of #{headers.count} for #{header} in #{csv_file}"
          graph_column(csv_table, header, options_file) if header
        end
      end

      # Graphs a single column when a graphable variable definition exists.
      def graph_column(csv_table, header, options_file)
        variable_file = Dir.glob("variables/**/#{header.downcase}.json", File::FNM_CASEFOLD).first
        definition = load_json(variable_file)
        return unless definition.is_a?(Hash) && GRAPHABLE_TYPES.include?(definition['type'])

        col_data = csv_table[header]

        if definition['type'] == 'choices'
          domain_file = Dir.glob("domains/**/#{definition['domain']}.json").first
          domain_json = load_json(domain_file)
          return unless domain_json

          create_pie_chart_options_file(col_data, options_file, domain_json)
        else
          create_line_chart_options_file(col_data, options_file, definition['units'])
        end

        basename = File.basename(variable_file).sub(/\.json\z/, '').downcase
        run_phantom_js("#{basename}-lg.png", 600, options_file)
        run_phantom_js("#{basename}.png", 75, options_file)
      end

      # Parses a JSON file, returning nil (and warning) when the file is
      # missing, unreadable or malformed so that one bad file only skips its
      # own graph instead of aborting the run.
      def load_json(path)
        return nil unless path

        JSON.parse(File.read(path))
      rescue JSON::ParserError, SystemCallError, EncodingError => e
        warn "Skipping #{path}: #{e.message}"
        nil
      end

      # Drops cells that are empty or the literal text "null" (any case).
      def present_values(values)
        values.reject { |value| BLANK_VALUES.include?(value.to_s.strip.downcase) }
      end

      # Looks up the domain display name for a raw value, falling back to
      # the raw value itself when the domain has no usable entry.
      def display_name_for(value, domain_json)
        return value unless domain_json.is_a?(Array)

        option = domain_json.find { |entry| entry.is_a?(Hash) && entry['value'] == value.to_s }
        (option && option['display_name']) || value
      end

      # Serialises chart options as strict JSON, creating the target
      # directory first so a fresh project does not need a `graphs/` folder.
      def write_options_file(options_file, options)
        FileUtils.mkdir_p(File.dirname(options_file))
        File.open(options_file, 'w') do |outfile|
          outfile.puts JSON.pretty_generate(options)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ /*
2
+ Data plugin for Highcharts
3
+
4
+ (c) 2012-2013 Torstein Hønsi
5
+ Last revision 2013-06-07
6
+
7
+ License: www.highcharts.com/license
8
+ */
9
/*
 * Overview (minified vendor code; body left untouched):
 *  - The whole plugin is one function invoked immediately with the global
 *    `Highcharts` object, referred to as `h` inside.
 *  - `m` is the Data constructor. Its `init` stores the data options and the
 *    chart options, takes columns from `options.columns` or from
 *    `rowsToColumns(options.rows)`, and when no columns are present calls
 *    `parseCSV`, `parseTable` and `parseGoogleSpreadsheet` in turn.
 *  - `parseGoogleSpreadsheet` calls `jQuery.getJSON`, so that code path needs
 *    a global `jQuery`.
 *  - `parseTypes` turns numeric-looking cells into numbers (flagging the
 *    column `isDatetime` when a value exceeds 31536E6, otherwise `isNumeric`)
 *    and stores empty strings as null; `parseDate` tries the user-supplied
 *    parser and the built-in "YYYY-mm-dd" format.
 *  - `complete` assembles the series from the columns and passes
 *    `{xAxis: {type: ...}, series: [...]}` to `options.complete`.
 *  - The constructor is exposed as `h.Data`, a factory as `h.data(b, a)`.
 *  - `h.Chart.prototype.init` is wrapped: when the chart options contain a
 *    `data` property, the data is parsed first and the resulting series are
 *    merged into the options before the original `init` runs; otherwise the
 *    original `init` is called directly.
 */
+ (function(h){var k=h.each,m=function(b,a){this.init(b,a)};h.extend(m.prototype,{init:function(b,a){this.options=b;this.chartOptions=a;this.columns=b.columns||this.rowsToColumns(b.rows)||[];this.columns.length?this.dataFound():(this.parseCSV(),this.parseTable(),this.parseGoogleSpreadsheet())},getColumnDistribution:function(){var b=this.chartOptions,a=b&&b.chart&&b.chart.type,c=[];k(b&&b.series||[],function(b){c.push((h.seriesTypes[b.type||a||"line"].prototype.pointArrayMap||[0]).length)});this.valueCount=
10
+ {global:(h.seriesTypes[a||"line"].prototype.pointArrayMap||[0]).length,individual:c}},dataFound:function(){this.parseTypes();this.findHeaderRow();this.parsed();this.complete()},parseCSV:function(){var b=this,a=this.options,c=a.csv,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g=0;c&&(c=c.replace(/\r\n/g,"\n").replace(/\r/g,"\n").split(a.lineDelimiter||"\n"),k(c,function(c,h){var n=b.trim(c),p=n.indexOf("#")===0;h>=f&&h<=i&&!p&&n!==""&&
11
+ (n=c.split(a.itemDelimiter||","),k(n,function(b,a){a>=j&&a<=e&&(d[a-j]||(d[a-j]=[]),d[a-j][g]=b)}),g+=1)}),this.dataFound())},parseTable:function(){var b=this.options,a=b.table,c=this.columns,d=b.startRow||0,f=b.endRow||Number.MAX_VALUE,i=b.startColumn||0,j=b.endColumn||Number.MAX_VALUE,e;a&&(typeof a==="string"&&(a=document.getElementById(a)),k(a.getElementsByTagName("tr"),function(a,b){e=0;b>=d&&b<=f&&k(a.childNodes,function(a){if((a.tagName==="TD"||a.tagName==="TH")&&e>=i&&e<=j)c[e]||(c[e]=[]),
12
+ c[e][b-d]=a.innerHTML,e+=1})}),this.dataFound())},parseGoogleSpreadsheet:function(){var b=this,a=this.options,c=a.googleSpreadsheetKey,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g,h;c&&jQuery.getJSON("https://spreadsheets.google.com/feeds/cells/"+c+"/"+(a.googleSpreadsheetWorksheet||"od6")+"/public/values?alt=json-in-script&callback=?",function(a){var a=a.feed.entry,c,k=a.length,m=0,o=0,l;for(l=0;l<k;l++)c=a[l],m=Math.max(m,c.gs$cell.col),
13
+ o=Math.max(o,c.gs$cell.row);for(l=0;l<m;l++)if(l>=j&&l<=e)d[l-j]=[],d[l-j].length=Math.min(o,i-f);for(l=0;l<k;l++)if(c=a[l],g=c.gs$cell.row-1,h=c.gs$cell.col-1,h>=j&&h<=e&&g>=f&&g<=i)d[h-j][g-f]=c.content.$t;b.dataFound()})},findHeaderRow:function(){k(this.columns,function(){});this.headerRow=0},trim:function(b){return typeof b==="string"?b.replace(/^\s+|\s+$/g,""):b},parseTypes:function(){for(var b=this.columns,a=b.length,c,d,f,i;a--;)for(c=b[a].length;c--;)d=b[a][c],f=parseFloat(d),i=this.trim(d),
14
+ i==f?(b[a][c]=f,f>31536E6?b[a].isDatetime=!0:b[a].isNumeric=!0):(d=this.parseDate(d),a===0&&typeof d==="number"&&!isNaN(d)?(b[a][c]=d,b[a].isDatetime=!0):b[a][c]=i===""?null:i)},dateFormats:{"YYYY-mm-dd":{regex:"^([0-9]{4})-([0-9]{2})-([0-9]{2})$",parser:function(b){return Date.UTC(+b[1],b[2]-1,+b[3])}}},parseDate:function(b){var a=this.options.parseDate,c,d,f;a&&(c=a(b));if(typeof b==="string")for(d in this.dateFormats)a=this.dateFormats[d],(f=b.match(a.regex))&&(c=a.parser(f));return c},rowsToColumns:function(b){var a,
15
+ c,d,f,i;if(b){i=[];c=b.length;for(a=0;a<c;a++){f=b[a].length;for(d=0;d<f;d++)i[d]||(i[d]=[]),i[d][a]=b[a][d]}}return i},parsed:function(){this.options.parsed&&this.options.parsed.call(this,this.columns)},complete:function(){var b=this.columns,a,c,d=this.options,f,i,j,e,g,k;if(d.complete){this.getColumnDistribution();b.length>1&&(a=b.shift(),this.headerRow===0&&a.shift(),a.isDatetime?c="datetime":a.isNumeric||(c="category"));for(e=0;e<b.length;e++)if(this.headerRow===0)b[e].name=b[e].shift();i=[];
16
+ for(e=0,k=0;e<b.length;k++){f=h.pick(this.valueCount.individual[k],this.valueCount.global);j=[];for(g=0;g<b[e].length;g++)j[g]=[a[g],b[e][g]!==void 0?b[e][g]:null],f>1&&j[g].push(b[e+1][g]!==void 0?b[e+1][g]:null),f>2&&j[g].push(b[e+2][g]!==void 0?b[e+2][g]:null),f>3&&j[g].push(b[e+3][g]!==void 0?b[e+3][g]:null),f>4&&j[g].push(b[e+4][g]!==void 0?b[e+4][g]:null);i[k]={name:b[e].name,data:j};e+=f}d.complete({xAxis:{type:c},series:i})}}});h.Data=m;h.data=function(b,a){return new m(b,a)};h.wrap(h.Chart.prototype,
17
+ "init",function(b,a,c){var d=this;a&&a.data?h.data(h.extend(a.data,{complete:function(f){a.series&&k(a.series,function(b,c){a.series[c]=h.merge(b,f.series[c])});a=h.merge(f,a);b.call(d,a,c)}}),a):b.call(d,a,c)})})(Highcharts);