spout 0.6.0 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ced3e644fac755624b158cc68ca0fc305c2ee58
4
- data.tar.gz: 91b6e4f0c6974794664d2a61b32b6c3c0faeff48
3
+ metadata.gz: a0751d0dabac934f14dba3f6a34b7ab0ad181a64
4
+ data.tar.gz: 5b341a7e4f0c9558c7235a4ed9237ef4815ebe8f
5
5
  SHA512:
6
- metadata.gz: b59ef048213f41475dbf1472882b89f6642f98d92b49d9d3fb492bcc68615d4dcaa84fd7487d8d3c97a2d4de397bc41ab8fdeb0825d542b6ce2d247e3ff147e2
7
- data.tar.gz: 8e3e4d94be396306ba4aa40371e204af0ca57825badc3b3f526ef865a66fbcbefe3f337801daa63222d1fc1f9fd23f5748b593f61c03adc345ab3960cf3c0526
6
+ metadata.gz: 5f58f2ad27f7feebeb7815ff2fc612b0c7f8a4eb2c66033282e42a05296f0c68c9a2e6fa00b2f0f4ae5feae3e1ba54f30c9a8c40bc74843f0c2f314868dabffd
7
+ data.tar.gz: fc3c91243f3a0fc3518db19e2b0fc543702d1da9826774a7ab521d35595bdcb7f2dcb2b4d3b94aa3d5ef65872a571fda73deba79773fc4862e8c9482ae87179d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.7.0
2
+
3
+ ### Enhancements
4
+ - Added `spout graphs` command that generates pie charts and histograms of each variable in a dataset
5
+
1
6
  ## 0.6.0 (March 7, 2014)
2
7
 
3
8
  ### Enhancement
data/README.md CHANGED
@@ -146,6 +146,17 @@ You can optionally provide a version string
146
146
  spout export [1.0.0]
147
147
  ```
148
148
 
149
+ ### Generate graphs for data in your dataset
150
+
151
+ Spout lets you generate graphs for each variable defined in your dataset. Make sure to run `spout coverage` first to validate that your data dictionary and dataset match.
152
+
153
+ This command will take some time, and requires [PhantomJS](http://phantomjs.org/) to be installed on your system.
154
+
155
+ ```
156
+ spout graphs
157
+ ```
158
+
159
+ Generated graphs will be placed in: `./graphs/`
149
160
 
150
161
  ### Export to the Hybrid Data Dictionary format from your JSON repository
151
162
 
data/lib/spout/actions.rb CHANGED
@@ -21,6 +21,8 @@ module Spout
21
21
  new_data_dictionary_export(argv, 'hybrid')
22
22
  when 'coverage', '-coverage', '--coverage', 'c', '-c'
23
23
  coverage_report(argv)
24
+ when 'graphs', '-graphs', '--graphs', 'g', '-g'
25
+ generate_graphs(argv)
24
26
  else
25
27
  help
26
28
  end
@@ -66,6 +68,9 @@ The most common spout commands are:
66
68
  dictionary format
67
69
  [c]overage Coverage report, requires dataset CSVs
68
70
  in `<project_name>/csvs/`
71
+ [g]raphs Generates graphs for each variable in a
72
+ dataset and places them
73
+ in `<project_name>/graphs/`
69
74
  [v]ersion Returns the version of Spout
70
75
 
71
76
  Commands can be referenced by the first letter:
@@ -120,6 +125,10 @@ EOT
120
125
  system "bundle exec rake spout:coverage"
121
126
  end
122
127
 
128
# Generates the dataset graphs by shelling out to the `spout:graphs` rake
# task through Bundler.
#
# argv - command-line arguments handed over by the command dispatcher; this
#        method does not read them.
#
# Returns the result of Kernel#system for the rake invocation.
def generate_graphs(argv)
  rake_invocation = "bundle exec rake spout:graphs"
  system(rake_invocation)
end
131
+
123
132
  private
124
133
 
125
134
  def copy_file(template_file, file_name = '')
@@ -0,0 +1,178 @@
1
+ require 'csv'
2
+ require 'fileutils'
3
+ require 'rubygems'
4
+ require 'json'
5
+
6
+ module Spout
7
+ module Commands
8
+ class Graphs
9
+
10
# Renders a large and a thumbnail PNG for every CSV column that has a matching
# variable definition.
#
# For each file matching csvs/*.csv the table is loaded column-wise; for each
# header a variable file variables/**/<header>.json is looked up
# (case-insensitively). Variables of type "choices" get a pie chart built from
# their domain file (domains/**/<domain>.json); "numeric" and "integer"
# variables get a histogram-style chart. Any other type, an unreadable or
# malformed JSON file, or a missing domain causes the column to be skipped.
#
# Chart options are written to graphs/options.json, which is handed to
# PhantomJS and removed again when the run finishes, even if it is aborted by
# an exception.
def initialize
  tmp_options_file = File.join('graphs', 'options.json')

  # Both the options file and the rendered PNGs live below graphs/, so make
  # sure the directory exists before anything tries to write into it.
  FileUtils.mkdir_p(File.dirname(tmp_options_file))

  Dir.glob("csvs/*.csv").each do |csv_file|
    puts "Working on: #{csv_file}"
    t = Time.now
    csv_table = CSV.table(csv_file, encoding: 'iso-8859-1').by_col!
    puts "Loaded #{csv_file} in #{Time.now - t} seconds."

    total_header_count = csv_table.headers.count
    csv_table.headers.each_with_index do |header, index|
      puts "Column #{ index + 1 } of #{ total_header_count } for #{header} in #{csv_file}"

      # A blank header cell comes back as nil; there is no variable to match.
      next if header.nil?

      variable_file = Dir.glob("variables/**/#{header.downcase}.json", File::FNM_CASEFOLD).first
      next unless variable_file

      json = read_json_file(variable_file)
      next unless json.is_a?(Hash)

      basename = File.basename(variable_file, '.json').downcase
      col_data = csv_table[header]

      case json["type"]
      when "choices"
        domain_file = Dir.glob("domains/**/#{json['domain']}.json").first
        domain_json = domain_file && read_json_file(domain_file)
        next unless domain_json

        create_pie_chart_options_file(col_data, tmp_options_file, domain_json)
      when "numeric", "integer"
        create_line_chart_options_file(col_data, tmp_options_file, json["units"])
      else
        next
      end

      run_phantom_js("#{basename}-lg.png", 600, tmp_options_file)
      run_phantom_js("#{basename}.png", 75, tmp_options_file)
    end
  end
ensure
  # File.exist? (not the removed File.exists?) so this works on Ruby 3.2+.
  File.delete(tmp_options_file) if File.exist?(tmp_options_file)
end

# Reads and parses a JSON file.
#
# path - String path of the file to read.
#
# Returns the parsed document, or nil when the file cannot be read or does not
# contain valid JSON (graph generation is best-effort per variable).
def read_json_file(path)
  JSON.parse(File.read(path))
rescue JSON::ParserError, SystemCallError, TypeError
  nil
end
private :read_json_file
54
+
55
# Buckets a column of values into histogram counts for a chart series.
#
# Blank cells, "null" markers and any other cell that is not a finite number
# are ignored (they used to be coerced to 0.0, which added a false spike at
# zero and could widen the range).
#
# The range [minimum, maximum + 1) is divided into at most 30 equal buckets;
# when every value is a whole number and the range is narrower than that, one
# bucket per integer is used. With no usable values the range defaults to
# 0..100.
#
# col_data - Enumerable of raw cell values (numbers, strings or nil).
#
# Returns a two-element Array:
#   [0] series     - Array holding one Hash
#                    { name: '', data: <counts per bucket>, showInLegend: false }
#   [1] categories - Array of String labels, one per bucket, each the lower
#                    bound of its bucket.
def graph_values(col_data)
  default_max_buckets = 30

  numbers = col_data.map { |raw| numeric_graph_value(raw) }.compact
  all_integers = numbers.all? { |number| number.denominator == 1 }

  minimum = numbers.min || 0
  maximum = numbers.max || 100
  span = maximum - minimum + 1

  max_buckets = all_integers ? [span.to_i, default_max_buckets].min : default_max_buckets
  bucket_size = span.to_f / max_buckets

  # Whole-number labels are used for integer data and for wide ranges;
  # narrow fractional ranges keep two decimals.
  round_labels = all_integers || (maximum - minimum) > (default_max_buckets / 2)

  categories = []
  counts = []
  (0...max_buckets).each do |bucket|
    # Greater or equal to val_min, less than val_max
    val_min = (bucket_size * bucket) + minimum
    val_max = bucket_size * (bucket + 1) + minimum

    categories << (round_labels ? val_min.round.to_s : "%0.02f" % val_min)
    counts << numbers.count { |number| number >= val_min && number < val_max }
  end

  [[{ name: '', data: counts, showInLegend: false }], categories]
end

# Converts one raw cell into a Float.
#
# raw - a Numeric, String or nil taken from the CSV column.
#
# Returns the Float value, or nil when the cell is blank, is not a number, or
# is not finite.
def numeric_graph_value(raw)
  number = raw.is_a?(Numeric) ? raw.to_f : Float(raw.to_s.strip)
  number.finite? ? number : nil
rescue ArgumentError, TypeError
  nil
end
private :numeric_graph_value
92
+
93
+
94
# Writes the Highcharts options for a pie chart of a categorical column.
#
# Blank and "null" cells are dropped. The remaining values are grouped, each
# group is labelled with the display name of the domain option whose 'value'
# equals the cell's string form (or with the raw value when the domain has no
# such option), and each slice is given as a percentage of the non-blank total.
#
# The options are serialized with the JSON library so the file is always
# valid JSON (the hand-written template used to emit a trailing comma).
#
# values       - Enumerable of raw cell values; it is not modified.
# options_file - String path of the options file to (over)write.
# domain_json  - Array of Hashes with 'value' and 'display_name' keys, as
#                parsed from a domain file.
#
# Returns nothing meaningful.
def create_pie_chart_options_file(values, options_file, domain_json)
  present = values.reject { |value| ['', 'null'].include?(value.to_s.strip.downcase) }

  # First option wins when a domain lists the same value twice.
  display_names = {}
  Array(domain_json).each do |option|
    next unless option.is_a?(Hash)
    key = option['value']
    display_names[key] = option['display_name'] unless display_names.key?(key)
  end

  counts = present.group_by { |value| value }.map do |value, occurrences|
    [display_names.fetch(value.to_s, value), occurrences.count]
  end

  total_count = counts.inject(0) { |sum, (_label, count)| sum + count }
  data = counts.map { |label, count| [label, (count * 100.0 / total_count)] }

  options = {
    'title' => { 'text' => '' },
    'credits' => { 'enabled' => false },
    'series' => [{ 'type' => 'pie', 'name' => '', 'data' => data }]
  }

  File.open(options_file, "w") { |outfile| outfile.puts JSON.pretty_generate(options) }
end
122
+
123
+
124
# Writes the Highcharts options for an area-spline histogram of a numeric
# column.
#
# The column is bucketed by graph_values; its categories become the x-axis
# labels (with a label step chosen so that roughly twelve labels are shown)
# and its series becomes the chart data.
#
# The options are serialized with the JSON library so the file is always
# valid JSON (the hand-written template used to emit a trailing comma).
#
# values       - Enumerable of raw cell values for the column.
# options_file - String path of the options file to (over)write.
# units        - String shown as the x-axis title, or nil for none.
#
# Returns nothing meaningful.
def create_line_chart_options_file(values, options_file, units)
  series, categories = graph_values(values)

  options = {
    'chart' => { 'type' => 'areaspline' },
    'title' => { 'text' => '' },
    'credits' => { 'enabled' => false },
    'xAxis' => {
      'categories' => categories,
      'labels' => { 'step' => (categories.size.to_f / 12).ceil },
      'title' => { 'text' => units }
    },
    'yAxis' => {
      'maxPadding' => 0,
      'minPadding' => 0,
      'title' => { 'text' => 'Count' }
    },
    'series' => series
  }

  File.open(options_file, "w") { |outfile| outfile.puts JSON.pretty_generate(options) }
end
160
+
161
# Renders the chart described by an options file to a PNG using PhantomJS and
# the bundled highcharts-convert.js script.
#
# The command is passed to IO.popen as an argument array, so it is executed
# without a shell: paths containing spaces or shell metacharacters (the PNG
# name is derived from a variable file name) are handed to PhantomJS verbatim.
#
# png_name         - String file name of the image, created inside ./graphs/.
# width            - Integer width passed to the converter's -width option.
# tmp_options_file - String path of the chart options file to render.
#
# Returns the String PhantomJS wrote to standard output.
# Raises Errno::ENOENT when the phantomjs executable cannot be found.
def run_phantom_js(png_name, width, tmp_options_file)
  graph_path = File.join(Dir.pwd, 'graphs', png_name)
  converter = File.join(File.dirname(__FILE__), '..', 'support', 'javascripts', 'highcharts-convert.js')

  executable = RUBY_PLATFORM =~ /mingw/ ? 'phantomjs.exe' : 'phantomjs'

  phantomjs_command = [
    executable, converter,
    '-infile', tmp_options_file,
    '-outfile', graph_path,
    '-scale', '2.5',
    '-width', width.to_s,
    '-constr', 'Chart'
  ]
  # puts phantomjs_command.join(' ')
  IO.popen(phantomjs_command, &:read)
end
175
+
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,17 @@
1
+ /*
2
+ Data plugin for Highcharts
3
+
4
+ (c) 2012-2013 Torstein Hønsi
5
+ Last revision 2013-06-07
6
+
7
+ License: www.highcharts.com/license
8
+ */
9
/*
 Overview of the minified bundle below (vendored build; do not edit by hand):
  - Defines a Data constructor, exposed as Highcharts.Data, plus the factory
    Highcharts.data(options, chartOptions).
  - init() uses options.columns, or options.rows turned into columns, when
    given; otherwise it calls parseCSV (options.csv), parseTable
    (options.table) and parseGoogleSpreadsheet (options.googleSpreadsheetKey,
    fetched through jQuery.getJSON) in turn.
  - parseTypes() converts cells that read as numbers to numbers, flags columns
    as numeric or datetime, parses "YYYY-mm-dd" dates in the first column and
    turns empty cells into null.
  - complete() builds one series per value column (using the first column as
    the x values) and passes { xAxis, series } to options.complete.
  - Highcharts.Chart.prototype.init is wrapped so that a chart whose options
    contain `data` is initialised only after the data has been parsed, with
    the parsed result merged into the user-supplied options.
*/
+ (function(h){var k=h.each,m=function(b,a){this.init(b,a)};h.extend(m.prototype,{init:function(b,a){this.options=b;this.chartOptions=a;this.columns=b.columns||this.rowsToColumns(b.rows)||[];this.columns.length?this.dataFound():(this.parseCSV(),this.parseTable(),this.parseGoogleSpreadsheet())},getColumnDistribution:function(){var b=this.chartOptions,a=b&&b.chart&&b.chart.type,c=[];k(b&&b.series||[],function(b){c.push((h.seriesTypes[b.type||a||"line"].prototype.pointArrayMap||[0]).length)});this.valueCount=
10
+ {global:(h.seriesTypes[a||"line"].prototype.pointArrayMap||[0]).length,individual:c}},dataFound:function(){this.parseTypes();this.findHeaderRow();this.parsed();this.complete()},parseCSV:function(){var b=this,a=this.options,c=a.csv,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g=0;c&&(c=c.replace(/\r\n/g,"\n").replace(/\r/g,"\n").split(a.lineDelimiter||"\n"),k(c,function(c,h){var n=b.trim(c),p=n.indexOf("#")===0;h>=f&&h<=i&&!p&&n!==""&&
11
+ (n=c.split(a.itemDelimiter||","),k(n,function(b,a){a>=j&&a<=e&&(d[a-j]||(d[a-j]=[]),d[a-j][g]=b)}),g+=1)}),this.dataFound())},parseTable:function(){var b=this.options,a=b.table,c=this.columns,d=b.startRow||0,f=b.endRow||Number.MAX_VALUE,i=b.startColumn||0,j=b.endColumn||Number.MAX_VALUE,e;a&&(typeof a==="string"&&(a=document.getElementById(a)),k(a.getElementsByTagName("tr"),function(a,b){e=0;b>=d&&b<=f&&k(a.childNodes,function(a){if((a.tagName==="TD"||a.tagName==="TH")&&e>=i&&e<=j)c[e]||(c[e]=[]),
12
+ c[e][b-d]=a.innerHTML,e+=1})}),this.dataFound())},parseGoogleSpreadsheet:function(){var b=this,a=this.options,c=a.googleSpreadsheetKey,d=this.columns,f=a.startRow||0,i=a.endRow||Number.MAX_VALUE,j=a.startColumn||0,e=a.endColumn||Number.MAX_VALUE,g,h;c&&jQuery.getJSON("https://spreadsheets.google.com/feeds/cells/"+c+"/"+(a.googleSpreadsheetWorksheet||"od6")+"/public/values?alt=json-in-script&callback=?",function(a){var a=a.feed.entry,c,k=a.length,m=0,o=0,l;for(l=0;l<k;l++)c=a[l],m=Math.max(m,c.gs$cell.col),
13
+ o=Math.max(o,c.gs$cell.row);for(l=0;l<m;l++)if(l>=j&&l<=e)d[l-j]=[],d[l-j].length=Math.min(o,i-f);for(l=0;l<k;l++)if(c=a[l],g=c.gs$cell.row-1,h=c.gs$cell.col-1,h>=j&&h<=e&&g>=f&&g<=i)d[h-j][g-f]=c.content.$t;b.dataFound()})},findHeaderRow:function(){k(this.columns,function(){});this.headerRow=0},trim:function(b){return typeof b==="string"?b.replace(/^\s+|\s+$/g,""):b},parseTypes:function(){for(var b=this.columns,a=b.length,c,d,f,i;a--;)for(c=b[a].length;c--;)d=b[a][c],f=parseFloat(d),i=this.trim(d),
14
+ i==f?(b[a][c]=f,f>31536E6?b[a].isDatetime=!0:b[a].isNumeric=!0):(d=this.parseDate(d),a===0&&typeof d==="number"&&!isNaN(d)?(b[a][c]=d,b[a].isDatetime=!0):b[a][c]=i===""?null:i)},dateFormats:{"YYYY-mm-dd":{regex:"^([0-9]{4})-([0-9]{2})-([0-9]{2})$",parser:function(b){return Date.UTC(+b[1],b[2]-1,+b[3])}}},parseDate:function(b){var a=this.options.parseDate,c,d,f;a&&(c=a(b));if(typeof b==="string")for(d in this.dateFormats)a=this.dateFormats[d],(f=b.match(a.regex))&&(c=a.parser(f));return c},rowsToColumns:function(b){var a,
15
+ c,d,f,i;if(b){i=[];c=b.length;for(a=0;a<c;a++){f=b[a].length;for(d=0;d<f;d++)i[d]||(i[d]=[]),i[d][a]=b[a][d]}}return i},parsed:function(){this.options.parsed&&this.options.parsed.call(this,this.columns)},complete:function(){var b=this.columns,a,c,d=this.options,f,i,j,e,g,k;if(d.complete){this.getColumnDistribution();b.length>1&&(a=b.shift(),this.headerRow===0&&a.shift(),a.isDatetime?c="datetime":a.isNumeric||(c="category"));for(e=0;e<b.length;e++)if(this.headerRow===0)b[e].name=b[e].shift();i=[];
16
+ for(e=0,k=0;e<b.length;k++){f=h.pick(this.valueCount.individual[k],this.valueCount.global);j=[];for(g=0;g<b[e].length;g++)j[g]=[a[g],b[e][g]!==void 0?b[e][g]:null],f>1&&j[g].push(b[e+1][g]!==void 0?b[e+1][g]:null),f>2&&j[g].push(b[e+2][g]!==void 0?b[e+2][g]:null),f>3&&j[g].push(b[e+3][g]!==void 0?b[e+3][g]:null),f>4&&j[g].push(b[e+4][g]!==void 0?b[e+4][g]:null);i[k]={name:b[e].name,data:j};e+=f}d.complete({xAxis:{type:c},series:i})}}});h.Data=m;h.data=function(b,a){return new m(b,a)};h.wrap(h.Chart.prototype,
17
+ "init",function(b,a,c){var d=this;a&&a.data?h.data(h.extend(a.data,{complete:function(f){a.series&&k(a.series,function(b,c){a.series[c]=h.merge(b,f.series[c])});a=h.merge(f,a);b.call(d,a,c)}}),a):b.call(d,a,c)})})(Highcharts);