jeremyf-gattica 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -1
- data/README.rdoc +5 -7
- data/VERSION.yml +1 -1
- data/gattica.gemspec +2 -1
- data/lib/gattica/data_point.rb +8 -8
- data/lib/gattica/data_set.rb +26 -13
- data/lib/gattica/exceptions.rb +2 -0
- data/lib/gattica.rb +74 -13
- metadata +1 -1
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== 0.3.7
|
2
|
+
* er1c added start_index and max_results
|
3
|
+
* er1c added paging for all results
|
4
|
+
* er1c added get_to_csv to "stream" saving results to a file IO
|
5
|
+
* thieso2 fix DataPoint when GA-Path includes colons
|
6
|
+
* nedski Added support for proxy via env var
|
7
|
+
|
1
8
|
== 0.3.6
|
2
9
|
* Updated gem to pass tests under 1.9.1
|
3
10
|
|
@@ -39,4 +46,4 @@
|
|
39
46
|
* When outputting as CSV, surround each piece of data with double quotes (appears pretty common for various properties (like Browser name) to contain commas)
|
40
47
|
|
41
48
|
== 0.1.0 / 2009-03-26
|
42
|
-
* Basic functionality working good. Can't use filters yet.
|
49
|
+
* Basic functionality working good. Can't use filters yet.
|
data/README.rdoc
CHANGED
@@ -27,7 +27,7 @@ There are generally three steps to getting info from the GA API:
|
|
27
27
|
= Usage
|
28
28
|
This library does all three. A typical transaction will look like this:
|
29
29
|
|
30
|
-
gs = Gattica.new({:email => 'johndoe@google.com', :password => 'password', profile_id => 123456})
|
30
|
+
gs = Gattica.new({:email => 'johndoe@google.com', :password => 'password', :profile_id => 123456})
|
31
31
|
results = gs.get({ :start_date => '2008-01-01',
|
32
32
|
:end_date => '2008-02-01',
|
33
33
|
:dimensions => 'browser',
|
@@ -84,7 +84,7 @@ the result to be returned.
|
|
84
84
|
:dimensions => ['browser','browserVersion'],
|
85
85
|
:metrics => ['pageviews','visits'],
|
86
86
|
:sort => ['-pageviews'],
|
87
|
-
:
|
87
|
+
:filters => ['browser == Firefox','pageviews >= 10000']})
|
88
88
|
|
89
89
|
This says "return only results where the 'browser' dimension contains the word 'Firefox' and the
|
90
90
|
'pageviews' metric is greater than or equal to 10,000."
|
@@ -92,9 +92,9 @@ This says "return only results where the 'browser' dimension contains the word '
|
|
92
92
|
Filters can contain spaces around the operators, or not. These two lines are equivalent (I think
|
93
93
|
the spaces make the filter more readable):
|
94
94
|
|
95
|
-
:
|
95
|
+
:filters => ['browser == Firefox','pageviews >= 10000']
|
96
96
|
|
97
|
-
:
|
97
|
+
:filters => ['browser==Firefox','pageviews>=10000']
|
98
98
|
|
99
99
|
Once again, do _not_ include the +ga:+ prefix before the dimension/metric you're filtering against.
|
100
100
|
Gattica will add this automatically.
|
@@ -187,6 +187,4 @@ A couple of things I have planned:
|
|
187
187
|
|
188
188
|
1. Tests!
|
189
189
|
2. The option to use a custom delimiter for output
|
190
|
-
|
191
|
-
one result set, see how many pages there are, then do several calls until all pages are retrieved
|
192
|
-
or it hits the limit of the number of results you want and return all that data as one big block.
|
190
|
+
|
data/VERSION.yml
CHANGED
data/gattica.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{gattica}
|
5
|
-
s.version = "0.3.6"
|
5
|
+
s.version = "0.3.7"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Rob Cameron"]
|
@@ -37,6 +37,7 @@ Gem::Specification.new do |s|
|
|
37
37
|
"test/test_engine.rb",
|
38
38
|
"test/test_gattica.rb",
|
39
39
|
"test/test_user.rb"
|
40
|
+
|
40
41
|
]
|
41
42
|
s.homepage = %q{http://github.com/cannikin/gattica}
|
42
43
|
s.rdoc_options = ["--charset=UTF-8"]
|
data/lib/gattica/data_point.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "csv"
|
2
2
|
|
3
3
|
module Gattica
|
4
4
|
|
@@ -17,10 +17,10 @@ module Gattica
|
|
17
17
|
@updated = DateTime.parse(xml.at('updated').inner_html)
|
18
18
|
@title = xml.at('title').inner_html
|
19
19
|
@dimensions = xml.search('dxp:dimension').collect do |dimension|
|
20
|
-
{ dimension.attributes['name'].split(':').last.to_sym => dimension.attributes['value'].split(':').last }
|
20
|
+
{ dimension.attributes['name'].split(':').last.to_sym => dimension.attributes['value'].split(':', 1).last }
|
21
21
|
end
|
22
22
|
@metrics = xml.search('dxp:metric').collect do |metric|
|
23
|
-
{ metric.attributes['name'].split(':').last.to_sym => metric.attributes['value'].split(':').last.to_i }
|
23
|
+
{ metric.attributes['name'].split(':').last.to_sym => metric.attributes['value'].split(':', 1).last.to_i }
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
@@ -28,19 +28,19 @@ module Gattica
|
|
28
28
|
# Outputs in Comma Separated Values format
|
29
29
|
def to_csv(format = :long)
|
30
30
|
output = ''
|
31
|
-
columns = []
|
32
31
|
|
32
|
+
columns = []
|
33
33
|
# only output
|
34
34
|
case format
|
35
35
|
when :long
|
36
|
-
[@id, @updated, @title]
|
36
|
+
columns.concat([@id, @updated, @title])
|
37
37
|
end
|
38
38
|
|
39
39
|
# output all dimensions
|
40
|
-
@dimensions.map {|d| d.value}
|
40
|
+
columns.concat(@dimensions.map {|d| d.value})
|
41
41
|
|
42
42
|
# output all metrics
|
43
|
-
@metrics.map {|m| m.value}
|
43
|
+
columns.concat(@metrics.map {|m| m.value})
|
44
44
|
|
45
45
|
output = CSV.generate_line(columns)
|
46
46
|
return output
|
@@ -57,4 +57,4 @@ module Gattica
|
|
57
57
|
|
58
58
|
end
|
59
59
|
|
60
|
-
end
|
60
|
+
end
|
data/lib/gattica/data_set.rb
CHANGED
@@ -18,27 +18,40 @@ module Gattica
|
|
18
18
|
@points = xml.search(:entry).collect { |entry| DataPoint.new(entry) }
|
19
19
|
end
|
20
20
|
|
21
|
-
|
22
|
-
# output important data to CSV, ignoring all the specific data about this dataset
|
23
|
-
# (total_results, start_date) and just output the data from the points
|
24
|
-
|
25
|
-
def to_csv(format = :long)
|
21
|
+
def to_csv_header(format = :long)
|
26
22
|
# build the headers
|
27
23
|
output = ''
|
28
24
|
columns = []
|
29
25
|
|
30
26
|
# only show the nitty gritty details of id, updated_at and title if requested
|
31
|
-
case format
|
27
|
+
case format #it would be nice if case statements in ruby worked differently
|
32
28
|
when :long
|
33
|
-
["id", "updated", "title"]
|
29
|
+
columns.concat(["id", "updated", "title"])
|
30
|
+
unless @points.empty? # if there was at least one result
|
31
|
+
columns.concat(@points.first.dimensions.map {|d| d.key})
|
32
|
+
columns.concat(@points.first.metrics.map {|m| m.key})
|
33
|
+
end
|
34
|
+
when :short
|
35
|
+
unless @points.empty? # if there was at least one result
|
36
|
+
columns.concat(@points.first.dimensions.map {|d| d.key})
|
37
|
+
columns.concat(@points.first.metrics.map {|m| m.key})
|
38
|
+
end
|
39
|
+
when :noheader
|
34
40
|
end
|
35
41
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
42
|
+
output = CSV.generate_line(columns) + "\n" if (columns.size > 0)
|
43
|
+
|
44
|
+
return output
|
45
|
+
end
|
46
|
+
|
47
|
+
# output important data to CSV, ignoring all the specific data about this dataset
|
48
|
+
# (total_results, start_date) and just output the data from the points
|
49
|
+
|
50
|
+
def to_csv(format = :long)
|
51
|
+
output = ''
|
40
52
|
|
41
|
-
|
53
|
+
# build the headers
|
54
|
+
output = to_csv_header(format)
|
42
55
|
|
43
56
|
# get the data from each point
|
44
57
|
@points.each do |point|
|
@@ -60,4 +73,4 @@ module Gattica
|
|
60
73
|
|
61
74
|
end
|
62
75
|
|
63
|
-
end
|
76
|
+
end
|
data/lib/gattica/exceptions.rb
CHANGED
data/lib/gattica.rb
CHANGED
@@ -2,7 +2,7 @@ $:.unshift File.dirname(__FILE__) # for use/testing when no gem is installed
|
|
2
2
|
|
3
3
|
|
4
4
|
module Gattica
|
5
|
-
VERSION = '0.3.6'
|
5
|
+
VERSION = '0.3.7'
|
6
6
|
end
|
7
7
|
|
8
8
|
# external
|
@@ -47,7 +47,7 @@ module Gattica
|
|
47
47
|
SERVER = 'www.google.com'
|
48
48
|
PORT = 443
|
49
49
|
SECURE = true
|
50
|
-
DEFAULT_ARGS = { :start_date => nil, :end_date => nil, :dimensions => [], :metrics => [], :filters => [], :sort => [] }
|
50
|
+
DEFAULT_ARGS = { :start_date => nil, :end_date => nil, :dimensions => [], :metrics => [], :filters => [], :sort => [], :start_index => 1, :max_results => 10000, :page => false }
|
51
51
|
DEFAULT_OPTIONS = { :email => nil, :password => nil, :token => nil, :profile_id => nil, :debug => false, :headers => {}, :logger => Logger.new(STDOUT) }
|
52
52
|
FILTER_METRIC_OPERATORS = %w{ == != > < >= <= }
|
53
53
|
FILTER_DIMENSION_OPERATORS = %w{ == != =~ !~ =@ ~@ }
|
@@ -74,8 +74,18 @@ module Gattica
|
|
74
74
|
@user_accounts = nil # filled in later if the user ever calls Gattica::Engine#accounts
|
75
75
|
@headers = {}.merge(@options[:headers]) # headers used for any HTTP requests (Google requires a special 'Authorization' header which is set any time @token is set)
|
76
76
|
|
77
|
-
# save
|
78
|
-
|
77
|
+
# save a proxy-aware http connection for everyone to use
|
78
|
+
proxy_host = nil
|
79
|
+
proxy_port = nil
|
80
|
+
proxy_var = SECURE ? 'https_proxy' : 'http_proxy'
|
81
|
+
[proxy_var, proxy_var.upcase].each do |pxy|
|
82
|
+
if ENV[pxy]
|
83
|
+
uri = URI::parse(ENV[pxy])
|
84
|
+
proxy_host = uri.host
|
85
|
+
proxy_port = uri.port
|
86
|
+
end
|
87
|
+
end
|
88
|
+
@http = Net::HTTP::Proxy(proxy_host,proxy_port).new(SERVER, PORT)
|
79
89
|
@http.use_ssl = SECURE
|
80
90
|
@http.set_debug_output $stdout if @options[:debug]
|
81
91
|
|
@@ -119,7 +129,26 @@ module Gattica
|
|
119
129
|
end
|
120
130
|
return @user_accounts
|
121
131
|
end
|
122
|
-
|
132
|
+
|
133
|
+
# Performs a Gattica::Engine#get but instead of returning the dataset streams it to the file handle in a CSV format
|
134
|
+
#
|
135
|
+
# == Usage
|
136
|
+
#
|
137
|
+
# gs = Gattica.new({:email => 'johndoe@google.com', :password => 'password', :profile_id => 123456})
|
138
|
+
# fh = File.new("file.csv", "w")
|
139
|
+
# gs.get_to_csv({ :start_date => '2008-01-01',
|
140
|
+
# :end_date => '2008-02-01',
|
141
|
+
# :dimensions => 'browser',
|
142
|
+
# :metrics => 'pageviews',
|
143
|
+
# :sort => 'pageviews',
|
144
|
+
# :filters => ['browser == Firefox']}, fh, :short)
|
145
|
+
#
|
146
|
+
# See Gattica::Engine#get to see details of arguments
|
147
|
+
|
148
|
+
def get_to_csv(args={}, fh = nil, format = :long)
|
149
|
+
raise GatticaError::InvalidFileType, "Invalid file handle" unless !fh.nil?
|
150
|
+
results(args, fh, :csv, format)
|
151
|
+
end
|
123
152
|
|
124
153
|
# This is the method that performs the actual request to get data.
|
125
154
|
#
|
@@ -149,6 +178,9 @@ module Gattica
|
|
149
178
|
# * +metrics+ => an array of GA metrics (without the ga: prefix)
|
150
179
|
# * +filter+ => an array of GA dimensions/metrics you want to filter by (without the ga: prefix)
|
151
180
|
# * +sort+ => an array of GA dimensions/metrics you want to sort by (without the ga: prefix)
|
181
|
+
# * +page+ => true|false Does the paging to create a single set of all of the data
|
182
|
+
# * +start_index+ => Beginning offset of the query (default 1)
|
183
|
+
# * +max_results+ => How many results to grab (maximum 10,000)
|
152
184
|
#
|
153
185
|
# == Exceptions
|
154
186
|
#
|
@@ -157,13 +189,44 @@ module Gattica
|
|
157
189
|
# error back from Google Analytics telling you so.
|
158
190
|
|
159
191
|
def get(args={})
|
160
|
-
|
161
|
-
query_string = build_query_string(args,@profile_id)
|
162
|
-
@logger.debug(query_string) if @debug
|
163
|
-
data = do_http_get("/analytics/feeds/data?#{query_string}")
|
164
|
-
return DataSet.new(Hpricot.XML(data))
|
192
|
+
return results(args)
|
165
193
|
end
|
166
194
|
|
195
|
+
private
|
196
|
+
|
197
|
+
def results(args={}, fh=nil, type=nil, format=nil)
|
198
|
+
raise GatticaError::InvalidFileType, "Invalid file type" unless type.nil? ||[:csv,:xml].include?(type)
|
199
|
+
args = validate_and_clean(DEFAULT_ARGS.merge(args))
|
200
|
+
|
201
|
+
header = 0
|
202
|
+
results = nil
|
203
|
+
total_results = args[:max_results]
|
204
|
+
while(args[:start_index] < total_results)
|
205
|
+
query_string = build_query_string(args,@profile_id)
|
206
|
+
@logger.debug("Query String: " + query_string) if @debug
|
207
|
+
|
208
|
+
data = do_http_get("/analytics/feeds/data?#{query_string}")
|
209
|
+
result = DataSet.new(Hpricot.XML(data))
|
210
|
+
|
211
|
+
#handle returning results
|
212
|
+
results.points.concat(result.points) if !results.nil? && fh.nil?
|
213
|
+
#handle csv
|
214
|
+
|
215
|
+
if(!fh.nil? && type == :csv && header == 0)
|
216
|
+
fh.write result.to_csv_header(format)
|
217
|
+
header = 1
|
218
|
+
end
|
219
|
+
|
220
|
+
fh.write result.to_csv(:noheader) if !fh.nil? && type == :csv
|
221
|
+
fh.flush if !fh.nil?
|
222
|
+
|
223
|
+
results = result if results.nil?
|
224
|
+
total_results = result.total_results
|
225
|
+
args[:start_index] += args[:max_results]
|
226
|
+
break if !args[:page] # only continue while if we are suppose to page
|
227
|
+
end
|
228
|
+
return results if fh.nil?
|
229
|
+
end
|
167
230
|
|
168
231
|
# Since google wants the token to appear in any HTTP call's header, we have to set that header
|
169
232
|
# again any time @token is changed so we override the default writer (note that you need to set
|
@@ -175,9 +238,6 @@ module Gattica
|
|
175
238
|
end
|
176
239
|
|
177
240
|
|
178
|
-
private
|
179
|
-
|
180
|
-
|
181
241
|
# Does the work of making HTTP calls and then going through a suite of tests on the response to make
|
182
242
|
# sure it's valid and not an error
|
183
243
|
|
@@ -199,6 +259,7 @@ module Gattica
|
|
199
259
|
return data
|
200
260
|
end
|
201
261
|
|
262
|
+
private
|
202
263
|
|
203
264
|
# Sets up the HTTP headers that Google expects (this is called any time @token is set either by Gattica
|
204
265
|
# or manually by the user since the header must include the token)
|