google_plus_archiver 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/gplus-get ADDED
@@ -0,0 +1,69 @@
#!/usr/bin/env ruby
#
# gplus-get: command-line front end for GooglePlusArchiver.
#
# Parses the command-line options, registers the Google API client with the
# supplied key and then archives the requested user. Exits with a non-zero
# status when required options are missing or the client cannot be registered.

$LOAD_PATH.unshift File.dirname(__FILE__) + "/../lib"

require 'google_plus_archiver'
require 'google_plus_archiver/version.rb'

require 'optparse'

options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: gplus-get -a [API_KEY] -u [USER_ID]"

  opts.on("--api-key [API_KEY]", "Specify the Google API key") do |api_key|
    options[:api_key] = api_key
  end

  opts.on("--user-id [USER_ID]", "Specify the ID of the user to be archived") do |user_id|
    options[:user_id] = user_id
  end

  # Coerce to Float: the value ends up as the argument of Kernel#sleep,
  # which raises TypeError when handed the raw String from the command line.
  opts.on("--delay [SECONDS]", Float, "Delay (in seconds) between two requests (0.2 by default, since Google set a 5 requests/second/user limit)") do |delay|
    options[:delay] = delay
  end

  opts.on("--output-path [OUTPUT_PATH]", "Output path (the current directory by default)") do |output_path|
    options[:output_path] = output_path
  end

  opts.on("--quiet", "Silent mode") do
    options[:quiet] = true
  end

  # One switch per kind of content that can be left out of the archive;
  # each sets options[:exclude_<kind>] = true.
  %w[posts attachments replies plusoners resharers].each do |kind|
    opts.on("--exclude-#{kind}", "Don't archive #{kind}") do
      options[:"exclude_#{kind}"] = true
    end
  end

  opts.on("--version", "Display current version") do
    puts "google_plus_archiver #{GooglePlusArchiver::VERSION}"
    exit 0
  end
end.parse!

# Both values are mandatory; report on stderr and exit with status 1.
unless options[:api_key] && options[:user_id]
  abort "You must specify both the user ID (-u) and your Google API key (-a)."
end

GooglePlusArchiver::register_client(options[:api_key])
# register_client prints its own message on failure; reflect it in the status.
exit 1 unless GooglePlusArchiver::client_registered?

GooglePlusArchiver::archive_user(options)
@@ -0,0 +1,4 @@
# Release metadata for the google_plus_archiver gem.
module GooglePlusArchiver
  # Version string of this release (printed by `gplus-get --version`).
  VERSION = "0.0.1".freeze
  # Date string recorded for this release.
  DATE = "2012-12-18".freeze
end
@@ -0,0 +1,282 @@
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'tempfile'
4
+ require 'tmpdir'
5
+ require 'zlib'
6
+
7
+ require 'google/api_client'
8
+
9
+ require 'archive/tar/minitar'
10
+ include Archive::Tar
11
+
# Archives a Google+ user's public data (profile, posts, attachments, replies,
# plusoners and resharers) into a gzipped tarball.
#
# Typical use:
#
#   GooglePlusArchiver.register_client(api_key)
#   GooglePlusArchiver.archive_user(:user_id => id) if GooglePlusArchiver.client_registered?
module GooglePlusArchiver

  # Pause (in seconds) between paginated API requests when none is given.
  DEFAULT_DELAY = 0.2

  # Module-wide state. Initialised up front so the readers below never hit an
  # uninitialised class variable before register_client has been called.
  @@client = nil
  @@plus = nil
  @@api_key = nil
  @@request_num = 0

  # @return [String, nil] the API key most recently stored
  def self.api_key
    @@api_key
  end

  # Stores the API key and, when a client exists, pushes it to the client too.
  #
  # @param api_key [String]
  def self.api_key=(api_key)
    @@client.key = api_key if @@client
    @@api_key = api_key
  end

  # @return [Integer] the request counter shown in the progress messages
  def self.request_num
    @@request_num
  end

  # Creates the API client, stores the key, resets the request counter and
  # looks up the 'plus' API. Prints "Invalid Google API key." when the lookup
  # raises.
  #
  # @param api_key [String]
  # @return [Object, nil] the discovered API handle, or nil on failure
  def self.register_client(api_key)
    @@client = Google::APIClient.new
    @@api_key = @@client.key = api_key
    @@request_num = 0
    # Drop any handle left over from an earlier registration so that a failed
    # re-registration is not reported as success.
    @@plus = nil
    begin
      @@plus = @@client.discovered_api('plus')
    rescue
      puts "Invalid Google API key."
    end
  end

  # @return [Boolean] true once register_client has obtained the 'plus' API
  def self.client_registered?
    !@@plus.nil?
  end

  # Downloads everything requested for one user into a temporary directory
  # and packs it into "<output_path>/<name>_<timestamp>.tar.gz". The archive
  # is written even when fetching fails part-way (best effort).
  #
  # @param params [Hash] options:
  #   :user_id      - ID of the user to archive
  #   :delay        - seconds between paginated requests (Numeric or numeric
  #                   String; DEFAULT_DELAY when absent)
  #   :output_path  - target directory (current directory when absent)
  #   :quiet        - suppress progress output when truthy
  #   :exclude_posts, :exclude_attachments, :exclude_replies,
  #   :exclude_plusoners, :exclude_resharers - skip that kind of content
  # @raise [ArgumentError, TypeError] when :delay cannot be converted to Float
  # @return [nil]
  def self.archive_user(params)
    unless client_registered?
      puts "Unregistered client."
      return
    end

    user_id = params[:user_id]
    quiet = params[:quiet]
    # Kernel#sleep rejects Strings, and command-line values arrive as Strings.
    delay = Float(params[:delay] || DEFAULT_DELAY)
    # Resolve now: the archive is written after a chdir into the temp dir,
    # where a relative path would point at the wrong place.
    output_path = File.expand_path(params[:output_path] || Dir.pwd)

    user_display_name = nil
    Dir.mktmpdir do |tmp_dir|
      begin
        user_display_name = archive_profile(tmp_dir, user_id, quiet)
        archive_posts(tmp_dir, user_id, delay, quiet, params) unless params[:exclude_posts]
      rescue StandardError, Interrupt => e
        # Interrupt is included on purpose so Ctrl-C still yields a partial
        # archive; SystemExit and other fatal exceptions are left alone.
        puts e.message
        puts "Archiving interrupted due to unexpected errors."
      ensure
        write_archive(tmp_dir, output_path, user_id, user_display_name)
      end
    end
    nil
  end

  # Fetches the profile, saves it as profile.json and returns the value of
  # its 'displayName' field.
  def self.archive_profile(tmp_dir, user_id, quiet)
    puts "##{@@request_num+=1} Fetching people.get ..." unless quiet
    response = @@client.execute(
      :api_method => @@plus.people.get,
      :parameters => {
        'collection' => 'public',
        'userId' => user_id
      },
      :authenticated => false
    )
    File.open("#{tmp_dir}/profile.json", "w") { |f| f.puts response.body }
    JSON.parse(response.body)['displayName']
  end

  # Walks every page of the activity list, saving each page as
  # posts[<n>].json and archiving each activity on it.
  def self.archive_posts(tmp_dir, user_id, delay, quiet, params)
    next_page_token = nil
    page_num = 0
    loop do
      puts "##{@@request_num+=1} Fetching activities.list: page[#{page_num}] ..." unless quiet
      response = @@client.execute(
        :api_method => @@plus.activities.list,
        :parameters => {
          'collection' => 'public',
          'userId' => user_id,
          'maxResults' => '100',
          'pageToken' => next_page_token
        },
        :authenticated => false
      )
      activities = JSON.parse(response.body)
      next_page_token = activities['nextPageToken']

      File.open("#{tmp_dir}/posts[#{page_num}].json", "w") { |f| f.puts response.body }

      # A response without 'items' (e.g. an error body) is treated as empty.
      (activities['items'] || []).each do |item|
        archive_activity(tmp_dir, item, delay, quiet, params)
      end

      break unless next_page_token
      page_num += 1
      sleep delay
    end
    nil
  end

  # Saves one activity as <id>.json together with whichever of its
  # attachments, replies, plusoners and resharers were not excluded.
  def self.archive_activity(tmp_dir, item, delay, quiet, params)
    activity_id = item['id']

    puts "##{@@request_num} Fetching activities.get: #{activity_id}" unless quiet
    File.open("#{tmp_dir}/#{activity_id}.json", "w") { |f| f.puts item.to_json }

    attachments = item['object'] && item['object']['attachments']
    if attachments && !params[:exclude_attachments]
      attachments.each do |attachment|
        archive_attachment(tmp_dir, activity_id, attachment, quiet)
      end
    end

    unless params[:exclude_replies]
      archive_pages(tmp_dir, activity_id, 'replies', 'comments.list',
                    @@plus.comments.list,
                    { 'activityId' => activity_id, 'maxResults' => '500' },
                    delay, quiet)
    end

    unless params[:exclude_plusoners]
      archive_pages(tmp_dir, activity_id, 'plusoners', 'people.listByActivity(plusoners)',
                    @@plus.people.list_by_activity,
                    { 'activityId' => activity_id, 'collection' => 'plusoners', 'maxResults' => '100' },
                    delay, quiet)
    end

    unless params[:exclude_resharers]
      archive_pages(tmp_dir, activity_id, 'resharers', 'people.listByActivity(resharers)',
                    @@plus.people.list_by_activity,
                    { 'activityId' => activity_id, 'collection' => 'resharers', 'maxResults' => '100' },
                    delay, quiet)
    end
  end

  # Downloads the main image of an attachment (fullImage, else image) and
  # each of its thumbnails.
  def self.archive_attachment(tmp_dir, activity_id, attachment, quiet)
    stem = "#{tmp_dir}/#{activity_id}_#{attachment['id']}"

    image = attachment['fullImage'] || attachment['image']
    if image
      puts "##{@@request_num} Fetching attachment: #{image['url']} ..." unless quiet
      save_image(image['url'], stem)
    end

    (attachment['thumbnails'] || []).each_with_index do |thumbnail, index|
      thumb = thumbnail['image']
      next unless thumb
      puts "##{@@request_num} Fetching attachment(thumbnail): #{thumb['url']} ..." unless quiet
      save_image(thumb['url'], "#{stem}_#{index}")
    end
  end

  # Fetches +url+ and writes the response body, byte for byte, to
  # +path_stem+ plus the extension derived from the URL.
  def self.save_image(url, path_stem)
    # URI.escape no longer exists in Ruby 3; the parser object offers the
    # same escaping.
    uri = URI.parse(URI::DEFAULT_PARSER.escape(url.to_s))
    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == 'https'
      http.use_ssl = true
      # Verify the server certificate instead of disabling the check.
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end
    data = http.get(uri.request_uri)

    # Binary mode and a block: no newline is appended, the bytes are left
    # untouched and the file is closed (flushed) before the archive is built.
    File.open("#{path_stem}#{image_extension(uri)}", "wb") do |f|
      f.write(data.body.to_s)
    end
  end

  # Returns the extension (with leading dot) of the URL's path, or "" when
  # there is none or it is longer than four characters.
  def self.image_extension(uri)
    ext = File.extname(uri.path.to_s)
    (ext.length > 1 && ext.length <= 5) ? ext : ""
  end

  # Fetches every page of one per-activity listing and saves each page under
  # the name produced by paged_filename.
  #
  # +label+ is the filename part ('replies', 'plusoners', 'resharers'),
  # +log_name+ the method name shown in progress output, and +parameters+
  # the request parameters without 'pageToken'.
  def self.archive_pages(tmp_dir, activity_id, label, log_name, api_method, parameters, delay, quiet)
    next_page_token = nil
    page_num = 0
    loop do
      puts "##{@@request_num+=1} Fetching #{log_name}: page[#{page_num}] ..." unless quiet
      response = @@client.execute(
        :api_method => api_method,
        :parameters => parameters.merge('pageToken' => next_page_token),
        :authenticated => false
      )
      next_page_token = JSON.parse(response.body)['nextPageToken']

      File.open(paged_filename(tmp_dir, activity_id, label, page_num, next_page_token), "w") do |f|
        f.puts response.body
      end

      break unless next_page_token
      page_num += 1
      sleep delay
    end
    nil
  end

  # Builds the file name for one page of a listing. A listing that fits on a
  # single page gets no "[n]" suffix; every page of a longer one does.
  def self.paged_filename(tmp_dir, activity_id, label, page_num, next_page_token)
    suffix = (page_num == 0 && !next_page_token) ? "" : "[#{page_num}]"
    "#{tmp_dir}/#{activity_id}_#{label}#{suffix}.json"
  end

  # Picks the name used at the start of the archive file name: the display
  # name when known, otherwise the user ID, with path separators and NUL
  # replaced so the result is a single path component.
  def self.archive_label(user_display_name, user_id)
    label = user_display_name.to_s
    label = user_id.to_s if label.empty?
    label.gsub(%r{[/\\\0]}, '_')
  end

  # Packs every file in +tmp_dir+ into a tar (via a temp file) and gzips it
  # to "<output_path>/<label>_<YYYY-MM-DD-HH:MM:SS+ZZZZ>.tar.gz".
  def self.write_archive(tmp_dir, output_path, user_id, user_display_name)
    archive_time = Time.now.strftime("%Y-%m-%d-%H:%M:%S%z")
    archive_filename = File.join(
      output_path,
      "#{archive_label(user_display_name, user_id)}_#{archive_time}.tar.gz"
    )

    Dir.chdir(tmp_dir) do
      Tempfile.open(user_id.to_s) do |tar|
        tar.binmode
        files = (Dir.entries('.') - %w[. ..]).sort
        Archive::Tar::Minitar.pack(files, tar)
        # Make sure the tar bytes are on disk before reading them back.
        tar.flush unless tar.closed?

        Zlib::GzipWriter.open(archive_filename) do |gz|
          gz.mtime = File.mtime(tar.path)
          gz.orig_name = tar.path
          gz.write IO.binread(tar.path)
        end
      end
    end
  end

  private_class_method :archive_profile, :archive_posts, :archive_activity,
                       :archive_attachment, :save_image, :image_extension,
                       :archive_pages, :paged_filename, :archive_label,
                       :write_archive

end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_plus_archiver
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mort Yao
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: google-api-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '0.5'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: archive-tar-minitar
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '0.5'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '0.5'
46
+ description: google_plus_archiver is a simple command-line tool to archive Google+
47
+ profiles and public streams.
48
+ email: mort.yao@gmail.com
49
+ executables:
50
+ - gplus-get
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - bin/gplus-get
55
+ - lib/google_plus_archiver.rb
56
+ - lib/google_plus_archiver/version.rb
57
+ homepage: https://github.com/soimort/google_plus_archiver
58
+ licenses:
59
+ - MIT
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 1.8.24
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: A simple command-line tool to archive Google+ profiles.
82
+ test_files: []