google_plus_archiver 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/gplus-get ADDED
@@ -0,0 +1,69 @@
#!/usr/bin/env ruby
#
# gplus-get -- command-line front end for GooglePlusArchiver.
#
# Parses the command line into an options hash, registers a Google API client
# with the supplied key and, when registration succeeded, hands the options to
# GooglePlusArchiver.archive_user.
#
# Exit status: 0 on success (and for --version), 1 on a usage error or when
# the client could not be registered.

# Make the bundled library loadable when running straight from a checkout.
$LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)

require 'google_plus_archiver'
require 'google_plus_archiver/version'

require 'optparse'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: gplus-get -a [API_KEY] -u [USER_ID]"

  # -a / -u are declared explicitly because the banner and the error message
  # advertise them; previously they only worked through OptionParser's
  # abbreviation completion.
  opts.on("-a", "--api-key API_KEY", "Specify the Google API key") do |api_key|
    options[:api_key] = api_key
  end

  opts.on("-u", "--user-id USER_ID", "Specify the ID of the user to be archived") do |user_id|
    options[:user_id] = user_id
  end

  # Coerced to Float here: the value ends up as the argument of Kernel#sleep,
  # which rejects strings.
  opts.on("--delay SECONDS", Float, "Delay (in seconds) between two requests (0.2 by default, since Google set a 5 requests/second/user limit)") do |delay|
    options[:delay] = delay
  end

  opts.on("--output-path OUTPUT_PATH", "Output path (the current directory by default)") do |output_path|
    options[:output_path] = output_path
  end

  opts.on("--quiet", "Silent mode") do
    options[:quiet] = true
  end

  opts.on("--exclude-posts", "Don't archive posts") do
    options[:exclude_posts] = true
  end

  opts.on("--exclude-attachments", "Don't archive attachments") do
    options[:exclude_attachments] = true
  end

  opts.on("--exclude-replies", "Don't archive replies") do
    options[:exclude_replies] = true
  end

  opts.on("--exclude-plusoners", "Don't archive plusoners") do
    options[:exclude_plusoners] = true
  end

  opts.on("--exclude-resharers", "Don't archive resharers") do
    options[:exclude_resharers] = true
  end

  opts.on("--version", "Display current version") do
    puts "google_plus_archiver #{GooglePlusArchiver::VERSION}"
    exit 0
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown option, missing argument or a non-numeric --delay: show the
  # problem and the usage text instead of a backtrace.
  warn e.message
  warn parser.help
  exit 1
end

unless options[:api_key] && options[:user_id]
  warn "You must specify both the user ID (-u) and your Google API key (-a)."
  exit 1
end

GooglePlusArchiver.register_client(options[:api_key])
# register_client reports the failure itself; only the exit status is added here.
exit 1 unless GooglePlusArchiver.client_registered?

GooglePlusArchiver.archive_user(options)
@@ -0,0 +1,4 @@
# Release metadata for the google_plus_archiver gem.
module GooglePlusArchiver
  # Gem version string, printed by `gplus-get --version`.
  # Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
  # Release date of this version (YYYY-MM-DD).
  DATE = "2012-12-18".freeze
end
@@ -0,0 +1,282 @@
require 'fileutils'
require 'find'
require 'json'
require 'net/http'
require 'openssl'
require 'tempfile'
require 'tmpdir'
require 'uri'
require 'zlib'

require 'google/api_client'

require 'archive/tar/minitar'
include Archive::Tar
11
+
# Archives a Google+ user's public profile, posts, attachments, replies,
# plusoners and resharers into a single .tar.gz file.
#
# Typical use:
#
#   GooglePlusArchiver.register_client(api_key)
#   GooglePlusArchiver.archive_user(:user_id => "...") if GooglePlusArchiver.client_registered?
#
# All state (API client, discovered "plus" API, key, request counter) is kept
# in module-level class variables, so there is one client per process.
module GooglePlusArchiver

  # Initialised up front so the readers below can be called before
  # register_client without raising NameError.
  @@client = nil
  @@api_key = nil
  @@request_num = 0

  # @return [String, nil] the API key currently in use, nil before registration
  def self.api_key
    @@api_key
  end

  # Replaces the API key, also on the underlying client when one exists.
  #
  # @param api_key [String]
  def self.api_key=(api_key)
    @@client.key = api_key if @@client
    @@api_key = api_key
  end

  # @return [Integer] number of API requests issued since the last
  #   register_client call
  def self.request_num
    @@request_num
  end

  # Creates the API client, stores the key, resets the request counter and
  # looks up the 'plus' API. When the lookup raises, a message is printed and
  # the client stays unregistered (see client_registered?).
  #
  # @param api_key [String]
  def self.register_client(api_key)
    @@client = Google::APIClient.new
    @@api_key = @@client.key = api_key
    @@request_num = 0
    begin
      @@plus = @@client.discovered_api('plus')
    rescue StandardError
      puts "Invalid Google API key."
    end
  end

  # @return [Boolean] true once register_client has successfully looked up
  #   the 'plus' API
  def self.client_registered?
    !!defined?(@@plus)
  end

  # Downloads everything requested for one user into a temporary directory
  # and packs it into "<output_path>/<display name>_<timestamp>.tar.gz".
  #
  # The archive is written even when downloading fails part-way (errors and
  # Ctrl-C are reported on stdout and whatever was fetched so far is packed).
  #
  # @param params [Hash] options, symbol keys:
  #   :user_id [String] user to archive;
  #   :delay [Numeric, String] seconds to sleep between pages (default 0.2);
  #   :output_path [String] destination directory (default: current directory);
  #   :quiet [Boolean] suppress progress output;
  #   :exclude_posts, :exclude_attachments, :exclude_replies,
  #   :exclude_plusoners, :exclude_resharers [Boolean] skip that part.
  # @raise [ArgumentError] when :delay is not numeric
  def self.archive_user(params)
    unless client_registered?
      puts "Unregistered client."
      return
    end

    user_id = params[:user_id]
    # The CLI may pass the delay as a String; Kernel#sleep needs a number.
    delay = Float(params[:delay] || 0.2)
    # Made absolute now because packing happens after a chdir into the
    # temporary directory, where a relative path would point to the wrong place.
    output_path = File.expand_path(params[:output_path] || Dir.pwd)
    quiet = params[:quiet]

    Dir.mktmpdir do |tmp_dir|
      user_display_name = nil
      begin
        user_display_name = archive_profile(user_id, tmp_dir, quiet)
        archive_posts(user_id, tmp_dir, params, delay, quiet) unless params[:exclude_posts]
      rescue StandardError, Interrupt => e
        puts e.message
        puts "Archiving interrupted due to unexpected errors."
      ensure
        # Fall back to the user id when the profile could not be fetched, and
        # keep path separators out of the file name.
        archive_name = (user_display_name || user_id).to_s.gsub(%r{[/\\]}, '_')
        archive_time = Time.now.strftime("%Y-%m-%d-%H:%M:%S%z")
        pack_archive(tmp_dir, "#{output_path}/#{archive_name}_#{archive_time}.tar.gz", user_id)
      end
    end
  end

  # Bumps the request counter (also in quiet mode) and prints the progress line.
  def self.count_request(message, quiet)
    @@request_num += 1
    puts "##{@@request_num} #{message}" unless quiet
  end

  # Writes a response body as a text file, closing the handle afterwards.
  def self.write_text(path, body)
    File.open(path, "w") { |f| f.puts body }
  end

  # Fetches the profile, stores it as profile.json and returns the display name.
  def self.archive_profile(user_id, tmp_dir, quiet)
    count_request("Fetching people.get ...", quiet)
    response = @@client.execute(
      :api_method => @@plus.people.get,
      :parameters => {
        'collection' => 'public',
        'userId' => user_id
      },
      :authenticated => false
    )
    write_text("#{tmp_dir}/profile.json", response.body)
    JSON.parse(response.body)['displayName']
  end

  # Calls a paginated API method until no 'nextPageToken' is returned,
  # sleeping `delay` seconds between pages. Yields the raw body, the parsed
  # page and the zero-based page number for every page.
  def self.each_page(api_method, parameters, label, delay, quiet)
    page_token = nil
    page_num = 0
    loop do
      count_request("Fetching #{label}: page[#{page_num}] ...", quiet)
      response = @@client.execute(
        :api_method => api_method,
        :parameters => parameters.merge('pageToken' => page_token),
        :authenticated => false
      )
      page = JSON.parse(response.body)
      page_token = page['nextPageToken']

      yield response.body, page, page_num

      break unless page_token
      page_num += 1
      sleep delay
    end
  end

  # Stores every page of the user's public activity list as posts[N].json and
  # archives each listed activity.
  def self.archive_posts(user_id, tmp_dir, params, delay, quiet)
    request = {
      'collection' => 'public',
      'userId' => user_id,
      'maxResults' => '100'
    }
    each_page(@@plus.activities.list, request, "activities.list", delay, quiet) do |body, page, page_num|
      write_text("#{tmp_dir}/posts[#{page_num}].json", body)
      # 'items' is absent when the response carries no activities.
      (page['items'] || []).each do |item|
        archive_activity(item, tmp_dir, params, delay, quiet)
      end
    end
  end

  # Stores one activity as <id>.json plus, unless excluded, its attachments,
  # replies, plusoners and resharers.
  def self.archive_activity(item, tmp_dir, params, delay, quiet)
    activity_id = item['id']
    file_prefix = "#{tmp_dir}/#{activity_id}"

    puts "##{@@request_num} Fetching activities.get: #{activity_id}" unless quiet
    write_text("#{file_prefix}.json", item.to_json)

    attachments = item['object'] && item['object']['attachments']
    if !params[:exclude_attachments] && attachments
      attachments.each { |attachment| archive_attachment(attachment, file_prefix, quiet) }
    end

    unless params[:exclude_replies]
      archive_collection(
        @@plus.comments.list,
        { 'activityId' => activity_id, 'maxResults' => '500' },
        "comments.list", "#{file_prefix}_replies", delay, quiet
      )
    end

    unless params[:exclude_plusoners]
      archive_collection(
        @@plus.people.list_by_activity,
        { 'activityId' => activity_id, 'collection' => 'plusoners', 'maxResults' => '100' },
        "people.listByActivity(plusoners)", "#{file_prefix}_plusoners", delay, quiet
      )
    end

    unless params[:exclude_resharers]
      archive_collection(
        @@plus.people.list_by_activity,
        { 'activityId' => activity_id, 'collection' => 'resharers', 'maxResults' => '100' },
        "people.listByActivity(resharers)", "#{file_prefix}_resharers", delay, quiet
      )
    end
  end

  # Stores every page of a per-activity collection. A collection that fits in
  # one page is written as "<prefix>.json", otherwise as "<prefix>[N].json".
  # The decision uses this collection's own page counter (the resharers used
  # to look at the replies counter by mistake).
  def self.archive_collection(api_method, parameters, label, file_prefix, delay, quiet)
    each_page(api_method, parameters, label, delay, quiet) do |body, page, page_num|
      suffix = (page_num == 0 && !page['nextPageToken']) ? "" : "[#{page_num}]"
      write_text("#{file_prefix}#{suffix}.json", body)
    end
  end

  # Downloads an attachment's full-size image (or, failing that, its regular
  # image) and all of its thumbnails.
  def self.archive_attachment(attachment, file_prefix, quiet)
    attachment_prefix = "#{file_prefix}_#{attachment['id']}"

    image = attachment['fullImage'] || attachment['image']
    fetch_image(image['url'], "attachment", attachment_prefix, quiet) if image

    (attachment['thumbnails'] || []).each_with_index do |thumbnail, index|
      image = thumbnail['image']
      next unless image
      fetch_image(image['url'], "attachment(thumbnail)", "#{attachment_prefix}_#{index}", quiet)
    end
  end

  # Downloads one image and writes it, byte for byte, to
  # "<file_prefix><.ext>". The extension is taken from the URL path and
  # dropped when it is longer than four characters.
  def self.fetch_image(url, label, file_prefix, quiet)
    puts "##{@@request_num} Fetching #{label}: #{url} ..." unless quiet
    # NOTE(review): URI.escape is obsolete and was removed in Ruby 3.0; kept
    # because the rest of the gem targets the Ruby versions that still ship it.
    uri = URI.parse(URI.escape("#{url}"))
    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == 'https'
      http.use_ssl = true
      # Verify the server certificate instead of accepting any peer.
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    end
    data = http.get(uri.request_uri)

    image_ext = File.extname(uri.path.to_s)
    image_ext = "" if image_ext.length > 5 # dot + at most four characters

    # Binary mode and write (not puts): image data must not gain a newline.
    File.open("#{file_prefix}#{image_ext}", "wb") { |f| f.write data.body }
  end

  # Packs every file in tmp_dir into a tar (via a temp file named after the
  # user id) and gzips that tar to archive_filename.
  def self.pack_archive(tmp_dir, archive_filename, user_id)
    FileUtils.cd(tmp_dir) do
      Tempfile.open("#{user_id}") do |tar|
        files = []
        Find.find("./") do |path|
          files << File.basename(path) unless File.basename(path) == '.'
        end
        Archive::Tar::Minitar.pack(files, tar)
        # The tar is re-read through its path below, so make sure nothing is
        # still sitting in the write buffer.
        tar.flush unless tar.closed?

        Zlib::GzipWriter.open(archive_filename) do |gz|
          gz.mtime = File.mtime(tar.path)
          gz.orig_name = tar.path
          gz.write IO.binread(tar.path)
        end
      end
    end
  end

  private_class_method :count_request, :write_text, :archive_profile,
                       :each_page, :archive_posts, :archive_activity,
                       :archive_collection, :archive_attachment,
                       :fetch_image, :pack_archive

end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_plus_archiver
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mort Yao
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: google-api-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '0.5'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: archive-tar-minitar
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '0.5'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '0.5'
46
+ description: google_plus_archiver is a simple command-line tool to archive Google+
47
+ profiles and public streams.
48
+ email: mort.yao@gmail.com
49
+ executables:
50
+ - gplus-get
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - bin/gplus-get
55
+ - lib/google_plus_archiver.rb
56
+ - lib/google_plus_archiver/version.rb
57
+ homepage: https://github.com/soimort/google_plus_archiver
58
+ licenses:
59
+ - MIT
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 1.8.24
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: A simple command-line tool to archive Google+ profiles.
82
+ test_files: []