google_plus_archiver 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/gplus-get +69 -0
- data/lib/google_plus_archiver/version.rb +4 -0
- data/lib/google_plus_archiver.rb +282 -0
- metadata +82 -0
data/bin/gplus-get
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env ruby

# gplus-get: command-line front end for GooglePlusArchiver.
#
# Parses the command-line options into a Hash, registers the Google API
# client with the supplied key and, if registration succeeded, archives the
# requested user. Exits with status 1 on invalid options, on missing
# mandatory options, or when the client could not be registered.

$LOAD_PATH.unshift File.dirname(__FILE__) + "/../lib"

require 'google_plus_archiver'
require 'google_plus_archiver/version.rb'

require 'optparse'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: gplus-get -a [API_KEY] -u [USER_ID]"

  # -a / -u are declared explicitly because the banner and the error message
  # below advertise them.
  opts.on("-a", "--api-key [API_KEY]", "Specify the Google API key") do |api_key|
    options[:api_key] = api_key
  end

  opts.on("-u", "--user-id [USER_ID]", "Specify the ID of the user to be archived") do |user_id|
    options[:user_id] = user_id
  end

  # Float coercion: the value ends up as the argument of Kernel#sleep, which
  # rejects Strings. Without a value the block receives nil and the library
  # default applies.
  opts.on("--delay [SECONDS]", Float, "Delay (in seconds) between two requests (0.2 by default, since Google set a 5 requests/second/user limit)") do |delay|
    options[:delay] = delay
  end

  opts.on("--output-path [OUTPUT_PATH]", "Output path (the current directory by default)") do |output_path|
    options[:output_path] = output_path
  end

  opts.on("--quiet", "Silent mode") do
    options[:quiet] = true
  end

  opts.on("--exclude-posts", "Don't archive posts") do
    options[:exclude_posts] = true
  end

  opts.on("--exclude-attachments", "Don't archive attachments") do
    options[:exclude_attachments] = true
  end

  opts.on("--exclude-replies", "Don't archive replies") do
    options[:exclude_replies] = true
  end

  opts.on("--exclude-plusoners", "Don't archive plusoners") do
    options[:exclude_plusoners] = true
  end

  opts.on("--exclude-resharers", "Don't archive resharers") do
    options[:exclude_resharers] = true
  end

  opts.on("--version", "Display current version") do
    puts "google_plus_archiver #{GooglePlusArchiver::VERSION}"
    exit 0
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown option or malformed value (e.g. a non-numeric --delay).
  warn e.message
  warn parser.banner
  exit 1
end

unless options[:api_key] && options[:user_id]
  warn "You must specify both the user ID (-u) and your Google API key (-a)."
  exit 1
end

GooglePlusArchiver::register_client(options[:api_key])
# register_client prints its own diagnostic when discovery fails.
exit 1 unless GooglePlusArchiver::client_registered?

GooglePlusArchiver::archive_user(options)
|
@@ -0,0 +1,282 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'net/http'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'tmpdir'
|
5
|
+
require 'zlib'
|
6
|
+
|
7
|
+
require 'google/api_client'
|
8
|
+
|
9
|
+
require 'archive/tar/minitar'
|
10
|
+
include Archive::Tar
|
11
|
+
|
12
|
+
# Archives a Google+ user's public profile, posts, attachments, replies,
# plusoners and resharers into a single .tar.gz file.
#
# Typical use:
#
#   GooglePlusArchiver.register_client(api_key)
#   GooglePlusArchiver.archive_user(:user_id => '1234') if GooglePlusArchiver.client_registered?
#
# State (client, discovered API, API key, request counter) is kept in class
# variables of this module, so it is shared process-wide.
module GooglePlusArchiver

  # Returns the API key stored by register_client or api_key=.
  # Raises NameError when neither has been called yet.
  def self.api_key
    @@api_key
  end

  # Stores the API key and assigns it to the current client.
  # Requires register_client to have been called first (uses @@client).
  def self.api_key=(api_key)
    @@api_key = @@client.key = api_key
  end

  # Number of API requests counted since the last register_client call.
  def self.request_num
    @@request_num
  end

  # Creates a new Google::APIClient with the given key, resets the request
  # counter and tries to discover the 'plus' API. On failure a message is
  # printed and the 'plus' API is left unset.
  def self.register_client(api_key)
    @@client = Google::APIClient.new
    @@api_key = @@client.key = api_key
    @@request_num = 0
    begin
      @@plus = @@client.discovered_api('plus')
    rescue
      puts "Invalid Google API key."
    end
  end

  # True once the 'plus' API has been discovered by register_client.
  def self.client_registered?
    class_variable_defined?(:@@plus)
  end

  # Downloads everything requested for one user into a temporary directory
  # and packs it as "<output_path>/<display name>_<timestamp>.tar.gz".
  #
  # params (Hash with Symbol keys):
  #   :user_id             - ID of the user to archive
  #   :delay               - seconds to sleep between paged requests; Numeric
  #                          or numeric String (default 0.2)
  #   :output_path         - target directory (default: current directory)
  #   :quiet               - suppress progress output when truthy
  #   :exclude_posts, :exclude_attachments, :exclude_replies,
  #   :exclude_plusoners, :exclude_resharers - skip that part when truthy
  #
  # Prints "Unregistered client." and returns nil when no client is
  # registered. Raises ArgumentError when :delay is not numeric. Errors that
  # occur while fetching are printed, and whatever was downloaded up to that
  # point is still archived.
  def self.archive_user(params)
    unless client_registered?
      puts "Unregistered client."
      return
    end

    user_id = params[:user_id]
    # Kernel#sleep rejects Strings, and command-line values arrive as Strings.
    delay = Float(params[:delay] || 0.2)
    # Made absolute now because packing changes the working directory.
    output_path = File.expand_path(params[:output_path] || FileUtils.pwd)
    quiet = params[:quiet]

    Dir.mktmpdir do |tmp_dir|
      user_display_name = nil
      begin
        #>> profile
        count_request("Fetching people.get ...", quiet)
        response = execute_request(@@plus.people.get,
                                   'collection' => 'public',
                                   'userId' => user_id)

        #<< profile
        write_text("#{tmp_dir}/profile.json", response.body)
        user_display_name = JSON.parse(response.body)['displayName']

        #>> posts
        archive_posts(params, user_id, tmp_dir, delay, quiet) unless params[:exclude_posts]

      # Interrupt is rescued on purpose so that Ctrl-C still yields a partial
      # archive; SystemExit and other non-standard exceptions pass through.
      rescue StandardError, Interrupt => e
        puts e.message
        puts "Archiving interrupted due to unexpected errors."
      ensure
        # Archive all the files fetched so far, even after a failure.
        pack_archive(tmp_dir, output_path, user_id, user_display_name)
      end
    end
  end

  # Increments the request counter and, unless quiet, prints the numbered
  # progress line. Counting is independent of logging.
  def self.count_request(description, quiet)
    @@request_num += 1
    puts "##{@@request_num} #{description}" unless quiet
  end

  # Executes one unauthenticated API call and returns the client's response.
  def self.execute_request(api_method, parameters)
    @@client.execute(
      :api_method => api_method,
      :parameters => parameters,
      :authenticated => false
    )
  end

  # Writes text to path using puts (a trailing newline is added if missing)
  # and closes the file.
  def self.write_text(path, text)
    File.open(path, "w") { |f| f.puts text }
  end

  # Fetches every page of the user's public activities, stores each page as
  # posts[N].json and archives every activity on it.
  def self.archive_posts(params, user_id, tmp_dir, delay, quiet)
    next_page_token = nil
    page_num = 0
    loop do
      count_request("Fetching activities.list: page[#{page_num}] ...", quiet)
      response = execute_request(@@plus.activities.list,
                                 'collection' => 'public',
                                 'userId' => user_id,
                                 'maxResults' => '100',
                                 'pageToken' => next_page_token)
      activities = JSON.parse(response.body)
      next_page_token = activities['nextPageToken']

      #<< posts
      write_text("#{tmp_dir}/posts[#{page_num}].json", response.body)

      activities['items'].each do |item|
        archive_activity(item, params, tmp_dir, delay, quiet)
      end

      break unless next_page_token
      page_num += 1
      sleep delay
    end
  end

  # Stores one activity as <id>.json plus, unless excluded, its attachments,
  # replies, plusoners and resharers.
  def self.archive_activity(item, params, tmp_dir, delay, quiet)
    activity_id = item['id']
    file_stem = "#{tmp_dir}/#{activity_id}"

    puts "##{@@request_num} Fetching activities.get: #{activity_id}" unless quiet

    #<< post
    write_text("#{file_stem}.json", item.to_json)

    #>> attachments
    if !params[:exclude_attachments] && item['object']['attachments']
      item['object']['attachments'].each do |attachment|
        archive_attachment(attachment, activity_id, tmp_dir, quiet)
      end
    end

    #>> replies
    unless params[:exclude_replies]
      archive_pages(@@plus.comments.list, "comments.list", "#{file_stem}_replies",
                    { 'activityId' => activity_id, 'maxResults' => '500' },
                    delay, quiet)
    end

    #>> plusoners
    unless params[:exclude_plusoners]
      archive_pages(@@plus.people.list_by_activity, "people.listByActivity(plusoners)",
                    "#{file_stem}_plusoners",
                    { 'activityId' => activity_id, 'collection' => 'plusoners', 'maxResults' => '100' },
                    delay, quiet)
    end

    #>> resharers
    unless params[:exclude_resharers]
      archive_pages(@@plus.people.list_by_activity, "people.listByActivity(resharers)",
                    "#{file_stem}_resharers",
                    { 'activityId' => activity_id, 'collection' => 'resharers', 'maxResults' => '100' },
                    delay, quiet)
    end
  end

  # Fetches all pages of one paged collection and writes each response to
  # "<file_stem>.json" when the collection has a single page, otherwise to
  # "<file_stem>[N].json". Sleeps `delay` seconds between pages.
  def self.archive_pages(api_method, label, file_stem, parameters, delay, quiet)
    next_page_token = nil
    page_num = 0
    loop do
      count_request("Fetching #{label}: page[#{page_num}] ...", quiet)
      response = execute_request(api_method, parameters.merge('pageToken' => next_page_token))
      next_page_token = JSON.parse(response.body)['nextPageToken']

      # The suffix depends on this collection's own page counter only.
      suffix = (page_num == 0 && !next_page_token) ? "" : "[#{page_num}]"
      write_text("#{file_stem}#{suffix}.json", response.body)

      break unless next_page_token
      page_num += 1
      sleep delay
    end
  end

  # Downloads the full image (or, failing that, the image) of an attachment
  # and every thumbnail image it lists.
  def self.archive_attachment(attachment, activity_id, tmp_dir, quiet)
    file_stem = "#{tmp_dir}/#{activity_id}_#{attachment['id']}"

    image = attachment['fullImage'] || attachment['image']
    if image
      puts "##{@@request_num} Fetching attachment: #{image['url']} ..." unless quiet
      #<< attachment
      download_image(image['url'], file_stem)
    end

    (attachment['thumbnails'] || []).each_with_index do |thumbnail, index|
      image = thumbnail['image']
      puts "##{@@request_num} Fetching attachment(thumbnail): #{image['url']} ..." unless quiet
      #<< attachment
      download_image(image['url'], "#{file_stem}_#{index}")
    end
  end

  # Fetches url over HTTP(S) and writes the response body, byte for byte, to
  # file_stem plus the extension derived from the URL.
  def self.download_image(url, file_stem)
    # URI.escape was removed in Ruby 3.0; the parser's escape is the same routine.
    uri = URI.parse(URI::DEFAULT_PARSER.escape(url.to_s))
    http = Net::HTTP.new(uri.host, uri.port)
    # Certificate verification is left at Net::HTTP's default (peer verification).
    http.use_ssl = true if uri.scheme == 'https' || http.port == 443
    data = http.get(uri.request_uri)

    # Binary mode and write: no newline translation or appended newline.
    File.open("#{file_stem}#{image_extension(uri)}", "wb") { |f| f.write data.body }
  end

  # Returns ".ext" for the last path segment of uri when it has an extension
  # of at most four characters, otherwise "".
  def self.image_extension(uri)
    ext = File.extname(uri.path.to_s).delete('.')
    (ext.empty? || ext.length > 4) ? "" : ".#{ext}"
  end

  # Packs every entry of tmp_dir into a tar file and gzips it to
  # "<output_path>/<name>_<timestamp>.tar.gz". The name is the display name,
  # or the user ID when the profile could not be fetched; "/" is replaced so
  # the name cannot point into another directory.
  def self.pack_archive(tmp_dir, output_path, user_id, user_display_name)
    # One clock reading: same layout as "YYYY-MM-DD-HH:MM:SS+ZZZZ".
    archive_time = Time.now.strftime("%Y-%m-%d-%H:%M:%S%z")
    archive_label = (user_display_name || user_id).to_s.tr('/', '_')
    archive_filename = "#{output_path}/#{archive_label}_#{archive_time}.tar.gz"

    FileUtils.cd(tmp_dir) do
      Tempfile.open("#{user_id}") do |tar|
        tar.binmode
        files = (Dir.entries('.') - %w[. ..]).sort
        Archive::Tar::Minitar.pack(files, tar)
        # Make sure everything is on disk before it is read back by path.
        tar.flush unless tar.closed?

        Zlib::GzipWriter.open(archive_filename) do |gz|
          gz.mtime = File.mtime(tar.path)
          gz.orig_name = tar.path
          gz.write IO.binread(tar.path)
        end
      end
    end
  end

  private_class_method :count_request, :execute_request, :write_text,
                       :archive_posts, :archive_activity, :archive_pages,
                       :archive_attachment, :download_image, :image_extension,
                       :pack_archive

end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_plus_archiver
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mort Yao
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-18 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: google-api-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0.5'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.5'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: archive-tar-minitar
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.5'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.5'
|
46
|
+
description: google_plus_archiver is a simple command-line tool to archive Google+
|
47
|
+
profiles and public streams.
|
48
|
+
email: mort.yao@gmail.com
|
49
|
+
executables:
|
50
|
+
- gplus-get
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- bin/gplus-get
|
55
|
+
- lib/google_plus_archiver.rb
|
56
|
+
- lib/google_plus_archiver/version.rb
|
57
|
+
homepage: https://github.com/soimort/google_plus_archiver
|
58
|
+
licenses:
|
59
|
+
- MIT
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
requirements: []
|
77
|
+
rubyforge_project:
|
78
|
+
rubygems_version: 1.8.24
|
79
|
+
signing_key:
|
80
|
+
specification_version: 3
|
81
|
+
summary: A simple command-line tool to archive Google+ profiles.
|
82
|
+
test_files: []
|