faceoff 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2010-05-11
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,13 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/faceoff
6
+ lib/faceoff.rb
7
+ lib/faceoff/album.rb
8
+ lib/faceoff/note.rb
9
+ lib/faceoff/pagelet.rb
10
+ lib/faceoff/photo.rb
11
+ lib/faceoff/user.rb
12
+ lib/faceoff/video.rb
13
+ test/test_faceoff.rb
@@ -0,0 +1,80 @@
1
+ = faceoff
2
+
3
+ * http://github.com/yaksnrainbows/faceoff
4
+
5
+ == DESCRIPTION:
6
+
7
+ A scraper to back up your Facebook account (albums, photos, notes, users, video).
8
+ Why a scraper instead of using facebook apps? Logging in as a user gives you
9
+ access to more data and it's harder to block.
10
+
11
+
12
+ == SYNOPSIS:
13
+
14
+ Using the gem lib:
15
+
16
+ require 'faceoff'
17
+
18
+ f = Faceoff.login "email@example.com", "password"
19
+
20
+ f.photos_of_me{|p| p.save!}
21
+ # Saves to ./Photos\ of\ me/
22
+
23
+ f.albums[0].name
24
+ #=> "Album title"
25
+
26
+ f.albums[0].photos(0..10)
27
+ #=> [<#Photo...>,<#Photo...>, ...]
28
+
29
+ f.albums[0].photos[0].url
30
+ #=> "http://photo/url.jpg"
31
+
32
+ f.notes.first.title
33
+ #=> "Title of note"
34
+
35
+
36
+ Using the gem bin:
37
+
38
+ faceoff --all -d path/to/dir
39
+ faceoff --notes --albums '1..4'
40
+ faceoff email@example.com password -A
41
+
42
+ For more information see:
43
+
44
+ faceoff -h
45
+
46
+ == REQUIREMENTS:
47
+
48
+ * mechanize
49
+ * highline
50
+ * json
51
+ * vpim
52
+
53
+ == INSTALL:
54
+
55
+ * sudo gem install faceoff
56
+
57
+ == LICENSE:
58
+
59
+ (The MIT License)
60
+
61
+ Copyright (c) 2010 Jeremie Castagna
62
+
63
+ Permission is hereby granted, free of charge, to any person obtaining
64
+ a copy of this software and associated documentation files (the
65
+ 'Software'), to deal in the Software without restriction, including
66
+ without limitation the rights to use, copy, modify, merge, publish,
67
+ distribute, sublicense, and/or sell copies of the Software, and to
68
+ permit persons to whom the Software is furnished to do so, subject to
69
+ the following conditions:
70
+
71
+ The above copyright notice and this permission notice shall be
72
+ included in all copies or substantial portions of the Software.
73
+
74
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
75
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
76
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
77
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
78
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
79
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
80
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :isolate
7
+
8
+ Hoe.spec 'faceoff' do
9
+ developer('Jeremie Castagna', 'yaksnrainbows@gmail.com')
10
+
11
+ self.extra_deps << ['highline', '>= 1.5.1']
12
+ self.extra_deps << ['mechanize', '>= 1.0.0']
13
+ self.extra_deps << ['json', '>= 1.2.0']
14
+ self.extra_deps << ['vpim', '0.695']
15
+ end
16
+
17
+ # vim: syntax=ruby
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby
# Command line entry point of the faceoff gem.

require 'rubygems'

begin
  require 'faceoff'
rescue LoadError
  # The gem is not installed: fall back to the lib directory that sits next
  # to this bin directory. The path is resolved from this file's location so
  # the script also works when started from another working directory.
  $LOAD_PATH.unshift File.expand_path('../lib', File.dirname(__FILE__))
  require 'faceoff'
end

Faceoff.run ARGV
@@ -0,0 +1,390 @@
1
+ # Odd hack that makes the vpim-mechanize conflict disappear.
2
+ Vpim = Module.new
3
+
4
+ # Hack to fix old ruby version support of Vpim
5
+ class Array
6
+ def to_str
7
+ self.to_s
8
+ end
9
+ end
10
+
11
+
12
+ require 'rubygems'
13
+ require 'mechanize'
14
+ require 'highline'
15
+
16
+ require 'vpim/vcard'
17
+
18
+ require 'optparse'
19
+ require 'json'
20
+ require 'time'
21
+ require 'fileutils'
22
+
23
+ class Faceoff
24
+
25
+ require 'faceoff/pagelet'
26
+ require 'faceoff/note'
27
+ require 'faceoff/photo'
28
+ require 'faceoff/album'
29
+ require 'faceoff/video'
30
+ require 'faceoff/user'
31
+
32
+
33
+ # Version of the gem.
34
+ VERSION = '1.0.0'
35
+
36
+ # Command line actions available.
37
+ ACTIONS = %w{albums friends notes photos_of_me profile_pictures videos_of_me}
38
+
39
+ # Default directory to save data to.
40
+ DEFAULT_DIR = "faceoff-#{Time.now.to_i}"
41
+
42
+
43
##
# Build a Faceoff instance for the given credentials and attempt to log in.
# Returns the instance when the login succeeded, nil otherwise.

def self.login email, password
  session = new email, password
  session.login

  return unless session.logged_in?
  session
end
51
+
52
+
53
+ ##
54
+ # Parse ARGVs
55
+
56
+ def self.parse_args argv
57
+ options = {}
58
+
59
+ opts = OptionParser.new do |opt|
60
+ opt.program_name = File.basename $0
61
+ opt.version = Faceoff::VERSION
62
+ opt.release = nil
63
+
64
+ opt.banner = <<-STR
65
+ Faceoff is a facebook scraper that allows you to download and backup
66
+ your content in a reusable format.
67
+
68
+ Usage:
69
+ #{opt.program_name} -h/--help
70
+ #{opt.program_name} -v/--version
71
+ #{opt.program_name} [facebook_email [password]] [options...]
72
+
73
+ Examples:
74
+ #{opt.program_name} example@yahoo.com --all
75
+ #{opt.program_name} example@yahoo.com --albums '2..3'
76
+ #{opt.program_name} example@yahoo.com --friends --profilepics 0
77
+
78
+ Options:
79
+ STR
80
+
81
+ opt.on('-A', '--all', 'Retrieve all facebook data') do
82
+ ACTIONS.each{|a| options[a] = true }
83
+ end
84
+
85
+ opt.on('-a', '--albums [RANGE]', 'Retrieve albums') do |range|
86
+ options['albums'] = parse_range range
87
+ end
88
+
89
+ opt.on('-f', '--friends [RANGE]', 'Retrieve contacts') do |range|
90
+ options['friends'] = parse_range range
91
+ end
92
+
93
+ opt.on('-n', '--notes [RANGE]', 'Retrieve notes') do |range|
94
+ options['notes'] = parse_range range
95
+ end
96
+
97
+ opt.on('-p', '--photosofme [RANGE]', 'Retrieve photos of me') do |range|
98
+ options['photos_of_me'] = parse_range range
99
+ end
100
+
101
+ opt.on('-P', '--profilepics [RANGE]', 'Retrieve profile pics') do |range|
102
+ options['profile_pictures'] = parse_range range
103
+ end
104
+
105
+ opt.on('-V', '--videosofme [RANGE]', 'Retrieve videos of me') do |range|
106
+ options['videos_of_me'] = parse_range range
107
+ end
108
+
109
+ opt.on('-d', '--directory PATH', 'Directory to save to') do |path|
110
+ options['dir'] = path
111
+ end
112
+
113
+ opt.on('-z', '--gzip [NAME]', 'Zip content when done') do |name|
114
+ options['gzip'] = name || true
115
+ end
116
+ end
117
+
118
+ opts.parse! argv
119
+
120
+ options['dir'] ||= "."
121
+
122
+ options['email'], options['password'] = argv
123
+
124
+ options
125
+ end
126
+
127
+
128
##
# Takes a range or number as a string and returns a Range or an Integer.
# Returns true if no range or int value is found:
#   parse_range "2..5"  #=> 2..5
#   parse_range "3"     #=> 3
#   parse_range nil     #=> true

def self.parse_range str
  str = str.to_s.strip

  # \A and \z anchor the whole string; ^ and $ would accept a value as soon
  # as any single line of it looked like a range.
  return Range.new($1.to_i, $2.to_i) if str =~ /\A(\d+)\.\.(\d+)\z/
  return str.to_i if str == str.to_i.to_s

  true
end
140
+
141
+
142
##
# Run from the command line: parse the arguments, log in (asking for any
# credentials that were not passed) and save each requested type of content
# in its own sub-directory of the target directory.

def self.run argv
  options = parse_args argv
  input   = HighLine.new $stdin

  faceoff = nil

  until faceoff do
    email    = options['email'] || input.ask("Facebook Email: ")
    password = options['password'] ||
                input.ask("Facebook Password: "){|i| i.echo = false}

    faceoff = login email, password

    # Drop a rejected password so the next pass asks for it again.
    options['password'] = nil unless faceoff
  end

  ACTIONS.each do |action|
    next unless options[action]
    dir = File.join options['dir'], action.capitalize.gsub("_", " ")

    faceoff.send(action, options[action]) do |item|
      # Not every item type has a name (photos do not); fall back to the
      # facebook id without hiding unrelated errors behind a rescue.
      name = (item.respond_to?(:name) && item.name) || item.fid
      puts "Saving #{action} '#{name}'"
      item.save! dir
    end
  end
end
173
+
174
+
175
##
# Download the resource at the given url and return the response body as a
# string.

def self.download url
  uri = URI.parse url

  # Use the port and scheme of the url instead of assuming plain http on
  # the default port.
  resp = Net::HTTP.start(uri.host, uri.port,
                         :use_ssl => uri.scheme == 'https') do |http|
    # request_uri keeps the query string, which uri.path would drop, and is
    # never empty.
    http.get uri.request_uri
  end

  resp.body
end
187
+
188
+
189
##
# Safely save a file; rename it if the name exists. The target directory is
# created when missing and the open file is passed to the block:
#   Faceoff.safe_save("dir/a.txt"){|f| f.write "data" }
# When "dir/a.txt" is already taken, "dir/a (1).txt" is used, then
# "dir/a (2).txt" and so on. Raises a RuntimeError when the directory
# cannot be used.

def self.safe_save filename, &block
  dir = File.dirname(filename)

  FileUtils.mkdir_p dir
  raise "Invalid directory #{dir}" unless File.directory? dir

  ext  = File.extname filename
  base = File.basename filename, ext

  target = filename
  i = 0

  # File.exist? also steps over a directory that carries the wanted name.
  while File.exist?(target)
    i = i.next
    target = File.join dir, "#{base} (#{i})#{ext}"
  end

  # Binary mode: photos and videos must be written byte for byte.
  File.open(target, "wb") do |f|
    block.call(f) if block_given?
  end
end
215
+
216
+
217
+ # Mechanize agent
218
+ attr_accessor :agent
219
+
220
+ # User email
221
+ attr_accessor :email
222
+
223
+ # User password
224
+ attr_accessor :password
225
+
226
+ # Facebook profile id
227
+ attr_writer :profile_id
228
+
229
+
230
+ ##
231
+ # Instantiate with user information.
232
+
233
+ def initialize email, password
234
+ @email = email
235
+ @password = password
236
+ @profile_id = nil
237
+
238
+ @agent = Mechanize.new
239
+ @agent.user_agent_alias = 'Mac Safari'
240
+ @agent.redirect_ok = true
241
+
242
+ @albums = nil
243
+ @friends = nil
244
+ @notes = nil
245
+ @photos_of_me = nil
246
+ @profile_pics = nil
247
+ @videos_of_me = nil
248
+ end
249
+
250
+
251
##
# Returns an options hash based on the type of input:
#   bracket_for 5
#   # => {:limit => 5}
#
#   bracket_for 3..5
#   # => {:limit => 3, :start => 3}
#
#   bracket_for true
#   # => {}

def bracket_for obj
  case obj
  # Integer instead of Fixnum: Fixnum is gone from current rubies.
  when Integer then {:limit => obj}
  when Range   then {:limit => obj.entries.length, :start => obj.first}
  else {}
  end
end
269
+
270
+
271
##
# Check if we're logged into facebook: returns the "c_user" cookie the agent
# holds for http://www.facebook.com, or nil when there is none.

def logged_in?
  site    = URI.parse("http://www.facebook.com")
  cookies = @agent.cookie_jar.cookies(site)

  cookies.find{|cookie| cookie.name == "c_user" }
end
279
+
280
+
281
##
# Login to facebook by submitting the login form of the home page with the
# stored email and password. Returns the result of logged_in?, or nil when
# the login form cannot be found.

def login
  home = @agent.get("http://www.facebook.com")

  login_form = home.form_with \
    :action => 'https://login.facebook.com/login.php?login_attempt=1'

  return unless login_form

  login_form.email = @email
  login_form.pass  = @password
  login_form.submit

  logged_in?
end
297
+
298
+
299
##
# Returns an array of photo albums. Albums are fetched once and cached;
# pass a truthy reload value (true, an Integer limit or a Range) to fetch
# them again. Each album is passed to the block, when one is given.

def albums reload=false, &block
  if @albums && !reload
    # Yield cached albums too, like the other cached collections do.
    @albums.each(&block) if block_given?
    return @albums
  end

  @albums = Album.retrieve_all self, bracket_for(reload), &block
end
306
+
307
+
308
+ ##
309
+ # Returns the logged in user's User object.
310
+
311
+ def user reload=false
312
+ return @user if @user && !reload
313
+ @user = User.retrieve self, profile_id
314
+ end
315
+
316
+
317
+ ##
318
+ # Returns an array of friends.
319
+
320
+ def friends reload=false, &block
321
+ if @friends && !reload
322
+ @friends.each &block if block_given?
323
+ return @friends
324
+ end
325
+
326
+ @friends = User.retrieve_all self, bracket_for(reload), &block
327
+ end
328
+
329
+
330
+ ##
331
+ # Returns an array of notes.
332
+
333
+ def notes reload=false, &block
334
+ if @notes && !reload
335
+ @notes.each &block if block_given?
336
+ return @notes
337
+ end
338
+
339
+ @notes = Note.retrieve_all self, bracket_for(reload), &block
340
+ end
341
+
342
+
343
+ ##
344
+ # Returns an array of photos of me.
345
+
346
+ def photos_of_me reload=false, &block
347
+ if @photos_of_me && !reload
348
+ @photos_of_me.each &block if block_given?
349
+ return @photos_of_me
350
+ end
351
+
352
+ @photos_of_me = Photo.photos_of_me self, bracket_for(reload), &block
353
+ end
354
+
355
+
356
+ ##
357
+ # Returns an array of profile pictures photos.
358
+
359
+ def profile_pictures reload=false, &block
360
+ if @profile_pics && !reload
361
+ @profile_pics.each &block if block_given?
362
+ return @profile_pics
363
+ end
364
+
365
+ @profile_pics =
366
+ Photo.photos_of_album self, :profile, bracket_for(reload), &block
367
+ end
368
+
369
+
370
##
# Returns the facebook profile id. Fetches it from the agent's current page
# if not set, or when reload is true. The previously known id (possibly nil)
# is returned when the page does not contain one.

def profile_id reload=false
  return @profile_id if @profile_id && !reload

  # The agent has no current page before its first request.
  page = @agent.current_page
  @profile_id = $1 if page && page.body =~ /\\"user\\":(\d+)/m

  @profile_id
end
377
+
378
+
379
+ ##
380
+ # Returns an array of videos of me.
381
+
382
+ def videos_of_me reload=false, &block
383
+ if @videos_of_me && !reload
384
+ @videos_of_me.each &block if block_given?
385
+ return @videos_of_me
386
+ end
387
+
388
+ @videos_of_me = Video.videos_of_me self, bracket_for(reload), &block
389
+ end
390
+ end
@@ -0,0 +1,83 @@
1
+ class Faceoff
2
+
3
+ class Album
4
+
5
+
6
+ ##
7
+ # Returns an array of albums for the current user. Pass the :user_id option
8
+ # to override the logged in user.
9
+
10
+ def self.retrieve_all faceoff, options={}, &block
11
+ albums = []
12
+ limit = options[:limit]
13
+ start = options[:start] || 0
14
+
15
+ agent = faceoff.agent
16
+ user_id = options[:user_id] || faceoff.profile_id
17
+
18
+ page = agent.get "/photos.php?id=#{user_id}"
19
+
20
+ xpath = "table[@class='uiGrid fbPhotosGrid']/tbody/"+
21
+ "/div[@class='pls photoDetails']/a"
22
+
23
+ nodes = page.search(xpath)
24
+
25
+ limit ||= nodes.length
26
+
27
+ nodes[start, limit].each do |node|
28
+ album_id = $1 if node['href'] =~ /aid=(\d+)/
29
+ album_name = node.text
30
+
31
+ albums << new(faceoff, album_id, album_name)
32
+
33
+ yield albums.last if block_given?
34
+ end
35
+
36
+ albums
37
+ end
38
+
39
+
40
+ # Facebook album id.
41
+ attr_accessor :fid
42
+
43
+ # Name of the album.
44
+ attr_accessor :name
45
+
46
+
47
+ def initialize faceoff, id, name
48
+ @faceoff = faceoff
49
+ @agent = faceoff.agent
50
+
51
+ @fid = id
52
+ @name = name
53
+ @photos = nil
54
+ end
55
+
56
+
57
##
# Returns an array of photos. Photos are fetched once and cached; pass a
# truthy reload value to fetch them again. An Integer is used as the maximum
# number of photos and a Range as the window of photos to retrieve:
#   album.photos 5
#   album.photos 0..10
# Each photo is passed to the block, when one is given.

def photos reload=false, &block
  if @photos && !reload
    @photos.each(&block) if block_given?
    return @photos
  end

  options =
    case reload
    when Integer then {:limit => reload}
    when Range   then {:limit => reload.entries.length, :start => reload.first}
    else {}
    end

  @photos = Photo.photos_of_album @faceoff, @fid, options, &block
end
71
+
72
+
73
+ ##
74
+ # Save the album to the provided directory.
75
+
76
+ def save! target="./Albums"
77
+ dirname = File.join target, @name
78
+ FileUtils.mkdir_p dirname
79
+
80
+ self.photos{|p| p.save! dirname}
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,100 @@
1
+ class Faceoff
2
+
3
+ class Note
4
+
5
+ CONTENT_CLASSES = "note_content text_align_ltr direction_ltr clearfix"
6
+
7
##
# Returns an array of notes. Supported options:
# :start:: index of the first note to retrieve (defaults to 0).
# :limit:: maximum number of notes to retrieve (defaults to all).
# Each retrieved note is passed to the block, when one is given.

def self.retrieve_all faceoff, options={}, &block
  agent = faceoff.agent
  limit = options[:limit]
  start = options[:start] || 0

  notes = []

  loop do
    batch = notes_at_index faceoff, start + notes.length

    # Never hand out (or yield) more notes than were asked for.
    batch = batch.first(limit - notes.length) if limit
    break if batch.empty?

    batch.each(&block) if block_given?
    notes.concat batch

    break if limit && notes.length >= limit

    # notes_at_index leaves the agent on the page it just read; keep going
    # only while that page links to a following one.
    break unless agent.current_page.link_with(:text => "Next")
  end

  notes
end
34
+
35
+
36
+ ##
37
+ # Get notes on one page starting at a note index.
38
+
39
+ def self.notes_at_index faceoff, index, &block
40
+ agent = faceoff.agent
41
+ page = agent.get "/notes.php?id=#{faceoff.profile_id}&start=#{index}"
42
+
43
+ notes = []
44
+
45
+ page.search("div[@class='note_body']").each do |div|
46
+ title_link = div.search("div[@class='note_title']/a").first
47
+ id = $1 if title_link['href'] =~ %r{/note\.php\?note_id=(\d+)}
48
+ title = title_link.text
49
+ date = Time.parse div.search("div[@class='byline']").first.text
50
+ body = div.search("div[@class='#{CONTENT_CLASSES}']/div").first.text
51
+
52
+ notes << new(id, title, body, date)
53
+ yield notes.last if block_given?
54
+ end
55
+
56
+ notes
57
+ end
58
+
59
+
60
+ # Facebook id of the note.
61
+ attr_accessor :fid
62
+
63
+ # Note title.
64
+ attr_accessor :title, :name
65
+
66
+ # Note contents.
67
+ attr_accessor :body
68
+
69
+ # Date note was created at.
70
+ attr_accessor :date
71
+
72
+
73
+ def initialize id, title, body, date=nil
74
+ @fid = id
75
+ @title = @name = title
76
+ @body = body
77
+ @date = date || Time.now
78
+ end
79
+
80
+
81
+ ##
82
+ # Saves the note to the provided file path.
83
+
84
+ def save! target="./Notes"
85
+ filename = File.join(target, "#{@title}.txt")
86
+
87
+ Faceoff.safe_save(filename) do |file|
88
+ file.write self.to_s
89
+ end
90
+ end
91
+
92
+
93
+ ##
94
+ # Returns the object as a string with title, date, and body.
95
+
96
+ def to_s
97
+ "#{@title}\n#{@date.to_s}\n\n#{@body}"
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,44 @@
1
+ class Faceoff
2
+
3
+ ##
4
+ # The pagelet class is used to parse out facebook javascript dynamic content
5
+ # on a given html page.
6
+
7
+ class Pagelet
8
+
9
+ ##
10
+ # Parses an html string and returns a hash of
11
+ # Nokogiri::HTML::Document objects, indexed by page area:
12
+ # Pagelet.parse html
13
+ # #=> {:profile_photo => <#OBJ>, :top_bar => <#OBJ>...}
14
+ #
15
+ # Pagelet.parse html, :profile_photo
16
+ # #=> <#OBJ>
17
+
18
+ def self.parse html, type=nil
19
+ pagelet = nil
20
+
21
+ matches = html.scan regex_for(type)
22
+
23
+ matches.each do |name, html|
24
+ html = JSON.parse("[\"#{html}\"]").first
25
+ html_doc = Nokogiri::HTML.parse html
26
+ return html_doc if type
27
+
28
+ pagelet ||= {}
29
+ pagelet[name.to_sym] = html_doc
30
+ end
31
+
32
+ pagelet
33
+ end
34
+
35
+
36
+ ##
37
+ # Returns a regex to retrieve the given pagelet.
38
+
39
+ def self.regex_for name
40
+ name ||= "\\w+"
41
+ %r{<script>.*"pagelet_(#{name})":"(.*)"\},"page_cache":.*\}\);</script>}
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,128 @@
1
+ class Faceoff
2
+
3
+ class Photo
4
+
5
+
6
+ ##
7
+ # Retrieve all 'Photos of Me'.
8
+
9
+ def self.photos_of_me faceoff, options={}, &block
10
+ agent = faceoff.agent
11
+ start = options[:start] || 0
12
+
13
+ index = start % 15
14
+
15
+ param = "id=#{faceoff.profile_id}&v=photos&so=#{start}&ref=sgm"
16
+
17
+ page = agent.get "/profile.php?#{param}"
18
+
19
+ photo_id, options[:user_id] =
20
+ page.body.scan(%r{/photo.php\?pid=(\d+)[^\\]+;id=(\d+)})[index]
21
+
22
+ return unless photo_id
23
+
24
+ retrieve_all faceoff,
25
+ photo_id,
26
+ options.merge(:album => "Photos of me"),
27
+ &block
28
+ end
29
+
30
+
31
##
# Retrieve all photos for a given album id. To retrieve the profile
# album pass :profile as the album_id. Supported options are :start (index
# of the first photo) and :limit (maximum number of photos). Returns nil
# when the album page does not link to any photo.

def self.photos_of_album faceoff, album_id, options={}, &block
  agent = faceoff.agent
  start = options[:start] || 0

  # merge works on a copy so the caller's hash is left untouched.
  options = options.merge(:album => "Profile pictures") if album_id == :profile

  param = album_id == :profile ? "profile=1" : "aid=#{album_id}"
  param = "#{param}&id=#{faceoff.profile_id}&s=#{start}"

  page = agent.get "/album.php?#{param}"

  # An album page without a photo link has nothing to retrieve.
  photo_link = page.link_with(:href => %r{/photo.php})
  photo_id   = $1 if photo_link && photo_link.href =~ /pid=(\d+)/

  return unless photo_id

  retrieve_all faceoff, photo_id, options, &block
end
52
+
53
+
54
##
# Retrieve the photos of an album, starting at a given photo id and
# following the 'Next' links. Supported options:
# :user_id:: owner of the first photo; when set the photos are browsed in
#            the 'global' view with the logged in user as subject.
# :limit::   maximum number of photos to retrieve.
# :album::   album name given to every photo.
# Each photo is passed to the block, when one is given.

def self.retrieve_all faceoff, photo_id, options={}, &block
  agent   = faceoff.agent
  user_id = options[:user_id]
  limit   = options[:limit]

  param = "pid=#{photo_id}&id=#{user_id || faceoff.profile_id}"
  param = "#{param}&view=global&subj=#{faceoff.profile_id}" if user_id

  page = agent.get "/photo.php?#{param}"

  # A page without the "Photo x of y" counter yields no photos.
  # NOTE(review): the count is one less than the advertised total, as in
  # the original code - confirm that skipping the last photo is intended.
  photo_count = page.body =~ /Photo \d+ of (\d+)/m ? $1.to_i - 1 : 0
  photo_count = limit if limit && photo_count > limit

  photos = []

  photo_count.times do
    image = page.search("img[@id='myphoto']").first
    break unless image

    photo_id = $1 if page.uri.query =~ %r{pid=(\d+)}

    caption_node = page.search("div[@class='photocaption_text']").first
    caption      = caption_node && caption_node.text

    photos << new(photo_id, image['src'], :caption => caption,
                                          :album   => options[:album])

    yield photos.last if block_given?

    # Stop quietly on the last page, like Video.retrieve_all does.
    next_link = page.link_with(:text => 'Next')
    break unless next_link

    page = next_link.click
  end

  photos
end
91
+
92
+
93
+ # Facebook photo id.
94
+ attr_accessor :fid
95
+
96
+ # Url of the photo.
97
+ attr_accessor :url
98
+
99
+ # Caption of the photo.
100
+ attr_accessor :caption
101
+
102
+ # Album name (context) of the photo.
103
+ attr_accessor :album
104
+
105
+
106
+ def initialize id, url, options={}
107
+ @fid = id
108
+ @url = url
109
+ @caption = options[:caption]
110
+ @album = options[:album] || "Photos"
111
+ end
112
+
113
+
114
+ ##
115
+ # Saves the photo to the provided file path.
116
+
117
+ def save! target=nil
118
+ target ||= File.join ".", @album
119
+ filename = File.join(target, "#{@fid}#{File.extname(@url)}")
120
+
121
+ data = Faceoff.download(@url)
122
+
123
+ Faceoff.safe_save(filename) do |file|
124
+ file.write data
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,216 @@
1
+ class Faceoff
2
+
3
+ class User
4
+
5
+ IM_SERVICES = ['AIM', 'Yahoo', 'Skype', 'Google Talk', 'Windows Live',
6
+ 'Gadu-Gadu', 'QQ', 'ICQ', 'Yahoo Japan', 'NateOn']
7
+
8
+
9
##
# Retrieve all friends of the given faceoff user. Supported options are
# :start (index of the first friend) and :limit (maximum number of
# friends). Each user is passed to the block, when one is given. Returns an
# empty array when the response lists no members.

def self.retrieve_all faceoff, options={}
  agent = faceoff.agent
  limit = options[:limit]
  start = options[:start] || 0

  page = agent.get "/friends/ajax/superfriends.php?"+
          "__a=1&filter=afp&type=afp&value&is_pagelet=false&offset=0"

  user_ids = page.body =~ %r{"members":(\[[^\]]+\])} ? JSON.parse($1) : []
  limit  ||= user_ids.length

  # Array#[] returns nil when start lies past the end of the list.
  (user_ids[start, limit] || []).map do |uid|
    user = retrieve faceoff, uid
    yield user if block_given?
    user
  end
end
29
+
30
+
31
##
# Retrieve a single user based on the user's id or alias:
#   User.retrieve f, 12345
#   User.retrieve f, 'bob.smith'
# Returns a User filled with the details found on the profile info page.

def self.retrieve faceoff, user_id
  agent = faceoff.agent

  # Ids parsed from JSON are Integers, so match on the string form.
  path = if user_id.to_s =~ /\A\d+\z/
           "/profile.php?id=#{user_id}&v=info&ref=sgm"
         else
           "/#{user_id}?v=info"
         end

  page = agent.get path

  pagelets = Pagelet.parse page.body

  name = pagelets[:top_bar].css("h1#profile_name").first.text
  id   = $1 if
    pagelets[:top_bar].css("a#top_bar_pic").first['href'] =~
      %r{/profile\.php\?id=(\d+)}


  user = User.new id, name

  details = pagelets[:tab_content]

  # Not every profile shows a birthday; fattr then returns an empty array.
  birthday = fattr(details, 'Birthday').first.to_s.strip
  user.birthday = Time.parse birthday unless birthday.empty?

  user.emails = fattr details, 'Email'

  mobile = fattr(details, 'Mobile Number').first
  user.phones['mobile'] = mobile && mobile.gsub(/[^\da-z]/i, '')

  phone = fattr(details, 'Phone').first
  user.phones['main'] = phone && phone.gsub(/[^\da-z]/i, '')


  user.photo = pagelets[:profile_photo].css("img#profile_pic").first['src']

  user.address = {}
  user.address[:street] = fattr(details, "Address", :href => /&a2=/).first

  user.address[:city], user.address[:state] =
    fattr(details, "Address", :href => /&c2=/).first.to_s.split(", ")

  # A second part longer than two characters is stored as the country
  # instead of the state.
  user.address[:state], user.address[:country] =
    [nil, user.address[:state]] if user.address[:state].to_s.length > 2

  user.address[:zip] = fattr(details, "Address", :href => /&z2=/).first


  IM_SERVICES.each do |im|
    im_alias = fattr(details, im).first
    next unless im_alias
    user.aliases[im] = im_alias
  end

  user.aliases['Facebook'] = $1 if
    fattr(details, 'Facebook Profile').first =~ /([^\/]+)$/

  user
end
96
+
97
+
98
+ ##
99
+ # Get a facebook attribute from a given Nokogiri::HTML::Document.
100
+
101
+ def self.fattr doc, attrib, options={}
102
+ resp = []
103
+
104
+ nodes = doc.search("th[@class='label'][text()='#{attrib}:']")
105
+
106
+ return resp if nodes.empty?
107
+
108
+ nodes.first.next.children.each do |node|
109
+ text = node.name == "img" ? node['src'] : node.text.strip
110
+ next if text.empty?
111
+
112
+ if options.empty?
113
+ resp << text
114
+ else
115
+ options.each do |key, matcher|
116
+ resp << text if node[key] =~ matcher
117
+ end
118
+ end
119
+ end
120
+
121
+ resp
122
+ end
123
+
124
+
125
+ # Facebook user id.
126
+ attr_accessor :fid
127
+
128
+ # User's name.
129
+ attr_accessor :name
130
+
131
+ # User's birthday.
132
+ attr_accessor :birthday
133
+
134
+ # Facebook profile image.
135
+ attr_accessor :photo
136
+
137
+ # All emails.
138
+ attr_accessor :emails
139
+
140
+ # Instant message aliases.
141
+ attr_accessor :aliases
142
+
143
+ # Phone numbers.
144
+ attr_accessor :phones
145
+
146
+ # Current address.
147
+ attr_accessor :address
148
+
149
+
150
+ def initialize id, name
151
+ @fid = id
152
+ @name = name
153
+
154
+ @photo = nil
155
+ @phones = {}
156
+ @emails = []
157
+ @aliases = {}
158
+ @address = {}
159
+ end
160
+
161
+
162
+ ##
163
+ # Saves the user as a vcard to the provided file path.
164
+
165
+ def save! target="./Contacts", vcard=nil
166
+ vcard = to_vcard vcard
167
+
168
+ Faceoff.safe_save(File.join(target, "#{@name}.vcf")) do |file|
169
+ file.write vcard
170
+ end
171
+ end
172
+
173
+
174
##
# Returns a Vpim::Vcard object holding the user's details. The details are
# added to the given vcard, or to a newly created one when none is passed.

def to_vcard vcard=nil
  vcard ||= Vpim::Vcard.create

  vcard.make do |maker|

    maker.name{|n| n.fullname = @name }

    # The birthday is only known when the profile shows one.
    if @birthday
      maker.add_field Vpim::DirectoryInfo::Field.create('BDAY',
                        @birthday.strftime("%Y-%m-%d"))
    end

    maker.add_addr do |addr|
      addr.region     = address[:state]
      addr.locality   = address[:city]
      addr.street     = address[:street]
      addr.country    = address[:country]
      addr.postalcode = address[:zip]
    end

    emails.each{|email| maker.add_email email }

    phones.each do |type, number|
      next unless number
      maker.add_tel(number){|tel| tel.location = [type] }
    end

    aliases.each do |name, value|
      maker.add_field Vpim::DirectoryInfo::Field.create("X-#{name}", value)
    end

    # The profile picture is downloaded and embedded in the vcard.
    maker.add_photo do |photo|
      photo.image = Faceoff.download @photo
      photo.type  = File.extname(@photo)[1..-1]
    end if @photo
  end

  vcard
end
214
+ end
215
+ end
216
+
@@ -0,0 +1,82 @@
1
+ class Faceoff
2
+
3
+ class Video
4
+
5
+ TITLE_REG = /"video_title", "([^"]+)"/m
6
+ SRC_REG = /"video_src", "([^"]+)"/m
7
+ ID_REG = /"video_id", "([^"]+)"/m
8
+
9
##
# Retrieve all 'Videos of Me'. Supported options are :start (index of the
# first video) and :limit (maximum number of videos). Each video is passed
# to the block, when one is given. Returns an empty array when the video
# page has no summary or no video link.

def self.retrieve_all faceoff, options={}, &block
  agent = faceoff.agent
  limit = options[:limit]
  start = options[:start] || 0

  page    = agent.get "/video/?of=#{faceoff.profile_id}&s=#{start}"
  summary = page.search("div[@class='summary']").first

  # NOTE(review): the count is one less than the advertised total, as in
  # the original code - confirm that skipping the last video is intended.
  video_count =
    (summary && summary.text =~ /(of|all) (\d+) videos/) ? $2.to_i - 1 : 0
  video_count = limit if limit && video_count > limit

  first_link = page.link_with(:href => %r{/video.php})
  return [] unless first_link && video_count > 0

  page = first_link.click

  videos = []

  video_count.times do
    # URI.decode no longer exists; decode_www_form_component also turns '+'
    # into a space, which is only wanted for the title, so a literal '+' is
    # protected in the other values.
    video_title = URI.decode_www_form_component($1) if page.body =~ TITLE_REG
    video_src   =
      URI.decode_www_form_component($1.gsub('+', '%2B')) if page.body =~ SRC_REG
    video_id    =
      URI.decode_www_form_component($1.gsub('+', '%2B')) if page.body =~ ID_REG

    videos << new(video_id, video_src, video_title)
    yield videos.last if block_given?

    next_link = page.link_with(:text => 'Next')
    break unless next_link

    page = next_link.click
  end

  videos
end
42
+
43
+
44
+ ##
45
+ # Alias for Video::retrieve_all
46
+
47
+ def self.videos_of_me faceoff, options={}, &block
48
+ retrieve_all faceoff, options, &block
49
+ end
50
+
51
+
52
+ # Facebook video id.
53
+ attr_accessor :fid
54
+
55
+ # Url of the video.
56
+ attr_accessor :url
57
+
58
+ # Name of the video.
59
+ attr_accessor :name
60
+
61
+
62
+ def initialize id, url, name
63
+ @fid = id
64
+ @url = url
65
+ @name = name
66
+ end
67
+
68
+
69
+ ##
70
+ # Saves the video to the provided path.
71
+
72
+ def save! target="./Videos of me"
73
+ filename = File.join(target, "#{@name}#{File.extname(@url)}")
74
+
75
+ data = Faceoff.download(@url)
76
+
77
+ Faceoff.safe_save(filename) do |file|
78
+ file.write data
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,8 @@
1
+ require "test/unit"
2
+ require "faceoff"
3
+
4
+ class TestFaceoff < Test::Unit::TestCase
5
+ def test_sanity
6
+ flunk "write tests or I will kneecap you"
7
+ end
8
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: faceoff
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Jeremie Castagna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-10 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: highline
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 5
30
+ - 1
31
+ version: 1.5.1
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: mechanize
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 1
43
+ - 0
44
+ - 0
45
+ version: 1.0.0
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: json
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 1
57
+ - 2
58
+ - 0
59
+ version: 1.2.0
60
+ type: :runtime
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: vpim
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "="
68
+ - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
71
+ - 695
72
+ version: "0.695"
73
+ type: :runtime
74
+ version_requirements: *id004
75
+ - !ruby/object:Gem::Dependency
76
+ name: hoe
77
+ prerelease: false
78
+ requirement: &id005 !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ segments:
83
+ - 2
84
+ - 3
85
+ - 3
86
+ version: 2.3.3
87
+ type: :development
88
+ version_requirements: *id005
89
+ description: |-
90
+ A scraper to backup your facebook account (albums, photos, notes, users, video).
91
+ Why a scraper instead of using facebook apps? Logging in as a user gives you
92
+ access to more data and it's harder to block.
93
+ email:
94
+ - yaksnrainbows@gmail.com
95
+ executables:
96
+ - faceoff
97
+ extensions: []
98
+
99
+ extra_rdoc_files:
100
+ - History.txt
101
+ - Manifest.txt
102
+ - README.txt
103
+ files:
104
+ - History.txt
105
+ - Manifest.txt
106
+ - README.txt
107
+ - Rakefile
108
+ - bin/faceoff
109
+ - lib/faceoff.rb
110
+ - lib/faceoff/album.rb
111
+ - lib/faceoff/note.rb
112
+ - lib/faceoff/pagelet.rb
113
+ - lib/faceoff/photo.rb
114
+ - lib/faceoff/user.rb
115
+ - lib/faceoff/video.rb
116
+ - test/test_faceoff.rb
117
+ has_rdoc: true
118
+ homepage: http://github.com/yaksnrainbows/faceoff
119
+ licenses: []
120
+
121
+ post_install_message:
122
+ rdoc_options:
123
+ - --main
124
+ - README.txt
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ segments:
132
+ - 0
133
+ version: "0"
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ segments:
139
+ - 0
140
+ version: "0"
141
+ requirements: []
142
+
143
+ rubyforge_project: faceoff
144
+ rubygems_version: 1.3.6
145
+ signing_key:
146
+ specification_version: 3
147
+ summary: A scraper to backup your facebook account (albums, photos, notes, users, video)
148
+ test_files:
149
+ - test/test_faceoff.rb