faceoff 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2010-05-11
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,13 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/faceoff
6
+ lib/faceoff.rb
7
+ lib/faceoff/album.rb
8
+ lib/faceoff/note.rb
9
+ lib/faceoff/pagelet.rb
10
+ lib/faceoff/photo.rb
11
+ lib/faceoff/user.rb
12
+ lib/faceoff/video.rb
13
+ test/test_faceoff.rb
@@ -0,0 +1,80 @@
1
+ = faceoff
2
+
3
+ * http://github.com/yaksnrainbows/faceoff
4
+
5
+ == DESCRIPTION:
6
+
7
+ A scraper to backup your facebook account (albums, photos, notes, users, video).
8
+ Why a scraper instead of using facebook apps? Logging in as a user gives you
9
+ access to more data and it's harder to block.
10
+
11
+
12
+ == SYNOPSIS:
13
+
14
+ Using the gem lib:
15
+
16
+ require 'faceoff'
17
+
18
+ f = Faceoff.login "email@example.com", "password"
19
+
20
+ f.photos_of_me{|p| p.save!}
21
+ # Saves to ./Photos\ of\ me/
22
+
23
+ f.albums[0].name
24
+ #=> "Album title"
25
+
26
+ f.albums[0].photos(0..10)
27
+ #=> [<#Photo...>,<#Photo...>, ...]
28
+
29
+ f.albums[0].photos[0].url
30
+ #=> "http://photo/url.jpg"
31
+
32
+ f.notes.first.title
33
+ #=> "Title of note"
34
+
35
+
36
+ Using the gem bin:
37
+
38
+ faceoff --all -d path/to/dir
39
+ faceoff --notes --albums '1..4'
40
+ faceoff email@example.com password -A
41
+
42
+ For more information see:
43
+
44
+ faceoff -h
45
+
46
+ == REQUIREMENTS:
47
+
48
+ * mechanize
49
+ * highline
50
+ * json
51
+ * vpim
52
+
53
+ == INSTALL:
54
+
55
+ * sudo gem install faceoff
56
+
57
+ == LICENSE:
58
+
59
+ (The MIT License)
60
+
61
+ Copyright (c) 2010 Jeremie Castagna
62
+
63
+ Permission is hereby granted, free of charge, to any person obtaining
64
+ a copy of this software and associated documentation files (the
65
+ 'Software'), to deal in the Software without restriction, including
66
+ without limitation the rights to use, copy, modify, merge, publish,
67
+ distribute, sublicense, and/or sell copies of the Software, and to
68
+ permit persons to whom the Software is furnished to do so, subject to
69
+ the following conditions:
70
+
71
+ The above copyright notice and this permission notice shall be
72
+ included in all copies or substantial portions of the Software.
73
+
74
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
75
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
76
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
77
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
78
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
79
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
80
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :isolate
7
+
8
+ Hoe.spec 'faceoff' do
9
+ developer('Jeremie Castagna', 'yaksnrainbows@gmail.com')
10
+
11
+ self.extra_deps << ['highline', '>= 1.5.1']
12
+ self.extra_deps << ['mechanize', '>= 1.0.0']
13
+ self.extra_deps << ['json', '>= 1.2.0']
14
+ self.extra_deps << ['vpim', '0.695']
15
+ end
16
+
17
+ # vim: syntax=ruby
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Command line entry point: loads the faceoff library and hands ARGV to it.

require 'rubygems'

begin
  require 'faceoff'
rescue LoadError
  # Fall back to the lib directory that sits next to this script. The path is
  # resolved from __FILE__, so it works from any working directory and does
  # not rely on "." being on the load path.
  $:.unshift File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
  require 'faceoff'
end

Faceoff.run ARGV
@@ -0,0 +1,390 @@
1
+ # Odd hack that makes the vpim-mechanize conflict disappear.
2
+ Vpim = Module.new
3
+
4
+ # Hack to fix old ruby version support of Vpim
5
+ class Array
6
+ def to_str
7
+ self.to_s
8
+ end
9
+ end
10
+
11
+
12
require 'rubygems'
require 'mechanize'
require 'highline'

require 'vpim/vcard'

require 'cgi'
require 'fileutils'
require 'json'
require 'optparse'
require 'time'
22
+
23
+ class Faceoff
24
+
25
+ require 'faceoff/pagelet'
26
+ require 'faceoff/note'
27
+ require 'faceoff/photo'
28
+ require 'faceoff/album'
29
+ require 'faceoff/video'
30
+ require 'faceoff/user'
31
+
32
+
33
+ # Version of the gem.
34
+ VERSION = '1.0.0'
35
+
36
+ # Command line actions available.
37
+ ACTIONS = %w{albums friends notes photos_of_me profile_pictures videos_of_me}
38
+
39
+ # Default directory to save data to.
40
+ DEFAULT_DIR = "faceoff-#{Time.now.to_i}"
41
+
42
+
43
+ ##
44
+ # Create a new Faceoff instance and login.
45
+
46
+ def self.login email, password
47
+ inst = self.new email, password
48
+ inst.login
49
+ inst if inst.logged_in?
50
+ end
51
+
52
+
53
+ ##
54
+ # Parse ARGVs
55
+
56
+ def self.parse_args argv
57
+ options = {}
58
+
59
+ opts = OptionParser.new do |opt|
60
+ opt.program_name = File.basename $0
61
+ opt.version = Faceoff::VERSION
62
+ opt.release = nil
63
+
64
+ opt.banner = <<-STR
65
+ Faceoff is a facebook scraper that allows you to download and backup
66
+ your content in a reusable format.
67
+
68
+ Usage:
69
+ #{opt.program_name} -h/--help
70
+ #{opt.program_name} -v/--version
71
+ #{opt.program_name} [facebook_email [password]] [options...]
72
+
73
+ Examples:
74
+ #{opt.program_name} example@yahoo.com --all
75
+ #{opt.program_name} example@yahoo.com --albums '2..3'
76
+ #{opt.program_name} example@yahoo.com --friends --profilepics 0
77
+
78
+ Options:
79
+ STR
80
+
81
+ opt.on('-A', '--all', 'Retrieve all facebook data') do
82
+ ACTIONS.each{|a| options[a] = true }
83
+ end
84
+
85
+ opt.on('-a', '--albums [RANGE]', 'Retrieve albums') do |range|
86
+ options['albums'] = parse_range range
87
+ end
88
+
89
+ opt.on('-f', '--friends [RANGE]', 'Retrieve contacts') do |range|
90
+ options['friends'] = parse_range range
91
+ end
92
+
93
+ opt.on('-n', '--notes [RANGE]', 'Retrieve notes') do |range|
94
+ options['notes'] = parse_range range
95
+ end
96
+
97
+ opt.on('-p', '--photosofme [RANGE]', 'Retrieve photos of me') do |range|
98
+ options['photos_of_me'] = parse_range range
99
+ end
100
+
101
+ opt.on('-P', '--profilepics [RANGE]', 'Retrieve profile pics') do |range|
102
+ options['profile_pictures'] = parse_range range
103
+ end
104
+
105
+ opt.on('-V', '--videosofme [RANGE]', 'Retrieve videos of me') do |range|
106
+ options['videos_of_me'] = parse_range range
107
+ end
108
+
109
+ opt.on('-d', '--directory PATH', 'Directory to save to') do |path|
110
+ options['dir'] = path
111
+ end
112
+
113
+ opt.on('-z', '--gzip [NAME]', 'Zip content when done') do |name|
114
+ options['gzip'] = name || true
115
+ end
116
+ end
117
+
118
+ opts.parse! argv
119
+
120
+ options['dir'] ||= "."
121
+
122
+ options['email'], options['password'] = argv
123
+
124
+ options
125
+ end
126
+
127
+
128
+ ##
129
+ # Takes a range or number as a string and returns a range or integer.
130
+ # Returns true if no range or int value is found.
131
+
132
+ def self.parse_range str
133
+ str = str.to_s.strip
134
+
135
+ return Range.new($1.to_i, $2.to_i) if str =~ /^(\d+)\.\.(\d+)$/
136
+ return str.to_i if str == str.to_i.to_s
137
+
138
+ true
139
+ end
140
+
141
+
142
+ ##
143
+ # Run from the command line.
144
+
145
+ def self.run argv
146
+ options = parse_args argv
147
+
148
+ $stdin.sync
149
+ input = HighLine.new $stdin
150
+
151
+ faceoff = nil
152
+
153
+ until faceoff do
154
+ email = options['email'] || input.ask("Facebook Email: ")
155
+ password = options['password'] ||
156
+ input.ask("Facebook Password: "){|i| i.echo = false}
157
+
158
+ faceoff = login email, password
159
+ options['password'] = nil unless faceoff
160
+ end
161
+
162
+ ACTIONS.each do |action|
163
+ next unless options[action]
164
+ dir = File.join options['dir'], action.capitalize.gsub("_", " ")
165
+
166
+ faceoff.send(action, options[action]) do |item|
167
+ name = item.name rescue item.fid
168
+ puts "Saving #{action} '#{name}'"
169
+ item.save! dir
170
+ end
171
+ end
172
+ end
173
+
174
+
175
+ ##
176
+   # Download the resource at a url and return the response body.
177
+
178
+ def self.download url
179
+ uri = URI.parse url
180
+
181
+ resp = Net::HTTP.start(uri.host) do |http|
182
+ http.get uri.path
183
+ end
184
+
185
+ resp.body
186
+ end
187
+
188
+
189
+ ##
190
+ # Safely save a file; rename it if name exists.
191
+
192
+ def self.safe_save filename, &block
193
+ dir = File.dirname(filename)
194
+
195
+ FileUtils.mkdir_p dir
196
+ raise "Invalid directory #{dir}" unless File.directory? dir
197
+
198
+ test_filename = filename
199
+
200
+ i = 0
201
+ while File.file?(test_filename)
202
+ i = i.next
203
+ ext = File.extname filename
204
+
205
+ test_filename = File.join File.dirname(filename),
206
+ "#{File.basename(filename, ext)} (#{i})#{ext}"
207
+ end
208
+
209
+ filename = test_filename
210
+
211
+ File.open(filename, "w+") do |f|
212
+ block.call(f) if block_given?
213
+ end
214
+ end
215
+
216
+
217
+ # Mechanize agent
218
+ attr_accessor :agent
219
+
220
+ # User email
221
+ attr_accessor :email
222
+
223
+ # User password
224
+ attr_accessor :password
225
+
226
+ # Facebook profile id
227
+ attr_writer :profile_id
228
+
229
+
230
+ ##
231
+ # Instantiate with user information.
232
+
233
+ def initialize email, password
234
+ @email = email
235
+ @password = password
236
+ @profile_id = nil
237
+
238
+ @agent = Mechanize.new
239
+ @agent.user_agent_alias = 'Mac Safari'
240
+ @agent.redirect_ok = true
241
+
242
+ @albums = nil
243
+ @friends = nil
244
+ @notes = nil
245
+ @photos_of_me = nil
246
+ @profile_pics = nil
247
+ @videos_of_me = nil
248
+ end
249
+
250
+
251
+ ##
252
+ # Returns an options hash based on the type of input:
253
+ # bracket_for 5
254
+ # # => {:limit => 5}
255
+ #
256
+ # bracket_for 3..5
257
+ # # => {:limit => 3, :start => 3}
258
+ #
259
+ # bracket_for true
260
+ # # => {}
261
+
262
+ def bracket_for obj
263
+ case obj
264
+ when Fixnum then {:limit => obj}
265
+ when Range then {:limit => obj.entries.length, :start => obj.first}
266
+ else {}
267
+ end
268
+ end
269
+
270
+
271
+ ##
272
+ # Check if we're logged into facebook.
273
+
274
+ def logged_in?
275
+ url = URI.parse("http://www.facebook.com")
276
+ #puts @agent.cookie_jar.cookies(url).inspect
277
+ @agent.cookie_jar.cookies(url).select{|c| c.name == "c_user" }.first
278
+ end
279
+
280
+
281
+ ##
282
+ # Login to facebook.
283
+
284
+ def login
285
+ page = @agent.get("http://www.facebook.com")
286
+ form = page.form_with \
287
+ :action => 'https://login.facebook.com/login.php?login_attempt=1'
288
+
289
+ return unless form
290
+
291
+ form.email = @email
292
+ form.pass = @password
293
+
294
+ page = form.submit
295
+ logged_in?
296
+ end
297
+
298
+
299
+ ##
300
+ # Returns an array of photo albums.
301
+
302
+ def albums reload=false, &block
303
+ return @albums if @albums && !reload
304
+ @albums = Album.retrieve_all self, bracket_for(reload), &block
305
+ end
306
+
307
+
308
+ ##
309
+ # Returns the logged in user's User object.
310
+
311
+ def user reload=false
312
+ return @user if @user && !reload
313
+ @user = User.retrieve self, profile_id
314
+ end
315
+
316
+
317
+ ##
318
+ # Returns an array of friends.
319
+
320
+ def friends reload=false, &block
321
+ if @friends && !reload
322
+ @friends.each &block if block_given?
323
+ return @friends
324
+ end
325
+
326
+ @friends = User.retrieve_all self, bracket_for(reload), &block
327
+ end
328
+
329
+
330
+ ##
331
+ # Returns an array of notes.
332
+
333
+ def notes reload=false, &block
334
+ if @notes && !reload
335
+ @notes.each &block if block_given?
336
+ return @notes
337
+ end
338
+
339
+ @notes = Note.retrieve_all self, bracket_for(reload), &block
340
+ end
341
+
342
+
343
+ ##
344
+ # Returns an array of photos of me.
345
+
346
+ def photos_of_me reload=false, &block
347
+ if @photos_of_me && !reload
348
+ @photos_of_me.each &block if block_given?
349
+ return @photos_of_me
350
+ end
351
+
352
+ @photos_of_me = Photo.photos_of_me self, bracket_for(reload), &block
353
+ end
354
+
355
+
356
+ ##
357
+ # Returns an array of profile pictures photos.
358
+
359
+ def profile_pictures reload=false, &block
360
+ if @profile_pics && !reload
361
+ @profile_pics.each &block if block_given?
362
+ return @profile_pics
363
+ end
364
+
365
+ @profile_pics =
366
+ Photo.photos_of_album self, :profile, bracket_for(reload), &block
367
+ end
368
+
369
+
370
+ ##
371
+ # Returns the facebook profile id. Fetches it from the page if not set.
372
+
373
+ def profile_id reload=false
374
+ return @profile_id if @profile_id && !reload
375
+ @profile_id = $1 if @agent.current_page.body =~ /\\"user\\":(\d+)/m
376
+ end
377
+
378
+
379
+ ##
380
+ # Returns an array of videos of me.
381
+
382
+ def videos_of_me reload=false, &block
383
+ if @videos_of_me && !reload
384
+ @videos_of_me.each &block if block_given?
385
+ return @videos_of_me
386
+ end
387
+
388
+ @videos_of_me = Video.videos_of_me self, bracket_for(reload), &block
389
+ end
390
+ end
@@ -0,0 +1,83 @@
1
+ class Faceoff
2
+
3
+ class Album
4
+
5
+
6
+ ##
7
+ # Returns an array of albums for the current user. Pass the :user_id option
8
+ # to override the logged in user.
9
+
10
+ def self.retrieve_all faceoff, options={}, &block
11
+ albums = []
12
+ limit = options[:limit]
13
+ start = options[:start] || 0
14
+
15
+ agent = faceoff.agent
16
+ user_id = options[:user_id] || faceoff.profile_id
17
+
18
+ page = agent.get "/photos.php?id=#{user_id}"
19
+
20
+ xpath = "table[@class='uiGrid fbPhotosGrid']/tbody/"+
21
+ "/div[@class='pls photoDetails']/a"
22
+
23
+ nodes = page.search(xpath)
24
+
25
+ limit ||= nodes.length
26
+
27
+ nodes[start, limit].each do |node|
28
+ album_id = $1 if node['href'] =~ /aid=(\d+)/
29
+ album_name = node.text
30
+
31
+ albums << new(faceoff, album_id, album_name)
32
+
33
+ yield albums.last if block_given?
34
+ end
35
+
36
+ albums
37
+ end
38
+
39
+
40
+ # Facebook album id.
41
+ attr_accessor :fid
42
+
43
+ # Name of the album.
44
+ attr_accessor :name
45
+
46
+
47
+ def initialize faceoff, id, name
48
+ @faceoff = faceoff
49
+ @agent = faceoff.agent
50
+
51
+ @fid = id
52
+ @name = name
53
+ @photos = nil
54
+ end
55
+
56
+
57
+ ##
58
+ # Returns an array of photos.
59
+
60
+ def photos reload=false, &block
61
+ if @photos && !reload
62
+ @photos.each &block if block_given?
63
+ return @photos
64
+ end
65
+
66
+ options = {}
67
+ options[:limit] = reload if Fixnum === reload
68
+
69
+ @photos = Photo.photos_of_album @faceoff, @fid, options, &block
70
+ end
71
+
72
+
73
+ ##
74
+ # Save the album to the provided directory.
75
+
76
+ def save! target="./Albums"
77
+ dirname = File.join target, @name
78
+ FileUtils.mkdir_p dirname
79
+
80
+ self.photos{|p| p.save! dirname}
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,100 @@
1
+ class Faceoff
2
+
3
+ class Note
4
+
5
+ CONTENT_CLASSES = "note_content text_align_ltr direction_ltr clearfix"
6
+
7
+ ##
8
+ # Returns an array of notes.
9
+
10
+ def self.retrieve_all faceoff, options={}, &block
11
+ agent = faceoff.agent
12
+ limit = options[:limit]
13
+ start = options[:start] || 0
14
+
15
+ note_count = limit
16
+ index = start
17
+
18
+ notes = []
19
+ page = agent.current_page
20
+
21
+ while !limit || notes.length < limit && page.link_with(:text => "Next") do
22
+ notes = notes.concat notes_at_index(faceoff, index, &block)
23
+ page = agent.current_page
24
+ index = start + notes.length
25
+
26
+ last_link = page.link_with(:text => 'Last')
27
+ limit ||= $1.to_i + 10 if
28
+ last_link && last_link.href =~ %r{/notes.php\?id=\d+&start=(\d+)}
29
+ limit ||= 10
30
+ end
31
+
32
+ notes[0..(limit-1)]
33
+ end
34
+
35
+
36
+ ##
37
+ # Get notes on one page starting at a note index.
38
+
39
+ def self.notes_at_index faceoff, index, &block
40
+ agent = faceoff.agent
41
+ page = agent.get "/notes.php?id=#{faceoff.profile_id}&start=#{index}"
42
+
43
+ notes = []
44
+
45
+ page.search("div[@class='note_body']").each do |div|
46
+ title_link = div.search("div[@class='note_title']/a").first
47
+ id = $1 if title_link['href'] =~ %r{/note\.php\?note_id=(\d+)}
48
+ title = title_link.text
49
+ date = Time.parse div.search("div[@class='byline']").first.text
50
+ body = div.search("div[@class='#{CONTENT_CLASSES}']/div").first.text
51
+
52
+ notes << new(id, title, body, date)
53
+ yield notes.last if block_given?
54
+ end
55
+
56
+ notes
57
+ end
58
+
59
+
60
+ # Facebook id of the note.
61
+ attr_accessor :fid
62
+
63
+ # Note title.
64
+ attr_accessor :title, :name
65
+
66
+ # Note contents.
67
+ attr_accessor :body
68
+
69
+ # Date note was created at.
70
+ attr_accessor :date
71
+
72
+
73
+ def initialize id, title, body, date=nil
74
+ @fid = id
75
+ @title = @name = title
76
+ @body = body
77
+ @date = date || Time.now
78
+ end
79
+
80
+
81
+ ##
82
+ # Saves the note to the provided file path.
83
+
84
+ def save! target="./Notes"
85
+ filename = File.join(target, "#{@title}.txt")
86
+
87
+ Faceoff.safe_save(filename) do |file|
88
+ file.write self.to_s
89
+ end
90
+ end
91
+
92
+
93
+ ##
94
+ # Returns the object as a string with title, date, and body.
95
+
96
+ def to_s
97
+ "#{@title}\n#{@date.to_s}\n\n#{@body}"
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,44 @@
1
+ class Faceoff
2
+
3
+ ##
4
+ # The pagelet class is used to parse out facebook javascript dynamic content
5
+ # on a given html page.
6
+
7
+ class Pagelet
8
+
9
+ ##
10
+ # Parses an html string and returns a hash of
11
+ # Nokogiri::HTML::Document objects, indexed by page area:
12
+ # Pagelet.parse html
13
+ # #=> {:profile_photo => <#OBJ>, :top_bar => <#OBJ>...}
14
+ #
15
+ # Pagelet.parse html, :profile_photo
16
+ # #=> <#OBJ>
17
+
18
+ def self.parse html, type=nil
19
+ pagelet = nil
20
+
21
+ matches = html.scan regex_for(type)
22
+
23
+ matches.each do |name, html|
24
+ html = JSON.parse("[\"#{html}\"]").first
25
+ html_doc = Nokogiri::HTML.parse html
26
+ return html_doc if type
27
+
28
+ pagelet ||= {}
29
+ pagelet[name.to_sym] = html_doc
30
+ end
31
+
32
+ pagelet
33
+ end
34
+
35
+
36
+ ##
37
+ # Returns a regex to retrieve the given pagelet.
38
+
39
+ def self.regex_for name
40
+ name ||= "\\w+"
41
+ %r{<script>.*"pagelet_(#{name})":"(.*)"\},"page_cache":.*\}\);</script>}
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,128 @@
1
+ class Faceoff
2
+
3
+ class Photo
4
+
5
+
6
+ ##
7
+ # Retrieve all 'Photos of Me'.
8
+
9
+ def self.photos_of_me faceoff, options={}, &block
10
+ agent = faceoff.agent
11
+ start = options[:start] || 0
12
+
13
+ index = start % 15
14
+
15
+ param = "id=#{faceoff.profile_id}&v=photos&so=#{start}&ref=sgm"
16
+
17
+ page = agent.get "/profile.php?#{param}"
18
+
19
+ photo_id, options[:user_id] =
20
+ page.body.scan(%r{/photo.php\?pid=(\d+)[^\\]+;id=(\d+)})[index]
21
+
22
+ return unless photo_id
23
+
24
+ retrieve_all faceoff,
25
+ photo_id,
26
+ options.merge(:album => "Photos of me"),
27
+ &block
28
+ end
29
+
30
+
31
+ ##
32
+ # Retrieve all photos for a given album id. To retrieve the profile
33
+ # album pass :profile as the album_id.
34
+
35
+ def self.photos_of_album faceoff, album_id, options={}, &block
36
+ agent = faceoff.agent
37
+ start = options[:start] || 0
38
+
39
+ options[:album] = "Profile pictures" if album_id == :profile
40
+
41
+ param = album_id == :profile ? "profile=1" : "aid=#{album_id}"
42
+ param = "#{param}&id=#{faceoff.profile_id}&s=#{start}"
43
+
44
+ page = agent.get "/album.php?#{param}"
45
+ photo_link = page.link_with(:href => %r{/photo.php}).href
46
+ photo_id = $1 if photo_link =~ /pid=(\d+)/
47
+
48
+ return unless photo_id
49
+
50
+ retrieve_all faceoff, photo_id, options, &block
51
+ end
52
+
53
+
54
+ ##
55
+ # Retrieve all photos in an album, starting at a given photo id.
56
+     # Passing the :user_id option requests the global view (view=global) with
57
+     # the logged in user as subject; this is how 'Photos of Me' are walked.
58
+
59
+ def self.retrieve_all faceoff, photo_id, options={}, &block
60
+ agent = faceoff.agent
61
+ user_id = options[:user_id]
62
+ limit = options[:limit]
63
+
64
+ param = "pid=#{photo_id}&id=#{user_id || faceoff.profile_id}"
65
+ param = "#{param}&view=global&subj=#{faceoff.profile_id}" if user_id
66
+
67
+ page = agent.get "/photo.php?#{param}"
68
+
69
+ photo_count = ($1.to_i - 1) if page.body =~ /Photo \d+ of (\d+)/m
70
+ photo_count = limit if limit && photo_count > limit
71
+
72
+ photos = []
73
+
74
+ photo_count.times do |i|
75
+ url = page.search("img[@id='myphoto']").first['src']
76
+ photo_id = $1 if page.uri.query =~ %r{pid=(\d+)}
77
+
78
+ caption =
79
+ page.search("div[@class='photocaption_text']").first.text rescue nil
80
+
81
+ photos << new(photo_id, url, :caption => caption,
82
+ :album => options[:album])
83
+
84
+ yield photos.last if block_given?
85
+
86
+ page = page.link_with(:text => 'Next').click
87
+ end
88
+
89
+ photos
90
+ end
91
+
92
+
93
+ # Facebook photo id.
94
+ attr_accessor :fid
95
+
96
+ # Url of the photo.
97
+ attr_accessor :url
98
+
99
+ # Caption of the photo.
100
+ attr_accessor :caption
101
+
102
+ # Album name (context) of the photo.
103
+ attr_accessor :album
104
+
105
+
106
+ def initialize id, url, options={}
107
+ @fid = id
108
+ @url = url
109
+ @caption = options[:caption]
110
+ @album = options[:album] || "Photos"
111
+ end
112
+
113
+
114
+ ##
115
+ # Saves the photo to the provided file path.
116
+
117
+ def save! target=nil
118
+ target ||= File.join ".", @album
119
+ filename = File.join(target, "#{@fid}#{File.extname(@url)}")
120
+
121
+ data = Faceoff.download(@url)
122
+
123
+ Faceoff.safe_save(filename) do |file|
124
+ file.write data
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,216 @@
1
+ class Faceoff
2
+
3
+ class User
4
+
5
+ IM_SERVICES = ['AIM', 'Yahoo', 'Skype', 'Google Talk', 'Windows Live',
6
+ 'Gadu-Gadu', 'QQ', 'ICQ', 'Yahoo Japan', 'NateOn']
7
+
8
+
9
+ ##
10
+ # Retrieve all friends of the given faceoff user.
11
+
12
+ def self.retrieve_all faceoff, options={}
13
+ agent = faceoff.agent
14
+ limit = options[:limit]
15
+ start = options[:start] || 0
16
+
17
+ page = agent.get "/friends/ajax/superfriends.php?"+
18
+ "__a=1&filter=afp&type=afp&value&is_pagelet=false&offset=0"
19
+
20
+ user_ids = JSON.parse $1 if page.body =~ %r{"members":(\[[^\]]+\])}
21
+ limit ||= user_ids.length
22
+
23
+ user_ids[start, limit].map do |uid|
24
+ user = retrieve faceoff, uid
25
+ yield user if block_given?
26
+ user
27
+ end
28
+ end
29
+
30
+
31
+ ##
32
+ # Retrieve a single user based on the user's id or alias:
33
+ # User.retrieve f, 12345
34
+ # User.retrieve f, 'bob.smith'
35
+
36
+ def self.retrieve faceoff, user_id
37
+ agent = faceoff.agent
38
+
39
+ path = if user_id =~ /^\d+$/
40
+ "/profile.php?id=#{user_id}&v=info&ref=sgm"
41
+ else
42
+ "/#{user_id}?v=info"
43
+ end
44
+
45
+ page = agent.get path
46
+
47
+ pagelets = Pagelet.parse page.body
48
+
49
+ name = pagelets[:top_bar].css("h1#profile_name").first.text
50
+ id = $1 if
51
+ pagelets[:top_bar].css("a#top_bar_pic").first['href'] =~
52
+ %r{/profile\.php\?id=(\d+)}
53
+
54
+
55
+ user = User.new id, name
56
+
57
+ details = pagelets[:tab_content]
58
+
59
+ birthday = fattr(details, 'Birthday')[0].strip
60
+ user.birthday = Time.parse birthday if birthday && !birthday.empty?
61
+
62
+ user.emails = fattr details, 'Email'
63
+
64
+ user.phones['mobile'] =
65
+ fattr(details, 'Mobile Number').first.gsub(/[^\da-z]/i, '') rescue nil
66
+
67
+ user.phones['main'] =
68
+ fattr(details, 'Phone').first.gsub(/[^\da-z]/i, '') rescue nil
69
+
70
+
71
+ user.photo = pagelets[:profile_photo].css("img#profile_pic").first['src']
72
+
73
+ user.address = {}
74
+ user.address[:street] = fattr(details, "Address", :href => /&a2=/).first
75
+
76
+ user.address[:city], user.address[:state] =
77
+ fattr(details, "Address", :href => /&c2=/).first.to_s.split(", ")
78
+
79
+ user.address[:state], user.address[:country] =
80
+ [nil, user.address[:state]] if user.address[:state].to_s.length > 2
81
+
82
+ user.address[:zip] = fattr(details, "Address", :href => /&z2=/).first
83
+
84
+
85
+ IM_SERVICES.each do |im|
86
+ im_alias = fattr(details, im).first
87
+ next unless im_alias
88
+ user.aliases[im] = im_alias
89
+ end
90
+
91
+ user.aliases['Facebook'] = $1 if
92
+ fattr(details, 'Facebook Profile').first =~ /([^\/]+)$/
93
+
94
+ user
95
+ end
96
+
97
+
98
+ ##
99
+ # Get a facebook attribute from a given Nokogiri::HTML::Document.
100
+
101
+ def self.fattr doc, attrib, options={}
102
+ resp = []
103
+
104
+ nodes = doc.search("th[@class='label'][text()='#{attrib}:']")
105
+
106
+ return resp if nodes.empty?
107
+
108
+ nodes.first.next.children.each do |node|
109
+ text = node.name == "img" ? node['src'] : node.text.strip
110
+ next if text.empty?
111
+
112
+ if options.empty?
113
+ resp << text
114
+ else
115
+ options.each do |key, matcher|
116
+ resp << text if node[key] =~ matcher
117
+ end
118
+ end
119
+ end
120
+
121
+ resp
122
+ end
123
+
124
+
125
+ # Facebook user id.
126
+ attr_accessor :fid
127
+
128
+ # User's name.
129
+ attr_accessor :name
130
+
131
+ # User's birthday.
132
+ attr_accessor :birthday
133
+
134
+ # Facebook profile image.
135
+ attr_accessor :photo
136
+
137
+ # All emails.
138
+ attr_accessor :emails
139
+
140
+ # Instant message aliases.
141
+ attr_accessor :aliases
142
+
143
+ # Phone numbers.
144
+ attr_accessor :phones
145
+
146
+ # Current address.
147
+ attr_accessor :address
148
+
149
+
150
+ def initialize id, name
151
+ @fid = id
152
+ @name = name
153
+
154
+ @photo = nil
155
+ @phones = {}
156
+ @emails = []
157
+ @aliases = {}
158
+ @address = {}
159
+ end
160
+
161
+
162
+ ##
163
+ # Saves the user as a vcard to the provided file path.
164
+
165
+ def save! target="./Contacts", vcard=nil
166
+ vcard = to_vcard vcard
167
+
168
+ Faceoff.safe_save(File.join(target, "#{@name}.vcf")) do |file|
169
+ file.write vcard
170
+ end
171
+ end
172
+
173
+
174
+ ##
175
+     # Returns a Vpim::Vcard object.
176
+
177
+ def to_vcard vcard=nil
178
+ vcard ||= Vpim::Vcard.create
179
+
180
+ vcard.make do |maker|
181
+
182
+ maker.name{|n| n.fullname = @name }
183
+
184
+ maker.add_field Vpim::DirectoryInfo::Field.create('BDAY',
185
+ @birthday.strftime("%Y-%m-%d"))
186
+
187
+ maker.add_addr do |addr|
188
+ addr.region = address[:state]
189
+ addr.locality = address[:city]
190
+ addr.street = address[:street]
191
+ addr.country = address[:country]
192
+ addr.postalcode = address[:zip]
193
+ end
194
+
195
+ emails.each{|email| maker.add_email email }
196
+
197
+ phones.each do |type, number|
198
+ next unless number
199
+ maker.add_tel(number){|tel| tel.location = [type] }
200
+ end
201
+
202
+ aliases.each do |name, value|
203
+ maker.add_field Vpim::DirectoryInfo::Field.create("X-#{name}", value)
204
+ end
205
+
206
+ maker.add_photo do |photo|
207
+ photo.image = Faceoff.download @photo
208
+ photo.type = File.extname(@photo)[1..-1]
209
+ end if @photo
210
+ end
211
+
212
+ vcard
213
+ end
214
+ end
215
+ end
216
+
@@ -0,0 +1,82 @@
1
+ class Faceoff
2
+
3
+ class Video
4
+
5
+ TITLE_REG = /"video_title", "([^"]+)"/m
6
+ SRC_REG = /"video_src", "([^"]+)"/m
7
+ ID_REG = /"video_id", "([^"]+)"/m
8
+
9
+ ##
10
+ # Retrieve all 'Videos of Me'.
11
+
12
+ def self.retrieve_all faceoff, options={}, &block
13
+ agent = faceoff.agent
14
+ limit = options[:limit]
15
+ start = options[:start] || 0
16
+
17
+ page = agent.get "/video/?of=#{faceoff.profile_id}&s=#{start}"
18
+ summary = page.search("div[@class='summary']").first.text
19
+ video_count = ($2.to_i - 1) if summary =~ /(of|all) (\d+) videos/
20
+ video_count = limit if limit && video_count > limit
21
+
22
+ page = page.link_with(:href => %r{/video.php}).click
23
+
24
+ videos = []
25
+
26
+ video_count.times do |i|
27
+ video_title = URI.decode($1.gsub('+', ' ')) if page.body =~ TITLE_REG
28
+ video_src = URI.decode($1) if page.body =~ SRC_REG
29
+ video_id = URI.decode($1) if page.body =~ ID_REG
30
+
31
+ videos << new(video_id, video_src, video_title)
32
+ yield videos.last if block_given?
33
+
34
+ next_link = page.link_with(:text => 'Next')
35
+ break unless next_link
36
+
37
+ page = next_link.click
38
+ end
39
+
40
+ videos
41
+ end
42
+
43
+
44
+ ##
45
+ # Alias for Video::retrieve_all
46
+
47
+ def self.videos_of_me faceoff, options={}, &block
48
+ retrieve_all faceoff, options, &block
49
+ end
50
+
51
+
52
+ # Facebook video id.
53
+ attr_accessor :fid
54
+
55
+ # Url of the video.
56
+ attr_accessor :url
57
+
58
+ # Name of the video.
59
+ attr_accessor :name
60
+
61
+
62
+ def initialize id, url, name
63
+ @fid = id
64
+ @url = url
65
+ @name = name
66
+ end
67
+
68
+
69
+ ##
70
+     # Saves the video to the provided path.
71
+
72
+ def save! target="./Videos of me"
73
+ filename = File.join(target, "#{@name}#{File.extname(@url)}")
74
+
75
+ data = Faceoff.download(@url)
76
+
77
+ Faceoff.safe_save(filename) do |file|
78
+ file.write data
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,8 @@
1
+ require "test/unit"
2
+ require "faceoff"
3
+
4
+ class TestFaceoff < Test::Unit::TestCase
5
+ def test_sanity
6
+ flunk "write tests or I will kneecap you"
7
+ end
8
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: faceoff
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Jeremie Castagna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-10 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: highline
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 5
30
+ - 1
31
+ version: 1.5.1
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: mechanize
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 1
43
+ - 0
44
+ - 0
45
+ version: 1.0.0
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: json
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 1
57
+ - 2
58
+ - 0
59
+ version: 1.2.0
60
+ type: :runtime
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: vpim
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "="
68
+ - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
71
+ - 695
72
+ version: "0.695"
73
+ type: :runtime
74
+ version_requirements: *id004
75
+ - !ruby/object:Gem::Dependency
76
+ name: hoe
77
+ prerelease: false
78
+ requirement: &id005 !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ segments:
83
+ - 2
84
+ - 3
85
+ - 3
86
+ version: 2.3.3
87
+ type: :development
88
+ version_requirements: *id005
89
+ description: |-
90
+ A scraper to backup your facebook account (albums, photos, notes, users, video).
91
+ Why a scraper instead of using facebook apps? Logging in as a user gives you
92
+ access to more data and it's harder to block.
93
+ email:
94
+ - yaksnrainbows@gmail.com
95
+ executables:
96
+ - faceoff
97
+ extensions: []
98
+
99
+ extra_rdoc_files:
100
+ - History.txt
101
+ - Manifest.txt
102
+ - README.txt
103
+ files:
104
+ - History.txt
105
+ - Manifest.txt
106
+ - README.txt
107
+ - Rakefile
108
+ - bin/faceoff
109
+ - lib/faceoff.rb
110
+ - lib/faceoff/album.rb
111
+ - lib/faceoff/note.rb
112
+ - lib/faceoff/pagelet.rb
113
+ - lib/faceoff/photo.rb
114
+ - lib/faceoff/user.rb
115
+ - lib/faceoff/video.rb
116
+ - test/test_faceoff.rb
117
+ has_rdoc: true
118
+ homepage: http://github.com/yaksnrainbows/faceoff
119
+ licenses: []
120
+
121
+ post_install_message:
122
+ rdoc_options:
123
+ - --main
124
+ - README.txt
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ segments:
132
+ - 0
133
+ version: "0"
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ segments:
139
+ - 0
140
+ version: "0"
141
+ requirements: []
142
+
143
+ rubyforge_project: faceoff
144
+ rubygems_version: 1.3.6
145
+ signing_key:
146
+ specification_version: 3
147
+ summary: A scraper to backup your facebook account (albums, photos, notes, users, video)
148
+ test_files:
149
+ - test/test_faceoff.rb