briancollins-blackbook 1.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES.markdown +44 -0
- data/Manifest.txt +59 -0
- data/README.markdown +74 -0
- data/VERSION.yml +4 -0
- data/lib/blackbook.rb +83 -0
- data/lib/blackbook/exporter/base.rb +16 -0
- data/lib/blackbook/exporter/vcf.rb +45 -0
- data/lib/blackbook/exporter/xml.rb +28 -0
- data/lib/blackbook/importer/aol.rb +94 -0
- data/lib/blackbook/importer/base.rb +39 -0
- data/lib/blackbook/importer/csv.rb +68 -0
- data/lib/blackbook/importer/freenet.rb +62 -0
- data/lib/blackbook/importer/gmail.rb +73 -0
- data/lib/blackbook/importer/gmx.rb +68 -0
- data/lib/blackbook/importer/hotmail.rb +128 -0
- data/lib/blackbook/importer/page_scraper.rb +52 -0
- data/lib/blackbook/importer/web.de.rb +67 -0
- data/lib/blackbook/importer/yahoo.rb +73 -0
- data/test/config/credentials.yml.example +9 -0
- data/test/fixtures/aol_application_page.html +566 -0
- data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
- data/test/fixtures/aol_contacts.html +102 -0
- data/test/fixtures/aol_login_response_stage_1.html +158 -0
- data/test/fixtures/aol_login_response_stage_2.html +559 -0
- data/test/fixtures/aol_login_response_stage_3.html +48 -0
- data/test/fixtures/aol_login_response_stage_4.html +404 -0
- data/test/fixtures/aol_new_contacts.html +431 -0
- data/test/fixtures/gmail.csv +3 -0
- data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
- data/test/fixtures/gmail_contacts.html +228 -0
- data/test/fixtures/gmail_login_response_stage_1.html +556 -0
- data/test/fixtures/gmail_login_response_stage_2.html +1 -0
- data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
- data/test/fixtures/gmail_login_response_stage_3.html +249 -0
- data/test/fixtures/gmail_redirect_body.html +10 -0
- data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
- data/test/fixtures/hotmail_contacts.html +262 -0
- data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
- data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
- data/test/fixtures/hotmail_login_response_stage_3.html +519 -0
- data/test/fixtures/hotmail_scrape_first_page.html +77 -0
- data/test/fixtures/hotmail_scrape_response_stage_1.html +90 -0
- data/test/fixtures/hotmail_scrape_response_stage_2.html +77 -0
- data/test/fixtures/hotmail_scrape_response_stage_3.html +0 -0
- data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
- data/test/fixtures/yahoo_contacts.csv +3 -0
- data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
- data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
- data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
- data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
- data/test/fixtures/yahoo_no_user_response_stage_2.html +574 -0
- data/test/freenet_importer_test.rb +53 -0
- data/test/gmx_importer_test.rb +53 -0
- data/test/scripts/live_test.rb +25 -0
- data/test/test_blackbook.rb +60 -0
- data/test/test_blackbook_exporter_base.rb +16 -0
- data/test/test_blackbook_exporter_vcf.rb +52 -0
- data/test/test_blackbook_exporter_xml.rb +16 -0
- data/test/test_blackbook_importer_aol.rb +108 -0
- data/test/test_blackbook_importer_base.rb +24 -0
- data/test/test_blackbook_importer_csv.rb +60 -0
- data/test/test_blackbook_importer_gmail.rb +116 -0
- data/test/test_blackbook_importer_hotmail.rb +165 -0
- data/test/test_blackbook_importer_page_scraper.rb +51 -0
- data/test/test_blackbook_importer_yahoo.rb +137 -0
- data/test/test_helper.rb +71 -0
- data/test/web.de_importer_test.rb +53 -0
- metadata +128 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
#
|
2
|
+
# Provides a base template for interface and behavior of contact importers
|
3
|
+
|
4
|
+
module Blackbook::Importer
|
5
|
+
class Base
|
6
|
+
attr_accessor :options
|
7
|
+
|
8
|
+
##
|
9
|
+
# Should return true or false/nil depending on whether the +options+ given
|
10
|
+
# can be handled by this importer
|
11
|
+
|
12
|
+
def =~( options ); end # stub: empty body, so it returns nil; importers that can handle +options+ override it
|
13
|
+
|
14
|
+
##
|
15
|
+
# Does the work of extracting contacts. Returns an Array of Arrays
|
16
|
+
# containing the name and email as the first and second elements. Of
|
17
|
+
# course, you can override this behavior to meet the needs of a
|
18
|
+
# particular service.
|
19
|
+
|
20
|
+
def fetch_contacts!; end # stub: empty body, so it returns nil; #import returns whatever the overriding implementation returns
|
21
|
+
|
22
|
+
##
# Imports the contacts using the given +options+.
#
# Stores +options+ on the importer (via the +options+ accessor) and then
# delegates to fetch_contacts!; the return value is whatever
# fetch_contacts! returns. Importers are expected to return an array of
# hashes in the internal format (a hash with at least :name and :email
# values).

def import(options = {})
  self.options = options
  fetch_contacts!
end
|
31
|
+
|
32
|
+
##
# Name of the importer service: the last "::"-separated segment of the
# class name (e.g. "Gmail" for Blackbook::Importer::Gmail).
# Returns nil for an anonymous class instead of raising.

def service_name
  self.class.name.to_s.split("::").last
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
##
|
2
|
+
# Imports contacts from a CSV file
|
3
|
+
|
4
|
+
class Blackbook::Importer::Csv < Blackbook::Importer::Base
|
5
|
+
|
6
|
+
DEFAULT_COLUMNS = [:name,:email,:misc]
|
7
|
+
DEFAULT_PATTERN = /,/
|
8
|
+
|
9
|
+
##
# Matches this importer to a file that contains CSV values.
#
# Returns true when +options+ is present and options[:file] responds to
# +open+; false otherwise (including when +options+ is nil).

def =~(options)
  handles = options && options[:file].respond_to?(:open)
  handles ? true : false
end
|
15
|
+
|
16
|
+
##
# fetch_contacts! implementation for this importer.
#
# Reads every line of options[:file].path. If the first line is a header
# whose first column is "name", it is consumed and defines the column
# names; otherwise DEFAULT_COLUMNS are used and the first line is data.
# Returns an Array of Hashes keyed by column name; an empty file yields [].

def fetch_contacts!
  # Strip line terminators up front: otherwise the last field of each row
  # keeps its "\n" and blank lines are not recognised as empty.
  lines = IO.readlines(options[:file].path).map { |line| line.chomp }
  return [] if lines.empty?

  columns = to_columns(lines.first)
  if columns.first == :name
    lines.shift # the header row is not a contact
  else
    columns = DEFAULT_COLUMNS.dup
  end

  contacts = []
  lines.each do |line|
    vals = line.split(pattern)
    next if vals.empty?
    contacts << to_hash(columns, vals)
  end
  contacts
end
|
34
|
+
|
35
|
+
# Builds one contact Hash by pairing +cols+ with +vals+ in order. The last
# column receives all remaining values joined with ',' so surplus fields
# are kept rather than dropped. +vals+ is not modified.
def to_hash(cols, vals) # :nodoc:
  remaining = vals.dup # consume a copy, not the caller's array
  last_index = cols.size - 1
  row = {}
  cols.each_with_index do |col, index|
    # compare by position so a repeated column name is not mistaken for the last one
    row[col] = index == last_index ? remaining.join(',') : remaining.shift
  end
  row
end
|
42
|
+
|
43
|
+
# Turns a header +line+ into an Array of column Symbols. The line is split
# on the configured pattern when it occurs in the line, otherwise on
# DEFAULT_PATTERN. A leading "Name" column (and an "E-mail"/"Email" column
# directly after it) is normalised to :name / :email; every other tag is
# stripped and symbolised as-is. A nil +line+ yields [].
def to_columns(line) # :nodoc:
  return [] if line.nil?

  separator = line.match(pattern) ? pattern : DEFAULT_PATTERN
  tags = line.split(separator)

  columns = []
  # deal with "Name,E-mail..." oddity up front
  if tags.first =~ /^name$/i
    tags.shift
    columns << :name
    if tags.first =~ /^e.?mail/i # E-mail or Email
      tags.shift
      columns << :email
    end
  end
  columns.concat(tags.map { |tag| tag.strip.to_sym })
end
|
62
|
+
|
63
|
+
# Separator used to split CSV lines: options[:pattern] when given,
# otherwise DEFAULT_PATTERN (also when no options are set at all).
# The value is memoized in @pattern.
# NOTE(review): because of the memoization, a later import on the same
# instance with a different :pattern still sees the first one.
def pattern
  @pattern ||= (options && options[:pattern]) || DEFAULT_PATTERN
end
|
66
|
+
|
67
|
+
Blackbook.register(:csv, self)
|
68
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'blackbook/importer/page_scraper'
|
2
|
+
|
3
|
+
class Blackbook::Importer::Freenet < Blackbook::Importer::PageScraper
|
4
|
+
LOGIN_URL = "https://office.freenet.de"
|
5
|
+
MESSAGES_URL = "/main_overview.html"
|
6
|
+
|
7
|
+
##
# Matches this importer to a freenet.de address. Returns a truthy match
# index when options[:username] ends in "@freenet.de" (case-insensitive),
# nil/false otherwise.

def =~( options )
  # \z rather than $: $ also matches before a newline inside the string
  options && options[:username] =~ /@freenet\.de\z/i
end
|
10
|
+
|
11
|
+
##
# Logs in to Freenet with options[:username] / options[:password].
#
# Only the part of the username before "@" is submitted. If the response
# is the "Fehlgeschlagene Login-Versuche" interstitial, its form is
# submitted and MESSAGES_URL is fetched before checking the result.
# Raises Blackbook::BadCredentialsError when the resulting page shows a
# login failure. Any other error propagates unchanged; the previous
# rescue/re-raise wrapper did the same, and its BlackbookError fallback
# could never run.

def login
  username, password = options[:username], options[:password]

  page = agent.get LOGIN_URL

  form = page.forms.with.name("loginform").first
  form.username = username.split("@").first
  form.password = password
  page = form.submit

  if page.body.match(/Fehlgeschlagene Login-Versuche/)
    # explicit .first, consistent with the login form lookup above
    page.forms.with.name("sicherform").first.submit
    page = agent.get MESSAGES_URL
  end

  if page.body.match(/Login (erneut )?fehlgeschlagen/) || page.body.match(/Ich bin bereits Mitglied/)
    raise Blackbook::BadCredentialsError.new
  end
end
|
34
|
+
|
35
|
+
##
# Prepares this importer for scraping; for Freenet that is just logging in.

def prepare
  login
end
|
38
|
+
|
39
|
+
##
# Collects contacts from the INBOX and INBOX.sent folders and returns them
# as one flat Array with duplicates removed (first occurrence wins).

def scrape_contacts
  per_folder = ["INBOX", "INBOX.sent"].map do |folder|
    page = agent.get "/messages/mail_mailbox.html?mail_folder=#{folder}"
    find_contacts(page)
  end

  # Flatten first, then de-duplicate: comparing the per-folder arrays (as
  # before) never removed a contact that appears in both folders.
  per_folder.flatten.inject([]) do |unique, contact|
    unique << contact unless unique.include?(contact)
    unique
  end
end
|
50
|
+
|
51
|
+
|
52
|
+
protected
|
53
|
+
# Extracts contacts from the "title" attribute of the ninth cell of every
# table row that has an id. A title is expected to look like
# "Full Name"<address>; cells whose title is missing or does not have that
# shape are skipped. Returns an Array of { :name, :email } hashes.
def find_contacts(page)
  contacts = page.search("tr[@id]/td[9]").map do |cell|
    # to_s: tolerates a missing attribute and attribute objects alike
    match = cell.attributes["title"].to_s.match(/"(.*)"<(.*)>/)
    { :name => match[1], :email => match[2] } if match
  end
  contacts.compact
end
|
60
|
+
|
61
|
+
Blackbook.register :freenet, self
|
62
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'kconv'
|
2
|
+
require 'blackbook/importer/page_scraper'
|
3
|
+
|
4
|
+
# CSV compatibility shim: the importer below always calls FCSV.
# On Ruby newer than 1.9 the standard "csv" library is loaded and exposed
# under the FCSV name (constant and the CSV() helper method) unless FCSV is
# already defined; on older Rubies the fastercsv gem provides FCSV itself.
if RUBY_VERSION > "1.9"
  require "csv"
  unless defined? FCSV
    class Object
      FCSV = CSV
      alias_method :FCSV, :CSV
    end
  end
else
  require "fastercsv"
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Imports contacts from GMail
|
18
|
+
|
19
|
+
class Blackbook::Importer::Gmail < Blackbook::Importer::PageScraper
|
20
|
+
|
21
|
+
RETRY_THRESHOLD = 5
|
22
|
+
##
# Matches this importer to a user's name/address: true when
# options[:username] ends in "@gmail.com" or "@googlemail.com"
# (case-insensitive), false otherwise.

def =~(options = {})
  # the dot is escaped (".com" used to match any character) and \z anchors
  # at the real end of the string rather than before a newline
  options && options[:username] =~ /@(gmail|googlemail)\.com\z/i ? true : false
end
|
28
|
+
|
29
|
+
##
# login to gmail
#
# Fills the first form on the GMail landing page with options[:username]
# and options[:password] and submits it. Raises
# Blackbook::BadCredentialsError when the response reports a
# username/password mismatch. If the first <meta> tag of the response
# carries a "url=..." redirect it is followed; a response without any
# <meta> tag is left as it is.

def login
  page = agent.get('http://mail.google.com/mail/')
  form = page.forms.first
  form.Email = options[:username]
  form.Passwd = options[:password]
  page = agent.submit(form,form.buttons.first)

  raise( Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again." ) if page.body =~ /Username and password do not match/

  meta = page.search('//meta').first
  content = meta && meta.attributes['content']
  if content.to_s =~ /url='?(http.+?)'?$/i
    page = agent.get $1
  end
end
|
45
|
+
|
46
|
+
##
# prepare this importer; for GMail that is just logging in

def prepare
  login
end
|
52
|
+
|
53
|
+
##
# scrape gmail contacts for this importer
#
# Requires a GAUSR cookie containing "mail:<username>"; otherwise raises
# Blackbook::BadCredentialsError. Downloads the contacts CSV export,
# converts it to UTF-8 and returns an Array of { :name, :email } hashes.
# The "Name","E-mail" header row and rows without an e-mail are skipped.

def scrape_contacts
  authenticated = agent.cookies.find do |c|
    c.name == 'GAUSR' && c.value.include?("mail:#{options[:username]}")
  end
  unless authenticated
    raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
  end

  contacts = []
  csv = agent.get('https://mail.google.com/mail/contacts/data/export?exportType=ALL&out=GMAIL_CSV')
  body = Kconv.toutf8(csv.body)
  FCSV.parse(body) do |row|
    next if row[0] == "Name" && row[1] == "E-mail"
    email = row[1]
    # plain-Ruby emptiness check; blank? comes from ActiveSupport, which
    # this file does not load
    next if email.nil? || email.strip.empty?
    contacts << {:name => row[0], :email => email}
  end
  contacts
end
|
71
|
+
|
72
|
+
Blackbook.register(:gmail, self)
|
73
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'blackbook/importer/page_scraper'
|
2
|
+
|
3
|
+
class Blackbook::Importer::GMX < Blackbook::Importer::PageScraper
|
4
|
+
LOGIN_URL = "https://www.gmx.net/"
|
5
|
+
|
6
|
+
##
# Matches this importer to a GMX address. Returns a truthy match index
# when options[:username] ends in "@gmx.de" or "@gmx.net"
# (case-insensitive), nil/false otherwise.

def =~( options )
  # \z rather than $: $ also matches before a newline inside the string
  options && options[:username] =~ /@gmx\.(de|net)\z/i
end
|
9
|
+
|
10
|
+
##
# Logs in to GMX with options[:username] / options[:password] and keeps
# the page returned by the login form in @next for scrape_contacts.
# Raises Blackbook::BadCredentialsError when that page shows the
# "Passwort vergessen?" box or its URL contains "login-failed". Any other
# error propagates unchanged; the previous rescue/re-raise wrapper did the
# same, and its BlackbookError fallback could never run.

def login
  username, password = options[:username], options[:password]

  page = agent.get LOGIN_URL

  form = page.forms.with.name("login").first
  form.id = username
  form.p = password
  page = form.submit
  @next = page

  index_box = page.at("div.index")
  if (index_box && index_box.inner_html == "Passwort vergessen?") ||
     page.uri.to_s.match(/login-failed/)
    raise Blackbook::BadCredentialsError.new
  end
end
|
30
|
+
|
31
|
+
##
# Prepares this importer for scraping; for GMX that is just logging in.

def prepare
  login
end
|
34
|
+
|
35
|
+
##
# Starting from the page stored by login (@next), opens the "E-Mail"
# section and then the Posteingang, Archiv and Gesendet folders in turn
# (each folder link is looked up on the previously opened page). Returns
# the contacts of all folders as one flat Array with duplicates removed.

def scrape_contacts
  page = @next

  # Go to E-Mails
  page = page.links.find { |link| link.text =~ /E-Mail/ }.click

  per_folder = [/Posteingang/, /Archiv/, /Gesendet/].map do |folder|
    page = page.links.find { |link| link.text =~ folder }.click
    find_contacts(page)
  end

  # Flatten first, then de-duplicate: comparing the per-folder arrays (as
  # before) never removed a contact that appears in several folders.
  per_folder.flatten.inject([]) do |unique, contact|
    unique << contact unless unique.include?(contact)
    unique
  end
end
|
54
|
+
|
55
|
+
protected
|
56
|
+
|
57
|
+
# Extracts contacts from the links inside form#MI whose "title" attribute
# contains an "@". Newlines are removed from the title, the last
# whitespace-separated token (minus any angle brackets) is the address and
# the remaining tokens form the name. Links without a title are ignored.
def find_contacts(page)
  # to_s: tolerates a missing attribute and attribute objects alike
  titles = page.search("form#MI a").map { |link| link.attributes["title"].to_s }
  titles.select { |title| title =~ /@/ }.map do |title|
    parts = title.gsub(/\n/, "").split(/\s/)
    email = parts.pop.gsub(/[<>]/, "")
    { :name => parts.join(" "), :email => email }
  end
end
|
67
|
+
Blackbook.register :gmx, self
|
68
|
+
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'blackbook/importer/page_scraper'
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
##
|
5
|
+
# imports contacts for MSN/Hotmail
|
6
|
+
class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper
|
7
|
+
|
8
|
+
# Supported address domains mapped to the login endpoint the sign-in form
# is posted to. Frozen so the lookup table cannot be altered at runtime.
DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
            "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
            "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
            "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
            "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
            "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
            "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
            "live.com"          => "https://login.live.com/ppsecure/post.srf",
            "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
            "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
            "passport.com"      => "https://login.live.com/ppsecure/post.srf",
            "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }.freeze
|
20
|
+
|
21
|
+
##
# Matches this importer to a user's name/address: true when the
# (downcased) domain of options[:username] is one of the DOMAINS keys,
# false otherwise, including for missing options or an empty username.

def =~(options)
  return false unless options && options[:username]
  domain = username_domain(options[:username].to_s.downcase)
  # domain is nil for an empty username; key? copes with that
  DOMAINS.key?(domain) ? true : false
end
|
29
|
+
|
30
|
+
##
# Login procedure
# 1. Go to login form
# 2. Set login and passwd
# 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however
#    many characters are in passwd (so if passwd was 8 chars, you'd chop 8
#    chars off the end, giving you IfYouAreReadingThisYouHaveTooMuch)
# 4. Get the query string (g_QS) from the login page, if there is one
# 5. Set the action to the appropriate URL for the username's domain, with
#    that query string appended
# 6. Submit the form and parse the url from the resulting page's javascript
# 7. Go to that url; the page is kept in @first_page for scrape_contacts
#
# Raises Blackbook::BadCredentialsError when the response reports a failed
# sign-in.

def login
  page = agent.get('http://login.live.com/login.srf?id=2')
  form = page.forms.first
  form.login = options[:username]
  form.passwd = options[:password]
  form.PwdPad = ( "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size )])
  # first capture of the first g_QS="..." occurrence, or nil when absent
  query_string = page.body.to_s[/g_QS="([^"]+)/, 1]
  form.action = login_url + "?#{query_string.to_s}"
  page = agent.submit(form)

  # Check for login success
  if page.body =~ /The e-mail address or password is incorrect/ ||
     page.body =~ /Sign in failed\./
    raise( Blackbook::BadCredentialsError,
      "That username and password was not accepted. Please check them and try again." )
  end

  @first_page = agent.get( page.body.scan(/http\:\/\/[^"]+/).first )
end
|
60
|
+
|
61
|
+
##
# prepare this importer; for Hotmail that is just logging in

def prepare
  login
end
|
67
|
+
|
68
|
+
##
# Scrape contacts for Hotmail
# Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
# raises an end of file error in Net::HTTP via Mechanize.
# Seems like Hotmail addresses are now hosted on Windows Live.
#
# Requires an MSPPre cookie equal to options[:username]; otherwise raises
# Blackbook::BadCredentialsError. Navigates from @first_page to the
# printable contact list and returns one Hash per contact: every
# label/value pair found (keyed by the label text as a Symbol) plus
# :name (the "Name:" value, possibly nil) and :email (first token of the
# personal, work or Windows Live ID address, '' when none is present).

def scrape_contacts
  unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
    raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
  end
  page = agent.get(@first_page.iframes.first.src)

  page = agent.click(page.link_with(:text => 'Mail'))
  page = agent.get(page.iframes.first.src)
  page = agent.get('/mail/PrintShell.aspx?type=contact')

  rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
  rows.collect do |row|
    vals = {}
    row.search("table/tr").each do |pair|
      label = pair.search("td[@class='TextAlignRight Label']").first
      value = pair.search("td[@class='Value']").first
      # rows lacking either cell carry no usable label/value pair
      next if label.nil? || value.nil?
      vals[label.inner_text.strip.to_sym] = value.inner_text.strip
    end
    vals[:name] = vals[:'Name:']
    address = vals[:'Personal e-mail:'] || vals[:'Work e-mail:'] || vals[:'Windows Live ID:']
    vals[:email] = address ? address.split(' ').first : ''
    vals
  end
end
|
98
|
+
|
99
|
+
##
# lookup for the login service that should be used based on the user's
# address; falls back to the hotmail.com endpoint for unknown domains.
# The domain is downcased first because the DOMAINS keys are lower-case.

def login_url
  DOMAINS[username_domain.to_s.downcase] || DOMAINS['hotmail.com']
end
|
106
|
+
|
107
|
+
|
108
|
+
##
# normalizes the host for the page that is currently being "viewed" by the
# Mechanize agent: returns "<scheme>://<host>" of that page's URI, or nil
# when there is no agent or no current page

def current_host
  current = agent && agent.current_page
  return nil unless current
  uri = current.uri
  "#{uri.scheme}://#{uri.host}"
end
|
117
|
+
|
118
|
+
##
# determines the domain for the user: the part after the last "@" of
# +username+, or of options[:username] when no username is passed.
# Returns nil when neither is available.

def username_domain(username = nil)
  username ||= options[:username] if options
  return unless username
  username.to_s.split('@').last
end
|
126
|
+
|
127
|
+
Blackbook.register(:hotmail, self)
|
128
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'mechanize', '>= 0.9.2'
|
3
|
+
require 'mechanize'
|
4
|
+
|
5
|
+
##
|
6
|
+
# A base class for importers that scrape their contacts from web services
|
7
|
+
|
8
|
+
class Blackbook::Importer::PageScraper < Blackbook::Importer::Base
|
9
|
+
|
10
|
+
attr_accessor :agent
|
11
|
+
|
12
|
+
##
# creates the Mechanize agent used to do the scraping and sets a nice
# user agent header for good netiquette. Keep-alive is switched off.
# The agent is stored in +agent+ and returned.

def create_agent
  # Newer Mechanize releases expose the class as ::Mechanize and dropped
  # the WWW namespace; older ones only have WWW::Mechanize.
  mechanize = defined?(::Mechanize) ? ::Mechanize : WWW::Mechanize
  self.agent = mechanize.new
  agent.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
  agent.keep_alive = false
  agent
end
|
22
|
+
|
23
|
+
##
# Page scrapers will follow a fairly simple pattern of instantiating the
# agent, prepping for the scrape and then the actual scrape process.
# Returns whatever scrape_contacts returns.

def fetch_contacts!
  create_agent
  prepare
  scrape_contacts
end
|
32
|
+
|
33
|
+
##
|
34
|
+
# Providers will often require you to log in or otherwise prepare to actually
|
35
|
+
# scrape the contacts
|
36
|
+
|
37
|
+
def prepare; end # stub: does nothing here; fetch_contacts! calls it after create_agent and before scrape_contacts
|
38
|
+
|
39
|
+
##
|
40
|
+
# Some providers have a single page you can scrape from (like Gmail's HTML
|
41
|
+
# Contacts page) while others might require you to navigate several pages,
|
42
|
+
# scraping as you go.
|
43
|
+
|
44
|
+
def scrape_contacts; end # stub: returns nil here; fetch_contacts! returns this method's result
|
45
|
+
|
46
|
+
##
# helper to strip html from text: removes everything that looks like an
# opening or closing tag and returns the remaining text. nil is treated
# as an empty string.

def strip_html( html )
  html.to_s.gsub(/<\/?[^>]*>/, '')
end
|
52
|
+
end
|