alexjp-blackbook_csv 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES.markdown +44 -0
- data/Manifest.txt +59 -0
- data/README.markdown +72 -0
- data/Rakefile +39 -0
- data/debug_blackbook.rb +10 -0
- data/init.rb +1 -0
- data/lib/blackbook/exporter/base.rb +16 -0
- data/lib/blackbook/exporter/vcf.rb +45 -0
- data/lib/blackbook/exporter/xml.rb +28 -0
- data/lib/blackbook/importer/aol.rb +83 -0
- data/lib/blackbook/importer/base.rb +39 -0
- data/lib/blackbook/importer/csv.rb +87 -0
- data/lib/blackbook/importer/gmail.rb +66 -0
- data/lib/blackbook/importer/hotmail.rb +124 -0
- data/lib/blackbook/importer/page_scraper.rb +86 -0
- data/lib/blackbook/importer/yahoo.rb +61 -0
- data/lib/blackbook.rb +76 -0
- data/test/fixtures/aol_application_page.html +566 -0
- data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
- data/test/fixtures/aol_contacts.html +90 -0
- data/test/fixtures/aol_login_response_stage_1.html +158 -0
- data/test/fixtures/aol_login_response_stage_2.html +559 -0
- data/test/fixtures/aol_login_response_stage_3.html +61 -0
- data/test/fixtures/aol_login_response_stage_4.html +48 -0
- data/test/fixtures/aol_login_response_stage_5.html +404 -0
- data/test/fixtures/aol_new_contacts.html +431 -0
- data/test/fixtures/gmail.csv +3 -0
- data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
- data/test/fixtures/gmail_contacts.html +228 -0
- data/test/fixtures/gmail_login_response_stage_1.html +556 -0
- data/test/fixtures/gmail_login_response_stage_2.html +1 -0
- data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
- data/test/fixtures/gmail_login_response_stage_3.html +249 -0
- data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
- data/test/fixtures/hotmail_contacts.html +191 -0
- data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
- data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
- data/test/fixtures/hotmail_login_response_stage_3.html +380 -0
- data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
- data/test/fixtures/yahoo_contacts.csv +3 -0
- data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
- data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
- data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
- data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
- data/test/scripts/live_test.rb +25 -0
- data/test/test_blackbook.rb +60 -0
- data/test/test_blackbook_exporter_base.rb +16 -0
- data/test/test_blackbook_exporter_vcf.rb +52 -0
- data/test/test_blackbook_exporter_xml.rb +16 -0
- data/test/test_blackbook_importer_aol.rb +113 -0
- data/test/test_blackbook_importer_base.rb +24 -0
- data/test/test_blackbook_importer_csv.rb +60 -0
- data/test/test_blackbook_importer_gmail.rb +117 -0
- data/test/test_blackbook_importer_hotmail.rb +147 -0
- data/test/test_blackbook_importer_page_scraper.rb +51 -0
- data/test/test_blackbook_importer_yahoo.rb +97 -0
- data/test/test_helper.rb +69 -0
- data/vendor/plugins/blackbook/lib/autotest/blackbook.rb +27 -0
- data/vendor/plugins/blackbook/lib/autotest/discover.rb +3 -0
- metadata +176 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'blackbook/importer/page_scraper'
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
##
|
5
|
+
# imports contacts for MSN/Hotmail
|
6
|
+
class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper

  # Login endpoint to post the credentials to, keyed by the domain part of the
  # user's address.
  DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com"      => "https://login.live.com/ppsecure/post.srf",
              "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }

  # Filler text for the login form's PwdPad field; it is shortened by the
  # length of the password before being submitted (see #login).
  PWD_PAD = "IfYouAreReadingThisYouHaveTooMuchFreeTime"

  # Labels of the scraped contact rows that may carry an e-mail address, in
  # order of preference.
  EMAIL_LABELS = ['Personal e-mail:', 'Work e-mail:']

  ##
  # Matches this importer to an user's name/address
  #
  # Returns true when options[:username] ends in one of the DOMAINS keys
  # (compared in lower case), false otherwise - including when options or the
  # username are missing.

  def =~(options)
    return false unless options && options[:username]
    # key? is false for a nil or empty domain, so no separate emptiness check
    # (and no reliance on a patched NilClass#empty?) is needed.
    DOMAINS.key?( username_domain(options[:username].downcase) )
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however many characters are in passwd (so if passwd
  #    was 8 chars, you'd chop 8 chars of the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Raises Blackbook::BadCredentialsError when the response page reports a
  # failed sign in. Returns the page fetched in the last step.

  def login
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login = options[:username]
    form.passwd = options[:password]
    form.PwdPad = PWD_PAD[0..(-1 - options[:password].to_s.size)]

    # g_QS is read from the login page body; nil (no match) simply leaves the
    # query string empty.
    query_string = page.body[/g_QS="([^"]+)/, 1]
    form.action = "#{login_url}?#{query_string}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    # The response body contains the address to continue to; follow the first
    # http:// URL found in it.
    agent.get( page.body.scan(/http\:\/\/[^"]+/).first )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns one hash per contact: every label/value pair found in the
  # contact's table (label text as a symbol key) plus :name and :email.
  # :email is the first whitespace separated token of the personal e-mail
  # value, else of the work e-mail value, else ''.
  #
  # Raises Blackbook::BadCredentialsError unless the agent holds an MSPPre
  # cookie whose value is the username.

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end

    page = agent.get('PrintShell.aspx?type=contact')
    rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
    rows.collect do |row|
      name = row.search("//div[@class='cDisplayName']").first.innerText.strip

      vals = {}
      row.search("//table/tr").each do |pair|
        key = pair.search("/td[@class='TextAlignRight Label']").first.innerText.strip
        val = pair.search("/td[@class='Value']").first.innerText.strip
        vals[key.to_sym] = val
      end
      vals[:name] = name

      # A missing or blank field yields nil here, so compact.first picks the
      # first label that really holds an address; '' when none does.
      addresses = EMAIL_LABELS.map { |label| vals[label.to_sym].to_s.split(' ').first }
      vals[:email] = addresses.compact.first || ''
      vals
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address; falls back to the hotmail.com endpoint for unlisted domains

  def login_url
    DOMAINS[username_domain] || DOMAINS['hotmail.com']
  end


  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent
  #
  # Returns "scheme://host", or nil when there is no agent or no current page.

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user
  #
  # username defaults to options[:username]. Returns whatever follows the last
  # '@' (the whole value when it contains no '@'), or nil when no username is
  # available.

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Blackbook.register(:hotmail, self)
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'mechanize', '>= 0.7.0'
|
3
|
+
require 'mechanize'
|
4
|
+
require 'generator' # for SyncEnumerator
|
5
|
+
|
6
|
+
# Patch Mechanize's broken HTML unescaping (Mechanize 0.6.11)
|
7
|
+
class WWW::Mechanize
  # Turns +url+ (a URI or anything responding to to_s) into an absolute URI,
  # resolving relative references against +cur_page+.
  #
  # Raises a RuntimeError when +url+ is relative and +cur_page+ has no uri.
  def to_absolute_uri(url, cur_page=current_page())
    unless url.is_a? URI
      url = url.to_s.strip

      # Split the string into the stretches between already-escaped %XX
      # sequences and the sequences themselves, so only the former get escaped.
      plain_parts   = url.split(/%[0-9A-Fa-f]{2}/)
      escaped_parts = url.scan(/%[0-9A-Fa-f]{2}/)

      # Mechanize here uses #zip to combine the two arrays, which will ignore
      # excessive elements of the second array (the one which is passed as an
      # argument). That means if the URL ends with more than one already escaped
      # character, then only the first one will be restored into the resulting
      # URL. SyncEnumerator walks both arrays to the end of the longer one.
      rebuilt = SyncEnumerator.new(plain_parts, escaped_parts).map { |plain, escaped|
        "#{URI.escape(plain||'')}#{escaped}"
      }.join('').gsub(/%23/, '#')

      url = URI.parse(Util.html_unescape(rebuilt))
    end

    # construct an absolute uri
    if url.relative?
      raise 'no history. please specify an absolute URL' unless cur_page.uri
      url = cur_page.uri + url
      # Strip initial "/.." bits from the path
      url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    end

    return url
  end
end
|
38
|
+
|
39
|
+
##
|
40
|
+
# A base class for importers that scrape their contacts from web services
|
41
|
+
|
42
|
+
class Blackbook::Importer::PageScraper < Blackbook::Importer::Base

  # The Mechanize agent used for all requests; set by #create_agent
  attr_accessor :agent

  ##
  # Builds a fresh Mechanize agent, labels it with a Blackbook user agent
  # string, turns keep-alive off, stores it in #agent and returns it

  def create_agent
    mechanize = WWW::Mechanize.new
    mechanize.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
    mechanize.keep_alive = false
    self.agent = mechanize
  end

  ##
  # Runs the three scraping phases in order - agent creation, preparation and
  # the scrape itself - and returns whatever #scrape_contacts returns

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Hook for whatever has to happen before contacts can be scraped (logging
  # in, for instance). Does nothing here; subclasses override it.

  def prepare
  end

  ##
  # Hook that performs the actual scraping, whether from a single page or by
  # navigating several. Does nothing here; subclasses override it.

  def scrape_contacts
  end

  ##
  # Returns +html+ with everything that looks like a tag removed

  def strip_html( html )
    tag_pattern = /<\/?[^>]*>/
    html.gsub(tag_pattern, '')
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'blackbook/importer/page_scraper'
|
2
|
+
require 'fastercsv'
|
3
|
+
|
4
|
+
##
|
5
|
+
# contacts importer for Yahoo!
|
6
|
+
|
7
|
+
class Blackbook::Importer::Yahoo < Blackbook::Importer::PageScraper

  ##
  # Matches this importer to an user's name/address
  #
  # Returns true when options[:username] ends in "@yahoo.com" (any case),
  # false otherwise. The dot is escaped and the match is anchored to the end
  # of the string, so look-alikes such as "x@yahooXcom" do not match.

  def =~(options = {})
    options && options[:username] =~ /@yahoo\.com\z/i ? true : false
  end

  ##
  # login for Yahoo!
  #
  # Submits the first form of the login page with the part of the username
  # before the "@" and the password. Returns true, or raises
  # Blackbook::BadCredentialsError when the response reports an invalid
  # id/password.

  def login
    page = agent.get('https://login.yahoo.com/config/login_verify2?')
    form = page.forms.first
    form.login = options[:username].split("@").first
    form.passwd = options[:password]
    page = agent.submit(form, form.buttons.first)

    # Check for login success
    raise( Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again." ) if page.body =~ /Invalid ID or password./
    true
  end

  ##
  # prepare the importer

  def prepare
    login
  end

  ##
  # scrape yahoo contacts
  #
  # Requests the import/export page, submits its last form to download the
  # Yahoo-format CSV and returns an array of { :name, :email } hashes, one per
  # CSV row. The row describing the logged in user is left out.
  #
  # Raises Blackbook::BadCredentialsError when the page asks to sign in.

  def scrape_contacts
    page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
    if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    form = page.forms.last
    csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV

    contact_rows = FasterCSV.parse(csv.body)
    contact_rows.shift # drop the header row. TODO: Actually use the labels to find the indexes of the data we want

    contacts = contact_rows.collect do |row|
      yahoo_id = row[7].to_s

      # Don't collect self: skip the row whose id is the username, with or
      # without a domain. The id is escaped and must match completely, so the
      # contact "bob" no longer hides the user "bobby@yahoo.com".
      next if !yahoo_id.empty? && options[:username] =~ /\A#{Regexp.escape(yahoo_id)}(@|\z)/

      # email is a field in the data, but will be blank for Yahoo users so we
      # create their email address. Blank covers both nil and "".
      email = row[4].to_s.strip.empty? ? "#{yahoo_id}@yahoo.com" : row[4]

      { :name => "#{row[0]} #{row[2]}", :email => email }
    end

    # next leaves nil in place of every skipped row; remove those so callers
    # only ever see contact hashes.
    contacts.compact
  end

  Blackbook.register(:yahoo, self)
end
|
data/lib/blackbook.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
$:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
|
2
|
+
require 'singleton'
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
##
# Registry of contact importers and exporters and the entry point for fetching
# contacts. A single shared instance holds the registered adapters.
class Blackbook
  include ::Singleton
  VERSION = '1.0.5'

  # Base class of the errors raised by Blackbook itself
  class BlackbookError < ::StandardError; end
  # Raised when a service does not accept the supplied credentials
  class BadCredentialsError < BlackbookError; end

  # Hash of registered importer instances, keyed by name (Symbol)
  attr_accessor :importers
  # Hash of registered exporter instances, keyed by name (Symbol)
  attr_accessor :exporters

  # Shortcut for Blackbook.instance.get; see #get
  def self.get( *args )
    instance.get( *args )
  end

  # Instantiates +adapter_class+ and stores the instance under +name+ in the
  # importers or exporters hash, depending on which base class it descends
  # from.
  #
  # Raises ArgumentError ("Unknown adapter") for anything else.
  def self.register(name, adapter_class)
    adapter = adapter_class.new
    case adapter
    when Importer::Base
      instance.importers[name.to_sym] = adapter
    when Exporter::Base
      instance.exporters[name.to_sym] = adapter
    else
      raise ArgumentError, "Unknown adapter"
    end
  end

  # Sends the vcards from the import to whatever is handling the export
  def export( importer, exporter, options )
    exporter.export importer.import( options )
  end

  # Searches registered importers for one that will handle the given options.
  # Returns the first importer whose =~ accepts +options+, or nil.
  def find_importer( options )
    importers.values.find { |importer| importer =~ options }
  end

  # Fetches contacts from various services or filetypes. The default is to return an array
  # of hashes - Blackbook's internal format
  #
  # Handles several different calls:
  #   get( :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :csv, :file => #<File:/path/to/file.csv> )
  #
  # Raises ArgumentError ("Unknown exporter") when options[:as] names no
  # registered exporter, and ArgumentError ("Unknown source") when neither the
  # positional name nor the options select an importer.
  def get( *args )
    options   = args.last.is_a?(Hash) ? args.pop : {}
    to_format = exporters[ options[:as] || :basic ]

    # An explicitly named importer wins; the name is optional, so only look it
    # up when there is something that can be turned into a symbol.
    name   = args.first
    source = importers[name.to_sym] if name.respond_to?(:to_sym)
    source ||= find_importer(options)

    raise ArgumentError, "Unknown exporter" unless to_format
    raise ArgumentError, "Unknown source" unless source

    export source, to_format, options
  end

  # Starts with empty importer and exporter registries
  def initialize
    self.importers = {}
    self.exporters = {}
  end
end
|
64
|
+
|
65
|
+
# Require all the importers/exporters
|
66
|
+
require 'blackbook/importer/base'
|
67
|
+
require 'blackbook/exporter/base'
|
68
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/importer/*.rb')).each {|f| require f }
|
69
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/exporter/*.rb')).each {|f| require f }
|
70
|
+
|
71
|
+
class NilClass
  # nil always reports itself as empty, so code that calls empty? on a value
  # that may be nil does not need a separate nil check.
  def empty?; true; end
end
|
76
|
+
|