briancollins-blackbook 1.0.15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/CHANGES.markdown +44 -0
  2. data/Manifest.txt +59 -0
  3. data/README.markdown +74 -0
  4. data/VERSION.yml +4 -0
  5. data/lib/blackbook.rb +83 -0
  6. data/lib/blackbook/exporter/base.rb +16 -0
  7. data/lib/blackbook/exporter/vcf.rb +45 -0
  8. data/lib/blackbook/exporter/xml.rb +28 -0
  9. data/lib/blackbook/importer/aol.rb +94 -0
  10. data/lib/blackbook/importer/base.rb +39 -0
  11. data/lib/blackbook/importer/csv.rb +68 -0
  12. data/lib/blackbook/importer/freenet.rb +62 -0
  13. data/lib/blackbook/importer/gmail.rb +73 -0
  14. data/lib/blackbook/importer/gmx.rb +68 -0
  15. data/lib/blackbook/importer/hotmail.rb +128 -0
  16. data/lib/blackbook/importer/page_scraper.rb +52 -0
  17. data/lib/blackbook/importer/web.de.rb +67 -0
  18. data/lib/blackbook/importer/yahoo.rb +73 -0
  19. data/test/config/credentials.yml.example +9 -0
  20. data/test/fixtures/aol_application_page.html +566 -0
  21. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  22. data/test/fixtures/aol_contacts.html +102 -0
  23. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  24. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  25. data/test/fixtures/aol_login_response_stage_3.html +48 -0
  26. data/test/fixtures/aol_login_response_stage_4.html +404 -0
  27. data/test/fixtures/aol_new_contacts.html +431 -0
  28. data/test/fixtures/gmail.csv +3 -0
  29. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  30. data/test/fixtures/gmail_contacts.html +228 -0
  31. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  32. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  33. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  34. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  35. data/test/fixtures/gmail_redirect_body.html +10 -0
  36. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  37. data/test/fixtures/hotmail_contacts.html +262 -0
  38. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  39. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  40. data/test/fixtures/hotmail_login_response_stage_3.html +519 -0
  41. data/test/fixtures/hotmail_scrape_first_page.html +77 -0
  42. data/test/fixtures/hotmail_scrape_response_stage_1.html +90 -0
  43. data/test/fixtures/hotmail_scrape_response_stage_2.html +77 -0
  44. data/test/fixtures/hotmail_scrape_response_stage_3.html +0 -0
  45. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  46. data/test/fixtures/yahoo_contacts.csv +3 -0
  47. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  48. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  49. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  50. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  51. data/test/fixtures/yahoo_no_user_response_stage_2.html +574 -0
  52. data/test/freenet_importer_test.rb +53 -0
  53. data/test/gmx_importer_test.rb +53 -0
  54. data/test/scripts/live_test.rb +25 -0
  55. data/test/test_blackbook.rb +60 -0
  56. data/test/test_blackbook_exporter_base.rb +16 -0
  57. data/test/test_blackbook_exporter_vcf.rb +52 -0
  58. data/test/test_blackbook_exporter_xml.rb +16 -0
  59. data/test/test_blackbook_importer_aol.rb +108 -0
  60. data/test/test_blackbook_importer_base.rb +24 -0
  61. data/test/test_blackbook_importer_csv.rb +60 -0
  62. data/test/test_blackbook_importer_gmail.rb +116 -0
  63. data/test/test_blackbook_importer_hotmail.rb +165 -0
  64. data/test/test_blackbook_importer_page_scraper.rb +51 -0
  65. data/test/test_blackbook_importer_yahoo.rb +137 -0
  66. data/test/test_helper.rb +71 -0
  67. data/test/web.de_importer_test.rb +53 -0
  68. metadata +128 -0
@@ -0,0 +1,39 @@
1
##
# Base template for contact importers: declares the methods an importer is
# expected to provide and supplies the shared +import+ / +service_name+
# behavior.

module Blackbook::Importer
  class Base
    # Options most recently handed to +import+ (or assigned directly).
    attr_accessor :options

    ##
    # Hook for subclasses: should return true or false/nil depending on
    # whether the +options+ given can be handled by this importer. The base
    # implementation has an empty body and therefore returns nil.

    def =~(options)
      # stub
    end

    ##
    # Hook for subclasses: does the work of extracting contacts. Importers
    # are expected to return an Array of contacts; a particular service may
    # override this to meet its needs. The base implementation has an empty
    # body and therefore returns nil.

    def fetch_contacts!
      # stub
    end

    ##
    # Stores the given +options+ on the importer and then runs
    # +fetch_contacts!+, returning whatever that method returns (importers
    # are expected to produce hashes with at least :name and :email values).

    def import(options = {})
      self.options = options
      contacts = fetch_contacts!
      contacts
    end

    ##
    # Name of the importer service: the last "::"-separated segment of the
    # importer's class name.

    def service_name
      qualified_name = self.class.name
      qualified_name.split("::")[-1]
    end
  end
end
@@ -0,0 +1,68 @@
1
##
# Imports contacts from a CSV file

class Blackbook::Importer::Csv < Blackbook::Importer::Base

  # Column names used when the first line of the file is not a header row.
  DEFAULT_COLUMNS = [:name,:email,:misc]
  # Field separator used when no :pattern option is supplied.
  DEFAULT_PATTERN = /,/

  ##
  # Matches this importer to a file that contains CSV values. Returns true
  # when +options+ is present and options[:file] responds to +open+, false
  # otherwise.

  def =~(options)
    options && options[:file].respond_to?(:open) ? true : false
  end

  ##
  # fetch_contacts! implementation for this importer.
  #
  # Reads every line of options[:file].path, treats the first line as a
  # header when its first column is "name", and otherwise falls back to
  # DEFAULT_COLUMNS. Returns an Array of Hashes keyed by column name; an
  # empty file yields an empty Array.

  def fetch_contacts!
    # Strip line terminators up front so they neither leak into the last
    # value of a row nor make blank lines look like data.
    lines = IO.readlines(options[:file].path).map { |line| line.chomp }
    return [] if lines.empty?

    columns = to_columns(lines.first)
    if columns.first == :name
      lines.shift # the first line was a header, not a contact
    else
      columns = DEFAULT_COLUMNS.dup
    end

    contacts = []
    lines.each do |line|
      vals = line.split(pattern)
      next if vals.empty?
      contacts << to_hash(columns, vals)
    end

    contacts
  end

  ##
  # Pairs column names with values. Every column but the last takes one
  # value; the last column receives all remaining values joined with ','.
  # Works on a copy, so the caller's +vals+ array is left untouched.

  def to_hash(cols, vals) # :nodoc:
    remaining = vals.dup
    last_index = cols.size - 1
    h = {}
    cols.each_with_index do |c, i|
      # Decide by position, not by value, so a repeated column name cannot
      # be mistaken for the final column.
      h[c] = (i == last_index) ? remaining.join(',') : remaining.shift
    end
    h
  end

  ##
  # Converts a header line into an Array of column symbols. The line is
  # split on +pattern+ when it matches, otherwise on DEFAULT_PATTERN.

  def to_columns(line) # :nodoc:
    line = line.to_s
    columns = []
    tags = line.match(pattern) ? line.split(pattern) : line.split(DEFAULT_PATTERN)
    # deal with "Name,E-mail..." oddity up front
    if tags.first =~ /^name$/i
      tags.shift
      columns << :name
      if tags.first =~ /^e.?mail/i # E-mail or Email
        tags.shift
        columns << :email
      end
    end
    tags.each { |v| columns << v.strip.to_sym }
    columns
  end

  ##
  # Field separator for this import: options[:pattern] when given, otherwise
  # DEFAULT_PATTERN. The value is memoized once options are available; while
  # no options are set DEFAULT_PATTERN is returned without being cached.

  def pattern
    return @pattern if @pattern
    return DEFAULT_PATTERN unless options.respond_to?(:[])
    @pattern = options[:pattern] || DEFAULT_PATTERN
  end

  Blackbook.register(:csv, self)
end
@@ -0,0 +1,62 @@
1
+ require 'blackbook/importer/page_scraper'
2
+
3
##
# Imports contacts for freenet.de accounts by scraping the sender/recipient
# column of the web mail folders.

class Blackbook::Importer::Freenet < Blackbook::Importer::PageScraper
  LOGIN_URL = "https://office.freenet.de"
  MESSAGES_URL = "/main_overview.html"

  ##
  # Matches this importer to a username ending in @freenet.de. Returns the
  # match position (truthy) or nil/false.

  def =~( options )
    options && options[:username] =~ /@freenet\.de$/i
  end

  ##
  # Logs in through the form named "loginform", using the part of the
  # username before the "@".
  #
  # Raises Blackbook::BlackbookError when the login form cannot be found and
  # Blackbook::BadCredentialsError when the response reports a failed login.

  def login
    username, password = options[:username], options[:password]

    page = agent.get LOGIN_URL

    form = page.form_with(:name => "loginform")
    raise Blackbook::BlackbookError, "Freenet login form not found" unless form
    form.username = username.split("@").first
    form.password = password
    page = form.submit

    # The response mentions earlier failed login attempts: submit the
    # "sicherform" form when present, then load the message overview.
    if page.body.match(/Fehlgeschlagene Login-Versuche/)
      confirmation = page.form_with(:name => "sicherform")
      confirmation.submit if confirmation
      page = agent.get MESSAGES_URL
    end

    if page.body.match(/Login (erneut )?fehlgeschlagen/) || page.body.match(/Ich bin bereits Mitglied/)
      raise Blackbook::BadCredentialsError.new
    end
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Collects contacts from the INBOX and INBOX.sent folders and returns them
  # as one flat Array with duplicate contacts removed.

  def scrape_contacts
    per_folder = ["INBOX", "INBOX.sent"].map do |folder|
      page = agent.get "/messages/mail_mailbox.html?mail_folder=#{folder}"
      find_contacts(page)
    end

    # Flatten before de-duplicating so individual contacts are compared,
    # not whole per-folder lists.
    per_folder.flatten.uniq
  end


  protected

  ##
  # Extracts { :name, :email } hashes from the title attribute of the ninth
  # cell of every table row that has an id. Cells whose title does not look
  # like "Name"<address> are skipped.

  def find_contacts(page)
    contacts = page.search("tr[@id]/td[9]").map do |cell|
      match = cell.attributes["title"].to_s.match(/"(.*)"<(.*)>/)
      next unless match

      { :name => match[1], :email => match[2] }
    end
    contacts.compact
  end

  Blackbook.register :freenet, self
end
@@ -0,0 +1,73 @@
1
+ require 'kconv'
2
+ require 'blackbook/importer/page_scraper'
3
+
4
# Select the CSV backend. RUBY_VERSION is compared as a String: versions that
# do not sort after "1.9" load the fastercsv library; later versions load the
# standard csv library and, unless FCSV is already defined, expose it under
# the FCSV constant and FCSV() method names so the rest of the file can use
# FCSV either way.
if RUBY_VERSION <= "1.9"
  require "fastercsv"
else
  require "csv"
  unless defined? FCSV
    class Object
      FCSV = CSV
      alias_method :FCSV, :CSV
    end
  end
end
15
+
16
##
# Imports contacts from GMail

class Blackbook::Importer::Gmail < Blackbook::Importer::PageScraper

  RETRY_THRESHOLD = 5

  ##
  # Matches this importer to a user's name/address. Returns true when the
  # username ends in @gmail.com or @googlemail.com (case-insensitive),
  # false otherwise.

  def =~(options = {})
    # The dot is escaped so that e.g. "user@gmailxcom" is not accepted.
    options && options[:username] =~ /@(gmail|googlemail)\.com$/i ? true : false
  end

  ##
  # login to gmail
  #
  # Fills the first form of the mail page with options[:username] and
  # options[:password] and submits it via its first button. When the first
  # <meta> tag of the response carries a "url=..." target, that URL is
  # fetched as well.
  #
  # Raises Blackbook::BadCredentialsError when the response reports that the
  # username and password do not match.

  def login
    page = agent.get('http://mail.google.com/mail/')
    form = page.forms.first
    form.Email = options[:username]
    form.Passwd = options[:password]
    page = agent.submit(form,form.buttons.first)

    raise( Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again." ) if page.body =~ /Username and password do not match/

    # Not every response has a <meta> tag; only follow a redirect that is
    # actually there.
    meta = page.search('//meta').first
    if meta && meta.attributes['content'].to_s =~ /url='?(http.+?)'?$/i
      page = agent.get Regexp.last_match(1)
    end
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # scrape gmail contacts for this importer
  #
  # Downloads the contact export as CSV, converts it to UTF-8 and returns an
  # Array of { :name, :email } hashes. The "Name","E-mail" header row and
  # rows without an e-mail address are skipped.
  #
  # Raises Blackbook::BadCredentialsError unless the agent holds a GAUSR
  # cookie whose value includes "mail:<username>".

  def scrape_contacts
    authenticated = agent.cookies.any? do |c|
      c.name == 'GAUSR' && c.value.include?("mail:#{options[:username]}")
    end
    unless authenticated
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end

    contacts = []
    csv = agent.get('https://mail.google.com/mail/contacts/data/export?exportType=ALL&out=GMAIL_CSV')
    body = Kconv.toutf8(csv.body)
    FCSV.parse(body) do |row|
      next if row[0] == "Name" && row[1] == "E-mail"
      # Plain-Ruby emptiness check (nil, "" or whitespace only) so that no
      # ActiveSupport extension is needed.
      next if row[1].to_s.strip.empty?
      contacts << {:name => row[0], :email => row[1]}
    end
    contacts
  end

  Blackbook.register(:gmail, self)
end
@@ -0,0 +1,68 @@
1
+ require 'blackbook/importer/page_scraper'
2
+
3
##
# Imports contacts for gmx.de / gmx.net accounts by scraping the addresses
# shown in the web mail folders.

class Blackbook::Importer::GMX < Blackbook::Importer::PageScraper
  LOGIN_URL = "https://www.gmx.net/"

  ##
  # Matches this importer to a username ending in @gmx.de or @gmx.net.
  # Returns the match position (truthy) or nil/false.

  def =~( options )
    options && options[:username] =~ /@gmx\.(de|net)$/i
  end

  ##
  # Logs in through the form named "login" and remembers the resulting page
  # in @next for +scrape_contacts+.
  #
  # Raises Blackbook::BlackbookError when the login form cannot be found and
  # Blackbook::BadCredentialsError when the response looks like a failed
  # login (a "Passwort vergessen?" index box or a login-failed URL).

  def login
    username, password = options[:username], options[:password]

    page = agent.get LOGIN_URL

    form = page.form_with(:name => "login")
    raise Blackbook::BlackbookError, "GMX login form not found" unless form
    form.id = username
    form.p = password
    page = form.submit
    @next = page

    index_box = page.at("div.index")
    if (index_box && index_box.inner_html == "Passwort vergessen?") ||
       page.uri.to_s.match(/login-failed/)
      raise Blackbook::BadCredentialsError.new
    end
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Starting from the page stored by +login+, opens the E-Mail section and
  # then the Posteingang, Archiv and Gesendet folders in turn, collecting
  # the contacts found in each. Returns one flat Array with duplicate
  # contacts removed.

  def scrape_contacts
    # Go to E-Mails
    page = follow_link(@next, /E-Mail/)

    per_folder = [/Posteingang/, /Archiv/, /Gesendet/].map do |folder|
      page = follow_link(page, folder)
      find_contacts(page)
    end

    # Flatten before de-duplicating so individual contacts are compared,
    # not whole per-folder lists.
    per_folder.flatten.uniq
  end

  protected

  ##
  # Extracts { :name, :email } hashes from the links inside form#MI whose
  # title contains an "@". The last whitespace-separated token of the title
  # (minus angle brackets) is the address, everything before it the name.

  def find_contacts(page)
    links = page.search("form#MI a").select { |link| link.attributes["title"].to_s =~ /@/ }
    links.map do |link|
      recp = link.attributes["title"].to_s.gsub(/\n/, "").split(/\s/)
      email = recp.pop.gsub(/[<>]/, "")
      fullname = recp.join(" ")

      { :name => fullname, :email => email }
    end
  end

  ##
  # Clicks the first link on +page+ whose text matches +pattern+ and returns
  # the resulting page. Raises Blackbook::BlackbookError when no such link
  # exists.

  def follow_link(page, pattern)
    link = page.links.find { |candidate| candidate.text =~ pattern }
    raise Blackbook::BlackbookError, "GMX navigation link #{pattern.inspect} not found" unless link
    link.click
  end

  Blackbook.register :gmx, self
end
@@ -0,0 +1,128 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'cgi'
3
+
4
##
# imports contacts for MSN/Hotmail
class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper

  # Maps each supported address domain to the login endpoint used for it.
  DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
              "live.com"          => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com"      => "https://login.live.com/ppsecure/post.srf",
              "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }

  ##
  # Matches this importer to a user's name/address. Returns true when the
  # (downcased) domain of options[:username] is one of the DOMAINS keys,
  # false otherwise, including when the username has no domain part.

  def =~(options)
    return false unless options && options[:username]
    DOMAINS.key?(username_domain(options[:username].downcase))
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however many characters are in passwd (so if passwd
  #    was 8 chars, you'd chop 8 chars of the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Raises Blackbook::BadCredentialsError when the response reports a failed
  # sign in, and Blackbook::BlackbookError when the response contains no
  # http:// URL to continue with.

  def login
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login = options[:username]
    form.passwd = options[:password]
    form.PwdPad = ( "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size )])
    # First capture of g_QS="..." in the page, or nil when it is absent.
    query_string = page.body.to_s[/g_QS="([^"]+)/, 1]
    form.action = login_url + "?#{query_string}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    redirect_url = page.body.to_s[/http\:\/\/[^"]+/]
    raise Blackbook::BlackbookError, "Hotmail login response did not contain a redirect URL" unless redirect_url
    @first_page = agent.get( redirect_url )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns one Hash per contact pane of the printable contact list. Each
  # hash holds every label/value pair found (keyed by the label text as a
  # Symbol) plus :name (the "Name:" value) and :email (the first token of
  # the personal, work or Windows Live ID address, '' when none is present).
  #
  # Raises Blackbook::BadCredentialsError unless the agent holds an MSPPre
  # cookie equal to the username.

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    page = agent.get(@first_page.iframes.first.src)

    page = agent.click(page.link_with(:text => 'Mail'))
    page = agent.get(page.iframes.first.src)
    page = agent.get('/mail/PrintShell.aspx?type=contact')

    rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
    rows.map do |row|
      vals = {}
      row.search("table/tr").each do |pair|
        label = pair.search("td[@class='TextAlignRight Label']").first
        value = pair.search("td[@class='Value']").first
        # Rows lacking either cell carry no usable pair.
        next unless label && value
        vals[label.inner_text.strip.to_sym] = value.inner_text.strip
      end
      vals[:name] = vals['Name:'.to_sym]
      address = vals['Personal e-mail:'.to_sym] || vals['Work e-mail:'.to_sym] || vals['Windows Live ID:'.to_sym]
      vals[:email] = address.to_s.split(' ').first || ''
      vals
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address; falls back to the hotmail.com endpoint for unknown domains.
  # The domain is downcased so the lookup agrees with +=~+.

  def login_url
    DOMAINS[username_domain.to_s.downcase] || DOMAINS['hotmail.com']
  end


  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent; nil when there is no agent or no current page.

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user: the text after the last "@" of
  # +username+, which defaults to options[:username]. Returns nil when no
  # username is available.

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Blackbook.register(:hotmail, self)
end
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ gem 'mechanize', '>= 0.9.2'
3
+ require 'mechanize'
4
+
5
##
# A base class for importers that scrape their contacts from web services

class Blackbook::Importer::PageScraper < Blackbook::Importer::Base

  # The Mechanize agent created by +create_agent+.
  attr_accessor :agent

  ##
  # creates the Mechanize agent used to do the scraping and sets a nice
  # user agent header for good net etiquette. Returns the agent.

  def create_agent
    # The mechanize requirement has no upper bound and newer releases no
    # longer define the WWW namespace, so use WWW::Mechanize only where it
    # actually exists.
    mechanize = defined?(::WWW::Mechanize) ? ::WWW::Mechanize : ::Mechanize
    self.agent = mechanize.new
    agent.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
    agent.keep_alive = false
    agent
  end

  ##
  # Page scrapers will follow a fairly simple pattern of instantiating the
  # agent, prepping for the scrape and then the actual scrape process.
  # Returns whatever +scrape_contacts+ returns.

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Providers will often require you to login or otherwise prepare to
  # actually scrape the contacts

  def prepare; end # stub

  ##
  # Some providers have a single page you can scrape from (like Gmail's HTML
  # Contacts page) while others might require you to navigate several pages,
  # scraping as you go.

  def scrape_contacts; end # stub

  ##
  # helper to strip html from text: removes everything that looks like an
  # opening or closing tag.

  def strip_html( html )
    html.gsub(/<\/?[^>]*>/, '')
  end
end