rakutenusa-blackbook 1.0.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/CHANGES.markdown +44 -0
  2. data/Manifest.txt +59 -0
  3. data/README.markdown +72 -0
  4. data/VERSION.yml +4 -0
  5. data/lib/blackbook/exporter/base.rb +16 -0
  6. data/lib/blackbook/exporter/vcf.rb +45 -0
  7. data/lib/blackbook/exporter/xml.rb +28 -0
  8. data/lib/blackbook/importer/aol.rb +89 -0
  9. data/lib/blackbook/importer/base.rb +39 -0
  10. data/lib/blackbook/importer/csv.rb +71 -0
  11. data/lib/blackbook/importer/freenet.rb +62 -0
  12. data/lib/blackbook/importer/gmail.rb +74 -0
  13. data/lib/blackbook/importer/gmx.rb +68 -0
  14. data/lib/blackbook/importer/hotmail.rb +128 -0
  15. data/lib/blackbook/importer/page_scraper.rb +86 -0
  16. data/lib/blackbook/importer/web.de.rb +67 -0
  17. data/lib/blackbook/importer/yahoo.rb +63 -0
  18. data/lib/blackbook.rb +82 -0
  19. data/test/config/credentials.yml.example +9 -0
  20. data/test/fixtures/aol_application_page.html +566 -0
  21. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  22. data/test/fixtures/aol_contacts.html +102 -0
  23. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  24. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  25. data/test/fixtures/aol_login_response_stage_3.html +48 -0
  26. data/test/fixtures/aol_login_response_stage_4.html +404 -0
  27. data/test/fixtures/aol_new_contacts.html +431 -0
  28. data/test/fixtures/gmail.csv +3 -0
  29. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  30. data/test/fixtures/gmail_contacts.html +228 -0
  31. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  32. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  33. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  34. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  35. data/test/fixtures/gmail_redirect_body.html +10 -0
  36. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  37. data/test/fixtures/hotmail_contacts.html +262 -0
  38. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  39. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  40. data/test/fixtures/hotmail_login_response_stage_3.html +519 -0
  41. data/test/fixtures/hotmail_scrape_first_page.html +77 -0
  42. data/test/fixtures/hotmail_scrape_response_stage_1.html +90 -0
  43. data/test/fixtures/hotmail_scrape_response_stage_2.html +77 -0
  44. data/test/fixtures/hotmail_scrape_response_stage_3.html +0 -0
  45. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  46. data/test/fixtures/yahoo_contacts.csv +3 -0
  47. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  48. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  49. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  50. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  51. data/test/fixtures/yahoo_no_user_response_stage_2.html +574 -0
  52. data/test/freenet_importer_test.rb +53 -0
  53. data/test/gmx_importer_test.rb +53 -0
  54. data/test/scripts/live_test.rb +25 -0
  55. data/test/test_blackbook.rb +60 -0
  56. data/test/test_blackbook_exporter_base.rb +16 -0
  57. data/test/test_blackbook_exporter_vcf.rb +52 -0
  58. data/test/test_blackbook_exporter_xml.rb +16 -0
  59. data/test/test_blackbook_importer_aol.rb +108 -0
  60. data/test/test_blackbook_importer_base.rb +24 -0
  61. data/test/test_blackbook_importer_csv.rb +60 -0
  62. data/test/test_blackbook_importer_gmail.rb +116 -0
  63. data/test/test_blackbook_importer_hotmail.rb +165 -0
  64. data/test/test_blackbook_importer_page_scraper.rb +51 -0
  65. data/test/test_blackbook_importer_yahoo.rb +137 -0
  66. data/test/test_helper.rb +71 -0
  67. data/test/web.de_importer_test.rb +53 -0
  68. metadata +127 -0
@@ -0,0 +1,128 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'cgi'
3
+
4
+ ##
5
+ # imports contacts for MSN/Hotmail
6
class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper

  # Maps every supported e-mail domain to the login endpoint the sign-in form
  # is posted to for that domain.
  DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
              "live.com"          => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com"      => "https://login.live.com/ppsecure/post.srf",
              "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }

  ##
  # Matches this importer to a user's name/address.
  #
  # options - Hash that may carry :username.
  #
  # Returns true when the (downcased) domain of :username is a key of
  # DOMAINS, false otherwise (including when options or :username is missing).

  def =~(options)
    return false unless options && options[:username]
    domain = username_domain(options[:username].downcase)
    # Hash#key? is false for nil and "" alike, so no emptiness check (and no
    # reliance on a patched NilClass#empty?) is needed.
    DOMAINS.key?(domain)
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however
  #    many characters are in passwd (so if passwd was 8 chars, you'd chop 8
  #    chars off the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving
  #    you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Stores the page fetched in step 7 in @first_page.
  #
  # Raises Blackbook::BadCredentialsError when the response carries one of the
  # known sign-in failure messages, and Blackbook::BlackbookError when no
  # redirect URL can be found in the response.

  def login
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login  = options[:username]
    form.passwd = options[:password]
    form.PwdPad = "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size)]

    # nil when the page does not define g_QS; the action then ends in a bare "?"
    query_string = page.body.to_s[/g_QS="([^"]+)/, 1]
    form.action = login_url + "?#{query_string.to_s}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    redirect_url = page.body.scan(/http\:\/\/[^"]+/).first
    unless redirect_url
      raise( Blackbook::BlackbookError,
        "Could not find the redirect URL in the login response." )
    end

    @first_page = agent.get( redirect_url )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns one Hash per contact block on the printable contacts page. Each
  # Hash holds every label/value pair found (keyed by the label text as a
  # Symbol) plus :name (value of "Name:", nil when absent) and :email (first
  # whitespace-separated token of the first non-blank value among
  # "Personal e-mail:", "Work e-mail:" and "Windows Live ID:", '' when none).
  #
  # Raises Blackbook::BadCredentialsError unless an MSPPre cookie matching the
  # username is present.

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    page = agent.get(@first_page.iframes.first.src)

    page = agent.click(page.link_with(:text => 'Mail'))
    page = agent.get(page.iframes.first.src)
    page = agent.get('/mail/PrintShell.aspx?type=contact')

    rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
    rows.collect do |row|
      vals = {}
      row.search("table/tr").each do |pair|
        label_cell = pair.search("td[@class='TextAlignRight Label']").first
        value_cell = pair.search("td[@class='Value']").first
        # Rows missing either cell carry no usable label/value pair.
        next if label_cell.nil? || value_cell.nil?
        vals[label_cell.inner_text.strip.to_sym] = value_cell.inner_text.strip
      end
      vals[:name] = vals['Name:'.to_sym]
      # Take the first address field that actually holds text, so a blank
      # "Personal e-mail:" does not hide a usable "Work e-mail:".
      address = ['Personal e-mail:', 'Work e-mail:', 'Windows Live ID:'].map { |label|
        vals[label.to_sym].to_s.split(' ').first
      }.compact.first
      vals[:email] = address || ''
      vals
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address; falls back to the hotmail.com endpoint for unknown domains

  def login_url
    DOMAINS[username_domain] || DOMAINS['hotmail.com']
  end


  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent
  #
  # Returns "scheme://host", or nil when there is no agent or current page.

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user
  #
  # username - address to inspect; defaults to options[:username].
  #
  # Returns whatever follows the last "@" (the whole string when there is no
  # "@"), or nil when no username is available.

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Blackbook.register(:hotmail, self)
end
@@ -0,0 +1,86 @@
1
+ require 'rubygems'
2
+ gem 'mechanize', '>= 0.7.0'
3
+ require 'mechanize'
4
+ require 'generator' # for SyncEnumerator
5
+
6
+ # Patch Mechanize's broken html unescaping Mechanize 0.6.11
7
class WWW::Mechanize
  # Turns +url+ into a URI object, resolved against +cur_page+ when it is
  # relative.
  #
  # url      - a URI, or anything whose #to_s yields a URL string.
  # cur_page - page whose uri is the base for relative URLs (defaults to the
  #            agent's current page).
  #
  # Raises RuntimeError when +url+ is relative and +cur_page+ has no uri.
  def to_absolute_uri(url, cur_page=current_page())
    unless url.is_a? URI
      raw = url.to_s.strip

      # Split the string into the stretches between already-escaped %XX
      # sequences and the sequences themselves, escape only the former, and
      # stitch the two lists back together in order.
      #
      # Mechanize here uses #zip to combine the two arrays, which will ignore
      # excessive elements of the second array (the one which is passed as an
      # argument). That means if the URL ends with more than one already escaped
      # character, then only the first one will be restored into the resulting
      # URL. SyncEnumerator walks both lists to the end instead.
      plain_parts   = raw.split(/%[0-9A-Fa-f]{2}/)
      escaped_parts = raw.scan(/%[0-9A-Fa-f]{2}/)
      pieces = SyncEnumerator.new(plain_parts, escaped_parts).map { |plain, escaped|
        "#{URI.escape(plain||'')}#{escaped}"
      }
      rebuilt = pieces.join('').gsub(/%23/, '#')

      url = URI.parse(Util.html_unescape(rebuilt))
    end

    # construct an absolute uri
    if url.relative?
      raise 'no history. please specify an absolute URL' unless cur_page.uri
      url = cur_page.uri + url
      # Strip initial "/.." bits from the path
      url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    end

    return url
  end
end
38
+
39
+ ##
40
+ # A base class for importers that scrape their contacts from web services
41
+
42
class Blackbook::Importer::PageScraper < Blackbook::Importer::Base

  attr_accessor :agent

  ##
  # Builds the Mechanize agent used for scraping, stores it in #agent and
  # returns it. The agent identifies itself with a Blackbook user agent
  # header (good netiquette) and has keep-alive switched off.

  def create_agent
    mech = self.agent = WWW::Mechanize.new
    mech.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
    mech.keep_alive = false
    mech
  end

  ##
  # Runs the three steps every page scraper shares: create the agent, let the
  # importer prepare itself, then scrape. Returns whatever #scrape_contacts
  # returns.

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Hook for whatever a provider needs before scraping can start (usually a
  # login). Does nothing here; subclasses override it.

  def prepare
  end

  ##
  # Hook that performs the actual scrape. Some providers expose a single page
  # (like Gmail's HTML Contacts page) while others need several pages walked
  # in turn. Does nothing here; subclasses override it.

  def scrape_contacts
  end

  ##
  # Returns +html+ with everything that looks like a tag (<...> or </...>)
  # removed.

  def strip_html( html )
    tag_pattern = /<\/?[^>]*>/
    html.gsub(tag_pattern, '')
  end
end
@@ -0,0 +1,67 @@
1
+ require 'blackbook/importer/page_scraper'
2
+
3
##
# Imports contacts for web.de FreeMail by collecting the senders listed in
# the mailbox folders.
class Blackbook::Importer::WebDE < Blackbook::Importer::PageScraper
  LOGIN_URL = "https://freemail.web.de"

  ##
  # Matches this importer to a user's name/address.
  #
  # Returns true when options[:username] ends in "@web.de" (case-insensitive),
  # false otherwise - a plain boolean, like the other importers.

  def =~( options )
    options && options[:username] =~ /@web\.de$/i ? true : false
  end

  ##
  # Logs in through the form named "login" on LOGIN_URL and stores the page
  # loaded from the second frame of the landing page in @next.
  #
  # Raises Blackbook::BadCredentialsError when the resulting URL contains
  # "logonfailed". Any other error raised along the way propagates unchanged.

  def login
    username, password = options[:username], options[:password]

    page = agent.get LOGIN_URL

    form = page.forms.with.name("login").first
    form.username = username
    form.password = password

    page = form.submit
    # follow logout hint
    if (continue_link = page.links.detect { |link| link.text =~ /weiter zu FreeMail/ })
      page = continue_link.click
    end

    if page.uri.to_s.match(/logonfailed/)
      raise Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again."
    end

    # follow content frame
    @next = agent.get page.frames[1].src
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Visits the "Posteingang" and "Unbekannt" folders in turn (the second link
  # is looked up on the page reached via the first) and gathers the contacts
  # found on each.
  #
  # Returns a flat Array of { :name, :email } Hashes with duplicates removed.

  def scrape_contacts
    page = @next
    contacts = []

    [/Posteingang/, /Unbekannt/].each do |folder|
      page = page.links.detect { |link| link.text =~ folder }.click
      # Merge contact by contact so the result stays flat and a sender that
      # shows up in both folders is listed once.
      find_contacts(page).each do |contact|
        contacts << contact unless contacts.include?(contact)
      end
    end

    contacts
  end

  protected

  ##
  # Builds one { :name, :email } Hash per "span.from a" link on +page+. The
  # link's title attribute is split on whitespace: the last token becomes the
  # e-mail address, the remaining tokens joined by spaces become the name.

  def find_contacts(page)
    page.search("span.from a").map do |link|
      recp = link.attributes["title"].gsub(/\n/, "").split(/\s/)
      email = recp.pop
      fullname = recp.join(" ")

      { :name => fullname, :email => email }
    end
  end

  Blackbook.register :webde, self
end
@@ -0,0 +1,63 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'fastercsv'
3
+
4
+ ##
5
+ # contacts importer for Yahoo!
6
+
7
class Blackbook::Importer::Yahoo < Blackbook::Importer::PageScraper

  ##
  # Matches this importer to a user's name/address.
  #
  # Returns true when options[:username] ends in "@yahoo.com" or
  # "@yahoo.co.uk" (case-insensitive), false otherwise.

  def =~(options = {})
    # The dot after "yahoo" is escaped so that e.g. "@yahooXcom" is rejected.
    options && options[:username] =~ /@yahoo\.co(m|\.uk)$/i ? true : false
  end

  ##
  # login for Yahoo!
  #
  # Submits the first form of the login page with the part of the username
  # before "@" and the password.
  #
  # Returns true on success.
  # Raises Blackbook::BadCredentialsError when the response contains one of
  # the known failure messages.

  def login
    page = agent.get('https://login.yahoo.com/config/login_verify2?')
    form = page.forms.first
    form.login = options[:username].split("@").first
    form.passwd = options[:password]
    page = agent.submit(form, form.buttons.first)

    if page.body =~ /Invalid ID or password./ || page.body =~ /This ID is not yet taken./
      raise Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again."
    end

    true
  end

  ##
  # prepare the importer

  def prepare
    login
  end

  ##
  # scrape yahoo contacts
  #
  # Downloads the Yahoo-format CSV export and converts every row except the
  # account owner's own entry.
  #
  # Returns an Array of { :name, :email } Hashes (no nil entries).
  # Raises Blackbook::BadCredentialsError when the address book asks for a
  # sign-in.

  def scrape_contacts
    page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
    if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    form = page.forms.last
    csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV

    contact_rows = FasterCSV.parse(csv.body)
    contact_rows.shift # drop the label row. TODO: Actually use the labels to find the indexes of the data we want

    own_address = options[:username].to_s
    own_id      = own_address.split("@").first

    contacts = contact_rows.collect do |row|
      yahoo_id = row[7].to_s
      # Don't collect self. Compare whole IDs: a prefix match would also drop
      # a contact "bob" when the account is "bobby@yahoo.com".
      next if !yahoo_id.empty? && (yahoo_id == own_id || yahoo_id == own_address)

      # email is a field in the data, but will be blank (nil or "") for Yahoo
      # users so we create their email address from the Yahoo ID
      email = row[4].to_s.empty? ? "#{row[7]}@yahoo.com" : row[4]

      { :name => "#{row[0]} #{row[2]}", :email => email }
    end

    # Rows skipped with `next` yield nil from the block; drop them.
    contacts.compact
  end

  Blackbook.register(:yahoo, self)
end
data/lib/blackbook.rb ADDED
@@ -0,0 +1,82 @@
1
+ $:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
2
+ require 'singleton'
3
+ require 'rubygems'
4
+
5
##
# Singleton registry of contact importers and exporters, and the entry point
# for fetching contacts through them.
class Blackbook
  include ::Singleton
  VERSION = '1.0.5'

  class BlackbookError < ::StandardError; end
  class BadCredentialsError < BlackbookError; end

  # Hashes of registered adapter instances, keyed by Symbol name.
  attr_accessor :importers
  attr_accessor :exporters

  # Starts with empty importer and exporter registries.
  def initialize
    self.importers = {}
    self.exporters = {}
  end

  # Class-level shortcut for #get on the singleton instance.
  def self.get( *args )
    instance.get( *args )
  end

  # Instantiates +adapter_class+ and files the instance under +name+ in the
  # importer or exporter registry, depending on which base class it descends
  # from.
  #
  # Raises ArgumentError ("Unknown adapter") when it descends from neither.
  def self.register(name, adapter_class)
    adapter = adapter_class.new
    if Importer::Base === adapter
      instance.importers[name.to_sym] = adapter
    elsif Exporter::Base === adapter
      instance.exporters[name.to_sym] = adapter
    else
      raise ArgumentError, "Unknown adapter"
    end
  end

  # Sends the vcards from the import to whatever is handling the export
  def export( importer, exporter, options )
    contacts = importer.import( options )
    exporter.export( contacts )
  end

  # Searches registered importers for one that will handle the given options.
  # Returns the first importer whose =~ accepts +options+, or nil.
  def find_importer( options )
    entry = importers.find { |_name, importer| importer =~ options }
    entry && entry.last
  end

  # Fetches contacts from various services or filetypes. The default is to return an array
  # of hashes - Blackbook's internal format
  #
  # Handles several different calls:
  #   get( :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :csv, :file => #<File:/path/to/file.csv> )
  #
  # Raises ArgumentError when no exporter is registered for options[:as]
  # (:basic by default) or when no importer can be determined.
  def get( *args )
    options = {}
    options = args.pop if args.last.is_a?(Hash)

    to_format = exporters[ options[:as] || :basic ]

    # An explicitly named importer wins; any failure to look it up (for
    # instance no name given at all) falls back to matching on the options.
    source = begin
      importers[args.first.to_sym]
    rescue StandardError
      nil
    end
    source ||= find_importer(options)

    raise ArgumentError, "Unknown exporter" unless to_format
    raise ArgumentError, "Unknown source" unless source

    export source, to_format, options
  end
end
64
+
65
+ # Require all the importers/exporters
66
+ require 'blackbook/importer/base'
67
+ require 'blackbook/exporter/base'
68
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/importer/*.rb')).each {|f| require f }
69
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/exporter/*.rb')).each {|f| require f }
70
+
71
class NilClass
  # nil is always reported as empty, so callers can ask any possibly-nil
  # value for #empty? without checking for nil first.
  def empty?; true; end
end
76
+
77
class Object
  # An object is blank when it reports itself empty; objects that have no
  # #empty? are blank only when they are falsy.
  def blank?
    return empty? if respond_to?(:empty?)
    !self
  end
end
82
+
@@ -0,0 +1,9 @@
1
+ gmx:
2
+ username: nil
3
+ password: nil
4
+ freenet:
5
+ username: nil
6
+ password: nil
7
+ webde:
8
+ username: nil
9
+ password: nil