alexjp-blackbook_csv 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. data/CHANGES.markdown +44 -0
  2. data/Manifest.txt +59 -0
  3. data/README.markdown +72 -0
  4. data/Rakefile +39 -0
  5. data/debug_blackbook.rb +10 -0
  6. data/init.rb +1 -0
  7. data/lib/blackbook/exporter/base.rb +16 -0
  8. data/lib/blackbook/exporter/vcf.rb +45 -0
  9. data/lib/blackbook/exporter/xml.rb +28 -0
  10. data/lib/blackbook/importer/aol.rb +83 -0
  11. data/lib/blackbook/importer/base.rb +39 -0
  12. data/lib/blackbook/importer/csv.rb +87 -0
  13. data/lib/blackbook/importer/gmail.rb +66 -0
  14. data/lib/blackbook/importer/hotmail.rb +124 -0
  15. data/lib/blackbook/importer/page_scraper.rb +86 -0
  16. data/lib/blackbook/importer/yahoo.rb +61 -0
  17. data/lib/blackbook.rb +76 -0
  18. data/test/fixtures/aol_application_page.html +566 -0
  19. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  20. data/test/fixtures/aol_contacts.html +90 -0
  21. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  22. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  23. data/test/fixtures/aol_login_response_stage_3.html +61 -0
  24. data/test/fixtures/aol_login_response_stage_4.html +48 -0
  25. data/test/fixtures/aol_login_response_stage_5.html +404 -0
  26. data/test/fixtures/aol_new_contacts.html +431 -0
  27. data/test/fixtures/gmail.csv +3 -0
  28. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  29. data/test/fixtures/gmail_contacts.html +228 -0
  30. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  31. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  32. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  33. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  34. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  35. data/test/fixtures/hotmail_contacts.html +191 -0
  36. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  37. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  38. data/test/fixtures/hotmail_login_response_stage_3.html +380 -0
  39. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  40. data/test/fixtures/yahoo_contacts.csv +3 -0
  41. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  42. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  43. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  44. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  45. data/test/scripts/live_test.rb +25 -0
  46. data/test/test_blackbook.rb +60 -0
  47. data/test/test_blackbook_exporter_base.rb +16 -0
  48. data/test/test_blackbook_exporter_vcf.rb +52 -0
  49. data/test/test_blackbook_exporter_xml.rb +16 -0
  50. data/test/test_blackbook_importer_aol.rb +113 -0
  51. data/test/test_blackbook_importer_base.rb +24 -0
  52. data/test/test_blackbook_importer_csv.rb +60 -0
  53. data/test/test_blackbook_importer_gmail.rb +117 -0
  54. data/test/test_blackbook_importer_hotmail.rb +147 -0
  55. data/test/test_blackbook_importer_page_scraper.rb +51 -0
  56. data/test/test_blackbook_importer_yahoo.rb +97 -0
  57. data/test/test_helper.rb +69 -0
  58. data/vendor/plugins/blackbook/lib/autotest/blackbook.rb +27 -0
  59. data/vendor/plugins/blackbook/lib/autotest/discover.rb +3 -0
  60. metadata +176 -0
@@ -0,0 +1,124 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'cgi'
3
+
4
+ ##
5
+ # imports contacts for MSN/Hotmail
6
class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper

  # Login endpoint to post credentials to, keyed by the (lowercase) domain of
  # the user's address.
  DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com"      => "https://login.live.com/ppsecure/post.srf",
              "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }.freeze

  ##
  # Matches this importer to an user's name/address
  #
  # Returns true when options[:username] ends in one of the DOMAINS keys
  # (compared case-insensitively), false otherwise - including when options or
  # the username is missing.

  def =~(options)
    return false unless options && options[:username]
    # to_s: an empty username has no domain part at all (split yields nothing)
    domain = username_domain(options[:username].to_s.downcase).to_s
    DOMAINS.key?(domain)
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however many characters are in passwd (so if passwd
  #    was 8 chars, you'd chop 8 chars of the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Returns the page fetched in the last step. Raises
  # Blackbook::BadCredentialsError when the response carries one of the known
  # failure messages, and Blackbook::BlackbookError when the response contains
  # no URL to continue to.

  def login
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login = options[:username]
    form.passwd = options[:password]
    form.PwdPad = ( "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size )])
    # g_QS is read out of the login page's body; when it is absent the action
    # simply gets an empty query string
    query_string = page.body[/g_QS="([^"]+)/, 1]
    form.action = "#{login_url}?#{query_string}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    redirect_url = page.body.scan(/http\:\/\/[^"]+/).first
    unless redirect_url
      # fail with a readable error instead of handing nil to the agent
      raise( Blackbook::BlackbookError,
        "Could not find the post-login redirect in the login response." )
    end
    agent.get( redirect_url )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns an array with one hash per contact. Every label/value pair found
  # for the contact is stored under the label (as a symbol), plus :name and
  # :email. :email is the first word of the first non-blank value among
  # "Personal e-mail:" and "Work e-mail:", or '' when neither is present.
  # Raises Blackbook::BadCredentialsError when the agent holds no MSPPre
  # cookie whose value is the username.

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end

    page = agent.get('PrintShell.aspx?type=contact')
    rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
    rows.collect do |row|
      name = row.search("//div[@class='cDisplayName']").first.innerText.strip

      vals = {}
      row.search("//table/tr").each do |pair|
        key = pair.search("/td[@class='TextAlignRight Label']").first.innerText.strip
        val = pair.search("/td[@class='Value']").first.innerText.strip
        vals[key.to_sym] = val
      end
      vals[:name] = name

      # a blank personal address must not hide an existing work address
      candidates = [ vals['Personal e-mail:'.to_sym], vals['Work e-mail:'.to_sym] ]
      address = candidates.find { |candidate| candidate && !candidate.empty? }
      vals[:email] = address ? address.split(' ').first : ''
      vals
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address
  #
  # The domain is downcased before the lookup so that it agrees with the
  # case-insensitive match done by =~. Unknown domains use the hotmail.com
  # endpoint.

  def login_url
    DOMAINS[username_domain.to_s.downcase] || DOMAINS['hotmail.com']
  end


  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent
  #
  # Returns "scheme://host", or nil when there is no agent or no current page.

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user
  #
  # Uses the given username, falling back to options[:username]. Returns
  # whatever follows the last '@' (the whole string when there is no '@'), or
  # nil when no username is available.

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Blackbook.register(:hotmail, self)
end
@@ -0,0 +1,86 @@
1
+ require 'rubygems'
2
+ gem 'mechanize', '>= 0.7.0'
3
+ require 'mechanize'
4
+ require 'generator' # for SyncEnumerator
5
+
6
+ # Patch for Mechanize's broken HTML unescaping (as found in Mechanize 0.6.11)
7
class WWW::Mechanize
  # Turns +url+ into a URI, resolving it against +cur_page+ when it is
  # relative.
  #
  # Anything that is not already a URI is converted to a stripped string,
  # escaped while keeping its existing %XX sequences as they are,
  # HTML-unescaped and parsed. Raises a RuntimeError when the URL is relative
  # and +cur_page+ has no uri.
  def to_absolute_uri(url, cur_page=current_page())
    unless url.is_a? URI
      raw = url.to_s.strip
      unescaped_parts = raw.split(/%[0-9A-Fa-f]{2}/)
      escaped_parts   = raw.scan(/%[0-9A-Fa-f]{2}/)

      # Mechanize here uses #zip to combine the two arrays, which will ignore
      # excessive elements of the second array (the one which is passed as an
      # argument). That means if the URL ends with more than one already escaped
      # character, then only the first one will be restored into the resulting
      # URL. SyncEnumerator walks both arrays to the end of the longer one.
      pieces = SyncEnumerator.new(unescaped_parts, escaped_parts).map { |plain, escaped|
        "#{URI.escape(plain||'')}#{escaped}"
      }
      rebuilt = pieces.join('').gsub(/%23/, '#')

      url = URI.parse(Util.html_unescape(rebuilt))
    end

    # construct an absolute uri
    if url.relative?
      raise 'no history. please specify an absolute URL' unless cur_page.uri
      url = cur_page.uri + url
      # Strip initial "/.." bits from the path
      url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    end

    return url
  end
end
38
+
39
+ ##
40
+ # A base class for importers that scrape their contacts from web services
41
+
42
class Blackbook::Importer::PageScraper < Blackbook::Importer::Base

  # The Mechanize agent used for all requests of this importer.
  attr_accessor :agent

  ##
  # builds a fresh Mechanize agent, stores it in #agent and returns it. The
  # agent identifies itself with a Blackbook user agent header (good
  # netiquette) and has keep-alive switched off.

  def create_agent
    self.agent = WWW::Mechanize.new
    browser = agent
    browser.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
    browser.keep_alive = false
    browser
  end

  ##
  # The common flow for every page scraper: create the agent, let the
  # importer prepare itself, then scrape. Returns whatever #scrape_contacts
  # returns.

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Hook for whatever has to happen before contacts can actually be scraped
  # (providers will often require a login). Does nothing here; subclasses
  # override it.

  def prepare
  end

  ##
  # Hook for the scrape itself. Some providers offer a single page to scrape
  # (like Gmail's HTML Contacts page) while others require navigating several
  # pages, scraping as you go. Does nothing here; subclasses override it.

  def scrape_contacts
  end

  ##
  # returns a copy of +html+ with everything that looks like a tag removed

  def strip_html( html )
    tag_pattern = /<\/?[^>]*>/
    html.gsub(tag_pattern, '')
  end
end
@@ -0,0 +1,61 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'fastercsv'
3
+
4
+ ##
5
+ # contacts importer for Yahoo!
6
+
7
class Blackbook::Importer::Yahoo < Blackbook::Importer::PageScraper

  ##
  # Matches this importer to an user's name/address
  #
  # Returns true only when options[:username] ends in "@yahoo.com"
  # (case-insensitive). The dot is matched literally and the match is anchored
  # to the end of the whole string, not to the end of a line.

  def =~(options = {})
    options && options[:username] =~ /@yahoo\.com\z/i ? true : false
  end

  ##
  # login for Yahoo!
  #
  # Submits the first form of the login page with the part of the username
  # before the "@" and the password. Returns true, or raises
  # Blackbook::BadCredentialsError when the response reports an invalid ID or
  # password.

  def login
    page = agent.get('https://login.yahoo.com/config/login_verify2?')
    form = page.forms.first
    form.login = options[:username].split("@").first
    form.passwd = options[:password]
    page = agent.submit(form, form.buttons.first)

    # Check for login success
    raise( Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again." ) if page.body =~ /Invalid ID or password./
    true
  end

  ##
  # prepare the importer

  def prepare
    login
  end

  ##
  # scrape yahoo contacts
  #
  # Downloads the Yahoo-format CSV export and returns an array of
  # { :name, :email } hashes, one per row, leaving out the row that describes
  # the logged-in user. Raises Blackbook::BadCredentialsError when the address
  # book answers with its sign-in page.

  def scrape_contacts
    page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
    if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    form = page.forms.last
    csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV

    contact_rows = FasterCSV.parse(csv.body)
    contact_rows.shift # drop the header row. TODO: Actually use the labels to find the indexes of the data we want

    # The user's own ID: the full address and the part before the "@" (the
    # latter is what #login submits). Compared exactly, so a contact whose ID
    # is merely a prefix of the username is kept.
    full_username = options[:username].to_s.downcase
    own_ids = [ full_username, full_username.split('@').first.to_s ]

    contacts = []
    contact_rows.each do |row|
      # column 7 is used as the contact's Yahoo ID; to_s covers both nil
      # (unquoted empty field) and "" (quoted empty field)
      yahoo_id = row[7].to_s
      next if !yahoo_id.empty? && own_ids.include?(yahoo_id.downcase) # Don't collect self

      # email is a field in the data, but will be blank for Yahoo users so we
      # create their email address from the ID
      email = row[4].to_s
      if email.empty?
        email = yahoo_id.empty? ? '' : "#{yahoo_id}@yahoo.com"
      end

      contacts << { :name => "#{row[0]} #{row[2]}", :email => email }
    end
    contacts
  end

  Blackbook.register(:yahoo, self)
end
data/lib/blackbook.rb ADDED
@@ -0,0 +1,76 @@
1
+ $:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
2
+ require 'singleton'
3
+ require 'rubygems'
4
+
5
##
# Singleton registry of contact importers and exporters, and the entry point
# for fetching contacts through them.
class Blackbook
  include ::Singleton
  VERSION = '1.0.5'

  class BlackbookError < ::StandardError; end
  class BadCredentialsError < BlackbookError; end

  # Hash of registered importer instances, keyed by symbol
  attr_accessor :importers
  # Hash of registered exporter instances, keyed by symbol
  attr_accessor :exporters

  # Shortcut for Blackbook.instance.get - see #get for the accepted arguments
  def self.get( *args )
    instance.get( *args )
  end

  # Instantiates adapter_class (with no arguments) and stores the instance
  # under name (converted to a symbol) as an importer or an exporter,
  # depending on which base class it descends from.
  #
  # Raises ArgumentError when the instance is neither an Importer::Base nor an
  # Exporter::Base.
  def self.register(name, adapter_class)
    case adapter = adapter_class.new
    when Importer::Base
      instance.importers[name.to_sym] = adapter
    when Exporter::Base
      instance.exporters[name.to_sym] = adapter
    else
      raise ArgumentError, "Unknown adapter"
    end
  end

  # Sends the vcards from the import to whatever is handling the export
  def export( importer, exporter, options )
    exporter.export importer.import( options )
  end

  # Searches registered importers for one that will handle the given options.
  # Returns the first importer whose =~ accepts the options, or nil.
  def find_importer( options )
    importers.values.find { |importer| importer =~ options }
  end

  # Fetches contacts from various services or filetypes. The default is to return an array
  # of hashes - Blackbook's internal format
  #
  # Handles several different calls:
  #   get( :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :csv, :file => #<File:/path/to/file.csv> )
  #
  # The importer is looked up by the first argument when one is given and
  # registered, otherwise it is found by matching the options. The exporter
  # is picked by options[:as] (symbol or string), defaulting to :basic.
  #
  # Raises ArgumentError ("Unknown exporter" / "Unknown source") when either
  # cannot be determined.
  def get( *args )
    options = args.last.is_a?(Hash) ? args.pop : {}

    format_name = options[:as] || :basic
    format_name = format_name.to_sym if format_name.respond_to?(:to_sym)
    to_format = exporters[ format_name ]

    # only attempt the lookup by name when a usable name was actually given
    source_name = args.first
    source = importers[ source_name.to_sym ] if source_name.respond_to?(:to_sym)
    source ||= find_importer(options)

    raise ArgumentError, "Unknown exporter" unless to_format
    raise ArgumentError, "Unknown source" unless source

    export source, to_format, options
  end

  # Starts out with empty importer and exporter registries
  def initialize
    self.importers = {}
    self.exporters = {}
  end
end
64
+
65
+ # Require all the importers/exporters
66
+ require 'blackbook/importer/base'
67
+ require 'blackbook/exporter/base'
68
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/importer/*.rb')).each {|f| require f }
69
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/exporter/*.rb')).each {|f| require f }
70
+
71
class NilClass
  # Lets nil answer #empty? the way an empty string or collection does, so
  # callers can ask value.empty? without checking for nil first.
  def empty?; true; end
end
76
+