alexjp-blackbook_csv 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. data/CHANGES.markdown +44 -0
  2. data/Manifest.txt +59 -0
  3. data/README.markdown +72 -0
  4. data/Rakefile +39 -0
  5. data/debug_blackbook.rb +10 -0
  6. data/init.rb +1 -0
  7. data/lib/blackbook/exporter/base.rb +16 -0
  8. data/lib/blackbook/exporter/vcf.rb +45 -0
  9. data/lib/blackbook/exporter/xml.rb +28 -0
  10. data/lib/blackbook/importer/aol.rb +83 -0
  11. data/lib/blackbook/importer/base.rb +39 -0
  12. data/lib/blackbook/importer/csv.rb +87 -0
  13. data/lib/blackbook/importer/gmail.rb +66 -0
  14. data/lib/blackbook/importer/hotmail.rb +124 -0
  15. data/lib/blackbook/importer/page_scraper.rb +86 -0
  16. data/lib/blackbook/importer/yahoo.rb +61 -0
  17. data/lib/blackbook.rb +76 -0
  18. data/test/fixtures/aol_application_page.html +566 -0
  19. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  20. data/test/fixtures/aol_contacts.html +90 -0
  21. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  22. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  23. data/test/fixtures/aol_login_response_stage_3.html +61 -0
  24. data/test/fixtures/aol_login_response_stage_4.html +48 -0
  25. data/test/fixtures/aol_login_response_stage_5.html +404 -0
  26. data/test/fixtures/aol_new_contacts.html +431 -0
  27. data/test/fixtures/gmail.csv +3 -0
  28. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  29. data/test/fixtures/gmail_contacts.html +228 -0
  30. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  31. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  32. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  33. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  34. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  35. data/test/fixtures/hotmail_contacts.html +191 -0
  36. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  37. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  38. data/test/fixtures/hotmail_login_response_stage_3.html +380 -0
  39. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  40. data/test/fixtures/yahoo_contacts.csv +3 -0
  41. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  42. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  43. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  44. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  45. data/test/scripts/live_test.rb +25 -0
  46. data/test/test_blackbook.rb +60 -0
  47. data/test/test_blackbook_exporter_base.rb +16 -0
  48. data/test/test_blackbook_exporter_vcf.rb +52 -0
  49. data/test/test_blackbook_exporter_xml.rb +16 -0
  50. data/test/test_blackbook_importer_aol.rb +113 -0
  51. data/test/test_blackbook_importer_base.rb +24 -0
  52. data/test/test_blackbook_importer_csv.rb +60 -0
  53. data/test/test_blackbook_importer_gmail.rb +117 -0
  54. data/test/test_blackbook_importer_hotmail.rb +147 -0
  55. data/test/test_blackbook_importer_page_scraper.rb +51 -0
  56. data/test/test_blackbook_importer_yahoo.rb +97 -0
  57. data/test/test_helper.rb +69 -0
  58. data/vendor/plugins/blackbook/lib/autotest/blackbook.rb +27 -0
  59. data/vendor/plugins/blackbook/lib/autotest/discover.rb +3 -0
  60. metadata +176 -0
@@ -0,0 +1,124 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'cgi'
3
+
4
##
# Imports contacts for MSN/Hotmail accounts: logs in through the Live login
# form with a Mechanize agent and scrapes the printable contacts page.

class Blackbook::Importer::Hotmail < Blackbook::Importer::PageScraper

  # Maps each supported e-mail domain (lowercase) to the URL its login form is
  # posted to.
  DOMAINS = { "compaq.net"        => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk"     => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com"       => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr"        => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it"        => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com"           => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com"      => "https://login.live.com/ppsecure/post.srf",
              "webtv.net"         => "https://login.live.com/ppsecure/post.srf" }

  ##
  # Matches this importer to a user's name/address.
  #
  # Returns true when options[:username] ends in one of the DOMAINS keys
  # (compared case-insensitively), false otherwise - including when options
  # or the username is missing.

  def =~(options)
    return false unless options && options[:username]
    # A username without a domain yields nil here, which is simply not a key.
    DOMAINS.key?(username_domain(options[:username].downcase))
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however
  #    many characters are in passwd (so if passwd was 8 chars, you'd chop 8
  #    chars off the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving
  #    you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Returns the page fetched in the last step. Raises
  # Blackbook::BadCredentialsError when the response contains one of the known
  # sign-in failure messages.

  def login
    # NOTE(review): the login form is requested over plain http; only the
    # form submission (see DOMAINS) goes to an https URL.
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login  = options[:username]
    form.passwd = options[:password]
    form.PwdPad = "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size)]

    # The query string lives in a javascript variable on the login page; it is
    # nil (and an empty query is appended) when the variable is not present.
    query_string = page.body.to_s[/g_QS="([^"]+)/, 1]
    form.action = "#{login_url}?#{query_string}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Blackbook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    # Follow the first http URL embedded in the response body.
    agent.get( page.body.scan(/http\:\/\/[^"]+/).first )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns an array with one hash per contact. Each hash holds every
  # label/value pair of the contact's table (keyed by the label as a symbol)
  # plus :name and :email. :email is the first whitespace-separated token of
  # the personal e-mail, falling back to the work e-mail, or '' when the
  # contact has neither field.
  #
  # Raises Blackbook::BadCredentialsError when the agent carries no MSPPre
  # cookie whose value is the username.

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end

    # Relative URL: resolved against the page the agent is currently on.
    page = agent.get('PrintShell.aspx?type=contact')
    rows = page.search("//div[@class='ContactsPrintPane cPrintContact BorderTop']")
    rows.collect do |row|
      name = row.search("//div[@class='cDisplayName']").first.innerText.strip

      vals = {}
      row.search("//table/tr").each do |pair|
        key = pair.search("/td[@class='TextAlignRight Label']").first.innerText.strip
        val = pair.search("/td[@class='Value']").first.innerText.strip
        vals[key.to_sym] = val
      end
      vals[:name] = name

      address = vals['Personal e-mail:'.to_sym] || vals['Work e-mail:'.to_sym]
      vals[:email] = address ? address.split(' ').first : ''
      vals
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address; falls back to the hotmail.com service for unknown domains

  def login_url
    # Downcase so the lookup agrees with #=~, which matches case-insensitively.
    DOMAINS[username_domain.to_s.downcase] || DOMAINS['hotmail.com']
  end

  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent; returns "scheme://host", or nil when there is no agent
  # or no current page

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user: everything after the last '@' of the
  # given username (defaults to options[:username]); nil when no username is
  # available

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Blackbook.register(:hotmail, self)
end
@@ -0,0 +1,86 @@
1
+ require 'rubygems'
2
+ gem 'mechanize', '>= 0.7.0'
3
+ require 'mechanize'
4
+ require 'generator' # for SyncEnumerator
5
+
6
# Patch Mechanize's broken html unescaping Mechanize 0.6.11
class WWW::Mechanize
  # Turns +url+ into an absolute URI.
  #
  # Anything that is not already a URI is stringified, stripped, escaped
  # (leaving existing %XX sequences alone), html-unescaped and parsed. A
  # relative result is resolved against +cur_page+ (the current page by
  # default); a RuntimeError is raised when that page has no uri.
  def to_absolute_uri(url, cur_page=current_page())
    unless url.is_a? URI
      raw = url.to_s.strip

      # Split the string into the text between %XX sequences and the
      # sequences themselves, so only the former gets escaped.
      plain_chunks    = raw.split(/%[0-9A-Fa-f]{2}/)
      escaped_triples = raw.scan(/%[0-9A-Fa-f]{2}/)

      # Mechanize here uses #zip to combine the two arrays, which will ignore
      # excessive elements of the second array (the one which is passed as an
      # argument). That means if the URL ends with more than one already
      # escaped character, then only the first one will be restored into the
      # resulting URL. SyncEnumerator walks both arrays to the end instead.
      pairs   = SyncEnumerator.new(plain_chunks, escaped_triples)
      rebuilt = pairs.map { |chunk, triple|
        "#{URI.escape(chunk||'')}#{triple}"
      }.join('').gsub(/%23/, '#')

      url = URI.parse(Util.html_unescape(rebuilt))
    end

    # Already absolute: nothing left to do.
    return url unless url.relative?

    # construct an absolute uri
    raise 'no history. please specify an absolute URL' unless cur_page.uri
    absolute = cur_page.uri + url
    # Strip initial "/.." bits from the path
    absolute.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    absolute
  end
end
38
+
39
##
# Base class for importers that scrape their contacts from web services
# using a Mechanize agent.

class Blackbook::Importer::PageScraper < Blackbook::Importer::Base

  # The Mechanize agent used for all requests; set by #create_agent.
  attr_accessor :agent

  ##
  # Creates the Mechanize agent used to do the scraping, stores it in #agent
  # and returns it. The agent gets a descriptive user agent header (good net
  # etiquette) and has keep-alive switched off.

  def create_agent
    scraper = self.agent = WWW::Mechanize.new
    scraper.user_agent = "Mozilla/4.0 (compatible; Blackbook #{Blackbook::VERSION})"
    scraper.keep_alive = false
    scraper
  end

  ##
  # Runs the three steps every page scraper follows: instantiate the agent,
  # prepare for the scrape, then do the actual scrape. Returns whatever
  # #scrape_contacts returns.

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Hook for whatever a provider needs before scraping (typically a login).
  # Does nothing here; subclasses override it.

  def prepare
  end

  ##
  # Hook for the scrape itself. Some providers have a single page to scrape
  # (like Gmail's HTML Contacts page) while others might require navigating
  # several pages, scraping along the way. Does nothing here; subclasses
  # override it.

  def scrape_contacts
  end

  ##
  # Helper returning +html+ with every <...> tag removed.

  def strip_html( html )
    tag_pattern = /<\/?[^>]*>/
    html.gsub(tag_pattern, '')
  end
end
@@ -0,0 +1,61 @@
1
+ require 'blackbook/importer/page_scraper'
2
+ require 'fastercsv'
3
+
4
##
# Contacts importer for Yahoo!: logs in with a Mechanize agent and downloads
# the address book as Yahoo-format CSV.

class Blackbook::Importer::Yahoo < Blackbook::Importer::PageScraper

  ##
  # Matches this importer to a user's name/address.
  #
  # Returns true when options[:username] ends in "@yahoo.com" (any case),
  # false otherwise - including when options or the username is missing.

  def =~(options = {})
    return false unless options && options[:username]
    # Literal dot and end-of-string anchor, so "x@yahooXcom" or an address
    # followed by a newline and more text cannot match.
    options[:username] =~ /@yahoo\.com\z/i ? true : false
  end

  ##
  # Login for Yahoo!: submits the login form with the part of the username
  # before the "@" and the password.
  #
  # Returns true. Raises Blackbook::BadCredentialsError when the response
  # reports an invalid id or password.

  def login
    page = agent.get('https://login.yahoo.com/config/login_verify2?')
    form = page.forms.first
    form.login = options[:username].split("@").first
    form.passwd = options[:password]
    page = agent.submit(form, form.buttons.first)

    # Check for login success
    if page.body =~ /Invalid ID or password./
      raise( Blackbook::BadCredentialsError, "That username and password was not accepted. Please check them and try again." )
    end
    true
  end

  ##
  # prepare the importer

  def prepare
    login
  end

  ##
  # Scrape yahoo contacts.
  #
  # Returns an array of { :name, :email } hashes, one per CSV row, leaving out
  # the row that describes the logged-in user. Raises
  # Blackbook::BadCredentialsError when Yahoo! answers with its sign-in page.

  def scrape_contacts
    page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
    if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
      raise( Blackbook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    form = page.forms.last
    csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV

    contact_rows = FasterCSV.parse(csv.body)
    # Drop the header row.
    # TODO: Actually use the labels to find the indexes of the data we want
    contact_rows.shift

    contacts = contact_rows.collect do |row|
      yahoo_id = row[7].to_s

      # Don't collect self. The id must be the whole part before "@" (or the
      # whole username), so user "bob" does not also drop contact "bo"; it is
      # escaped because ids may contain regexp metacharacters such as ".".
      next if !yahoo_id.empty? &&
              options[:username].to_s =~ /\A#{Regexp.escape(yahoo_id)}(?:@|\z)/

      # email is a field in the data, but will be blank for Yahoo users so we
      # create their email address. Blank covers nil as well as an empty
      # (quoted) field.
      email = row[4]
      email = "#{yahoo_id}@yahoo.com" if email.to_s.strip.empty?

      { :name => "#{row[0]} #{row[2]}", :email => email }
    end

    # Skipped rows leave nil behind in the collected array; remove them so
    # callers only ever see contact hashes.
    contacts.compact
  end

  Blackbook.register(:yahoo, self)
end
data/lib/blackbook.rb ADDED
@@ -0,0 +1,76 @@
1
+ $:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
2
+ require 'singleton'
3
+ require 'rubygems'
4
+
5
##
# Singleton registry of contact importers and exporters, and the entry point
# for fetching contacts (see Blackbook.get).

class Blackbook
  include ::Singleton
  VERSION = '1.0.5'

  class BlackbookError < ::StandardError; end
  class BadCredentialsError < BlackbookError; end

  # Registered importer instances, keyed by name (Symbol).
  attr_accessor :importers
  # Registered exporter instances, keyed by name (Symbol).
  attr_accessor :exporters

  # Class-level shortcut for Blackbook.instance.get; see #get.
  def self.get( *args )
    instance.get( *args )
  end

  # Instantiates +adapter_class+ and files the instance under +name+ with the
  # importers or the exporters, depending on which base class it descends
  # from. Raises ArgumentError for any other kind of object.
  def self.register(name, adapter_class)
    case adapter = adapter_class.new
    when Importer::Base
      instance.importers[name.to_sym] = adapter
    when Exporter::Base
      instance.exporters[name.to_sym] = adapter
    else
      raise ArgumentError, "Unknown adapter"
    end
  end

  # Sends the vcards from the import to whatever is handling the export
  def export( importer, exporter, options )
    exporter.export importer.import( options )
  end

  # Searches registered importers for one that will handle the given options;
  # returns nil when none matches
  def find_importer( options )
    importers.values.find { |importer| importer =~ options }
  end

  # Fetches contacts from various services or filetypes. The default is to return an array
  # of hashes - Blackbook's internal format
  #
  # Handles several different calls:
  #   get( :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
  #   get( :csv, :file => #<File:/path/to/file.csv> )
  #
  # The exporter is chosen by options[:as] (default :basic). The importer is
  # the one registered under the first non-hash argument, if any; otherwise
  # the first registered importer that matches the options. Raises
  # ArgumentError when either cannot be found.
  def get( *args )
    options   = args.last.is_a?(Hash) ? args.pop : {}
    to_format = exporters[ options[:as] || :basic ]

    # Only look an importer up by name when a name-like argument was given;
    # anything else (including no argument at all) falls through to matching
    # on the options.
    requested = args.first
    source    = importers[requested.to_sym] if requested.respond_to?(:to_sym)
    source  ||= find_importer(options)

    raise ArgumentError, "Unknown exporter" unless to_format
    raise ArgumentError, "Unknown source" unless source

    export source, to_format, options
  end

  # Starts out with empty importer and exporter registries.
  def initialize
    self.importers = {}
    self.exporters = {}
  end
end
64
+
65
+ # Require all the importers/exporters
66
+ require 'blackbook/importer/base'
67
+ require 'blackbook/exporter/base'
68
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/importer/*.rb')).each {|f| require f }
69
+ Dir.glob(File.join(File.dirname(__FILE__), 'blackbook/exporter/*.rb')).each {|f| require f }
70
+
71
# Core extension: gives nil an #empty? method so code can call +empty?+ on a
# value that may be nil without checking for nil first.
class NilClass
  # nil always counts as empty.
  def empty?; true; end
end
76
+