graybook 1.0.22

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gitignore +1 -0
  2. data/CHANGES.md +49 -0
  3. data/Manifest.txt +59 -0
  4. data/README.md +66 -0
  5. data/Rakefile +40 -0
  6. data/VERSION +1 -0
  7. data/VERSION.yml +4 -0
  8. data/VERSION_NAME +1 -0
  9. data/debug_graybook.rb +10 -0
  10. data/init.rb +1 -0
  11. data/lib/graybook/exporter/base.rb +16 -0
  12. data/lib/graybook/exporter/vcf.rb +45 -0
  13. data/lib/graybook/exporter/xml.rb +28 -0
  14. data/lib/graybook/importer/aol.rb +87 -0
  15. data/lib/graybook/importer/base.rb +39 -0
  16. data/lib/graybook/importer/csv.rb +74 -0
  17. data/lib/graybook/importer/freenet.rb +62 -0
  18. data/lib/graybook/importer/gmail.rb +84 -0
  19. data/lib/graybook/importer/gmx.rb +68 -0
  20. data/lib/graybook/importer/hotmail.rb +135 -0
  21. data/lib/graybook/importer/page_scraper.rb +86 -0
  22. data/lib/graybook/importer/web.de.rb +67 -0
  23. data/lib/graybook/importer/yahoo.rb +63 -0
  24. data/lib/graybook.rb +83 -0
  25. data/test/config/credentials.yml.example +9 -0
  26. data/test/fixtures/aol_application_page.html +566 -0
  27. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  28. data/test/fixtures/aol_contacts.html +102 -0
  29. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  30. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  31. data/test/fixtures/aol_login_response_stage_3.html +48 -0
  32. data/test/fixtures/aol_login_response_stage_4.html +404 -0
  33. data/test/fixtures/aol_login_response_stage_5.html +404 -0
  34. data/test/fixtures/aol_new_contacts.html +431 -0
  35. data/test/fixtures/gmail.csv +3 -0
  36. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  37. data/test/fixtures/gmail_contacts.html +228 -0
  38. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  39. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  40. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  41. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  42. data/test/fixtures/gmail_redirect_body.html +10 -0
  43. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  44. data/test/fixtures/hotmail_contacts.html +262 -0
  45. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  46. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  47. data/test/fixtures/hotmail_login_response_stage_3.html +519 -0
  48. data/test/fixtures/hotmail_scrape_first_page.html +77 -0
  49. data/test/fixtures/hotmail_scrape_response_stage_1.html +90 -0
  50. data/test/fixtures/hotmail_scrape_response_stage_2.html +77 -0
  51. data/test/fixtures/hotmail_scrape_response_stage_3.html +0 -0
  52. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  53. data/test/fixtures/yahoo_contacts.csv +3 -0
  54. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  55. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  56. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  57. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  58. data/test/fixtures/yahoo_no_user_response_stage_2.html +574 -0
  59. data/test/freenet_importer_test.rb +53 -0
  60. data/test/gmx_importer_test.rb +53 -0
  61. data/test/scripts/live_test.rb +25 -0
  62. data/test/test_graybook.rb +60 -0
  63. data/test/test_graybook_exporter_base.rb +16 -0
  64. data/test/test_graybook_exporter_vcf.rb +52 -0
  65. data/test/test_graybook_exporter_xml.rb +16 -0
  66. data/test/test_graybook_importer_aol.rb +108 -0
  67. data/test/test_graybook_importer_base.rb +24 -0
  68. data/test/test_graybook_importer_csv.rb +60 -0
  69. data/test/test_graybook_importer_gmail.rb +116 -0
  70. data/test/test_graybook_importer_hotmail.rb +165 -0
  71. data/test/test_graybook_importer_page_scraper.rb +51 -0
  72. data/test/test_graybook_importer_yahoo.rb +137 -0
  73. data/test/test_helper.rb +71 -0
  74. data/test/web.de_importer_test.rb +53 -0
  75. data/updater.rb +15 -0
  76. data/vendor/plugins/graybook/lib/autotest/discover.rb +3 -0
  77. data/vendor/plugins/graybook/lib/autotest/graybook.rb +27 -0
  78. metadata +185 -0
@@ -0,0 +1,68 @@
1
+ require 'graybook/importer/page_scraper'
2
+
3
##
# Imports contacts for GMX accounts (addresses ending in @gmx.de or
# @gmx.net) by logging in through the web front end and collecting the
# correspondents shown in the mail folder listings.

class Graybook::Importer::GMX < Graybook::Importer::PageScraper
  LOGIN_URL = "https://www.gmx.net/"

  ##
  # Matches this importer to a user's address. Truthy when
  # options[:username] ends in @gmx.de or @gmx.net (case-insensitive).

  def =~( options )
    options && options[:username] =~ /@gmx\.(de|net)$/i
  end

  ##
  # Submits the "login" form found on LOGIN_URL and keeps the resulting page
  # in @next for scrape_contacts.
  #
  # Raises Graybook::BadCredentialsError when the response shows the
  # "Passwort vergessen?" hint or redirects to a login-failed URL. Any other
  # error propagates unchanged.

  def login
    username, password = options[:username], options[:password]

    page = agent.get LOGIN_URL

    form = page.forms.with.name("login").first
    form.id = username
    form.p = password
    page = form.submit
    @next = page

    hint = page.at("div.index")
    if (hint && hint.inner_html == "Passwort vergessen?") ||
       page.uri.to_s.match(/login-failed/)
      raise Graybook::BadCredentialsError.new
    end
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Walks the inbox, archive and sent folders and returns a flat array of
  # unique { :name, :email } hashes.

  def scrape_contacts
    page = @next

    # Go to E-Mails
    page = page.links.select { |link| link.text =~ /E-Mail/ }.first.click

    per_folder = [/Posteingang/, /Archiv/, /Gesendet/].map do |folder|
      page = page.links.select { |link| link.text =~ folder }.first.click
      find_contacts(page)
    end

    # Flatten first so that duplicates are removed per contact; comparing the
    # per-folder arrays would let a contact seen in two folders through twice.
    per_folder.flatten.inject([]) do |memo, contact|
      memo << contact unless memo.include? contact
      memo
    end
  end

  protected

  ##
  # Extracts contacts from the links inside form#MI whose title attribute
  # contains an "@". The last whitespace-separated token of the title is
  # taken as the address (angle brackets removed), the rest as the name.

  def find_contacts(page)
    links = page.search("form#MI a").select { |link| link.attributes["title"] =~ /@/ }
    links.map do |link|
      recp = link.attributes["title"].gsub(/\n/, "").split(/\s/)
      email = recp.pop.gsub(/[<>]/, "")
      fullname = recp.join(" ")

      { :name => fullname, :email => email }
    end
  end

  Graybook.register :gmx, self
end
@@ -0,0 +1,135 @@
1
+ require 'graybook/importer/page_scraper'
2
+ require 'cgi'
3
+
4
##
# imports contacts for MSN/Hotmail
class Graybook::Importer::Hotmail < Graybook::Importer::PageScraper

  # Login endpoint to post to for each supported address domain.
  DOMAINS = { "compaq.net" => "https://msnia.login.live.com/ppsecure/post.srf",
              "hotmail.co.jp" => "https://login.live.com/ppsecure/post.srf",
              "hotmail.co.uk" => "https://login.live.com/ppsecure/post.srf",
              "hotmail.com" => "https://login.live.com/ppsecure/post.srf",
              "hotmail.de" => "https://login.live.com/ppsecure/post.srf",
              "hotmail.fr" => "https://login.live.com/ppsecure/post.srf",
              "hotmail.it" => "https://login.live.com/ppsecure/post.srf",
              "live.com" => "https://login.live.com/ppsecure/post.srf",
              "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
              "msn.com" => "https://msnia.login.live.com/ppsecure/post.srf",
              "passport.com" => "https://login.live.com/ppsecure/post.srf",
              "webtv.net" => "https://login.live.com/ppsecure/post.srf" }

  ##
  # Matches this importer to an user's name/address. Returns true when the
  # part of options[:username] after the last "@" (lower-cased) is one of the
  # DOMAINS keys, false otherwise.

  def =~(options)
    return false unless options && options[:username]
    domain = username_domain(options[:username].downcase)
    # A missing or empty domain is simply not a key, so no separate check is needed.
    DOMAINS.key?(domain)
  end

  ##
  # Login procedure
  # 1. Go to login form
  # 2. Set login and passwd
  # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however many characters are in passwd (so if passwd
  #    was 8 chars, you'd chop 8 chars of the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving you IfYouAreReadingThisYouHaveTooMuch)
  # 4. Set the action to the appropriate URL for the username's domain
  # 5. Get the query string to append to the new action
  # 6. Submit the form and parse the url from the resulting page's javascript
  # 7. Go to that url
  #
  # Raises Graybook::BadCredentialsError when the response body contains one
  # of the known sign-in failure messages.

  def login
    page = agent.get('http://login.live.com/login.srf?id=2')
    form = page.forms.first
    form.login = options[:username]
    form.passwd = options[:password]
    form.PwdPad = ( "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size )])
    # nil when the page carries no g_QS value; interpolates as an empty string
    query_string = page.body[/g_QS="([^"]+)/, 1]
    form.action = "#{login_url}?#{query_string}"
    page = agent.submit(form)

    # Check for login success
    if page.body =~ /The e-mail address or password is incorrect/ ||
       page.body =~ /Sign in failed\./
      raise( Graybook::BadCredentialsError,
        "That username and password was not accepted. Please check them and try again." )
    end

    @first_page = agent.get( page.body.scan(/http\:\/\/[^"]+/).first )
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Scrape contacts for Hotmail
  # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
  # raises an end of file error in Net::HTTP via Mechanize.
  # Seems like Hotmail addresses are now hosted on Windows Live.
  #
  # Returns an array of { :name, :email } hashes gathered from the contact
  # list and every following "Next page". Raises Graybook::BadCredentialsError
  # when the agent holds no MSPPre cookie matching options[:username].

  def scrape_contacts
    unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
      raise( Graybook::BadCredentialsError, "Must be authenticated to access contacts." )
    end

    page = agent.get('http://mail.live.com/')

    # Follow the iframe that was actually matched rather than whichever
    # iframe happens to come first on the page.
    if today_frame = page.iframes.detect { |f| f.src =~ /\/mail\/TodayLight.aspx/ }
      page = agent.get( today_frame.src )

      if button = page.forms.first.buttons.detect { |b| b.name == 'TakeMeToInbox' }
        page = agent.submit( page.forms.first, button )
      end
    end

    page = page.link_with(:text => 'Contact list').click

    contacts = parse_contacts(page.body)
    while link = page.link_with(:text => 'Next page')
      page = link.click
      contacts += parse_contacts(page.body)
    end

    contacts
  end

  ##
  # Pulls name/email pairs out of the contact-list page source. The name is
  # set to nil when it contains the escaped sequence \x26\x2364\x3b; escaped
  # \x40 sequences in the email are turned into "@".

  def parse_contacts(source)
    source.scan(/ICc.*\:\[.*?,.*?,\['ct'\],'(.*?)',.*?,.*?,'(.*?)',.*\]/).map do |name, email|
      { :name => (name =~ /\\x26\\x2364\\x3b/ ? nil : name), :email => email.gsub(/\\x40/, '@') }
    end
  end

  ##
  # lookup for the login service that should be used based on the user's
  # address; falls back to the hotmail.com endpoint for unknown domains

  def login_url
    DOMAINS[username_domain] || DOMAINS['hotmail.com']
  end


  ##
  # normalizes the host for the page that is currently being "viewed" by the
  # Mechanize agent

  def current_host
    return nil unless agent && agent.current_page
    uri = agent.current_page.uri
    "#{uri.scheme}://#{uri.host}"
  end

  ##
  # determines the domain for the user: everything after the last "@" of the
  # given username, or of options[:username] when none is given. Returns nil
  # when no username is available.

  def username_domain(username = nil)
    username ||= options[:username] if options
    return unless username
    username.to_s.split('@').last
  end

  Graybook.register(:hotmail, self)
end
@@ -0,0 +1,86 @@
1
+ require 'rubygems'
2
+ gem 'mechanize', '>= 0.7.0'
3
+ require 'mechanize'
4
+ require 'generator' # for SyncEnumerator
5
+
6
# Patch Mechanize's broken html unescaping Mechanize 0.6.11
class WWW::Mechanize
  ##
  # Turns +url+ (a URI or anything responding to to_s) into an absolute URI,
  # resolving relative references against +cur_page+.
  #
  # Mechanize here uses #zip to combine the unescaped and the already escaped
  # pieces of the URL, which will ignore excessive elements of the second
  # array (the one which is passed as an argument). That means if the URL ends
  # with more than one already escaped character, then only the first one will
  # be restored into the resulting URL. SyncEnumerator is used instead so
  # that both lists are walked to their end.

  def to_absolute_uri(url, cur_page=current_page())
    unless url.is_a? URI
      raw = url.to_s.strip
      plain_parts   = raw.split(/%[0-9A-Fa-f]{2}/)
      escaped_parts = raw.scan(/%[0-9A-Fa-f]{2}/)

      # escape only the plain pieces, then put each original %XX back after it
      rebuilt = SyncEnumerator.new(plain_parts, escaped_parts).map { |plain, escaped|
        "#{URI.escape(plain||'')}#{escaped}"
      }.join('').gsub(/%23/, '#')

      url = URI.parse(Util.html_unescape(rebuilt))
    end

    # construct an absolute uri
    if url.relative?
      raise 'no history. please specify an absolute URL' unless cur_page.uri
      url = cur_page.uri + url
      # Strip initial "/.." bits from the path
      url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    end

    url
  end
end
38
+
39
##
# Base class for importers that obtain their contacts by scraping a web
# service with a Mechanize agent

class Graybook::Importer::PageScraper < Graybook::Importer::Base

  attr_accessor :agent

  ##
  # Builds the Mechanize agent used for scraping, gives it a descriptive
  # user agent header (good net etiquette), turns keep-alive off and stores
  # it in #agent. Returns the agent.

  def create_agent
    mech = WWW::Mechanize.new
    self.agent = mech
    mech.user_agent = "Mozilla/4.0 (compatible; Graybook #{Graybook::VERSION})"
    mech.keep_alive = false
    agent
  end

  ##
  # Every page scraper runs the same three steps: create the agent, prepare
  # for the scrape, then scrape. Returns whatever scrape_contacts returns.

  def fetch_contacts!
    create_agent
    prepare
    scrape_contacts
  end

  ##
  # Hook for whatever a provider needs before scraping can start, typically
  # a login. Does nothing here.

  def prepare
  end

  ##
  # Hook that performs the actual scrape. Some providers offer a single page
  # to scrape (like Gmail's HTML Contacts page), others need several pages
  # to be visited. Does nothing here.

  def scrape_contacts
  end

  ##
  # Returns +html+ with everything that looks like a tag removed

  def strip_html( html )
    tag = /<\/?[^>]*>/
    html.gsub(tag, '')
  end
end
@@ -0,0 +1,67 @@
1
+ require 'graybook/importer/page_scraper'
2
+
3
##
# Imports contacts for web.de FreeMail accounts by logging in through the
# web front end and collecting the senders shown in the mail folder listings.

class Graybook::Importer::WebDE < Graybook::Importer::PageScraper
  LOGIN_URL = "https://freemail.web.de"

  ##
  # Matches this importer to a user's address. Truthy when
  # options[:username] ends in @web.de (case-insensitive).

  def =~( options )
    options && options[:username] =~ /@web\.de$/i
  end

  ##
  # Submits the "login" form found on LOGIN_URL, follows the optional
  # "weiter zu FreeMail" link and loads the second frame of the result into
  # @next for scrape_contacts.
  #
  # Raises Graybook::BadCredentialsError when the resulting URL contains
  # "logonfailed". Any other error propagates unchanged.

  def login
    username, password = options[:username], options[:password]

    page = agent.get LOGIN_URL

    form = page.forms.with.name("login").first
    form.username = username
    form.password = password

    page = form.submit
    # follow logout hint
    if (continue_link = page.links.select { |link| link.text =~ /weiter zu FreeMail/ }.first)
      page = continue_link.click
    end

    if page.uri.to_s.match(/logonfailed/)
      raise Graybook::BadCredentialsError.new
    end

    # follow content frame
    @next = agent.get page.frames[1].src
  end

  ##
  # prepare this importer

  def prepare
    login
  end

  ##
  # Walks the "Posteingang" and "Unbekannt" folders and returns a flat array
  # of unique { :name, :email } hashes.

  def scrape_contacts
    page = @next

    per_folder = [/Posteingang/, /Unbekannt/].map do |folder|
      page = page.links.select { |link| link.text =~ folder }.first.click
      find_contacts(page)
    end

    # find_contacts returns one array per folder; flatten them so the result
    # is a list of contact hashes and duplicates are removed per contact.
    per_folder.flatten.inject([]) do |memo, contact|
      memo << contact unless memo.include? contact
      memo
    end
  end

  protected

  ##
  # Extracts contacts from the "span.from a" links of a folder page. The
  # last whitespace-separated token of the link title is taken as the
  # address, the rest as the name.

  def find_contacts(page)
    page.search("span.from a").map do |link|
      recp = link.attributes["title"].gsub(/\n/, "").split(/\s/)
      email = recp.pop
      fullname = recp.join(" ")

      { :name => fullname, :email => email }
    end
  end

  Graybook.register :webde, self
end
@@ -0,0 +1,63 @@
1
+ require 'graybook/importer/page_scraper'
2
+ require 'fastercsv'
3
+
4
##
# contacts importer for Yahoo!

class Graybook::Importer::Yahoo < Graybook::Importer::PageScraper

  ##
  # Matches this importer to an user's name/address. Returns true when
  # options[:username] ends in @yahoo.com or @yahoo.co.uk (case-insensitive).

  def =~(options = {})
    # the dot after "yahoo" is escaped so that e.g. "@yahooXcom" is rejected
    options && options[:username] =~ /@yahoo\.co(m|\.uk)$/i ? true : false
  end

  ##
  # login for Yahoo!
  #
  # Posts the part of options[:username] before the "@" together with
  # options[:password]. Returns true, or raises
  # Graybook::BadCredentialsError when the response body reports an invalid
  # or unknown ID.

  def login
    page = agent.get('https://login.yahoo.com/config/login_verify2?')
    form = page.forms.first
    form.login = options[:username].split("@").first
    form.passwd = options[:password]
    page = agent.submit(form, form.buttons.first)

    if page.body =~ /Invalid ID or password./ || page.body =~ /This ID is not yet taken./
      raise Graybook::BadCredentialsError, "That username and password was not accepted. Please check them and try again."
    end

    true
  end

  ##
  # prepare the importer

  def prepare
    login
  end

  ##
  # scrape yahoo contacts
  #
  # Downloads the address book as Yahoo-format CSV and returns an array of
  # { :name, :email } hashes. The row describing the account itself is left
  # out. Raises Graybook::BadCredentialsError when the address book page asks
  # for a sign in.

  def scrape_contacts
    page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
    if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
      raise( Graybook::BadCredentialsError, "Must be authenticated to access contacts." )
    end
    form = page.forms.last
    csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV

    contact_rows = FasterCSV.parse(csv.body)
    contact_rows.shift # drop the header row. TODO: Actually use the labels to find the indexes of the data we want

    contacts = contact_rows.map do |row|
      yahoo_id = row[7].to_s # the field may be nil or empty
      next if !yahoo_id.empty? && options[:username] =~ /^#{Regexp.escape(yahoo_id)}/ # Don't collect self

      # email is a field in the data, but will be blank (nil or "") for Yahoo
      # users so we create their email address
      email = row[4].to_s.empty? ? "#{yahoo_id}@yahoo.com" : row[4]

      { :name => "#{row[0]} #{row[2]}", :email => email }
    end

    # rows skipped with +next+ come back as nil; do not hand those to callers
    contacts.compact
  end

  Graybook.register(:yahoo, self)
end
data/lib/graybook.rb ADDED
@@ -0,0 +1,83 @@
1
+ $:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
2
+ require 'singleton'
3
+ require 'rubygems'
4
+
5
##
# Singleton registry of contact importers and exporters, and the entry point
# (Graybook.get) for fetching contacts in a chosen format.

class Graybook
  include ::Singleton
  VERSION = '1.0.22'

  class GraybookError < ::StandardError; end
  class BadCredentialsError < GraybookError; end
  class LegacyAccount < GraybookError; end

  # Hashes of registered adapters, keyed by the symbol they were registered as
  attr_accessor :importers
  attr_accessor :exporters

  def initialize
    self.importers = {}
    self.exporters = {}
  end

  # Class-level shortcut for Graybook.instance.get
  def self.get( *args )
    instance.get( *args )
  end

  # Instantiates +adapter_class+ and files the instance under +name+ as an
  # importer or exporter depending on its base class. Raises ArgumentError
  # when the instance is neither.
  def self.register(name, adapter_class)
    case adapter = adapter_class.new
    when Importer::Base
      instance.importers[name.to_sym] = adapter
    when Exporter::Base
      instance.exporters[name.to_sym] = adapter
    else
      raise ArgumentError, "Unknown adapter"
    end
  end

  # Sends the vcards from the import to whatever is handling the export
  def export( importer, exporter, options )
    exporter.export importer.import( options )
  end

  # Searches registered importers for one that will handle the given options.
  # Returns nil when none matches.
  def find_importer( options )
    importers.values.find { |importer| importer =~ options }
  end

  # Fetches contacts from various services or filetypes. The default is to return an array
  # of hashes - Graybook's internal format
  #
  # Handles several different calls:
  #  get( :username => 'something@gmail.com', :password => 'whatever' )
  #  get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
  #  get( :csv, :file => #<File:/path/to/file.csv> )
  #
  # Raises ArgumentError when no exporter is registered for the requested
  # format or when no importer can be found for the arguments.
  def get( *args )
    options   = args.last.is_a?(Hash) ? args.pop : {}
    to_format = exporters[ options[:as] || :basic ]

    # Look the importer up by name only when a name-like argument was given,
    # instead of rescuing whatever the lookup might raise.
    name   = args.first
    named  = importers[name.to_sym] if name.respond_to?(:to_sym)
    source = named || find_importer(options)

    raise ArgumentError, "Unknown exporter" unless to_format
    raise ArgumentError, "Unknown source" unless source

    export source, to_format, options
  end
end
65
+
66
+ # Require all the importers/exporters
67
+ require 'graybook/importer/base'
68
+ require 'graybook/exporter/base'
69
+ Dir.glob(File.join(File.dirname(__FILE__), 'graybook/importer/*.rb')).each {|f| require f }
70
+ Dir.glob(File.join(File.dirname(__FILE__), 'graybook/exporter/*.rb')).each {|f| require f }
71
+
72
# Core extension: lets nil answer empty? so callers can test values that may
# be nil or a collection/string without a separate nil check.
class NilClass
  # nil is always considered empty
  def empty?; true; end
end
77
+
78
# Core extension: adds blank? to every object.
class Object
  # Returns the result of empty? for objects that respond to it; for all
  # other objects returns true only when the object is falsy.
  def blank?
    return empty? if respond_to?(:empty?)
    !self
  end
end
83
+
@@ -0,0 +1,9 @@
1
+ gmx:
2
+ username: nil
3
+ password: nil
4
+ freenet:
5
+ username: nil
6
+ password: nil
7
+ webde:
8
+ username: nil
9
+ password: nil