graybook 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. data/.gitignore +1 -0
  2. data/CHANGES.md +49 -0
  3. data/Manifest.txt +59 -0
  4. data/README.md +66 -0
  5. data/Rakefile +40 -0
  6. data/VERSION +1 -0
  7. data/VERSION.yml +4 -0
  8. data/VERSION_NAME +1 -0
  9. data/debug_graybook.rb +10 -0
  10. data/init.rb +1 -0
  11. data/lib/graybook/exporter/base.rb +16 -0
  12. data/lib/graybook/exporter/vcf.rb +45 -0
  13. data/lib/graybook/exporter/xml.rb +28 -0
  14. data/lib/graybook/importer/aol.rb +87 -0
  15. data/lib/graybook/importer/base.rb +39 -0
  16. data/lib/graybook/importer/csv.rb +74 -0
  17. data/lib/graybook/importer/freenet.rb +62 -0
  18. data/lib/graybook/importer/gmail.rb +84 -0
  19. data/lib/graybook/importer/gmx.rb +68 -0
  20. data/lib/graybook/importer/hotmail.rb +135 -0
  21. data/lib/graybook/importer/page_scraper.rb +86 -0
  22. data/lib/graybook/importer/web.de.rb +67 -0
  23. data/lib/graybook/importer/yahoo.rb +63 -0
  24. data/lib/graybook.rb +83 -0
  25. data/test/config/credentials.yml.example +9 -0
  26. data/test/fixtures/aol_application_page.html +566 -0
  27. data/test/fixtures/aol_bad_login_response_stage_3.html +565 -0
  28. data/test/fixtures/aol_contacts.html +102 -0
  29. data/test/fixtures/aol_login_response_stage_1.html +158 -0
  30. data/test/fixtures/aol_login_response_stage_2.html +559 -0
  31. data/test/fixtures/aol_login_response_stage_3.html +48 -0
  32. data/test/fixtures/aol_login_response_stage_4.html +404 -0
  33. data/test/fixtures/aol_login_response_stage_5.html +404 -0
  34. data/test/fixtures/aol_new_contacts.html +431 -0
  35. data/test/fixtures/gmail.csv +3 -0
  36. data/test/fixtures/gmail_bad_login_response_stage_2.html +560 -0
  37. data/test/fixtures/gmail_contacts.html +228 -0
  38. data/test/fixtures/gmail_login_response_stage_1.html +556 -0
  39. data/test/fixtures/gmail_login_response_stage_2.html +1 -0
  40. data/test/fixtures/gmail_login_response_stage_2a.html +1 -0
  41. data/test/fixtures/gmail_login_response_stage_3.html +249 -0
  42. data/test/fixtures/gmail_redirect_body.html +10 -0
  43. data/test/fixtures/hotmail_bad_login_response_stage_2.html +31 -0
  44. data/test/fixtures/hotmail_contacts.html +262 -0
  45. data/test/fixtures/hotmail_login_response_stage_1.html +31 -0
  46. data/test/fixtures/hotmail_login_response_stage_2.html +1 -0
  47. data/test/fixtures/hotmail_login_response_stage_3.html +519 -0
  48. data/test/fixtures/hotmail_scrape_first_page.html +77 -0
  49. data/test/fixtures/hotmail_scrape_response_stage_1.html +90 -0
  50. data/test/fixtures/hotmail_scrape_response_stage_2.html +77 -0
  51. data/test/fixtures/hotmail_scrape_response_stage_3.html +0 -0
  52. data/test/fixtures/yahoo_bad_login_response_stage_2.html +443 -0
  53. data/test/fixtures/yahoo_contacts.csv +3 -0
  54. data/test/fixtures/yahoo_contacts_not_logged_in.html +432 -0
  55. data/test/fixtures/yahoo_contacts_stage_1.html +399 -0
  56. data/test/fixtures/yahoo_login_response_stage_1.html +433 -0
  57. data/test/fixtures/yahoo_login_response_stage_2.html +16 -0
  58. data/test/fixtures/yahoo_no_user_response_stage_2.html +574 -0
  59. data/test/freenet_importer_test.rb +53 -0
  60. data/test/gmx_importer_test.rb +53 -0
  61. data/test/scripts/live_test.rb +25 -0
  62. data/test/test_graybook.rb +60 -0
  63. data/test/test_graybook_exporter_base.rb +16 -0
  64. data/test/test_graybook_exporter_vcf.rb +52 -0
  65. data/test/test_graybook_exporter_xml.rb +16 -0
  66. data/test/test_graybook_importer_aol.rb +108 -0
  67. data/test/test_graybook_importer_base.rb +24 -0
  68. data/test/test_graybook_importer_csv.rb +60 -0
  69. data/test/test_graybook_importer_gmail.rb +116 -0
  70. data/test/test_graybook_importer_hotmail.rb +165 -0
  71. data/test/test_graybook_importer_page_scraper.rb +51 -0
  72. data/test/test_graybook_importer_yahoo.rb +137 -0
  73. data/test/test_helper.rb +71 -0
  74. data/test/web.de_importer_test.rb +53 -0
  75. data/updater.rb +15 -0
  76. data/vendor/plugins/graybook/lib/autotest/discover.rb +3 -0
  77. data/vendor/plugins/graybook/lib/autotest/graybook.rb +27 -0
  78. metadata +185 -0
data/lib/graybook/importer/gmx.rb ADDED
@@ -0,0 +1,68 @@
+ require 'graybook/importer/page_scraper'
+
+ class Graybook::Importer::GMX < Graybook::Importer::PageScraper
+   LOGIN_URL = "https://www.gmx.net/"
+
+   def =~( options )
+     options && options[:username] =~ /@gmx\.(de|net)$/i
+   end
+
+   def login
+     username, password = options[:username], options[:password]
+
+     begin
+       page = agent.get LOGIN_URL
+
+       form = page.forms.with.name("login").first
+       form.id = username
+       form.p = password
+       page = form.submit
+       @next = page
+
+       if (page.at("div.index") && page.at("div.index").inner_html == "Passwort vergessen?") ||
+          page.uri.to_s.match(/login-failed/)
+         raise Graybook::BadCredentialsError.new
+       end
+     rescue => e
+       raise e || Graybook::GraybookError.new
+     end
+   end
+
+   def prepare
+     login
+   end
+
+   def scrape_contacts
+     page = @next
+
+     # Go to E-Mails
+     page = page.links.select { |link| link.text =~ /E-Mail/ }.first.click
+
+     contacts = [/Posteingang/, /Archiv/, /Gesendet/].map do |folder|
+       # puts "folder: #{folder}"
+       # puts "page title: #{page.title}"
+       # puts "page links: #{page.links.select { |link| link.text =~ folder }}"
+       page = page.links.select { |link| link.text =~ folder }.first.click
+       find_contacts(page)
+     end
+
+     contacts.inject([]) do |memo, contact|
+       memo << contact unless memo.include? contact
+       memo
+     end.flatten
+   end
+
+   protected
+
+   def find_contacts(page)
+     links = page.search("form#MI a").select { |link| link.attributes["title"] =~ /@/ }
+     links.map do |link|
+       recp = link.attributes["title"].gsub(/\n/, "").split(/\s/)
+       email = recp.pop.gsub(/[<>]/, "")
+       fullname = recp.join(" ")
+
+       { :name => fullname, :email => email }
+     end
+   end
+   Graybook.register :gmx, self
+ end
data/lib/graybook/importer/hotmail.rb ADDED
@@ -0,0 +1,135 @@
+ require 'graybook/importer/page_scraper'
+ require 'cgi'
+
+ ##
+ # imports contacts for MSN/Hotmail
+ class Graybook::Importer::Hotmail < Graybook::Importer::PageScraper
+
+   DOMAINS = { "compaq.net" => "https://msnia.login.live.com/ppsecure/post.srf",
+               "hotmail.co.jp" => "https://login.live.com/ppsecure/post.srf",
+               "hotmail.co.uk" => "https://login.live.com/ppsecure/post.srf",
+               "hotmail.com" => "https://login.live.com/ppsecure/post.srf",
+               "hotmail.de" => "https://login.live.com/ppsecure/post.srf",
+               "hotmail.fr" => "https://login.live.com/ppsecure/post.srf",
+               "hotmail.it" => "https://login.live.com/ppsecure/post.srf",
+               "live.com" => "https://login.live.com/ppsecure/post.srf",
+               "messengeruser.com" => "https://login.live.com/ppsecure/post.srf",
+               "msn.com" => "https://msnia.login.live.com/ppsecure/post.srf",
+               "passport.com" => "https://login.live.com/ppsecure/post.srf",
+               "webtv.net" => "https://login.live.com/ppsecure/post.srf" }
+
+   ##
+   # Matches this importer to a user's name/address
+
+   def =~(options)
+     return false unless options && options[:username]
+     domain = username_domain(options[:username].downcase)
+     !domain.empty? && DOMAINS.keys.include?( domain ) ? true : false
+   end
+
+   ##
+   # Login procedure
+   # 1. Go to login form
+   # 2. Set login and passwd
+   # 3. Set PwdPad to IfYouAreReadingThisYouHaveTooMuchFreeTime minus however many characters are in passwd (so if passwd
+   #    was 8 chars, you'd chop 8 chars off the end of IfYouAreReadingThisYouHaveTooMuchFreeTime - giving you IfYouAreReadingThisYouHaveTooMuch)
+   # 4. Set the action to the appropriate URL for the username's domain
+   # 5. Get the query string to append to the new action
+   # 6. Submit the form and parse the url from the resulting page's javascript
+   # 7. Go to that url
+
+   def login
+     page = agent.get('http://login.live.com/login.srf?id=2')
+     form = page.forms.first
+     form.login = options[:username]
+     form.passwd = options[:password]
+     form.PwdPad = ( "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - options[:password].to_s.size )])
+     query_string = page.body.scan(/g_QS="([^"]+)/).first.first rescue nil
+     form.action = login_url + "?#{query_string.to_s}"
+     page = agent.submit(form)
+
+     # Check for login success
+     if page.body =~ /The e-mail address or password is incorrect/ ||
+        page.body =~ /Sign in failed\./
+       raise( Graybook::BadCredentialsError,
+              "That username and password was not accepted. Please check them and try again." )
+     end
+
+     @first_page = agent.get( page.body.scan(/http\:\/\/[^"]+/).first )
+   end
+
+   ##
+   # prepare this importer
+
+   def prepare
+     login
+   end
+
+   ##
+   # Scrape contacts for Hotmail
+   # Seems like a POST to directly fetch CSV contacts from options.aspx?subsection=26&n=
+   # raises an end of file error in Net::HTTP via Mechanize.
+   # Seems like Hotmail addresses are now hosted on Windows Live.
+
+   def scrape_contacts
+     unless agent.cookies.find{|c| c.name == 'MSPPre' && c.value == options[:username]}
+       raise( Graybook::BadCredentialsError, "Must be authenticated to access contacts." )
+     end
+
+     page = agent.get('http://mail.live.com/')
+
+     if page.iframes.detect { |f| f.src =~ /\/mail\/TodayLight.aspx/ }
+       page = agent.get( page.iframes.first.src )
+
+       if button = page.forms.first.buttons.detect { |b| b.name == 'TakeMeToInbox' }
+         page = agent.submit( page.forms.first, button )
+       end
+     end
+
+     page = page.link_with(:text => 'Contact list').click
+
+     contacts = parse_contacts(page.body)
+     while link = page.link_with(:text => 'Next page')
+       page = link.click
+       contacts += parse_contacts(page.body)
+     end
+
+     contacts
+   end
+
+   def parse_contacts(source)
+     source.scan(/ICc.*\:\[.*?,.*?,\['ct'\],'(.*?)',.*?,.*?,'(.*?)',.*\]/).collect do |name, email|
+       { :name => (name =~ /\\x26\\x2364\\x3b/ ? nil : name), :email => email.gsub(/\\x40/, '@') }
+     end
+   end
+
+   ##
+   # looks up the login service that should be used based on the user's
+   # address
+
+   def login_url
+     DOMAINS[username_domain] || DOMAINS['hotmail.com']
+   end
+
+
+   ##
+   # normalizes the host for the page that is currently being "viewed" by the
+   # Mechanize agent
+
+   def current_host
+     return nil unless agent && agent.current_page
+     uri = agent.current_page.uri
+     "#{uri.scheme}://#{uri.host}"
+   end
+
+   ##
+   # determines the domain for the user
+
+   def username_domain(username = nil)
+     username ||= options[:username] if options
+     return unless username
+     username.to_s.split('@').last
+   end
+
+   Graybook.register(:hotmail, self)
+ end
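
The PwdPad padding described in step 3 above is plain string slicing: the fixed sentinel string is truncated by however many characters the password has. A quick illustrative check (the password value is a made-up example, not from the package):

    password = "secret12"  # 8 characters, chosen only for illustration
    pad = "IfYouAreReadingThisYouHaveTooMuchFreeTime"[0..(-1 - password.size)]
    pad # => "IfYouAreReadingThisYouHaveTooMuch"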
data/lib/graybook/importer/page_scraper.rb ADDED
@@ -0,0 +1,86 @@
+ require 'rubygems'
+ gem 'mechanize', '>= 0.7.0'
+ require 'mechanize'
+ require 'generator' # for SyncEnumerator
+
+ # Patch Mechanize's broken html unescaping (Mechanize 0.6.11)
+ class WWW::Mechanize
+   def to_absolute_uri(url, cur_page=current_page())
+     unless url.is_a? URI
+       url = url.to_s.strip
+       url = URI.parse(
+         Util.html_unescape(
+           SyncEnumerator.new(
+             url.split(/%[0-9A-Fa-f]{2}/), url.scan(/%[0-9A-Fa-f]{2}/)
+           ).map { |x,y|
+             "#{URI.escape(x||'')}#{y}"
+           }.join('').gsub(/%23/, '#')
+         )
+       )
+       # Mechanize here uses #zip to combine the two arrays, which will ignore
+       # excessive elements of the second array (the one which is passed as an
+       # argument). That means if the URL ends with more than one already escaped
+       # character, then only the first one will be restored into the resulting
+       # URL.
+     end
+
+     # construct an absolute uri
+     if url.relative?
+       raise 'no history. please specify an absolute URL' unless cur_page.uri
+       url = cur_page.uri + url
+       # Strip initial "/.." bits from the path
+       url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
+     end
+
+     return url
+   end
+ end
+
+ ##
+ # A base class for importers that scrape their contacts from web services
+
+ class Graybook::Importer::PageScraper < Graybook::Importer::Base
+
+   attr_accessor :agent
+
+   ##
+   # creates the Mechanize agent used to do the scraping and sets a nice
+   # user agent header for good net etiquette
+
+   def create_agent
+     self.agent = WWW::Mechanize.new
+     agent.user_agent = "Mozilla/4.0 (compatible; Graybook #{Graybook::VERSION})"
+     agent.keep_alive = false
+     agent
+   end
+
+   ##
+   # Page scrapers will follow a fairly simple pattern of instantiating the
+   # agent, prepping for the scrape and then the actual scrape process
+
+   def fetch_contacts!
+     create_agent
+     prepare
+     scrape_contacts
+   end
+
+   ##
+   # Providers will often require you to login or otherwise prepare to actually
+   # scrape the contacts
+
+   def prepare; end # stub
+
+   ##
+   # Some providers have a single page you can scrape from (like Gmail's HTML
+   # Contacts page) while others might require you to navigate several pages,
+   # scraping as you go.
+
+   def scrape_contacts; end # stub
+
+   ##
+   # helper to strip html from text
+
+   def strip_html( html )
+     html.gsub(/<\/?[^>]*>/, '')
+   end
+ end
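
Every web importer in this gem follows the template above: fetch_contacts! builds the agent, prepare logs in, and scrape_contacts returns an array of { :name, :email } hashes. A minimal sketch of what a new importer built on PageScraper could look like, modeled on the gmx/web.de importers (the ExampleMail class, its URL, form field names, and selectors are invented for illustration and are not part of this package):

    require 'graybook/importer/page_scraper'

    class Graybook::Importer::ExampleMail < Graybook::Importer::PageScraper
      # Claim addresses at a hypothetical example-mail.com domain
      def =~( options )
        options && options[:username] =~ /@example-mail\.com$/i
      end

      # prepare: log in and keep the landing page for the scrape step
      def prepare
        page = agent.get("https://example-mail.com/login")  # hypothetical URL
        form = page.forms.first
        form.username = options[:username]
        form.password = options[:password]
        @next = form.submit
      end

      # scrape_contacts: return Graybook's internal format, an array of hashes
      def scrape_contacts
        @next.search("a.contact").map do |link|
          { :name  => strip_html(link.inner_html),
            :email => link.attributes["href"].sub(/^mailto:/, "") }
        end
      end

      Graybook.register :example_mail, self
    end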
data/lib/graybook/importer/web.de.rb ADDED
@@ -0,0 +1,67 @@
+ require 'graybook/importer/page_scraper'
+
+ class Graybook::Importer::WebDE < Graybook::Importer::PageScraper
+   LOGIN_URL = "https://freemail.web.de"
+
+   def =~( options )
+     options && options[:username] =~ /@web\.de$/i
+   end
+
+   def login
+     username, password = options[:username], options[:password]
+
+     begin
+       page = agent.get LOGIN_URL
+
+       form = page.forms.with.name("login").first
+       form.username = username
+       form.password = password
+
+       page = form.submit
+       # follow logout hint
+       if (continue_link = page.links.select { |link| link.text =~ /weiter zu FreeMail/ }.first)
+         page = continue_link.click
+       end
+
+       if page.uri.to_s.match(/logonfailed/)
+         raise Graybook::BadCredentialsError.new
+       end
+
+       # follow content frame
+       @next = agent.get page.frames[1].src
+     rescue => e
+       raise e || Graybook::GraybookError.new
+     end
+   end
+
+   def prepare
+     login
+   end
+
+   def scrape_contacts
+     page = @next
+
+     contacts = [/Posteingang/, /Unbekannt/].map do |folder|
+       page = page.links.select { |link| link.text =~ folder }.first.click
+       find_contacts(page)
+     end
+
+     contacts.inject([]) do |memo, contact|
+       memo << contact unless memo.include? contact
+       memo
+     end
+   end
+
+   protected
+   def find_contacts(page)
+     page.search("span.from a").map do |link|
+       recp = link.attributes["title"].gsub(/\n/, "").split(/\s/)
+       email = recp.pop
+       fullname = recp.join(" ")
+
+       { :name => fullname, :email => email }
+     end
+   end
+
+   Graybook.register :webde, self
+ end
data/lib/graybook/importer/yahoo.rb ADDED
@@ -0,0 +1,63 @@
+ require 'graybook/importer/page_scraper'
+ require 'fastercsv'
+
+ ##
+ # contacts importer for Yahoo!
+
+ class Graybook::Importer::Yahoo < Graybook::Importer::PageScraper
+
+   ##
+   # Matches this importer to a user's name/address
+
+   def =~(options = {})
+     options && options[:username] =~ /@yahoo.co(m|\.uk)$/i ? true : false
+   end
+
+   ##
+   # login for Yahoo!
+
+   def login
+     page = agent.get('https://login.yahoo.com/config/login_verify2?')
+     form = page.forms.first
+     form.login = options[:username].split("@").first
+     form.passwd = options[:password]
+     page = agent.submit(form, form.buttons.first)
+
+     if page.body =~ /Invalid ID or password./ || page.body =~ /This ID is not yet taken./
+       raise Graybook::BadCredentialsError, "That username and password was not accepted. Please check them and try again."
+     end
+
+     true
+   end
+
+   ##
+   # prepare the importer
+
+   def prepare
+     login
+   end
+
+   ##
+   # scrape yahoo contacts
+
+   def scrape_contacts
+     page = agent.get("http://address.yahoo.com/?1=&VPC=import_export")
+     if page.body =~ /To access Yahoo! Address Book\.\.\..*Sign in./m
+       raise( Graybook::BadCredentialsError, "Must be authenticated to access contacts." )
+     end
+     form = page.forms.last
+     csv = agent.submit(form, form.buttons[2]) # third button is Yahoo-format CSV
+
+     contact_rows = FasterCSV.parse(csv.body)
+     labels = contact_rows.shift # TODO: Actually use the labels to find the indexes of the data we want
+     contact_rows.collect do |row|
+       next if !row[7].empty? && options[:username] =~ /^#{Regexp.escape(row[7])}/ # Don't collect self
+       {
+         :name => "#{row[0]} #{row[2]}".to_s,
+         :email => (row[4] || "#{row[7]}@yahoo.com") # email is a field in the data, but will be blank for Yahoo users so we create their email address
+       }
+     end
+   end
+
+   Graybook.register(:yahoo, self)
+ end
data/lib/graybook.rb ADDED
@@ -0,0 +1,83 @@
+ $:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
+ require 'singleton'
+ require 'rubygems'
+
+ class Graybook
+   include ::Singleton
+   VERSION = '1.0.22'
+
+   class GraybookError < ::StandardError; end
+   class BadCredentialsError < GraybookError; end
+   class LegacyAccount < GraybookError; end
+
+   attr_accessor :importers
+   attr_accessor :exporters
+
+   def self.get( *args )
+     instance.get( *args )
+   end
+
+   def self.register(name, adapter_class)
+     case adapter = adapter_class.new
+     when Importer::Base
+       instance.importers[name.to_sym] = adapter
+     when Exporter::Base
+       instance.exporters[name.to_sym] = adapter
+     else
+       raise ArgumentError, "Unknown adapter"
+     end
+   end
+
+   # Sends the vcards from the import to whatever is handling the export
+   def export( importer, exporter, options )
+     exporter.export importer.import( options )
+   end
+
+   # Searches registered importers for one that will handle the given options
+   def find_importer( options )
+     importers.each{ |key, importer| return importer if importer =~ options }
+     nil
+   end
+
+   # Fetches contacts from various services or filetypes. The default is to return an array
+   # of hashes - Graybook's internal format
+   #
+   # Handles several different calls:
+   #   get( :username => 'something@gmail.com', :password => 'whatever' )
+   #   get( :as => :xml, :username => 'something@gmail.com', :password => 'whatever' )
+   #   get( :csv, :file => #<File:/path/to/file.csv> )
+   def get( *args )
+     options = args.last.is_a?(Hash) ? args.pop : {}
+     to_format = exporters[ options[:as] || :basic ]
+     source = (importers[args.first.to_sym] rescue nil) || find_importer(options)
+
+     raise ArgumentError, "Unknown exporter" unless to_format
+     raise ArgumentError, "Unknown source" unless source
+
+     export source, to_format, options
+   end
+
+   def initialize
+     self.importers = {}
+     self.exporters = {}
+   end
+ end
+
+ # Require all the importers/exporters
+ require 'graybook/importer/base'
+ require 'graybook/exporter/base'
+ Dir.glob(File.join(File.dirname(__FILE__), 'graybook/importer/*.rb')).each {|f| require f }
+ Dir.glob(File.join(File.dirname(__FILE__), 'graybook/exporter/*.rb')).each {|f| require f }
+
+ class NilClass
+   def empty?
+     true
+   end
+ end
+
+ class Object
+   def blank?
+     respond_to?(:empty?) ? empty? : !self
+   end
+ end
+
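
The doc comment on get above covers the three calling styles; tied to the importers and exporters registered elsewhere in this diff, usage would look roughly like the following (account values and the file path are placeholders, and the default :basic exporter is assumed to be registered by exporter/base.rb, which is not shown here):

    require 'graybook'

    # Implicit importer: find_importer matches the address, the default exporter returns hashes
    contacts = Graybook.get(:username => 'someone@gmx.net', :password => 'secret')

    # Explicit exporter: same lookup, rendered by the XML exporter
    xml = Graybook.get(:as => :xml, :username => 'someone@hotmail.com', :password => 'secret')

    # Explicit importer: parse a local CSV export instead of scraping a web service
    csv_contacts = Graybook.get(:csv, :file => File.open('/path/to/file.csv'))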
data/test/config/credentials.yml.example ADDED
@@ -0,0 +1,9 @@
+ gmx:
+   username: nil
+   password: nil
+ freenet:
+   username: nil
+   password: nil
+ webde:
+   username: nil
+   password: nil