AddressBookImporter 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/address_book_importer.rb +49 -12
  2. metadata +2 -2
@@ -1,8 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
+ require 'cgi'
3
4
 
4
5
  module AddressBookImporter
5
- VERSION = '0.0.4'
6
+ VERSION = '0.0.5'
6
7
 
7
8
  class EmptyEmailException < Exception ; end
8
9
  class LoginErrorException < Exception ; end
@@ -14,7 +15,8 @@ module AddressBookImporter
14
15
  def initialize(login, password)
15
16
  begin
16
17
  @agent = ::WWW::Mechanize.new {|a| a.log = nil }
17
- @agent.user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
18
+ #@agent.user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
19
+ @agent.user_agent = "Mozilla/5.0 (X11; U; Linux i686; en; rv:1.8.1.1) Gecko/20060601 Epiphany/2.16 Firefox/2.0.0.1 (Ubuntu-edgy)"
18
20
  p = login(login, password)
19
21
  @contacts = fetch_contacts(p)
20
22
  rescue Exception => e
@@ -32,28 +34,63 @@ module AddressBookImporter
32
34
 
33
35
  class Hotmail < Importer
34
36
 
35
- attr_accessor :curr_page
37
+ attr_accessor :curr_page, :mode
36
38
 
37
39
  def login(login, password)
38
- page = @agent.get('http://www.hotmail.com')
40
+ page = @agent.get('http://login.live.com/login.srf?id=2')
39
41
  form = page.forms.first
40
42
  form.fields.find {|f| f.name == 'login'}.value = login
41
43
  form.fields.find {|f| f.name == 'passwd'}.value = password
42
44
  page = @agent.submit(form, form.buttons.first)
43
45
  raise LoginErrorException if page.body.match(/icon_err\.gif/)
46
+
44
47
  url = page.root.at('script').inner_html.match(%r{(http:[^"]+)})[0]
45
- @agent.get(url)
48
+ p = @agent.get(url) # http://by132w.bay132.mail.live.com/mail/mail.aspx
49
+ if (match = p.body.match(/(http:\/\/[^"'\/\\]*\.mail\.live\.com)/))
50
+ @mode = :live
51
+ p = @agent.get(match[1])
52
+ else
53
+ @mode = :normal
54
+ end
55
+ p
46
56
  end
47
57
 
48
58
  def fetch_contacts(page)
49
- params = page.body.match(/(curmbox=[^&]+)/)[1]
50
- start = page.body.match(/(a=[^"]+)/)[1]
51
- contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
52
59
  rval = []
53
- match_string = contact_page.body
54
- while !(match = match_string.match(/'([^@\s]+@[-a-z0-9]+\.+[a-z]{2,})'/)).nil?
55
- rval << match[1]
56
- match_string = match.post_match
60
+ if @mode == :normal
61
+ params = page.body.match(/(curmbox=[^&]+)/)[1]
62
+ start = page.body.match(/(a=[^"]+)/)[1]
63
+ contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
64
+ rval = get_email(contact_page.body)
65
+ else
66
+ link = page.links.select{|l|l.href.match(/Inbox/)}.first
67
+ p = @agent.click(link)
68
+ link = page.links.select{|l|l.href.match(/EditMessage/)}.first
69
+ if p.body.match(/NewMessageGo/)
70
+ if (match = p.body.match(/(\/mail\/ApplicationMainReach\.aspx\?Control=EditMessage[^"]+)/))
71
+ p = @agent.get(match[1])
72
+ end
73
+ form = p.forms.first
74
+ form.add_field!('ToContact.x', 9)
75
+ form.add_field!('ToContact.y', 11)
76
+ p2 = form.submit
77
+ rval = get_email(p2.body)
78
+ else
79
+ p = @agent.click(link)
80
+ rval = get_email(p.body, /([_\-a-z0-9.A-Z]+%40((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
81
+ rval.collect!{|a|a.gsub(/%40/, '@')}
82
+ end
83
+
84
+ rval
85
+ end
86
+ end
87
+
88
+ protected
89
# Extracts every e-mail address found in +body+.
#
# body        - String to search.
# match_regex - Regexp describing a single address. The text of the whole
#               match (not of a capture group) is collected.
#
# Returns an Array with the matched Strings in order of appearance; the
# Array is empty when nothing matches.
def get_email(body, match_regex = /([_\-a-z0-9.A-Z]+@((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
  rval = []
  # String#scan always moves forward through the input, so a pattern that
  # can match the empty string cannot stall the loop; such empty matches
  # are never addresses and are skipped.
  body.scan(match_regex) do
    address = Regexp.last_match(0)
    rval << address unless address.empty?
  end
  rval
end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: AddressBookImporter
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.4
7
- date: 2007-01-22 00:00:00 +01:00
6
+ version: 0.0.5
7
+ date: 2007-02-05 00:00:00 +01:00
8
8
  summary: Mechanize scraper for address books
9
9
  require_paths:
10
10
  - lib