AddressBookImporter 0.0.4 → 0.0.5

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/address_book_importer.rb +49 -12
  2. metadata +2 -2
@@ -1,8 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
+ require 'cgi'
3
4
 
4
5
  module AddressBookImporter
5
- VERSION = '0.0.4'
6
+ VERSION = '0.0.5'
6
7
 
7
8
  class EmptyEmailException < Exception ; end
8
9
  class LoginErrorException < Exception ; end
@@ -14,7 +15,8 @@ module AddressBookImporter
14
15
  def initialize(login, password)
15
16
  begin
16
17
  @agent = ::WWW::Mechanize.new {|a| a.log = nil }
17
- @agent.user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
18
+ #@agent.user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
19
+ @agent.user_agent = "Mozilla/5.0 (X11; U; Linux i686; en; rv:1.8.1.1) Gecko/20060601 Epiphany/2.16 Firefox/2.0.0.1 (Ubuntu-edgy)"
18
20
  p = login(login, password)
19
21
  @contacts = fetch_contacts(p)
20
22
  rescue Exception => e
@@ -32,28 +34,63 @@ module AddressBookImporter
32
34
 
33
35
  class Hotmail < Importer
34
36
 
35
- attr_accessor :curr_page
37
+ attr_accessor :curr_page, :mode
36
38
 
37
39
  def login(login, password)
38
- page = @agent.get('http://www.hotmail.com')
40
+ page = @agent.get('http://login.live.com/login.srf?id=2')
39
41
  form = page.forms.first
40
42
  form.fields.find {|f| f.name == 'login'}.value = login
41
43
  form.fields.find {|f| f.name == 'passwd'}.value = password
42
44
  page = @agent.submit(form, form.buttons.first)
43
45
  raise LoginErrorException if page.body.match(/icon_err\.gif/)
46
+
44
47
  url = page.root.at('script').inner_html.match(%r{(http:[^"]+)})[0]
45
- @agent.get(url)
48
+ p = @agent.get(url) # http://by132w.bay132.mail.live.com/mail/mail.aspx
49
+ if (match = p.body.match(/(http:\/\/[^"'\/\\]*\.mail\.live\.com)/))
50
+ @mode = :live
51
+ p = @agent.get(match[1])
52
+ else
53
+ @mode = :normal
54
+ end
55
+ p
46
56
  end
47
57
 
48
58
  def fetch_contacts(page)
49
- params = page.body.match(/(curmbox=[^&]+)/)[1]
50
- start = page.body.match(/(a=[^"]+)/)[1]
51
- contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
52
59
  rval = []
53
- match_string = contact_page.body
54
- while !(match = match_string.match(/'([^@\s]+@[-a-z0-9]+\.+[a-z]{2,})'/)).nil?
55
- rval << match[1]
56
- match_string = match.post_match
60
+ if @mode == :normal
61
+ params = page.body.match(/(curmbox=[^&]+)/)[1]
62
+ start = page.body.match(/(a=[^"]+)/)[1]
63
+ contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
64
+ rval = get_email(contact_page.body)
65
+ else
66
+ link = page.links.select{|l|l.href.match(/Inbox/)}.first
67
+ p = @agent.click(link)
68
+ link = page.links.select{|l|l.href.match(/EditMessage/)}.first
69
+ if p.body.match(/NewMessageGo/)
70
+ if (match = p.body.match(/(\/mail\/ApplicationMainReach\.aspx\?Control=EditMessage[^"]+)/))
71
+ p = @agent.get(match[1])
72
+ end
73
+ form = p.forms.first
74
+ form.add_field!('ToContact.x', 9)
75
+ form.add_field!('ToContact.y', 11)
76
+ p2 = form.submit
77
+ rval = get_email(p2.body)
78
+ else
79
+ p = @agent.click(link)
80
+ rval = get_email(p.body, /([_\-a-z0-9.A-Z]+%40((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
81
+ rval.collect!{|a|a.gsub(/%40/, '@')}
82
+ end
83
+
84
+ rval
85
+ end
86
+ end
87
+
88
+ protected
89
+ def get_email(body, match_regex = /([_\-a-z0-9.A-Z]+@((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
90
+ rval = []
91
+ while !(match = body.match(match_regex)).nil?
92
+ rval << match[0]
93
+ body = match.post_match
57
94
  end
58
95
  rval
59
96
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: AddressBookImporter
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.4
7
- date: 2007-01-22 00:00:00 +01:00
6
+ version: 0.0.5
7
+ date: 2007-02-05 00:00:00 +01:00
8
8
  summary: Mechanize scraper for address books
9
9
  require_paths:
10
10
  - lib