AddressBookImporter 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/address_book_importer.rb +49 -12
- metadata +2 -2
@@ -1,8 +1,9 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'mechanize'
|
3
|
+
require 'cgi'
|
3
4
|
|
4
5
|
module AddressBookImporter
|
5
|
-
VERSION = '0.0.4'
|
6
|
+
VERSION = '0.0.5'
|
6
7
|
|
7
8
|
class EmptyEmailException < Exception ; end
|
8
9
|
class LoginErrorException < Exception ; end
|
@@ -14,7 +15,8 @@ module AddressBookImporter
|
|
14
15
|
def initialize(login, password)
|
15
16
|
begin
|
16
17
|
@agent = ::WWW::Mechanize.new {|a| a.log = nil }
|
17
|
-
|
18
|
+
#@agent.user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
|
19
|
+
@agent.user_agent = "Mozilla/5.0 (X11; U; Linux i686; en; rv:1.8.1.1) Gecko/20060601 Epiphany/2.16 Firefox/2.0.0.1 (Ubuntu-edgy)"
|
18
20
|
p = login(login, password)
|
19
21
|
@contacts = fetch_contacts(p)
|
20
22
|
rescue Exception => e
|
@@ -32,28 +34,63 @@ module AddressBookImporter
|
|
32
34
|
|
33
35
|
class Hotmail < Importer
|
34
36
|
|
35
|
-
attr_accessor :curr_page
|
37
|
+
attr_accessor :curr_page, :mode
|
36
38
|
|
37
39
|
def login(login, password)
|
38
|
-
page = @agent.get('http://
|
40
|
+
page = @agent.get('http://login.live.com/login.srf?id=2')
|
39
41
|
form = page.forms.first
|
40
42
|
form.fields.find {|f| f.name == 'login'}.value = login
|
41
43
|
form.fields.find {|f| f.name == 'passwd'}.value = password
|
42
44
|
page = @agent.submit(form, form.buttons.first)
|
43
45
|
raise LoginErrorException if page.body.match(/icon_err\.gif/)
|
46
|
+
|
44
47
|
url = page.root.at('script').inner_html.match(%r{(http:[^"]+)})[0]
|
45
|
-
@agent.get(url)
|
48
|
+
p = @agent.get(url) # http://by132w.bay132.mail.live.com/mail/mail.aspx
|
49
|
+
if (match = p.body.match(/(http:\/\/[^"'\/\\]*\.mail\.live\.com)/))
|
50
|
+
@mode = :live
|
51
|
+
p = @agent.get(match[1])
|
52
|
+
else
|
53
|
+
@mode = :normal
|
54
|
+
end
|
55
|
+
p
|
46
56
|
end
|
47
57
|
|
48
58
|
def fetch_contacts(page)
|
49
|
-
params = page.body.match(/(curmbox=[^&]+)/)[1]
|
50
|
-
start = page.body.match(/(a=[^"]+)/)[1]
|
51
|
-
contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
|
52
59
|
rval = []
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
60
|
+
if @mode == :normal
|
61
|
+
params = page.body.match(/(curmbox=[^&]+)/)[1]
|
62
|
+
start = page.body.match(/(a=[^"]+)/)[1]
|
63
|
+
contact_page = @agent.get("/cgi-bin/AddressPicker?" + start+params + "&Context=InsertAddress&_HMaction=Edit&qF=to")
|
64
|
+
rval = get_email(contact_page.body)
|
65
|
+
else
|
66
|
+
link = page.links.select{|l|l.href.match(/Inbox/)}.first
|
67
|
+
p = @agent.click(link)
|
68
|
+
link = page.links.select{|l|l.href.match(/EditMessage/)}.first
|
69
|
+
if p.body.match(/NewMessageGo/)
|
70
|
+
if (match = p.body.match(/(\/mail\/ApplicationMainReach\.aspx\?Control=EditMessage[^"]+)/))
|
71
|
+
p = @agent.get(match[1])
|
72
|
+
end
|
73
|
+
form = p.forms.first
|
74
|
+
form.add_field!('ToContact.x', 9)
|
75
|
+
form.add_field!('ToContact.y', 11)
|
76
|
+
p2 = form.submit
|
77
|
+
rval = get_email(p2.body)
|
78
|
+
else
|
79
|
+
p = @agent.click(link)
|
80
|
+
rval = get_email(p.body, /([_\-a-z0-9.A-Z]+%40((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
|
81
|
+
rval.collect!{|a|a.gsub(/%40/, '@')}
|
82
|
+
end
|
83
|
+
|
84
|
+
rval
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
protected
|
89
|
+
def get_email(body, match_regex = /([_\-a-z0-9.A-Z]+@((?:[-a-z0-9]+\.)+[a-z]{2,}))/)
|
90
|
+
rval = []
|
91
|
+
while !(match = body.match(match_regex)).nil?
|
92
|
+
rval << match[0]
|
93
|
+
body = match.post_match
|
57
94
|
end
|
58
95
|
rval
|
59
96
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: AddressBookImporter
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.4
|
7
|
-
date: 2007-
|
6
|
+
version: 0.0.5
|
7
|
+
date: 2007-02-05 00:00:00 +01:00
|
8
8
|
summary: Mechanize scraper for address books
|
9
9
|
require_paths:
|
10
10
|
- lib
|