gmail-scraper 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/conversation.rb +1 -1
- data/lib/gmail.rb +23 -16
- data/spec/basic_spec.rb +5 -12
- metadata +3 -3
data/lib/conversation.rb
CHANGED
data/lib/gmail.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'mechanize'
|
3
|
-
require 'conversation.rb'
|
4
|
-
require 'email.rb'
|
3
|
+
require 'lib/conversation.rb'
|
4
|
+
require 'lib/email.rb'
|
5
5
|
|
6
6
|
|
7
7
|
class ThreadNotFoundException<Exception
|
@@ -13,7 +13,6 @@ class Gmail
|
|
13
13
|
XPATH_CONV_IN_LIST="//tr[@bgcolor='#E8EEF7'] | //tr[@bgcolor='#ffffff']"
|
14
14
|
XPATH_EMAIL_IN_CONV=".//table[@bgcolor='#efefef' and @border='0']"
|
15
15
|
|
16
|
-
|
17
16
|
|
18
17
|
def initialize(email, pwd)
|
19
18
|
@email=email
|
@@ -24,30 +23,32 @@ class Gmail
|
|
24
23
|
def connect
|
25
24
|
base_url="http://www.gmail.com"
|
26
25
|
@agent = WWW::Mechanize.new
|
26
|
+
@agent.follow_meta_refresh = true
|
27
27
|
base_page = @agent.get base_url
|
28
28
|
login_form = base_page.forms.first
|
29
29
|
login_form.Email = @email
|
30
30
|
login_form.Passwd = @pwd
|
31
31
|
@agent.submit(login_form)
|
32
|
-
page= @agent.get "http://mail.google.com/mail/?ui=html&zy=a
|
32
|
+
page= @agent.get "http://mail.google.com/mail/?ui=html&zy=a"
|
33
33
|
|
34
34
|
# bad(but working) method to detect the connection
|
35
|
-
connected=!(page.title.
|
35
|
+
connected=!(page.title[/Inbox/].nil?)
|
36
36
|
|
37
37
|
return connected
|
38
38
|
end
|
39
39
|
|
40
40
|
# Scrap the list of summaries of conversations
|
41
41
|
|
42
|
+
def search(tag, conv_start=0, conv_end=nil)
|
43
|
+
summary_as_html("s=l&l=#{tag}", conv_start, conv_end){|html_conv, i|
|
44
|
+
yield(html_conv, i)
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
42
48
|
def list(conv_start=0, conv_end=nil)
|
43
49
|
# display the list of emails
|
44
|
-
summary_as_html(conv_start, conv_end){|html_conv, i|
|
45
|
-
|
46
|
-
c=ConvSummary.create_from_html(html_conv)
|
47
|
-
yield(c)
|
48
|
-
rescue DraftException
|
49
|
-
puts "Skiping a draft"
|
50
|
-
end
|
50
|
+
summary_as_html("s=a", conv_start, conv_end){|html_conv, i|
|
51
|
+
|
51
52
|
|
52
53
|
}
|
53
54
|
end
|
@@ -64,14 +65,16 @@ class Gmail
|
|
64
65
|
|
65
66
|
private
|
66
67
|
|
67
|
-
|
68
|
+
|
69
|
+
def summary_as_html(mode, conv_start, conv_end)
|
68
70
|
|
69
71
|
page_index=conv_start/CONV_PER_PAGE
|
70
72
|
|
71
73
|
while (!@error)
|
72
74
|
|
73
75
|
# get the page of threads
|
74
|
-
url="
|
76
|
+
url="?#{mode}&st=#{page_index*CONV_PER_PAGE}"
|
77
|
+
|
75
78
|
puts "fetching page: #{url}"
|
76
79
|
page_list=@agent.get url
|
77
80
|
|
@@ -79,14 +82,18 @@ class Gmail
|
|
79
82
|
|
80
83
|
page_list.search(XPATH_CONV_IN_LIST).each_with_index{|conv, i|
|
81
84
|
conv_index=page_index*CONV_PER_PAGE+i
|
82
|
-
puts "conversation_index: #{conv_index}"
|
83
85
|
|
84
86
|
if (((!conv_end.nil?) && (conv_index>conv_end)) || @error)
|
85
87
|
return
|
86
88
|
end
|
87
89
|
|
88
90
|
if (conv_index>=conv_start)
|
89
|
-
|
91
|
+
begin
|
92
|
+
c=ConvSummary.create_from_html(conv)
|
93
|
+
yield(c)
|
94
|
+
rescue DraftException
|
95
|
+
puts "Skiping a draft"
|
96
|
+
end
|
90
97
|
end
|
91
98
|
}
|
92
99
|
|
data/spec/basic_spec.rb
CHANGED
@@ -24,11 +24,11 @@ describe Gmail do
|
|
24
24
|
|
25
25
|
|
26
26
|
it "should be able to extract a summary of the first 50 conversations" do
|
27
|
-
|
28
|
-
|
27
|
+
conv_start=0
|
28
|
+
conv_end=49
|
29
29
|
@gmail.connect
|
30
30
|
i=0
|
31
|
-
@gmail.list(
|
31
|
+
@gmail.list(conv_start, conv_end) { |thread_summary|
|
32
32
|
thread_summary.subject.should_not==nil
|
33
33
|
thread_summary.nb_emails.should>=1
|
34
34
|
thread_summary.uid.should_not==nil
|
@@ -36,17 +36,10 @@ describe Gmail do
|
|
36
36
|
|
37
37
|
i+=1
|
38
38
|
}
|
39
|
-
i.should==(
|
39
|
+
i.should==(conv_end-conv_start+1)
|
40
40
|
end
|
41
41
|
|
42
|
-
|
43
|
-
i=0
|
44
|
-
@gmail.list(1,2){ |conv_summary|
|
45
|
-
conv=@gmail.fetch_conversation(conv_summary)
|
46
|
-
i+=1
|
47
|
-
}
|
48
|
-
i.should==2
|
49
|
-
end
|
42
|
+
|
50
43
|
|
51
44
|
it "should be able to fetch the correct number of emails in a conversations" do
|
52
45
|
@gmail.list(500,551){ |conv_summary|
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gmail-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nicolas Maisonneuve
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-26 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|