gmail-scraper 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- require 'conv_summary.rb'
1
+ require 'lib/conv_summary.rb'
2
2
  class Conversation<ConvSummary
3
3
 
4
4
  attr_accessor :created_at, :emails
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'mechanize'
3
- require 'conversation.rb'
4
- require 'email.rb'
3
+ require 'lib/conversation.rb'
4
+ require 'lib/email.rb'
5
5
 
6
6
 
7
7
  class ThreadNotFoundException<Exception
@@ -13,7 +13,6 @@ class Gmail
13
13
  XPATH_CONV_IN_LIST="//tr[@bgcolor='#E8EEF7'] | //tr[@bgcolor='#ffffff']"
14
14
  XPATH_EMAIL_IN_CONV=".//table[@bgcolor='#efefef' and @border='0']"
15
15
 
16
-
17
16
 
18
17
  def initialize(email, pwd)
19
18
  @email=email
@@ -24,30 +23,32 @@ class Gmail
24
23
  def connect
25
24
  base_url="http://www.gmail.com"
26
25
  @agent = WWW::Mechanize.new
26
+ @agent.follow_meta_refresh = true
27
27
  base_page = @agent.get base_url
28
28
  login_form = base_page.forms.first
29
29
  login_form.Email = @email
30
30
  login_form.Passwd = @pwd
31
31
  @agent.submit(login_form)
32
- page= @agent.get "http://mail.google.com/mail/?ui=html&zy=a&s=a"
32
+ page= @agent.get "http://mail.google.com/mail/?ui=html&zy=a"
33
33
 
34
34
  # bad(but working) method to detect the connection
35
- connected=!(page.title.gsub("\n", "").strip=="Gmail: Email from Google")
35
+ connected=!(page.title[/Inbox/].nil?)
36
36
 
37
37
  return connected
38
38
  end
39
39
 
40
40
  # Scrape the list of summaries of conversations
41
41
 
42
+ def search(tag, conv_start=0, conv_end=nil)
43
+ summary_as_html("s=l&l=#{tag}", conv_start, conv_end){|html_conv, i|
44
+ yield(html_conv, i)
45
+ }
46
+ end
47
+
42
48
  def list(conv_start=0, conv_end=nil)
43
49
  # display the list of emails
44
- summary_as_html(conv_start, conv_end){|html_conv, i|
45
- begin
46
- c=ConvSummary.create_from_html(html_conv)
47
- yield(c)
48
- rescue DraftException
49
- puts "Skiping a draft"
50
- end
50
+ summary_as_html("s=a", conv_start, conv_end){|html_conv, i|
51
+
51
52
 
52
53
  }
53
54
  end
@@ -64,14 +65,16 @@ class Gmail
64
65
 
65
66
  private
66
67
 
67
- def summary_as_html(conv_start, conv_end)
68
+
69
+ def summary_as_html(mode, conv_start, conv_end)
68
70
 
69
71
  page_index=conv_start/CONV_PER_PAGE
70
72
 
71
73
  while (!@error)
72
74
 
73
75
  # get the page of threads
74
- url="?s=a&st=#{page_index*CONV_PER_PAGE}"
76
+ url="?#{mode}&st=#{page_index*CONV_PER_PAGE}"
77
+
75
78
  puts "fetching page: #{url}"
76
79
  page_list=@agent.get url
77
80
 
@@ -79,14 +82,18 @@ class Gmail
79
82
 
80
83
  page_list.search(XPATH_CONV_IN_LIST).each_with_index{|conv, i|
81
84
  conv_index=page_index*CONV_PER_PAGE+i
82
- puts "conversation_index: #{conv_index}"
83
85
 
84
86
  if (((!conv_end.nil?) && (conv_index>conv_end)) || @error)
85
87
  return
86
88
  end
87
89
 
88
90
  if (conv_index>=conv_start)
89
- yield(conv)
91
+ begin
92
+ c=ConvSummary.create_from_html(conv)
93
+ yield(c)
94
+ rescue DraftException
95
+ puts "Skiping a draft"
96
+ end
90
97
  end
91
98
  }
92
99
 
@@ -24,11 +24,11 @@ describe Gmail do
24
24
 
25
25
 
26
26
  it "should be able to extract a summary of the first 50 conversations" do
27
- email_start=0
28
- email_end=1
27
+ conv_start=0
28
+ conv_end=49
29
29
  @gmail.connect
30
30
  i=0
31
- @gmail.list(email_start, email_end) { |thread_summary|
31
+ @gmail.list(conv_start, conv_end) { |thread_summary|
32
32
  thread_summary.subject.should_not==nil
33
33
  thread_summary.nb_emails.should>=1
34
34
  thread_summary.uid.should_not==nil
@@ -36,17 +36,10 @@ describe Gmail do
36
36
 
37
37
  i+=1
38
38
  }
39
- i.should==(email_end-email_start+1)
39
+ i.should==(conv_end-conv_start+1)
40
40
  end
41
41
 
42
- it "should be able only to extract the correct interval of conversations" do
43
- i=0
44
- @gmail.list(1,2){ |conv_summary|
45
- conv=@gmail.fetch_conversation(conv_summary)
46
- i+=1
47
- }
48
- i.should==2
49
- end
42
+
50
43
 
51
44
  it "should be able to fetch the correct number of emails in a conversations" do
52
45
  @gmail.list(500,551){ |conv_summary|
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gmail-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nicolas Maisonneuve
8
- autorequire: gmail-scraper
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-04 00:00:00 +01:00
12
+ date: 2010-02-26 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency