gmail_extractor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.md +24 -0
- data/Rakefile +1 -0
- data/bin/gmail_extractor +4 -0
- data/gmail_extractor.gemspec +24 -0
- data/lib/gmail_extractor.rb +100 -0
- data/lib/gmail_extractor/email.rb +13 -0
- data/lib/gmail_extractor/email_extractor.rb +69 -0
- data/lib/gmail_extractor/email_formatter.rb +3 -0
- data/lib/gmail_extractor/email_formatter/email_csv_formatter.rb +25 -0
- data/lib/gmail_extractor/email_formatter/email_html_formatter.rb +48 -0
- data/lib/gmail_extractor/email_formatter/email_xml_formatter.rb +39 -0
- data/lib/gmail_extractor/email_progressbar.rb +36 -0
- data/lib/gmail_extractor/printer.rb +2 -0
- data/lib/gmail_extractor/printer/console_printer.rb +25 -0
- data/lib/gmail_extractor/printer/file_printer.rb +19 -0
- data/lib/gmail_extractor/version.rb +3 -0
- metadata +97 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# GmailExtractor
|
|
2
|
+
|
|
3
|
+
A small command line tool to extract emails from Google Mail (GMail).
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
$> gem install gmail_extractor
|
|
8
|
+
$> gmail_extractor --help
|
|
9
|
+
|
|
10
|
+
## Examples
|
|
11
|
+
|
|
12
|
+
To download all emails within the label LABEL and the GMail account test123:
|
|
13
|
+
|
|
14
|
+
$> gmail_extractor --username test123 --mailbox LABEL
|
|
15
|
+
|
|
16
|
+
## Development
|
|
17
|
+
|
|
18
|
+
This gem is built on top of the following gems:
|
|
19
|
+
|
|
20
|
+
* `gmail`
|
|
21
|
+
* `progressbar`
|
|
22
|
+
* `highline`
|
|
23
|
+
|
|
24
|
+
It has been tested on Ruby 1.9.3-p0!
|
data/Rakefile
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/gmail_extractor
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
require "gmail_extractor/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for gmail_extractor. The file, test-file and executable
# lists are all taken from what git currently tracks.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "gmail_extractor"
  spec.version = GmailExtractor::VERSION

  # Author and project information
  spec.authors     = ["Simon Harrer"]
  spec.email       = ["simon.harrer@gmail.com"]
  spec.homepage    = 'https://github.com/simonharrer/gmail_extractor'
  spec.summary     = "Downloads emails from Google Mail to XML, CSV or HTML"
  spec.description = spec.summary

  spec.rubyforge_project = "gmail_extractor"

  # Packaged content, as tracked by git
  tracked_executables = `git ls-files -- bin/*`.split("\n")
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = tracked_executables.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies (no version constraints)
  %w[gmail progressbar highline].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "gmail_extractor/version"
|
|
3
|
+
|
|
4
|
+
# data structures
|
|
5
|
+
require "gmail_extractor/email"
|
|
6
|
+
|
|
7
|
+
# formatter
|
|
8
|
+
require "gmail_extractor/email_formatter"
|
|
9
|
+
|
|
10
|
+
# printer
|
|
11
|
+
require "gmail_extractor/printer"
|
|
12
|
+
|
|
13
|
+
# printer decorators
|
|
14
|
+
require "gmail_extractor/email_progressbar"
|
|
15
|
+
|
|
16
|
+
# gmail communication
|
|
17
|
+
require "gmail_extractor/email_extractor"
|
|
18
|
+
|
|
19
|
+
module GmailExtractor

  # Command line entry point.
  #
  # Parses ARGV, builds the printer -> formatter (-> progress bar) chain,
  # interactively asks for user, password and mailbox when they were not
  # given as options, and then runs the extraction.
  #
  # Raises RuntimeError when the requested output TYPE is not xml, csv or html.
  def self.execute
    # Reading from command line
    require "optparse"

    # Defaults are set up front: an OptionParser block only runs when its
    # switch is present, so a default assigned inside the block is never
    # applied for a missing switch.
    options = { :name => "Self", :output => "csv" }

    OptionParser.new do |opts|
      opts.banner = "Usage: gmail_extractor [options]"

      opts.on("-u", "--username USERNAME", "Set gmail USERNAME") do |username|
        options[:username] = username
      end
      opts.on("-p", "--password PASSWORD", "Set gmail PASSWORD") do |password|
        options[:password] = password
      end
      opts.on("-n", "--name NAME", "Set own NAME; defaults to Self if not given") do |name|
        options[:name] = name
      end
      opts.on("-m", "--mailbox MAILBOX", "Set MAILBOX from where the mails are fetched. Label names can be used too!") do |mailbox|
        options[:mailbox] = mailbox
      end
      # DecimalInteger rejects non-numeric input instead of silently turning
      # it into 0, and (unlike Integer) does not read "010" as octal.
      opts.on("-l", "--limit LIMIT", OptionParser::DecimalInteger, "Set limit LIMIT to how many emails are fetched; defaults to all if not given") do |limit|
        options[:limit] = limit
      end
      opts.on("-o", "--output TYPE", "Set output TYPE to xml, csv or html. defaults to csv") do |output|
        options[:output] = output
      end
      opts.on("-f", "--file FILE", "Set output FILE. if not present, only output to STDOUT") do |file|
        options[:file] = file
      end
      # A short switch is a single character. "-pb" would be read as "-p"
      # taking an argument named "b", which shadows the password switch and
      # makes --progressbar require an argument.
      opts.on("-b", "--progressbar", "Show progressbar on STDOUT") do
        options[:progressbar] = true
      end
      opts.on("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end.parse!

    # set correct formatter; resolved before the printer so that an invalid
    # TYPE does not create or truncate the output file first
    formatter_class =
      case options[:output].downcase
      when "xml"  then EmailXmlFormatter
      when "csv"  then EmailCsvFormatter
      when "html" then EmailHtmlFormatter
      else
        raise "Unknown output type '#{options[:output]}' - expected xml, csv or html"
      end

    # set printer: always the console, additionally the file when one is given
    file = options[:file]
    printer = file ? ConsolePrinter.new(FilePrinter.new(file)) : ConsolePrinter.new

    formatter = formatter_class.new(printer)

    # handle progressbar
    formatter = EmailProgressbar.new(formatter) if options[:progressbar]

    # Asking missing values
    require "highline/import"
    user = options[:username] || ask("Enter user: ")
    password = options[:password] || ask("Enter password: ") { |q| q.echo = false }
    extractor = EmailExtractor.new(user, password, options[:name], formatter)

    # Ask missing values and extract
    label = options[:mailbox] || ask("Enter mailbox/label: ")
    extractor.extract(label, options[:limit])
  end

end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
require "gmail"
|
|
2
|
+
require "date"
|
|
3
|
+
|
|
4
|
+
module GmailExtractor

  # Controls the extraction: logs in, hands every email of a label to the
  # printer (header, label, one call per email, footer) and logs out again.
  class EmailExtractor
    attr_accessor :user, :password, :printer, :name

    # user, password - credentials passed to Gmail.connect
    # name           - sender name used when an email's sender has no name
    # printer        - object responding to header, label(label, limit),
    #                  email(mail) and footer
    def initialize(user, password, name, printer)
      @user = user
      @password = password
      @printer = printer
      @name = name
    end

    # Extracts the emails of +label+, stopping after +limit+ emails when
    # +limit+ is given.
    #
    # Raises RuntimeError ("password/username wrong") when the login fails.
    def extract(label, limit)
      connect
      begin
        extract_mails_for label, limit
      ensure
        # log out even when fetching or printing raised, so the session is
        # not left open
        disconnect
      end
    end

    private

    def extract_mails_for(label, limit)
      header
      body label, limit
      footer
    end

    def disconnect
      @gmail.logout
    end

    def connect
      @gmail = Gmail.connect(user, password)
      raise "password/username wrong" unless @gmail.logged_in?
    end

    def body(label, limit)
      mailbox = @gmail.label(label)
      printer.label(mailbox, limit)

      mails = 0
      mailbox.emails do |email|
        from = email.from.first.name || @name
        date = DateTime.parse(email.date)
        content = email.body.to_s.force_encoding("UTF-8")

        printer.email(Email.new(from, date, content))

        mails += 1
        # a nil limit means "no limit"
        break if limit && mails == limit
      end
    end

    def header
      printer.header
    end

    def footer
      printer.footer
    end
  end

end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require "csv"
|
|
2
|
+
|
|
3
|
+
module GmailExtractor

  # Prints each email as one CSV row with the columns date, from, content.
  class EmailCsvFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    # CSV output has no preamble.
    def header; end

    # CSV output has no trailer, but the printer is still closed here, as
    # the html and xml formatters do, so an output file does not stay open.
    def footer
      @printer.close
    end

    # Writes one CSV line for +email+ (quoting is handled by Array#to_csv).
    def email(email)
      @printer << [email.date, email.from, email.content].to_csv
    end

    # Label information is not part of the CSV output.
    def label(label, limit); end

  end

end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require "cgi"

module GmailExtractor

  # Prints the output as html
  class EmailHtmlFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    # Opens the document.
    def header
      @printer << "<!doctype html><html><head><title>E-Mails</title><meta charset='utf-8'></head><body>"
    end

    # Closes the document and the printer.
    def footer
      @printer << "</body></html>"
      @printer.close
    end

    # Writes one <div class='email'> for +email+. All fields come from
    # received mail, so they are HTML-escaped before being embedded.
    def email(email)
      @printer << [
        "<div class='email'>",
        "<div class='from'>#{escape(email.from)}</div>",
        "<div class='date'>#{escape(email.date)}</div>",
        "<div class='content'>#{escape(email.content)}</div>",
        "</div>"
      ].join
    end

    # Writes the label's name and email count; +limit+ is not used here.
    def label(label, limit)
      @printer << [
        "<div class='label'>",
        "<div class='name'>#{escape(label.name)}</div>",
        "<div class='count'>#{escape(label.count)}</div>",
        "</div>"
      ].join
    end

    private

    # Escapes &, <, > and quotes; nil becomes the empty string, just like
    # plain string interpolation.
    def escape(value)
      CGI.escapeHTML(value.to_s)
    end

  end

end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
require "cgi"

module GmailExtractor

  # Prints the output as xml: one <email> element per email inside a single
  # <emails> root element.
  class EmailXmlFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    # Opens the root element.
    def header
      @printer << "<emails>"
    end

    # Closes the root element and the printer.
    def footer
      @printer << "</emails>"
      @printer.close
    end

    # Writes one <email> element. The field values are escaped because a raw
    # "&" or "<" in a mail would make the document not well-formed.
    def email(email)
      @printer << [
        "<email>",
        "<from>#{escape(email.from)}</from>",
        "<date>#{escape(email.date)}</date>",
        "<content>#{escape(email.content)}</content>",
        "</email>"
      ].join
    end

    # Label information is not part of the XML output.
    def label(label, limit); end

    private

    # Escapes &, <, > and quotes as character references; nil becomes the
    # empty string, just like plain string interpolation.
    def escape(value)
      CGI.escapeHTML(value.to_s)
    end

  end

end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
require "progressbar"
|
|
2
|
+
|
|
3
|
+
module GmailExtractor

  # Simple printer proxy to enable tracking the progress with a progress bar transparently.
  # Every call is forwarded to the wrapped printer; label creates the bar,
  # email advances it by one and footer finishes it.
  class EmailProgressbar

    attr_accessor :printer

    # printer - the wrapped object responding to header, footer, email and label
    def initialize(printer)
      @printer = printer
    end

    def header
      printer.header
    end

    def footer
      printer.footer
      @progress_bar.finish
    end

    def email(email)
      printer.email(email)
      @progress_bar.inc
    end

    # Forwards the label and sets up the bar with the number of emails that
    # will actually be processed.
    def label(label, limit)
      printer.label(label, limit)

      available = label.count
      # The limit only shortens the run when it is positive and below the
      # number of available emails; otherwise every email is fetched, so the
      # bar has to span all of them.
      total_count = (limit && limit > 0) ? [limit, available].min : available
      @progress_bar = ProgressBar.new("emails", total_count)
    end

  end

end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module GmailExtractor

  # Printer that echoes everything to STDOUT via puts and, when it wraps
  # another printer, passes the same string on to it.
  class ConsolePrinter

    # printer - optional downstream printer responding to << and close
    def initialize(printer = nil)
      @printer = printer
    end

    # Prints +string+ and forwards it; returns the downstream result, or nil
    # when there is no downstream printer.
    def <<(string)
      puts string
      return unless forward?

      @printer << string
    end

    # Closes the downstream printer, if there is one.
    def close
      return unless forward?

      @printer.close
    end

    private

    # Truthy when a downstream printer was supplied.
    def forward?
      @printer
    end

  end

end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module GmailExtractor

  # Printer that writes every string it receives to a file.
  class FilePrinter

    # Opens +output_file+ for writing in UTF-8 (mode "w"). Sync mode is
    # enabled on the handle, so Ruby does not buffer the writes.
    def initialize(output_file)
      @output_file = File.new(output_file, "w:utf-8").tap { |io| io.sync = true }
    end

    # Appends +string+ to the file.
    def <<(string)
      @output_file << string
    end

    # Closes the underlying file handle.
    def close
      @output_file.close
    end
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: gmail_extractor
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Simon Harrer
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2012-03-05 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: gmail
|
|
16
|
+
requirement: &29237616 !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ! '>='
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: *29237616
|
|
25
|
+
- !ruby/object:Gem::Dependency
|
|
26
|
+
name: progressbar
|
|
27
|
+
requirement: &29253564 !ruby/object:Gem::Requirement
|
|
28
|
+
none: false
|
|
29
|
+
requirements:
|
|
30
|
+
- - ! '>='
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: *29253564
|
|
36
|
+
- !ruby/object:Gem::Dependency
|
|
37
|
+
name: highline
|
|
38
|
+
requirement: &29254740 !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
40
|
+
requirements:
|
|
41
|
+
- - ! '>='
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '0'
|
|
44
|
+
type: :runtime
|
|
45
|
+
prerelease: false
|
|
46
|
+
version_requirements: *29254740
|
|
47
|
+
description: Downloads emails from Google Mail to XML, CSV or HTML
|
|
48
|
+
email:
|
|
49
|
+
- simon.harrer@gmail.com
|
|
50
|
+
executables:
|
|
51
|
+
- gmail_extractor
|
|
52
|
+
extensions: []
|
|
53
|
+
extra_rdoc_files: []
|
|
54
|
+
files:
|
|
55
|
+
- .gitignore
|
|
56
|
+
- Gemfile
|
|
57
|
+
- README.md
|
|
58
|
+
- Rakefile
|
|
59
|
+
- bin/gmail_extractor
|
|
60
|
+
- gmail_extractor.gemspec
|
|
61
|
+
- lib/gmail_extractor.rb
|
|
62
|
+
- lib/gmail_extractor/email.rb
|
|
63
|
+
- lib/gmail_extractor/email_extractor.rb
|
|
64
|
+
- lib/gmail_extractor/email_formatter.rb
|
|
65
|
+
- lib/gmail_extractor/email_formatter/email_csv_formatter.rb
|
|
66
|
+
- lib/gmail_extractor/email_formatter/email_html_formatter.rb
|
|
67
|
+
- lib/gmail_extractor/email_formatter/email_xml_formatter.rb
|
|
68
|
+
- lib/gmail_extractor/email_progressbar.rb
|
|
69
|
+
- lib/gmail_extractor/printer.rb
|
|
70
|
+
- lib/gmail_extractor/printer/console_printer.rb
|
|
71
|
+
- lib/gmail_extractor/printer/file_printer.rb
|
|
72
|
+
- lib/gmail_extractor/version.rb
|
|
73
|
+
homepage: https://github.com/simonharrer/gmail_extractor
|
|
74
|
+
licenses: []
|
|
75
|
+
post_install_message:
|
|
76
|
+
rdoc_options: []
|
|
77
|
+
require_paths:
|
|
78
|
+
- lib
|
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
80
|
+
none: false
|
|
81
|
+
requirements:
|
|
82
|
+
- - ! '>='
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: '0'
|
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
|
+
none: false
|
|
87
|
+
requirements:
|
|
88
|
+
- - ! '>='
|
|
89
|
+
- !ruby/object:Gem::Version
|
|
90
|
+
version: '0'
|
|
91
|
+
requirements: []
|
|
92
|
+
rubyforge_project: gmail_extractor
|
|
93
|
+
rubygems_version: 1.8.11
|
|
94
|
+
signing_key:
|
|
95
|
+
specification_version: 3
|
|
96
|
+
summary: Downloads emails from Google Mail to XML, CSV or HTML
|
|
97
|
+
test_files: []
|