gmail_extractor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain http:// source is open to tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in gmail_extractor.gemspec
gemspec
@@ -0,0 +1,24 @@
1
+ # GmailExtractor
2
+
3
+ A small command line tool to extract emails from Google Mail (GMail).
4
+
5
+ ## Usage
6
+
7
+ $> gem install gmail_extractor
8
+ $> gmail_extractor --help
9
+
10
+ ## Examples
11
+
12
+ To download all emails within the label LABEL and the GMail account test123:
13
+
14
+ $> gmail_extractor --username test123 --mailbox LABEL
15
+
16
+ ## Development
17
+
18
+ This gem is built on top of the following gems:
19
+
20
+ * `gmail`
21
+ * `progressbar`
22
+ * `highline`
23
+
24
+ It has been tested on Ruby 1.9.3-p0!
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- mode: ruby -*-
3
+ require "gmail_extractor"
4
+ GmailExtractor.execute
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "gmail_extractor/version"
4
+
5
# Gem packaging description for gmail_extractor. File lists are taken from
# the files tracked by git at build time.
Gem::Specification.new do |spec|
  spec.name        = "gmail_extractor"
  spec.version     = GmailExtractor::VERSION
  spec.authors     = ["Simon Harrer"]
  spec.email       = ["simon.harrer@gmail.com"]
  spec.homepage    = "https://github.com/simonharrer/gmail_extractor"
  spec.summary     = "Downloads emails from Google Mail to XML, CSV or HTML"
  spec.description = spec.summary

  spec.rubyforge_project = "gmail_extractor"

  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`
  tracked_bins  = `git ls-files -- bin/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  # Executables are listed by bare name, without the bin/ directory.
  spec.executables   = tracked_bins.split("\n").map do |path|
    File.basename(path)
  end
  spec.require_paths = ["lib"]

  %w[gmail progressbar highline].each do |dependency|
    spec.add_runtime_dependency dependency
  end
end
@@ -0,0 +1,100 @@
1
+ # encoding: utf-8
2
+ require "gmail_extractor/version"
3
+
4
+ # data structures
5
+ require "gmail_extractor/email"
6
+
7
+ # formatter
8
+ require "gmail_extractor/email_formatter"
9
+
10
+ # printer
11
+ require "gmail_extractor/printer"
12
+
13
+ # printer decorators
14
+ require "gmail_extractor/email_progressbar"
15
+
16
+ # gmail communication
17
+ require "gmail_extractor/email_extractor"
18
+
19
module GmailExtractor

  # Entry point of the command line tool.
  #
  # Parses the command line, builds the printer / formatter chain, asks
  # interactively for any missing user name, password or mailbox, and then
  # runs the extraction.
  #
  # argv - Array of command line arguments (defaults to ARGV). Recognised
  #        options are removed from it by the option parser.
  #
  # Raises RuntimeError when the requested output type is not xml, csv or html.
  # Raises OptionParser::ParseError on malformed command line options.
  def self.execute(argv = ARGV)
    options = parse_options(argv)

    # Resolve the formatter before any file is opened, so an unknown output
    # type does not leave an empty output file behind.
    formatter_class =
      case options[:output]
      when "xml"  then EmailXmlFormatter
      when "csv"  then EmailCsvFormatter
      when "html" then EmailHtmlFormatter
      else
        raise "Unknown output type '#{options[:output]}' - expected xml, csv or html"
      end

    # Output always goes to the console; a file printer is chained behind it
    # when --file was given.
    file = options[:file]
    printer = file ? ConsolePrinter.new(FilePrinter.new(file)) : ConsolePrinter.new

    formatter = formatter_class.new(printer)
    formatter = EmailProgressbar.new(formatter) if options[:progressbar]

    # Ask for missing values
    user = options[:username] || prompt("Enter user: ")
    password = options[:password] || prompt("Enter password: ", false)
    extractor = EmailExtractor.new(user, password, options[:name], formatter)

    label = options[:mailbox] || prompt("Enter mailbox/label: ")
    extractor.extract(label, options[:limit])
  end

  # Parses the command line options into a Hash.
  #
  # argv - Array of command line arguments; recognised options are removed.
  #
  # Returns a Hash with the keys :username, :password, :mailbox, :limit,
  # :file and :progressbar (each only present when given), plus :name
  # (defaults to "Self") and :output (lower-cased, defaults to "csv").
  def self.parse_options(argv)
    require "optparse"

    options = {}
    parser = OptionParser.new do |opts|
      opts.banner = "Usage: gmail_extractor [options]"
      opts.on("-u", "--username USERNAME", "Set gmail USERNAME") do |username|
        options[:username] = username
      end
      opts.on("-p", "--password PASSWORD", "Set gmail PASSWORD") do |password|
        options[:password] = password
      end
      opts.on("-n", "--name NAME", "Set own NAME; defaults to Self if not given") do |name|
        options[:name] = name
      end
      opts.on("-m", "--mailbox MAILBOX", "Set MAILBOX from where the mails are fetched. Label names can be used too!") do |mailbox|
        options[:mailbox] = mailbox
      end
      opts.on("-l", "--limit LIMIT", Integer, "Set limit LIMIT to how many emails are fetched; defaults to all if not given") do |limit|
        # A non-positive limit keeps meaning "no limit".
        options[:limit] = limit if limit > 0
      end
      opts.on("-o", "--output TYPE", "Set output TYPE to xml, csv or html. defaults to csv") do |output|
        options[:output] = output
      end
      opts.on("-f", "--file FILE", "Set output FILE. if not present, only output to STDOUT") do |file|
        options[:file] = file
      end
      # Short switches are a single character. The former "-pb" was read by
      # OptionParser as "-p" with a required argument, which replaced the
      # password switch and made --progressbar demand an argument.
      opts.on("-b", "--progressbar", "Show progressbar on STDOUT") do
        options[:progressbar] = true
      end
      opts.on("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end
    parser.parse!(argv)

    # Defaults must be applied here: the option blocks above only run when
    # the option is actually present on the command line.
    options[:name] ||= "Self"
    options[:output] = (options[:output] || "csv").downcase
    options
  end
  private_class_method :parse_options

  # Asks the user for a value on the terminal.
  #
  # question - String prompt to display.
  # echo     - false to hide the typed characters (used for the password).
  #
  # highline is loaded only here, i.e. only when something has to be asked.
  def self.prompt(question, echo = true)
    require "highline/import"
    ask(question) { |q| q.echo = echo }
  end
  private_class_method :prompt

end
@@ -0,0 +1,13 @@
1
module GmailExtractor

  # Plain data holder for one extracted email: sender, date and content.
  class Email
    attr_accessor :from, :content, :date

    # from    - sender of the email
    # date    - date of the email
    # content - body of the email
    def initialize(from, date, content)
      @from, @date, @content = from, date, content
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require "gmail"
2
+ require "date"
3
+
4
module GmailExtractor

  # Controls the extraction: logs into Gmail, walks the emails of one
  # mailbox/label and hands each of them to the printer.
  class EmailExtractor
    attr_accessor :user, :password, :printer, :name

    # user     - Gmail user name
    # password - Gmail password
    # name     - name used as sender when an email carries no sender name
    # printer  - object responding to header, label, email and footer
    def initialize(user, password, name, printer)
      @user = user
      @password = password
      @printer = printer
      @name = name
    end

    # Extracts the emails of the given mailbox/label.
    #
    # label - name of the mailbox or label
    # limit - maximum number of emails to extract, or nil for all
    #
    # Raises RuntimeError when the login fails.
    def extract(label, limit)
      connect
      begin
        extract_mails_for label, limit
      ensure
        # Always log out, even when extraction or printing raised.
        disconnect
      end
    end

    private

    def extract_mails_for(label, limit)
      header
      body label, limit
      footer
    end

    def disconnect
      @gmail.logout
    end

    def connect
      @gmail = Gmail.connect(user, password)
      raise "password/username wrong" unless @gmail.logged_in?
    end

    # Passes the label and then each email to the printer, stopping once
    # +limit+ emails have been handled (when a limit is given).
    def body(label, limit)
      label = @gmail.label(label)
      printer.label(label, limit)

      mails = 0
      label.emails do |email|
        # An email may carry no sender at all; fall back to the own name.
        senders = email.from
        sender = senders && senders.first
        from = (sender && sender.name) || @name
        date = DateTime.parse(email.date)
        content = email.body.to_s.force_encoding("UTF-8")

        printer.email(Email.new(from, date, content))

        mails += 1
        break if limit && mails == limit
      end
    end

    def header
      printer.header
    end

    def footer
      printer.footer
    end
  end

end
@@ -0,0 +1,3 @@
1
+ require "gmail_extractor/email_formatter/email_csv_formatter"
2
+ require "gmail_extractor/email_formatter/email_xml_formatter"
3
+ require "gmail_extractor/email_formatter/email_html_formatter"
@@ -0,0 +1,25 @@
1
+ require "csv"
2
+
3
module GmailExtractor

  # Prints the output as CSV: one row per email holding date, sender and
  # content. No header row and no label information is written.
  class EmailCsvFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    def header; end

    # Closes the printer, as the XML and HTML formatters do, so that a
    # chained output file is not left open.
    def footer
      @printer.close
    end

    # email - object responding to date, from and content
    def email(email)
      @printer << [email.date, email.from, email.content].to_csv
    end

    def label(label, limit); end

  end

end
@@ -0,0 +1,48 @@
1
require "erb"

module GmailExtractor

  # Prints the output as html
  class EmailHtmlFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    # Opens the HTML document.
    def header
      @printer << "<!doctype html><html><head><title>E-Mails</title><meta charset='utf-8'></head><body>"
    end

    # Closes the HTML document and the printer.
    def footer
      @printer << "</body></html>"
      @printer.close
    end

    # Writes one email as a div.
    #
    # email - object responding to from, date and content
    def email(email)
      @printer << [
        "<div class='email'>",
        "<div class='from'>#{escape(email.from)}</div>",
        "<div class='date'>#{escape(email.date)}</div>",
        "<div class='content'>#{escape(email.content)}</div>",
        "</div>"
      ].join
    end

    # Writes the label name and its email count as a div.
    #
    # label - object responding to name and count
    def label(label, limit)
      @printer << [
        "<div class='label'>",
        "<div class='name'>#{escape(label.name)}</div>",
        "<div class='count'>#{escape(label.count)}</div>",
        "</div>"
      ].join
    end

    private

    # Email fields and label names come from outside; escape them so they
    # cannot inject markup into the generated page.
    def escape(value)
      ERB::Util.html_escape(value)
    end

  end

end
@@ -0,0 +1,39 @@
1
require "erb"

module GmailExtractor

  # Prints the output as XML: an <emails> root element holding one <email>
  # element per email. Label information is not written.
  class EmailXmlFormatter

    attr_accessor :printer

    # printer - object responding to << and close
    def initialize(printer)
      @printer = printer
    end

    # Opens the root element.
    def header
      @printer << "<emails>"
    end

    # Closes the root element and the printer.
    def footer
      @printer << "</emails>"
      @printer.close
    end

    # Writes one email element.
    #
    # email - object responding to from, date and content
    def email(email)
      @printer << [
        "<email>",
        "<from>#{escape(email.from)}</from>",
        "<date>#{escape(email.date)}</date>",
        "<content>#{escape(email.content)}</content>",
        "</email>"
      ].join
    end

    def label(label, limit); end

    private

    # Escapes markup characters so that text containing & or < does not
    # produce malformed XML.
    def escape(value)
      ERB::Util.html_escape(value)
    end

  end

end
@@ -0,0 +1,36 @@
1
+ require "progressbar"
2
+
3
module GmailExtractor

  # Simple printer proxy to enable tracking the progress with a progress bar transparently.
  # Every call is forwarded to the wrapped printer; the bar is created when
  # the label is announced and advanced once per email.
  class EmailProgressbar

    attr_accessor :printer

    # printer - wrapped object responding to header, footer, email and label
    def initialize(printer)
      @printer = printer
    end

    def header
      printer.header
    end

    def footer
      printer.footer
      # The bar only exists once #label has been called.
      @progress_bar.finish if @progress_bar
    end

    def email(email)
      printer.email(email)
      @progress_bar.inc if @progress_bar
    end

    # Forwards the label and creates the progress bar. Its total is the
    # limit when one is given, otherwise the label's email count.
    def label(label, limit)
      printer.label(label, limit)
      total_count = limit || label.count
      @progress_bar = ProgressBar.new("emails", total_count)
    end

  end

end
@@ -0,0 +1,2 @@
1
+ require "gmail_extractor/printer/console_printer"
2
+ require "gmail_extractor/printer/file_printer"
@@ -0,0 +1,25 @@
1
module GmailExtractor

  # Printer that writes every string to standard output and, when another
  # printer was given, passes the string on to it as well.
  class ConsolePrinter

    # printer - optional next printer responding to << and close
    def initialize(printer = nil)
      @printer = printer
    end

    # Prints the string and forwards it to the next printer, if any.
    def <<(string)
      $stdout.puts string
      return nil unless @printer

      @printer << string
    end

    # Closes the next printer, if any.
    def close
      return nil unless @printer

      @printer.close
    end

  end

end
@@ -0,0 +1,19 @@
1
module GmailExtractor

  # Printer that appends every string it receives to a file opened for
  # writing in UTF-8. Writes are unbuffered (sync is switched on).
  class FilePrinter

    # output_file - path of the file to write; it is opened in "w" mode
    def initialize(output_file)
      @file = File.new(output_file, "w:utf-8").tap do |io|
        io.sync = true
      end
    end

    # Writes the string to the file.
    def <<(string)
      @file << string
    end

    # Closes the file.
    def close
      @file.close
    end
  end

end
@@ -0,0 +1,3 @@
1
module GmailExtractor
  # Version of the gem; the gemspec reads it as GmailExtractor::VERSION.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gmail_extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Simon Harrer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: gmail
16
+ requirement: &29237616 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *29237616
25
+ - !ruby/object:Gem::Dependency
26
+ name: progressbar
27
+ requirement: &29253564 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *29253564
36
+ - !ruby/object:Gem::Dependency
37
+ name: highline
38
+ requirement: &29254740 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *29254740
47
+ description: Downloads emails from Google Mail to XML, CSV or HTML
48
+ email:
49
+ - simon.harrer@gmail.com
50
+ executables:
51
+ - gmail_extractor
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - Gemfile
57
+ - README.md
58
+ - Rakefile
59
+ - bin/gmail_extractor
60
+ - gmail_extractor.gemspec
61
+ - lib/gmail_extractor.rb
62
+ - lib/gmail_extractor/email.rb
63
+ - lib/gmail_extractor/email_extractor.rb
64
+ - lib/gmail_extractor/email_formatter.rb
65
+ - lib/gmail_extractor/email_formatter/email_csv_formatter.rb
66
+ - lib/gmail_extractor/email_formatter/email_html_formatter.rb
67
+ - lib/gmail_extractor/email_formatter/email_xml_formatter.rb
68
+ - lib/gmail_extractor/email_progressbar.rb
69
+ - lib/gmail_extractor/printer.rb
70
+ - lib/gmail_extractor/printer/console_printer.rb
71
+ - lib/gmail_extractor/printer/file_printer.rb
72
+ - lib/gmail_extractor/version.rb
73
+ homepage: https://github.com/simonharrer/gmail_extractor
74
+ licenses: []
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project: gmail_extractor
93
+ rubygems_version: 1.8.11
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Downloads emails from Google Mail to XML, CSV or HTML
97
+ test_files: []