gmail_extractor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in gmail_extractor.gemspec
gemspec
@@ -0,0 +1,24 @@
1
+ # GmailExtractor
2
+
3
+ A small command line tool to extract emails from Google Mail (GMail).
4
+
5
+ ## Usage
6
+
7
+ $> gem install gmail_extractor
8
+ $> gmail_extractor --help
9
+
10
+ ## Examples
11
+
12
+ To download all emails with the label LABEL from the GMail account test123:
13
+
14
+ $> gmail_extractor --username test123 --mailbox LABEL
15
+
16
+ ## Development
17
+
18
+ This gem is built on top of the following gems:
19
+
20
+ * `gmail`
21
+ * `progressbar`
22
+ * `highline`
23
+
24
+ It has been tested on Ruby 1.9.3-p0!
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# -*- mode: ruby -*-

# Executable wrapper: loads the library and hands control to its
# command line entry point.
require "gmail_extractor"

GmailExtractor.execute
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "gmail_extractor/version"
4
+
5
# Gem specification for gmail_extractor.
Gem::Specification.new do |s|
  s.name        = "gmail_extractor"
  s.version     = GmailExtractor::VERSION
  s.authors     = ["Simon Harrer"]
  s.email       = ["simon.harrer@gmail.com"]
  s.homepage    = 'https://github.com/simonharrer/gmail_extractor'
  s.summary     = %q{Downloads emails from Google Mail to XML, CSV or HTML}
  # A description distinct from the summary; RubyGems warns when both are identical.
  s.description = %q{A small command line tool that downloads the emails of a Google Mail (GMail) mailbox or label and prints them as XML, CSV or HTML.}

  # NOTE: rubyforge_project is no longer set; the attribute is deprecated
  # in current RubyGems releases.

  # The packaged files are whatever git tracks at build time.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_runtime_dependency "gmail"
  s.add_runtime_dependency "progressbar"
  s.add_runtime_dependency "highline"
end
@@ -0,0 +1,100 @@
1
+ # encoding: utf-8
2
+ require "gmail_extractor/version"
3
+
4
+ # data structures
5
+ require "gmail_extractor/email"
6
+
7
+ # formatter
8
+ require "gmail_extractor/email_formatter"
9
+
10
+ # printer
11
+ require "gmail_extractor/printer"
12
+
13
+ # printer decorators
14
+ require "gmail_extractor/email_progressbar"
15
+
16
+ # gmail communication
17
+ require "gmail_extractor/email_extractor"
18
+
19
module GmailExtractor

  # Runs the command line interface.
  #
  # Parses the switches from ARGV, builds the printer/formatter chain,
  # interactively asks (via highline) for username, password and mailbox
  # when they were not passed as switches, and finally starts the extraction.
  #
  # Raises RuntimeError if the requested output type is not xml, csv or html.
  def self.execute
    # Reading from command line
    require "optparse"
    options = {}
    OptionParser.new do |opts|
      opts.banner = "Usage: gmail_extractor [options]"
      opts.on("-u", "--username USERNAME", "Set gmail USERNAME") do |username|
        options[:username] = username
      end
      opts.on("-p", "--password PASSWORD", "Set gmail PASSWORD") do |password|
        options[:password] = password
      end
      opts.on("-n", "--name NAME", "Set own NAME; defaults to Self if not given") do |name|
        options[:name] = name
      end
      opts.on("-m", "--mailbox MAILBOX", "Set MAILBOX from where the mails are fetched. Label names can be used too!") do |mailbox|
        options[:mailbox] = mailbox
      end
      # Integer makes OptionParser reject non-numeric values instead of
      # silently turning them into 0.
      opts.on("-l", "--limit LIMIT", Integer, "Set limit LIMIT to how many emails are fetched; defaults to all if not given") do |limit|
        # A non-positive limit is treated like "no limit".
        options[:limit] = limit if limit > 0
      end
      opts.on("-o", "--output TYPE", "Set output TYPE to xml, csv or html. defaults to csv") do |output|
        options[:output] = output
      end
      opts.on("-f", "--file FILE", "Set output FILE. if not present, only output to STDOUT") do |file|
        options[:file] = file
      end
      # Short switches are a single character. The former "-pb" was read by
      # OptionParser as switch -p with an argument, which clashed with
      # -p PASSWORD, hence the dedicated -b.
      opts.on("-b", "--progressbar", "Show progressbar on STDOUT") do
        options[:progressbar] = true
      end
      opts.on("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end.parse!

    # Pick the formatter first so that an invalid output type is reported
    # before any output file gets created.
    output = (options[:output] || "csv").downcase
    formatter_class =
      case output
      when "xml"  then EmailXmlFormatter
      when "csv"  then EmailCsvFormatter
      when "html" then EmailHtmlFormatter
      else raise "Unsupported output type '#{output}' - use xml, csv or html"
      end

    # set printer: always print to the console, additionally to a file if requested
    file = options[:file]
    printer = file ? ConsolePrinter.new(FilePrinter.new(file)) : ConsolePrinter.new

    formatter = formatter_class.new(printer)

    # handle progressbar
    formatter = EmailProgressbar.new(formatter) if options[:progressbar]

    # Asking missing values
    require "highline/import"
    user = options[:username] || ask("Enter user: ")
    password = options[:password] || ask("Enter password: ") { |q| q.echo = false }
    # The documented default has to be applied here, as the option block
    # only runs when -n is actually given.
    name = options[:name] || "Self"
    extractor = EmailExtractor.new(user, password, name, formatter)

    # Ask missing values and extract
    label = options[:mailbox] || ask("Enter mailbox/label: ")
    limit = options[:limit]
    extractor.extract(label, limit)
  end

end
@@ -0,0 +1,13 @@
1
module GmailExtractor

  # Plain data holder for one extracted email: sender, date and content.
  class Email
    attr_accessor :from, :content, :date

    # from    - the sender
    # date    - the date of the email
    # content - the body of the email
    def initialize(from, date, content)
      @from, @date, @content = from, date, content
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require "gmail"
2
+ require "date"
3
+
4
module GmailExtractor

  # Controls the extraction: logs into GMail, walks through the emails of
  # one mailbox/label and hands each of them to the configured printer.
  class EmailExtractor
    attr_accessor :user, :password, :printer, :name

    # user     - the GMail user name
    # password - the GMail password
    # name     - sender name used when an email carries no sender name
    # printer  - object receiving header, label(label, limit), email(mail)
    #            and footer calls
    def initialize(user, password, name, printer)
      @user = user
      @password = password
      @printer = printer
      @name = name
    end

    # Extracts the emails of the given label, at most limit emails when
    # limit is given (nil means all).
    #
    # Raises RuntimeError ("password/username wrong") if the login fails.
    def extract(label, limit)
      connect
      begin
        extract_mails_for label, limit
      ensure
        # Always log out again, even if the extraction failed halfway.
        disconnect
      end
    end

    private

    def extract_mails_for(label, limit)
      header
      body label, limit
      footer
    end

    def disconnect
      @gmail.logout
    end

    def connect
      @gmail = Gmail.connect(user, password)
      raise "password/username wrong" unless @gmail.logged_in?
    end

    # Announces the label to the printer and then prints its emails one by
    # one, stopping as soon as limit emails have been printed.
    def body(label, limit)
      mailbox = @gmail.label(label)
      printer.label(mailbox, limit)

      mails = 0
      mailbox.emails do |email|
        # Fall back to the configured name if the sender has no name set.
        from = email.from.first.name || @name
        date = DateTime.parse(email.date)
        content = email.body.to_s.force_encoding("UTF-8")

        printer.email(Email.new(from, date, content))

        mails += 1
        break if limit && mails == limit
      end
    end

    def header
      printer.header
    end

    def footer
      printer.footer
    end
  end

end
@@ -0,0 +1,3 @@
1
+ require "gmail_extractor/email_formatter/email_csv_formatter"
2
+ require "gmail_extractor/email_formatter/email_xml_formatter"
3
+ require "gmail_extractor/email_formatter/email_html_formatter"
@@ -0,0 +1,25 @@
1
+ require "csv"
2
+
3
module GmailExtractor

  # Prints the output as CSV: one row (date, sender, content) per email.
  class EmailCsvFormatter

    attr_accessor :printer

    # printer - object the rows are appended to with <<; it is closed in footer
    def initialize(printer)
      @printer = printer
    end

    # CSV output has no preamble.
    def header; end

    # Closes the printer so that a file-backed printer is closed once the
    # output is complete, the same way the XML and HTML formatters do.
    def footer
      @printer.close
    end

    # Appends one CSV row for the given email.
    def email(email)
      @printer << [email.date, email.from, email.content].to_csv
    end

    # The label is not part of the CSV output.
    def label(label, limit); end

  end

end
@@ -0,0 +1,48 @@
1
require "cgi"

module GmailExtractor

  # Prints the output as html
  class EmailHtmlFormatter

    attr_accessor :printer

    # printer - object the markup is appended to with <<; it is closed in footer
    def initialize(printer)
      @printer = printer
    end

    # Opens the HTML document.
    def header
      @printer << "<!doctype html><html><head><title>E-Mails</title><meta charset='utf-8'></head><body>"
    end

    # Closes the HTML document and the printer.
    def footer
      @printer << "</body></html>"
      @printer.close
    end

    # Prints one email. Sender, date and content come from the mailbox,
    # so they are escaped before being embedded in the markup.
    def email(email)
      @printer << [
        "<div class='email'>",
        "<div class='from'>#{escape(email.from)}</div>",
        "<div class='date'>#{escape(email.date)}</div>",
        "<div class='content'>#{escape(email.content)}</div>",
        "</div>"
      ].join
    end

    # Prints name and email count of the label; limit is not used.
    def label(label, limit)
      @printer << [
        "<div class='label'>",
        "<div class='name'>#{escape(label.name)}</div>",
        "<div class='count'>#{label.count}</div>",
        "</div>"
      ].join
    end

    private

    # Escapes &, <, > and quotes so that the value cannot break the markup.
    def escape(value)
      CGI.escapeHTML(value.to_s)
    end

  end

end
@@ -0,0 +1,39 @@
1
require "cgi"

module GmailExtractor

  # Prints the output as XML: an <emails> root element with one <email>
  # child per email.
  class EmailXmlFormatter

    attr_accessor :printer

    # printer - object the markup is appended to with <<; it is closed in footer
    def initialize(printer)
      @printer = printer
    end

    # Opens the root element.
    def header
      @printer << "<emails>"
    end

    # Closes the root element and the printer.
    def footer
      @printer << "</emails>"
      @printer.close
    end

    # Prints one email. The values are escaped, as a raw "<" or "&" in
    # the email text would make the document malformed.
    def email(email)
      @printer << [
        "<email>",
        "<from>#{escape(email.from)}</from>",
        "<date>#{escape(email.date)}</date>",
        "<content>#{escape(email.content)}</content>",
        "</email>"
      ].join
    end

    # The label is not part of the XML output.
    def label(label, limit); end

    private

    # Replaces &, <, > and quotes by entities; all of the entities
    # produced (&amp; &lt; &gt; &quot; &#39;) are valid in XML as well.
    def escape(value)
      CGI.escapeHTML(value.to_s)
    end

  end

end
@@ -0,0 +1,36 @@
1
+ require "progressbar"
2
+
3
module GmailExtractor

  # Decorator that forwards every call to the wrapped printer and keeps a
  # progress bar in step with the number of emails handled so far.
  class EmailProgressbar

    attr_accessor :printer

    # printer - the wrapped object; receives header, label, email and footer
    def initialize(printer)
      @printer = printer
    end

    def header
      @printer.header
    end

    # Forwards the label and creates the progress bar. The bar's total is
    # the limit if one is given, otherwise the count reported by the label.
    def label(label, limit)
      @printer.label(label, limit)
      @progress_bar = ProgressBar.new("emails", limit || label.count)
    end

    # Forwards the email, then advances the bar by one step.
    def email(email)
      @printer.email(email)
      @progress_bar.inc
    end

    # Forwards the footer, then completes the bar.
    def footer
      @printer.footer
      @progress_bar.finish
    end

  end

end
@@ -0,0 +1,2 @@
1
+ require "gmail_extractor/printer/console_printer"
2
+ require "gmail_extractor/printer/file_printer"
@@ -0,0 +1,25 @@
1
module GmailExtractor

  # Prints every string to standard output and, if another printer was
  # given, passes the string on to that printer as well.
  class ConsolePrinter

    # printer - optional next printer in the chain (must respond to << and close)
    def initialize(printer = nil)
      @printer = printer
    end

    # Writes the string to the console, then to the chained printer.
    # Returns whatever the chained printer's << returns, or nil without one.
    def <<(string)
      $stdout.puts(string)
      return unless printer?

      @printer << string
    end

    # Closes the chained printer; does nothing when there is none.
    def close
      return unless printer?

      @printer.close
    end

    private

    # Truthy if a chained printer is present.
    def printer?
      @printer
    end

  end

end
@@ -0,0 +1,19 @@
1
module GmailExtractor

  # Writes every string it receives to a UTF-8 encoded file.
  class FilePrinter

    # output_file - path of the file to write; it is opened in "w" mode
    def initialize(output_file)
      # sync = true: do not buffer the output inside Ruby.
      @output_file = File.new(output_file, "w:utf-8").tap { |io| io.sync = true }
    end

    # Appends the string to the file as is; no newline is added.
    def <<(string)
      @output_file << string
    end

    # Closes the underlying file.
    def close
      @output_file.close
    end
  end

end
@@ -0,0 +1,3 @@
1
module GmailExtractor
  # Version of the gem; read by the gemspec.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gmail_extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Simon Harrer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: gmail
16
+ requirement: &29237616 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *29237616
25
+ - !ruby/object:Gem::Dependency
26
+ name: progressbar
27
+ requirement: &29253564 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *29253564
36
+ - !ruby/object:Gem::Dependency
37
+ name: highline
38
+ requirement: &29254740 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *29254740
47
+ description: Downloads emails from Google Mail to XML, CSV or HTML
48
+ email:
49
+ - simon.harrer@gmail.com
50
+ executables:
51
+ - gmail_extractor
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - Gemfile
57
+ - README.md
58
+ - Rakefile
59
+ - bin/gmail_extractor
60
+ - gmail_extractor.gemspec
61
+ - lib/gmail_extractor.rb
62
+ - lib/gmail_extractor/email.rb
63
+ - lib/gmail_extractor/email_extractor.rb
64
+ - lib/gmail_extractor/email_formatter.rb
65
+ - lib/gmail_extractor/email_formatter/email_csv_formatter.rb
66
+ - lib/gmail_extractor/email_formatter/email_html_formatter.rb
67
+ - lib/gmail_extractor/email_formatter/email_xml_formatter.rb
68
+ - lib/gmail_extractor/email_progressbar.rb
69
+ - lib/gmail_extractor/printer.rb
70
+ - lib/gmail_extractor/printer/console_printer.rb
71
+ - lib/gmail_extractor/printer/file_printer.rb
72
+ - lib/gmail_extractor/version.rb
73
+ homepage: https://github.com/simonharrer/gmail_extractor
74
+ licenses: []
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project: gmail_extractor
93
+ rubygems_version: 1.8.11
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Downloads emails from Google Mail to XML, CSV or HTML
97
+ test_files: []