emailparser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/emailparser.rb +100 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3e6873cbc10f2ca98f4c1eb06998a03723006291
|
4
|
+
data.tar.gz: bf6276064de2d634ae0b295584c6ec17983e07b9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 08d5b376e97ee1d0af8cd75351f38b20a7d52df576e5d9dbafa824a1ee57a53fd9b95d9d29d7e3af9cc369ca5320fc1b469a5cf70ef5fb388453fcb866e4cd6f
|
7
|
+
data.tar.gz: 6a6a4435d7d2dd4056b36cd3fa3e83b7a96a7bfe2d2ed00d72d453ab6fc4a9082fcfebea793039b862c4bf168eab4a0d9cc461da25d82c6e8dd5b59c5cb1c4b3
|
data/emailparser.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'pry'
|
2
|
+
require 'json'
|
3
|
+
require 'mail'
|
4
|
+
|
5
|
+
# Parses a single email file (read through the `mail` gem) into a JSON
# document containing its headers, plain/HTML bodies and image attachment names.
class Emailparser
  # @param message [String] path of the email file to parse
  # @param attachment_dir [String] directory meant to receive saved
  #   attachments; it is stored but unused while attachment saving is disabled
  def initialize(message, attachment_dir)
    @message = message
    @attachment_dir = attachment_dir
  end

  # Normalises a string to UTF-8 so it can be serialised as JSON.
  #
  # Bytes that already form valid UTF-8 are kept as they are (only the
  # encoding tag is set). Anything else is treated as single-byte text and
  # each byte is mapped to the Unicode code point of the same value, which
  # was the original behaviour for every input. Keeping valid UTF-8 intact
  # avoids double-encoding multi-byte characters ("é" becoming "Ã©").
  #
  # @param str [Object] value to normalise
  # @return [Object] a UTF-8 String, or +str+ itself when it is not a String
  def fix_encode(str)
    return str unless str.is_a?(String)

    # dup so that neither frozen nor caller-owned strings are mutated
    utf8 = str.dup.force_encoding(Encoding::UTF_8)
    return utf8 if utf8.valid_encoding?

    str.unpack('C*').pack('U*')
  end

  # Reads the email at the configured path and serialises it.
  #
  # Progress messages are printed to standard output while parsing.
  #
  # @return [String] pretty-printed JSON with the keys source_file,
  #   message_id, from, to, cc, subject, body_plain, body_html, attachments
  def parse_message
    print "loading file: #{@message}\n"

    email = Mail.read(@message)
    body_plain, body_html = extract_bodies(email)

    email_data = {
      source_file: File.basename(@message),
      message_id: email.message_id,
      from: email.from,
      to: email.to,
      cc: email.cc,
      subject: email.subject ? fix_encode(email.subject) : "No Subject",
      body_plain: body_plain,
      body_html: body_html,
      attachments: image_attachment_names(email)
    }

    JSON.pretty_generate(email_data)
  end

  private

  # Returns the [plain, html] body pair for the email.
  def extract_bodies(email)
    unless email.multipart?
      print "found single part email\n"
      # A single-part message has only one body; it is reported under both
      # keys, exactly as before.
      body = fix_encode(email.body.decoded)
      return [body, body]
    end

    report_parts(email.parts)
    [decoded_body(email.text_part), decoded_body(email.html_part)]
  end

  # Prints one diagnostic entry per top-level MIME part.
  def report_parts(parts)
    parts.each do |part|
      # to_s guards against a part that reports no content type
      content_type = part.content_type.to_s
      if content_type.start_with?('multipart/alternative')
        print "has multipart/alternative\n"
      elsif !content_type.start_with?('text/plain', 'text/html')
        print "has part: #{content_type}\n"
      end
      print "-------------------------------------------------------\n"
    end
  end

  # Decodes a part's body, or returns "" when the message has no such part
  # (for example an HTML-only multipart message has no text part).
  def decoded_body(part)
    part ? fix_encode(part.body.decoded) : ""
  end

  # Collects the (re-encoded) file names of all image attachments.
  def image_attachment_names(email)
    email.attachments.each_with_object([]) do |attachment, names|
      next unless attachment.content_type.to_s.start_with?('image/')

      filename = fix_encode(attachment.filename)
      names.push(filename)
      print "found attachment #{filename}\n"
      # Saving attachments to disk is disabled. NOTE(review): if this is
      # re-enabled, build the path with File.join(@attachment_dir,
      # File.basename(filename)), since the file name comes from the email.
      #begin
      #  File.open(@attachment_dir + filename, "w+b", 0644) do |f|
      #    f.write attachment.body.decoded
      #  end
      #rescue => e
      #  puts "Unable to save data for #{filename} because #{e.message}"
      #end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: emailparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brennan Novak
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-05-16 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Parses a single email file to JSON
|
14
|
+
email: hi@brennannovak.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- emailparser.rb
|
20
|
+
homepage: https://github.com/TransparencyToolkit/EmailParser
|
21
|
+
licenses:
|
22
|
+
- GPL
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.8
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Parses a single email file to JSON
|
44
|
+
test_files: []
|