emailparser 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/emailparser.rb +57 -60
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f54662f39ecfbd6bf875b3eefc92a427f7a0958e
|
4
|
+
data.tar.gz: fb42fed00d2a7eed6fed4dd4fb102d55e36a7800
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8869b528a7ee753bac34b8bd685e3f4c96a7b511189de6a9c5b6460b1baa1cb656ed2cb4843486fa3347206c6f59b55112dc713881c2b3c9d740ac4aba08032e
|
7
|
+
data.tar.gz: 868153660b3bf993634a6719311abddae9bcd6ea42215ba41ff2e1f3d2b93fa4af3438a60fea2eaf4240343bdaee15eb0080e72145be7e259454eed983f2aa92
|
data/lib/emailparser.rb
CHANGED
@@ -2,11 +2,12 @@ require 'pry'
|
|
2
2
|
require 'json'
|
3
3
|
require 'mail'
|
4
4
|
require 'digest'
|
5
|
+
require 'pathname'
|
5
6
|
|
6
|
-
class Emailparser
|
7
|
+
class EmailParser
|
7
8
|
|
8
|
-
def initialize(
|
9
|
-
@
|
9
|
+
def initialize(path, out_dir, attachment_dir)
|
10
|
+
@path = path
|
10
11
|
@attachment_dir = out_dir + "/" + attachment_dir
|
11
12
|
@allowed_documents = [
|
12
13
|
'application/x-mobipocket-ebook',
|
@@ -129,66 +130,61 @@ class Emailparser
|
|
129
130
|
|
130
131
|
# Voodoo to fix nasty encoded strings
|
131
132
|
def fix_encode(text)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
133
|
+
if text.is_a?(String)
|
134
|
+
text_out = text.to_s.encode('UTF-8', {
|
135
|
+
:invalid => :replace,
|
136
|
+
:undef => :replace,
|
137
|
+
:replace => '?'
|
138
|
+
})
|
139
|
+
return text_out
|
140
|
+
elsif text.is_a?(Array)
|
141
|
+
fixed = []
|
142
|
+
text.each do | item |
|
143
|
+
item_fixed = item.to_s.encode('UTF-8', {
|
144
|
+
:invalid => :replace,
|
145
|
+
:undef => :replace,
|
146
|
+
:replace => '?'
|
147
|
+
})
|
148
|
+
fixed.push(item_fixed)
|
143
149
|
end
|
144
|
-
|
145
|
-
|
146
|
-
return
|
150
|
+
return fixed
|
151
|
+
else
|
152
|
+
return text
|
147
153
|
end
|
148
154
|
end
|
149
155
|
|
150
156
|
def make_attachment_folder(attachments, source_hash)
|
151
157
|
if (!attachments.empty?)
|
152
|
-
puts " -
|
158
|
+
puts "Creating sub-directory: " + source_hash
|
153
159
|
attachments_dir = @attachment_dir + source_hash
|
154
160
|
Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
|
155
161
|
end
|
156
162
|
end
|
157
163
|
|
158
164
|
def save_attachment(attachment, message_id, filename)
|
159
|
-
puts " - " + filename + "\n"
|
165
|
+
puts " - found attachment " + filename + "\n"
|
160
166
|
begin
|
161
167
|
File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
|
162
168
|
f.write attachment.body.decoded
|
163
169
|
end
|
164
170
|
rescue => e
|
165
|
-
puts "
|
171
|
+
puts "Unable to save data for #{filename} because #{e.message}"
|
166
172
|
end
|
167
173
|
end
|
168
174
|
|
169
175
|
# Accepts a message
|
170
176
|
def parse_message
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
source_hash = Digest::SHA256.hexdigest(
|
175
|
-
email = Mail.
|
177
|
+
puts "Loading email: " + @path + "\n"
|
178
|
+
email_file = File.read(@path).unpack('C*').pack('U*')
|
179
|
+
source_file = File.basename(@path)
|
180
|
+
source_hash = Digest::SHA256.hexdigest(email_file)
|
181
|
+
email = Mail.new(email_file)
|
176
182
|
|
177
183
|
# Defaults
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
body_html = ""
|
183
|
-
attachments = []
|
184
|
-
|
185
|
-
# Message-ID
|
186
|
-
begin
|
187
|
-
if !email.message_id.nil?
|
188
|
-
message_id = fix_encode(email.message_id)
|
189
|
-
end
|
190
|
-
rescue
|
191
|
-
puts " X - Getting Message-ID failed"
|
184
|
+
if email.message_id.nil?
|
185
|
+
message_id = ""
|
186
|
+
else
|
187
|
+
message_id = fix_encode(email.message_id)
|
192
188
|
end
|
193
189
|
|
194
190
|
# Date
|
@@ -218,23 +214,24 @@ class Emailparser
|
|
218
214
|
recipients = email_to.concat(email_cc)
|
219
215
|
addresses = recipients + email_from
|
220
216
|
rescue
|
221
|
-
puts "
|
217
|
+
puts "oops something failed here..."
|
222
218
|
# binding.pry
|
223
219
|
end
|
224
220
|
|
225
221
|
# Subject
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
rescue
|
231
|
-
puts " X Subject breaks encoding"
|
232
|
-
subject = "Unreadable Subject"
|
222
|
+
if email.subject
|
223
|
+
subject = fix_encode(email.subject)
|
224
|
+
else
|
225
|
+
subject = "No Subject"
|
233
226
|
end
|
234
227
|
|
235
|
-
|
228
|
+
body_plain = ""
|
229
|
+
body_html = ""
|
230
|
+
attachments = []
|
231
|
+
|
232
|
+
# Check for Multipart
|
236
233
|
if email.multipart?
|
237
|
-
puts " -
|
234
|
+
puts " - is multipart\n"
|
238
235
|
if email.text_part
|
239
236
|
body_plain = fix_encode(email.text_part.body.decoded)
|
240
237
|
end
|
@@ -242,12 +239,11 @@ class Emailparser
|
|
242
239
|
body_html = fix_encode(email.html_part.body.decoded)
|
243
240
|
end
|
244
241
|
else
|
245
|
-
puts " -
|
242
|
+
puts " - is single part\n"
|
246
243
|
if !email.content_type.nil? and email.content_type.start_with?('text/html')
|
247
244
|
body_html = fix_encode(email.body.decoded)
|
248
|
-
else
|
249
|
-
body_plain = fix_encode(email.body.decoded)
|
250
245
|
end
|
246
|
+
body_plain = fix_encode(email.body.decoded)
|
251
247
|
end
|
252
248
|
|
253
249
|
# Handle Attachments
|
@@ -256,27 +252,28 @@ class Emailparser
|
|
256
252
|
attachment_save = false
|
257
253
|
filename = fix_encode(attachment.filename)
|
258
254
|
mime_type, remaining = attachment.content_type.split(';', 2)
|
255
|
+
puts " - Attachment mime: " + mime_type
|
259
256
|
# Check Allowed Mime Types
|
260
257
|
if (@allowed_documents.include? mime_type)
|
261
|
-
puts " - Attachment:
|
258
|
+
puts " - Attachment is: document"
|
262
259
|
attachment_save = true
|
263
260
|
elsif (@allowed_spreadsheets.include? mime_type)
|
264
|
-
puts " - Attachment:
|
261
|
+
puts " - Attachment is: spreadsheet"
|
265
262
|
attachment_save = true
|
266
263
|
elsif (@allowed_audio.include? mime_type)
|
267
|
-
puts " - Attachment:
|
264
|
+
puts " - Attachment is: audio"
|
268
265
|
attachment_save = true
|
269
266
|
elsif (@allowed_contacts.include? mime_type)
|
270
|
-
puts " - Attachment:
|
267
|
+
puts " - Attachment is: contact"
|
271
268
|
attachment_save = true
|
272
269
|
elsif (@allowed_images.include? mime_type)
|
273
|
-
puts " - Attachment:
|
270
|
+
puts " - Attachment is: image"
|
274
271
|
attachment_save = true
|
275
272
|
elsif (@allowed_slideshows.include? mime_type)
|
276
|
-
puts " - Attachment:
|
273
|
+
puts " - Attachment is: slideshow"
|
277
274
|
attachment_save = true
|
278
275
|
elsif (@allowed_videos.include? mime_type)
|
279
|
-
puts " - Attachment:
|
276
|
+
puts " - Attachment is: video"
|
280
277
|
attachment_save = true
|
281
278
|
end
|
282
279
|
|
@@ -286,7 +283,7 @@ class Emailparser
|
|
286
283
|
save_attachment(attachment, source_hash, filename)
|
287
284
|
end
|
288
285
|
end
|
289
|
-
|
286
|
+
|
290
287
|
# Structure Data
|
291
288
|
email_data = {
|
292
289
|
source_file: source_file,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emailparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brennan Novak
|
@@ -37,7 +37,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
37
37
|
version: '0'
|
38
38
|
requirements: []
|
39
39
|
rubyforge_project:
|
40
|
-
rubygems_version: 2.
|
40
|
+
rubygems_version: 2.6.8
|
41
41
|
signing_key:
|
42
42
|
specification_version: 4
|
43
43
|
summary: Parses a single email file to JSON with attachments
|