emailparser 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/emailparser.rb +57 -60
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f54662f39ecfbd6bf875b3eefc92a427f7a0958e
|
4
|
+
data.tar.gz: fb42fed00d2a7eed6fed4dd4fb102d55e36a7800
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8869b528a7ee753bac34b8bd685e3f4c96a7b511189de6a9c5b6460b1baa1cb656ed2cb4843486fa3347206c6f59b55112dc713881c2b3c9d740ac4aba08032e
|
7
|
+
data.tar.gz: 868153660b3bf993634a6719311abddae9bcd6ea42215ba41ff2e1f3d2b93fa4af3438a60fea2eaf4240343bdaee15eb0080e72145be7e259454eed983f2aa92
|
data/lib/emailparser.rb
CHANGED
@@ -2,11 +2,12 @@ require 'pry'
|
|
2
2
|
require 'json'
|
3
3
|
require 'mail'
|
4
4
|
require 'digest'
|
5
|
+
require 'pathname'
|
5
6
|
|
6
|
-
class
|
7
|
+
class EmailParser
|
7
8
|
|
8
|
-
def initialize(
|
9
|
-
@
|
9
|
+
def initialize(path, out_dir, attachment_dir)
|
10
|
+
@path = path
|
10
11
|
@attachment_dir = out_dir + "/" + attachment_dir
|
11
12
|
@allowed_documents = [
|
12
13
|
'application/x-mobipocket-ebook',
|
@@ -129,66 +130,61 @@ class Emailparser
|
|
129
130
|
|
130
131
|
# Voodoo to fix nasty encoded strings
|
131
132
|
def fix_encode(text)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
133
|
+
if text.is_a?(String)
|
134
|
+
text_out = text.to_s.encode('UTF-8', {
|
135
|
+
:invalid => :replace,
|
136
|
+
:undef => :replace,
|
137
|
+
:replace => '?'
|
138
|
+
})
|
139
|
+
return text_out
|
140
|
+
elsif text.is_a?(Array)
|
141
|
+
fixed = []
|
142
|
+
text.each do | item |
|
143
|
+
item_fixed = item.to_s.encode('UTF-8', {
|
144
|
+
:invalid => :replace,
|
145
|
+
:undef => :replace,
|
146
|
+
:replace => '?'
|
147
|
+
})
|
148
|
+
fixed.push(item_fixed)
|
143
149
|
end
|
144
|
-
|
145
|
-
|
146
|
-
return
|
150
|
+
return fixed
|
151
|
+
else
|
152
|
+
return text
|
147
153
|
end
|
148
154
|
end
|
149
155
|
|
150
156
|
def make_attachment_folder(attachments, source_hash)
|
151
157
|
if (!attachments.empty?)
|
152
|
-
puts " -
|
158
|
+
puts "Creating sub-directory: " + source_hash
|
153
159
|
attachments_dir = @attachment_dir + source_hash
|
154
160
|
Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
|
155
161
|
end
|
156
162
|
end
|
157
163
|
|
158
164
|
def save_attachment(attachment, message_id, filename)
|
159
|
-
puts " - " + filename + "\n"
|
165
|
+
puts " - found attachment " + filename + "\n"
|
160
166
|
begin
|
161
167
|
File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
|
162
168
|
f.write attachment.body.decoded
|
163
169
|
end
|
164
170
|
rescue => e
|
165
|
-
puts "
|
171
|
+
puts "Unable to save data for #{filename} because #{e.message}"
|
166
172
|
end
|
167
173
|
end
|
168
174
|
|
169
175
|
# Accepts a message
|
170
176
|
def parse_message
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
source_hash = Digest::SHA256.hexdigest(
|
175
|
-
email = Mail.
|
177
|
+
puts "Loading email: " + @path + "\n"
|
178
|
+
email_file = File.read(@path).unpack('C*').pack('U*')
|
179
|
+
source_file = File.basename(@path)
|
180
|
+
source_hash = Digest::SHA256.hexdigest(email_file)
|
181
|
+
email = Mail.new(email_file)
|
176
182
|
|
177
183
|
# Defaults
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
body_html = ""
|
183
|
-
attachments = []
|
184
|
-
|
185
|
-
# Message-ID
|
186
|
-
begin
|
187
|
-
if !email.message_id.nil?
|
188
|
-
message_id = fix_encode(email.message_id)
|
189
|
-
end
|
190
|
-
rescue
|
191
|
-
puts " X - Getting Message-ID failed"
|
184
|
+
if email.message_id.nil?
|
185
|
+
message_id = ""
|
186
|
+
else
|
187
|
+
message_id = fix_encode(email.message_id)
|
192
188
|
end
|
193
189
|
|
194
190
|
# Date
|
@@ -218,23 +214,24 @@ class Emailparser
|
|
218
214
|
recipients = email_to.concat(email_cc)
|
219
215
|
addresses = recipients + email_from
|
220
216
|
rescue
|
221
|
-
puts "
|
217
|
+
puts "oops something failed here..."
|
222
218
|
# binding.pry
|
223
219
|
end
|
224
220
|
|
225
221
|
# Subject
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
rescue
|
231
|
-
puts " X Subject breaks encoding"
|
232
|
-
subject = "Unreadable Subject"
|
222
|
+
if email.subject
|
223
|
+
subject = fix_encode(email.subject)
|
224
|
+
else
|
225
|
+
subject = "No Subject"
|
233
226
|
end
|
234
227
|
|
235
|
-
|
228
|
+
body_plain = ""
|
229
|
+
body_html = ""
|
230
|
+
attachments = []
|
231
|
+
|
232
|
+
# Check for Multipart
|
236
233
|
if email.multipart?
|
237
|
-
puts " -
|
234
|
+
puts " - is multipart\n"
|
238
235
|
if email.text_part
|
239
236
|
body_plain = fix_encode(email.text_part.body.decoded)
|
240
237
|
end
|
@@ -242,12 +239,11 @@ class Emailparser
|
|
242
239
|
body_html = fix_encode(email.html_part.body.decoded)
|
243
240
|
end
|
244
241
|
else
|
245
|
-
puts " -
|
242
|
+
puts " - is single part\n"
|
246
243
|
if !email.content_type.nil? and email.content_type.start_with?('text/html')
|
247
244
|
body_html = fix_encode(email.body.decoded)
|
248
|
-
else
|
249
|
-
body_plain = fix_encode(email.body.decoded)
|
250
245
|
end
|
246
|
+
body_plain = fix_encode(email.body.decoded)
|
251
247
|
end
|
252
248
|
|
253
249
|
# Handle Attachments
|
@@ -256,27 +252,28 @@ class Emailparser
|
|
256
252
|
attachment_save = false
|
257
253
|
filename = fix_encode(attachment.filename)
|
258
254
|
mime_type, remaining = attachment.content_type.split(';', 2)
|
255
|
+
puts " - Attachment mime: " + mime_type
|
259
256
|
# Check Allowed Mime Types
|
260
257
|
if (@allowed_documents.include? mime_type)
|
261
|
-
puts " - Attachment:
|
258
|
+
puts " - Attachment is: document"
|
262
259
|
attachment_save = true
|
263
260
|
elsif (@allowed_spreadsheets.include? mime_type)
|
264
|
-
puts " - Attachment:
|
261
|
+
puts " - Attachment is: spreadsheet"
|
265
262
|
attachment_save = true
|
266
263
|
elsif (@allowed_audio.include? mime_type)
|
267
|
-
puts " - Attachment:
|
264
|
+
puts " - Attachment is: audio"
|
268
265
|
attachment_save = true
|
269
266
|
elsif (@allowed_contacts.include? mime_type)
|
270
|
-
puts " - Attachment:
|
267
|
+
puts " - Attachment is: contact"
|
271
268
|
attachment_save = true
|
272
269
|
elsif (@allowed_images.include? mime_type)
|
273
|
-
puts " - Attachment:
|
270
|
+
puts " - Attachment is: image"
|
274
271
|
attachment_save = true
|
275
272
|
elsif (@allowed_slideshows.include? mime_type)
|
276
|
-
puts " - Attachment:
|
273
|
+
puts " - Attachment is: slideshow"
|
277
274
|
attachment_save = true
|
278
275
|
elsif (@allowed_videos.include? mime_type)
|
279
|
-
puts " - Attachment:
|
276
|
+
puts " - Attachment is: video"
|
280
277
|
attachment_save = true
|
281
278
|
end
|
282
279
|
|
@@ -286,7 +283,7 @@ class Emailparser
|
|
286
283
|
save_attachment(attachment, source_hash, filename)
|
287
284
|
end
|
288
285
|
end
|
289
|
-
|
286
|
+
|
290
287
|
# Structure Data
|
291
288
|
email_data = {
|
292
289
|
source_file: source_file,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emailparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brennan Novak
|
@@ -37,7 +37,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
37
37
|
version: '0'
|
38
38
|
requirements: []
|
39
39
|
rubyforge_project:
|
40
|
-
rubygems_version: 2.
|
40
|
+
rubygems_version: 2.6.8
|
41
41
|
signing_key:
|
42
42
|
specification_version: 4
|
43
43
|
summary: Parses a single email file to JSON with attachments
|