emailparser 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/emailparser.rb +53 -42
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9453c8fe2c99ad7fbf91ece0ca61d294bfa2d0b0
|
4
|
+
data.tar.gz: cc5075498b9b81f18bff729c792e70514d8d78d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b11c724d3603c5f5775650d5ca21d2d8cb052f7e7068d24f1aa538e268a7db3cca02ec2684b6dde40f0798c3ca0cd901045c4117cffd94e57695570fdda8bf5
|
7
|
+
data.tar.gz: 60cad1df4447f65f44cad337bff57894ef0e4427f65e357dc0a18a4fd70b57e76db5f29882b38730d50db17f26e749604c2d48126a853ff70a0ce4bc9e714078
|
data/lib/emailparser.rb
CHANGED
@@ -129,54 +129,66 @@ class Emailparser
|
|
129
129
|
|
130
130
|
# Voodoo to fix nasty encoded strings
|
131
131
|
def fix_encode(text)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
132
|
+
begin
|
133
|
+
if text.is_a?(String)
|
134
|
+
return text.unpack('C*').pack('U*')
|
135
|
+
elsif text.is_a?(Array)
|
136
|
+
fixed = []
|
137
|
+
text.each do | item |
|
138
|
+
fixed.push(item.unpack('C*').pack('U*'))
|
139
|
+
end
|
140
|
+
return fixed
|
141
|
+
else
|
142
|
+
return text
|
138
143
|
end
|
139
|
-
|
140
|
-
|
141
|
-
return
|
144
|
+
rescue
|
145
|
+
puts " X Fixing encoding failed"
|
146
|
+
return "Unreadable Text"
|
142
147
|
end
|
143
148
|
end
|
144
149
|
|
145
150
|
def make_attachment_folder(attachments, source_hash)
|
146
151
|
if (!attachments.empty?)
|
147
|
-
puts "Creating
|
152
|
+
puts " - Creating directory: " + source_hash
|
148
153
|
attachments_dir = @attachment_dir + source_hash
|
149
154
|
Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
|
150
155
|
end
|
151
156
|
end
|
152
157
|
|
153
158
|
def save_attachment(attachment, message_id, filename)
|
154
|
-
puts " -
|
159
|
+
puts " - " + filename + "\n"
|
155
160
|
begin
|
156
161
|
File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
|
157
162
|
f.write attachment.body.decoded
|
158
163
|
end
|
159
164
|
rescue => e
|
160
|
-
puts "Unable to save data for #{filename} because #{e.message}"
|
165
|
+
puts " X Unable to save data for #{filename} because #{e.message}"
|
161
166
|
end
|
162
167
|
end
|
163
168
|
|
164
169
|
# Accepts a message
|
165
170
|
def parse_message
|
166
171
|
|
167
|
-
puts "
|
172
|
+
puts "-" * 70
|
173
|
+
puts "Loading email: " + @message
|
168
174
|
source_hash = Digest::SHA256.hexdigest(File.read(@message))
|
169
|
-
puts "SHA256 Hash: " + source_hash
|
170
|
-
|
171
175
|
email = Mail.read(@message)
|
172
176
|
|
173
177
|
# Defaults
|
174
178
|
source_file = @message.split("/").last
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
179
|
+
message_id = ""
|
180
|
+
subject = "No Subject"
|
181
|
+
body_plain = ""
|
182
|
+
body_html = ""
|
183
|
+
attachments = []
|
184
|
+
|
185
|
+
# Message-ID
|
186
|
+
begin
|
187
|
+
if !email.message_id.nil?
|
188
|
+
message_id = fix_encode(email.message_id)
|
189
|
+
end
|
190
|
+
rescue
|
191
|
+
puts " X - Getting Message-ID failed"
|
180
192
|
end
|
181
193
|
|
182
194
|
# Date
|
@@ -206,24 +218,23 @@ class Emailparser
|
|
206
218
|
recipients = email_to.concat(email_cc)
|
207
219
|
addresses = recipients + email_from
|
208
220
|
rescue
|
209
|
-
puts "
|
221
|
+
puts " X Some addresses failed"
|
210
222
|
# binding.pry
|
211
223
|
end
|
212
224
|
|
213
225
|
# Subject
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
226
|
+
begin
|
227
|
+
if email.subject
|
228
|
+
subject = fix_encode(email.subject)
|
229
|
+
end
|
230
|
+
rescue
|
231
|
+
puts " X Subject breaks encoding"
|
232
|
+
subject = "Unreadable Subject"
|
218
233
|
end
|
219
|
-
|
220
|
-
body_plain = ""
|
221
|
-
body_html = ""
|
222
|
-
attachments = []
|
223
234
|
|
224
|
-
# Check for Multipart
|
235
|
+
# Body - Check for Multipart
|
225
236
|
if email.multipart?
|
226
|
-
puts " -
|
237
|
+
puts " - Multipart message"
|
227
238
|
if email.text_part
|
228
239
|
body_plain = fix_encode(email.text_part.body.decoded)
|
229
240
|
end
|
@@ -231,11 +242,12 @@ class Emailparser
|
|
231
242
|
body_html = fix_encode(email.html_part.body.decoded)
|
232
243
|
end
|
233
244
|
else
|
234
|
-
puts " -
|
245
|
+
puts " - Single part message"
|
235
246
|
if !email.content_type.nil? and email.content_type.start_with?('text/html')
|
236
247
|
body_html = fix_encode(email.body.decoded)
|
248
|
+
else
|
249
|
+
body_plain = fix_encode(email.body.decoded)
|
237
250
|
end
|
238
|
-
body_plain = fix_encode(email.body.decoded)
|
239
251
|
end
|
240
252
|
|
241
253
|
# Handle Attachments
|
@@ -244,28 +256,27 @@ class Emailparser
|
|
244
256
|
attachment_save = false
|
245
257
|
filename = fix_encode(attachment.filename)
|
246
258
|
mime_type, remaining = attachment.content_type.split(';', 2)
|
247
|
-
puts " - Attachment mime: " + mime_type
|
248
259
|
# Check Allowed Mime Types
|
249
260
|
if (@allowed_documents.include? mime_type)
|
250
|
-
puts " - Attachment
|
261
|
+
puts " - Attachment: Document"
|
251
262
|
attachment_save = true
|
252
263
|
elsif (@allowed_spreadsheets.include? mime_type)
|
253
|
-
puts " - Attachment
|
264
|
+
puts " - Attachment: Spreadsheet"
|
254
265
|
attachment_save = true
|
255
266
|
elsif (@allowed_audio.include? mime_type)
|
256
|
-
puts " - Attachment
|
267
|
+
puts " - Attachment: Audio"
|
257
268
|
attachment_save = true
|
258
269
|
elsif (@allowed_contacts.include? mime_type)
|
259
|
-
puts " - Attachment
|
270
|
+
puts " - Attachment: Contact"
|
260
271
|
attachment_save = true
|
261
272
|
elsif (@allowed_images.include? mime_type)
|
262
|
-
puts " - Attachment
|
273
|
+
puts " - Attachment: Image"
|
263
274
|
attachment_save = true
|
264
275
|
elsif (@allowed_slideshows.include? mime_type)
|
265
|
-
puts " - Attachment
|
276
|
+
puts " - Attachment: Slideshow"
|
266
277
|
attachment_save = true
|
267
278
|
elsif (@allowed_videos.include? mime_type)
|
268
|
-
puts " - Attachment
|
279
|
+
puts " - Attachment: Video"
|
269
280
|
attachment_save = true
|
270
281
|
end
|
271
282
|
|
@@ -275,7 +286,7 @@ class Emailparser
|
|
275
286
|
save_attachment(attachment, source_hash, filename)
|
276
287
|
end
|
277
288
|
end
|
278
|
-
|
289
|
+
|
279
290
|
# Structure Data
|
280
291
|
email_data = {
|
281
292
|
source_file: source_file,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emailparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brennan Novak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01-
|
11
|
+
date: 2017-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parses a signle email file to JSON with attachments
|
14
14
|
email: hi@brennannovak.com
|