emailparser 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/emailparser.rb +57 -60
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9453c8fe2c99ad7fbf91ece0ca61d294bfa2d0b0
4
- data.tar.gz: cc5075498b9b81f18bff729c792e70514d8d78d1
3
+ metadata.gz: f54662f39ecfbd6bf875b3eefc92a427f7a0958e
4
+ data.tar.gz: fb42fed00d2a7eed6fed4dd4fb102d55e36a7800
5
5
  SHA512:
6
- metadata.gz: 4b11c724d3603c5f5775650d5ca21d2d8cb052f7e7068d24f1aa538e268a7db3cca02ec2684b6dde40f0798c3ca0cd901045c4117cffd94e57695570fdda8bf5
7
- data.tar.gz: 60cad1df4447f65f44cad337bff57894ef0e4427f65e357dc0a18a4fd70b57e76db5f29882b38730d50db17f26e749604c2d48126a853ff70a0ce4bc9e714078
6
+ metadata.gz: 8869b528a7ee753bac34b8bd685e3f4c96a7b511189de6a9c5b6460b1baa1cb656ed2cb4843486fa3347206c6f59b55112dc713881c2b3c9d740ac4aba08032e
7
+ data.tar.gz: 868153660b3bf993634a6719311abddae9bcd6ea42215ba41ff2e1f3d2b93fa4af3438a60fea2eaf4240343bdaee15eb0080e72145be7e259454eed983f2aa92
data/lib/emailparser.rb CHANGED
@@ -2,11 +2,12 @@ require 'pry'
2
2
  require 'json'
3
3
  require 'mail'
4
4
  require 'digest'
5
+ require 'pathname'
5
6
 
6
- class Emailparser
7
+ class EmailParser
7
8
 
8
- def initialize(message, out_dir, attachment_dir)
9
- @message = message
9
+ def initialize(path, out_dir, attachment_dir)
10
+ @path = path
10
11
  @attachment_dir = out_dir + "/" + attachment_dir
11
12
  @allowed_documents = [
12
13
  'application/x-mobipocket-ebook',
@@ -129,66 +130,61 @@ class Emailparser
129
130
 
130
131
  # Voodoo to fix nasty encoded strings
131
132
  def fix_encode(text)
132
- begin
133
- if text.is_a?(String)
134
- return text.unpack('C*').pack('U*')
135
- elsif text.is_a?(Array)
136
- fixed = []
137
- text.each do | item |
138
- fixed.push(item.unpack('C*').pack('U*'))
139
- end
140
- return fixed
141
- else
142
- return text
133
+ if text.is_a?(String)
134
+ text_out = text.to_s.encode('UTF-8', {
135
+ :invalid => :replace,
136
+ :undef => :replace,
137
+ :replace => '?'
138
+ })
139
+ return text_out
140
+ elsif text.is_a?(Array)
141
+ fixed = []
142
+ text.each do | item |
143
+ item_fixed = item.to_s.encode('UTF-8', {
144
+ :invalid => :replace,
145
+ :undef => :replace,
146
+ :replace => '?'
147
+ })
148
+ fixed.push(item_fixed)
143
149
  end
144
- rescue
145
- puts " X Fixing encoding failed"
146
- return "Unreadable Text"
150
+ return fixed
151
+ else
152
+ return text
147
153
  end
148
154
  end
149
155
 
150
156
  def make_attachment_folder(attachments, source_hash)
151
157
  if (!attachments.empty?)
152
- puts " - Creating directory: " + source_hash
158
+ puts "Creating sub-directory: " + source_hash
153
159
  attachments_dir = @attachment_dir + source_hash
154
160
  Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
155
161
  end
156
162
  end
157
163
 
158
164
  def save_attachment(attachment, message_id, filename)
159
- puts " - " + filename + "\n"
165
+ puts " - found attachment " + filename + "\n"
160
166
  begin
161
167
  File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
162
168
  f.write attachment.body.decoded
163
169
  end
164
170
  rescue => e
165
- puts " X Unable to save data for #{filename} because #{e.message}"
171
+ puts "Unable to save data for #{filename} because #{e.message}"
166
172
  end
167
173
  end
168
174
 
169
175
  # Accepts a message
170
176
  def parse_message
171
-
172
- puts "-" * 70
173
- puts "Loading email: " + @message
174
- source_hash = Digest::SHA256.hexdigest(File.read(@message))
175
- email = Mail.read(@message)
177
+ puts "Loading email: " + @path + "\n"
178
+ email_file = File.read(@path).unpack('C*').pack('U*')
179
+ source_file = File.basename(@path)
180
+ source_hash = Digest::SHA256.hexdigest(email_file)
181
+ email = Mail.new(email_file)
176
182
 
177
183
  # Defaults
178
- source_file = @message.split("/").last
179
- message_id = ""
180
- subject = "No Subject"
181
- body_plain = ""
182
- body_html = ""
183
- attachments = []
184
-
185
- # Message-ID
186
- begin
187
- if !email.message_id.nil?
188
- message_id = fix_encode(email.message_id)
189
- end
190
- rescue
191
- puts " X - Getting Message-ID failed"
184
+ if email.message_id.nil?
185
+ message_id = ""
186
+ else
187
+ message_id = fix_encode(email.message_id)
192
188
  end
193
189
 
194
190
  # Date
@@ -218,23 +214,24 @@ class Emailparser
218
214
  recipients = email_to.concat(email_cc)
219
215
  addresses = recipients + email_from
220
216
  rescue
221
- puts " X Some addresses failed"
217
+ puts "oops something failed here..."
222
218
  # binding.pry
223
219
  end
224
220
 
225
221
  # Subject
226
- begin
227
- if email.subject
228
- subject = fix_encode(email.subject)
229
- end
230
- rescue
231
- puts " X Subject breaks encoding"
232
- subject = "Unreadable Subject"
222
+ if email.subject
223
+ subject = fix_encode(email.subject)
224
+ else
225
+ subject = "No Subject"
233
226
  end
234
227
 
235
- # Body - Check for Multipart
228
+ body_plain = ""
229
+ body_html = ""
230
+ attachments = []
231
+
232
+ # Check for Multipart
236
233
  if email.multipart?
237
- puts " - Multipart message"
234
+ puts " - is multipart\n"
238
235
  if email.text_part
239
236
  body_plain = fix_encode(email.text_part.body.decoded)
240
237
  end
@@ -242,12 +239,11 @@ class Emailparser
242
239
  body_html = fix_encode(email.html_part.body.decoded)
243
240
  end
244
241
  else
245
- puts " - Single part message"
242
+ puts " - is single part\n"
246
243
  if !email.content_type.nil? and email.content_type.start_with?('text/html')
247
244
  body_html = fix_encode(email.body.decoded)
248
- else
249
- body_plain = fix_encode(email.body.decoded)
250
245
  end
246
+ body_plain = fix_encode(email.body.decoded)
251
247
  end
252
248
 
253
249
  # Handle Attachments
@@ -256,27 +252,28 @@ class Emailparser
256
252
  attachment_save = false
257
253
  filename = fix_encode(attachment.filename)
258
254
  mime_type, remaining = attachment.content_type.split(';', 2)
255
+ puts " - Attachment mime: " + mime_type
259
256
  # Check Allowed Mime Types
260
257
  if (@allowed_documents.include? mime_type)
261
- puts " - Attachment: Document"
258
+ puts " - Attachment is: document"
262
259
  attachment_save = true
263
260
  elsif (@allowed_spreadsheets.include? mime_type)
264
- puts " - Attachment: Spreadsheet"
261
+ puts " - Attachment is: spreadsheet"
265
262
  attachment_save = true
266
263
  elsif (@allowed_audio.include? mime_type)
267
- puts " - Attachment: Audio"
264
+ puts " - Attachment is: audio"
268
265
  attachment_save = true
269
266
  elsif (@allowed_contacts.include? mime_type)
270
- puts " - Attachment: Contact"
267
+ puts " - Attachment is: contact"
271
268
  attachment_save = true
272
269
  elsif (@allowed_images.include? mime_type)
273
- puts " - Attachment: Image"
270
+ puts " - Attachment is: image"
274
271
  attachment_save = true
275
272
  elsif (@allowed_slideshows.include? mime_type)
276
- puts " - Attachment: Slideshow"
273
+ puts " - Attachment is: slideshow"
277
274
  attachment_save = true
278
275
  elsif (@allowed_videos.include? mime_type)
279
- puts " - Attachment: Video"
276
+ puts " - Attachment is: video"
280
277
  attachment_save = true
281
278
  end
282
279
 
@@ -286,7 +283,7 @@ class Emailparser
286
283
  save_attachment(attachment, source_hash, filename)
287
284
  end
288
285
  end
289
-
286
+
290
287
  # Structure Data
291
288
  email_data = {
292
289
  source_file: source_file,
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: emailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brennan Novak
@@ -37,7 +37,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
37
37
  version: '0'
38
38
  requirements: []
39
39
  rubyforge_project:
40
- rubygems_version: 2.5.1
40
+ rubygems_version: 2.6.8
41
41
  signing_key:
42
42
  specification_version: 4
43
43
  summary: Parses a single email file to JSON with attachments