emailparser 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/emailparser.rb +57 -60
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9453c8fe2c99ad7fbf91ece0ca61d294bfa2d0b0
4
- data.tar.gz: cc5075498b9b81f18bff729c792e70514d8d78d1
3
+ metadata.gz: f54662f39ecfbd6bf875b3eefc92a427f7a0958e
4
+ data.tar.gz: fb42fed00d2a7eed6fed4dd4fb102d55e36a7800
5
5
  SHA512:
6
- metadata.gz: 4b11c724d3603c5f5775650d5ca21d2d8cb052f7e7068d24f1aa538e268a7db3cca02ec2684b6dde40f0798c3ca0cd901045c4117cffd94e57695570fdda8bf5
7
- data.tar.gz: 60cad1df4447f65f44cad337bff57894ef0e4427f65e357dc0a18a4fd70b57e76db5f29882b38730d50db17f26e749604c2d48126a853ff70a0ce4bc9e714078
6
+ metadata.gz: 8869b528a7ee753bac34b8bd685e3f4c96a7b511189de6a9c5b6460b1baa1cb656ed2cb4843486fa3347206c6f59b55112dc713881c2b3c9d740ac4aba08032e
7
+ data.tar.gz: 868153660b3bf993634a6719311abddae9bcd6ea42215ba41ff2e1f3d2b93fa4af3438a60fea2eaf4240343bdaee15eb0080e72145be7e259454eed983f2aa92
data/lib/emailparser.rb CHANGED
@@ -2,11 +2,12 @@ require 'pry'
2
2
  require 'json'
3
3
  require 'mail'
4
4
  require 'digest'
5
+ require 'pathname'
5
6
 
6
- class Emailparser
7
+ class EmailParser
7
8
 
8
- def initialize(message, out_dir, attachment_dir)
9
- @message = message
9
+ def initialize(path, out_dir, attachment_dir)
10
+ @path = path
10
11
  @attachment_dir = out_dir + "/" + attachment_dir
11
12
  @allowed_documents = [
12
13
  'application/x-mobipocket-ebook',
@@ -129,66 +130,61 @@ class Emailparser
129
130
 
130
131
  # Voodoo to fix nasty encoded strings
131
132
  def fix_encode(text)
132
- begin
133
- if text.is_a?(String)
134
- return text.unpack('C*').pack('U*')
135
- elsif text.is_a?(Array)
136
- fixed = []
137
- text.each do | item |
138
- fixed.push(item.unpack('C*').pack('U*'))
139
- end
140
- return fixed
141
- else
142
- return text
133
+ if text.is_a?(String)
134
+ text_out = text.to_s.encode('UTF-8', {
135
+ :invalid => :replace,
136
+ :undef => :replace,
137
+ :replace => '?'
138
+ })
139
+ return text_out
140
+ elsif text.is_a?(Array)
141
+ fixed = []
142
+ text.each do | item |
143
+ item_fixed = item.to_s.encode('UTF-8', {
144
+ :invalid => :replace,
145
+ :undef => :replace,
146
+ :replace => '?'
147
+ })
148
+ fixed.push(item_fixed)
143
149
  end
144
- rescue
145
- puts " X Fixing encoding failed"
146
- return "Unreadable Text"
150
+ return fixed
151
+ else
152
+ return text
147
153
  end
148
154
  end
149
155
 
150
156
  def make_attachment_folder(attachments, source_hash)
151
157
  if (!attachments.empty?)
152
- puts " - Creating directory: " + source_hash
158
+ puts "Creating sub-directory: " + source_hash
153
159
  attachments_dir = @attachment_dir + source_hash
154
160
  Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
155
161
  end
156
162
  end
157
163
 
158
164
  def save_attachment(attachment, message_id, filename)
159
- puts " - " + filename + "\n"
165
+ puts " - found attachment " + filename + "\n"
160
166
  begin
161
167
  File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
162
168
  f.write attachment.body.decoded
163
169
  end
164
170
  rescue => e
165
- puts " X Unable to save data for #{filename} because #{e.message}"
171
+ puts "Unable to save data for #{filename} because #{e.message}"
166
172
  end
167
173
  end
168
174
 
169
175
  # Accepts a message
170
176
  def parse_message
171
-
172
- puts "-" * 70
173
- puts "Loading email: " + @message
174
- source_hash = Digest::SHA256.hexdigest(File.read(@message))
175
- email = Mail.read(@message)
177
+ puts "Loading email: " + @path + "\n"
178
+ email_file = File.read(@path).unpack('C*').pack('U*')
179
+ source_file = File.basename(@path)
180
+ source_hash = Digest::SHA256.hexdigest(email_file)
181
+ email = Mail.new(email_file)
176
182
 
177
183
  # Defaults
178
- source_file = @message.split("/").last
179
- message_id = ""
180
- subject = "No Subject"
181
- body_plain = ""
182
- body_html = ""
183
- attachments = []
184
-
185
- # Message-ID
186
- begin
187
- if !email.message_id.nil?
188
- message_id = fix_encode(email.message_id)
189
- end
190
- rescue
191
- puts " X - Getting Message-ID failed"
184
+ if email.message_id.nil?
185
+ message_id = ""
186
+ else
187
+ message_id = fix_encode(email.message_id)
192
188
  end
193
189
 
194
190
  # Date
@@ -218,23 +214,24 @@ class Emailparser
218
214
  recipients = email_to.concat(email_cc)
219
215
  addresses = recipients + email_from
220
216
  rescue
221
- puts " X Some addresses failed"
217
+ puts "oops something failed here..."
222
218
  # binding.pry
223
219
  end
224
220
 
225
221
  # Subject
226
- begin
227
- if email.subject
228
- subject = fix_encode(email.subject)
229
- end
230
- rescue
231
- puts " X Subject breaks encoding"
232
- subject = "Unreadable Subject"
222
+ if email.subject
223
+ subject = fix_encode(email.subject)
224
+ else
225
+ subject = "No Subject"
233
226
  end
234
227
 
235
- # Body - Check for Multipart
228
+ body_plain = ""
229
+ body_html = ""
230
+ attachments = []
231
+
232
+ # Check for Multipart
236
233
  if email.multipart?
237
- puts " - Multipart message"
234
+ puts " - is multipart\n"
238
235
  if email.text_part
239
236
  body_plain = fix_encode(email.text_part.body.decoded)
240
237
  end
@@ -242,12 +239,11 @@ class Emailparser
242
239
  body_html = fix_encode(email.html_part.body.decoded)
243
240
  end
244
241
  else
245
- puts " - Single part message"
242
+ puts " - is single part\n"
246
243
  if !email.content_type.nil? and email.content_type.start_with?('text/html')
247
244
  body_html = fix_encode(email.body.decoded)
248
- else
249
- body_plain = fix_encode(email.body.decoded)
250
245
  end
246
+ body_plain = fix_encode(email.body.decoded)
251
247
  end
252
248
 
253
249
  # Handle Attachments
@@ -256,27 +252,28 @@ class Emailparser
256
252
  attachment_save = false
257
253
  filename = fix_encode(attachment.filename)
258
254
  mime_type, remaining = attachment.content_type.split(';', 2)
255
+ puts " - Attachment mime: " + mime_type
259
256
  # Check Allowed Mime Types
260
257
  if (@allowed_documents.include? mime_type)
261
- puts " - Attachment: Document"
258
+ puts " - Attachment is: document"
262
259
  attachment_save = true
263
260
  elsif (@allowed_spreadsheets.include? mime_type)
264
- puts " - Attachment: Spreadsheet"
261
+ puts " - Attachment is: spreadsheet"
265
262
  attachment_save = true
266
263
  elsif (@allowed_audio.include? mime_type)
267
- puts " - Attachment: Audio"
264
+ puts " - Attachment is: audio"
268
265
  attachment_save = true
269
266
  elsif (@allowed_contacts.include? mime_type)
270
- puts " - Attachment: Contact"
267
+ puts " - Attachment is: contact"
271
268
  attachment_save = true
272
269
  elsif (@allowed_images.include? mime_type)
273
- puts " - Attachment: Image"
270
+ puts " - Attachment is: image"
274
271
  attachment_save = true
275
272
  elsif (@allowed_slideshows.include? mime_type)
276
- puts " - Attachment: Slideshow"
273
+ puts " - Attachment is: slideshow"
277
274
  attachment_save = true
278
275
  elsif (@allowed_videos.include? mime_type)
279
- puts " - Attachment: Video"
276
+ puts " - Attachment is: video"
280
277
  attachment_save = true
281
278
  end
282
279
 
@@ -286,7 +283,7 @@ class Emailparser
286
283
  save_attachment(attachment, source_hash, filename)
287
284
  end
288
285
  end
289
-
286
+
290
287
  # Structure Data
291
288
  email_data = {
292
289
  source_file: source_file,
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: emailparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brennan Novak
@@ -37,7 +37,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
37
37
  version: '0'
38
38
  requirements: []
39
39
  rubyforge_project:
40
- rubygems_version: 2.5.1
40
+ rubygems_version: 2.6.8
41
41
  signing_key:
42
42
  specification_version: 4
43
43
  summary: Parses a single email file to JSON with attachments