emailparser 0.1.6 → 0.1.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/emailparser.rb +53 -42
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 9453c8fe2c99ad7fbf91ece0ca61d294bfa2d0b0
         | 
| 4 | 
            +
              data.tar.gz: cc5075498b9b81f18bff729c792e70514d8d78d1
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 4b11c724d3603c5f5775650d5ca21d2d8cb052f7e7068d24f1aa538e268a7db3cca02ec2684b6dde40f0798c3ca0cd901045c4117cffd94e57695570fdda8bf5
         | 
| 7 | 
            +
              data.tar.gz: 60cad1df4447f65f44cad337bff57894ef0e4427f65e357dc0a18a4fd70b57e76db5f29882b38730d50db17f26e749604c2d48126a853ff70a0ce4bc9e714078
         | 
    
        data/lib/emailparser.rb
    CHANGED
    
    | @@ -129,54 +129,66 @@ class Emailparser | |
| 129 129 |  | 
| 130 130 | 
             
            	# Voodoo to fix nasty encoded strings
         | 
| 131 131 | 
             
            	def fix_encode(text)
         | 
| 132 | 
            -
            		 | 
| 133 | 
            -
            			 | 
| 134 | 
            -
             | 
| 135 | 
            -
            			 | 
| 136 | 
            -
             | 
| 137 | 
            -
            				 | 
| 132 | 
            +
            		begin
         | 
| 133 | 
            +
            			if text.is_a?(String)
         | 
| 134 | 
            +
            				return text.unpack('C*').pack('U*')
         | 
| 135 | 
            +
            			elsif text.is_a?(Array)
         | 
| 136 | 
            +
            				fixed = []
         | 
| 137 | 
            +
            				text.each do | item |
         | 
| 138 | 
            +
            					fixed.push(item.unpack('C*').pack('U*'))
         | 
| 139 | 
            +
            				end
         | 
| 140 | 
            +
            				return fixed
         | 
| 141 | 
            +
            			else
         | 
| 142 | 
            +
            				return text
         | 
| 138 143 | 
             
            			end
         | 
| 139 | 
            -
             | 
| 140 | 
            -
             | 
| 141 | 
            -
            			return  | 
| 144 | 
            +
            		rescue
         | 
| 145 | 
            +
            			puts " X Fixing encoding failed"
         | 
| 146 | 
            +
            			return "Unreadable Text"
         | 
| 142 147 | 
             
            		end
         | 
| 143 148 | 
             
            	end
         | 
| 144 149 |  | 
| 145 150 | 
             
            	def make_attachment_folder(attachments, source_hash)
         | 
| 146 151 | 
             
            		if (!attachments.empty?)
         | 
| 147 | 
            -
            			puts "Creating  | 
| 152 | 
            +
            			puts " - Creating directory: " + source_hash
         | 
| 148 153 | 
             
            			attachments_dir = @attachment_dir + source_hash
         | 
| 149 154 | 
             
                  		Dir.mkdir(attachments_dir) if !Dir.exist?(attachments_dir)
         | 
| 150 155 | 
             
            		end
         | 
| 151 156 | 
             
            	end
         | 
| 152 157 |  | 
| 153 158 | 
             
            	def save_attachment(attachment, message_id, filename)
         | 
| 154 | 
            -
            		puts " -  | 
| 159 | 
            +
            		puts " - " + filename + "\n"
         | 
| 155 160 | 
             
            		begin
         | 
| 156 161 | 
             
            			File.open(@attachment_dir + message_id + "/" + filename, "w+b", 0644) do |f|
         | 
| 157 162 | 
             
            				f.write attachment.body.decoded 
         | 
| 158 163 | 
             
            			end
         | 
| 159 164 | 
             
            		rescue => e
         | 
| 160 | 
            -
            			puts "Unable to save data for #{filename} because #{e.message}"
         | 
| 165 | 
            +
            			puts " X Unable to save data for #{filename} because #{e.message}"
         | 
| 161 166 | 
             
            		end
         | 
| 162 167 | 
             
            	end
         | 
| 163 168 |  | 
| 164 169 | 
             
            	# Accepts a message
         | 
| 165 170 | 
             
            	def parse_message
         | 
| 166 171 |  | 
| 167 | 
            -
            		puts " | 
| 172 | 
            +
            		puts "-" * 70
         | 
| 173 | 
            +
            		puts "Loading email: " + @message
         | 
| 168 174 | 
             
            		source_hash = Digest::SHA256.hexdigest(File.read(@message))
         | 
| 169 | 
            -
            		puts "SHA256 Hash: " + source_hash
         | 
| 170 | 
            -
             | 
| 171 175 | 
             
            		email = Mail.read(@message)
         | 
| 172 176 |  | 
| 173 177 | 
             
            		# Defaults
         | 
| 174 178 | 
             
            		source_file = @message.split("/").last
         | 
| 175 | 
            -
            		
         | 
| 176 | 
            -
            		 | 
| 177 | 
            -
             | 
| 178 | 
            -
            		 | 
| 179 | 
            -
             | 
| 179 | 
            +
            		message_id = ""
         | 
| 180 | 
            +
            		subject = "No Subject"
         | 
| 181 | 
            +
            		body_plain	= ""
         | 
| 182 | 
            +
            		body_html	= ""
         | 
| 183 | 
            +
            		attachments = []
         | 
| 184 | 
            +
             | 
| 185 | 
            +
            		# Message-ID
         | 
| 186 | 
            +
            		begin
         | 
| 187 | 
            +
            			if !email.message_id.nil?
         | 
| 188 | 
            +
            				message_id = fix_encode(email.message_id)
         | 
| 189 | 
            +
            			end
         | 
| 190 | 
            +
            		rescue
         | 
| 191 | 
            +
            			puts " X - Getting Message-ID failed"
         | 
| 180 192 | 
             
            		end
         | 
| 181 193 |  | 
| 182 194 | 
             
            		# Date
         | 
| @@ -206,24 +218,23 @@ class Emailparser | |
| 206 218 | 
             
            			recipients = email_to.concat(email_cc)
         | 
| 207 219 | 
             
            			addresses = recipients + email_from
         | 
| 208 220 | 
             
            		rescue
         | 
| 209 | 
            -
            			puts " | 
| 221 | 
            +
            			puts " X Some addresses failed"
         | 
| 210 222 | 
             
            			# binding.pry
         | 
| 211 223 | 
             
            		end
         | 
| 212 224 |  | 
| 213 225 | 
             
            		# Subject
         | 
| 214 | 
            -
            		 | 
| 215 | 
            -
            			 | 
| 216 | 
            -
             | 
| 217 | 
            -
            			 | 
| 226 | 
            +
            		begin
         | 
| 227 | 
            +
            			if email.subject
         | 
| 228 | 
            +
            				subject = fix_encode(email.subject)
         | 
| 229 | 
            +
            			end
         | 
| 230 | 
            +
            		rescue
         | 
| 231 | 
            +
            			puts " X Subject breaks encoding"
         | 
| 232 | 
            +
              			subject = "Unreadable Subject"
         | 
| 218 233 | 
             
            		end
         | 
| 219 | 
            -
            		
         | 
| 220 | 
            -
            		body_plain	= ""
         | 
| 221 | 
            -
            		body_html	= ""
         | 
| 222 | 
            -
            		attachments = []
         | 
| 223 234 |  | 
| 224 | 
            -
            		# Check for Multipart
         | 
| 235 | 
            +
            		# Body - Check for Multipart
         | 
| 225 236 | 
             
            		if email.multipart?
         | 
| 226 | 
            -
            			puts " -  | 
| 237 | 
            +
            			puts " - Multipart message"
         | 
| 227 238 | 
             
            			if email.text_part 
         | 
| 228 239 | 
             
            				body_plain = fix_encode(email.text_part.body.decoded)
         | 
| 229 240 | 
             
            			end
         | 
| @@ -231,11 +242,12 @@ class Emailparser | |
| 231 242 | 
             
            				body_html = fix_encode(email.html_part.body.decoded)
         | 
| 232 243 | 
             
            			end
         | 
| 233 244 | 
             
            		else
         | 
| 234 | 
            -
            			puts " -  | 
| 245 | 
            +
            			puts " - Single part message"
         | 
| 235 246 | 
             
            			if !email.content_type.nil? and email.content_type.start_with?('text/html')
         | 
| 236 247 | 
             
            				body_html = fix_encode(email.body.decoded)
         | 
| 248 | 
            +
            			else
         | 
| 249 | 
            +
            				body_plain = fix_encode(email.body.decoded)
         | 
| 237 250 | 
             
            			end
         | 
| 238 | 
            -
            			body_plain = fix_encode(email.body.decoded)
         | 
| 239 251 | 
             
            		end
         | 
| 240 252 |  | 
| 241 253 | 
             
            		# Handle Attachments
         | 
| @@ -244,28 +256,27 @@ class Emailparser | |
| 244 256 | 
             
            			attachment_save = false
         | 
| 245 257 | 
             
            			filename = fix_encode(attachment.filename)
         | 
| 246 258 | 
             
            			mime_type, remaining = attachment.content_type.split(';', 2)
         | 
| 247 | 
            -
            			puts " - Attachment mime: " + mime_type
         | 
| 248 259 | 
             
            			# Check Allowed Mime Types
         | 
| 249 260 | 
             
            			if (@allowed_documents.include? mime_type)
         | 
| 250 | 
            -
            				puts " - Attachment | 
| 261 | 
            +
            				puts " - Attachment: Document"
         | 
| 251 262 | 
             
            				attachment_save = true
         | 
| 252 263 | 
             
            			elsif (@allowed_spreadsheets.include? mime_type)
         | 
| 253 | 
            -
            				puts " - Attachment | 
| 264 | 
            +
            				puts " - Attachment: Spreadsheet"
         | 
| 254 265 | 
             
            				attachment_save = true
         | 
| 255 266 | 
             
            			elsif (@allowed_audio.include? mime_type)
         | 
| 256 | 
            -
            				puts " - Attachment | 
| 267 | 
            +
            				puts " - Attachment: Audio"
         | 
| 257 268 | 
             
            				attachment_save = true
         | 
| 258 269 | 
             
            			elsif (@allowed_contacts.include? mime_type)
         | 
| 259 | 
            -
            				puts " - Attachment | 
| 270 | 
            +
            				puts " - Attachment: Contact"
         | 
| 260 271 | 
             
            				attachment_save = true
         | 
| 261 272 | 
             
            			elsif (@allowed_images.include? mime_type)
         | 
| 262 | 
            -
            				puts " - Attachment | 
| 273 | 
            +
            				puts " - Attachment: Image"
         | 
| 263 274 | 
             
            				attachment_save = true
         | 
| 264 275 | 
             
            			elsif (@allowed_slideshows.include? mime_type)
         | 
| 265 | 
            -
            				puts " - Attachment | 
| 276 | 
            +
            				puts " - Attachment: Slideshow"
         | 
| 266 277 | 
             
            				attachment_save = true
         | 
| 267 278 | 
             
            			elsif (@allowed_videos.include? mime_type)
         | 
| 268 | 
            -
            				puts " - Attachment | 
| 279 | 
            +
            				puts " - Attachment: Video"
         | 
| 269 280 | 
             
            				attachment_save = true
         | 
| 270 281 | 
             
            			end
         | 
| 271 282 |  | 
| @@ -275,7 +286,7 @@ class Emailparser | |
| 275 286 | 
             
            				save_attachment(attachment, source_hash, filename)
         | 
| 276 287 | 
             
            			end
         | 
| 277 288 | 
             
            		end
         | 
| 278 | 
            -
             | 
| 289 | 
            +
            	
         | 
| 279 290 | 
             
            		# Structure Data
         | 
| 280 291 | 
             
            		email_data = {
         | 
| 281 292 | 
             
            			source_file: source_file,
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: emailparser
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1. | 
| 4 | 
            +
              version: 0.1.7
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Brennan Novak
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2017-01- | 
| 11 | 
            +
            date: 2017-01-12 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: Parses a signle email file to JSON with attachments
         | 
| 14 14 | 
             
            email: hi@brennannovak.com
         |