mms2r 3.7.1 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- mms2r (3.7.1)
4
+ mms2r (3.8.0)
5
5
  exifr (>= 1.0.3)
6
6
  json (>= 1.6.0)
7
7
  mail (>= 2.4.0)
@@ -19,11 +19,11 @@ GEM
19
19
  mime-types (~> 1.16)
20
20
  treetop (~> 1.4.8)
21
21
  metaclass (0.0.1)
22
- mime-types (1.18)
22
+ mime-types (1.19)
23
23
  mocha (0.11.4)
24
24
  metaclass (~> 0.0.1)
25
25
  multi_json (1.3.5)
26
- nokogiri (1.5.2)
26
+ nokogiri (1.5.5)
27
27
  polyglot (0.3.3)
28
28
  rake (0.9.2.2)
29
29
  rdoc (3.12)
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ ### 3.8.0 / 2012-07-04 (Dr. Donald Gorfield – Comedy specialist)
2
+
3
+ * 1 major enhancement
4
+ * Handle MMS from Sprint that have their media attached rather than CDN'd
5
+
1
6
  ### 3.7.1 / 2012-06-04 (Abrigail Remeltindtdrinc - The Record Cleaner)
2
7
 
3
8
  * 2 minor enhancements
data/README.rdoc CHANGED
@@ -9,7 +9,7 @@ https://github.com/monde/mms2r
9
9
  https://rubygems.org/gems/mms2r
10
10
  http://peepcode.com/products/mms2r-pdf
11
11
 
12
- MMS2R is a library that decodes the parts of an MMS message to disk while
12
+ MMS2R is a library that decodes the parts of a MMS message to disk while
13
13
  stripping out advertising injected by the mobile carriers. MMS messages are
14
14
  multipart email and the carriers often inject branding into these messages. Use
15
15
  MMS2R if you want to get at the real user generated content from a MMS without
@@ -4,6 +4,8 @@ ignore:
4
4
  - !ruby/regexp /We're sorry, this page is not available. We apologize for the inconvenience./mi
5
5
  text/plain:
6
6
  - !ruby/regexp /You have new [Picture|Video] Mail!\s+Click Go\/View to see now./mi
7
+ - !ruby/regexp /You have received a Picture Mail from /mi
8
+ - !ruby/regexp /Click Go\/View to see/mi
7
9
  transform:
8
10
  text/plain:
9
11
  - - !ruby/regexp /^You have new Picture Mail!$/i
data/lib/mms2r.rb CHANGED
@@ -42,14 +42,67 @@ module MMS2R
42
42
  USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2"
43
43
  end
44
44
 
45
+ ##
45
46
  # Simple convenience function to make it a one-liner:
46
- # MMS2R.parse raw_mail or MMS2R.parse File.load(raw_mail)
47
+ # MMS2R.parse raw_mail or
48
+ # MMS2R.parse File.load(file)
49
+ # MMS2R.parse File.load(path_to_file)
47
50
  # Combined w/ the method_missing delegation, this should behave as an enhanced Mail object, more or less.
48
- def self.parse raw_mail, options = {}
49
- mail = Mail.new raw_mail
51
+
52
+ def self.parse thing, options = {}
53
+ mail = case
54
+ when File.exist?(thing); Mail.new(open(thing).read)
55
+ when thing.respond_to?(:read); Mail.new(thing.read)
56
+ else
57
+ Mail.new(thing)
58
+ end
59
+
50
60
  MMS2R::Media.new(mail, options)
51
61
  end
52
62
 
63
+ ##
64
+ # Compare original MMS2R results with original mail values and other metrics.
65
+ #
66
+ # Takes a file path, mms2r object, mail object, or mail text blob.
67
+
68
+ def self.debug(thing, options = {})
69
+ mms = case thing
70
+ when MMS2R::Media; thing
71
+ when Mail::Message; MMS2R::Media.new(thing, options)
72
+ else
73
+ self.parse(thing, options)
74
+ end
75
+
76
+ <<OUT
77
+ #{'-' * 80}
78
+
79
+ original mail
80
+ #{'from:'.ljust(15)} #{mms.mail.from}
81
+ #{'to:'.ljust(15)} #{mms.mail.to}
82
+ #{'subject:'.ljust(15)} #{mms.mail.subject}
83
+
84
+ mms2r
85
+ #{'from:'.ljust(15)} #{mms.from}
86
+ #{'to:'.ljust(15)} #{mms.to}
87
+ #{'subject:'.ljust(15)} #{mms.subject}
88
+ #{'number:'.ljust(15)} #{mms.number}
89
+
90
+ default media
91
+ #{mms.default_media.inspect}
92
+
93
+ default text
94
+ #{mms.default_text.inspect}
95
+ #{mms.default_text.read if mms.default_text}
96
+
97
+ all plain text
98
+ #{(mms.media['text/plain']||[]).each {|t| open(t).read}.join("\n\n")}
99
+
100
+ media hash
101
+ #{mms.media.inspect}
102
+
103
+ OUT
104
+ end
105
+
53
106
  end
54
107
 
55
108
  %W{ yaml mail fileutils pathname tmpdir yaml digest/sha1 iconv exifr }.each do |g|
data/lib/mms2r/media.rb CHANGED
@@ -343,33 +343,16 @@ module MMS2R
343
343
  # note: purge must be explicitly called to remove the media files
344
344
  # mms2r extracts from an mms message.
345
345
 
346
- def process() # :yields: media_type, file
346
+ def process # :yields: media_type, file
347
347
  unless @was_processed
348
348
  log("#{self.class} processing", :info)
349
349
 
350
- parts = mail.multipart? ? mail.parts : [mail]
351
-
352
- # Double check for multipart/related, if it exists replace it with its
353
- # children parts. Do this twice as multipart/alternative can have
354
- # children and we want to fold everything down
355
- for i in 1..2
356
- flat = []
357
- parts.each do |p|
358
- if p.multipart?
359
- p.parts.each {|mp| flat << mp }
360
- else
361
- flat << p
362
- end
363
- end
364
- parts = flat.dup
365
- end
366
-
367
- # get to work
368
- parts.each do |p|
369
- t = p.part_type?
370
- unless ignore_media?(t,p)
371
- t,f = process_media(p)
372
- add_file(t,f) unless t.nil? || f.nil?
350
+ parts = self.folded_parts(mail)
351
+ parts.each do |part|
352
+ if part.part_type? == 'text/html'
353
+ process_html_part(part)
354
+ else
355
+ process_part(part)
373
356
  end
374
357
  end
375
358
 
@@ -436,6 +419,25 @@ module MMS2R
436
419
  return type, file
437
420
  end
438
421
 
422
+ ##
423
+ # Helper to decide if a part should be kept or ignored
424
+
425
+ def process_part(part)
426
+ return if ignore_media?(part.part_type?, part)
427
+
428
+ type, file = process_media(part)
429
+ add_file(type, file) unless type.nil? || file.nil?
430
+ end
431
+
432
+ ##
433
+ # Helper to decide if a html part should be kept or ignored.
434
+ # We are defining it here primarily for the benefit so that Sprint
435
+ # can override a special case for processing.
436
+
437
+ def process_html_part(part)
438
+ process_part(part)
439
+ end
440
+
439
441
  ##
440
442
  # Helper for process_media template method to transform text.
441
443
  # See the transform section in the discussion of the built-in
@@ -490,7 +492,7 @@ module MMS2R
490
492
  # Purges the unique MMS2R::Media.media_dir directory created
491
493
  # for this producer and all of the media that it contains.
492
494
 
493
- def purge()
495
+ def purge
494
496
  log("#{self.class} purging #{@media_dir} and all its contents", :info)
495
497
  FileUtils.rm_rf(@media_dir)
496
498
  end
@@ -507,7 +509,7 @@ module MMS2R
507
509
  # Helper to temp_file to create a unique temporary directory that is a
508
510
  # child of tmp_dir This version is based on the message_id of the mail.
509
511
 
510
- def msg_tmp_dir()
512
+ def msg_tmp_dir
511
513
  @dir_count += 1
512
514
  dir = File.expand_path(File.join(@media_dir, "#{@dir_count}"))
513
515
  FileUtils.mkdir_p(dir)
@@ -762,7 +764,7 @@ module MMS2R
762
764
  self.class.initialize_config(config)
763
765
  end
764
766
 
765
- private
767
+ protected
766
768
 
767
769
  ##
768
770
  # accessor for the config
@@ -780,6 +782,19 @@ module MMS2R
780
782
  ent.nil? ? nil : ent.first
781
783
  end
782
784
 
785
+ ##
786
+ # Helper to fold all the parts of multipart mail down into a flat array.
787
+ # multipart/related and multipart/alternative parts can have child parts.
788
+ def folded_parts(parts)
789
+ return folded_parts([parts]) unless parts.respond_to?(:each)
790
+
791
+ result = [] # NOTE could use #tap but want 1.8.7 compat
792
+ parts.each do |part|
793
+ result << (part.multipart? ? folded_parts(part.parts) : part)
794
+ end
795
+ result.flatten
796
+ end
797
+
783
798
  ##
784
799
  # used by #default_media and #text to return the biggest attachment type
785
800
  # listed in the types array
@@ -28,53 +28,25 @@ module MMS2R
28
28
 
29
29
  module Sprint
30
30
 
31
- ##
32
- # Override process() because Sprint doesn't attach media (images, video,
33
- # etc.) to its MMS. Media such as images and videos are hosted on a
34
- # Sprint content server. MMS2R::Media::Sprint has to pick apart an
35
- # HTML attachment to find the URL to the media on Sprint's content
36
- # server and download each piece of content. Any text message part of
37
- # the MMS if it exists is embedded in the html.
38
-
39
- def process
40
- unless @was_processed
41
- log("#{self.class} processing", :info)
42
- #sprint MMS are multipart
43
- parts = @mail.parts
44
-
45
- #find the payload html
46
- doc = nil
47
- parts.each do |p|
48
- next unless p.part_type? == 'text/html'
49
- d = Nokogiri(p.body.decoded)
50
- title = d.at('title').inner_html
51
- if title =~ /You have new Picture Mail!/
52
- doc = d
53
- @is_video = (p.body.decoded =~ /type=&quot;VIDEO&quot;&gt;/m ? true : false)
54
- end
55
- end
56
- return if doc.nil? # it was a dud
57
- @is_video ||= false
58
-
59
- # break it down
60
- sprint_phone_number(doc)
61
- sprint_process_text(doc)
62
- sprint_process_media(doc)
63
-
64
- @was_processed = true
65
- end
66
-
67
- # when process acts upon a block
68
- if block_given?
69
- media.each do |k, v|
70
- yield(k, v)
71
- end
72
- end
31
+ protected
73
32
 
33
+ ##
34
+ # Helper to process old style media on the Sprint CDN which didn't attach
35
+ # media (images, video, etc.) to its MMS. Media such as images and
36
+ # videos are hosted on a Sprint content server. MMS2R::Media::Sprint has
37
+ # to pick apart an HTML attachment to find the URL to the media on
38
+ # Sprint's content server and download each piece of content. Any text
39
+ # message part of the MMS if it exists is embedded in the html.
40
+
41
+ def process_html_part(part)
42
+ doc = Nokogiri(part.body.decoded)
43
+
44
+ is_video = (part.body.decoded =~ /type=&quot;VIDEO&quot;&gt;/m ? true : false)
45
+ sprint_process_media(doc, is_video)
46
+ sprint_process_text(doc)
47
+ sprint_phone_number(doc)
74
48
  end
75
49
 
76
- private
77
-
78
50
  ##
79
51
  # Digs out where Sprint hides the phone number
80
52
 
@@ -82,7 +54,8 @@ module MMS2R
82
54
  c = doc.search("/html/head/comment()").last
83
55
  t = c.content.gsub(/\s+/m," ").strip
84
56
  #@number returned in parent's #number
85
- @number = / name=&quot;MDN&quot;&gt;(\d+)&lt;/.match(t)[1]
57
+ matched = / name=&quot;MDN&quot;&gt;(\d+)&lt;/.match(t)
58
+ @number = matched[1] if matched
86
59
  end
87
60
 
88
61
  ##
@@ -157,7 +130,7 @@ module MMS2R
157
130
  ##
158
131
  # Fetch all the media that is referred to in the doc
159
132
 
160
- def sprint_process_media(doc)
133
+ def sprint_process_media(doc, is_video=false)
161
134
  srcs = Array.new
162
135
  # collect all the images in the document, even though
163
136
  # they are <img> tag some might actually refer to video.
@@ -183,14 +156,14 @@ module MMS2R
183
156
  begin
184
157
 
185
158
  uri = URI.parse(CGI.unescapeHTML(src))
186
- unless @is_video
159
+ unless is_video
187
160
  query={}
188
161
  uri.query.split('&').each{|a| p=a.split('='); query[p[0]] = p[1]}
189
162
  query.delete_if{|k, v| k == 'limitsize' || k == 'squareoutput' }
190
163
  uri.query = query.map{|k,v| "#{k}=#{v}"}.join("&")
191
164
  end
192
165
  # sprint is a ghetto, they expect to see &amp; for video request
193
- uri.query = uri.query.gsub(/&/, "&amp;") if @is_video
166
+ uri.query = uri.query.gsub(/&/, "&amp;") if is_video
194
167
 
195
168
  connection = Net::HTTP.new(uri.host, uri.port)
196
169
  #connection.set_debug_output $stdout
data/lib/mms2r/version.rb CHANGED
@@ -6,11 +6,11 @@ module MMS2R
6
6
  end
7
7
 
8
8
  def self.minor
9
- 7
9
+ 8
10
10
  end
11
11
 
12
12
  def self.patch
13
- 1
13
+ 0
14
14
  end
15
15
 
16
16
  def self.pre
data/mms2r.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.email = ["mikemondragon@gmail.com"]
22
22
  gem.homepage = "https://github.com/monde/mms2r"
23
23
  gem.summary = "Extract user media from MMS (and not carrier cruft)"
24
- gem.description = "MMS2R is a library that decodes the parts of an MMS message to disk while stripping out advertising injected by the mobile carriers."
24
+ gem.description = "MMS2R is a library that decodes the parts of a MMS message to disk while stripping out advertising injected by the mobile carriers."
25
25
  gem.rubyforge_project = "mms2r"
26
26
  gem.rubygems_version = ">= 1.3.6"
27
27
  gem.files = `git ls-files`.split("\n")
@@ -311,18 +311,10 @@ class TestMms2rMedia < Test::Unit::TestCase
311
311
 
312
312
  def test_attachment_should_return_duck_typed_file
313
313
  mms = MMS2R::Media.new stub_mail
314
- temp_big = temp_text_file("hello world")
315
- size = File.size(temp_text_file("hello world"))
316
- temp_small = temp_text_file("hello")
317
- mms.stubs(:media).returns({'text/plain' => [temp_small, temp_big]})
318
314
  duck_file = mms.send(:attachment, ['text'])
319
- assert_not_nil duck_file
320
- assert_equal true, File::exist?(duck_file)
321
- assert_equal true, File::exist?(temp_big)
322
- assert_equal temp_big, duck_file.local_path
323
- assert_equal File.basename(temp_big), duck_file.original_filename
324
- assert_equal size, duck_file.size
315
+ assert_equal 1, duck_file.size
325
316
  assert_equal 'text/plain', duck_file.content_type
317
+ assert_equal "a", open(mms.media['text/plain'].first).read
326
318
  end
327
319
 
328
320
  def test_empty_body
@@ -632,86 +624,24 @@ class TestMms2rMedia < Test::Unit::TestCase
632
624
  mms.purge
633
625
  end
634
626
 
635
- def test_process_with_multipart_alternative_parts
636
- mail = stub_mail
637
-
638
- plain = stub('plain', :filename => 'message.txt', :content_type => 'text/plain', :part_type? => 'text/plain', :body => Mail::Body.new('a'), :main_type => 'text')
639
- plain.stubs(:multipart?).at_least_once.returns(false)
640
-
641
- html = stub('html', :filename => 'message.html', :content_type => 'text/html', :part_type? => 'text/html', :body => Mail::Body.new('a'), :main_type => 'text')
642
- html.stubs(:multipart?).at_least_once.returns(false)
643
-
644
- multi = stub('multi', :content_type => 'multipart/alternative', :part_type? => 'multipart/alternative', :parts => [plain, html])
645
- multi.stubs(:multipart?).at_least_once.returns(true)
646
-
647
- mail.stubs(:multipart?).at_least_once.returns(true)
648
- mail.stubs(:parts).at_least_once.returns([multi])
649
-
650
- # the multipart/alternative should get flattend to text and html
651
- mms = MMS2R::Media.new(mail)
652
- assert_equal 2, mms.media.size
653
- assert_equal 2, mms.media.size
654
- assert_not_nil mms.media['text/plain']
655
- assert_not_nil mms.media['text/html']
656
- assert_equal 1, mms.media['text/plain'].size
657
- assert_equal 1, mms.media['text/html'].size
658
- assert_equal 'message.txt', File.basename(mms.media['text/plain'].first)
659
- assert_equal 'message.html', File.basename(mms.media['text/html'].first)
660
- assert_equal true, File.exist?(mms.media['text/plain'].first)
661
- assert_equal true, File.exist?(mms.media['text/html'].first)
662
- assert_equal 1, File.size(mms.media['text/plain'].first)
663
- assert_equal 1, File.size(mms.media['text/html'].first)
664
- mms.purge
627
+ def test_folding_with_multipart_alternative_parts
628
+ mail = mail('helio-message-01.mail')
629
+ mms = MMS2R::Media.new(Mail.new)
630
+ assert_equal 5, mms.send(:folded_parts, mail.parts).size
665
631
  end
666
632
 
667
633
  def test_process_when_media_is_ignored
668
- mail = stub_mail
669
- plain = stub('plain', :filename => 'message.txt', :content_type => 'text/plain', :part_type? => 'text/plain', :body => Mail::Body.new(''), :main_type => 'text')
670
- plain.stubs(:multipart?).at_least_once.returns(false)
671
-
672
- html = stub('html', :filename => 'message.html', :content_type => 'text/html', :part_type? => 'text/html', :body => Mail::Body.new(''), :main_type => 'text')
673
- html.stubs(:multipart?).at_least_once.returns(false)
674
-
675
-
676
- multi = stub('multi', :content_type => 'multipart/alternative', :part_type? => 'multipart/alternative', :parts => [plain, html])
677
- multi.stubs(:multipart?).at_least_once.returns(true)
678
-
679
- mail.stubs(:multipart?).at_least_once.returns(true)
680
- mail.stubs(:parts).at_least_once.returns([multi])
681
-
682
- mms = MMS2R::Media.new(mail, :process => :lazy)
683
- mms.stubs(:config).returns({'ignore' => {'text/plain' => ['message.txt'],
684
- 'text/html' => ['message.html']}})
685
- assert_nothing_raised { mms.process }
686
- # the multipart/alternative should get flattend to text and html and then
687
- # what's flattened is ignored
688
- assert_equal 0, mms.media.size
689
- mms.purge
634
+ # TODO - I'd like to get away from mocks and test on real data, and
635
+ # this is covered repeatedly for various samples from the carrier
690
636
  end
691
637
 
692
638
  def test_process_when_yielding_to_a_block
693
- mail = stub_mail
694
-
695
- plain = stub('plain', :filename => 'message.txt', :content_type => 'text/plain', :part_type? => 'text/plain', :body => Mail::Body.new('a'), :main_type => 'text')
696
- plain.stubs(:multipart?).at_least_once.returns(false)
697
-
698
- html = stub('html', :filename => 'message.html', :content_type => 'text/html', :part_type? => 'text/html', :body => Mail::Body.new('b'), :main_type => 'text')
699
- html.stubs(:multipart?).at_least_once.returns(false)
700
-
701
- multi = stub('multi', :content_type => 'multipart/alternative', :part_type? => 'multipart/alternative', :parts => [plain, html])
702
- multi.stubs(:multipart?).at_least_once.returns(true)
703
-
704
- mail.stubs(:multipart?).at_least_once.returns(true)
705
- mail.stubs(:parts).at_least_once.returns([multi])
706
-
707
- # the multipart/alternative should get flattend to text and html
639
+ mail = mail('att-image-01.mail')
708
640
  mms = MMS2R::Media.new(mail)
709
- assert_equal 2, mms.media.size
710
641
  mms.process do |type, files|
711
642
  assert_equal 1, files.size
712
- assert_equal true, type == 'text/plain' || type == 'text/html'
713
- assert_equal true, File.basename(files.first) == 'message.txt' ||
714
- File.basename(files.first) == 'message.html'
643
+ assert_equal true, type == 'image/jpeg'
644
+ assert_equal true, File.basename(files.first) == 'Photo_12.jpg'
715
645
  assert_equal true, File::exist?(files.first)
716
646
  end
717
647
  mms.purge
@@ -21,6 +21,7 @@ class TestMmsMyhelioCom < Test::Unit::TestCase
21
21
  assert_equal "mms.myhelio.com", mms.carrier
22
22
  assert_equal 1, mms.media.size
23
23
  assert_equal 1, mms.media['text/plain'].size
24
+ assert_equal "Test message", open(mms.media['text/plain'].first).read
24
25
  mms.purge
25
26
  end
26
27
 
@@ -259,7 +259,6 @@ class TestPmSprintCom < Test::Unit::TestCase
259
259
 
260
260
  assert_equal '5135455555', mms.number
261
261
  assert_equal "pm.sprint.com", mms.carrier
262
-
263
262
  assert_equal 0, mms.media.size
264
263
 
265
264
  mms.purge
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mms2r
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.1
4
+ version: 3.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-05 00:00:00.000000000 Z
12
+ date: 2012-07-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -155,7 +155,7 @@ dependencies:
155
155
  - - ! '>='
156
156
  - !ruby/object:Gem::Version
157
157
  version: '0'
158
- description: MMS2R is a library that decodes the parts of an MMS message to disk while
158
+ description: MMS2R is a library that decodes the parts of a MMS message to disk while
159
159
  stripping out advertising injected by the mobile carriers.
160
160
  email:
161
161
  - mikemondragon@gmail.com
@@ -374,7 +374,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
374
374
  version: '0'
375
375
  segments:
376
376
  - 0
377
- hash: -3061222747631140380
377
+ hash: 2515700881121403429
378
378
  required_rubygems_version: !ruby/object:Gem::Requirement
379
379
  none: false
380
380
  requirements:
@@ -383,7 +383,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
383
383
  version: '0'
384
384
  segments:
385
385
  - 0
386
- hash: -3061222747631140380
386
+ hash: 2515700881121403429
387
387
  requirements: []
388
388
  rubyforge_project: mms2r
389
389
  rubygems_version: 1.8.24