format_parser 2.3.0 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'matrix'
4
+
5
module FormatParser
  module ISOBaseMediaFileFormat
    # Mixin with helpers that derive video metadata (dimensions, duration, frame rate and codecs) from a decoded
    # box tree. A box tree is an enumerable of top-level boxes; the helpers rely on each box responding to `type`,
    # `fields`, `children`, `dig` and the child/descendent lookup methods called below.
    module Utils
      # Fallback transformation used whenever a `mvhd` or `tkhd` box carries no matrix of its own.
      IDENTITY_MATRIX = Matrix.identity(3)

      # Computes the extents of the bounding box that encloses every video track once the track-level (`tkhd`) and
      # movie-level (`mvhd`) transformation matrices have been applied to the track's corner points.
      #
      # @param box_tree [Enumerable] the decoded top-level boxes
      # @return [Array(Numeric, Numeric), nil] `[width, height]`, or nil when there is no `moov` box or no video
      #   track provides both a width and a height
      def dimensions(box_tree)
        moov_box = box_tree.find { |box| box.type == 'moov' }
        return unless moov_box

        movie_matrix = moov_box.first_child('mvhd')&.dig(:fields, :matrix) || IDENTITY_MATRIX
        corners = video_trak_boxes(box_tree).flat_map do |trak_box|
          transformed_track_corners(trak_box, movie_matrix)
        end
        return if corners.empty?

        min_x, max_x = corners.map(&:first).minmax
        min_y, max_y = corners.map(&:last).minmax
        [max_x - min_x, max_y - min_y]
      end

      # Derives the movie duration in seconds from the `mvhd` box (`duration / timescale`).
      #
      # @param box_tree [Enumerable] the decoded top-level boxes
      # @return [Float, nil] nil when the `mvhd` box or either field is missing, or when the timescale is zero
      def duration(box_tree)
        mvhd_box = box_tree.find { |box| box.type == 'moov' }&.first_child('mvhd')
        return unless mvhd_box

        ticks = mvhd_box.fields[:duration]
        timescale = mvhd_box.fields[:timescale]&.to_f
        return unless ticks && timescale
        # A zero timescale would turn the float division below into Infinity or NaN instead of a usable duration.
        return if timescale.zero?

        ticks / timescale
      end

      # Derives a frame rate from the first video track that provides both a media timescale (`mdhd`) and a first
      # time-to-sample entry (`stts`), truncated to two decimal places.
      #
      # @param box_tree [Enumerable] the decoded top-level boxes
      # @return [Float, nil] nil when no video track provides usable values
      def frame_rate(box_tree)
        # TODO: Properly account for and represent variable frame-rates.
        video_trak_boxes(box_tree).each do |trak_box|
          mdhd_box = trak_box.first_descendent_by_path(%w[mdia mdhd])
          stts_box = trak_box.first_descendent_by_path(%w[mdia minf stbl stts])
          next unless mdhd_box && stts_box

          timescale = mdhd_box.fields[:timescale]&.to_f
          sample_delta = stts_box.dig(:fields, :entries, 0, :sample_delta)
          next unless timescale && sample_delta
          # Dividing by a zero sample delta would produce Infinity rather than a frame rate; try the next track.
          next if sample_delta.zero?

          return (timescale / sample_delta).truncate(2)
        end
        nil
      end

      # Lists the distinct types of the boxes found directly inside the `stsd` boxes of every video track.
      #
      # @param box_tree [Enumerable] the decoded top-level boxes
      # @return [Array] unique, non-nil child box types (empty when there are no video tracks)
      def video_codecs(box_tree)
        video_trak_boxes(box_tree).flat_map do |trak_box|
          trak_box.all_descendents_by_path(%w[mdia minf stbl stsd]).flat_map { |stsd_box| stsd_box.children.map(&:type) }
        end.compact.uniq
      end

      private

      # Find any and all `trak` boxes containing a video media handler.
      def video_trak_boxes(box_tree)
        moov_box = box_tree.find { |box| box.type == 'moov' }
        return [] unless moov_box

        moov_box.all_children('trak').select do |trak_box|
          trak_box.all_descendents('hdlr').any? do |hdlr_box|
            hdlr_fields = hdlr_box.fields
            if hdlr_fields.key?(:component_type) && hdlr_fields.key?(:component_subtype) # MOV
              hdlr_fields[:component_type] == 'mhlr' && hdlr_fields[:component_subtype] == 'vide'
            else
              hdlr_fields[:handler_type] == 'vide'
            end
          end
        end
      end

      # Maps the four corner points of a track (taken from the width and height in its `tkhd` box) through the track
      # matrix and then the movie matrix. Returns an array of `[x, y]` pairs, or an empty array when the track has no
      # `tkhd` box or lacks a width or height.
      def transformed_track_corners(trak_box, movie_matrix)
        tkhd_box = trak_box.first_child('tkhd')
        return [] unless tkhd_box

        width = tkhd_box.fields[:width]
        height = tkhd_box.fields[:height]
        return [] unless width && height

        track_matrix = tkhd_box.fields[:matrix] || IDENTITY_MATRIX
        [[0, 0], [0, height], [width, 0], [width, height]].map do |corner|
          # Each corner is a row vector with a constant third component of 1; only x and y are kept.
          (Matrix[[*corner, 1]] * track_matrix * movie_matrix).to_a[0][0..1]
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'parsers/iso_base_media_file_format/decoder'
2
+
3
# Decoder overrides for the box layouts that this file defines for MOV files: `hdlr`, `mvhd` and `tkhd`.
# NOTE(review): each method is presumably dispatched by the base decoder for boxes of the matching type, and the
# second element of every returned pair is presumably the list of child boxes - confirm against the base class.
class FormatParser::MOVParser::Decoder < FormatParser::ISOBaseMediaFileFormat::Decoder
  protected

  # Reads the fields of a `hdlr` box. The reads below must stay in this exact order because they consume the
  # underlying stream sequentially.
  #
  # @param size [Integer] used to work out the length of the trailing component name (`size - 24`)
  # @return [Array] `[fields, nil]`
  def hdlr(size)
    header = read_version_and_flags
    component = {}
    component[:component_type] = read_string(4)
    component[:component_subtype] = read_string(4)
    component[:component_manufacturer] = read_bytes(4)
    component[:component_flags] = read_bytes(4)
    component[:component_flags_mask] = read_bytes(4)
    component[:component_name] = read_string(size - 24)
    [header.merge(component), nil]
  end

  # Reads the fields of a `mvhd` box, in stream order.
  #
  # @return [Array] `[fields, nil]`
  def mvhd(_)
    header = read_version_and_flags
    movie = {}
    movie[:creation_time] = read_int
    movie[:modification_time] = read_int
    movie[:timescale] = read_int
    movie[:duration] = read_int
    movie[:rate] = read_fixed_point(n: 4)
    movie[:volume] = read_fixed_point(n: 2, signed: true)
    # Ten bytes are skipped ahead of the matrix.
    movie[:matrix] = skip_bytes(10) { read_matrix }
    movie[:preview_time] = read_int
    movie[:preview_duration] = read_int
    movie[:poster_time] = read_int
    movie[:selection_time] = read_int
    movie[:selection_duration] = read_int
    movie[:current_time] = read_int
    movie[:next_trak_id] = read_int
    [header.merge(movie), nil]
  end

  # Reads the fields of a `tkhd` box, in stream order.
  #
  # @return [Array] `[fields, nil]`
  def tkhd(_)
    header = read_version_and_flags
    track = {}
    track[:creation_time] = read_int
    track[:modification_time] = read_int
    track[:track_id] = read_int
    # Bytes are skipped ahead of the duration (4), the layer (8) and the matrix (2).
    track[:duration] = skip_bytes(4) { read_int }
    track[:layer] = skip_bytes(8) { read_int(n: 2) }
    track[:alternate_group] = read_int(n: 2)
    track[:volume] = read_fixed_point(n: 2, signed: true)
    track[:matrix] = skip_bytes(2) { read_matrix }
    track[:width] = read_fixed_point(n: 4)
    track[:height] = read_fixed_point(n: 4)
    [header.merge(track), nil]
  end
end
@@ -0,0 +1,48 @@
1
+ require 'parsers/iso_base_media_file_format/utils'
2
+
3
# Parser for QuickTime movie files. It checks the leading `ftyp` signature, decodes the box tree and reports the
# result as a FormatParser::Video.
class FormatParser::MOVParser
  include FormatParser::IOUtils
  include FormatParser::ISOBaseMediaFileFormat::Utils
  # Loaded from inside the class body so that the FormatParser::MOVParser namespace exists before the nested
  # Decoder class is defined.
  require_relative 'mov_parser/decoder'

  # The eight bytes that follow the four-byte size field: `ftyp` plus the four-character major brand. The brand is
  # `qt` padded with two spaces, written with explicit escapes so the padding cannot be lost. The literal must be
  # exactly eight characters long because it is compared against `read_string(8)`.
  MAGIC_BYTES = "ftypqt\x20\x20".freeze
  MOV_MIME_TYPE = 'video/quicktime'.freeze

  # @param filename [String]
  # @return [Boolean] whether the filename ends in a .mov, .moov or .qt extension (case-insensitive)
  def likely_match?(filename)
    # \z anchors at the very end of the string; `$` would also match just before a trailing newline.
    /\.(mov|moov|qt)\z/i.match?(filename)
  end

  # Parses the given IO.
  #
  # @param io the IO to read from
  # @return [FormatParser::Video, nil] nil when the signature does not match
  def call(io)
    @buf = FormatParser::IOConstraint.new(io)

    return unless matches_mov_definition?

    box_tree = Measurometer.instrument('format_parser.mov_parser.decoder.build_box_tree') do
      Decoder.new.build_box_tree(0xffffffff, @buf)
    end

    width, height = dimensions(box_tree)

    FormatParser::Video.new(
      format: :mov,
      width_px: width,
      height_px: height,
      frame_rate: frame_rate(box_tree),
      media_duration_seconds: duration(box_tree),
      content_type: MOV_MIME_TYPE,
      codecs: video_codecs(box_tree),
      intrinsics: box_tree
    )
  end

  private

  # Checks bytes 4..11 against MAGIC_BYTES, then seeks back to the start so decoding begins at offset 0.
  def matches_mov_definition?
    skip_bytes(4)
    matches = read_string(8) == MAGIC_BYTES
    @buf.seek(0)
    matches
  end

  FormatParser.register_parser new, natures: [:video], formats: [:mov], priority: 3
end
@@ -0,0 +1,80 @@
1
+ require_relative 'iso_base_media_file_format/decoder'
2
+
3
# Parser for MP4-family files. It checks the leading `ftyp` signature, decodes the box tree and reports the result
# as a FormatParser::Video or FormatParser::Audio depending on the major brand.
class FormatParser::MP4Parser
  include FormatParser::IOUtils
  include FormatParser::ISOBaseMediaFileFormat
  include FormatParser::ISOBaseMediaFileFormat::Utils

  # Matched against the eight bytes that follow the four-byte size field: `ftyp` plus a supported major brand.
  MAGIC_BYTES = /\Aftyp(iso[m2]|mp4[12]|m4[abprv] )\z/i

  # Maps a downcased major brand to the format symbol reported for it.
  BRAND_FORMATS = {
    'isom' => :mp4, # Prohibited as a major brand by ISO/IEC 14496-12 sec 6.3 paragraph 2, but occasionally used.
    'iso2' => :mp4, # Prohibited as a major brand by ISO/IEC 14496-12 sec 6.3 paragraph 2, but occasionally used.
    'mp41' => :mp4,
    'mp42' => :mp4,
    'm4a ' => :m4a,
    'm4b ' => :m4b, # iTunes audiobooks
    'm4p ' => :m4p, # iTunes audio
    'm4r ' => :m4r, # iTunes ringtones
    'm4v ' => :m4v, # iTunes video
  }.freeze
  AUDIO_FORMATS = Set[:m4a, :m4b, :m4p, :m4r].freeze
  VIDEO_FORMATS = Set[:mp4, :m4v].freeze

  AUDIO_MIMETYPE = 'audio/mp4'.freeze
  VIDEO_MIMETYPE = 'video/mp4'.freeze

  # @param filename [String]
  # @return [Boolean] whether the filename ends in .mp4 or one of the .m4a/.m4b/.m4p/.m4r/.m4v extensions
  def likely_match?(filename)
    # \z anchors at the very end of the string; `$` would also match just before a trailing newline.
    /\.(mp4|m4[abprv])\z/i.match?(filename)
  end

  # Parses the given IO.
  #
  # @param io the IO to read from
  # @return [FormatParser::Video, FormatParser::Audio, nil] nil when the signature does not match or the major
  #   brand is not one of BRAND_FORMATS
  def call(io)
    @buf = FormatParser::IOConstraint.new(io)

    return unless matches_mp4_definition?

    box_tree = Measurometer.instrument('format_parser.mp4_parser.decoder.build_box_tree') do
      Decoder.new.build_box_tree(0xffffffff, @buf)
    end

    format = file_format(box_tree)
    # The `when` clauses rely on Set#=== performing a membership test; an unmatched format falls through to nil.
    case format
    when VIDEO_FORMATS
      width, height = dimensions(box_tree)
      FormatParser::Video.new(
        codecs: video_codecs(box_tree),
        content_type: VIDEO_MIMETYPE,
        format: format,
        frame_rate: frame_rate(box_tree),
        height_px: height,
        intrinsics: box_tree,
        media_duration_seconds: duration(box_tree),
        width_px: width,
      )
    when AUDIO_FORMATS
      FormatParser::Audio.new(
        content_type: AUDIO_MIMETYPE,
        format: format,
        intrinsics: box_tree,
        media_duration_seconds: duration(box_tree),
      )
    end
  end

  private

  # Looks up the format symbol for the major brand recorded in the `ftyp` box.
  #
  # @return [Symbol, nil] nil when there is no `ftyp` box, no major brand, or the brand is not in BRAND_FORMATS
  def file_format(box_tree)
    ftyp_box = box_tree.find { |box| box.type == 'ftyp' }
    # Guard explicitly: `&.fields[:major_brand]` would still call `[]` on nil when no `ftyp` box is found,
    # because safe navigation does not short-circuit the rest of the call chain.
    return unless ftyp_box

    major_brand = ftyp_box.fields[:major_brand]
    BRAND_FORMATS[major_brand.downcase] if major_brand
  end

  # Checks bytes 4..11 against MAGIC_BYTES, then seeks back to the start so decoding begins at offset 0.
  def matches_mp4_definition?
    skip_bytes(4)
    matches = MAGIC_BYTES.match?(read_string(8))
    @buf.seek(0)
    matches
  end

  FormatParser.register_parser new, natures: [:audio, :video], formats: BRAND_FORMATS.values.uniq, priority: 3
end
@@ -6,7 +6,7 @@ class FormatParser::PDFParser
6
6
  #
7
7
  # There are however exceptions, which are left out for now.
8
8
  #
9
- PDF_MARKER = /%PDF-1\.[0-8]{1}/
9
+ PDF_MARKER = /%PDF-[12]\.[0-8]{1}/
10
10
  PDF_CONTENT_TYPE = 'application/pdf'
11
11
 
12
12
  def likely_match?(filename)
@@ -16,9 +16,12 @@ class FormatParser::PDFParser
16
16
  def call(io)
17
17
  io = FormatParser::IOConstraint.new(io)
18
18
 
19
- return unless safe_read(io, 9) =~ PDF_MARKER
19
+ header = safe_read(io, 9)
20
+ return unless header =~ PDF_MARKER
20
21
 
21
22
  FormatParser::Document.new(format: :pdf, content_type: PDF_CONTENT_TYPE)
23
+ rescue FormatParser::IOUtils::InvalidRead
24
+ nil
22
25
  end
23
26
 
24
27
  FormatParser.register_parser new, natures: :document, formats: :pdf, priority: 3
@@ -69,7 +69,7 @@ class FormatParser::WebpParser
69
69
  # The subsequent 4 bytes contain the image width and height, respectively, as 14-bit unsigned little endian
70
70
  # integers (minus one). The 4 remaining bits consist of a 1-bit flag indicating whether alpha is used, and a 3-bit
71
71
  # version that is always zero.
72
- dimensions = read_little_endian_int_32
72
+ dimensions = read_int(big_endian: false)
73
73
  width = (dimensions & 0x3fff) + 1
74
74
  height = (dimensions >> 14 & 0x3fff) + 1
75
75
  has_transparency = (dimensions >> 28 & 0x1) == 1
@@ -92,7 +92,7 @@ class FormatParser::WebpParser
92
92
  # - E = Set if file contains Exif metadata.
93
93
  # - X = Set if file contains XMP metadata.
94
94
  # - A = Set if file is an animated image.
95
- flags = read_int_8
95
+ flags = read_int(n: 1)
96
96
  has_transparency = flags & 0x10 != 0
97
97
  has_exif_metadata = flags & 0x08 != 0
98
98
  has_xmp_metadata = flags & 0x04 != 0
@@ -184,7 +184,7 @@ describe FormatParser do
184
184
  'FormatParser::GIFParser',
185
185
  'Class',
186
186
  'FormatParser::PNGParser',
187
- 'FormatParser::MOOVParser',
187
+ 'FormatParser::MP4Parser',
188
188
  'FormatParser::CR2Parser',
189
189
  'FormatParser::CR3Parser',
190
190
  'FormatParser::DPXParser',
@@ -18,9 +18,9 @@ describe FormatParser::CR3Parser do
18
18
  expect(result).to be_nil
19
19
  end
20
20
 
21
- it 'should return nil if no CMT1 atom is present' do
21
+ it 'should return nil if no CMT1 box is present' do
22
22
  # This is a MOV file with the ftyp header modified to masquerade as a CR3 file. It is therefore missing the
23
- # CR3-specific CMT1 atom containing the image metadata.
23
+ # CR3-specific CMT1 box containing the image metadata.
24
24
  result = subject.call(File.open(fixtures_dir + '/CR3/invalid'))
25
25
  expect(result).to be_nil
26
26
  end
@@ -50,7 +50,7 @@ describe FormatParser::CR3Parser do
50
50
  expect(result.display_height_px).to eq(4000)
51
51
  expect(result.content_type).to eq('image/x-canon-cr3')
52
52
  expect(result.intrinsics).not_to be_nil
53
- expect(result.intrinsics[:atom_tree]).not_to be_nil
53
+ expect(result.intrinsics[:box_tree]).not_to be_nil
54
54
  expect(result.intrinsics[:exif]).not_to be_nil
55
55
  expect(result.intrinsics[:exif][:image_length]).to eq(result.height_px)
56
56
  expect(result.intrinsics[:exif][:image_width]).to eq(result.width_px)