file_data 5.0.0 → 5.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d38cf3da31fc0fc355f64228574f06888f52563e
4
- data.tar.gz: 6babcaa52b7fe146f4a310eb2eb34868522edd2b
2
+ SHA256:
3
+ metadata.gz: 441d9a1d1481ac066af3b8c0bcd2bae00e7b54b88c322ef251a7579ee79f270b
4
+ data.tar.gz: af5797dd26aefac7eddee225a81012b7942283bee76e5add1911d67cb5158d4b
5
5
  SHA512:
6
- metadata.gz: f537057f31aa2f9021cdf9f9d75a94fb03024f2e9b796f99c56a26c3826a7b801931771e73ffecdee4492050eb57fc320403b8007dbda3403317af577b95428b
7
- data.tar.gz: 4ffd9045d319937d8b2561ee046eca8e6a08ca6cadcf938f4a3a132ec872aca310645c8b7a517ef6ed7e8f4f1793e9736fc3483010fd9a18ddaf87ab4635e596
6
+ metadata.gz: ea6f2869ffa5b4f2a54d5767b7affc853c546e65866d55d6b655a6ddfddb00f732370abeea3e431cc3a951fb8cf4789d155d9321974181b84cefe8619591db3d
7
+ data.tar.gz: efb5a79f2b6297ef5b1f85a7acd6f6d33e8b313a47e6c3cfc0b69080819d6fca5d116158341ede91c5e6b3e0a34c7f641fc27f1fd63c08c9bf28ec11f89ce15b
data/.coveralls.yml CHANGED
File without changes
data/.gitignore CHANGED
@@ -47,4 +47,8 @@ build-iPhoneSimulator/
47
47
  # .ruby-gemset
48
48
 
49
49
  # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
- .rvmrc
50
+ .rvmrc
51
+
52
+ # Deep cover files
53
+ .sass-cache/
54
+ deep_cover/
data/.rubocop.yml CHANGED
@@ -8,7 +8,7 @@ Metrics/ClassLength:
8
8
  Max: 100
9
9
  EndOfLine:
10
10
  Enabled: false
11
- Style/BlockLength:
11
+ Metrics/BlockLength:
12
12
  Exclude:
13
13
  - 'spec/**/*_spec.rb'
14
14
  Style/BlockComments:
@@ -16,4 +16,4 @@ Style/BlockComments:
16
16
  - 'spec/spec_helper.rb'
17
17
  AllCops:
18
18
  Exclude:
19
- - 'file_data.gemspec'
19
+ - 'file_data.gemspec'
data/.travis.yml CHANGED
File without changes
data/Gemfile.lock CHANGED
@@ -1,22 +1,52 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- file_data (5.0.0)
4
+ file_data (5.2.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ ast (2.4.0)
10
+ backports (3.11.3)
11
+ binding_of_caller (0.8.0)
12
+ debug_inspector (>= 0.0.1)
13
+ coderay (1.1.2)
9
14
  coveralls (0.8.19)
10
15
  json (>= 1.8, < 3)
11
16
  simplecov (~> 0.12.0)
12
17
  term-ansicolor (~> 1.3)
13
18
  thor (~> 0.19.1)
14
19
  tins (~> 1.6)
20
+ debug_inspector (1.1.0)
21
+ deep-cover (0.6.2)
22
+ backports (>= 3.11.0)
23
+ binding_of_caller
24
+ bundler
25
+ highline
26
+ parser (~> 2.5.0)
27
+ pry
28
+ sass
29
+ slop (~> 4.0)
30
+ term-ansicolor
31
+ terminal-table
32
+ with_progress
15
33
  diff-lcs (1.3)
16
34
  docile (1.1.5)
17
35
  fakefs (0.10.2)
18
- json (2.0.3)
19
- rake (10.5.0)
36
+ ffi (1.9.25)
37
+ ffi (1.9.25-x64-mingw32)
38
+ highline (2.0.0)
39
+ json (2.6.0)
40
+ method_source (0.9.0)
41
+ parser (2.5.1.0)
42
+ ast (~> 2.4.0)
43
+ pry (0.11.3)
44
+ coderay (~> 1.1.0)
45
+ method_source (~> 0.9.0)
46
+ rake (13.0.6)
47
+ rb-fsevent (0.10.3)
48
+ rb-inotify (0.9.10)
49
+ ffi (>= 0.5.0, < 2)
20
50
  rspec (3.5.0)
21
51
  rspec-core (~> 3.5.0)
22
52
  rspec-expectations (~> 3.5.0)
@@ -30,27 +60,40 @@ GEM
30
60
  diff-lcs (>= 1.2.0, < 2.0)
31
61
  rspec-support (~> 3.5.0)
32
62
  rspec-support (3.5.0)
63
+ ruby-progressbar (1.9.0)
64
+ sass (3.5.6)
65
+ sass-listen (~> 4.0.0)
66
+ sass-listen (4.0.0)
67
+ rb-fsevent (~> 0.9, >= 0.9.4)
68
+ rb-inotify (~> 0.9, >= 0.9.7)
33
69
  simplecov (0.12.0)
34
70
  docile (~> 1.1.0)
35
71
  json (>= 1.8, < 3)
36
72
  simplecov-html (~> 0.10.0)
37
73
  simplecov-html (0.10.0)
74
+ slop (4.6.2)
38
75
  term-ansicolor (1.4.0)
39
76
  tins (~> 1.0)
77
+ terminal-table (1.8.0)
78
+ unicode-display_width (~> 1.1, >= 1.1.1)
40
79
  thor (0.19.4)
41
80
  tins (1.13.2)
81
+ unicode-display_width (1.4.0)
82
+ with_progress (1.0.1)
83
+ ruby-progressbar (~> 1.4)
42
84
 
43
85
  PLATFORMS
44
86
  ruby
45
87
  x64-mingw32
46
88
 
47
89
  DEPENDENCIES
48
- bundler (~> 1.14)
90
+ bundler (~> 2.2)
49
91
  coveralls (~> 0.8)
92
+ deep-cover (~> 0.6)
50
93
  fakefs (~> 0.10)
51
94
  file_data!
52
- rake (~> 10.0)
95
+ rake (~> 13.0)
53
96
  rspec (~> 3.0)
54
97
 
55
98
  BUNDLED WITH
56
- 1.14.6
99
+ 2.2.29
data/LICENSE.txt CHANGED
File without changes
data/README.md CHANGED
@@ -4,12 +4,27 @@ file_data
4
4
  [![Build Status](https://travis-ci.org/ScottHaney/file_data.svg?branch=master)](https://travis-ci.org/ScottHaney/file_data)
5
5
  [![Coverage Status](https://coveralls.io/repos/github/ScottHaney/file_data/badge.svg?branch=master)](https://coveralls.io/github/ScottHaney/file_data?branch=master)
6
6
  [![Code Climate](https://codeclimate.com/github/ScottHaney/file_data/badges/gpa.svg)](https://codeclimate.com/github/ScottHaney/file_data)
7
+ [![Gem Version](https://badge.fury.io/rb/file_data.svg)](https://badge.fury.io/rb/file_data)
7
8
 
8
9
  Ruby library that reads file metadata.
9
10
 
10
- Current support
11
+ The API offers a basic usage and an advanced usage. The basic usage reopens and reparses the file every time it is called, which is no problem when reading a single value but can be a performance drain when reading multiple values. The advanced usage allows the user to grab more than one value without having to read the file more than once.
11
12
 
12
- * Exif: Only jpeg files are supported and FlashPix extensions are not supported
13
+ ## Basic Usage
14
+
15
+ ```ruby
16
+ filepath = '...' # Path to a jpeg or mpeg4 file
17
+
18
+ # Get the date when the file content originated (when a photo was taken, when a movie was recorded, etc.)
19
+ FileData::FileInfo.origin_date(filepath)
20
+
21
+ # Get the date when the file was considered to be created. This is usually tied in some way to when the file itself was created on a disk somewhere (not usually as useful as origin date)
22
+ FileData::FileInfo.creation_date(filepath)
23
+ ```
24
+
25
+ ## Advanced Usage
26
+
27
+ Advanced usage varies by file format type. Currently there are low-level classes for parsing exif and mpeg4 metadata.
13
28
 
14
29
  ## Exif documentation
15
30
 
@@ -221,4 +236,17 @@ FileData::ExifTags.tag_groups[40_965] =
221
236
  }
222
237
  ```
223
238
 
239
+ ## Mpeg4 documentation
240
+
241
+ ```ruby
242
+
243
+ filepath = '...' # path to an mpeg4 file
244
+ File.open(filepath, 'rb') do |stream|
245
+ parser = FileData::MvhdBoxParser # class that parses the box you want
246
+ method = :creation_time # attribute to get from the parse result
247
+ box_path = ['moov', 'mvhd'] # path to get to the box that you want
224
248
 
249
+ # final result that you are looking for
250
+ result = FileData::Mpeg4.get_value(stream, parser, method, *box_path)
251
+ end
252
+ ```
data/Rakefile CHANGED
File without changes
data/dockerfile ADDED
@@ -0,0 +1,13 @@
1
+ FROM ruby:3.0.2-alpine3.13
2
+
3
+ RUN gem install bundler
4
+
5
+ RUN apk update && apk upgrade && \
6
+ apk add --no-cache bash git openssh && \
7
+ apk add --update make && \
8
+ apk add --update gcc && \
9
+ apk add libc-dev
10
+
11
+ WORKDIR /tmp/code
12
+
13
+ CMD [ "/bin/sh" ]
data/file_data.gemspec CHANGED
@@ -9,9 +9,9 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Scott"]
10
10
  spec.email = [""]
11
11
 
12
- spec.summary = %q{Extracts file metadata information (currently only supports exif metadata for jpeg files)}
13
- spec.description = %q{Extracts file metadata information (currently only supports exif metadata for jpeg files)}
14
- spec.homepage = ""
12
+ spec.summary = %q{Provides apis for extracting common metadata out of files as well as low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely supported and mpeg4 (mp4,m4v,moov...) has limited support}
13
+ spec.description = %q{Provides apis for extracting common metadata out of files as well as low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely supported and mpeg4 (mp4,m4v,moov...) has limited support. For common metadata the FileInfo class provides methods names after the metadata items taking a filename. As an example, to get the origin date of a file you would call FileData::FileInfo.origin_date(filename). Advanced apis are provided via specific classes for each metadata type. For example, Exif for exif data and Mpeg4 for mpeg4 data. These can be used to improve the performance of gathering multiple metadata values from a file}
14
+ spec.homepage = "https://github.com/ScottHaney/file_data"
15
15
  spec.license = "MIT"
16
16
 
17
17
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
@@ -30,9 +30,10 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- spec.add_development_dependency "bundler", "~> 1.14"
34
- spec.add_development_dependency "rake", "~> 10.0"
33
+ spec.add_development_dependency "bundler", "~> 2.2"
34
+ spec.add_development_dependency "rake", "~> 13.0"
35
35
  spec.add_development_dependency "rspec", "~> 3.0"
36
36
  spec.add_development_dependency "coveralls", "~> 0.8"
37
37
  spec.add_development_dependency "fakefs", "~> 0.10"
38
+ spec.add_development_dependency "deep-cover", "~> 0.6"
38
39
  end
@@ -0,0 +1,13 @@
1
+ # Methods for reading values from a binary stream
2
+ module BinaryExtensions
3
+ def read_value(num_bytes)
4
+ bytes = each_byte.take(num_bytes)
5
+ bytes.reverse! if @is_little_endian
6
+
7
+ bytes.inject { |total, val| (total << 8) + val }
8
+ end
9
+
10
+ def read_ascii(num_bytes)
11
+ each_byte.take(num_bytes).map(&:chr).join
12
+ end
13
+ end
@@ -0,0 +1,33 @@
1
+ module FileData
2
+ # Operations common to all files
3
+ class FileInfo
4
+ class << self
5
+ attr_reader :info_maps
6
+ end
7
+
8
+ @info_maps ||= {}
9
+
10
+ %w[creation_date origin_date].each do |method_name|
11
+ define_singleton_method(method_name) do |filename|
12
+ File.open(filename, 'rb') do |stream|
13
+ reader = reader_class(filename)
14
+ raise "No metadata parser class found for the file #{filename}" if reader.nil?
15
+
16
+ reader_class(filename).send(method_name, stream)
17
+ end
18
+ end
19
+ end
20
+
21
+ def self.reader_class(filename)
22
+ info_maps[get_reader_key(filename)]
23
+ end
24
+
25
+ def self.can_handle?(filename)
26
+ info_maps.key?(get_reader_key(filename))
27
+ end
28
+
29
+ def self.get_reader_key(filename)
30
+ File.extname(filename).downcase
31
+ end
32
+ end
33
+ end
@@ -1,52 +1,48 @@
1
+ require_relative '../helpers/sized_field'
2
+ require_relative '../helpers/stream_view'
3
+
1
4
  module FileData
2
5
  # Represents a Jpeg image stream
3
6
  class Jpeg
4
7
  SOI_BYTES = [255, 216].freeze
8
+ EOI_BYTES = [255, 217].freeze
5
9
  SECTION_HEADER_SIZE = 4
6
10
  INVALID_HEADER_MSG = 'the given file is not a jpeg file since it does not'\
7
11
  'begin with the start of image (SOI) bytes.'.freeze
8
12
 
9
- def initialize(stream)
10
- @stream = stream
11
- end
12
-
13
- def each_section
14
- read_header
15
- Enumerator.new { |e| yield_sections(e) }.lazy
13
+ def self.each_section(stream)
14
+ view = Helpers::StreamView.new(stream)
15
+ read_header(view)
16
+ Enumerator.new { |e| yield_sections(view, e) }.lazy
16
17
  end
17
18
 
18
- def read_header
19
- soi = read_bytes(SOI_BYTES.size)
19
+ def self.read_header(stream)
20
+ soi = stream.each_byte.take(SOI_BYTES.size)
20
21
  raise INVALID_HEADER_MSG unless soi == SOI_BYTES
21
22
  end
22
23
 
23
- def yield_sections(e)
24
- loop do
25
- next_section_pos = yield_section(e)
26
- break unless section_pos?(next_section_pos)
27
- @stream.seek(next_section_pos)
28
- end
29
- end
30
-
31
- def yield_section(e)
32
- section_start_pos = @stream.pos + 2
33
- marker, size = read_section_header
34
- e.yield marker, size
35
- section_start_pos + size
36
- end
24
+ def self.yield_sections(stream, enumerator)
25
+ until stream.eof?
26
+ marker = stream.each_byte.take(2)
27
+ break if marker == EOI_BYTES
37
28
 
38
- def section_pos?(section_pos)
39
- # Make sure that there are enough bytes for a section header.
40
- # This also handles an ending two byte JPEG EOI sequence.
41
- @stream.size - section_pos >= SECTION_HEADER_SIZE
29
+ section = current_section(stream, marker)
30
+ enumerator.yield section
31
+ stream.seek(section.content_stream.end_pos + 1)
32
+ end
42
33
  end
43
34
 
44
- def read_section_header
45
- [read_bytes(2), read_bytes(2).inject { |a, v| (a << 8) + v }]
46
- end
35
+ # def self.section_pos?(stream)
36
+ # # Make sure that there are enough bytes for a section header.
37
+ # # This also handles an ending two byte JPEG EOI sequence.
38
+ # stream.size >= SECTION_HEADER_SIZE
39
+ # end
47
40
 
48
- def read_bytes(num_bytes)
49
- @stream.each_byte.take(num_bytes)
41
+ def self.current_section(stream, marker)
42
+ content_stream = Helpers::SizedField.create_view(stream, 2)
43
+ JpegSection.new(marker, content_stream)
50
44
  end
51
45
  end
46
+
47
+ JpegSection = Struct.new(:marker, :content_stream)
52
48
  end
@@ -1,9 +1,12 @@
1
1
  require_relative 'exif_reader'
2
2
  require_relative 'exif_jpeg'
3
+ require 'time'
3
4
 
4
5
  module FileData
5
6
  # Convenience class for extracting exif data from a file or stream
6
7
  class Exif
8
+ ['.jpeg', '.jpg'].each { |e| FileInfo.info_maps[e] = Exif }
9
+
7
10
  # Create methods that forward to ExifReader
8
11
  # Each method requires the stream as a parameter to help the user
9
12
  # fall into a "pit of success" by only opening and closing
@@ -23,10 +26,19 @@ module FileData
23
26
 
24
27
  def self.streamify(input)
25
28
  if input.is_a?(String)
26
- File.open(input, 'rb') { |f| yield f }
29
+ ::File.open(input, 'rb') { |f| yield f }
27
30
  else
28
31
  yield input
29
32
  end
30
33
  end
34
+
35
+ def self.creation_date(input)
36
+ raw_tag = FileData::Exif.only_image_tag(input, [34_665, 36_867])
37
+ Time.strptime(raw_tag, '%Y:%m:%d %H:%M:%S') unless raw_tag.nil?
38
+ end
39
+
40
+ def self.origin_date(input)
41
+ creation_date(input)
42
+ end
31
43
  end
32
44
  end
@@ -15,14 +15,20 @@ module FileData
15
15
  ExifStream.new(@stream) if seek_exif
16
16
  end
17
17
 
18
+ private
19
+
18
20
  def seek_exif
19
- Jpeg.new(@stream).each_section
20
- .select { |marker, _| exif_section?(marker) }
21
+ Jpeg.each_section(@stream)
22
+ .select { |section| exif_section?(section) }
21
23
  .first
22
24
  end
23
25
 
24
- def exif_section?(marker)
25
- marker == APP1_BYTES && @stream.each_byte.take(EXIF_ID.size) == EXIF_ID
26
+ def exif_section?(section)
27
+ section.marker == APP1_BYTES && read_exif_id(section)
28
+ end
29
+
30
+ def read_exif_id(section)
31
+ section.content_stream.each_byte.take(EXIF_ID.size) == EXIF_ID
26
32
  end
27
33
  end
28
34
  end
@@ -1,8 +1,11 @@
1
1
  require 'forwardable'
2
+ require_relative '../../core_extensions/binary_extensions'
2
3
 
3
4
  module FileData
4
5
  # Wraps a stream with exif specific logic
5
6
  class ExifStream
7
+ include BinaryExtensions
8
+
6
9
  MOTOROLLA_BYTES = 'MM'.bytes.to_a.freeze
7
10
  INTEL_BYTES = 'II'.bytes.to_a.freeze
8
11
 
@@ -20,7 +23,7 @@ module FileData
20
23
  VALUE_OFFSET_SIZE = 4
21
24
 
22
25
  extend Forwardable
23
- def_delegators :@stream, :seek, :pos
26
+ def_delegators :@stream, :seek, :pos, :each_byte
24
27
 
25
28
  def initialize(stream)
26
29
  @stream = stream
@@ -28,10 +31,10 @@ module FileData
28
31
  end
29
32
 
30
33
  def read_header
31
- @is_big_endian =
34
+ @is_little_endian =
32
35
  case @stream.each_byte.take(2)
33
- when INTEL_BYTES then false
34
- when MOTOROLLA_BYTES then true
36
+ when INTEL_BYTES then true
37
+ when MOTOROLLA_BYTES then false
35
38
  else raise 'the byte order bytes did not match any expected value'
36
39
  end
37
40
 
@@ -63,7 +66,7 @@ module FileData
63
66
  end
64
67
 
65
68
  def read_undefined(size)
66
- [read_raw_val(size), @is_big_endian]
69
+ [read_raw_val(size), @is_little_endian]
67
70
  end
68
71
 
69
72
  def read_raw_val(size)
@@ -95,12 +98,5 @@ module FileData
95
98
  def to_slong(raw_value)
96
99
  -(raw_value & HIGH_BIT_MASK) + (raw_value & ~HIGH_BIT_MASK)
97
100
  end
98
-
99
- def read_value(num_bytes)
100
- bytes = @stream.each_byte.take(num_bytes)
101
- bytes.reverse! unless @is_big_endian
102
-
103
- bytes.inject { |total, val| (total << 8) + val }
104
- end
105
101
  end
106
102
  end
@@ -22,11 +22,11 @@ module FileData
22
22
  end
23
23
  end
24
24
 
25
- def process_ifd(ifd, e)
25
+ def process_ifd(ifd, enumerator)
26
26
  # Yield the tags or just skip ahead
27
27
 
28
28
  if ifds_to_include.include?(ifd.index)
29
- ifd.tags.each { |t| e.yield t }
29
+ ifd.tags.each { |t| enumerator.yield t }
30
30
  else
31
31
  # Avoid skipping the last ifd as this is needless work
32
32
  ifd.skip unless ifd.index == 1
@@ -0,0 +1,38 @@
1
+ require_relative '../../helpers/stream_view'
2
+
3
+ module FileData
4
+ # Mpeg4 box
5
+ class Box
6
+ attr_reader :type, :content_stream, :end_pos
7
+
8
+ def initialize(type, content_stream)
9
+ @type = type
10
+ @content_stream = content_stream
11
+ @end_pos = @content_stream.end_pos
12
+ end
13
+
14
+ def self.parse(view)
15
+ type, pos, size = parse_header(view)
16
+ new(type, Helpers::SubStreamView.new(view.stream, pos, size))
17
+ end
18
+
19
+ def self.parse_header(view)
20
+ start_pos = view.pos
21
+ first_field = view.read_value(4)
22
+ type = view.read_ascii(4)
23
+
24
+ total_size =
25
+ if first_field == 1
26
+ view.read_value(8)
27
+ else
28
+ first_field
29
+ end
30
+
31
+ content_pos = view.pos
32
+ header_size = content_pos - start_pos
33
+ content_size = total_size - header_size
34
+
35
+ [type, content_pos, content_size]
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,10 @@
1
+ module FileData
2
+ # Factory mapping box type to a class that can parse it
3
+ class BoxFactory
4
+ class << self
5
+ attr_reader :map
6
+ end
7
+
8
+ @map ||= {}
9
+ end
10
+ end
@@ -0,0 +1,25 @@
1
+ require_relative '../boxes_reader'
2
+ require_relative '../../../helpers/stream_view'
3
+ require_relative 'ilst_data_box'
4
+
5
+ module FileData
6
+ # Parsers for the 'ilst' box
7
+ class IlstBoxParser
8
+ def self.parse(view)
9
+ size = view.read_value(4)
10
+ index = view.read_value(4)
11
+
12
+ db = find_data_box(view, size)
13
+ data_box = db.nil? ? nil : IlstDataBoxParser.parse(db)
14
+
15
+ IlstBox.new(index, data_box)
16
+ end
17
+
18
+ def self.find_data_box(parent_view, parent_size)
19
+ view = Helpers::SubStreamView.new(parent_view.stream, parent_view.stream.pos, parent_size - 8)
20
+ BoxesReader.read(view).find { |box| box.type == 'data' }
21
+ end
22
+ end
23
+
24
+ IlstBox = Struct.new(:index, :data_box)
25
+ end
@@ -0,0 +1,17 @@
1
+ module FileData
2
+ # Parser for the 'data' box
3
+ class IlstDataBoxParser
4
+ def self.parse(box)
5
+ view = box.content_stream
6
+
7
+ # TO DO - Currently a text value is always assumed...
8
+ data_type = view.read_value(4)
9
+ locale = view.read_value(4)
10
+ value = view.read_ascii(view.remaining_bytes)
11
+
12
+ DataBox.new(data_type, locale, value)
13
+ end
14
+ end
15
+
16
+ DataBox = Struct.new(:data_type, :locale, :value_text)
17
+ end
@@ -0,0 +1,24 @@
1
+ require_relative '../../../helpers/sized_field'
2
+
3
+ module FileData
4
+ # Parser for the 'keys' box
5
+ class KeysBoxParser
6
+ def self.parse(view)
7
+ view.read_value(1) # version field
8
+ view.read_value(3) # flags field
9
+
10
+ entry_count = view.read_value(4)
11
+ Array.new(entry_count) { |index| parse_key(view, index) }
12
+ end
13
+
14
+ def self.parse_key(view, index)
15
+ key_view = Helpers::SizedField.create_view(view, 4)
16
+ namespace = key_view.read_ascii(4)
17
+ value = key_view.read_ascii(key_view.remaining_bytes)
18
+
19
+ Key.new(index + 1, namespace, value)
20
+ end
21
+ end
22
+
23
+ Key = Struct.new(:index, :namespace, :value)
24
+ end
@@ -0,0 +1,42 @@
1
+ require_relative 'keys_box'
2
+ require_relative 'ilst_box'
3
+ require_relative '../box_path'
4
+ require 'time'
5
+
6
+ module FileData
7
+ # Parser for the 'meta' box
8
+ class MetaBoxParser
9
+ def self.parse(view)
10
+ creation_key = get_creation_key(view)
11
+ return MetaBox.new(nil) if creation_key.nil?
12
+
13
+ creation_date_data = get_creation_date(view, creation_key.index)
14
+ return MetaBox.new(nil) if creation_date_data.nil?
15
+
16
+ MetaBox.new(Time.parse(creation_date_data.data_box.value_text))
17
+ end
18
+
19
+ def self.get_creation_key(view)
20
+ kb = BoxPath.get_path(view, 'keys')
21
+ return nil if kb.nil?
22
+
23
+ keys = KeysBoxParser.parse(kb.content_stream)
24
+ keys.find { |key| key.value == 'com.apple.quicktime.creationdate' }
25
+ end
26
+
27
+ def self.get_creation_date(view, index)
28
+ ilst_boxes = get_ilst_boxes(view)
29
+ ilst_boxes.find { |x| x.index == index }
30
+ end
31
+
32
+ def self.get_ilst_boxes(view)
33
+ view.seek view.start_pos
34
+ box = BoxPath.get_path(view, 'ilst')
35
+ ilst_boxes = []
36
+ ilst_boxes << IlstBoxParser.parse(box.content_stream) until box.content_stream.eof?
37
+ ilst_boxes
38
+ end
39
+ end
40
+
41
+ MetaBox = Struct.new(:creation_date)
42
+ end
@@ -0,0 +1,19 @@
1
+ module FileData
2
+ # Parser for the 'mvhd' box
3
+ class MvhdBoxParser
4
+ def self.parse(view)
5
+ MvhdBox.new(parse_mvhd_creation_date(view))
6
+ end
7
+
8
+ def self.parse_mvhd_creation_date(view)
9
+ version = view.read_value(1)
10
+ view.read_value(3) # Flags bytes
11
+
12
+ creation_time = view.read_value(version == 1 ? 8 : 4)
13
+ epoch_delta = 2_082_844_800
14
+ Time.at(creation_time - epoch_delta)
15
+ end
16
+ end
17
+
18
+ MvhdBox = Struct.new(:creation_time)
19
+ end
@@ -0,0 +1,26 @@
1
+ require_relative 'boxes_reader'
2
+
3
+ module FileData
4
+ # Finds Mpeg4 boxes within a stream
5
+ class BoxPath
6
+ def self.get_root_path(stream, *box_path)
7
+ get_path(Helpers::StreamView.new(stream), *box_path)
8
+ end
9
+
10
+ # def self.get_box_path(box, *box_path)
11
+ # get_path(box.content_stream, *box_path)
12
+ # end
13
+
14
+ def self.get_path(stream_view, *box_path)
15
+ match = BoxesReader.read(stream_view).find { |x| x.type == box_path[0] }
16
+
17
+ if match.nil?
18
+ nil
19
+ elsif box_path.length == 1
20
+ match
21
+ else
22
+ get_path(match.content_stream, *box_path[1..-1])
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,19 @@
1
+ require_relative 'box'
2
+ require_relative '../../helpers/stream_view'
3
+
4
+ module FileData
5
+ # Returns all boxes starting from the current position of a stream
6
+ class BoxesReader
7
+ def self.read(view)
8
+ Enumerator.new do |e|
9
+ view.seek view.start_pos
10
+ until view.eof?
11
+ box = Box.parse(view)
12
+
13
+ e.yield box
14
+ view.seek box.end_pos + 1
15
+ end
16
+ end.lazy
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,30 @@
1
+ require_relative 'box_path'
2
+ require_relative 'box_parsers/meta_box'
3
+ require_relative 'box_parsers/mvhd_box'
4
+
5
+ module FileData
6
+ # Parses and returns metadata from an Mpeg4 file
7
+ class Mpeg4
8
+ class << self
9
+ ['.mp4', '.mpeg4', '.m4v', '.mov'].each { |e| FileInfo.info_maps[e] = Mpeg4 }
10
+
11
+ values = [['origin_date', MetaBoxParser,
12
+ 'creation_date', 'moov', 'meta'],
13
+ ['creation_date', MvhdBoxParser,
14
+ 'creation_time', 'moov', 'mvhd']]
15
+
16
+ values.each do |v|
17
+ define_method(v[0]) do |stream|
18
+ get_value(*v.drop(1).unshift(stream))
19
+ end
20
+ end
21
+ end
22
+
23
+ def self.get_value(stream, parser, method, *box_path)
24
+ box = BoxPath.get_root_path(stream, *box_path)
25
+ parser.parse(box.content_stream).send(method) unless box.nil?
26
+ end
27
+ end
28
+
29
+ Mpeg4ValueInfo = Struct.new(:name, :parser_class, :method_name, :box_path)
30
+ end
@@ -0,0 +1,11 @@
1
+ require_relative 'stream_view'
2
+
3
+ module Helpers
4
+ # Binary block that has a size equal to the value of its first field
5
+ class SizedField
6
+ def self.create_view(view, size_len)
7
+ content_size = view.read_value(size_len) - size_len
8
+ SubStreamView.new(view.stream, view.stream.pos, content_size)
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,49 @@
1
+ require 'forwardable'
2
+ require_relative '../core_extensions/binary_extensions'
3
+
4
+ module Helpers
5
+ # Abstract view of a stream
6
+ class BaseStreamView
7
+ extend Forwardable
8
+ include BinaryExtensions
9
+
10
+ attr_reader :stream, :start_pos
11
+
12
+ def initialize(stream, start_pos)
13
+ @stream = stream
14
+ @start_pos = start_pos
15
+ end
16
+
17
+ def_delegators :@stream, :seek, :each_byte, :pos
18
+ end
19
+
20
+ # View of a stream that has a specified size in bytes
21
+ class SubStreamView < BaseStreamView
22
+ attr_reader :end_pos, :size
23
+
24
+ def initialize(stream, start_pos, size)
25
+ super(stream, start_pos)
26
+ @end_pos = @start_pos + size - 1
27
+ @size = size
28
+ end
29
+
30
+ def remaining_bytes
31
+ @end_pos - pos + 1
32
+ end
33
+
34
+ def eof?
35
+ pos > @end_pos || @stream.eof?
36
+ end
37
+ end
38
+
39
+ # View of a stream that ends when eof? is true
40
+ class StreamView < BaseStreamView
41
+ def initialize(stream)
42
+ super(stream, 0)
43
+ end
44
+
45
+ def eof?
46
+ @stream.eof?
47
+ end
48
+ end
49
+ end
@@ -1,3 +1,3 @@
1
1
  module FileData
2
- VERSION = '5.0.0'.freeze
2
+ VERSION = '5.2.3'.freeze
3
3
  end
data/lib/file_data.rb CHANGED
@@ -1 +1,4 @@
1
+ require 'file_data/version'
2
+ require 'file_data/file_types/file_info'
1
3
  require 'file_data/formats/exif/exif'
4
+ require 'file_data/formats/mpeg4/mpeg4'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file_data
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0
4
+ version: 5.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-05-24 00:00:00.000000000 Z
11
+ date: 2021-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.14'
19
+ version: '2.2'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.14'
26
+ version: '2.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '13.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '13.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -80,8 +80,28 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.10'
83
- description: Extracts file metadata information (currently only supports exif metadata
84
- for jpeg files)
83
+ - !ruby/object:Gem::Dependency
84
+ name: deep-cover
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.6'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.6'
97
+ description: Provides apis for extracting common metadata out of files as well as
98
+ low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost
99
+ entirely supported and mpeg4 (mp4,m4v,moov...) has limited support. For common metadata
100
+ the FileInfo class provides methods names after the metadata items taking a filename.
101
+ As an example, to get the origin date of a file you would call FileData::FileInfo.origin_date(filename).
102
+ Advanced apis are provided via specific classes for each metadata type. For example,
103
+ Exif for exif data and Mpeg4 for mpeg4 data. These can be used to improve the performance
104
+ of gathering multiple metadata values from a file
85
105
  email:
86
106
  - ''
87
107
  executables: []
@@ -98,9 +118,12 @@ files:
98
118
  - LICENSE.txt
99
119
  - README.md
100
120
  - Rakefile
121
+ - dockerfile
101
122
  - file_data.gemspec
102
123
  - lib/file_data.rb
124
+ - lib/file_data/core_extensions/binary_extensions.rb
103
125
  - lib/file_data/core_extensions/enumerable_extensions.rb
126
+ - lib/file_data/file_types/file_info.rb
104
127
  - lib/file_data/file_types/jpeg.rb
105
128
  - lib/file_data/formats/exif/exif.rb
106
129
  - lib/file_data/formats/exif/exif_data.rb
@@ -111,13 +134,25 @@ files:
111
134
  - lib/file_data/formats/exif/exif_tags.rb
112
135
  - lib/file_data/formats/exif/ifd.rb
113
136
  - lib/file_data/formats/exif/ordinal_ifd.rb
137
+ - lib/file_data/formats/mpeg4/box.rb
138
+ - lib/file_data/formats/mpeg4/box_factory.rb
139
+ - lib/file_data/formats/mpeg4/box_parsers/ilst_box.rb
140
+ - lib/file_data/formats/mpeg4/box_parsers/ilst_data_box.rb
141
+ - lib/file_data/formats/mpeg4/box_parsers/keys_box.rb
142
+ - lib/file_data/formats/mpeg4/box_parsers/meta_box.rb
143
+ - lib/file_data/formats/mpeg4/box_parsers/mvhd_box.rb
144
+ - lib/file_data/formats/mpeg4/box_path.rb
145
+ - lib/file_data/formats/mpeg4/boxes_reader.rb
146
+ - lib/file_data/formats/mpeg4/mpeg4.rb
147
+ - lib/file_data/helpers/sized_field.rb
148
+ - lib/file_data/helpers/stream_view.rb
114
149
  - lib/file_data/version.rb
115
- homepage: ''
150
+ homepage: https://github.com/ScottHaney/file_data
116
151
  licenses:
117
152
  - MIT
118
153
  metadata:
119
154
  allowed_push_host: https://rubygems.org
120
- post_install_message:
155
+ post_install_message:
121
156
  rdoc_options: []
122
157
  require_paths:
123
158
  - lib
@@ -132,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
167
  - !ruby/object:Gem::Version
133
168
  version: '0'
134
169
  requirements: []
135
- rubyforge_project:
136
- rubygems_version: 2.5.1
137
- signing_key:
170
+ rubygems_version: 3.2.22
171
+ signing_key:
138
172
  specification_version: 4
139
- summary: Extracts file metadata information (currently only supports exif metadata
140
- for jpeg files)
173
+ summary: Provides apis for extracting common metadata out of files as well as low
174
+ level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely
175
+ supported and mpeg4 (mp4,m4v,moov...) has limited support
141
176
  test_files: []