file_data 5.0.0 → 5.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.coveralls.yml +0 -0
- data/.gitignore +5 -1
- data/.rubocop.yml +2 -2
- data/.travis.yml +0 -0
- data/Gemfile.lock +49 -6
- data/LICENSE.txt +0 -0
- data/README.md +30 -2
- data/Rakefile +0 -0
- data/dockerfile +13 -0
- data/file_data.gemspec +6 -5
- data/lib/file_data/core_extensions/binary_extensions.rb +13 -0
- data/lib/file_data/file_types/file_info.rb +33 -0
- data/lib/file_data/file_types/jpeg.rb +28 -32
- data/lib/file_data/formats/exif/exif.rb +13 -1
- data/lib/file_data/formats/exif/exif_jpeg.rb +10 -4
- data/lib/file_data/formats/exif/exif_stream.rb +8 -12
- data/lib/file_data/formats/exif/exif_tag_reader.rb +2 -2
- data/lib/file_data/formats/mpeg4/box.rb +38 -0
- data/lib/file_data/formats/mpeg4/box_factory.rb +10 -0
- data/lib/file_data/formats/mpeg4/box_parsers/ilst_box.rb +25 -0
- data/lib/file_data/formats/mpeg4/box_parsers/ilst_data_box.rb +17 -0
- data/lib/file_data/formats/mpeg4/box_parsers/keys_box.rb +24 -0
- data/lib/file_data/formats/mpeg4/box_parsers/meta_box.rb +42 -0
- data/lib/file_data/formats/mpeg4/box_parsers/mvhd_box.rb +19 -0
- data/lib/file_data/formats/mpeg4/box_path.rb +26 -0
- data/lib/file_data/formats/mpeg4/boxes_reader.rb +19 -0
- data/lib/file_data/formats/mpeg4/mpeg4.rb +30 -0
- data/lib/file_data/helpers/sized_field.rb +11 -0
- data/lib/file_data/helpers/stream_view.rb +49 -0
- data/lib/file_data/version.rb +1 -1
- data/lib/file_data.rb +3 -0
- metadata +51 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 441d9a1d1481ac066af3b8c0bcd2bae00e7b54b88c322ef251a7579ee79f270b
|
4
|
+
data.tar.gz: af5797dd26aefac7eddee225a81012b7942283bee76e5add1911d67cb5158d4b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea6f2869ffa5b4f2a54d5767b7affc853c546e65866d55d6b655a6ddfddb00f732370abeea3e431cc3a951fb8cf4789d155d9321974181b84cefe8619591db3d
|
7
|
+
data.tar.gz: efb5a79f2b6297ef5b1f85a7acd6f6d33e8b313a47e6c3cfc0b69080819d6fca5d116158341ede91c5e6b3e0a34c7f641fc27f1fd63c08c9bf28ec11f89ce15b
|
data/.coveralls.yml
CHANGED
File without changes
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -8,7 +8,7 @@ Metrics/ClassLength:
|
|
8
8
|
Max: 100
|
9
9
|
EndOfLine:
|
10
10
|
Enabled: false
|
11
|
-
|
11
|
+
Metrics/BlockLength:
|
12
12
|
Exclude:
|
13
13
|
- 'spec/**/*_spec.rb'
|
14
14
|
Style/BlockComments:
|
@@ -16,4 +16,4 @@ Style/BlockComments:
|
|
16
16
|
- 'spec/spec_helper.rb'
|
17
17
|
AllCops:
|
18
18
|
Exclude:
|
19
|
-
- 'file_data.gemspec'
|
19
|
+
- 'file_data.gemspec'
|
data/.travis.yml
CHANGED
File without changes
|
data/Gemfile.lock
CHANGED
@@ -1,22 +1,52 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
file_data (5.0.0)
|
4
|
+
file_data (5.2.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
ast (2.4.0)
|
10
|
+
backports (3.11.3)
|
11
|
+
binding_of_caller (0.8.0)
|
12
|
+
debug_inspector (>= 0.0.1)
|
13
|
+
coderay (1.1.2)
|
9
14
|
coveralls (0.8.19)
|
10
15
|
json (>= 1.8, < 3)
|
11
16
|
simplecov (~> 0.12.0)
|
12
17
|
term-ansicolor (~> 1.3)
|
13
18
|
thor (~> 0.19.1)
|
14
19
|
tins (~> 1.6)
|
20
|
+
debug_inspector (1.1.0)
|
21
|
+
deep-cover (0.6.2)
|
22
|
+
backports (>= 3.11.0)
|
23
|
+
binding_of_caller
|
24
|
+
bundler
|
25
|
+
highline
|
26
|
+
parser (~> 2.5.0)
|
27
|
+
pry
|
28
|
+
sass
|
29
|
+
slop (~> 4.0)
|
30
|
+
term-ansicolor
|
31
|
+
terminal-table
|
32
|
+
with_progress
|
15
33
|
diff-lcs (1.3)
|
16
34
|
docile (1.1.5)
|
17
35
|
fakefs (0.10.2)
|
18
|
-
|
19
|
-
|
36
|
+
ffi (1.9.25)
|
37
|
+
ffi (1.9.25-x64-mingw32)
|
38
|
+
highline (2.0.0)
|
39
|
+
json (2.6.0)
|
40
|
+
method_source (0.9.0)
|
41
|
+
parser (2.5.1.0)
|
42
|
+
ast (~> 2.4.0)
|
43
|
+
pry (0.11.3)
|
44
|
+
coderay (~> 1.1.0)
|
45
|
+
method_source (~> 0.9.0)
|
46
|
+
rake (13.0.6)
|
47
|
+
rb-fsevent (0.10.3)
|
48
|
+
rb-inotify (0.9.10)
|
49
|
+
ffi (>= 0.5.0, < 2)
|
20
50
|
rspec (3.5.0)
|
21
51
|
rspec-core (~> 3.5.0)
|
22
52
|
rspec-expectations (~> 3.5.0)
|
@@ -30,27 +60,40 @@ GEM
|
|
30
60
|
diff-lcs (>= 1.2.0, < 2.0)
|
31
61
|
rspec-support (~> 3.5.0)
|
32
62
|
rspec-support (3.5.0)
|
63
|
+
ruby-progressbar (1.9.0)
|
64
|
+
sass (3.5.6)
|
65
|
+
sass-listen (~> 4.0.0)
|
66
|
+
sass-listen (4.0.0)
|
67
|
+
rb-fsevent (~> 0.9, >= 0.9.4)
|
68
|
+
rb-inotify (~> 0.9, >= 0.9.7)
|
33
69
|
simplecov (0.12.0)
|
34
70
|
docile (~> 1.1.0)
|
35
71
|
json (>= 1.8, < 3)
|
36
72
|
simplecov-html (~> 0.10.0)
|
37
73
|
simplecov-html (0.10.0)
|
74
|
+
slop (4.6.2)
|
38
75
|
term-ansicolor (1.4.0)
|
39
76
|
tins (~> 1.0)
|
77
|
+
terminal-table (1.8.0)
|
78
|
+
unicode-display_width (~> 1.1, >= 1.1.1)
|
40
79
|
thor (0.19.4)
|
41
80
|
tins (1.13.2)
|
81
|
+
unicode-display_width (1.4.0)
|
82
|
+
with_progress (1.0.1)
|
83
|
+
ruby-progressbar (~> 1.4)
|
42
84
|
|
43
85
|
PLATFORMS
|
44
86
|
ruby
|
45
87
|
x64-mingw32
|
46
88
|
|
47
89
|
DEPENDENCIES
|
48
|
-
bundler (~>
|
90
|
+
bundler (~> 2.2)
|
49
91
|
coveralls (~> 0.8)
|
92
|
+
deep-cover (~> 0.6)
|
50
93
|
fakefs (~> 0.10)
|
51
94
|
file_data!
|
52
|
-
rake (~>
|
95
|
+
rake (~> 13.0)
|
53
96
|
rspec (~> 3.0)
|
54
97
|
|
55
98
|
BUNDLED WITH
|
56
|
-
|
99
|
+
2.2.29
|
data/LICENSE.txt
CHANGED
File without changes
|
data/README.md
CHANGED
@@ -4,12 +4,27 @@ file_data
|
|
4
4
|
[![Build Status](https://travis-ci.org/ScottHaney/file_data.svg?branch=master)](https://travis-ci.org/ScottHaney/file_data)
|
5
5
|
[![Coverage Status](https://coveralls.io/repos/github/ScottHaney/file_data/badge.svg?branch=master)](https://coveralls.io/github/ScottHaney/file_data?branch=master)
|
6
6
|
[![Code Climate](https://codeclimate.com/github/ScottHaney/file_data/badges/gpa.svg)](https://codeclimate.com/github/ScottHaney/file_data)
|
7
|
+
[![Gem Version](https://badge.fury.io/rb/file_data.svg)](https://badge.fury.io/rb/file_data)
|
7
8
|
|
8
9
|
Ruby library that reads file metadata.
|
9
10
|
|
10
|
-
|
11
|
+
The API offers a basic usage and an advanced usage. The basic usage reopens and reparses the file every time it is called, which is no problem when reading a single value but can be a performance drain when reading multiple values. The advanced usage allows the user to grab more than one value without having to read the file more than once.
|
11
12
|
|
12
|
-
|
13
|
+
## Basic Usage
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
filepath = '...' # Path to a jpeg or mpeg4 file
|
17
|
+
|
18
|
+
# Get the date when the file content originated (e.g. when a photo was taken or when a movie was recorded)
|
19
|
+
FileData::FileInfo.origin_date(filepath)
|
20
|
+
|
21
|
+
# Get the date when the file is considered to have been created. This is usually tied in some way to when the file itself was created on a disk somewhere (usually less useful than the origin date)
|
22
|
+
FileData::FileInfo.creation_date(filepath)
|
23
|
+
```
|
24
|
+
|
25
|
+
## Advanced Usage
|
26
|
+
|
27
|
+
The advanced usage varies by file format type. Currently there are low-level classes for parsing exif and mpeg4 metadata.
|
13
28
|
|
14
29
|
## Exif documentation
|
15
30
|
|
@@ -221,4 +236,17 @@ FileData::ExifTags.tag_groups[40_965] =
|
|
221
236
|
}
|
222
237
|
```
|
223
238
|
|
239
|
+
## Mpeg4 documentation
|
240
|
+
|
241
|
+
```ruby
|
242
|
+
|
243
|
+
filepath = '...' # path to an mpeg4 file
|
244
|
+
File.open(filepath, 'rb') do |stream|
|
245
|
+
parser = FileData::MvhdBoxParser # class that parses the box you want
|
246
|
+
method = :creation_time # attribute to get from the parse result
|
247
|
+
box_path = ['moov', 'mvhd'] # path to get to the box that you want
|
224
248
|
|
249
|
+
# final result that you are looking for
|
250
|
+
result = FileData::Mpeg4.get_value(stream, parser, method, *box_path)
|
251
|
+
end
|
252
|
+
```
|
data/Rakefile
CHANGED
File without changes
|
data/dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
FROM ruby:3.0.2-alpine3.13
|
2
|
+
|
3
|
+
RUN gem install bundler
|
4
|
+
|
5
|
+
RUN apk update && apk upgrade && \
|
6
|
+
apk add --no-cache bash git openssh && \
|
7
|
+
apk add --update make && \
|
8
|
+
apk add --update gcc && \
|
9
|
+
apk add libc-dev
|
10
|
+
|
11
|
+
WORKDIR /tmp/code
|
12
|
+
|
13
|
+
CMD [ "/bin/sh" ]
|
data/file_data.gemspec
CHANGED
@@ -9,9 +9,9 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Scott"]
|
10
10
|
spec.email = [""]
|
11
11
|
|
12
|
-
spec.summary = %q{
|
13
|
-
spec.description = %q{
|
14
|
-
spec.homepage = ""
|
12
|
+
spec.summary = %q{Provides apis for extracting common metadata out of files as well as low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely supported and mpeg4 (mp4,m4v,moov...) has limited support}
|
13
|
+
spec.description = %q{Provides apis for extracting common metadata out of files as well as low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely supported and mpeg4 (mp4,m4v,moov...) has limited support. For common metadata the FileInfo class provides methods named after the metadata items taking a filename. As an example, to get the origin date of a file you would call FileData::FileInfo.origin_date(filename). Advanced apis are provided via specific classes for each metadata type. For example, Exif for exif data and Mpeg4 for mpeg4 data. These can be used to improve the performance of gathering multiple metadata values from a file}
|
14
|
+
spec.homepage = "https://github.com/ScottHaney/file_data"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
17
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
@@ -30,9 +30,10 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
spec.add_development_dependency "bundler", "~>
|
34
|
-
spec.add_development_dependency "rake", "~>
|
33
|
+
spec.add_development_dependency "bundler", "~> 2.2"
|
34
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
35
35
|
spec.add_development_dependency "rspec", "~> 3.0"
|
36
36
|
spec.add_development_dependency "coveralls", "~> 0.8"
|
37
37
|
spec.add_development_dependency "fakefs", "~> 0.10"
|
38
|
+
spec.add_development_dependency "deep-cover", "~> 0.6"
|
38
39
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Methods for reading values from a binary stream
|
2
|
+
module BinaryExtensions
|
3
|
+
def read_value(num_bytes)
|
4
|
+
bytes = each_byte.take(num_bytes)
|
5
|
+
bytes.reverse! if @is_little_endian
|
6
|
+
|
7
|
+
bytes.inject { |total, val| (total << 8) + val }
|
8
|
+
end
|
9
|
+
|
10
|
+
def read_ascii(num_bytes)
|
11
|
+
each_byte.take(num_bytes).map(&:chr).join
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module FileData
|
2
|
+
# Operations common to all files
|
3
|
+
class FileInfo
|
4
|
+
class << self
|
5
|
+
attr_reader :info_maps
|
6
|
+
end
|
7
|
+
|
8
|
+
@info_maps ||= {}
|
9
|
+
|
10
|
+
%w[creation_date origin_date].each do |method_name|
|
11
|
+
define_singleton_method(method_name) do |filename|
|
12
|
+
File.open(filename, 'rb') do |stream|
|
13
|
+
reader = reader_class(filename)
|
14
|
+
raise "No metadata parser class found for the file #{filename}" if reader.nil?
|
15
|
+
|
16
|
+
reader_class(filename).send(method_name, stream)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.reader_class(filename)
|
22
|
+
info_maps[get_reader_key(filename)]
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.can_handle?(filename)
|
26
|
+
info_maps.key?(get_reader_key(filename))
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.get_reader_key(filename)
|
30
|
+
File.extname(filename).downcase
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -1,52 +1,48 @@
|
|
1
|
+
require_relative '../helpers/sized_field'
|
2
|
+
require_relative '../helpers/stream_view'
|
3
|
+
|
1
4
|
module FileData
|
2
5
|
# Represents a Jpeg image stream
|
3
6
|
class Jpeg
|
4
7
|
SOI_BYTES = [255, 216].freeze
|
8
|
+
EOI_BYTES = [255, 217].freeze
|
5
9
|
SECTION_HEADER_SIZE = 4
|
6
10
|
INVALID_HEADER_MSG = 'the given file is not a jpeg file since it does not'\
|
7
11
|
'begin with the start of image (SOI) bytes.'.freeze
|
8
12
|
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def each_section
|
14
|
-
read_header
|
15
|
-
Enumerator.new { |e| yield_sections(e) }.lazy
|
13
|
+
def self.each_section(stream)
|
14
|
+
view = Helpers::StreamView.new(stream)
|
15
|
+
read_header(view)
|
16
|
+
Enumerator.new { |e| yield_sections(view, e) }.lazy
|
16
17
|
end
|
17
18
|
|
18
|
-
def read_header
|
19
|
-
soi =
|
19
|
+
def self.read_header(stream)
|
20
|
+
soi = stream.each_byte.take(SOI_BYTES.size)
|
20
21
|
raise INVALID_HEADER_MSG unless soi == SOI_BYTES
|
21
22
|
end
|
22
23
|
|
23
|
-
def yield_sections(
|
24
|
-
|
25
|
-
|
26
|
-
break
|
27
|
-
@stream.seek(next_section_pos)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def yield_section(e)
|
32
|
-
section_start_pos = @stream.pos + 2
|
33
|
-
marker, size = read_section_header
|
34
|
-
e.yield marker, size
|
35
|
-
section_start_pos + size
|
36
|
-
end
|
24
|
+
def self.yield_sections(stream, enumerator)
|
25
|
+
until stream.eof?
|
26
|
+
marker = stream.each_byte.take(2)
|
27
|
+
break if marker == EOI_BYTES
|
37
28
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
29
|
+
section = current_section(stream, marker)
|
30
|
+
enumerator.yield section
|
31
|
+
stream.seek(section.content_stream.end_pos + 1)
|
32
|
+
end
|
42
33
|
end
|
43
34
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
35
|
+
# def self.section_pos?(stream)
|
36
|
+
# # Make sure that there are enough bytes for a section header.
|
37
|
+
# # This also handles an ending two byte JPEG EOI sequence.
|
38
|
+
# stream.size >= SECTION_HEADER_SIZE
|
39
|
+
# end
|
47
40
|
|
48
|
-
def
|
49
|
-
|
41
|
+
def self.current_section(stream, marker)
|
42
|
+
content_stream = Helpers::SizedField.create_view(stream, 2)
|
43
|
+
JpegSection.new(marker, content_stream)
|
50
44
|
end
|
51
45
|
end
|
46
|
+
|
47
|
+
JpegSection = Struct.new(:marker, :content_stream)
|
52
48
|
end
|
@@ -1,9 +1,12 @@
|
|
1
1
|
require_relative 'exif_reader'
|
2
2
|
require_relative 'exif_jpeg'
|
3
|
+
require 'time'
|
3
4
|
|
4
5
|
module FileData
|
5
6
|
# Convenience class for extracting exif data from a file or stream
|
6
7
|
class Exif
|
8
|
+
['.jpeg', '.jpg'].each { |e| FileInfo.info_maps[e] = Exif }
|
9
|
+
|
7
10
|
# Create methods that forward to ExifReader
|
8
11
|
# Each method requires the stream as a parameter to help the user
|
9
12
|
# fall into a "pit of success" by only opening and closing
|
@@ -23,10 +26,19 @@ module FileData
|
|
23
26
|
|
24
27
|
def self.streamify(input)
|
25
28
|
if input.is_a?(String)
|
26
|
-
File.open(input, 'rb') { |f| yield f }
|
29
|
+
::File.open(input, 'rb') { |f| yield f }
|
27
30
|
else
|
28
31
|
yield input
|
29
32
|
end
|
30
33
|
end
|
34
|
+
|
35
|
+
def self.creation_date(input)
|
36
|
+
raw_tag = FileData::Exif.only_image_tag(input, [34_665, 36_867])
|
37
|
+
Time.strptime(raw_tag, '%Y:%m:%d %H:%M:%S') unless raw_tag.nil?
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.origin_date(input)
|
41
|
+
creation_date(input)
|
42
|
+
end
|
31
43
|
end
|
32
44
|
end
|
@@ -15,14 +15,20 @@ module FileData
|
|
15
15
|
ExifStream.new(@stream) if seek_exif
|
16
16
|
end
|
17
17
|
|
18
|
+
private
|
19
|
+
|
18
20
|
def seek_exif
|
19
|
-
Jpeg.
|
20
|
-
.select { |
|
21
|
+
Jpeg.each_section(@stream)
|
22
|
+
.select { |section| exif_section?(section) }
|
21
23
|
.first
|
22
24
|
end
|
23
25
|
|
24
|
-
def exif_section?(
|
25
|
-
marker == APP1_BYTES &&
|
26
|
+
def exif_section?(section)
|
27
|
+
section.marker == APP1_BYTES && read_exif_id(section)
|
28
|
+
end
|
29
|
+
|
30
|
+
def read_exif_id(section)
|
31
|
+
section.content_stream.each_byte.take(EXIF_ID.size) == EXIF_ID
|
26
32
|
end
|
27
33
|
end
|
28
34
|
end
|
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'forwardable'
|
2
|
+
require_relative '../../core_extensions/binary_extensions'
|
2
3
|
|
3
4
|
module FileData
|
4
5
|
# Wraps a stream with exif specific logic
|
5
6
|
class ExifStream
|
7
|
+
include BinaryExtensions
|
8
|
+
|
6
9
|
MOTOROLLA_BYTES = 'MM'.bytes.to_a.freeze
|
7
10
|
INTEL_BYTES = 'II'.bytes.to_a.freeze
|
8
11
|
|
@@ -20,7 +23,7 @@ module FileData
|
|
20
23
|
VALUE_OFFSET_SIZE = 4
|
21
24
|
|
22
25
|
extend Forwardable
|
23
|
-
def_delegators :@stream, :seek, :pos
|
26
|
+
def_delegators :@stream, :seek, :pos, :each_byte
|
24
27
|
|
25
28
|
def initialize(stream)
|
26
29
|
@stream = stream
|
@@ -28,10 +31,10 @@ module FileData
|
|
28
31
|
end
|
29
32
|
|
30
33
|
def read_header
|
31
|
-
@
|
34
|
+
@is_little_endian =
|
32
35
|
case @stream.each_byte.take(2)
|
33
|
-
when INTEL_BYTES then
|
34
|
-
when MOTOROLLA_BYTES then
|
36
|
+
when INTEL_BYTES then true
|
37
|
+
when MOTOROLLA_BYTES then false
|
35
38
|
else raise 'the byte order bytes did not match any expected value'
|
36
39
|
end
|
37
40
|
|
@@ -63,7 +66,7 @@ module FileData
|
|
63
66
|
end
|
64
67
|
|
65
68
|
def read_undefined(size)
|
66
|
-
[read_raw_val(size), @
|
69
|
+
[read_raw_val(size), @is_little_endian]
|
67
70
|
end
|
68
71
|
|
69
72
|
def read_raw_val(size)
|
@@ -95,12 +98,5 @@ module FileData
|
|
95
98
|
def to_slong(raw_value)
|
96
99
|
-(raw_value & HIGH_BIT_MASK) + (raw_value & ~HIGH_BIT_MASK)
|
97
100
|
end
|
98
|
-
|
99
|
-
def read_value(num_bytes)
|
100
|
-
bytes = @stream.each_byte.take(num_bytes)
|
101
|
-
bytes.reverse! unless @is_big_endian
|
102
|
-
|
103
|
-
bytes.inject { |total, val| (total << 8) + val }
|
104
|
-
end
|
105
101
|
end
|
106
102
|
end
|
@@ -22,11 +22,11 @@ module FileData
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
-
def process_ifd(ifd,
|
25
|
+
def process_ifd(ifd, enumerator)
|
26
26
|
# Yield the tags or just skip ahead
|
27
27
|
|
28
28
|
if ifds_to_include.include?(ifd.index)
|
29
|
-
ifd.tags.each { |t|
|
29
|
+
ifd.tags.each { |t| enumerator.yield t }
|
30
30
|
else
|
31
31
|
# Avoid skipping the last ifd as this is needless work
|
32
32
|
ifd.skip unless ifd.index == 1
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require_relative '../../helpers/stream_view'
|
2
|
+
|
3
|
+
module FileData
|
4
|
+
# Mpeg4 box
|
5
|
+
class Box
|
6
|
+
attr_reader :type, :content_stream, :end_pos
|
7
|
+
|
8
|
+
def initialize(type, content_stream)
|
9
|
+
@type = type
|
10
|
+
@content_stream = content_stream
|
11
|
+
@end_pos = @content_stream.end_pos
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.parse(view)
|
15
|
+
type, pos, size = parse_header(view)
|
16
|
+
new(type, Helpers::SubStreamView.new(view.stream, pos, size))
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.parse_header(view)
|
20
|
+
start_pos = view.pos
|
21
|
+
first_field = view.read_value(4)
|
22
|
+
type = view.read_ascii(4)
|
23
|
+
|
24
|
+
total_size =
|
25
|
+
if first_field == 1
|
26
|
+
view.read_value(8)
|
27
|
+
else
|
28
|
+
first_field
|
29
|
+
end
|
30
|
+
|
31
|
+
content_pos = view.pos
|
32
|
+
header_size = content_pos - start_pos
|
33
|
+
content_size = total_size - header_size
|
34
|
+
|
35
|
+
[type, content_pos, content_size]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative '../boxes_reader'
|
2
|
+
require_relative '../../../helpers/stream_view'
|
3
|
+
require_relative 'ilst_data_box'
|
4
|
+
|
5
|
+
module FileData
|
6
|
+
# Parsers for the 'ilst' box
|
7
|
+
class IlstBoxParser
|
8
|
+
def self.parse(view)
|
9
|
+
size = view.read_value(4)
|
10
|
+
index = view.read_value(4)
|
11
|
+
|
12
|
+
db = find_data_box(view, size)
|
13
|
+
data_box = db.nil? ? nil : IlstDataBoxParser.parse(db)
|
14
|
+
|
15
|
+
IlstBox.new(index, data_box)
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.find_data_box(parent_view, parent_size)
|
19
|
+
view = Helpers::SubStreamView.new(parent_view.stream, parent_view.stream.pos, parent_size - 8)
|
20
|
+
BoxesReader.read(view).find { |box| box.type == 'data' }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
IlstBox = Struct.new(:index, :data_box)
|
25
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module FileData
|
2
|
+
# Parser for the 'data' box
|
3
|
+
class IlstDataBoxParser
|
4
|
+
def self.parse(box)
|
5
|
+
view = box.content_stream
|
6
|
+
|
7
|
+
# TO DO - Currently a text value is always assumed...
|
8
|
+
data_type = view.read_value(4)
|
9
|
+
locale = view.read_value(4)
|
10
|
+
value = view.read_ascii(view.remaining_bytes)
|
11
|
+
|
12
|
+
DataBox.new(data_type, locale, value)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
DataBox = Struct.new(:data_type, :locale, :value_text)
|
17
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative '../../../helpers/sized_field'
|
2
|
+
|
3
|
+
module FileData
|
4
|
+
# Parser for the 'keys' box
|
5
|
+
class KeysBoxParser
|
6
|
+
def self.parse(view)
|
7
|
+
view.read_value(1) # version field
|
8
|
+
view.read_value(3) # flags field
|
9
|
+
|
10
|
+
entry_count = view.read_value(4)
|
11
|
+
Array.new(entry_count) { |index| parse_key(view, index) }
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.parse_key(view, index)
|
15
|
+
key_view = Helpers::SizedField.create_view(view, 4)
|
16
|
+
namespace = key_view.read_ascii(4)
|
17
|
+
value = key_view.read_ascii(key_view.remaining_bytes)
|
18
|
+
|
19
|
+
Key.new(index + 1, namespace, value)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Key = Struct.new(:index, :namespace, :value)
|
24
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require_relative 'keys_box'
|
2
|
+
require_relative 'ilst_box'
|
3
|
+
require_relative '../box_path'
|
4
|
+
require 'time'
|
5
|
+
|
6
|
+
module FileData
|
7
|
+
# Parser for the 'meta' box
|
8
|
+
class MetaBoxParser
|
9
|
+
def self.parse(view)
|
10
|
+
creation_key = get_creation_key(view)
|
11
|
+
return MetaBox.new(nil) if creation_key.nil?
|
12
|
+
|
13
|
+
creation_date_data = get_creation_date(view, creation_key.index)
|
14
|
+
return MetaBox.new(nil) if creation_date_data.nil?
|
15
|
+
|
16
|
+
MetaBox.new(Time.parse(creation_date_data.data_box.value_text))
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.get_creation_key(view)
|
20
|
+
kb = BoxPath.get_path(view, 'keys')
|
21
|
+
return nil if kb.nil?
|
22
|
+
|
23
|
+
keys = KeysBoxParser.parse(kb.content_stream)
|
24
|
+
keys.find { |key| key.value == 'com.apple.quicktime.creationdate' }
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.get_creation_date(view, index)
|
28
|
+
ilst_boxes = get_ilst_boxes(view)
|
29
|
+
ilst_boxes.find { |x| x.index == index }
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.get_ilst_boxes(view)
|
33
|
+
view.seek view.start_pos
|
34
|
+
box = BoxPath.get_path(view, 'ilst')
|
35
|
+
ilst_boxes = []
|
36
|
+
ilst_boxes << IlstBoxParser.parse(box.content_stream) until box.content_stream.eof?
|
37
|
+
ilst_boxes
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
MetaBox = Struct.new(:creation_date)
|
42
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module FileData
|
2
|
+
# Parser for the 'mvhd' box
|
3
|
+
class MvhdBoxParser
|
4
|
+
def self.parse(view)
|
5
|
+
MvhdBox.new(parse_mvhd_creation_date(view))
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.parse_mvhd_creation_date(view)
|
9
|
+
version = view.read_value(1)
|
10
|
+
view.read_value(3) # Flags bytes
|
11
|
+
|
12
|
+
creation_time = view.read_value(version == 1 ? 8 : 4)
|
13
|
+
epoch_delta = 2_082_844_800
|
14
|
+
Time.at(creation_time - epoch_delta)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
MvhdBox = Struct.new(:creation_time)
|
19
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative 'boxes_reader'
|
2
|
+
|
3
|
+
module FileData
|
4
|
+
# Finds Mpeg4 boxes within a stream
|
5
|
+
class BoxPath
|
6
|
+
def self.get_root_path(stream, *box_path)
|
7
|
+
get_path(Helpers::StreamView.new(stream), *box_path)
|
8
|
+
end
|
9
|
+
|
10
|
+
# def self.get_box_path(box, *box_path)
|
11
|
+
# get_path(box.content_stream, *box_path)
|
12
|
+
# end
|
13
|
+
|
14
|
+
def self.get_path(stream_view, *box_path)
|
15
|
+
match = BoxesReader.read(stream_view).find { |x| x.type == box_path[0] }
|
16
|
+
|
17
|
+
if match.nil?
|
18
|
+
nil
|
19
|
+
elsif box_path.length == 1
|
20
|
+
match
|
21
|
+
else
|
22
|
+
get_path(match.content_stream, *box_path[1..-1])
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'box'
|
2
|
+
require_relative '../../helpers/stream_view'
|
3
|
+
|
4
|
+
module FileData
|
5
|
+
# Returns all boxes starting from the current position of a stream
|
6
|
+
class BoxesReader
|
7
|
+
def self.read(view)
|
8
|
+
Enumerator.new do |e|
|
9
|
+
view.seek view.start_pos
|
10
|
+
until view.eof?
|
11
|
+
box = Box.parse(view)
|
12
|
+
|
13
|
+
e.yield box
|
14
|
+
view.seek box.end_pos + 1
|
15
|
+
end
|
16
|
+
end.lazy
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative 'box_path'
|
2
|
+
require_relative 'box_parsers/meta_box'
|
3
|
+
require_relative 'box_parsers/mvhd_box'
|
4
|
+
|
5
|
+
module FileData
|
6
|
+
# Parses and returns metadata from an Mpeg4 file
|
7
|
+
class Mpeg4
|
8
|
+
class << self
|
9
|
+
['.mp4', '.mpeg4', '.m4v', '.mov'].each { |e| FileInfo.info_maps[e] = Mpeg4 }
|
10
|
+
|
11
|
+
values = [['origin_date', MetaBoxParser,
|
12
|
+
'creation_date', 'moov', 'meta'],
|
13
|
+
['creation_date', MvhdBoxParser,
|
14
|
+
'creation_time', 'moov', 'mvhd']]
|
15
|
+
|
16
|
+
values.each do |v|
|
17
|
+
define_method(v[0]) do |stream|
|
18
|
+
get_value(*v.drop(1).unshift(stream))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.get_value(stream, parser, method, *box_path)
|
24
|
+
box = BoxPath.get_root_path(stream, *box_path)
|
25
|
+
parser.parse(box.content_stream).send(method) unless box.nil?
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
Mpeg4ValueInfo = Struct.new(:name, :parser_class, :method_name, :box_path)
|
30
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require_relative 'stream_view'
|
2
|
+
|
3
|
+
module Helpers
|
4
|
+
# Binary block that has a size equal to the value of its first field
|
5
|
+
class SizedField
|
6
|
+
def self.create_view(view, size_len)
|
7
|
+
content_size = view.read_value(size_len) - size_len
|
8
|
+
SubStreamView.new(view.stream, view.stream.pos, content_size)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require_relative '../core_extensions/binary_extensions'
|
3
|
+
|
4
|
+
module Helpers
|
5
|
+
# Abstract view of a stream
|
6
|
+
class BaseStreamView
|
7
|
+
extend Forwardable
|
8
|
+
include BinaryExtensions
|
9
|
+
|
10
|
+
attr_reader :stream, :start_pos
|
11
|
+
|
12
|
+
def initialize(stream, start_pos)
|
13
|
+
@stream = stream
|
14
|
+
@start_pos = start_pos
|
15
|
+
end
|
16
|
+
|
17
|
+
def_delegators :@stream, :seek, :each_byte, :pos
|
18
|
+
end
|
19
|
+
|
20
|
+
# View of a stream that has a specified size in bytes
|
21
|
+
class SubStreamView < BaseStreamView
|
22
|
+
attr_reader :end_pos, :size
|
23
|
+
|
24
|
+
def initialize(stream, start_pos, size)
|
25
|
+
super(stream, start_pos)
|
26
|
+
@end_pos = @start_pos + size - 1
|
27
|
+
@size = size
|
28
|
+
end
|
29
|
+
|
30
|
+
def remaining_bytes
|
31
|
+
@end_pos - pos + 1
|
32
|
+
end
|
33
|
+
|
34
|
+
def eof?
|
35
|
+
pos > @end_pos || @stream.eof?
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# View of a stream that ends when eof? is true
|
40
|
+
class StreamView < BaseStreamView
|
41
|
+
def initialize(stream)
|
42
|
+
super(stream, 0)
|
43
|
+
end
|
44
|
+
|
45
|
+
def eof?
|
46
|
+
@stream.eof?
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
data/lib/file_data/version.rb
CHANGED
data/lib/file_data.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0.0
|
4
|
+
version: 5.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '2.2'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '2.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,8 +80,28 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.10'
|
83
|
-
|
84
|
-
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: deep-cover
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.6'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.6'
|
97
|
+
description: Provides apis for extracting common metadata out of files as well as
|
98
|
+
low level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost
|
99
|
+
entirely supported and mpeg4 (mp4,m4v,moov...) has limited support. For common metadata
|
100
|
+
the FileInfo class provides methods named after the metadata items taking a filename.
|
101
|
+
As an example, to get the origin date of a file you would call FileData::FileInfo.origin_date(filename).
|
102
|
+
Advanced apis are provided via specific classes for each metadata type. For example,
|
103
|
+
Exif for exif data and Mpeg4 for mpeg4 data. These can be used to improve the performance
|
104
|
+
of gathering multiple metadata values from a file
|
85
105
|
email:
|
86
106
|
- ''
|
87
107
|
executables: []
|
@@ -98,9 +118,12 @@ files:
|
|
98
118
|
- LICENSE.txt
|
99
119
|
- README.md
|
100
120
|
- Rakefile
|
121
|
+
- dockerfile
|
101
122
|
- file_data.gemspec
|
102
123
|
- lib/file_data.rb
|
124
|
+
- lib/file_data/core_extensions/binary_extensions.rb
|
103
125
|
- lib/file_data/core_extensions/enumerable_extensions.rb
|
126
|
+
- lib/file_data/file_types/file_info.rb
|
104
127
|
- lib/file_data/file_types/jpeg.rb
|
105
128
|
- lib/file_data/formats/exif/exif.rb
|
106
129
|
- lib/file_data/formats/exif/exif_data.rb
|
@@ -111,13 +134,25 @@ files:
|
|
111
134
|
- lib/file_data/formats/exif/exif_tags.rb
|
112
135
|
- lib/file_data/formats/exif/ifd.rb
|
113
136
|
- lib/file_data/formats/exif/ordinal_ifd.rb
|
137
|
+
- lib/file_data/formats/mpeg4/box.rb
|
138
|
+
- lib/file_data/formats/mpeg4/box_factory.rb
|
139
|
+
- lib/file_data/formats/mpeg4/box_parsers/ilst_box.rb
|
140
|
+
- lib/file_data/formats/mpeg4/box_parsers/ilst_data_box.rb
|
141
|
+
- lib/file_data/formats/mpeg4/box_parsers/keys_box.rb
|
142
|
+
- lib/file_data/formats/mpeg4/box_parsers/meta_box.rb
|
143
|
+
- lib/file_data/formats/mpeg4/box_parsers/mvhd_box.rb
|
144
|
+
- lib/file_data/formats/mpeg4/box_path.rb
|
145
|
+
- lib/file_data/formats/mpeg4/boxes_reader.rb
|
146
|
+
- lib/file_data/formats/mpeg4/mpeg4.rb
|
147
|
+
- lib/file_data/helpers/sized_field.rb
|
148
|
+
- lib/file_data/helpers/stream_view.rb
|
114
149
|
- lib/file_data/version.rb
|
115
|
-
homepage:
|
150
|
+
homepage: https://github.com/ScottHaney/file_data
|
116
151
|
licenses:
|
117
152
|
- MIT
|
118
153
|
metadata:
|
119
154
|
allowed_push_host: https://rubygems.org
|
120
|
-
post_install_message:
|
155
|
+
post_install_message:
|
121
156
|
rdoc_options: []
|
122
157
|
require_paths:
|
123
158
|
- lib
|
@@ -132,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
167
|
- !ruby/object:Gem::Version
|
133
168
|
version: '0'
|
134
169
|
requirements: []
|
135
|
-
|
136
|
-
|
137
|
-
signing_key:
|
170
|
+
rubygems_version: 3.2.22
|
171
|
+
signing_key:
|
138
172
|
specification_version: 4
|
139
|
-
summary:
|
140
|
-
for jpeg
|
173
|
+
summary: Provides apis for extracting common metadata out of files as well as low
|
174
|
+
level apis for advanced metadata parsing. Currently exif (jpeg/jpg) is almost entirely
|
175
|
+
supported and mpeg4 (mp4,m4v,moov...) has limited support
|
141
176
|
test_files: []
|