format_parser 0.5.0 → 0.5.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/care.rb +1 -1
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/zip_parser.rb +10 -4
- data/spec/parsers/zip_parser_spec.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db8e4506635f3537372a808c9ab1fae08aba28a80cd5671616be56728f472b15
|
4
|
+
data.tar.gz: 6ba88eeff2cba9e3d77cb2bf3a5af25cc73dbff384a6c92104a2f7f669313c7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74f4bfce599ad2a9897009b9cb4f443dc00233ac0aff1eec104f71cdb820e5e4b63cfa82ca92ac67c44e3092a307801512fc275af86d60283ea1c9c604175712
|
7
|
+
data.tar.gz: ce6b0972fa92c85d04a8a6c3cfa42d1834a1c3bb63e157021c694a8e390922f8d61d51f7072ed818805a8ac7b1358401a1a1d8ee7866c34c8698446dd6b386ed
|
data/README.md
CHANGED
@@ -150,6 +150,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
150
150
|
|
151
151
|
### JPEG
|
152
152
|
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
153
|
+
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
153
154
|
|
154
155
|
### AIFF
|
155
156
|
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
data/lib/care.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# is only available via HTTP, for example, we can have less
|
5
5
|
# fetches and have them return more data for one fetch
|
6
6
|
class Care
|
7
|
-
DEFAULT_PAGE_SIZE =
|
7
|
+
DEFAULT_PAGE_SIZE = 64 * 1024
|
8
8
|
|
9
9
|
class IOWrapper
|
10
10
|
def initialize(io, cache = Cache.new(DEFAULT_PAGE_SIZE))
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
class FormatParser::ZIPParser
|
2
|
+
UNICODE_REPLACEMENT_CHAR = [0xFFFD].pack('U')
|
2
3
|
require_relative 'zip_parser/file_reader'
|
3
4
|
require_relative 'zip_parser/office_formats'
|
4
5
|
|
@@ -11,7 +12,7 @@ class FormatParser::ZIPParser
|
|
11
12
|
filenames_set = Set.new
|
12
13
|
entries_archive = entries.map do |ze|
|
13
14
|
ft = directory?(ze) ? :directory : :file
|
14
|
-
decoded_filename =
|
15
|
+
decoded_filename = decode_filename_of(ze)
|
15
16
|
filenames_set << decoded_filename
|
16
17
|
FormatParser::Archive::Entry.new(type: ft, size: ze.uncompressed_size, filename: decoded_filename)
|
17
18
|
end
|
@@ -35,13 +36,18 @@ class FormatParser::ZIPParser
|
|
35
36
|
zip_entry.filename.end_with?('/')
|
36
37
|
end
|
37
38
|
|
38
|
-
def decode_filename(
|
39
|
+
def decode_filename(filename, likely_unicode:)
|
40
|
+
filename.force_encoding(Encoding::UTF_8) if likely_unicode
|
41
|
+
filename.encode(Encoding::UTF_8, undefined: :replace, replace: UNICODE_REPLACEMENT_CHAR)
|
42
|
+
end
|
43
|
+
|
44
|
+
def decode_filename_of(zip_entry)
|
39
45
|
# Check for the EFS bit in the general-purpose flags. If it is set,
|
40
46
|
# the entry filename can be treated as UTF-8
|
41
47
|
if zip_entry.gp_flags & 0b100000000000 == 0b100000000000
|
42
|
-
zip_entry.filename
|
48
|
+
decode_filename(zip_entry.filename, likely_unicode: true)
|
43
49
|
else
|
44
|
-
zip_entry.filename
|
50
|
+
decode_filename(zip_entry.filename, likely_unicode: false)
|
45
51
|
end
|
46
52
|
end
|
47
53
|
|
@@ -89,4 +89,14 @@ describe FormatParser::ZIPParser do
|
|
89
89
|
expect(e[:type]).to be_kind_of(String)
|
90
90
|
end
|
91
91
|
end
|
92
|
+
|
93
|
+
it 'parses filenames in ZIP encoded in a local DOS encoding' do
|
94
|
+
spanish_zip_path = fixtures_dir + '/ZIP/broken_filename.zip'
|
95
|
+
|
96
|
+
result = subject.call(File.open(spanish_zip_path, 'rb'))
|
97
|
+
|
98
|
+
first_entry = result.entries.first
|
99
|
+
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
100
|
+
expect(first_entry.type).to eq(:directory)
|
101
|
+
end
|
92
102
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-04-
|
12
|
+
date: 2018-04-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|