format_parser 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/care.rb +1 -1
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/zip_parser.rb +10 -4
- data/spec/parsers/zip_parser_spec.rb +10 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db8e4506635f3537372a808c9ab1fae08aba28a80cd5671616be56728f472b15
|
4
|
+
data.tar.gz: 6ba88eeff2cba9e3d77cb2bf3a5af25cc73dbff384a6c92104a2f7f669313c7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74f4bfce599ad2a9897009b9cb4f443dc00233ac0aff1eec104f71cdb820e5e4b63cfa82ca92ac67c44e3092a307801512fc275af86d60283ea1c9c604175712
|
7
|
+
data.tar.gz: ce6b0972fa92c85d04a8a6c3cfa42d1834a1c3bb63e157021c694a8e390922f8d61d51f7072ed818805a8ac7b1358401a1a1d8ee7866c34c8698446dd6b386ed
|
data/README.md
CHANGED
@@ -150,6 +150,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
150
150
|
|
151
151
|
### JPEG
|
152
152
|
- `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
|
153
|
+
- `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
|
153
154
|
|
154
155
|
### AIFF
|
155
156
|
- fixture.aiff was created by one of the project maintainers and is MIT licensed
|
data/lib/care.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# is only available via HTTP, for example, we can have less
|
5
5
|
# fetches and have them return more data for one fetch
|
6
6
|
class Care
|
7
|
-
DEFAULT_PAGE_SIZE =
|
7
|
+
DEFAULT_PAGE_SIZE = 64 * 1024
|
8
8
|
|
9
9
|
class IOWrapper
|
10
10
|
def initialize(io, cache = Cache.new(DEFAULT_PAGE_SIZE))
|
data/lib/parsers/zip_parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
class FormatParser::ZIPParser
|
2
|
+
UNICODE_REPLACEMENT_CHAR = [0xFFFD].pack('U')
|
2
3
|
require_relative 'zip_parser/file_reader'
|
3
4
|
require_relative 'zip_parser/office_formats'
|
4
5
|
|
@@ -11,7 +12,7 @@ class FormatParser::ZIPParser
|
|
11
12
|
filenames_set = Set.new
|
12
13
|
entries_archive = entries.map do |ze|
|
13
14
|
ft = directory?(ze) ? :directory : :file
|
14
|
-
decoded_filename =
|
15
|
+
decoded_filename = decode_filename_of(ze)
|
15
16
|
filenames_set << decoded_filename
|
16
17
|
FormatParser::Archive::Entry.new(type: ft, size: ze.uncompressed_size, filename: decoded_filename)
|
17
18
|
end
|
@@ -35,13 +36,18 @@ class FormatParser::ZIPParser
|
|
35
36
|
zip_entry.filename.end_with?('/')
|
36
37
|
end
|
37
38
|
|
38
|
-
def decode_filename(
|
39
|
+
def decode_filename(filename, likely_unicode:)
|
40
|
+
filename.force_encoding(Encoding::UTF_8) if likely_unicode
|
41
|
+
filename.encode(Encoding::UTF_8, undefined: :replace, replace: UNICODE_REPLACEMENT_CHAR)
|
42
|
+
end
|
43
|
+
|
44
|
+
def decode_filename_of(zip_entry)
|
39
45
|
# Check for the EFS bit in the general-purpose flags. If it is set,
|
40
46
|
# the entry filename can be treated as UTF-8
|
41
47
|
if zip_entry.gp_flags & 0b100000000000 == 0b100000000000
|
42
|
-
zip_entry.filename
|
48
|
+
decode_filename(zip_entry.filename, likely_unicode: true)
|
43
49
|
else
|
44
|
-
zip_entry.filename
|
50
|
+
decode_filename(zip_entry.filename, likely_unicode: false)
|
45
51
|
end
|
46
52
|
end
|
47
53
|
|
@@ -89,4 +89,14 @@ describe FormatParser::ZIPParser do
|
|
89
89
|
expect(e[:type]).to be_kind_of(String)
|
90
90
|
end
|
91
91
|
end
|
92
|
+
|
93
|
+
it 'parses filenames in ZIP encoded in a local DOS encoding' do
|
94
|
+
spanish_zip_path = fixtures_dir + '/ZIP/broken_filename.zip'
|
95
|
+
|
96
|
+
result = subject.call(File.open(spanish_zip_path, 'rb'))
|
97
|
+
|
98
|
+
first_entry = result.entries.first
|
99
|
+
expect(first_entry.filename).to eq('Li��nia Extreme//')
|
100
|
+
expect(first_entry.type).to eq(:directory)
|
101
|
+
end
|
92
102
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-04-
|
12
|
+
date: 2018-04-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|