format_parser 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ff294de8e6759d2705cabe93f548ffb6733a121410e1b3c9dc929de52356745
4
- data.tar.gz: 663240675efd9e8e5425f27333098e8661ef5e8dbbbd38bab91fff1e605bbefc
3
+ metadata.gz: db8e4506635f3537372a808c9ab1fae08aba28a80cd5671616be56728f472b15
4
+ data.tar.gz: 6ba88eeff2cba9e3d77cb2bf3a5af25cc73dbff384a6c92104a2f7f669313c7c
5
5
  SHA512:
6
- metadata.gz: ed7ea153adf28d2efb9352a880d9f4a146ebdad53e282f75d36c2dee2887740d727c25373d33c83dc8ab863e680e12175bb19109cbecf2347950a6fc0233c385
7
- data.tar.gz: cf6d750264fdfe5a9520c3bb9a8a02b856ba825b709435661b3fcd614f118fc3709d3ba88a25bff0a142f1d03d4fc2389f63740606ff5df4e2db837977a66488
6
+ metadata.gz: 74f4bfce599ad2a9897009b9cb4f443dc00233ac0aff1eec104f71cdb820e5e4b63cfa82ca92ac67c44e3092a307801512fc275af86d60283ea1c9c604175712
7
+ data.tar.gz: ce6b0972fa92c85d04a8a6c3cfa42d1834a1c3bb63e157021c694a8e390922f8d61d51f7072ed818805a8ac7b1358401a1a1d8ee7866c34c8698446dd6b386ed
data/README.md CHANGED
@@ -150,6 +150,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
150
150
 
151
151
  ### JPEG
152
152
  - `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
153
+ - `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
153
154
 
154
155
  ### AIFF
155
156
  - fixture.aiff was created by one of the project maintainers and is MIT licensed
data/lib/care.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # is only available via HTTP, for example, we can have less
5
5
  # fetches and have them return more data for one fetch
6
6
  class Care
7
- DEFAULT_PAGE_SIZE = 16 * 1024
7
+ DEFAULT_PAGE_SIZE = 64 * 1024
8
8
 
9
9
  class IOWrapper
10
10
  def initialize(io, cache = Cache.new(DEFAULT_PAGE_SIZE))
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.5.0'
2
+ VERSION = '0.5.1'
3
3
  end
@@ -1,4 +1,5 @@
1
1
  class FormatParser::ZIPParser
2
+ UNICODE_REPLACEMENT_CHAR = [0xFFFD].pack('U')
2
3
  require_relative 'zip_parser/file_reader'
3
4
  require_relative 'zip_parser/office_formats'
4
5
 
@@ -11,7 +12,7 @@ class FormatParser::ZIPParser
11
12
  filenames_set = Set.new
12
13
  entries_archive = entries.map do |ze|
13
14
  ft = directory?(ze) ? :directory : :file
14
- decoded_filename = decode_filename(ze)
15
+ decoded_filename = decode_filename_of(ze)
15
16
  filenames_set << decoded_filename
16
17
  FormatParser::Archive::Entry.new(type: ft, size: ze.uncompressed_size, filename: decoded_filename)
17
18
  end
@@ -35,13 +36,18 @@ class FormatParser::ZIPParser
35
36
  zip_entry.filename.end_with?('/')
36
37
  end
37
38
 
38
- def decode_filename(zip_entry)
39
+ def decode_filename(filename, likely_unicode:)
40
+ filename.force_encoding(Encoding::UTF_8) if likely_unicode
41
+ filename.encode(Encoding::UTF_8, undefined: :replace, replace: UNICODE_REPLACEMENT_CHAR)
42
+ end
43
+
44
+ def decode_filename_of(zip_entry)
39
45
  # Check for the EFS bit in the general-purpose flags. If it is set,
40
46
  # the entry filename can be treated as UTF-8
41
47
  if zip_entry.gp_flags & 0b100000000000 == 0b100000000000
42
- zip_entry.filename.unpack('U*').pack('U*')
48
+ decode_filename(zip_entry.filename, likely_unicode: true)
43
49
  else
44
- zip_entry.filename.encode(Encoding::UTF_8, undefined: :replace)
50
+ decode_filename(zip_entry.filename, likely_unicode: false)
45
51
  end
46
52
  end
47
53
 
@@ -89,4 +89,14 @@ describe FormatParser::ZIPParser do
89
89
  expect(e[:type]).to be_kind_of(String)
90
90
  end
91
91
  end
92
+
93
+ it 'parses filenames in ZIP encoded in a local DOS encoding' do
94
+ spanish_zip_path = fixtures_dir + '/ZIP/broken_filename.zip'
95
+
96
+ result = subject.call(File.open(spanish_zip_path, 'rb'))
97
+
98
+ first_entry = result.entries.first
99
+ expect(first_entry.filename).to eq('Li��nia Extreme//')
100
+ expect(first_entry.type).to eq(:directory)
101
+ end
92
102
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-04-03 00:00:00.000000000 Z
12
+ date: 2018-04-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks