format_parser 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ff294de8e6759d2705cabe93f548ffb6733a121410e1b3c9dc929de52356745
4
- data.tar.gz: 663240675efd9e8e5425f27333098e8661ef5e8dbbbd38bab91fff1e605bbefc
3
+ metadata.gz: db8e4506635f3537372a808c9ab1fae08aba28a80cd5671616be56728f472b15
4
+ data.tar.gz: 6ba88eeff2cba9e3d77cb2bf3a5af25cc73dbff384a6c92104a2f7f669313c7c
5
5
  SHA512:
6
- metadata.gz: ed7ea153adf28d2efb9352a880d9f4a146ebdad53e282f75d36c2dee2887740d727c25373d33c83dc8ab863e680e12175bb19109cbecf2347950a6fc0233c385
7
- data.tar.gz: cf6d750264fdfe5a9520c3bb9a8a02b856ba825b709435661b3fcd614f118fc3709d3ba88a25bff0a142f1d03d4fc2389f63740606ff5df4e2db837977a66488
6
+ metadata.gz: 74f4bfce599ad2a9897009b9cb4f443dc00233ac0aff1eec104f71cdb820e5e4b63cfa82ca92ac67c44e3092a307801512fc275af86d60283ea1c9c604175712
7
+ data.tar.gz: ce6b0972fa92c85d04a8a6c3cfa42d1834a1c3bb63e157021c694a8e390922f8d61d51f7072ed818805a8ac7b1358401a1a1d8ee7866c34c8698446dd6b386ed
data/README.md CHANGED
@@ -150,6 +150,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
150
150
 
151
151
  ### JPEG
152
152
  - `divergent_pixel_dimensions_exif.jpg` is used with permission from LiveKom GmbH
153
+ - `extended_reads.jpg` has kindly been made available by Raphaelle Pellerin for use exclusively with format_parser
153
154
 
154
155
  ### AIFF
155
156
  - fixture.aiff was created by one of the project maintainers and is MIT licensed
data/lib/care.rb CHANGED
@@ -4,7 +4,7 @@
4
4
  # is only available via HTTP, for example, we can have less
5
5
  # fetches and have them return more data for one fetch
6
6
  class Care
7
- DEFAULT_PAGE_SIZE = 16 * 1024
7
+ DEFAULT_PAGE_SIZE = 64 * 1024
8
8
 
9
9
  class IOWrapper
10
10
  def initialize(io, cache = Cache.new(DEFAULT_PAGE_SIZE))
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '0.5.0'
2
+ VERSION = '0.5.1'
3
3
  end
@@ -1,4 +1,5 @@
1
1
  class FormatParser::ZIPParser
2
+ UNICODE_REPLACEMENT_CHAR = [0xFFFD].pack('U')
2
3
  require_relative 'zip_parser/file_reader'
3
4
  require_relative 'zip_parser/office_formats'
4
5
 
@@ -11,7 +12,7 @@ class FormatParser::ZIPParser
11
12
  filenames_set = Set.new
12
13
  entries_archive = entries.map do |ze|
13
14
  ft = directory?(ze) ? :directory : :file
14
- decoded_filename = decode_filename(ze)
15
+ decoded_filename = decode_filename_of(ze)
15
16
  filenames_set << decoded_filename
16
17
  FormatParser::Archive::Entry.new(type: ft, size: ze.uncompressed_size, filename: decoded_filename)
17
18
  end
@@ -35,13 +36,18 @@ class FormatParser::ZIPParser
35
36
  zip_entry.filename.end_with?('/')
36
37
  end
37
38
 
38
- def decode_filename(zip_entry)
39
+ def decode_filename(filename, likely_unicode:)
40
+ filename.force_encoding(Encoding::UTF_8) if likely_unicode
41
+ filename.encode(Encoding::UTF_8, undefined: :replace, replace: UNICODE_REPLACEMENT_CHAR)
42
+ end
43
+
44
+ def decode_filename_of(zip_entry)
39
45
  # Check for the EFS bit in the general-purpose flags. If it is set,
40
46
  # the entry filename can be treated as UTF-8
41
47
  if zip_entry.gp_flags & 0b100000000000 == 0b100000000000
42
- zip_entry.filename.unpack('U*').pack('U*')
48
+ decode_filename(zip_entry.filename, likely_unicode: true)
43
49
  else
44
- zip_entry.filename.encode(Encoding::UTF_8, undefined: :replace)
50
+ decode_filename(zip_entry.filename, likely_unicode: false)
45
51
  end
46
52
  end
47
53
 
@@ -89,4 +89,14 @@ describe FormatParser::ZIPParser do
89
89
  expect(e[:type]).to be_kind_of(String)
90
90
  end
91
91
  end
92
+
93
+ it 'parses filenames in ZIP encoded in a local DOS encoding' do
94
+ spanish_zip_path = fixtures_dir + '/ZIP/broken_filename.zip'
95
+
96
+ result = subject.call(File.open(spanish_zip_path, 'rb'))
97
+
98
+ first_entry = result.entries.first
99
+ expect(first_entry.filename).to eq('Li��nia Extreme//')
100
+ expect(first_entry.type).to eq(:directory)
101
+ end
92
102
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-04-03 00:00:00.000000000 Z
12
+ date: 2018-04-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ks