hexapdf 0.14.0 → 0.14.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0cea16b918ff9aa6e7b32759295ef4ab38c899bcbd227d76ad42e0c971360239
4
- data.tar.gz: 932c5edf01114a59d0a64776f304e29f3c8865a2c2c52c340064180464aabad7
3
+ metadata.gz: e4010e277168cec5c8cc5d584ec324064461e63756d18b538cd335235fe04e6d
4
+ data.tar.gz: 2b7a71463082a32605adee682c81cdde6b0eb48d360ca66249b08884f82e571b
5
5
  SHA512:
6
- metadata.gz: 5883c5788487830b0403459b38b4ed1761c1015688977e9823f3c572f1ad645b06eb0578b185ce26f7f02c560050dd8ec7c09e8524b59cd35df4fd6abd1fb4aa
7
- data.tar.gz: cdda51a089c86f27319fe424c9a74dc599ed60860338ef49958cd6a820141fa87a0624f2c657565e3f1b4a2392300807b89886178da6af62d22fa03fb543e372
6
+ metadata.gz: 5748273dc4dc532cd365598e25c4a9cc5872011d2eb638c2986050aeed0a68d2dc5769fda075eb60cbcb76fccbfb1a5b52c3c58581cb6e969978c17d770013e6
7
+ data.tar.gz: 0ab3abf80967804486fa1f50f186b508fd792acfbd8c47646fa7d0c5b0245161e2833620142b2f05a1ee73b01145016dca7bf7781d579284160c9d2dd2c78d0c
@@ -1,3 +1,20 @@
1
+ ## 0.14.1 - 2021-01-21
2
+
3
+ ### Changed
4
+
5
+ * Validation message when checking for allowed values to include the invalid
6
+ object
7
+ * [HexaPDF::FontLoader::FromFile] to allow (re)using an existing font object
8
+ * [HexaPDF::Importer] internals to avoid problems with retained memory
9
+
10
+ ### Fixed
11
+
12
+ * Parsing of invalid PDF files where whitespace is missing after the integer
13
+ value of an indirect object
14
+ * [HexaPDF::Dictionary] so that adding new key-value pairs during validation is
15
+ possible
16
+
17
+
1
18
  ## 0.14.0 - 2020-12-30
2
19
 
3
20
  ### Added
@@ -155,6 +155,9 @@ module HexaPDF
155
155
  # available (see ::define_field).
156
156
  #
157
157
  # * Returns the default value if one is specified and no value is available.
158
+ #
159
+ # Note: This method may throw a "can't add a new key into hash during iteration" error in
160
+ # certain cases because it potentially modifies the underlying hash!
158
161
  def [](name)
159
162
  field = self.class.field(name)
160
163
  data = if key?(name)
@@ -255,7 +258,7 @@ module HexaPDF
255
258
 
256
259
  # Iterates over all currently set fields and those that are required.
257
260
  def each_set_key_or_required_field #:yields: name, field
258
- value.each_key {|name| yield(name, self.class.field(name)) }
261
+ value.keys.each {|name| yield(name, self.class.field(name)) }
259
262
  self.class.each_field do |name, field|
260
263
  yield(name, field) if field.required? && !value.key?(name)
261
264
  end
@@ -301,7 +304,7 @@ module HexaPDF
301
304
 
302
305
  # Check the value of the field against the allowed values.
303
306
  if field.allowed_values && !field.allowed_values.include?(obj)
304
- yield("Field #{name} does not contain an allowed value")
307
+ yield("Field #{name} does not contain an allowed value: #{obj.inspect}")
305
308
  end
306
309
 
307
310
  # Check if field value needs to be (in)direct
@@ -63,6 +63,12 @@ module HexaPDF
63
63
  def use_glyph(glyph_id)
64
64
  return @glyph_map[glyph_id] if @glyph_map.key?(glyph_id)
65
65
  @last_id += 1
66
+ # Handle codes for ASCII characters \r, (, ) and \ specially so that they never appear in
67
+ # the output (PDF serialization would need to escape them)
68
+ if @last_id == 13 || @last_id == 40 || @last_id == 41 || @last_id == 92
69
+ @glyph_map[:"s#{@last_id}"] = @last_id
70
+ @last_id += 1
71
+ end
66
72
  @glyph_map[glyph_id] = @last_id
67
73
  end
68
74
 
@@ -107,7 +113,7 @@ module HexaPDF
107
113
  locations = []
108
114
 
109
115
  @glyph_map.each_key do |old_gid|
110
- glyph = orig_glyf[old_gid]
116
+ glyph = orig_glyf[old_gid.kind_of?(Symbol) ? 0 : old_gid]
111
117
  locations << table.size
112
118
  data = glyph.raw_data
113
119
  if glyph.compound?
@@ -134,7 +140,7 @@ module HexaPDF
134
140
  hmtx = @font[:hmtx]
135
141
  data = ''.b
136
142
  @glyph_map.each_key do |old_gid|
137
- metric = hmtx[old_gid]
143
+ metric = hmtx[old_gid.kind_of?(Symbol) ? 0 : old_gid]
138
144
  data << [metric.advance_width, metric.left_side_bearing].pack('n2')
139
145
  end
140
146
  data
@@ -166,7 +172,10 @@ module HexaPDF
166
172
  # Adds the components of compound glyphs to the subset.
167
173
  def add_glyph_components
168
174
  glyf = @font[:glyf]
169
- @glyph_map.keys.each {|gid| glyf[gid].components&.each {|cgid| use_glyph(cgid) } }
175
+ @glyph_map.keys.each do |gid|
176
+ next if gid.kind_of?(Symbol)
177
+ glyf[gid].components&.each {|cgid| use_glyph(cgid) }
178
+ end
170
179
  end
171
180
 
172
181
  end
@@ -99,18 +99,23 @@ module HexaPDF
99
99
  @max_mem_type42, @min_mem_type1, @max_mem_type1 = read_formatted(24, 's>2N5')
100
100
 
101
101
  sub_table_length = directory_entry.length - 32
102
- @glyph_names = case @format
103
- when 1 then Format1.parse(io, sub_table_length)
104
- when 2 then Format2.parse(io, sub_table_length)
105
- when 3 then Format3.parse(io, sub_table_length)
106
- when 4 then Format4.parse(io, sub_table_length)
107
- else
108
- if font.config['font.true_type.unknown_format'] == :raise
109
- raise HexaPDF::Error, "Unsupported post table format: #{@format}"
102
+ cur_pos = io.pos
103
+ @glyph_names = lambda do |glyph_id|
104
+ io.pos = cur_pos
105
+ @glyph_names = case @format
106
+ when 1 then Format1.parse(io, sub_table_length)
107
+ when 2 then Format2.parse(io, sub_table_length)
108
+ when 3 then Format3.parse(io, sub_table_length)
109
+ when 4 then Format4.parse(io, sub_table_length)
110
110
  else
111
- []
111
+ if font.config['font.true_type.unknown_format'] == :raise
112
+ raise HexaPDF::Error, "Unsupported post table format: #{@format}"
113
+ else
114
+ []
115
+ end
112
116
  end
113
- end
117
+ @glyph_names[glyph_id]
118
+ end
114
119
  end
115
120
 
116
121
  # 'post' table format 1
@@ -63,8 +63,8 @@ module HexaPDF
63
63
  file = document.config['font.map'].dig(name, variant)
64
64
  return nil if file.nil?
65
65
 
66
- unless File.file?(file)
67
- raise HexaPDF::Error, "The configured font file #{file} does not exist"
66
+ unless file.kind_of?(HexaPDF::Font::TrueType::Font) || File.file?(file)
67
+ raise HexaPDF::Error, "The configured font file #{file} is not a valid value"
68
68
  end
69
69
  FromFile.call(document, file, subset: subset)
70
70
  end
@@ -39,26 +39,36 @@ require 'hexapdf/font/true_type_wrapper'
39
39
  module HexaPDF
40
40
  module FontLoader
41
41
 
42
- # This module interprets the font name as file name and tries to load it.
42
+ # This module interprets the font name either as file name and tries to load it, or as font
43
+ # object to be wrapped directly.
43
44
  module FromFile
44
45
 
45
- # Loads the given font by interpreting the font name as file name.
46
+ # :call-seq:
47
+ # FromFile.call(document, file_name, subset: true, **) -> wrapped_font
48
+ # FromFile.call(document, font_object, subset: true, **) -> wrapped_font
46
49
  #
47
- # The file object representing the font file is *not* closed and if needed must be closed by
48
- # the caller once the font is not needed anymore.
50
+ # Returns an appropriate font wrapper for the given file name or font object.
51
+ #
52
+ # If a file name is given, the file object representing the font file is *not* closed and if
53
+ # needed must be closed by the caller once the font is not needed anymore.
54
+ #
55
+ # The first form using a file name is easier to use in one-off cases. However, if multiple
56
+ # documents always refer to the same font, the second form is better to avoid re-parsing the
57
+ # font file.
49
58
  #
50
59
  # +document+::
51
60
  # The PDF document to associate the font object with.
52
61
  #
53
- # +name+::
54
- # The file name.
62
+ # +file_name+/+font_object+::
63
+ # The file name or TrueType font object.
55
64
  #
56
65
  # +subset+::
57
66
  # Specifies whether the font should be subset if possible.
58
67
  def self.call(document, name, subset: true, **)
59
- return nil unless File.file?(name)
68
+ is_font = name.kind_of?(HexaPDF::Font::TrueType::Font)
69
+ return nil unless is_font || File.file?(name)
60
70
 
61
- font = HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
71
+ font = is_font ? name : HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
62
72
  HexaPDF::Font::TrueTypeWrapper.new(document, font, subset: subset)
63
73
  end
64
74
 
@@ -90,7 +90,7 @@ module HexaPDF
90
90
  #
91
91
  # An error is raised if the object doesn't belong to the +source+ document.
92
92
  def import(object)
93
- mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
93
+ mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
94
94
  if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
95
  raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
96
  elsif mapped_object && mapped_object == @destination.object(mapped_object)
@@ -118,7 +118,8 @@ module HexaPDF
118
118
  if object.type == :Catalog || object.type == :Pages
119
119
  @mapper[object.data] = nil
120
120
  else
121
- obj = @mapper[object.data] = object.dup
121
+ obj = object.dup
122
+ @mapper[object.data] = NullableWeakRef.new(obj)
122
123
  obj.document = @destination.__getobj__
123
124
  obj.instance_variable_set(:@data, obj.data.dup)
124
125
  obj.data.oid = 0
@@ -113,7 +113,15 @@ module HexaPDF
113
113
  maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
114
114
  object = nil
115
115
  else
116
- object = @tokenizer.next_object
116
+ begin
117
+ object = @tokenizer.next_object
118
+ rescue MalformedPDFError
119
+ # Handle often found invalid indirect object with missing whitespace after number
120
+ maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
121
+ force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
122
+ object = tok.to_i
123
+ @tokenizer.pos -= 6
124
+ end
117
125
  end
118
126
 
119
127
  tok = @tokenizer.next_token
@@ -66,7 +66,7 @@ module HexaPDF
66
66
  # subclasses of HexaPDF::Object are returned as is (it makes no sense, for example, to return
67
67
  # the hash that describes the Catalog instead of the Catalog object).
68
68
  def [](arg1, arg2 = nil)
69
- data = value[arg1, *arg2]
69
+ data = arg2 ? value[arg1, arg2] : value[arg1]
70
70
  return if data.nil?
71
71
 
72
72
  if arg2 || arg1.kind_of?(Range)
@@ -88,13 +88,39 @@ module HexaPDF
88
88
 
89
89
  # Creates a new Serializer object.
90
90
  def initialize
91
- @dispatcher = Hash.new do |h, klass|
92
- method = nil
93
- klass.ancestors.each do |ancestor_klass|
94
- method = "serialize_#{ancestor_klass.name.to_s.downcase.gsub(/::/, '_')}"
95
- (h[klass] = method; break) if respond_to?(method, true)
96
- end
97
- method
91
+ @dispatcher = {
92
+ Hash => 'serialize_hash',
93
+ Array => 'serialize_array',
94
+ Symbol => 'serialize_symbol',
95
+ String => 'serialize_string',
96
+ Integer => 'serialize_integer',
97
+ Float => 'serialize_float',
98
+ Time => 'serialize_time',
99
+ TrueClass => 'serialize_trueclass',
100
+ FalseClass => 'serialize_falseclass',
101
+ NilClass => 'serialize_nilclass',
102
+ HexaPDF::Reference => 'serialize_hexapdf_reference',
103
+ HexaPDF::Object => 'serialize_hexapdf_object',
104
+ HexaPDF::Stream => 'serialize_hexapdf_stream',
105
+ HexaPDF::Dictionary => 'serialize_hexapdf_object',
106
+ HexaPDF::PDFArray => 'serialize_hexapdf_object',
107
+ HexaPDF::Rectangle => 'serialize_hexapdf_object',
108
+ }
109
+ @dispatcher.default_proc = lambda do |h, klass|
110
+ h[klass] = if klass <= HexaPDF::Stream
111
+ "serialize_hexapdf_stream"
112
+ elsif klass <= HexaPDF::Object
113
+ "serialize_hexapdf_object"
114
+ else
115
+ method = nil
116
+ klass.ancestors.each do |ancestor_klass|
117
+ name = ancestor_klass.name.to_s.downcase
118
+ name.gsub!(/::/, '_')
119
+ method = "serialize_#{name}"
120
+ break if respond_to?(method, true)
121
+ end
122
+ method
123
+ end
98
124
  end
99
125
  @encrypter = false
100
126
  @io = nil
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.14.0'
40
+ VERSION = '0.14.1'
41
41
 
42
42
  end
@@ -72,7 +72,7 @@ describe HexaPDF::Font::TrueType::Table::Post do
72
72
  assert_equal('.notdef', table[0])
73
73
 
74
74
  @font.config['font.true_type.unknown_format'] = :raise
75
- assert_raises(HexaPDF::Error) { create_table(:Post) }
75
+ assert_raises(HexaPDF::Error) { create_table(:Post)[0] }
76
76
  end
77
77
  end
78
78
  end
@@ -27,6 +27,11 @@ describe HexaPDF::Font::TrueType::Subsetter do
27
27
  assert_equal(value, @subsetter.subset_glyph_id(5))
28
28
  end
29
29
 
30
+ it "doesn't use certain subset glyph IDs for performance reasons" do
31
+ 1.upto(13) {|i| @subsetter.use_glyph(i) }
32
+ assert_equal(14, @subsetter.subset_glyph_id(13))
33
+ end
34
+
30
35
  it "creates the subset font file" do
31
36
  gid = @font[:cmap].preferred_table[0x41]
32
37
  @subsetter.use_glyph(gid)
@@ -8,13 +8,17 @@ describe HexaPDF::FontLoader::FromConfiguration do
8
8
  before do
9
9
  @doc = HexaPDF::Document.new
10
10
  font_file = File.join(TEST_DATA_DIR, "fonts", "Ubuntu-Title.ttf")
11
- @doc.config['font.map'] = {'font' => {none: font_file}}
11
+ @font_obj = HexaPDF::Font::TrueType::Font.new(File.open(font_file, 'rb'))
12
+ @doc.config['font.map'] = {'font' => {none: font_file}, 'font1' => {none: @font_obj}}
12
13
  @klass = HexaPDF::FontLoader::FromConfiguration
13
14
  end
14
15
 
15
16
  it "loads the configured font" do
16
17
  wrapper = @klass.call(@doc, "font")
17
18
  assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
19
+ wrapper = @klass.call(@doc, "font1")
20
+ assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
21
+ assert_same(@font_obj, wrapper.wrapped_font)
18
22
  end
19
23
 
20
24
  it "passes the subset value to the wrapper" do
@@ -24,7 +28,7 @@ describe HexaPDF::FontLoader::FromConfiguration do
24
28
  refute(wrapper.subset?)
25
29
  end
26
30
 
27
- it "fails if the font file cannot be read" do
31
+ it "fails if the provided font is invalid" do
28
32
  @doc.config['font.map']['font'][:none] << "unknown"
29
33
  assert_raises(HexaPDF::Error) { @klass.call(@doc, "font") }
30
34
  end
@@ -34,6 +38,6 @@ describe HexaPDF::FontLoader::FromConfiguration do
34
38
  end
35
39
 
36
40
  it "returns a hash with all configured fonts" do
37
- assert_equal({'font' => [:none]}, @klass.available_fonts(@doc))
41
+ assert_equal({'font' => [:none], 'font1' => [:none]}, @klass.available_fonts(@doc))
38
42
  end
39
43
  end
@@ -16,6 +16,13 @@ describe HexaPDF::FontLoader::FromFile do
16
16
  assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
17
17
  end
18
18
 
19
+ it "loads the specified font object" do
20
+ font = HexaPDF::Font::TrueType::Font.new(File.open(@font_file, 'rb'))
21
+ wrapper = @klass.call(@doc, font)
22
+ assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
23
+ assert_same(font, wrapper.wrapped_font)
24
+ end
25
+
19
26
  it "passes the subset value to the wrapper" do
20
27
  wrapper = @klass.call(@doc, @font_file)
21
28
  assert(wrapper.subset?)
@@ -283,6 +283,11 @@ describe HexaPDF::Dictionary do
283
283
  @obj[:TestClass][:Nested][:Nested][:TestClass][:Inherited] = :symbol
284
284
  assert(@obj.validate)
285
285
  end
286
+
287
+ it "makes sure validation works in special case where the dictionary is modified" do
288
+ @dict[:Array] = 5
289
+ refute(@dict.validate {|_, _, object| object[:Boolean] })
290
+ end
286
291
  end
287
292
 
288
293
  describe "delete" do
@@ -88,6 +88,12 @@ describe HexaPDF::Parser do
88
88
  assert_equal('12', TestHelper.collector(stream.fiber))
89
89
  end
90
90
 
91
+ it "handles invalid indirect object value consisting of number followed by endobj without space" do
92
+ create_parser("1 0 obj 749endobj")
93
+ object, * = @parser.parse_indirect_object
94
+ assert_equal(749, object)
95
+ end
96
+
91
97
  it "recovers from an invalid stream length value" do
92
98
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
93
99
  obj, _, _, stream = @parser.parse_indirect_object
@@ -151,6 +157,12 @@ describe HexaPDF::Parser do
151
157
  assert_match(/not CR alone/, exp.message)
152
158
  end
153
159
 
160
+ it "fails for numbers followed by endobj without space" do
161
+ create_parser("1 0 obj 749endobj")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/Invalid object value after 'obj'/, exp.message)
164
+ end
165
+
154
166
  it "fails if the stream length value is invalid" do
155
167
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
168
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.14.0)>>
43
+ <</Producer(HexaPDF version 0.14.1)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.14.0)>>
75
+ <</Producer(HexaPDF version 0.14.1)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-30 00:00:00.000000000 Z
11
+ date: 2021-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse
@@ -631,7 +631,7 @@ files:
631
631
  homepage: https://hexapdf.gettalong.org
632
632
  licenses:
633
633
  - AGPL-3.0
634
- - Commercial License
634
+ - Nonstandard
635
635
  metadata: {}
636
636
  post_install_message:
637
637
  rdoc_options: []