hexapdf 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0cea16b918ff9aa6e7b32759295ef4ab38c899bcbd227d76ad42e0c971360239
4
- data.tar.gz: 932c5edf01114a59d0a64776f304e29f3c8865a2c2c52c340064180464aabad7
3
+ metadata.gz: e4010e277168cec5c8cc5d584ec324064461e63756d18b538cd335235fe04e6d
4
+ data.tar.gz: 2b7a71463082a32605adee682c81cdde6b0eb48d360ca66249b08884f82e571b
5
5
  SHA512:
6
- metadata.gz: 5883c5788487830b0403459b38b4ed1761c1015688977e9823f3c572f1ad645b06eb0578b185ce26f7f02c560050dd8ec7c09e8524b59cd35df4fd6abd1fb4aa
7
- data.tar.gz: cdda51a089c86f27319fe424c9a74dc599ed60860338ef49958cd6a820141fa87a0624f2c657565e3f1b4a2392300807b89886178da6af62d22fa03fb543e372
6
+ metadata.gz: 5748273dc4dc532cd365598e25c4a9cc5872011d2eb638c2986050aeed0a68d2dc5769fda075eb60cbcb76fccbfb1a5b52c3c58581cb6e969978c17d770013e6
7
+ data.tar.gz: 0ab3abf80967804486fa1f50f186b508fd792acfbd8c47646fa7d0c5b0245161e2833620142b2f05a1ee73b01145016dca7bf7781d579284160c9d2dd2c78d0c
@@ -1,3 +1,20 @@
1
+ ## 0.14.1 - 2021-01-21
2
+
3
+ ### Changed
4
+
5
+ * Validation message when checking for allowed values to include the invalid
6
+ object
7
+ * [HexaPDF::FontLoader::FromFile] to allow (re)using an existing font object
8
+ * [HexaPDF::Importer] internals to avoid problems with retained memory
9
+
10
+ ### Fixed
11
+
12
+ * Parsing of invalid PDF files where whitespace is missing after the integer
13
+ value of an indirect object
14
+ * [HexaPDF::Dictionary] so that adding new key-value pairs during validation is
15
+ possible
16
+
17
+
1
18
  ## 0.14.0 - 2020-12-30
2
19
 
3
20
  ### Added
@@ -155,6 +155,9 @@ module HexaPDF
155
155
  # available (see ::define_field).
156
156
  #
157
157
  # * Returns the default value if one is specified and no value is available.
158
+ #
159
+ # Note: This method may raise a "can't add a new key into hash during iteration" error in
160
+ # certain cases because it potentially modifies the underlying hash!
158
161
  def [](name)
159
162
  field = self.class.field(name)
160
163
  data = if key?(name)
@@ -255,7 +258,7 @@ module HexaPDF
255
258
 
256
259
  # Iterates over all currently set fields and those that are required.
257
260
  def each_set_key_or_required_field #:yields: name, field
258
- value.each_key {|name| yield(name, self.class.field(name)) }
261
+ value.keys.each {|name| yield(name, self.class.field(name)) }
259
262
  self.class.each_field do |name, field|
260
263
  yield(name, field) if field.required? && !value.key?(name)
261
264
  end
@@ -301,7 +304,7 @@ module HexaPDF
301
304
 
302
305
  # Check the value of the field against the allowed values.
303
306
  if field.allowed_values && !field.allowed_values.include?(obj)
304
- yield("Field #{name} does not contain an allowed value")
307
+ yield("Field #{name} does not contain an allowed value: #{obj.inspect}")
305
308
  end
306
309
 
307
310
  # Check if field value needs to be (in)direct
@@ -63,6 +63,12 @@ module HexaPDF
63
63
  def use_glyph(glyph_id)
64
64
  return @glyph_map[glyph_id] if @glyph_map.key?(glyph_id)
65
65
  @last_id += 1
66
+ # Handle codes for ASCII characters \r, (, ) and \ specially so that they never appear in
67
+ # the output (PDF serialization would need to escape them)
68
+ if @last_id == 13 || @last_id == 40 || @last_id == 41 || @last_id == 92
69
+ @glyph_map[:"s#{@last_id}"] = @last_id
70
+ @last_id += 1
71
+ end
66
72
  @glyph_map[glyph_id] = @last_id
67
73
  end
68
74
 
@@ -107,7 +113,7 @@ module HexaPDF
107
113
  locations = []
108
114
 
109
115
  @glyph_map.each_key do |old_gid|
110
- glyph = orig_glyf[old_gid]
116
+ glyph = orig_glyf[old_gid.kind_of?(Symbol) ? 0 : old_gid]
111
117
  locations << table.size
112
118
  data = glyph.raw_data
113
119
  if glyph.compound?
@@ -134,7 +140,7 @@ module HexaPDF
134
140
  hmtx = @font[:hmtx]
135
141
  data = ''.b
136
142
  @glyph_map.each_key do |old_gid|
137
- metric = hmtx[old_gid]
143
+ metric = hmtx[old_gid.kind_of?(Symbol) ? 0 : old_gid]
138
144
  data << [metric.advance_width, metric.left_side_bearing].pack('n2')
139
145
  end
140
146
  data
@@ -166,7 +172,10 @@ module HexaPDF
166
172
  # Adds the components of compound glyphs to the subset.
167
173
  def add_glyph_components
168
174
  glyf = @font[:glyf]
169
- @glyph_map.keys.each {|gid| glyf[gid].components&.each {|cgid| use_glyph(cgid) } }
175
+ @glyph_map.keys.each do |gid|
176
+ next if gid.kind_of?(Symbol)
177
+ glyf[gid].components&.each {|cgid| use_glyph(cgid) }
178
+ end
170
179
  end
171
180
 
172
181
  end
@@ -99,18 +99,23 @@ module HexaPDF
99
99
  @max_mem_type42, @min_mem_type1, @max_mem_type1 = read_formatted(24, 's>2N5')
100
100
 
101
101
  sub_table_length = directory_entry.length - 32
102
- @glyph_names = case @format
103
- when 1 then Format1.parse(io, sub_table_length)
104
- when 2 then Format2.parse(io, sub_table_length)
105
- when 3 then Format3.parse(io, sub_table_length)
106
- when 4 then Format4.parse(io, sub_table_length)
107
- else
108
- if font.config['font.true_type.unknown_format'] == :raise
109
- raise HexaPDF::Error, "Unsupported post table format: #{@format}"
102
+ cur_pos = io.pos
103
+ @glyph_names = lambda do |glyph_id|
104
+ io.pos = cur_pos
105
+ @glyph_names = case @format
106
+ when 1 then Format1.parse(io, sub_table_length)
107
+ when 2 then Format2.parse(io, sub_table_length)
108
+ when 3 then Format3.parse(io, sub_table_length)
109
+ when 4 then Format4.parse(io, sub_table_length)
110
110
  else
111
- []
111
+ if font.config['font.true_type.unknown_format'] == :raise
112
+ raise HexaPDF::Error, "Unsupported post table format: #{@format}"
113
+ else
114
+ []
115
+ end
112
116
  end
113
- end
117
+ @glyph_names[glyph_id]
118
+ end
114
119
  end
115
120
 
116
121
  # 'post' table format 1
@@ -63,8 +63,8 @@ module HexaPDF
63
63
  file = document.config['font.map'].dig(name, variant)
64
64
  return nil if file.nil?
65
65
 
66
- unless File.file?(file)
67
- raise HexaPDF::Error, "The configured font file #{file} does not exist"
66
+ unless file.kind_of?(HexaPDF::Font::TrueType::Font) || File.file?(file)
67
+ raise HexaPDF::Error, "The configured font file #{file} is not a valid value"
68
68
  end
69
69
  FromFile.call(document, file, subset: subset)
70
70
  end
@@ -39,26 +39,36 @@ require 'hexapdf/font/true_type_wrapper'
39
39
  module HexaPDF
40
40
  module FontLoader
41
41
 
42
- # This module interprets the font name as file name and tries to load it.
42
+ # This module interprets the font name either as a file name and tries to load it, or as a font
43
+ # object to be wrapped directly.
43
44
  module FromFile
44
45
 
45
- # Loads the given font by interpreting the font name as file name.
46
+ # :call-seq:
47
+ # FromFile.call(document, file_name, subset: true, **) -> wrapped_font
48
+ # FromFile.call(document, font_object, subset: true, **) -> wrapped_font
46
49
  #
47
- # The file object representing the font file is *not* closed and if needed must be closed by
48
- # the caller once the font is not needed anymore.
50
+ # Returns an appropriate font wrapper for the given file name or font object.
51
+ #
52
+ # If a file name is given, the file object representing the font file is *not* closed and if
53
+ # needed must be closed by the caller once the font is not needed anymore.
54
+ #
55
+ # The first form using a file name is easier to use in one-off cases. However, if multiple
56
+ # documents always refer to the same font, the second form is better to avoid re-parsing the
57
+ # font file.
49
58
  #
50
59
  # +document+::
51
60
  # The PDF document to associate the font object with.
52
61
  #
53
- # +name+::
54
- # The file name.
62
+ # +file_name+/+font_object+::
63
+ # The file name or TrueType font object.
55
64
  #
56
65
  # +subset+::
57
66
  # Specifies whether the font should be subset if possible.
58
67
  def self.call(document, name, subset: true, **)
59
- return nil unless File.file?(name)
68
+ is_font = name.kind_of?(HexaPDF::Font::TrueType::Font)
69
+ return nil unless is_font || File.file?(name)
60
70
 
61
- font = HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
71
+ font = is_font ? name : HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
62
72
  HexaPDF::Font::TrueTypeWrapper.new(document, font, subset: subset)
63
73
  end
64
74
 
@@ -90,7 +90,7 @@ module HexaPDF
90
90
  #
91
91
  # An error is raised if the object doesn't belong to the +source+ document.
92
92
  def import(object)
93
- mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
93
+ mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
94
94
  if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
95
  raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
96
  elsif mapped_object && mapped_object == @destination.object(mapped_object)
@@ -118,7 +118,8 @@ module HexaPDF
118
118
  if object.type == :Catalog || object.type == :Pages
119
119
  @mapper[object.data] = nil
120
120
  else
121
- obj = @mapper[object.data] = object.dup
121
+ obj = object.dup
122
+ @mapper[object.data] = NullableWeakRef.new(obj)
122
123
  obj.document = @destination.__getobj__
123
124
  obj.instance_variable_set(:@data, obj.data.dup)
124
125
  obj.data.oid = 0
@@ -113,7 +113,15 @@ module HexaPDF
113
113
  maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
114
114
  object = nil
115
115
  else
116
- object = @tokenizer.next_object
116
+ begin
117
+ object = @tokenizer.next_object
118
+ rescue MalformedPDFError
119
+ # Handle often found invalid indirect object with missing whitespace after number
120
+ maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
121
+ force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
122
+ object = tok.to_i
123
+ @tokenizer.pos -= 6
124
+ end
117
125
  end
118
126
 
119
127
  tok = @tokenizer.next_token
@@ -66,7 +66,7 @@ module HexaPDF
66
66
  # subclasses of HexaPDF::Object are returned as is (it makes no sense, for example, to return
67
67
  # the hash that describes the Catalog instead of the Catalog object).
68
68
  def [](arg1, arg2 = nil)
69
- data = value[arg1, *arg2]
69
+ data = arg2 ? value[arg1, arg2] : value[arg1]
70
70
  return if data.nil?
71
71
 
72
72
  if arg2 || arg1.kind_of?(Range)
@@ -88,13 +88,39 @@ module HexaPDF
88
88
 
89
89
  # Creates a new Serializer object.
90
90
  def initialize
91
- @dispatcher = Hash.new do |h, klass|
92
- method = nil
93
- klass.ancestors.each do |ancestor_klass|
94
- method = "serialize_#{ancestor_klass.name.to_s.downcase.gsub(/::/, '_')}"
95
- (h[klass] = method; break) if respond_to?(method, true)
96
- end
97
- method
91
+ @dispatcher = {
92
+ Hash => 'serialize_hash',
93
+ Array => 'serialize_array',
94
+ Symbol => 'serialize_symbol',
95
+ String => 'serialize_string',
96
+ Integer => 'serialize_integer',
97
+ Float => 'serialize_float',
98
+ Time => 'serialize_time',
99
+ TrueClass => 'serialize_trueclass',
100
+ FalseClass => 'serialize_falseclass',
101
+ NilClass => 'serialize_nilclass',
102
+ HexaPDF::Reference => 'serialize_hexapdf_reference',
103
+ HexaPDF::Object => 'serialize_hexapdf_object',
104
+ HexaPDF::Stream => 'serialize_hexapdf_stream',
105
+ HexaPDF::Dictionary => 'serialize_hexapdf_object',
106
+ HexaPDF::PDFArray => 'serialize_hexapdf_object',
107
+ HexaPDF::Rectangle => 'serialize_hexapdf_object',
108
+ }
109
+ @dispatcher.default_proc = lambda do |h, klass|
110
+ h[klass] = if klass <= HexaPDF::Stream
111
+ "serialize_hexapdf_stream"
112
+ elsif klass <= HexaPDF::Object
113
+ "serialize_hexapdf_object"
114
+ else
115
+ method = nil
116
+ klass.ancestors.each do |ancestor_klass|
117
+ name = ancestor_klass.name.to_s.downcase
118
+ name.gsub!(/::/, '_')
119
+ method = "serialize_#{name}"
120
+ break if respond_to?(method, true)
121
+ end
122
+ method
123
+ end
98
124
  end
99
125
  @encrypter = false
100
126
  @io = nil
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.14.0'
40
+ VERSION = '0.14.1'
41
41
 
42
42
  end
@@ -72,7 +72,7 @@ describe HexaPDF::Font::TrueType::Table::Post do
72
72
  assert_equal('.notdef', table[0])
73
73
 
74
74
  @font.config['font.true_type.unknown_format'] = :raise
75
- assert_raises(HexaPDF::Error) { create_table(:Post) }
75
+ assert_raises(HexaPDF::Error) { create_table(:Post)[0] }
76
76
  end
77
77
  end
78
78
  end
@@ -27,6 +27,11 @@ describe HexaPDF::Font::TrueType::Subsetter do
27
27
  assert_equal(value, @subsetter.subset_glyph_id(5))
28
28
  end
29
29
 
30
+ it "doesn't use certain subset glyph IDs for performance reasons" do
31
+ 1.upto(13) {|i| @subsetter.use_glyph(i) }
32
+ assert_equal(14, @subsetter.subset_glyph_id(13))
33
+ end
34
+
30
35
  it "creates the subset font file" do
31
36
  gid = @font[:cmap].preferred_table[0x41]
32
37
  @subsetter.use_glyph(gid)
@@ -8,13 +8,17 @@ describe HexaPDF::FontLoader::FromConfiguration do
8
8
  before do
9
9
  @doc = HexaPDF::Document.new
10
10
  font_file = File.join(TEST_DATA_DIR, "fonts", "Ubuntu-Title.ttf")
11
- @doc.config['font.map'] = {'font' => {none: font_file}}
11
+ @font_obj = HexaPDF::Font::TrueType::Font.new(File.open(font_file, 'rb'))
12
+ @doc.config['font.map'] = {'font' => {none: font_file}, 'font1' => {none: @font_obj}}
12
13
  @klass = HexaPDF::FontLoader::FromConfiguration
13
14
  end
14
15
 
15
16
  it "loads the configured font" do
16
17
  wrapper = @klass.call(@doc, "font")
17
18
  assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
19
+ wrapper = @klass.call(@doc, "font1")
20
+ assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
21
+ assert_same(@font_obj, wrapper.wrapped_font)
18
22
  end
19
23
 
20
24
  it "passes the subset value to the wrapper" do
@@ -24,7 +28,7 @@ describe HexaPDF::FontLoader::FromConfiguration do
24
28
  refute(wrapper.subset?)
25
29
  end
26
30
 
27
- it "fails if the font file cannot be read" do
31
+ it "fails if the provided font is invalid" do
28
32
  @doc.config['font.map']['font'][:none] << "unknown"
29
33
  assert_raises(HexaPDF::Error) { @klass.call(@doc, "font") }
30
34
  end
@@ -34,6 +38,6 @@ describe HexaPDF::FontLoader::FromConfiguration do
34
38
  end
35
39
 
36
40
  it "returns a hash with all configured fonts" do
37
- assert_equal({'font' => [:none]}, @klass.available_fonts(@doc))
41
+ assert_equal({'font' => [:none], 'font1' => [:none]}, @klass.available_fonts(@doc))
38
42
  end
39
43
  end
@@ -16,6 +16,13 @@ describe HexaPDF::FontLoader::FromFile do
16
16
  assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
17
17
  end
18
18
 
19
+ it "loads the specified font object" do
20
+ font = HexaPDF::Font::TrueType::Font.new(File.open(@font_file, 'rb'))
21
+ wrapper = @klass.call(@doc, font)
22
+ assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
23
+ assert_same(font, wrapper.wrapped_font)
24
+ end
25
+
19
26
  it "passes the subset value to the wrapper" do
20
27
  wrapper = @klass.call(@doc, @font_file)
21
28
  assert(wrapper.subset?)
@@ -283,6 +283,11 @@ describe HexaPDF::Dictionary do
283
283
  @obj[:TestClass][:Nested][:Nested][:TestClass][:Inherited] = :symbol
284
284
  assert(@obj.validate)
285
285
  end
286
+
287
+ it "makes sure validation works in special case where the dictionary is modified" do
288
+ @dict[:Array] = 5
289
+ refute(@dict.validate {|_, _, object| object[:Boolean] })
290
+ end
286
291
  end
287
292
 
288
293
  describe "delete" do
@@ -88,6 +88,12 @@ describe HexaPDF::Parser do
88
88
  assert_equal('12', TestHelper.collector(stream.fiber))
89
89
  end
90
90
 
91
+ it "handles invalid indirect object value consisting of number followed by endobj without space" do
92
+ create_parser("1 0 obj 749endobj")
93
+ object, * = @parser.parse_indirect_object
94
+ assert_equal(749, object)
95
+ end
96
+
91
97
  it "recovers from an invalid stream length value" do
92
98
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
93
99
  obj, _, _, stream = @parser.parse_indirect_object
@@ -151,6 +157,12 @@ describe HexaPDF::Parser do
151
157
  assert_match(/not CR alone/, exp.message)
152
158
  end
153
159
 
160
+ it "fails for numbers followed by endobj without space" do
161
+ create_parser("1 0 obj 749endobj")
162
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
163
+ assert_match(/Invalid object value after 'obj'/, exp.message)
164
+ end
165
+
154
166
  it "fails if the stream length value is invalid" do
155
167
  create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
156
168
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.14.0)>>
43
+ <</Producer(HexaPDF version 0.14.1)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.14.0)>>
75
+ <</Producer(HexaPDF version 0.14.1)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-30 00:00:00.000000000 Z
11
+ date: 2021-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse
@@ -631,7 +631,7 @@ files:
631
631
  homepage: https://hexapdf.gettalong.org
632
632
  licenses:
633
633
  - AGPL-3.0
634
- - Commercial License
634
+ - Nonstandard
635
635
  metadata: {}
636
636
  post_install_message:
637
637
  rdoc_options: []