hexapdf 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/README.md +8 -7
  4. data/examples/022-outline.rb +5 -1
  5. data/examples/032-acro_form_list_and_fill.rb +47 -0
  6. data/examples/033-text_extraction.rb +34 -0
  7. data/lib/hexapdf/cli/debug_info.rb +98 -0
  8. data/lib/hexapdf/cli/images.rb +2 -2
  9. data/lib/hexapdf/cli/info.rb +2 -0
  10. data/lib/hexapdf/cli/inspect.rb +5 -1
  11. data/lib/hexapdf/cli.rb +2 -0
  12. data/lib/hexapdf/configuration.rb +8 -0
  13. data/lib/hexapdf/content/canvas.rb +1 -1
  14. data/lib/hexapdf/content/smart_text_extractor.rb +305 -0
  15. data/lib/hexapdf/content.rb +2 -0
  16. data/lib/hexapdf/digital_signature/signing/default_handler.rb +1 -15
  17. data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +21 -8
  18. data/lib/hexapdf/document.rb +7 -3
  19. data/lib/hexapdf/encryption/security_handler.rb +3 -1
  20. data/lib/hexapdf/filter/brotli_decode.rb +88 -0
  21. data/lib/hexapdf/filter.rb +1 -0
  22. data/lib/hexapdf/font/cmap.rb +10 -6
  23. data/lib/hexapdf/font/true_type/builder.rb +1 -1
  24. data/lib/hexapdf/font/true_type/font.rb +13 -0
  25. data/lib/hexapdf/font/true_type/subsetter.rb +7 -2
  26. data/lib/hexapdf/font/true_type/table/directory.rb +5 -0
  27. data/lib/hexapdf/font/true_type.rb +1 -0
  28. data/lib/hexapdf/layout/style.rb +6 -2
  29. data/lib/hexapdf/parser.rb +29 -4
  30. data/lib/hexapdf/revision.rb +6 -2
  31. data/lib/hexapdf/task/pdfa.rb +108 -1
  32. data/lib/hexapdf/type/acro_form/field.rb +4 -1
  33. data/lib/hexapdf/type/acro_form/form.rb +4 -0
  34. data/lib/hexapdf/type/acro_form/text_field.rb +4 -2
  35. data/lib/hexapdf/type/annotations/widget.rb +9 -0
  36. data/lib/hexapdf/type/document_security_store.rb +80 -0
  37. data/lib/hexapdf/type/page.rb +11 -0
  38. data/lib/hexapdf/type.rb +1 -0
  39. data/lib/hexapdf/version.rb +1 -1
  40. data/test/data/pdfa/mismatching_glyph_widths_cidfont_type2.pdf +0 -0
  41. data/test/hexapdf/content/test_smart_text_extractor.rb +129 -0
  42. data/test/hexapdf/digital_signature/common.rb +19 -5
  43. data/test/hexapdf/digital_signature/signing/test_signed_data_creator.rb +29 -4
  44. data/test/hexapdf/digital_signature/test_signatures.rb +3 -3
  45. data/test/hexapdf/encryption/test_security_handler.rb +7 -5
  46. data/test/hexapdf/filter/test_brotli_decode.rb +34 -0
  47. data/test/hexapdf/font/true_type/table/test_directory.rb +5 -3
  48. data/test/hexapdf/font/true_type/test_builder.rb +9 -0
  49. data/test/hexapdf/font/true_type/test_font.rb +17 -3
  50. data/test/hexapdf/font/true_type/test_subsetter.rb +4 -3
  51. data/test/hexapdf/task/test_pdfa.rb +72 -0
  52. data/test/hexapdf/test_document.rb +13 -0
  53. data/test/hexapdf/test_parser.rb +55 -3
  54. data/test/hexapdf/test_revision.rb +27 -6
  55. data/test/hexapdf/type/acro_form/test_field.rb +5 -0
  56. data/test/hexapdf/type/acro_form/test_form.rb +6 -0
  57. data/test/hexapdf/type/acro_form/test_text_field.rb +7 -1
  58. data/test/hexapdf/type/annotations/test_widget.rb +11 -0
  59. data/test/hexapdf/type/test_page.rb +8 -0
  60. data/test/test_helper.rb +6 -0
  61. metadata +41 -4
@@ -70,7 +70,7 @@ describe HexaPDF::DigitalSignature::Signatures do
70
70
  end
71
71
  @doc.signatures.add(@io, @handler, write_options: {update_fields: false})
72
72
  sig = @doc.signatures.first
73
- assert_equal([0, 925, 925 + sig[:Contents].size * 2 + 2, 2455 + HexaPDF::VERSION.length],
73
+ assert_equal([0, 925, 925 + (sig[:Contents].size + 5) * 2 + 2, 2455 + HexaPDF::VERSION.length],
74
74
  sig[:ByteRange].value)
75
75
  assert_equal(:sig, sig[:key])
76
76
  assert_equal(:sig_field, @doc.acro_form.each_field.first[:key])
@@ -134,7 +134,7 @@ describe HexaPDF::DigitalSignature::Signatures do
134
134
  @doc.delete(7)
135
135
  sig = @doc.signatures.add(@io, @handler, write_options: {update_fields: false})
136
136
  l1 = 1030 + HexaPDF::VERSION.length
137
- assert_equal([0, l1, l1 + sig[:Contents].size * 2 + 2, 2437 + HexaPDF::VERSION.length],
137
+ assert_equal([0, l1, l1 + (sig[:Contents].size + 5) * 2 + 2, 2437 + HexaPDF::VERSION.length],
138
138
  sig[:ByteRange].value)
139
139
  end
140
140
 
@@ -143,7 +143,7 @@ describe HexaPDF::DigitalSignature::Signatures do
143
143
  field.create_widget(@doc.pages[0], Rect: [0, 0, 0, 0])
144
144
  sig = @doc.signatures.add(@io, @handler, signature: field, write_options: {update_fields: false})
145
145
  l1 = 3097 + HexaPDF::VERSION.length
146
- assert_equal([0, l1, l1 + sig[:Contents].size * 2 + 2, 374 + HexaPDF::VERSION.length],
146
+ assert_equal([0, l1, l1 + (sig[:Contents].size + 5) * 2 + 2, 374 + HexaPDF::VERSION.length],
147
147
  sig[:ByteRange].value)
148
148
  end
149
149
 
@@ -129,16 +129,18 @@ describe HexaPDF::Encryption::SecurityHandler do
129
129
  end
130
130
 
131
131
  it "sets the correct /Length value for the given key length" do
132
- [[40, nil], [48, 48], [128, 128], [256, nil]].each do |key_length, result|
133
- algorithm = (key_length == 256 ? :aes : :arc4)
134
- @handler.set_up_encryption(key_length: key_length, algorithm: algorithm)
135
- assert(result == @handler.dict[:Length])
132
+ [[40, nil], [48, 48], [128, 128]].each do |key_length, result|
133
+ @handler.set_up_encryption(key_length: key_length, algorithm: :arc4)
134
+ result.nil? ? assert_nil(@handler.dict[:Length]) : assert_equal(result, @handler.dict[:Length])
136
135
  end
137
136
 
138
- # Work-around buggy software
137
+ # Work-around for buggy software needing the /Length key
139
138
  @handler.set_up_encryption(key_length: 128, algorithm: :aes)
140
139
  assert_equal(4, @handler.dict[:V])
141
140
  assert_equal(128, @handler.dict[:Length])
141
+ @handler.set_up_encryption(key_length: 256, algorithm: :aes)
142
+ assert_equal(5, @handler.dict[:V])
143
+ assert_equal(256, @handler.dict[:Length])
142
144
  end
143
145
 
144
146
  it "calls the prepare_encryption method" do
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require_relative 'common'
4
+ require 'hexapdf/filter/flate_decode'
5
+
6
+ describe HexaPDF::Filter::BrotliDecode do
7
+ include CommonFilterTests
8
+
9
+ before do
10
+ @obj = HexaPDF::Filter::BrotliDecode
11
+ @all_test_cases = [["abcdefg".b, Brotli.deflate("abcdefg".b)]]
12
+ @decoded = @all_test_cases[0][0]
13
+ @encoded = @all_test_cases[0][1]
14
+ @encoded_predictor = "\e\r\x00\xF8%\x05\x02\xC2\xC2\x86\x00\x80%".b
15
+ @predictor_opts = {Predictor: 12}
16
+ end
17
+
18
+ describe "decoder" do
19
+ it "works for empty input" do
20
+ assert_equal('', collector(@obj.decoder(Fiber.new { "" })))
21
+ assert_equal('', collector(@obj.decoder(Fiber.new {})))
22
+ end
23
+
24
+ it "applies the Predictor after decoding" do
25
+ assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded_predictor), @predictor_opts)))
26
+ end
27
+ end
28
+
29
+ describe "encoder" do
30
+ it "applies the Predictor before encoding" do
31
+ assert_equal(@encoded_predictor, collector(@obj.encoder(feeder(@decoded), @predictor_opts)))
32
+ end
33
+ end
34
+ end
@@ -5,8 +5,9 @@ require 'hexapdf/font/true_type/table/directory'
5
5
 
6
6
  describe HexaPDF::Font::TrueType::Table::Directory do
7
7
  before do
8
- io = StringIO.new("TEST\x00\x01\x00\x00\x00\x00\x00\x00" \
9
- "CUST----\x00\x00\x00\x1C\x00\x00\x00\x05ENTRY".b)
8
+ io = StringIO.new("TEST\x00\x02\x00\x00\x00\x00\x00\x00" \
9
+ "CUST----\x00\x00\x00\x2C\x00\x00\x00\x05" \
10
+ " TWO----\x00\x00\x00\x31\x00\x00\x00\x05ENTRYENTRY".b)
10
11
  @file = Object.new
11
12
  @file.define_singleton_method(:io) { io }
12
13
  @self_entry = HexaPDF::Font::TrueType::Table::Directory::SELF_ENTRY
@@ -23,8 +24,9 @@ describe HexaPDF::Font::TrueType::Table::Directory do
23
24
  entry = dir.entry('CUST')
24
25
  assert_equal('CUST', entry.tag)
25
26
  assert_equal('----'.unpack1('N'), entry.checksum)
26
- assert_equal(28, entry.offset)
27
+ assert_equal(44, entry.offset)
27
28
  assert_equal(5, entry.length)
29
+ assert_equal(['CUST', ' TWO'], dir.table_names)
28
30
  end
29
31
  end
30
32
  end
@@ -39,4 +39,13 @@ describe HexaPDF::Font::TrueType::Builder do
39
39
  tables = built_font.directory.instance_variable_get(:@tables)
40
40
  assert_equal(tables.keys.sort, tables.keys)
41
41
  end
42
+
43
+ it "allows setting the font version to OTTO for OpenType CFF fonts" do
44
+ tables = {
45
+ "head" => @font[:head].raw_data,
46
+ "maxp" => @font[:maxp].raw_data,
47
+ }
48
+ font_data = HexaPDF::Font::TrueType::Builder.build(tables)
49
+ assert_equal('OTTO', font_data[0, 4])
50
+ end
42
51
  end
@@ -7,16 +7,30 @@ require_relative 'common'
7
7
 
8
8
  describe HexaPDF::Font::TrueType::Font do
9
9
  before do
10
- @io = StringIO.new("TEST\x00\x01\x00\x00\x00\x00\x00\x00" \
11
- "TEST----\x00\x00\x00\x1C\x00\x00\x00\x05ENTRY".b)
10
+ @io = StringIO.new("OTTO\x00\x02\x00 \x00\x01\x00\x00" \
11
+ "TESTDATA\x00\x00\x00\x2C\x00\x00\x00\x04" \
12
+ "head`\x11?\xFA\x00\x00\x00\x30\x00\x00\x00\x36" \
13
+ "DATA" \
14
+ "\x00\x00\x00\x01\x01\x02\x03\x04]\t}\x85_\x0F<\xF5#{"\x00" * 38}\x00\x00".b)
12
15
  @font = HexaPDF::Font::TrueType::Font.new(@io)
13
16
  @font.config['font.true_type.table_mapping'][:TEST] = TrueTypeTestTable.name
14
17
  end
15
18
 
19
+ describe "build" do
20
+ it "creates a font file from the tables" do
21
+ assert_equal(@io.string, @font.build)
22
+
23
+ result = @io.string.dup
24
+ result[16, 4] = result[44, 4] = 'OTHR'
25
+ result[56, 4] = "F\xE3\x95c".b
26
+ assert_equal(result, @font.build('TEST' => 'OTHR'))
27
+ end
28
+ end
29
+
16
30
  describe "[]" do
17
31
  it "returns a named table" do
18
32
  table = @font[:TEST]
19
- assert_equal('ENTRY', table.data)
33
+ assert_equal('DATA', table.data)
20
34
  end
21
35
 
22
36
  it "always returns the same table instance" do
@@ -54,18 +54,19 @@ describe HexaPDF::Font::TrueType::Subsetter do
54
54
  end
55
55
 
56
56
  it "correctly subsets compound glyphs" do
57
- font_file = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
57
+ font_file = "/usr/share/fonts/truetype/noto/NotoSansMono-Regular.ttf"
58
58
  skip unless File.exist?(font_file)
59
59
 
60
60
  begin
61
61
  @font = HexaPDF::Font::TrueType::Font.new(File.open(font_file))
62
62
  @subsetter = HexaPDF::Font::TrueType::Subsetter.new(@font)
63
63
 
64
- @subsetter.use_glyph(@font[:cmap].preferred_table['À'.ord])
64
+ @subsetter.use_glyph(@font[:cmap].preferred_table['ë'.ord])
65
65
  subset = HexaPDF::Font::TrueType::Font.new(StringIO.new(@subsetter.build_font))
66
66
 
67
- assert_equal(4, subset[:maxp].num_glyphs)
67
+ assert_equal(5, subset[:maxp].num_glyphs)
68
68
  assert_equal([2, 3], subset[:glyf][1].components)
69
+ assert_equal([4], subset[:glyf][3].components)
69
70
  ensure
70
71
  @font.io.close
71
72
  end
@@ -38,4 +38,76 @@ describe HexaPDF::Task::PDFA do
38
38
  assert_equal('sRGB2014.icc', oi[:Info])
39
39
  assert_kind_of(HexaPDF::Stream, oi[:DestOutputProfile])
40
40
  end
41
+
42
+ it "applies fixes based on the optional fixes argument" do
43
+ file = File.join(TEST_DATA_DIR, 'pdfa', 'mismatching_glyph_widths_cidfont_type2.pdf')
44
+
45
+ # Document loaded -> all fixes applied by default
46
+ doc = HexaPDF::Document.open(file)
47
+ doc.task(:pdfa, level: '3b')
48
+ doc.dispatch_message(:complete_objects)
49
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
50
+ assert_equal(348, font[:hmtx][1].advance_width)
51
+
52
+ # Not loaded -> fixes for loaded documents excluded
53
+ doc = HexaPDF::Document.open(file)
54
+ created = HexaPDF::Document.new
55
+ created.pages << created.import(doc.pages[0])
56
+ created.task(:pdfa, level: '3b')
57
+ created.dispatch_message(:complete_objects)
58
+ font_file = created.pages[0].resources.font(:F1).descendant_font[:FontDescriptor][:FontFile2]
59
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font_file.stream))
60
+ assert_equal(346, font[:hmtx][1].advance_width)
61
+
62
+ # Explicitly specify to apply all fixes
63
+ created.task(:pdfa, level: '3b', fixes: :all)
64
+ created.dispatch_message(:complete_objects)
65
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font_file.stream))
66
+ assert_equal(348, font[:hmtx][1].advance_width)
67
+ end
68
+
69
+ describe "fix_glyph_widths" do
70
+ before do
71
+ @file = File.join(TEST_DATA_DIR, 'pdfa', 'mismatching_glyph_widths_cidfont_type2.pdf')
72
+ end
73
+
74
+ it "fixes glyph width inconsistencies between the font and the font dictionary" do
75
+ doc = HexaPDF::Document.open(@file)
76
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
77
+
78
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
79
+ assert_equal(346, font[:hmtx][1].advance_width)
80
+ doc.dispatch_message(:complete_objects)
81
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
82
+ assert_equal(348, font[:hmtx][1].advance_width)
83
+ end
84
+
85
+ it "works if there is an explicit CIDToGIDMap stream" do
86
+ doc = HexaPDF::Document.open(@file)
87
+ doc.object(5)[:CIDToGIDMap] = doc.wrap({}, stream: [0, 1, 2, 3, 4].pack('n*'))
88
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
89
+ doc.dispatch_message(:complete_objects)
90
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(doc.object(10).stream))
91
+ assert_equal(348, font[:hmtx][1].advance_width)
92
+ end
93
+
94
+ it "processes annotation appearances" do
95
+ doc = HexaPDF::Document.new
96
+ doc.pages.add
97
+ doc.annotations.create_rectangle(doc.pages[0], 20, 20, 20, 60).
98
+ regenerate_appearance
99
+ form = doc.pages[0][:Annots][0].create_appearance
100
+ form.canvas.
101
+ font(File.join(TEST_DATA_DIR, 'fonts', 'Ubuntu-Title.ttf'), size: 10).
102
+ text('Hola', at: [0, 0])
103
+
104
+ doc = HexaPDF::Document.new(io: StringIO.new(doc.write_to_string))
105
+ font = doc.pages[0][:Annots][0].appearance.resources.font(:F1).descendant_font
106
+ font[:W][1][0] = 10
107
+ doc.task(:pdfa, level: '3b', fixes: [:fix_glyph_widths])
108
+ doc.dispatch_message(:complete_objects)
109
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font[:FontDescriptor][:FontFile2].stream))
110
+ assert_equal(10, font[:hmtx][1].advance_width)
111
+ end
112
+ end
41
113
  end
@@ -286,6 +286,13 @@ describe HexaPDF::Document do
286
286
  assert_equal({a: {b: 10}}, @doc.unwrap(value))
287
287
  end
288
288
 
289
+ it "doesn't unwrap PDF stream objects" do
290
+ stream = @io_doc.wrap({a: HexaPDF::Reference.new(1, 0)}, stream: 'data')
291
+ result = @io_doc.unwrap(stream)
292
+ assert_same(stream, result)
293
+ assert_equal(HexaPDF::Reference.new(1, 0), result.value[:a])
294
+ end
295
+
289
296
  it "fails to unwrap recursive structures" do
290
297
  obj1 = @doc.add({})
291
298
  obj2 = @doc.add({})
@@ -413,6 +420,12 @@ describe HexaPDF::Document do
413
420
  assert(@doc.trailer.info.key?(:Author))
414
421
  end
415
422
 
423
+ it "works even in case of invalid PDFs with a non-dictionary value for trailer.info" do
424
+ @doc.trailer[:Info] = :something_else
425
+ @doc.write(StringIO.new)
426
+ assert(@doc.trailer.info.key?(:ModDate))
427
+ end
428
+
416
429
  it "it doesn't optimize the file by default" do
417
430
  io = StringIO.new(''.b)
418
431
  @io_doc.write(io)
@@ -10,6 +10,7 @@ describe HexaPDF::Parser do
10
10
  @document = HexaPDF::Document.new
11
11
  @document.config['parser.try_xref_reconstruction'] = false
12
12
  @document.add(@document.wrap(10, oid: 1, gen: 0))
13
+ @document.add(@document.wrap({Recurse: HexaPDF::Reference.new(3)}, oid: 3))
13
14
 
14
15
  create_parser(+<<~EOF)
15
16
  %PDF-1.7
@@ -173,6 +174,18 @@ describe HexaPDF::Parser do
173
174
  assert_equal({Length: 4}, object)
174
175
  end
175
176
 
177
+ it "recovers in case of an invalid /Filter leading to indirect object recursion" do
178
+ create_parser("1 0 obj<</Length 1/Filter 3 0 R>>stream\n1\nendstream endobj")
179
+ object, * = @parser.parse_indirect_object
180
+ assert_equal({Length: 1}, object)
181
+ end
182
+
183
+ it "recovers in case of an invalid /DecodeParms leading to indirect object recursion" do
184
+ create_parser("1 0 obj<</Length 1/DecodeParms 3 0 R>>stream\n1\nendstream endobj")
185
+ object, * = @parser.parse_indirect_object
186
+ assert_equal({Length: 1}, object)
187
+ end
188
+
176
189
  it "fails if the oid, gen or 'obj' keyword is invalid" do
177
190
  create_parser("a 0 obj\n5\nendobj")
178
191
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
@@ -267,6 +280,18 @@ describe HexaPDF::Parser do
267
280
  exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
268
281
  assert_match(/keyword endobj/, exp.message)
269
282
  end
283
+
284
+ it "fails if an invalid /Filter leads to indirect object recursion" do
285
+ create_parser("1 0 obj<</Length 1/Filter 3 0 R>>stream\n1\nendstream endobj")
286
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
287
+ assert_match(/Invalid \/Filter/, exp.message)
288
+ end
289
+
290
+ it "fails if an invalid /DecodeParms leads to indirect object recursion" do
291
+ create_parser("1 0 obj<</Length 1/DecodeParms 3 0 R>>stream\n1\nendstream endobj")
292
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
293
+ assert_match(/Invalid \/DecodeParms/, exp.message)
294
+ end
270
295
  end
271
296
  end
272
297
 
@@ -315,14 +340,32 @@ describe HexaPDF::Parser do
315
340
  assert_equal(1, obj.oid)
316
341
  end
317
342
 
343
+ it "handles the case when generation numbers don't match with a single revision" do
344
+ @entry.gen = 2
345
+ obj = @parser.load_object(@entry)
346
+ assert_equal(2, obj.oid)
347
+ assert_equal(5, obj[0])
348
+ end
349
+
318
350
  describe "with strict parsing" do
319
- it "raises an error if an indirect object has an offset of 0" do
351
+ before do
320
352
  @document.config['parser.on_correctable_error'] = proc { true }
353
+ end
354
+
355
+ it "raises an error if an indirect object has an offset of 0" do
321
356
  exp = assert_raises(HexaPDF::MalformedPDFError) do
322
357
  @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
323
358
  end
324
359
  assert_match(/has offset 0/, exp.message)
325
360
  end
361
+
362
+ it "fails if the generation numbers don't match with a single revision" do
363
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
364
+ @entry.gen = 2
365
+ @parser.load_object(@entry)
366
+ end
367
+ assert_match(/oid,gen.*don't match/, exp.message)
368
+ end
326
369
  end
327
370
 
328
371
  it "fails if another object is found instead of an object stream" do
@@ -342,9 +385,18 @@ describe HexaPDF::Parser do
342
385
  assert_match(/invalid cross-reference type/i, exp.message)
343
386
  end
344
387
 
345
- it "fails if the object/generation numbers don't match" do
388
+ it "fails if the object numbers don't match" do
389
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
390
+ @entry.oid = 5
391
+ @parser.load_object(@entry)
392
+ end
393
+ assert_match(/oid,gen.*don't match/, exp.message)
394
+ end
395
+
396
+ it "fails if the generation numbers don't match for multiple revisions" do
397
+ @document.revisions.add
346
398
  exp = assert_raises(HexaPDF::MalformedPDFError) do
347
- @entry.gen = 2
399
+ @entry.gen = 5
348
400
  @parser.load_object(@entry)
349
401
  end
350
402
  assert_match(/oid,gen.*don't match/, exp.message)
@@ -17,6 +17,7 @@ describe HexaPDF::Revision do
17
17
  @xref_section.add_in_use_entry(5, 0, 1000)
18
18
  @xref_section.add_in_use_entry(6, 0, 5000)
19
19
  @xref_section.add_in_use_entry(7, 0, 5000)
20
+ @xref_section.add_in_use_entry(8, 2, 5000)
20
21
  @obj = HexaPDF::Object.new(:val, oid: 1, gen: 0)
21
22
  @ref = HexaPDF::Reference.new(1, 0)
22
23
 
@@ -30,6 +31,7 @@ describe HexaPDF::Revision do
30
31
  when 5 then HexaPDF::Dictionary.new({Type: :ObjStm}, oid: entry.oid, gen: entry.gen)
31
32
  when 7 then HexaPDF::Type::Catalog.new({Type: :Catalog}, oid: entry.oid, gen: entry.gen,
32
33
  document: self)
34
+ when 8 then HexaPDF::Object.new(:DifferentGen, oid: entry.oid, gen: 0)
33
35
  when 6 then HexaPDF::Dictionary.new({Array: HexaPDF::PDFArray.new([1, 2])},
34
36
  oid: entry.oid, gen: entry.gen)
35
37
  else HexaPDF::Object.new(:Test, oid: entry.oid, gen: entry.gen)
@@ -50,10 +52,10 @@ describe HexaPDF::Revision do
50
52
  end
51
53
 
52
54
  it "returns the next free object number" do
53
- assert_equal(8, @rev.next_free_oid)
54
- @obj.oid = 8
55
- @rev.add(@obj)
56
55
  assert_equal(9, @rev.next_free_oid)
56
+ @obj.oid = 9
57
+ @rev.add(@obj)
58
+ assert_equal(10, @rev.next_free_oid)
57
59
  end
58
60
 
59
61
  describe "add" do
@@ -113,6 +115,12 @@ describe HexaPDF::Revision do
113
115
  refute_nil(obj)
114
116
  end
115
117
 
118
+ it "loads an object that is defined in the cross-reference section with an invalid generation number" do
119
+ obj = @rev.object(HexaPDF::Reference.new(8, 0))
120
+ assert_equal(0, obj.gen)
121
+ assert_equal(:DifferentGen, obj.value)
122
+ end
123
+
116
124
  it "loads free entries in the cross-reference section as special PDF null objects" do
117
125
  obj = @rev.object(HexaPDF::Reference.new(3, 0))
118
126
  assert_nil(obj.value)
@@ -172,7 +180,20 @@ describe HexaPDF::Revision do
172
180
  describe "object iteration" do
173
181
  it "iterates over all objects via each" do
174
182
  @rev.add(@obj)
175
- assert_equal([@obj, *(2..7).map {|i| @rev.object(i) }], @rev.each.to_a)
183
+ assert_equal([@obj, *(2..8).map {|i| @rev.object(i) }], @rev.each.to_a)
184
+ end
185
+
186
+ it "ensures no object is loaded multiple times" do
187
+ obj_2_data = nil
188
+ @rev.add(@obj) # ensures this is yielded first
189
+ @rev.each do |obj|
190
+ if obj == @obj
191
+ obj_2_data = @rev.object(2).data
192
+ elsif obj.oid == 2
193
+ assert_same(obj_2_data, obj.data)
194
+ break
195
+ end
196
+ end
176
197
  end
177
198
 
178
199
  it "iterates only over loaded objects" do
@@ -216,8 +237,8 @@ describe HexaPDF::Revision do
216
237
  end
217
238
 
218
239
  it "handles object and xref streams that were added appropriately depending on the 'all' arg" do
219
- xref = @rev.add(HexaPDF::Dictionary.new({Type: :XRef}, oid: 8))
220
- objstm = @rev.add(HexaPDF::Dictionary.new({Type: :ObjStm}, oid: 9))
240
+ xref = @rev.add(HexaPDF::Dictionary.new({Type: :XRef}, oid: 20))
241
+ objstm = @rev.add(HexaPDF::Dictionary.new({Type: :ObjStm}, oid: 21))
221
242
  assert_equal([], @rev.each_modified_object.to_a)
222
243
  assert_equal([xref, objstm], @rev.each_modified_object(all: true).to_a)
223
244
  end
@@ -147,6 +147,11 @@ describe HexaPDF::Type::AcroForm::Field do
147
147
  it "yields nothing if no widgets are defined" do
148
148
  assert_equal([], @field.each_widget.to_a)
149
149
  end
150
+
151
+ it "ignores entries in the /Kids array that are not widgets" do
152
+ @field[:Kids] = [{Subtype: :Widget, Rect: [0, 0, 0, 0], X: 1}, {FT: :Tx, Kids: []}]
153
+ assert_equal(1, @field.each_widget.to_a.size)
154
+ end
150
155
  end
151
156
 
152
157
  describe "create_widget" do
@@ -322,6 +322,12 @@ describe HexaPDF::Type::AcroForm::Form do
322
322
  assert_equal("value", field.field_value)
323
323
  end
324
324
 
325
+ it "ignores values for password fields" do
326
+ field = @acro_form.create_password_field('test')
327
+ @acro_form.fill("test" => "value")
328
+ assert_nil(field.field_value)
329
+ end
330
+
325
331
  it "works for radio buttons" do
326
332
  field = @acro_form.create_radio_button("test")
327
333
  field.create_widget(@doc.pages.add, value: :name)
@@ -272,10 +272,16 @@ describe HexaPDF::Type::AcroForm::TextField do
272
272
 
273
273
  it "checks that the field value has a valid type" do
274
274
  assert(@field.validate) # no field value
275
- @field[:V] = :sym
275
+ @field[:V] = [5]
276
276
  refute(@field.validate)
277
277
  end
278
278
 
279
+ it "converts an invalid Symbol value to string" do
280
+ @field[:V] = :sym
281
+ assert(@field.validate)
282
+ assert_equal('sym', @field[:V])
283
+ end
284
+
279
285
  it "checks the field value against /MaxLen" do
280
286
  @field[:V] = 'Test'
281
287
  assert(@field.validate)
@@ -188,4 +188,15 @@ describe HexaPDF::Type::Annotations::Widget do
188
188
  end
189
189
  end
190
190
  end
191
+
192
+ describe "perform_validation" do
193
+ it "validates the widget as form field if they are the same" do
194
+ @widget[:Rect] = [0, 0, 0, 0]
195
+ @widget[:FT] = :Tx
196
+ @widget[:T] = 'field'
197
+ @widget[:V] = :Sym
198
+ assert(@widget.validate)
199
+ assert_equal('Sym', @widget[:V]) # this auto-correct is part of TextField
200
+ end
201
+ end
191
202
  end
@@ -416,6 +416,14 @@ describe HexaPDF::Type::Page do
416
416
  end
417
417
  end
418
418
 
419
+ describe "extract_text" do
420
+ it "extracts the layouted text from the page" do
421
+ page = @doc.pages.add
422
+ page.canvas.font('Helvetica', size: 10).text('Hello', at: [10, 10])
423
+ assert_equal('Hello', page.extract_text(line_tolerance_factor: 5))
424
+ end
425
+ end
426
+
419
427
  describe "index" do
420
428
  it "returns the index of the page in the page tree" do
421
429
  kid1 = @doc.add({Type: :Pages, Parent: @doc.pages.root, Count: 4})
data/test/test_helper.rb CHANGED
@@ -11,6 +11,12 @@ rescue LoadError
11
11
  end
12
12
 
13
13
  gem 'minitest'
14
+ begin
15
+ gem 'minitest-mock'
16
+ require 'minitest/mock'
17
+ rescue Gem::MissingSpecError
18
+ # Assume Minitest < 6 is in use for older Rubies
19
+ end
14
20
  gem 'strscan'
15
21
  require 'minitest/autorun'
16
22
  require 'fiber'