hexapdf 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1024 -0
  3. data/LICENSE +29 -0
  4. data/README.md +129 -0
  5. data/Rakefile +109 -0
  6. data/agpl-3.0.txt +661 -0
  7. data/examples/001-hello_world.rb +16 -0
  8. data/examples/002-graphics.rb +275 -0
  9. data/examples/003-arcs.rb +50 -0
  10. data/examples/004-optimizing.rb +23 -0
  11. data/examples/005-merging.rb +27 -0
  12. data/examples/006-standard_pdf_fonts.rb +73 -0
  13. data/examples/007-truetype.rb +42 -0
  14. data/examples/008-show_char_bboxes.rb +55 -0
  15. data/examples/009-text_layouter_alignment.rb +47 -0
  16. data/examples/010-text_layouter_inline_boxes.rb +64 -0
  17. data/examples/011-text_layouter_line_wrapping.rb +57 -0
  18. data/examples/012-text_layouter_styling.rb +122 -0
  19. data/examples/013-text_layouter_shapes.rb +176 -0
  20. data/examples/014-text_in_polygon.rb +60 -0
  21. data/examples/015-boxes.rb +76 -0
  22. data/examples/016-frame_automatic_box_placement.rb +90 -0
  23. data/examples/017-frame_text_flow.rb +60 -0
  24. data/examples/018-composer.rb +44 -0
  25. data/examples/019-acro_form.rb +88 -0
  26. data/examples/emoji-smile.png +0 -0
  27. data/examples/emoji-wink.png +0 -0
  28. data/examples/machupicchu.jpg +0 -0
  29. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -0
  30. data/lib/hexapdf/content/graphic_object/geom2d.rb +13 -0
  31. data/lib/hexapdf/version.rb +1 -1
  32. data/test/data/aes-test-vectors/CBCGFSbox-128-decrypt.data.gz +0 -0
  33. data/test/data/aes-test-vectors/CBCGFSbox-128-encrypt.data.gz +0 -0
  34. data/test/data/aes-test-vectors/CBCGFSbox-192-decrypt.data.gz +0 -0
  35. data/test/data/aes-test-vectors/CBCGFSbox-192-encrypt.data.gz +0 -0
  36. data/test/data/aes-test-vectors/CBCGFSbox-256-decrypt.data.gz +0 -0
  37. data/test/data/aes-test-vectors/CBCGFSbox-256-encrypt.data.gz +0 -0
  38. data/test/data/aes-test-vectors/CBCKeySbox-128-decrypt.data.gz +0 -0
  39. data/test/data/aes-test-vectors/CBCKeySbox-128-encrypt.data.gz +0 -0
  40. data/test/data/aes-test-vectors/CBCKeySbox-192-decrypt.data.gz +0 -0
  41. data/test/data/aes-test-vectors/CBCKeySbox-192-encrypt.data.gz +0 -0
  42. data/test/data/aes-test-vectors/CBCKeySbox-256-decrypt.data.gz +0 -0
  43. data/test/data/aes-test-vectors/CBCKeySbox-256-encrypt.data.gz +0 -0
  44. data/test/data/aes-test-vectors/CBCVarKey-128-decrypt.data.gz +0 -0
  45. data/test/data/aes-test-vectors/CBCVarKey-128-encrypt.data.gz +0 -0
  46. data/test/data/aes-test-vectors/CBCVarKey-192-decrypt.data.gz +0 -0
  47. data/test/data/aes-test-vectors/CBCVarKey-192-encrypt.data.gz +0 -0
  48. data/test/data/aes-test-vectors/CBCVarKey-256-decrypt.data.gz +0 -0
  49. data/test/data/aes-test-vectors/CBCVarKey-256-encrypt.data.gz +0 -0
  50. data/test/data/aes-test-vectors/CBCVarTxt-128-decrypt.data.gz +0 -0
  51. data/test/data/aes-test-vectors/CBCVarTxt-128-encrypt.data.gz +0 -0
  52. data/test/data/aes-test-vectors/CBCVarTxt-192-decrypt.data.gz +0 -0
  53. data/test/data/aes-test-vectors/CBCVarTxt-192-encrypt.data.gz +0 -0
  54. data/test/data/aes-test-vectors/CBCVarTxt-256-decrypt.data.gz +0 -0
  55. data/test/data/aes-test-vectors/CBCVarTxt-256-encrypt.data.gz +0 -0
  56. data/test/data/fonts/Ubuntu-Title.ttf +0 -0
  57. data/test/data/images/cmyk.jpg +0 -0
  58. data/test/data/images/fillbytes.jpg +0 -0
  59. data/test/data/images/gray.jpg +0 -0
  60. data/test/data/images/greyscale-1bit.png +0 -0
  61. data/test/data/images/greyscale-2bit.png +0 -0
  62. data/test/data/images/greyscale-4bit.png +0 -0
  63. data/test/data/images/greyscale-8bit.png +0 -0
  64. data/test/data/images/greyscale-alpha-8bit.png +0 -0
  65. data/test/data/images/greyscale-trns-8bit.png +0 -0
  66. data/test/data/images/greyscale-with-gamma1.0.png +0 -0
  67. data/test/data/images/greyscale-with-gamma1.5.png +0 -0
  68. data/test/data/images/indexed-1bit.png +0 -0
  69. data/test/data/images/indexed-2bit.png +0 -0
  70. data/test/data/images/indexed-4bit.png +0 -0
  71. data/test/data/images/indexed-8bit.png +0 -0
  72. data/test/data/images/indexed-alpha-4bit.png +0 -0
  73. data/test/data/images/indexed-alpha-8bit.png +0 -0
  74. data/test/data/images/rgb.jpg +0 -0
  75. data/test/data/images/truecolour-8bit.png +0 -0
  76. data/test/data/images/truecolour-alpha-8bit.png +0 -0
  77. data/test/data/images/truecolour-gama-chrm-8bit.png +0 -0
  78. data/test/data/images/truecolour-srgb-8bit.png +0 -0
  79. data/test/data/images/ycck.jpg +0 -0
  80. data/test/data/minimal.pdf +44 -0
  81. data/test/data/standard-security-handler/README +9 -0
  82. data/test/data/standard-security-handler/bothpwd-aes-128bit-V4.pdf +44 -0
  83. data/test/data/standard-security-handler/bothpwd-aes-256bit-V5.pdf +0 -0
  84. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V2.pdf +43 -0
  85. data/test/data/standard-security-handler/bothpwd-arc4-128bit-V4.pdf +43 -0
  86. data/test/data/standard-security-handler/bothpwd-arc4-40bit-V1.pdf +0 -0
  87. data/test/data/standard-security-handler/nopwd-aes-128bit-V4.pdf +43 -0
  88. data/test/data/standard-security-handler/nopwd-aes-256bit-V5.pdf +0 -0
  89. data/test/data/standard-security-handler/nopwd-arc4-128bit-V2.pdf +43 -0
  90. data/test/data/standard-security-handler/nopwd-arc4-128bit-V4.pdf +43 -0
  91. data/test/data/standard-security-handler/nopwd-arc4-40bit-V1.pdf +43 -0
  92. data/test/data/standard-security-handler/ownerpwd-aes-128bit-V4.pdf +0 -0
  93. data/test/data/standard-security-handler/ownerpwd-aes-256bit-V5.pdf +43 -0
  94. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V2.pdf +43 -0
  95. data/test/data/standard-security-handler/ownerpwd-arc4-128bit-V4.pdf +43 -0
  96. data/test/data/standard-security-handler/ownerpwd-arc4-40bit-V1.pdf +43 -0
  97. data/test/data/standard-security-handler/userpwd-aes-128bit-V4.pdf +43 -0
  98. data/test/data/standard-security-handler/userpwd-aes-256bit-V5.pdf +43 -0
  99. data/test/data/standard-security-handler/userpwd-arc4-128bit-V2.pdf +0 -0
  100. data/test/data/standard-security-handler/userpwd-arc4-128bit-V4.pdf +0 -0
  101. data/test/data/standard-security-handler/userpwd-arc4-40bit-V1.pdf +43 -0
  102. data/test/hexapdf/common_tokenizer_tests.rb +236 -0
  103. data/test/hexapdf/content/common.rb +39 -0
  104. data/test/hexapdf/content/graphic_object/test_arc.rb +102 -0
  105. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +90 -0
  106. data/test/hexapdf/content/graphic_object/test_geom2d.rb +79 -0
  107. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +86 -0
  108. data/test/hexapdf/content/test_canvas.rb +1279 -0
  109. data/test/hexapdf/content/test_color_space.rb +176 -0
  110. data/test/hexapdf/content/test_graphics_state.rb +151 -0
  111. data/test/hexapdf/content/test_operator.rb +619 -0
  112. data/test/hexapdf/content/test_parser.rb +99 -0
  113. data/test/hexapdf/content/test_processor.rb +163 -0
  114. data/test/hexapdf/content/test_transformation_matrix.rb +64 -0
  115. data/test/hexapdf/document/test_files.rb +72 -0
  116. data/test/hexapdf/document/test_fonts.rb +60 -0
  117. data/test/hexapdf/document/test_images.rb +72 -0
  118. data/test/hexapdf/document/test_pages.rb +130 -0
  119. data/test/hexapdf/encryption/common.rb +87 -0
  120. data/test/hexapdf/encryption/test_aes.rb +129 -0
  121. data/test/hexapdf/encryption/test_arc4.rb +39 -0
  122. data/test/hexapdf/encryption/test_fast_aes.rb +17 -0
  123. data/test/hexapdf/encryption/test_fast_arc4.rb +12 -0
  124. data/test/hexapdf/encryption/test_identity.rb +21 -0
  125. data/test/hexapdf/encryption/test_ruby_aes.rb +23 -0
  126. data/test/hexapdf/encryption/test_ruby_arc4.rb +20 -0
  127. data/test/hexapdf/encryption/test_security_handler.rb +380 -0
  128. data/test/hexapdf/encryption/test_standard_security_handler.rb +322 -0
  129. data/test/hexapdf/filter/common.rb +53 -0
  130. data/test/hexapdf/filter/test_ascii85_decode.rb +59 -0
  131. data/test/hexapdf/filter/test_ascii_hex_decode.rb +38 -0
  132. data/test/hexapdf/filter/test_crypt.rb +21 -0
  133. data/test/hexapdf/filter/test_encryption.rb +24 -0
  134. data/test/hexapdf/filter/test_flate_decode.rb +44 -0
  135. data/test/hexapdf/filter/test_lzw_decode.rb +52 -0
  136. data/test/hexapdf/filter/test_predictor.rb +219 -0
  137. data/test/hexapdf/filter/test_run_length_decode.rb +32 -0
  138. data/test/hexapdf/font/cmap/test_parser.rb +102 -0
  139. data/test/hexapdf/font/cmap/test_writer.rb +66 -0
  140. data/test/hexapdf/font/encoding/test_base.rb +45 -0
  141. data/test/hexapdf/font/encoding/test_difference_encoding.rb +29 -0
  142. data/test/hexapdf/font/encoding/test_glyph_list.rb +59 -0
  143. data/test/hexapdf/font/encoding/test_zapf_dingbats_encoding.rb +16 -0
  144. data/test/hexapdf/font/test_cmap.rb +104 -0
  145. data/test/hexapdf/font/test_encoding.rb +27 -0
  146. data/test/hexapdf/font/test_invalid_glyph.rb +34 -0
  147. data/test/hexapdf/font/test_true_type_wrapper.rb +186 -0
  148. data/test/hexapdf/font/test_type1_wrapper.rb +107 -0
  149. data/test/hexapdf/font/true_type/common.rb +17 -0
  150. data/test/hexapdf/font/true_type/table/common.rb +27 -0
  151. data/test/hexapdf/font/true_type/table/test_cmap.rb +47 -0
  152. data/test/hexapdf/font/true_type/table/test_cmap_subtable.rb +141 -0
  153. data/test/hexapdf/font/true_type/table/test_directory.rb +30 -0
  154. data/test/hexapdf/font/true_type/table/test_glyf.rb +58 -0
  155. data/test/hexapdf/font/true_type/table/test_head.rb +56 -0
  156. data/test/hexapdf/font/true_type/table/test_hhea.rb +26 -0
  157. data/test/hexapdf/font/true_type/table/test_hmtx.rb +30 -0
  158. data/test/hexapdf/font/true_type/table/test_kern.rb +61 -0
  159. data/test/hexapdf/font/true_type/table/test_loca.rb +33 -0
  160. data/test/hexapdf/font/true_type/table/test_maxp.rb +50 -0
  161. data/test/hexapdf/font/true_type/table/test_name.rb +76 -0
  162. data/test/hexapdf/font/true_type/table/test_os2.rb +55 -0
  163. data/test/hexapdf/font/true_type/table/test_post.rb +78 -0
  164. data/test/hexapdf/font/true_type/test_builder.rb +42 -0
  165. data/test/hexapdf/font/true_type/test_font.rb +116 -0
  166. data/test/hexapdf/font/true_type/test_optimizer.rb +26 -0
  167. data/test/hexapdf/font/true_type/test_subsetter.rb +73 -0
  168. data/test/hexapdf/font/true_type/test_table.rb +48 -0
  169. data/test/hexapdf/font/type1/common.rb +6 -0
  170. data/test/hexapdf/font/type1/test_afm_parser.rb +65 -0
  171. data/test/hexapdf/font/type1/test_font.rb +104 -0
  172. data/test/hexapdf/font/type1/test_font_metrics.rb +22 -0
  173. data/test/hexapdf/font/type1/test_pfb_parser.rb +37 -0
  174. data/test/hexapdf/font_loader/test_from_configuration.rb +43 -0
  175. data/test/hexapdf/font_loader/test_from_file.rb +36 -0
  176. data/test/hexapdf/font_loader/test_standard14.rb +33 -0
  177. data/test/hexapdf/image_loader/test_jpeg.rb +93 -0
  178. data/test/hexapdf/image_loader/test_pdf.rb +47 -0
  179. data/test/hexapdf/image_loader/test_png.rb +259 -0
  180. data/test/hexapdf/layout/test_box.rb +154 -0
  181. data/test/hexapdf/layout/test_frame.rb +350 -0
  182. data/test/hexapdf/layout/test_image_box.rb +73 -0
  183. data/test/hexapdf/layout/test_inline_box.rb +71 -0
  184. data/test/hexapdf/layout/test_line.rb +206 -0
  185. data/test/hexapdf/layout/test_style.rb +790 -0
  186. data/test/hexapdf/layout/test_text_box.rb +140 -0
  187. data/test/hexapdf/layout/test_text_fragment.rb +375 -0
  188. data/test/hexapdf/layout/test_text_layouter.rb +758 -0
  189. data/test/hexapdf/layout/test_text_shaper.rb +62 -0
  190. data/test/hexapdf/layout/test_width_from_polygon.rb +109 -0
  191. data/test/hexapdf/task/test_dereference.rb +51 -0
  192. data/test/hexapdf/task/test_optimize.rb +162 -0
  193. data/test/hexapdf/test_composer.rb +258 -0
  194. data/test/hexapdf/test_configuration.rb +93 -0
  195. data/test/hexapdf/test_data_dir.rb +32 -0
  196. data/test/hexapdf/test_dictionary.rb +340 -0
  197. data/test/hexapdf/test_dictionary_fields.rb +269 -0
  198. data/test/hexapdf/test_document.rb +641 -0
  199. data/test/hexapdf/test_filter.rb +100 -0
  200. data/test/hexapdf/test_importer.rb +106 -0
  201. data/test/hexapdf/test_object.rb +258 -0
  202. data/test/hexapdf/test_parser.rb +645 -0
  203. data/test/hexapdf/test_pdf_array.rb +169 -0
  204. data/test/hexapdf/test_rectangle.rb +73 -0
  205. data/test/hexapdf/test_reference.rb +50 -0
  206. data/test/hexapdf/test_revision.rb +188 -0
  207. data/test/hexapdf/test_revisions.rb +196 -0
  208. data/test/hexapdf/test_serializer.rb +195 -0
  209. data/test/hexapdf/test_stream.rb +274 -0
  210. data/test/hexapdf/test_tokenizer.rb +80 -0
  211. data/test/hexapdf/test_type.rb +18 -0
  212. data/test/hexapdf/test_writer.rb +140 -0
  213. data/test/hexapdf/test_xref_section.rb +61 -0
  214. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +795 -0
  215. data/test/hexapdf/type/acro_form/test_button_field.rb +308 -0
  216. data/test/hexapdf/type/acro_form/test_choice_field.rb +220 -0
  217. data/test/hexapdf/type/acro_form/test_field.rb +259 -0
  218. data/test/hexapdf/type/acro_form/test_form.rb +357 -0
  219. data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
  220. data/test/hexapdf/type/acro_form/test_text_field.rb +201 -0
  221. data/test/hexapdf/type/acro_form/test_variable_text_field.rb +88 -0
  222. data/test/hexapdf/type/actions/test_launch.rb +24 -0
  223. data/test/hexapdf/type/actions/test_uri.rb +23 -0
  224. data/test/hexapdf/type/annotations/test_markup_annotation.rb +22 -0
  225. data/test/hexapdf/type/annotations/test_text.rb +34 -0
  226. data/test/hexapdf/type/annotations/test_widget.rb +225 -0
  227. data/test/hexapdf/type/test_annotation.rb +97 -0
  228. data/test/hexapdf/type/test_catalog.rb +48 -0
  229. data/test/hexapdf/type/test_cid_font.rb +61 -0
  230. data/test/hexapdf/type/test_file_specification.rb +141 -0
  231. data/test/hexapdf/type/test_font.rb +67 -0
  232. data/test/hexapdf/type/test_font_descriptor.rb +61 -0
  233. data/test/hexapdf/type/test_font_simple.rb +176 -0
  234. data/test/hexapdf/type/test_font_true_type.rb +31 -0
  235. data/test/hexapdf/type/test_font_type0.rb +120 -0
  236. data/test/hexapdf/type/test_font_type1.rb +142 -0
  237. data/test/hexapdf/type/test_font_type3.rb +26 -0
  238. data/test/hexapdf/type/test_form.rb +120 -0
  239. data/test/hexapdf/type/test_image.rb +261 -0
  240. data/test/hexapdf/type/test_info.rb +9 -0
  241. data/test/hexapdf/type/test_object_stream.rb +117 -0
  242. data/test/hexapdf/type/test_page.rb +598 -0
  243. data/test/hexapdf/type/test_page_tree_node.rb +315 -0
  244. data/test/hexapdf/type/test_resources.rb +209 -0
  245. data/test/hexapdf/type/test_trailer.rb +116 -0
  246. data/test/hexapdf/type/test_xref_stream.rb +143 -0
  247. data/test/hexapdf/utils/test_bit_field.rb +63 -0
  248. data/test/hexapdf/utils/test_bit_stream.rb +69 -0
  249. data/test/hexapdf/utils/test_graphics_helpers.rb +37 -0
  250. data/test/hexapdf/utils/test_lru_cache.rb +22 -0
  251. data/test/hexapdf/utils/test_object_hash.rb +120 -0
  252. data/test/hexapdf/utils/test_pdf_doc_encoding.rb +18 -0
  253. data/test/hexapdf/utils/test_sorted_tree_node.rb +239 -0
  254. data/test/test_helper.rb +58 -0
  255. metadata +263 -3
@@ -0,0 +1,645 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'hexapdf/document'
5
+ require 'hexapdf/parser'
6
+ require 'stringio'
7
+
8
+ describe HexaPDF::Parser do
9
# Per-test setup: creates a fresh document with 'parser.try_xref_reconstruction'
# turned off, registers object 1 (value 10) in it, and builds a parser over the
# PDF fixture below.
# NOTE(review): object 1 is presumably registered so that "/Length 1 0 R" on
# fixture object 3 resolves to 10 -- confirm against the parser implementation.
+ before do
10
+ @document = HexaPDF::Document.new
11
+ @document.config['parser.try_xref_reconstruction'] = false
12
+ @document.add(@document.wrap(10, oid: 1, gen: 0))
13
+
# Fixture: four indirect objects (1 0, 2 0, 3 15 and 4 0), an xref table with the
# two subsections "0 4" and "3 1", a trailer dictionary and "startxref 308".
14
+ create_parser(<<~EOF)
15
+ %PDF-1.7
16
+
17
+ 1 0 obj
18
+ 10
19
+ endobj
20
+
21
+ 2 0 obj
22
+ [ 5 6 <</Length 10 >> (name) <4E6F762073 686D6F7A20 6B612070
23
+ 6F702E>]
24
+ endobj
25
+
26
+ 3 15 obj<< /Length 1 0 R/Hallo 6/Filter /Fl/DecodeParms<<>> >>stream
27
+ Hallo PDF!endstream
28
+ endobj
29
+
30
+ 4 0 obj
31
+ <</Type /XRef /Length 3 /W [1 1 1] /Index [1 1] /Size 2 >> stream
32
+ \x01\x0A\x00
33
+ endstream
34
+ endobj
35
+
36
+ xref
37
+ 0 4
38
+ 0000000000 65535 f
39
+ 0000000010 00000 n
40
+ 0000000029 00000 n
41
+ 0000000000 65535 f
42
+ 3 1
43
+ 0000000556 00000 n
44
+ trailer
45
+ << /Test (now) >>
46
+ startxref
47
+ 308
48
+ %%EOF
49
+ EOF
50
+ end
51
+
52
# Test helper: wraps +str+ in a StringIO (kept in @parse_io) and replaces @parser
# with a new HexaPDF::Parser reading from it and bound to the current @document.
+ def create_parser(str)
53
+ @parse_io = StringIO.new(str)
54
+ @parser = HexaPDF::Parser.new(@parse_io, @document)
55
+ end
56
+
57
# Specs for Parser#parse_indirect_object. The first group runs with the default
# configuration and expects malformed input to be tolerated where possible; the
# nested "with strict parsing" group expects comparable input to raise instead.
+ describe "parse_indirect_object" do
58
# Successive calls walk the setup fixture in order; the result is destructured
# as [object, oid, gen, stream].
+ it "reads indirect objects sequentially" do
59
+ object, oid, gen, stream = @parser.parse_indirect_object
60
+ assert_equal(1, oid)
61
+ assert_equal(0, gen)
62
+ assert_equal(10, object)
63
+ assert_nil(stream)
64
+
65
+ object, oid, gen, stream = @parser.parse_indirect_object
66
+ assert_equal(2, oid)
67
+ assert_equal(0, gen)
68
+ assert_equal([5, 6, {Length: 10}, "name", "Nov shmoz ka pop."], object)
69
+ assert_nil(stream)
70
+
71
+ object, oid, gen, stream = @parser.parse_indirect_object
72
+ assert_equal(3, oid)
73
+ assert_equal(15, gen)
74
+ assert_kind_of(HexaPDF::StreamData, stream)
75
+ assert_equal([:Fl], stream.filter)
76
+ assert_equal([{}], stream.decode_parms)
77
+ assert_equal({Length: 10, Hallo: 6, Filter: :Fl, DecodeParms: {}}, object)
78
+ end
79
+
80
+ it "handles empty indirect objects by using PDF null for them" do
81
+ create_parser("1 0 obj\nendobj")
82
+ object, * = @parser.parse_indirect_object
83
+ assert_nil(object)
84
+ end
85
+
# The next three specs vary the bytes between the "stream" keyword and the data
# (CR only, space + LF, space + CR LF); each still expects the payload '12'.
86
+ it "handles keyword stream followed only by CR without LF" do
87
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
88
+ *, stream = @parser.parse_indirect_object
89
+ assert_equal('12', TestHelper.collector(stream.fiber))
90
+ end
91
+
92
+ it "handles keyword stream followed by space and CR or LF" do
93
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
94
+ *, stream = @parser.parse_indirect_object
95
+ assert_equal('12', TestHelper.collector(stream.fiber))
96
+ end
97
+
98
+ it "handles keyword stream followed by space and CR LF" do
99
+ create_parser("1 0 obj<</Length 2>> stream \r\n12\nendstream endobj")
100
+ *, stream = @parser.parse_indirect_object
101
+ assert_equal('12', TestHelper.collector(stream.fiber))
102
+ end
103
+
104
+ it "handles invalid indirect object value consisting of number followed by endobj without space" do
105
+ create_parser("1 0 obj 749endobj")
106
+ object, * = @parser.parse_indirect_object
107
+ assert_equal(749, object)
108
+ end
109
+
110
+ it "treats indirect objects with invalid values as null objects" do
111
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
112
+ object, * = @parser.parse_indirect_object
113
+ assert_nil(object)
114
+ end
115
+
# /Length says 4 but only the two bytes "12" precede "endstream"; the returned
# dictionary is expected to carry the corrected length 2.
116
+ it "recovers from a stream length value that doesn't reflect the correct length" do
117
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
118
+ obj, _, _, stream = @parser.parse_indirect_object
119
+ assert_equal(2, obj[:Length])
120
+ assert_equal('12', TestHelper.collector(stream.fiber))
121
+ end
122
+
# /Length is the reference "2 0 R", and object 2 is registered as the array [5],
# so the referenced value is not a usable length.
123
+ it "recovers from an invalid stream length value" do
124
+ create_parser("1 0 obj<</Length 2 0 R>> stream\n12endstream endobj")
125
+ @document.add([5], oid: 2)
126
+ obj, _, _, stream = @parser.parse_indirect_object
127
+ assert_equal(2, obj[:Length])
128
+ assert_equal('12', TestHelper.collector(stream.fiber))
129
+ end
130
+
131
+ it "works even if the keyword endobj is missing or mangled" do
132
+ create_parser("1 0 obj<</Length 4>>5")
133
+ object, * = @parser.parse_indirect_object
134
+ assert_equal({Length: 4}, object)
135
+ create_parser("1 0 obj<</Length 4>>endobjk")
136
+ object, * = @parser.parse_indirect_object
137
+ assert_equal({Length: 4}, object)
138
+ end
139
+
# From here to the strict group, the inputs are expected to raise
# MalformedPDFError even with the default configuration.
140
+ it "fails if the oid, gen or 'obj' keyword is invalid" do
141
+ create_parser("a 0 obj\n5\nendobj")
142
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
143
+ assert_match(/No valid object/, exp.message)
144
+ create_parser("1 a obj\n5\nendobj")
145
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
146
+ assert_match(/No valid object/, exp.message)
147
+ create_parser("1 0 dobj\n5\nendobj")
148
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
149
+ assert_match(/No valid object/, exp.message)
150
+ end
151
+
152
+ it "fails if the value of a stream is not a dictionary" do
153
+ create_parser("1 0 obj\n(fail)\nstream\nendstream\nendobj\n")
154
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
155
+ assert_match(/stream.*dictionary/, exp.message)
156
+ end
157
+
# NOTE(review): the explicit argument 0 is presumably a byte offset to parse
# from -- confirm against Parser#parse_indirect_object.
158
+ it "fails if the 'stream' keyword isn't followed by EOL" do
159
+ create_parser("1 0 obj\n<< >>\nstream endstream\nendobj\n")
160
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
161
+ assert_match(/stream.*followed by LF/, exp.message)
162
+ end
163
+
164
+ it "fails if the 'endstream' keyword is missing" do
165
+ create_parser("1 0 obj\n<< >>\nstream\nendobj\n")
166
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
167
+ assert_match(/stream.*followed by.*endstream/i, exp.message)
168
+ end
169
+
# Strict group: 'parser.on_correctable_error' is set to a proc returning true, and
# inputs that were tolerated above are now expected to raise MalformedPDFError.
# NOTE(review): a truthy return value appears to mean "raise instead of
# correcting" -- confirm against the HexaPDF configuration documentation.
170
+ describe "with strict parsing" do
171
+ before do
172
+ @document.config['parser.on_correctable_error'] = proc { true }
173
+ end
174
+
175
+ it "fails if an empty indirect object is found" do
176
+ create_parser("1 0 obj\nendobj")
177
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
178
+ assert_match(/no indirect object value/i, exp.message)
179
+ end
180
+
181
+ it "fails if keyword stream is followed only by CR without LF" do
182
+ create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
183
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
184
+ assert_match(/not CR alone/, exp.message)
185
+ end
186
+
187
+ it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
188
+ create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
189
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
190
+ assert_match(/followed by space instead/, exp.message)
191
+ end
192
+
193
+ it "fails if keyword stream is followed by space and CR LF instead of LF or CR/LF" do
194
+ create_parser("1 0 obj<</Length 2>> stream \r\n12\nendstream endobj")
195
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
196
+ assert_match(/followed by space instead/, exp.message)
197
+ end
198
+
199
+ it "fails for numbers followed by endobj without space" do
200
+ create_parser("1 0 obj 749endobj")
201
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
202
+ assert_match(/Missing whitespace after number/, exp.message)
203
+ end
204
+
205
+ it "fails for invalid values" do
206
+ create_parser("1 0 obj <</test ( /other (end)>> endobj")
207
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
208
+ assert_match(/Invalid value after '1 0 obj'/, exp.message)
209
+ end
210
+
211
+ it "fails if the stream length value is invalid" do
212
+ create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
213
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
214
+ assert_match(/invalid stream length/i, exp.message)
215
+ end
216
+
217
+ it "fails if the keyword endobj is mangled" do
218
+ create_parser("1 0 obj\n<< >>\nendobjd\n")
219
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
220
+ assert_match(/keyword endobj/, exp.message)
221
+ end
222
+
223
+ it "fails if the keyword endobj is missing" do
224
+ create_parser("1 0 obj\n<< >>")
225
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
226
+ assert_match(/keyword endobj/, exp.message)
227
+ end
228
+
229
+ it "fails if there is data between 'endstream' and 'endobj'" do
230
+ create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
231
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
232
+ assert_match(/keyword endobj/, exp.message)
233
+ end
234
+ end
235
+ end
236
+
237
# Specs for Parser#load_object, driven by cross-reference entries built through
# the HexaPDF::XRefSection factory methods (in-use, free, compressed).
+ describe "load_object" do
238
# In-use entry for object 2, generation 0, at byte offset 29; this mirrors the
# "0000000029 00000 n" line of the setup fixture's xref table.
+ before do
239
+ @entry = HexaPDF::XRefSection.in_use_entry(2, 0, 29)
240
+ end
241
+
242
+ it "can load an indirect object" do
243
+ obj = @parser.load_object(@entry)
244
+ assert_kind_of(HexaPDF::Object, obj)
245
+ assert_equal(5, obj.value[0])
246
+ assert_equal(2, obj.oid)
247
+ assert_equal(0, obj.gen)
248
+ end
249
+
250
+ it "can load a free object" do
251
+ obj = @parser.load_object(HexaPDF::XRefSection.free_entry(0, 0))
252
+ assert_kind_of(HexaPDF::Object, obj)
253
+ assert_nil(obj.value)
254
+ end
255
+
# Stubs @document.object with a singleton method so that any oid yields a fake
# object whose parse_stream returns canned ObjectStream::Data; the loaded
# object's value is then expected to be [1, 2].
256
+ it "can load a compressed object" do
257
+ def (@document).object(_oid)
258
+ obj = Object.new
259
+ def obj.parse_stream
260
+ HexaPDF::Type::ObjectStream::Data.new("5 [1 2]", [1, 2], [0, 2])
261
+ end
262
+ obj
263
+ end
264
+
265
+ obj = @parser.load_object(HexaPDF::XRefSection.compressed_entry(2, 3, 1))
266
+ assert_kind_of(HexaPDF::Object, obj)
267
+ assert_equal([1, 2], obj.value)
268
+ end
269
+
270
+ it "handles an invalid indirect object offset of 0" do
271
+ obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
272
+ assert(obj.null?)
273
+ assert_equal(2, obj.oid)
274
+ assert_equal(0, obj.gen)
275
+ end
276
+
# Same offset-0 entry as the spec above, but with 'parser.on_correctable_error'
# returning true the call is expected to raise.
277
+ describe "with strict parsing" do
278
+ it "raises an error if an indirect object has an offset of 0" do
279
+ @document.config['parser.on_correctable_error'] = proc { true }
280
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
281
+ @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
282
+ end
283
+ assert_match(/has offset 0/, exp.message)
284
+ end
285
+ end
286
+
# @document.object is stubbed to return the symbol :invalid rather than an
# object stream.
287
+ it "fails if another object is found instead of an object stream" do
288
+ def (@document).object(_oid)
289
+ :invalid
290
+ end
291
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
292
+ @parser.load_object(HexaPDF::XRefSection.compressed_entry(2, 1, 1))
293
+ end
294
+ assert_match(/not an object stream/, exp.message)
295
+ end
296
+
297
+ it "fails if the xref entry type is invalid" do
298
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
299
+ @parser.load_object(HexaPDF::XRefSection::Entry.new(:invalid))
300
+ end
301
+ assert_match(/invalid cross-reference type/i, exp.message)
302
+ end
303
+
# The entry's generation is changed to 2 while the fixture defines "2 0 obj",
# so the numbers read from the file no longer match the entry.
304
+ it "fails if the object/generation numbers don't match" do
305
+ exp = assert_raises(HexaPDF::MalformedPDFError) do
306
+ @entry.gen = 2
307
+ @parser.load_object(@entry)
308
+ end
309
+ assert_match(/oid,gen.*don't match/, exp.message)
310
+ end
311
+ end
312
+
313
# Specs for Parser#startxref_offset: the value is read from the end of the input,
# cached, and located even when followed by trailing garbage.
+ describe "startxref_offset" do
314
# The underlying IO string is rewritten to "309" after the first call; the second
# call must still return 308.
+ it "caches the offset value" do
315
+ assert_equal(308, @parser.startxref_offset)
316
+ @parser.instance_eval { @io }.string.sub!(/308\n/, "309\n")
317
+ assert_equal(308, @parser.startxref_offset)
318
+ end
319
+
320
+ it "returns the correct offset" do
321
+ assert_equal(308, @parser.startxref_offset)
322
+ end
323
+
324
+ it "ignores garbage at the end of the file" do
325
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
326
+ assert_equal(5, @parser.startxref_offset)
327
+ end
328
+
329
+ it "uses the last startxref if there are more than one" do
330
+ create_parser("startxref\n5\n%%EOF\n\nsome garbage\n\nstartxref\n555\n%%EOF\n")
331
+ assert_equal(555, @parser.startxref_offset)
332
+ end
333
+
# NOTE(review): the 1017 trailing bytes in the second case look like a boundary
# probe for the parser's backwards search window -- confirm against the parser.
334
+ it "finds the startxref anywhere in file" do
335
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
336
+ assert_equal(5, @parser.startxref_offset)
337
+ create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
338
+ assert_equal(5, @parser.startxref_offset)
339
+ end
340
+
341
+ it "fails even in big files when nothing is found" do
342
+ create_parser("\nhallo" * 5000)
343
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
344
+ assert_match(/end-of-file marker not found/, exp.message)
345
+ end
346
+
347
+ it "fails if the %%EOF marker is missing" do
348
+ create_parser("startxref\n5")
349
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
350
+ assert_match(/end-of-file marker not found/, exp.message)
351
+
352
+ create_parser("")
353
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
354
+ assert_match(/end-of-file marker not found/, exp.message)
355
+ end
356
+
357
+ it "fails if the startxref keyword is missing" do
358
+ create_parser("somexref\n5\n%%EOF")
359
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
360
+ assert_match(/missing startxref/, exp.message)
361
+ end
362
+
# Same input as the first case of "finds the startxref anywhere in file", but
# with 'parser.on_correctable_error' returning true it is expected to raise.
363
+ it "fails on strict parsing if the startxref is not in the last part of the file" do
364
+ @document.config['parser.on_correctable_error'] = proc { true }
365
+ create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
366
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
367
+ assert_match(/end-of-file marker not found/, exp.message)
368
+ end
369
+ end
370
+
371
# Specs for Parser#file_header_version, which reads the version from the
# "%PDF-x.y" header line.
+ describe "file_header_version" do
372
+ it "returns the correct version" do
373
+ assert_equal('1.7', @parser.file_header_version)
374
+ end
375
+
376
+ it "fails if the header is mangled" do
377
+ create_parser("%PDF-1\n")
378
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.file_header_version }
379
+ assert_match(/file header/, exp.message)
380
+ end
381
+
382
+ it "fails if the header is missing" do
383
+ create_parser("no header")
384
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.file_header_version }
385
+ assert_match(/file header/, exp.message)
386
+ end
387
+
# "junk" * 200 is 800 bytes, followed by one newline, so "%PDF-1.4" starts at
# byte 801 -- the value expected in @header_offset.
388
+ it "ignores junk at the beginning of the file and correctly calculates offset" do
389
+ create_parser("junk" * 200 << "\n%PDF-1.4\n")
390
+ assert_equal('1.4', @parser.file_header_version)
391
+ assert_equal(801, @parser.instance_variable_get(:@header_offset))
392
+ end
393
+ end
394
+
395
# Asserts that xref_section? is true at the fixture's startxref offset and false
# at offset 53.
+ it "xref_section?" do
396
+ assert(@parser.xref_section?(@parser.startxref_offset))
397
+ refute(@parser.xref_section?(53))
398
+ end
399
+
400
+ describe "parse_xref_section_and_trailer" do
401
# Parses the setup fixture's xref table (subsections "0 4" and "3 1") and checks
# the trailer plus the entries for objects 0, 3 and 1.
+ it "works on a section with multiple sub sections" do
402
+ section, trailer = @parser.parse_xref_section_and_trailer(@parser.startxref_offset)
403
+ assert_equal({Test: 'now'}, trailer)
404
+ assert_equal(HexaPDF::XRefSection.free_entry(0, 65535), section[0, 65535])
405
+ assert_equal(HexaPDF::XRefSection.free_entry(3, 65535), section[3, 65535])
406
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 10), section[1])
407
+ end
408
+
409
# A subsection header of "0 0" declares no entries; only the trailer is checked.
+ it "works for an empty section" do
410
+ create_parser("xref\n0 0\ntrailer\n<</Name /Value >>\n")
411
+ _, trailer = @parser.parse_xref_section_and_trailer(0)
412
+ assert_equal({Name: :Value}, trailer)
413
+ end
414
+
415
# An entry marked "n" (in use) with offset 0 is expected to come back as a free
# entry for object 1.
+ it "handles xref type=n with offset=0" do
416
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
417
+ section, _trailer = @parser.parse_xref_section_and_trailer(0)
418
+ assert_equal(HexaPDF::XRefSection.free_entry(1, 0), section[1])
419
+ end
420
+
421
# An entry marked "n" with generation 65536 (and offset 0) is expected to come
# back as a free entry that keeps that generation.
+ it "handles xref type=n with gen>65535" do
422
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
423
+ section, _trailer = @parser.parse_xref_section_and_trailer(0)
424
+ assert_equal(HexaPDF::XRefSection.free_entry(1, 65536), section[1])
425
+ end
426
+
427
# Same data as the gen>65535 spec, except each entry line ends in "n\n" with no
# space before the newline.
+ it "handles xref with missing whitespace at end" do
428
+ create_parser("xref\n0 2\n0000000000 00000 n\n0000000000 65536 n\ntrailer\n<<>>\n")
429
+ section, _trailer = @parser.parse_xref_section_and_trailer(0)
430
+ assert_equal(HexaPDF::XRefSection.free_entry(1, 65536), section[1])
431
+ end
432
+
433
+ it "fails if the xref keyword is missing/mangled" do
434
+ create_parser("xTEf\n0 d\n0000000000 00000 n \ntrailer\n<< >>\n")
435
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
436
+ assert_match(/keyword xref/, exp.message)
437
+ end
438
+
439
+ it "fails if a sub section header is mangled" do
440
+ create_parser("xref\n0 d\n0000000000 00000 n \ntrailer\n<< >>\n")
441
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
442
+ assert_match(/invalid cross-reference subsection/i, exp.message)
443
+ end
444
+
445
+ it "fails if a sub section entry is mangled" do
446
+ create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
447
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
448
+ assert_match(/invalid cross-reference entry/i, exp.message)
449
+ end
450
+
451
+ it "fails if there is no trailer" do
452
+ create_parser("xref\n0 1\n0000000000 00000 n \n")
453
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
454
+ assert_match(/keyword trailer/i, exp.message)
455
+ end
456
+
457
+ it "fails if the trailer is not a PDF dictionary" do
458
+ create_parser("xref\n0 1\n0000000000 00000 n \ntrailer\n(base)")
459
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
460
+ assert_match(/dictionary/, exp.message)
461
+ end
462
+
463
+ describe "invalid numbering of main xref section" do
464
+ it "handles the xref if the numbering is off by N" do
465
+ create_parser(" 1 0 obj 1 endobj\n" \
466
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
467
+ section, _trailer = @parser.parse_xref_section_and_trailer(17)
468
+ assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
469
+ end
470
+
471
+ it "fails if the first entry is not the one for oid=0" do
472
+ create_parser(" 1 0 obj 1 endobj\n" \
473
+ "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
474
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
475
+ assert_match(/Main.*invalid numbering/i, exp.message)
476
+
477
+ create_parser(" 1 0 obj 1 endobj\n" \
478
+ "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
479
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
480
+ assert_match(/Main.*invalid numbering/i, exp.message)
481
+ end
482
+
483
+ it "fails if the tested entry position is invalid" do
484
+ create_parser(" 1 0 obj 1 endobj\n" \
485
+ "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
486
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
487
+ assert_match(/Main.*invalid numbering/i, exp.message)
488
+ end
489
+
490
+ it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
491
+ create_parser(" 2 0 obj 1 endobj\n" \
492
+ "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
493
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
494
+ assert_match(/Main.*invalid numbering/i, exp.message)
495
+ end
496
+ end
497
+
498
+ describe "with strict parsing" do
499
+ before do
500
+ @document.config['parser.on_correctable_error'] = proc { true }
501
+ end
502
+
503
+ it "fails if xref type=n with offset=0" do
504
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
505
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
506
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
507
+ end
508
+
509
+ it " fails xref type=n with gen>65535" do
510
+ create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
511
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
512
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
513
+ end
514
+
515
+ it "fails if trailing second whitespace is missing" do
516
+ create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
517
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
518
+ assert_match(/invalid.*cross-reference entry/i, exp.message)
519
+ end
520
+
521
+ it "fails if the main cross-reference section has invalid numbering" do
522
+ create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
523
+ exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
524
+ assert_match(/Main.*invalid numbering/i, exp.message)
525
+ end
526
+ end
527
+ end
528
+
529
# Specs for Parser#load_revision, which is expected to return a pair of
# cross-reference section and trailer for both classic sections and
# cross-reference streams.
describe "load_revision" do
  it "works for a simple cross-reference section" do
    start_offset = @parser.startxref_offset
    entries, trailer_dict = @parser.load_revision(start_offset)
    assert_equal({Test: 'now'}, trailer_dict)
    assert(entries[1].in_use?)
  end

  it "works for a cross-reference stream" do
    entries, trailer_dict = @parser.load_revision(212)
    assert_equal({Size: 2}, trailer_dict)
    assert(entries[1].in_use?)
  end

  it "fails if another object is found instead of a cross-reference stream" do
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(10)
    end
    assert_match(/not a cross-reference stream/, error.message)
  end

  it "fails if the cross-reference stream is missing data" do
    # Cut two bytes out of the fixture before loading the stream at offset 212.
    @parse_io.string[287..288] = ''
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(212)
    end
    assert_match(/missing data/, error.message)
    assert_equal(212, error.pos)
  end

  it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
    @document.config['parser.on_correctable_error'] = proc { true }
    pdf_data = "2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
               "stream\n\x01\x0A\x00\nendstream endobj"
    create_parser(pdf_data)
    error = assert_raises(HexaPDF::MalformedPDFError) do
      @parser.load_revision(0)
    end
    assert_match(/entry for itself/, error.message)
  end
end
562
+
563
# Specs for the cross-reference reconstruction path, enabled through the
# 'parser.try_xref_reconstruction' configuration option.
describe "reconstruct_revision" do
  before do
    @document.config['parser.try_xref_reconstruction'] = true
    # NOTE(review): offset 100 presumably does not point at a valid object in
    # the small fixtures below, so that load_object has to fall back to the
    # reconstructed revision - confirm against the parser implementation.
    @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
  end

  it "serially parses the contents" do
    create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  it "uses a security handler for decrypting indirect objects if necessary" do
    handler = Minitest::Mock.new
    handler.expect(:decrypt, HexaPDF::Object.new(:result, oid: 1), [HexaPDF::Object])
    @document.instance_variable_set(:@security_handler, handler)
    create_parser("1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(:result, @parser.load_object(@xref).value)
    assert(handler.verify)
  end

  it "ignores parts where the starting line is split across lines" do
    create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(5, @parser.load_object(@xref).value)
  end

  it "handles the case when the specified object had an xref entry but is not found" do
    create_parser("3 0 obj\n5\nendobj\ntrailer\n<</Size 1>>")
    assert(@parser.load_object(@xref).null?)
  end

  it "handles cases where the line contains an invalid string that exceeds the read buffer" do
    create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  it "handles pathological cases which contain many opened literal strings" do
    # Use the monotonic clock: unlike Time.now it cannot jump when the system
    # clock is adjusted, so the duration check stays reliable.
    start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
    assert(elapsed < 0.5, "Xref reconstruction takes too long")
  end

  it "ignores invalid objects" do
    create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(4, @parser.load_object(@xref).value)
  end

  it "ignores invalid lines" do
    create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
    assert_equal(6, @parser.load_object(@xref).value)
  end

  # In the trailer specs below, a /Prev entry present in the input is not
  # expected to show up in the reconstructed trailer.
  it "uses the last trailer" do
    create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
    assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
  end

  it "uses the first trailer in case of a linearized file" do
    create_parser("1 0 obj\n<</Linearized true>>\nendobj\ntrailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
    assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
  end

  it "tries the trailer specified at the startxref position if no other is found" do
    create_parser("1 0 obj\n5\nendobj\nquack xref trailer <</Size 1/Prev 5>>\nstartxref\n22\n%%EOF")
    assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
  end

  it "constructs a trailer with a /Root entry if no valid trailer was found" do
    create_parser("1 0 obj\n<</Type /Catalog/Pages 2 0 R>>\nendobj\nxref trailer <</Size 1/Prev 5\n%%EOF")
    assert_equal({Root: HexaPDF::Reference.new(1, 0)}, @parser.reconstructed_revision.trailer.value)
  end

  it "fails if no valid trailer is found and couldn't be constructed" do
    create_parser("1 0 obj\n5\nendobj\nquack trailer <</Size 1>>\nstartxref\n22\n%%EOF")
    assert_raises(HexaPDF::MalformedPDFError) { @parser.reconstructed_revision.trailer }
  end

  it "fails if no valid trailer is found" do
    create_parser("1 0 obj\n5\nendobj")
    assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
  end
end
645
+ end