rpdf2txt 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. data/History.txt +5 -0
  2. data/LICENCE +515 -0
  3. data/Manifest.txt +126 -0
  4. data/README.txt +30 -0
  5. data/Rakefile +24 -0
  6. data/bin/rpdf2txt +58 -0
  7. data/config.save +12 -0
  8. data/install.rb +1098 -0
  9. data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
  10. data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
  11. data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
  12. data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
  13. data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
  14. data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
  15. data/lib/rpdf2txt-rockit/grammar.rb +644 -0
  16. data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
  17. data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
  18. data/lib/rpdf2txt-rockit/indexable.rb +53 -0
  19. data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
  20. data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
  21. data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
  22. data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
  23. data/lib/rpdf2txt-rockit/profiler.rb +168 -0
  24. data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
  25. data/lib/rpdf2txt-rockit/rockit.rb +76 -0
  26. data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
  27. data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
  28. data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
  29. data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
  30. data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
  31. data/lib/rpdf2txt-rockit/token.rb +364 -0
  32. data/lib/rpdf2txt-rockit/version.rb +3 -0
  33. data/lib/rpdf2txt/attributesparser.rb +42 -0
  34. data/lib/rpdf2txt/cmapparser.rb +65 -0
  35. data/lib/rpdf2txt/data/_cmap.grammar +11 -0
  36. data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
  37. data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
  38. data/lib/rpdf2txt/data/cmap.grammar +11 -0
  39. data/lib/rpdf2txt/data/cmap.rb +37 -0
  40. data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
  41. data/lib/rpdf2txt/data/cmap_range.rb +43 -0
  42. data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
  43. data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
  44. data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
  45. data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
  46. data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
  47. data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
  48. data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
  49. data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
  50. data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
  51. data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
  52. data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
  53. data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
  54. data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
  55. data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
  56. data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
  57. data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
  58. data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
  59. data/lib/rpdf2txt/data/pdftext.grammar +102 -0
  60. data/lib/rpdf2txt/data/pdftext.rb +146 -0
  61. data/lib/rpdf2txt/default_handler.rb +352 -0
  62. data/lib/rpdf2txt/lzw.rb +69 -0
  63. data/lib/rpdf2txt/object.rb +1114 -0
  64. data/lib/rpdf2txt/parser.rb +169 -0
  65. data/lib/rpdf2txt/symbol.rb +408 -0
  66. data/lib/rpdf2txt/text.rb +182 -0
  67. data/lib/rpdf2txt/text_state.rb +434 -0
  68. data/lib/rpdf2txt/textparser.rb +42 -0
  69. data/test/data/3392_obj +0 -0
  70. data/test/data/397_decrypted +15 -0
  71. data/test/data/450_decrypted +153 -0
  72. data/test/data/450_obj +0 -0
  73. data/test/data/452_decrypted +125 -0
  74. data/test/data/454_decrypted +108 -0
  75. data/test/data/456_decrypted +106 -0
  76. data/test/data/458_decrypted +111 -0
  77. data/test/data/458_obj +0 -0
  78. data/test/data/460_decrypted +118 -0
  79. data/test/data/460_obj +0 -0
  80. data/test/data/463_decrypted +117 -0
  81. data/test/data/465_decrypted +107 -0
  82. data/test/data/465_obj +0 -0
  83. data/test/data/90_obj +0 -0
  84. data/test/data/90_obj_comp +1 -0
  85. data/test/data/decrypted +0 -0
  86. data/test/data/encrypt_obj +0 -0
  87. data/test/data/encrypt_string +0 -0
  88. data/test/data/encrypt_string_128bit +0 -0
  89. data/test/data/encrypted_object_stream.pdf +0 -0
  90. data/test/data/firststream +1 -0
  91. data/test/data/index.pdfobj +0 -0
  92. data/test/data/index_2bit.pdfobj +0 -0
  93. data/test/data/index_masked.pdfobj +0 -0
  94. data/test/data/indexed.pdfobj +0 -0
  95. data/test/data/indexed_2bit.pdfobj +0 -0
  96. data/test/data/indexed_masked.pdfobj +0 -0
  97. data/test/data/inline.png +0 -0
  98. data/test/data/logo.png +0 -0
  99. data/test/data/lzw.pdfobj +0 -0
  100. data/test/data/lzw_index.pdfobj +0 -0
  101. data/test/data/page_tree.pdf +148 -0
  102. data/test/data/pdf_20.png +0 -0
  103. data/test/data/pdf_21.png +0 -0
  104. data/test/data/pdf_22.png +0 -0
  105. data/test/data/pdf_50.png +0 -0
  106. data/test/data/png.pdfobj +0 -0
  107. data/test/data/space_bug_stream.txt +119 -0
  108. data/test/data/stream.txt +292 -0
  109. data/test/data/stream_kerning_bug.txt +13 -0
  110. data/test/data/stream_kerning_bug2.txt +6 -0
  111. data/test/data/test.pdf +0 -0
  112. data/test/data/test.txt +8 -0
  113. data/test/data/test_text.txt +42 -0
  114. data/test/data/working_obj +0 -0
  115. data/test/data/working_obj2 +0 -0
  116. data/test/mock.rb +149 -0
  117. data/test/suite.rb +30 -0
  118. data/test/test_pdf_object.rb +1802 -0
  119. data/test/test_pdf_parser.rb +1340 -0
  120. data/test/test_pdf_text.rb +789 -0
  121. data/test/test_space_bug_05_2004.rb +87 -0
  122. data/test/test_stream.rb +194 -0
  123. data/test/test_text_state.rb +315 -0
  124. data/usage-en.txt +112 -0
  125. data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
  126. data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
  127. metadata +220 -0
@@ -0,0 +1,13 @@
1
+ BT
2
+ 0 8.003 -7.9999 0 348.36 176.4505 Tm
3
+ -0.0014 Tc
4
+ -16.6438 0 TD
5
+ 0.0009 Tw
6
+ 67.3249 0 TD
7
+ -0.001 Tc
8
+ 0 Tw
9
+ -45.0132 0 TD
10
+ -0.0106 Tc
11
+ 0.0177 Tw
12
+ [(RA)-18.4(TI)-17.7(OP)-18.4(HA)-18.4(RM)-9.8( GM)-17.3(BH)-28385.1(2)-9.2(0)-1343.7(S)-10.9(t)]TJ
13
+ ET
@@ -0,0 +1,6 @@
1
+ BT
2
+ 0 8.003 -7.9999 0 382.32 176.4505 Tm
3
+ 0.0008 Tc
4
+ -0.0012 Tw
5
+ [(H)10.7(E)-7(U)10.7(M)1.6(ANN PH )7.5(GM)9.1(B)-7(H)10.7(&)-7(C)10.7(O.)8.7( )-7.5(K)8(G)-24157.1(20)-1332.3(St)]TJ
6
+ ET
Binary file
@@ -0,0 +1,8 @@
1
+ q Q q 18 40 576 734 re W n /Cs1 cs 0 0 0 sc q 1 0 0 -1 18 774 cm BT
2
+ 10 0 0 -10 510 12 Tm /F1.0 1 Tf (Page 1 of 1) Tj ET Q q 1 0 0 -1 18
3
+ 774 cm BT 10 0 0 -10 0 12 Tm /F1.0 1 Tf (untitled text) Tj ET Q q 1
4
+ 0 0 -1 18 774 cm BT 10 0 0 -10 0 24 Tm /F1.0 1 Tf (Printed: Donnerstag, 14. November 2002 14:04:29 Uhr)
5
+ Tj ET Q 0.25 w /Cs1 CS 0 0 0 SC q 1 0 0 -1 18 774 cm 0 36.125 m 576.25
6
+ 36.125 l S Q q 1 0 0 -1 18 774 cm 0 34.125 m 576.25 34.125 l S Q Q
7
+ q 18 40 576 694 re W n /Cs1 cs 0 0 0 sc q 1 0 0 -1 18 774 cm BT 10
8
+ 0 0 -10 1 52 Tm /F2.0 1 Tf (testpdf) Tj ET Q Q
@@ -0,0 +1,42 @@
1
+ BT
2
+ 10 0 0 10 42.7953 670.6528 Tm
3
+ -0.0002 Tc
4
+ 0 Tw
5
+ [(Zul.-Nr)91.8(.: )]TJ
6
+ /F8 1 Tf
7
+ 3.8772 0 TD
8
+ -0.0001 Tc
9
+ (55921)Tj
10
+ /F3 1 Tf
11
+ 8.8787 0 TD
12
+ (Abgabekategorie: )Tj
13
+ /F8 1 Tf
14
+ 8.6923 0 TD
15
+ 0 Tc
16
+ (D)Tj
17
+ /F3 1 Tf
18
+ 4.0636 0 TD
19
+ -0.0001 Tc
20
+ -0.0305 Tw
21
+ [(Index: 02.98.0.)-9563.3(18.10.2002)]TJ
22
+ -25.5118 -2.2428 TD
23
+ 0 Tw
24
+ [(Zusammensetzung:)-921(01)]TJ
25
+ 8.3 0 0 8.3 156.1811 648.2247 Tm
26
+ 0.1715 Tw
27
+ [(CRA)73.6(T)54.8(AEGI FOLII cum FLORE EXTRACTUM ETHANOLICUM SICCUM 450)-278.1(mg, EXCIPIENS pro)]TJ
28
+ 0 -1.3661 TD
29
+ -0.0306 Tw
30
+ (COMPRESSO OBDUCTO.)Tj
31
+ 10 0 0 10 42.7953 622.7121 Tm
32
+ -0.0304 Tw
33
+ [(Anwendung:)-5285.1(Bei nervösen Herzbeschwerden)]TJ
34
+ 0 -1.4174 TD
35
+ 0 Tw
36
+ [(Packungen:)-4532.5(01)-305.3(002)-7141.5(50)-566.7(Filmtabletten)-12429.9(D)]TJ
37
+ 11.3386 -1.4174 TD
38
+ [(042)-6585.6(100)-566.7(Filmtabletten)-12429.9(D)]TJ
39
+ -11.3386 -1.4174 TD
40
+ -0.0304 Tw
41
+ [(Gültig bis:)-6647.6(17. Oktober 2007)]TJ
42
+ ET
Binary file
Binary file
@@ -0,0 +1,149 @@
1
+ # Ruby/Mock version 1.0
2
+ #
3
+ # A class for conveniently building mock objects in RUnit test cases.
4
+ # Copyright (c) 2001 Nat Pryce, all rights reserved
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program; if not, write to the Free Software
17
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
+
19
+ require 'runit/error'
20
+
21
+
22
class Mock
  # Creates a new, named mock object. The name is reported in the
  # exceptions raised by the mock object when it is called incorrectly.
  #
  # mock_name:: label used in failure messages; defaults to +self.to_s+.
  def initialize(mock_name = self.to_s)
    @mock_calls = []  # scheduled [name, behaviour] pairs, in call order
    @next_call = 0    # index of the next scheduled call to consume
    @name = mock_name
  end

  # Schedules the next method call expected on this mock object.
  #
  # A mocked call is described by the method name (a symbol) and a block
  # that fixes the arity of the method and supplies the mocked behaviour
  # for this one call. The behaviour should assert preconditions and
  # return a value; anything more complex suggests that the test or the
  # tested code needs restructuring.
  #
  # If no block is given and a precondition has been defined for the named
  # method, a behaviour that accepts any arguments and returns self is
  # scheduled instead.
  #
  # Raises RuntimeError when neither a block nor a precondition exists.
  def __next(name, &test)
    if test.nil?
      unless respond_to?(Mock.__pre(name))
        raise "no block given for mocked method #{name}"
      end
      test = proc { |*args| self }
    end
    @mock_calls.push([name, test])
  end

  # Call this at the end of a test to ensure that every scheduled call
  # has actually been made to the mock.
  #
  # Raises RUNIT::AssertionFailedError when scheduled calls remain.
  def __verify
    if @next_call != @mock_calls.length
      raise RUNIT::AssertionFailedError,
            "not all expected method calls were made to #{@name}",
            caller
    end
  end

  private

  # Dispatches arbitrary method calls to the next mocked behaviour.
  #
  # The caller's block is captured through an explicit &block parameter:
  # calling Kernel#proc without a block to grab it raises ArgumentError
  # on Ruby 3.0 and later.
  def method_missing(name, *args, &block)
    __mock_call(name, args, block)
  end

  # Implements a method call using the next mocked behaviour and asserts
  # that the expected method is called with the expected number of
  # arguments.
  #
  # name::  name of the method that was invoked on the mock
  # args::  positional arguments of that invocation
  # block:: block passed to the invocation, or nil
  #
  # Returns whatever the scheduled behaviour returns.
  # Raises RUNIT::AssertionFailedError for an unscheduled call, a call to
  # the wrong method, an arity mismatch or a violated precondition.
  def __mock_call(name, args, block)
    if @next_call >= @mock_calls.length
      raise RUNIT::AssertionFailedError,
            "unexpected call to #{name} method of #{@name}",
            caller(2)
    end

    expected_name, body = @mock_calls[@next_call]
    @next_call += 1

    if name != expected_name
      raise RUNIT::AssertionFailedError,
            "wrong method called on #{@name}; " +
            "expected #{expected_name}, was #{name}",
            caller(2)
    end

    # A block given to the mocked method is handed to the behaviour as its
    # first argument, so it counts towards the arity check.
    args_length = args.length + (block ? 1 : 0)

    if body.arity < 0
      # Negative arity -(n+1) means n required arguments plus optional ones.
      required = (body.arity + 1).abs
      if required > args_length
        raise RUNIT::AssertionFailedError,
              "too few arguments to #{name} method of #{@name}; " +
              "require #{required}, got #{args.length}",
              caller(2)
      end
    elsif body.arity != args_length
      raise RUNIT::AssertionFailedError,
            "wrong number of arguments to " +
            "#{name} method of #{@name}; " +
            "require #{body.arity}, got #{args.length}",
            caller(2)
    end

    precondition = Mock.__pre(name)
    if respond_to?(precondition)
      # Passing a nil block is the same as passing no block at all.
      unless __send__(precondition, *args, &block)
        raise RUNIT::AssertionFailedError,
              "precondition of #{name} method violated",
              caller(2)
      end
    end

    block ? body.call(block, *args) : body.call(*args)
  end

  # The name under which the precondition for a method is stored.
  #
  # The name is derived from the method's textual name. The previous
  # implementation used Symbol#to_i, which no longer exists as of Ruby 1.9
  # and made every call raise NoMethodError.
  def Mock.__pre(method)
    "__pre_#{method}".intern
  end

  # Hook run for every method defined from here on (in practice: in
  # subclasses). The method is renamed to its precondition name and the
  # original name is undefined, so that calls to it reach method_missing.
  #
  # This hook must stay the last definition in the class body; otherwise
  # the mock's own methods above would be renamed as well.
  def Mock.method_added(name)
    unless /^__pre_/.match(name.to_s)
      pre = self.__pre(name)
      alias_method(pre, name)
      undef_method(name)
    end
  end
end
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rpdf2txt -- PDF to Text Parser
4
+ # Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com, aschrafl@ywesee.com
22
+ #
23
+ # TestSuite -- Rpdf2txt -- 27.11.2002 -- aschrafl@ywesee.com
24
+
25
# Put the directory that holds this suite on the load path and set $KCODE,
# then require every file next to the suite whose name matches test_*.rb.
suite_dir = File.dirname(File.expand_path(__FILE__))
$: << suite_dir
$KCODE = 'u'

Dir.foreach(File.dirname(__FILE__)) do |file|
  next unless /^test_.*\.rb$/o.match(file)
  require file
end
@@ -0,0 +1,1802 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rpdf2txt -- PDF to Text Parser
4
+ # Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # zdvatz@ywesee.com, mhatakeyama@ywesee.com
22
+ #
23
+ # TestPdfObject -- Rpdf2txt -- 21.11.2002 -- aschrafl@ywesee.com
24
+
25
+ $KCODE = 'u'
26
+ $: << File.expand_path('../lib', File.dirname(__FILE__))
27
+
28
+ require 'test/unit'
29
+ require 'tempfile'
30
+ require 'rpdf2txt/object'
31
+ require 'rpdf2txt/default_handler'
32
+
33
+ module Rpdf2txt
34
+ class PdfObject
35
+ attr_accessor :attributes
36
+ end
37
+ class TrailerDictionary
38
+ public
39
+ attr_accessor :attributes
40
+ end
41
+ class PageLeaf < TreeNode
42
+ attr_accessor :contents, :resources
43
+ public :join_snippets
44
+ end
45
+ class CMap < Stream
46
+ public :extract_bfchar, :extract_bfrange
47
+ end
48
+ class TestCmap < Test::Unit::TestCase
49
+ def setup
50
+ @input_bfchar = <<-EOS
51
+ 25 0 obj
52
+ <</Length 357
53
+ >>
54
+ stream
55
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
56
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
57
+ /CMapName /TT11+0 def
58
+ /CMapType 2 def
59
+ 1 begincodespacerange <004a> <0074> endcodespacerange
60
+ 3 beginbfchar
61
+ <004a> <03B3>
62
+ <0064> <2264>
63
+ <0074> <2265>
64
+ endbfchar
65
+ endcmap CMapName currentdict /CMap defineresource pop end end
66
+
67
+ endstream
68
+ endobj
69
+ EOS
70
+ @input_bfrange = <<-EOS
71
+ 75 0 obj
72
+ <</Length 338
73
+ >>
74
+ stream
75
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
76
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
77
+ /CMapName /TT11+0 def
78
+ /CMapType 2 def
79
+ 1 begincodespacerange <0044> <0045> endcodespacerange
80
+ 1 beginbfrange
81
+ <0044> <0045> <03B1>
82
+ endbfrange
83
+ endcmap CMapName currentdict /CMap defineresource pop end end
84
+
85
+ endstream
86
+ endobj
87
+ EOS
88
+ end
89
+ def test_parser_grammar_bfchar
90
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
91
+ assert_nothing_raised{
92
+ ast= Rpdf2txt.cmap_parser.parse(cmap.extract_bfchar)
93
+ }
94
+ end
95
+ def test_extract_attributes_bfchar
96
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
97
+ expected = {:length => "357"}
98
+ assert_equal(expected, cmap.attributes)
99
+ end
100
+ def test_cmap_bfchar
101
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
102
+ assert_equal(8805, cmap.map[116])
103
+ assert_equal(8804, cmap.map[100])
104
+ assert_equal(947, cmap.map[74])
105
+ end
106
+ def test_parser_grammar_bfrange
107
+ cmap = Rpdf2txt::CMap.new(@input_bfrange)
108
+ assert_nothing_raised{
109
+ ast= Rpdf2txt.cmap_range_parser.parse(cmap.extract_bfrange)
110
+ }
111
+ end
112
+ def test_cmap_bfrange
113
+ cmap = Rpdf2txt::CMap.new(@input_bfrange)
114
+ assert_equal(945, cmap.map[68])
115
+ assert_equal(946, cmap.map[69])
116
+ end
117
+ def test_cmap_bfrange_array
118
+ input_bfrange = <<-EOS
119
+ 75 0 obj
120
+ <</Length 338
121
+ >>
122
+ stream
123
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
124
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
125
+ /CMapName /TT11+0 def
126
+ /CMapType 2 def
127
+ 1 begincodespacerange <0094> <0095> endcodespacerange
128
+ 1 beginbfrange
129
+ <0094> <0095> [ <2264> <2265> ] <0024> <0025> [ <2224> <2225> ]
130
+ endbfrange
131
+ endcmap CMapName currentdict /CMap defineresource pop end end
132
+
133
+ endstream
134
+ endobj
135
+ EOS
136
+ cmap = Rpdf2txt::CMap.new(input_bfrange)
137
+ assert_equal(8804, cmap.map[148])
138
+ assert_equal(8805, cmap.map[149])
139
+ assert_equal(8740, cmap.map[36])
140
+ assert_equal(8741, cmap.map[37])
141
+ end
142
+ end
143
+ class TestPdfObject < Test::Unit::TestCase
144
+ def setup
145
+ input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj'
146
+ @tree_node = Rpdf2txt::TreeNode.new(input)
147
+ end
148
+ def test_tree_node1
149
+ input = '4 0 obj << /Type /Pages /Kids [ 7 0 R 8 0 R ] /Count 2 >> endobj'
150
+ node = Rpdf2txt::TreeNode.new(input)
151
+ assert_equal(4, node.oid)
152
+ assert_equal(["7 0 R", "8 0 R"], node.attributes[:kids])
153
+ assert_equal(nil, node.attributes[:contents])
154
+ assert_equal(nil, node.attributes[:parent])
155
+ assert_equal(true, node.root?)
156
+ end
157
+ def test_tree_node2
158
+ input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj'
159
+ node = Rpdf2txt::TreeNode.new(input)
160
+ assert_equal(3, node.oid)
161
+ assert_equal(nil, node.attributes[:kids])
162
+ assert_equal('6 0 R', node.attributes[:contents])
163
+ assert_equal('2 0 R', node.attributes[:parent])
164
+ assert_equal(false, node.root?)
165
+ end
166
+ def test_tree_node3
167
+ input = '3 0 obj << /Type /Page /Parent 2 0 R / 2 0 R >> endobj'
168
+ node = nil
169
+ assert_nothing_raised {
170
+ node = Rpdf2txt::TreeNode.new(input)
171
+ }
172
+ end
173
+ def test_tree_node4
174
+ src = '
175
+ 400 0 obj
176
+ <<
177
+ /Title (���\)����\\��P�T#/��-&��;S��O�A)
178
+ /Parent 399 0 R
179
+ /A 436 0 R
180
+ /Next 433 0 R
181
+ >>
182
+ endobj
183
+ '
184
+ node = Rpdf2txt::TreeNode.new(src)
185
+ assert_equal(400, node.oid)
186
+ assert_equal('433 0 R', node.attributes[:next])
187
+ end
188
+ def test_tree_node5
189
+ src = '
190
+ 124 0 obj
191
+ <<
192
+ /Type /Font
193
+ /Subtype /CIDFontType2
194
+ /BaseFont /HAGNPN+SymbolMT
195
+ /FontDescriptor 122 0 R
196
+ /CIDSystemInfo << /Registry (y�>�)/Ordering (q�4�6ZB)/Supplement 0 >>
197
+ /DW 1000
198
+ /W [ 74 [ 411 ] 100 [ 548 ] 116 [ 548 ] ]
199
+ >>
200
+ endobj
201
+ '
202
+ node = Rpdf2txt::TreeNode.new(src)
203
+ assert_equal(124, node.oid)
204
+ end
205
+ def test_tree_node6
206
+ src = '
207
+ 198 0 obj
208
+ <<
209
+ /S /Standard#20#28Web#29
210
+ /C /Standard#20#28Web#29
211
+ /Pg 11 0 R
212
+ /P 346 0 R
213
+ /K [ 13 << /Type /MCR /Pg 21 0 R /MCID 0 >> ]
214
+ >>
215
+ endobj
216
+ '
217
+ node = Rpdf2txt::TreeNode.new(src)
218
+ assert_equal(198, node.oid)
219
+ end
220
+ def test_tree_node7
221
+ src = '
222
+ 345 0 obj
223
+ <<
224
+ /S /Standard
225
+ /C /Standard
226
+ /Pg 111 0 R
227
+ /K 17
228
+ /P 346 0 R
229
+ >>
230
+ endobj'
231
+ node = Rpdf2txt::TreeNode.new(src)
232
+ assert_equal(345, node.oid)
233
+ end
234
+ def test_tree_node8
235
+ src = '
236
+ 346 0 obj
237
+ <<
238
+ /S /Sect
239
+ /P 396 0 R
240
+ /K [ 347 0 R 143 0 R 144 0 R 352 0 R 149 0 R 150 0 R 151 0 R 153 0 R 154 0 R
241
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
242
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
243
+ 171 0 R 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R
244
+ 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R 186 0 R
245
+ 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R
246
+ 195 0 R 196 0 R 197 0 R 198 0 R 199 0 R 200 0 R 201 0 R 202 0 R
247
+ 203 0 R 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R
248
+ 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R 217 0 R 218 0 R
249
+ 219 0 R 220 0 R 221 0 R 222 0 R 223 0 R 224 0 R 225 0 R 226 0 R
250
+ 227 0 R 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R
251
+ 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R
252
+ 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R
253
+ 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R
254
+ 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R
255
+ 267 0 R 268 0 R 269 0 R 270 0 R 271 0 R 272 0 R 273 0 R 274 0 R
256
+ 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 281 0 R 282 0 R
257
+ 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R 289 0 R 290 0 R
258
+ 291 0 R 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R 297 0 R 298 0 R
259
+ 299 0 R 300 0 R 301 0 R 302 0 R 303 0 R 304 0 R 305 0 R 306 0 R
260
+ 307 0 R 308 0 R 309 0 R 310 0 R 311 0 R 312 0 R 313 0 R 314 0 R
261
+ 315 0 R 316 0 R 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R
262
+ 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R 329 0 R 330 0 R 331 0 R
263
+ 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R
264
+ 340 0 R 360 0 R 344 0 R 345 0 R ]
265
+ >>
266
+ endobj'
267
+ node = Rpdf2txt::TreeNode.new(src)
268
+ assert_equal(346, node.oid)
269
+ end
270
+ def test_tree_node9
271
+ src = '
272
+ 346 0 obj
273
+ <<
274
+ /S /Sect
275
+ /P 396 0 R
276
+ /K [
277
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
278
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
279
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
280
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
281
+ 190 ]
282
+ >>
283
+ endobj'
284
+ node = Rpdf2txt::TreeNode.new(src)
285
+ assert_equal(346, node.oid)
286
+ end
287
+ def test_tree_node10
288
+ src = '
289
+ 198 0 obj
290
+ <<
291
+ /S /Standard#20#28Web#29
292
+ /C /Standard#20#28Web#29
293
+ /Pg 11 0 R
294
+ /P 346 0 R
295
+ /K [ ]
296
+ >>
297
+ endobj'
298
+ node = Rpdf2txt::TreeNode.new(src)
299
+ assert_equal(198, node.oid)
300
+ end
301
+ def test_extract_oids
302
+ input = '6 0 R'
303
+ assert_equal(@tree_node.extract_oids(input), [6])
304
+ input = ["7 0 R", "8 0 R"]
305
+ assert_equal(@tree_node.extract_oids(input), [7,8])
306
+ end
307
+ def test_parse_content_from_complex_attributes
308
+ src = <<-ENDOFSRC
309
+ 46 0 obj
310
+ <<
311
+ /Type /Page
312
+ /Parent 543 0 R
313
+ /Resources << /Font << /F2 575 0 R /T1_0 504 0 R /F4 573 0 R /T1_4 512 0 R /T1_3 511 0 R >>
314
+ /Shading << /S12 508 0 R >> /XObject << /Im4 51 0 R >> /ExtGState 47 0 R
315
+ /ProcSet [ /PDF /Text /ImageB ] /ColorSpace 534 0 R >>
316
+ /Contents 48 0 R
317
+ /BleedBox [ 0 0 651 898 ]
318
+ /MediaBox [ 0 0 651 898 ]
319
+ /TrimBox [ 28 28 623 870 ]
320
+ /CropBox [ 28 28 623 870 ]
321
+ /ArtBox [ 28 28 623 870 ]
322
+ /LastModified (D:20021210105029+01')
323
+ /Rotate 0
324
+ >>
325
+ endobj
326
+ ENDOFSRC
327
+ obj = Rpdf2txt::PdfObject.new(src)
328
+ attributes = obj.attributes
329
+ assert_equal(Hash, attributes.class)
330
+ assert_equal(11, attributes.size)
331
+ assert_equal(0, obj.revision_id)
332
+ end
333
+ def test_parse_content_from_complex_attributes2
334
+ src = <<-ENDOFSRC
335
+ 568 0 obj
336
+ <<
337
+ /Linearized 1
338
+ /O 570
339
+ /H [ 1049 1249 ]
340
+ /L 910845
341
+ /E 169588
342
+ /N 108
343
+ /T 899366
344
+ >>
345
+ endobj
346
+
347
+ ENDOFSRC
348
+ obj = Rpdf2txt::PdfObject.new(src)
349
+ attributes = obj.attributes
350
+ assert_equal(Hash, attributes.class)
351
+ assert_equal(7, attributes.size)
352
+ end
353
+ def test_parse_pantone
354
+ src = <<-ENDOFSRC
355
+ 2 0 obj
356
+ <<
357
+ /JT 150 0 R
358
+ /AGFA_NORN_V (ES15.101 V03)
359
+ /AGFA_PSE_V (Apogee Norm PSE 1.1 23 )
360
+ /AGFA_CMYKCCN << /PANTONE#20379#20CV [ 0.08501 0 0.60001 0 ] /PANTONE#20192#20CV [ 0 0.94 0.64999 0 ]
361
+ /PANTONE#20199#20CV [ 0 1 0.64999 0 ] /PANTONE#20383#20CV [ 0.185 0 1 0.185 ]
362
+ /PANTONE#20375#20CV [ 0.42999 0 0.78999 0 ] /PANTONE#20100#20CV [ 0 0 0.50999 0 ]
363
+ /PANTONE#20281#20CV [ 1 0.72 0 0.38 ] /PANTONE#20185#20CV [ 0 0.91 0.75999 0 ]
364
+ /PANTONE#20377#20CV [ 0.42999 0 1 0.235 ] /PANTONE#203015#20CV [ 1 0.235 0 0.185 ]
365
+ /PANTONE#20195#20CV [ 0 0.75999 0.56 0.56 ] /PANTONE#20381#20CV [ 0.185 0 0.91 0 ]
366
+ /PANTONE#20Cl#20Gy#207#20CV [ 0 0 0 0.47 ] /PANTONE#20137#20CV [ 0 0.34 0.91 0 ]
367
+ /PANTONE#20397#20CV [ 0.11501 0 1 0.11501 ] /PANTONE#20322#20CV [ 1 0 0.38 0.30499 ]
368
+ /PANTONE#20382#20CV [ 0.30499 0 0.94 0 ] /PANTONE#20376#20CV [ 0.56 0 1 0 ] >>
369
+ /Type /Catalog
370
+ /Pages 55 0 R
371
+ /Outlines 15 1 R
372
+ >>
373
+ endobj
374
+ ENDOFSRC
375
+ obj = Rpdf2txt::PdfObject.new(src)
376
+ attributes = obj.attributes
377
+ assert_equal(Hash, attributes.class)
378
+ assert_equal(7, attributes.size)
379
+ end
380
+ def test_parse_escaped
381
+ src =
382
+ '<<
383
+ /O (foo\\)
384
+ >>'
385
+ obj = nil
386
+ assert_nothing_raised {
387
+ obj = Rpdf2txt::PdfObject.new(src)
388
+ }
389
+ assert_equal({:o => 'foo\\'}, obj.attributes)
390
+ end
391
+ def test_parse_limits
392
+ src = <<-EOS
393
+ 31 0 obj
394
+ <<
395
+ /Limits [ <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
396
+ 007000740069006F006E0073> <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
397
+ 007000740069006F006E0073> ]
398
+ /Names [ <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
399
+ 007000740069006F006E0073> 141 0 R ]
400
+ >>
401
+ endobj
402
+ EOS
403
+ obj = nil
404
+ assert_nothing_raised {
405
+ obj = Rpdf2txt::PdfObject.new(src)
406
+ }
407
+ expected = {
408
+ :names => ["<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>",
409
+ "141 0 R"],
410
+ :limits => ["<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>",
411
+ "<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>"]}
412
+ assert_equal expected, obj.attributes
413
+ end
414
+ end
415
+ class TestText < Test::Unit::TestCase
416
+ def test_get_font
417
+ font_src = <<-EOS
418
+ 580 0 obj
419
+ <<
420
+ /Type /Font
421
+ /Subtype /Type1
422
+ /FirstChar 32
423
+ /LastChar 240
424
+ /Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278
425
+ 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500
426
+ 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778
427
+ 556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500
428
+ 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611
429
+ 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278
430
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611
431
+ 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611
432
+ 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0
433
+ 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
434
+ 0 0 0 0 0 278 ]
435
+ /Encoding /MacRomanEncoding
436
+ /BaseFont /Frutiger-BoldItalic
437
+ /FontDescriptor 579 0 R
438
+ >>
439
+ endobj
440
+ EOS
441
+ page_src = <<-EOS
442
+ 570 0 obj
443
+ <<
444
+ /Type /Page
445
+ /Parent 540 0 R
446
+ /Resources 571 0 R
447
+ /Contents 576 0 R
448
+ /BleedBox [ 0 0 651 898 ]
449
+ /MediaBox [ 0 0 651 898 ]
450
+ /TrimBox [ 28 28 623 870 ]
451
+ /CropBox [ 28 28 623 870 ]
452
+ /ArtBox [ 28 28 623 870 ]
453
+ /Rotate 0
454
+ >>
455
+ endobj
456
+ EOS
457
+ rsrc_src = <<-EOS
458
+ 571 0 obj
459
+ <<
460
+ /ProcSet [ /PDF /Text /ImageC ]
461
+ /Font << /F1 580 0 R /F2 575 0 R /F3 578 0 R /F4 573 0 R >>
462
+ /XObject << /Im1 587 0 R >>
463
+ /ExtGState << /GS2 585 0 R /GS3 584 0 R >>
464
+ /Shading << /Sh1 590 0 R >>
465
+ >>
466
+ endobj
467
+ EOS
468
+ font = Rpdf2txt::Font.new(font_src)
469
+ page = Rpdf2txt::PageLeaf.new(page_src)
470
+ rsrc = Rpdf2txt::Resource.new(rsrc_src)
471
+ page.build_tree({580=>font,571=>rsrc})
472
+ text = Rpdf2txt::Text.new("(Hello World)")
473
+ text.current_page = page
474
+ get_font = text.get_font("F1")
475
+ assert_equal(Rpdf2txt::Font, get_font.class)
476
+ assert_equal(font, get_font)
477
+ assert_equal(true, font.bold?)
478
+ assert_equal(true, font.italic?)
479
+ assert_equal("/Frutiger-BoldItalic", font.basefont_name)
480
+ end
481
+ def test_font_no_width
482
+ font_src = <<-EOS
483
+ 327 0 obj
484
+ <<
485
+ /Type /Font
486
+ /Subtype /Type1
487
+ /Encoding 370 0 R
488
+ /BaseFont /Symbol
489
+ >>
490
+ endobj
491
+ EOS
492
+ page_src = <<-EOS
493
+ 10 0 obj
494
+ <<
495
+ /Type /Page
496
+ /Parent 390 0 R
497
+ /Resources 11 0 R
498
+ /Contents 12 0 R
499
+ /MediaBox [ 0 0 595 841 ]
500
+ /CropBox [ 0 0 595 841 ]
501
+ /Rotate 0
502
+ >>
503
+ endobj
504
+ EOS
505
+ rsrc_src = <<-EOS
506
+ 11 0 obj
507
+ <<
508
+ /ProcSet [ /PDF /Text ]
509
+ /Font << /F1 416 0 R /F2 408 0 R /F4 410 0 R /F6 325 0 R /F8 327 0 R >>
510
+ /ExtGState << /GS1 422 0 R >>
511
+ >>
512
+ endobj
513
+ EOS
514
+ txt_src = <<-EOS
515
+ BT
516
+ /F8 1 Tf
517
+ (Hello World) Tj
518
+ ET
519
+ EOS
520
+ font = Rpdf2txt::Font.new(font_src)
521
+ page = Rpdf2txt::PageLeaf.new(page_src)
522
+ rsrc = Rpdf2txt::Resource.new(rsrc_src)
523
+ page.build_tree({327=>font,11=>rsrc})
524
+ text = Rpdf2txt::Text.new(txt_src)
525
+ text.current_page = page
526
+ assert_nothing_raised {
527
+ text.scan
528
+ }
529
+ end
530
+ end
531
# Tests for Rpdf2txt::PdfEncrypt, driven by the fixture files stored in the
# ./data directory next to this test file.
class TestEncrypt < Test::Unit::TestCase
  # File id assigned to the PdfEncrypt instances that derive keys or decrypt.
  FILE_ID = '8664e6986751f2a49dccc9a4b40a4f18'

  # Builds the shared @encrypt instance from the 'encrypt_string' fixture.
  def setup
    @encrypt = Rpdf2txt::PdfEncrypt.new(read_data_file('encrypt_string'))
    @encrypt.file_id = FILE_ID
  end

  def test_decrypt
    pdf_obj = stream_from('working_obj')
    assert_equal("dc08b36009e48618f99c", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
    assert_decrypted_inflates(pdf_obj)
  end

  def test_decrypt2
    pdf_obj = stream_from('90_obj')
    assert_equal("7617ca1ac5babcf09cdf", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
    assert_decrypted_inflates(pdf_obj)
  end

  def test_decrypt3
    pdf_obj = stream_from('working_obj2')
    assert_equal("a9a666959bd64a96551b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
    assert_decrypted_inflates(pdf_obj)
  end

  # The three tests below have no key assertion; only the decrypt/inflate
  # round trip is checked.
  def test_decrypt5
    assert_decrypted_inflates(stream_from('458_obj'))
  end

  def test_decrypt6
    assert_decrypted_inflates(stream_from('450_obj'))
  end

  def test_decrypt7
    assert_decrypted_inflates(stream_from('465_obj'))
  end

  def test_decrypt_key
    # Byte position is important: the object source below must stay unindented.
    obj_src = <<-EOS
473 0 obj
<< /N 3 /Alternate /DeviceRGB /Length 2575 /Filter /FlateDecode >>
endobj
    EOS
    pdf_obj = Rpdf2txt::Stream.new(obj_src)
    encrypt = encrypt_from_obj_fixture
    assert_equal("dc08b36009e48618f99c", encrypt.decrypt_key(pdf_obj).unpack('h*').first)
  end

  # The fixture holds hex text; it is packed to raw bytes before inflating.
  def test_inflate_obj
    deflated = [read_data_file('90_obj_comp')].pack('H*')
    assert_nothing_raised do
      Zlib::Inflate.inflate(deflated)
    end
  end

  def test_parse_encrypt
    encrypt = encrypt_from_obj_fixture
    assert_equal("00ecc7a7bf8d68c564a21b98258b1dbff2aaf8d24bfdbaa74a9a073467d896b6", encrypt.user_key.unpack("H*").first)
    assert_equal("2055c756c72e1ad702608e8196acad447ad32d17cff583235f6dd15fed7dab67", encrypt.owner_key.unpack("H*").first)
    assert_nothing_raised do
      encrypt.encryption_key
    end
  end

  # Calls big_endian? on an instance that has no file id set. The call is
  # wrapped in an assertion so the test verifies something explicitly.
  def test_endianess
    encrypt = Rpdf2txt::PdfEncrypt.new(read_data_file('encrypt_obj'))
    assert_nothing_raised do
      encrypt.big_endian?
    end
  end

  private

  # Returns the content of ./data/<name>, resolved relative to this file.
  def read_data_file(name)
    File.read(File.expand_path("./data/#{name}", File.dirname(__FILE__)))
  end

  # Wraps the content of the named fixture in an Rpdf2txt::Stream.
  def stream_from(name)
    Rpdf2txt::Stream.new(read_data_file(name))
  end

  # Builds a PdfEncrypt from the 'encrypt_obj' fixture and assigns FILE_ID.
  def encrypt_from_obj_fixture
    encrypt = Rpdf2txt::PdfEncrypt.new(read_data_file('encrypt_obj'))
    encrypt.file_id = FILE_ID
    encrypt
  end

  # If the decrypted stream can be inflated, the decryption is considered ok.
  # The decrypt/inflate pair is run twice on the same object.
  def assert_decrypted_inflates(pdf_obj)
    assert_nothing_raised do
      2.times { Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) }
    end
  end
end
647
# Decryption test against the 'encrypt_string_128bit' fixture.
class TestEncrypt128bit < Test::Unit::TestCase
  def setup
    @encrypt = Rpdf2txt::PdfEncrypt.new(read_data_file('encrypt_string_128bit'))
    @encrypt.file_id = 'D816A5E838D50653C19DB62504229EB6'
  end

  def test_decrypt8
    pdf_obj = Rpdf2txt::Stream.new(read_data_file('3392_obj'))
    # If the decrypted stream can be inflated, the decryption is ok.
    # The decrypt/inflate pair is run twice on the same object.
    assert_nothing_raised do
      2.times { Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj)) }
    end
  end

  private

  # Returns the content of ./data/<name>, resolved relative to this file.
  def read_data_file(name)
    File.read(File.expand_path("./data/#{name}", File.dirname(__FILE__)))
  end
end
666
# Checks that TrailerDictionary#file_id returns the first entry of the
# trailer's /ID array.
class TestTrailer < Test::Unit::TestCase
  def test_parse_trail
    # Joined with "\n"; the leading empty element yields the initial newline.
    trailer_src = [
      '',
      'trailer',
      '<<',
      '/Size 476',
      '/Info 388 0 R',
      '/Encrypt 395 0 R',
      '/Root 394 0 R',
      '/Prev 203754',
      '/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]',
      '>>',
      'startxref'
    ].join("\n")
    trailer = Rpdf2txt::TrailerDictionary.new(trailer_src)
    assert_equal("8664e6986751f2a49dccc9a4b40a4f18v", trailer.file_id)
  end
end
684
# Tests for Rpdf2txt::Font: encoding name and glyph width lookup.
class TestFont < Test::Unit::TestCase
  # /Encoding /MacRomanEncoding is reported as "mac".
  def test_encoding
    assert_equal("mac", Rpdf2txt::Font.new(frutiger_font_src).encoding)
  end

  # Widths are looked up in the font's own /Widths array.
  def test_width
    frutiger = Rpdf2txt::Font.new(frutiger_font_src)
    assert_equal(278, frutiger.width(' '))
    assert_equal(556, frutiger.width('a'))
  end

  # The font object below has /BaseFont /Symbol and no /Widths array;
  # widths are queried by character, by integer and by glyph name.
  def test_width__builtin
    symbol = Rpdf2txt::Font.new(<<-EOS)
580 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 32
/LastChar 240
/Encoding /MacRomanEncoding
/BaseFont /Symbol
/FontDescriptor 579 0 R
>>
endobj
    EOS
    assert_equal(250, symbol.width(' '))
    assert_equal(763, symbol.width(70))
    assert_equal(631, symbol.width('alpha'))
  end

  # With an attached /Differences encoding (1 => space, beta, alpha) the
  # width of 'alpha' is also returned for character code 3.
  def test_width__differences
    encoding_src = <<-EOS
252 0 obj
<<
/Type /Encoding
/Differences [ 1 /space /beta /alpha ]
>>
endobj
    EOS
    font_src = <<-EOS
219 0 obj
<<
/Type /Font
/Subtype /Type1
/Encoding 252 0 R
/BaseFont /Symbol
/ToUnicode 253 0 R
>>
endobj
    EOS
    tounicode_src = <<-EOS
253 0 obj
<< /Filter /FlateDecode /Length 227 >>
stream
H�TP�n� ���-��[�"���%�8I�am!a\��9)#v���^Ʒљ���0�b����=*�W�w��J���� @�x:��vt��a �37�8M�������~D�ѸN_��'��-� �� ��� ��bn7i�y�5nA*�ҭ�"C/���#�C1/��=��:&2ѕ{���+D��
�ɞ�b_��L��s𺐚��4�; >�P�_Eo�
endstream
endobj
    EOS
    symbol = Rpdf2txt::Font.new(font_src)
    symbol.attributes[:encoding] = Rpdf2txt::Encoding.new(encoding_src)
    symbol.attributes[:to_unicode] = Rpdf2txt::Stream.new(tounicode_src).to_cmap
    assert_equal(631, symbol.width('alpha'))
    assert_equal(631, symbol.width(3))
  end

  private

  # Source of font object 580 (Frutiger-BoldItalic, MacRomanEncoding) with an
  # explicit /Widths array starting at /FirstChar 32.
  def frutiger_font_src
    <<-EOS
580 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 32
/LastChar 240
/Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278
556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500
800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778
556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500
278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611
611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278
0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611
0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611
278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0
0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 278 ]
/Encoding /MacRomanEncoding
/BaseFont /Frutiger-BoldItalic
/FontDescriptor 579 0 R
>>
endobj
    EOS
  end
end
797
+ class TestPageLeaf < Test::Unit::TestCase
798
# Extracts text from a content stream that interleaves `cm` matrix changes
# with six numbered text snippets and expects them back in order, one per line.
def test_text_cm
  content = Stream.new
  content.decoded_stream = <<-'EOS'
q
1 0 0 -1 70.866 841.89 cm
0 J
1 1 1 RG
q
-1.5 -1.5 m
455.043 -1.5 l
452.043 1.5 l
1.5 1.5 l
W*
n
-0.5 0 m
454.043 0 l
S
Q
q
455.043 -1.5 m
455.043 32.5 l
452.043 29.5 l
452.043 1.5 l
W*
n
453.543 -0.5 m
453.543 31.5 l
S
Q
q
455.043 32.5 m
-1.5 32.5 l
1.5 29.5 l
452.043 29.5 l
W*
n
454.043 31 m
-0.5 31 l
S
Q
q
-1.5 32.5 m
-1.5 -1.5 l
1.5 1.5 l
1.5 29.5 l
W*
n
0 31.5 m
0 -0.5 l
S
Q
1 0 0 1 0.5 31.5 cm
0 0 0 rg
BT
/F0 8 Tf
1 0 0 -1 232.336 7.573 Tm
[(1. position: 7.573 offset: 31.5)] TJ
ET
1 0 0 1 -0.5 -0.5 cm
q
455.043 -1.5 m
455.043 12.1 l
452.043 9.1 l
452.043 1.5 l
W*
n
453.543 -0.5 m
453.543 11.1 l
S
Q
q
455.043 12.1 m
-1.5 12.1 l
1.5 9.1 l
452.043 9.1 l
W*
n
454.043 10.6 m
-0.5 10.6 l
S
Q
q
-1.5 12.1 m
-1.5 -1.5 l
1.5 1.5 l
1.5 9.1 l
W*
n
0 11.1 m
0 -0.5 l
S
Q
1 0 0 1 0 39.866 cm
BT
/F1 16 Tf
1 0 0 -1 0 14.347 Tm
(2. position: 14.347 offset: 39.866) Tj
0 0 0 RG
ET
0 30.173 m
453.543 30.173 l S
1 0 0 1 0 32.2 cm
BT
/F2 11 Tf
1 0 0 -1 314.813 10.413 Tm
(3. position: 10.413 offset: 32.2) Tj
ET
1 0 0 1 0 -32.2 cm
0 46.7 m
453.543 46.7 l
S
BT
/F2 8 Tf
1 0 0 -1 0 62.573 Tm
(4. position: 62.573 offset:-32.2) Tj
/F3 9 Tf
0 -14.547 Td
(5. moved by: -14.547) Tj
/F2 8 Tf
0 -15.853 Td
(6. moved by: -15.853) Tj
ET
  EOS
  leaf = PageLeaf.new
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  # Working notes: a = text position, b = cm offset, f = font size.
  ## a+b a-b
  # 1. 39.073 -23.927
  # 2. 54.213 -25.519
  # 3. 42.613 -21.787
  ## fonts
  # 1. F0 8
  # 2. F1 16
  # 3. F2 11
  ## a+b+f a+b-f a-b-f a-b+f
  # 1. 47.073 31.073 -31.927 -15.926
  # 2. 70.213 38.213 -41.519 - 9.519
  # 3. 53.613 31.613 -32.787 -10.787
  ## a+f a-f
  # 1. 15.573 -0.427
  # 2. 30.347 -1.653
  # 3. 21.413 -0.587

  ## 5 hrs -> 5 x newline
  wanted = <<-EOS
1. position: 7.573 offset: 31.5
2. position: 14.347 offset: 39.866
3. position: 10.413 offset: 32.2
4. position: 62.573 offset:-32.2
5. moved by: -14.547
6. moved by: -15.853
  EOS
  assert_equal(wanted.strip, collector.out.strip)
end
954
# Extracts text from a content stream that mixes image placement with a long
# text object using T* line moves, and compares the result with the expected
# lines.
def test_text__fixed_double_lead_bug
  content = Stream.new
  content.decoded_stream = <<-'EOS'
q
1 i
0.059998 34.407 618 -34.5 re
W* n
0 864.567 617.94 -864.54 re
W* n
/GS1 gs
q
324.71994 0 0 25.199999 -0.720012 10.166975 cm
/Im112 Do
Q
Q
q
1 i
617.04 11.127 0.89996 0.23999 re
W n
/GS1 gs
q
1.44 0 0 0.24 617.039978 11.126974 cm
/Im17 Do
Q
Q
q
1 i
0.059998 34.407 618 -34.5 re
W* n
0 864.567 617.94 -864.54 re
W* n
/GS1 gs
q
1.44 0 0 0.24 0.239988 10.886974 cm
/Im18 Do
Q
q
27.359999 0 0 0.24 295.679962 10.886974 cm
/Im16 Do
Q
Q
q
1 i
617.28 10.887 0.65997 0.24002 re
W n
/GS1 gs
q
0.96 0 0 0.24 617.279968 10.886974 cm
/Im14 Do
Q
Q
q
1 i
0.059998 34.407 618 -34.5 re
W* n
0 864.567 617.94 -864.54 re
W* n
/GS1 gs
q
597.599976 0 0 12.719999 10.319989 -0.873026 cm
/Im113 Do
Q
Q
q
1 i
11.28 0.026978 0.47998 0.059998 re
W n
/GS1 gs
q
0.48 0 0 0.24 11.279988 -0.153026 cm
/Im2 Do
Q
Q
q
1 i
606.48 0.026978 0.47998 0.059998 re
W n
/GS1 gs
q
0.48 0 0 0.24 606.47998 -0.153026 cm
/Im2 Do
Q
Q
q
1 i
0 864.567 617.94 -50.94 re
W* n
/GS1 gs
q
608.399963 0 0 13.200012 -0.960012 852.326965 cm
/Im93 Do
Q
Q
q
1 i
616.08 853.287 1.86 0.23999 re
W n
/GS1 gs
q
1.92 0 0 0.24 616.079956 853.286987 cm
/Im44 Do
Q
Q
q
1 i
0 864.567 617.94 -50.94 re
W* n
/GS1 gs
q
1.92 0 0 0.24 -0.000012 853.046936 cm
/Im87 Do
Q
q
240 0 0 0.24 7.439988 853.046936 cm
/Im85 Do
Q
Q
q
1 i
615.84 853.047 2.1 0.24005 re
W n
/GS1 gs
q
2.4 0 0 0.24 615.839966 853.046936 cm
/Im59 Do
Q
Q
q
1 i
0 864.567 617.94 -50.94 re
W* n
/GS1 gs
q
241.920013 0 0 41.040039 6.479988 812.966919 cm
/Im88 Do
Q
q
572.639954 0 0 0.24 39.359989 813.686951 cm
/Im109 Do
Q
q
572.639954 0 0 0.24 39.359989 813.44696 cm
/Im109 Do
Q
Q
/GS1 gs
BT
/F1 1 Tf
10.02 0 0 10.02 48.24 821.187 Tm
0 g
-0.0006 Tc
-0.002 Tw
[(Arzneimittel Nachrichten )5.9(/ M�dicamen)5.6(t)-0.8(s )]TJ
/F2 1 Tf
7.02 0 0 7.02 87.9 24.987 Tm
0.0023 Tc
0.0017 Tw
[(S)6.6(w)6.2(iss)6.7(m)2.4(ed)6.5(ic)10.4( Jo)6.5(u)6.5(r)-1.9(n)6.5(a)11.3(l 03)11.3(/200)11.3(6)11.3( )]TJ
1 g
30.6752 0 TD
-0.0004 Tc
0 Tw
(226)Tj
0 g
1.6667 0 TD
0 Tc
( )Tj
/F1 1 Tf
11.52 0 0 11.52 96.42 773.3669 Tm
-0.0006 Tc
-0.0014 Tw
[(Autorisa)3.3(tion d�un m�dicament co)6.2(ntenant un)6.2( nouveau principe actif: )]TJ
0 -1.125 TD
0 Tc
0 Tw
(M)Tj
/F2 1 Tf
10.02 0 0 10.02 96.42 742.527 Tm
0 Tc
0 Tw
( )Tj
/F1 1 Tf
0 -1.1976 TD
-0.0002 Tc
0.1054 Tw
[(En f�vrier 2)4.9(006, la pr�p)6(aration Mac)6.7(ugen)6(�)-2.6(, une )]TJ
T*
-0.0004 Tc
0.1116 Tw
[(solution injectable conte)4.7(n)-0.2(ant un nou)5.8(veau prin)5.8(-)]TJ
T*
-0.0002 Tc
0.1893 Tw
[(cipe actif, le pegapta)4.9(n)0(ib, a �t� autor)5.6(i)2.4(s�e dans)6.7( )]TJ
T*
-0.0005 Tc
0.1357 Tw
[(l�indication suivante )137.8(: � )137.8(T)4.6(r)-0.7(aitement )6(d)5.7(e)-1.4( la form)8.3(e )]TJ
T*
-0.0007 Tc
0.3514 Tw
[(n�ovasc)6.2(u)-0.5(laire \(h)5.5(umide\) )5.9(de la d)5.5(�)-1.6(g�n�rescence )]TJ
T*
-0.0019 Tw
(maculaire li�e � l��ge�. )Tj
T*
-0.0001 Tc
0.0395 Tw
[(La dos)6.8(e auto)6.1(ris�e de 0,3 )47.9(mg de pegaptanib doit)5.7( )]TJ
T*
-0.0003 Tc
0.4409 Tw
[(�tre administr�e par )-6(i)8.2(n)-0.1(jection )-6(int)5.5(r)-0.5(avitr�)4.8(enne)4.8( )]TJ
T*
-0.0005 Tc
-0.002 Tw
[(toutes les six semaines \(9 )6(injections par an\). )]TJ
T*
0 Tc
0 Tw
( )Tj
/F2 1 Tf
T*
-0.001 Tc
0.4595 Tw
[(Le )6(pegaptanib sodique est)4.8( un oli)7.5(gonucl�ide )]TJ
T*
-0.0014 Tc
0.0767 Tw
[(modifi� p�gy)-4.4(l� qui )6(se lie � l�isoforme VEGF)]TJ
-21.4132 -1.1976 TD
-0.001 Tc
0.3158 Tw
[(facteur)4.8( de c)5.9(r)-1.2(oissance de l�endoth�li)7.5(um vascu-)]TJ
T*
-0.0004 Tc
0.1595 Tw
[(laire \(VEGF\) )6(et inhibe so)5.8(n activit�. Le VEGF est)5.4( )]TJ
T*
-0.0006 Tc
0.1957 Tw
[(une prot)5.2(�in)5.6(e)-1.5( qui induit une angiog)5.6(en�se, un)5.6(e )]TJ
T*
-0.0009 Tc
0.4295 Tw
[(perm�abilit� v)-3.9(a)-1.8(sculaire )6(et une inflammation. )]TJ
T*
( )Tj
ET
  EOS
  leaf = PageLeaf.new
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  wanted = <<-EOS.strip
Arzneimittel Nachrichten / M\351dicaments
Autorisation d\222un m\351dicament contenant un nouveau principe actif:
M

En f\351vrier 2006, la pr\351paration Macugen\256, une
solution injectable contenant un nouveau prin-
cipe actif, le pegaptanib, a \351t\351 autoris\351e dans
l\222indication suivante : \253 Traitement de la forme
n\351ovasculaire (humide) de la d\351g\351n\351rescence
maculaire li\351e \340 l\222\342ge\273.
La dose autoris\351e de 0,3 mg de pegaptanib doit
\352tre administr\351e par injection intravitr\351enne
toutes les six semaines (9 injections par an).

Le pegaptanib sodique est un oligonucl\351ide
modifi\351 p\351gyl\351 qui se lie \340 l\222isoforme VEGF
facteur de croissance de l\222endoth\351lium vascu-
laire (VEGF) et inhibe son activit\351. Le VEGF est
une prot\351ine qui induit une angiogen\350se, une
perm\351abilit\351 vasculaire et une inflammation.

Swissmedic Journal 03/2006 226
  EOS
  actual = collector.out.strip
  # Disabled debugging aid: reports the first index at which the strings differ.
  # [expected.size, result.size].max.times do |idx|
  # unless result[idx] == expected[idx]
  # flunk "unexpected result: (#{result[idx]}/#{expected[idx]} at #{idx}) ...#{expected[idx-10,20].inspect}..."
  # end
  # end
  assert_equal(wanted, actual)
end
1243
# Extracts text from a content stream whose text matrices are rotated, on a
# page with the :rotate attribute set to '90', and compares the result with
# the expected table rows.
def test_text_landscape
  content = Stream.new
  content.decoded_stream = <<-'EOS'
/GS1 gs
BT
/TT2 1 Tf
0 14.0053 -13.9999 0 59.64 43.2505 Tm
/Cs6 cs 0 0 0 scn
-0.0002 Tc
0.0008 Tw
(Zuzahlungsbefreite Arzneimittel nach � 31 Abs. 3 Satz 4 SGB V)Tj
/TT4 1 Tf
0 9.0035 -9 0 117 176.4505 Tm
0.0009 Tc
0 Tw
(PZN)Tj
-14.7942 0 TD
-0.0016 Tc
[(Arzneimit)-3.7(t)-3.7(e)1.4(lname)]TJ
59.8165 0 TD
0.0016 Tc
[(D)4(a)11.3(rrei)10.5(c)1.8(hu)4.6(n)11.3(g)-2(sf)6.2(orm)]TJ
-39.9843 0 TD
-0.0013 Tc
[(He)8.4(rst)-10(e)8.4(l)-5.7(l)7.6(e)-4.9(r)]TJ
52.2861 0 TD
0.0001 Tc
[(Apo)9.8(t)-8.6(h)9.8(e)-3.5(ke)9.8(nverka)9.8(ufspre)9.8(is)]TJ
3.1321 -1.14 TD
-0.0006 Tc
0.0027 Tw
[( in)-4.2(kl)8.3(.)-9.3(M)-0.6(w)21.8(S)0(t)]TJ
ET
129.3 42.531 1.98 751.68 re
f
BT
0 9.0035 -9 0 117 519.6505 Tm
-0.0017 Tc
0 Tw
(Packungs-)Tj
1.0662 -1.14 TD
-0.0022 Tc
[(gr��)-4.4(e)]TJ
-20.6119 1.14 TD
0 Tc
[(Wirkstoff)-8.7(\()-0.2(e)9.7(\))-5598(Wirkst�rke)]TJ
ET
q
1 i
108.9 440.091 9.96 53.46 re
W n
BT
0 9.0035 -9 0 117 482.5705 Tm
(\()Tj
ET
Q
BT
0 9.0035 -9 0 117 485.5705 Tm
(n)Tj
ET
q
1 i
108.9 440.091 9.96 53.46 re
W n
BT
0 9.0035 -9 0 117 490.5505 Tm
(\))Tj
ET
Q
BT
/TT2 1 Tf
0 14.0053 -13.9999 0 79.5 43.2505 Tm
-0.0008 Tc
(Produktstand)Tj
0 -1.2129 TD
-0.0001 Tc
0.0007 Tw
(sortiert nach Arzneimittelname)Tj
7.2915 1.2129 TD
0.0001 Tc
0 Tw
[(01)-94.9(.)-231.9(0)-0.7(8)-35(.)-0.5(2009)]TJ
/TT4 1 Tf
0 8.003 -7.9999 0 144.36 176.4505 Tm
-0.0014 Tc
(4000741)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 10000I.E./1ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(611,53)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ
15.2268 0 TD
-0.0006 Tc
0.0077 Tw
[(E)-8.4(p)0.8(o)8.3(e)0.8(t)-7.7(i)-3.3(n)8.3( alf)7.3(a)-8118.7(10000)-831.4(I.E.)]TJ
ET
169.56 42.531 0.48 748.26 re
f
BT
0 8.003 -7.9999 0 178.32 176.4505 Tm
-0.0014 Tc
0 Tw
(4000646)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 1000I.E./0.5ML)Tj
67.3249 0 TD
0 Tw
[(Fertigspritzen)-12519.7(64,20)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(1000)-831.3(I.E.)]TJ
ET
203.46 42.531 0.54001 748.26 re
f
BT
0 8.003 -7.9999 0 212.34 176.4505 Tm
-0.0014 Tc
0 Tw
(4000652)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 2000I.E./1ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(119,04)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(2000)-831.3(I.E.)]TJ
ET
237.48 42.531 0.53999 748.26 re
f
BT
0 8.003 -7.9999 0 246.36 176.4505 Tm
-0.0014 Tc
0 Tw
(4000669)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 3000I.E./0.3ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(173,94)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(3)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(3000)-831.3(I.E.)]TJ
ET
271.56 42.531 0.48001 748.26 re
f
BT
0 8.003 -7.9999 0 280.32 176.4505 Tm
-0.0014 Tc
0 Tw
(4000681)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 4000I.E./0.4ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(228,83)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(4)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(4000)-831.3(I.E.)]TJ
ET
305.46 42.531 0.53998 748.26 re
f
BT
0 8.003 -7.9999 0 314.34 176.4505 Tm
-0.0014 Tc
0 Tw
(4000698)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 5000I.E./0.5ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(283,70)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(5000)-831.3(I.E.)]TJ
ET
339.48 42.531 0.54001 748.26 re
f
BT
0 8.003 -7.9999 0 348.36 176.4505 Tm
-0.0014 Tc
0 Tw
(4000729)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 6000I.E./0.6ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(338,57)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(6)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(6000)-831.3(I.E.)]TJ
ET
373.56 42.531 0.47998 748.26 re
f
BT
0 8.003 -7.9999 0 382.32 176.4505 Tm
-0.0014 Tc
0 Tw
(4000735)Tj
-16.6438 0 TD
-0.0006 Tc
0.0002 Tw
(ABSEAMED 8000I.E./0.8ML)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Fertigspritzen)-12242.4(448,34)]TJ
-45.0132 0 TD
0.0045 Tc
-0.0049 Tw
[(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(8)-1313.6(m)5.3(l)]TJ
15.2268 0 TD
-0.0005 Tc
0.0076 Tw
[(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(8000)-831.3(I.E.)]TJ
ET
407.46 42.531 0.54001 748.26 re
f
BT
0 8.003 -7.9999 0 416.34 176.4505 Tm
-0.0014 Tc
0 Tw
(3867219)Tj
-16.6438 0 TD
-0.0016 Tc
0.0012 Tw
(ACC 200)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Brausetabletten)-11575.1(12,74)]TJ
-45.0132 0 TD
0.0003 Tc
0.0068 Tw
[(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
38.2882 0 TD
-0.0006 Tc
0 Tw
[(50)-1333.7(St)]TJ
-23.0614 0 TD
-0.0021 Tc
[(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
ET
441.48 42.531 0.54001 748.26 re
f
BT
0 8.003 -7.9999 0 450.36 176.4505 Tm
-0.0014 Tc
(3867225)Tj
-16.6438 0 TD
-0.0016 Tc
0.0012 Tw
(ACC 200)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Brausetabletten)-11575.1(15,42)]TJ
-45.0132 0 TD
0.0003 Tc
0.0068 Tw
[(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
37.7335 0 TD
-0.0007 Tc
0 Tw
[(100)-1333.8(St)]TJ
-22.5066 0 TD
-0.0021 Tc
[(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
ET
475.56 42.531 0.47998 748.26 re
f
BT
0 8.003 -7.9999 0 484.32 176.4505 Tm
-0.0014 Tc
(4789763)Tj
-16.6438 0 TD
-0.0016 Tc
0.0012 Tw
(ACC 200)Tj
67.3249 0 TD
-0.0008 Tc
0 Tw
[(Brausetabletten)-11575.1(11,01)]TJ
-45.0132 0 TD
0.0003 Tc
0.0068 Tw
[(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
38.2882 0 TD
-0.0006 Tc
0 Tw
[(20)-1333.7(St)]TJ
-23.0614 0 TD
-0.0021 Tc
[(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
ET
509.46 42.531 0.53998 748.26 re
f
BT
0 8.003 -7.9999 0 548.8199 376.6105 Tm
-0.0007 Tc
-0.0072 Tw
[(Seite 1)-6.8( v)-10.5(o)0.7(n)8.2( )-15(10)8.2(83)]TJ
ET
  EOS
  leaf = PageLeaf.new
  leaf.attributes.store(:rotate, '90')
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  wanted = <<-EOS
Zuzahlungsbefreite Arzneimittel nach \247 31 Abs. 3 Satz 4 SGB V
Produktstand 01.08.2009
sortiert nach Arzneimittelname
Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkst�rke (n) Packungs- Darreichungsform Apothekenverkaufspreis
gr��e inkl.MwSt
ABSEAMED 10000I.E./1ML 4000741 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 10000 I.E.6X1 ml Fertigspritzen 611,53
ABSEAMED 1000I.E./0.5ML 4000646 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 1000 6I.E.X0.5 ml Fertigspritzen 64,20
ABSEAMED 2000I.E./1ML 4000652 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 2000 I.E.6X1 ml Fertigspritzen 119,04
ABSEAMED 3000I.E./0.3ML 4000669 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 3000 6I.E.X0.3 ml Fertigspritzen 173,94
ABSEAMED 4000I.E./0.4ML 4000681 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 4000 6I.E.X0.4 ml Fertigspritzen 228,83
ABSEAMED 5000I.E./0.5ML 4000698 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 5000 6I.E.X0.5 ml Fertigspritzen 283,70
ABSEAMED 6000I.E./0.6ML 4000729 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 6000 6I.E.X0.6 ml Fertigspritzen 338,57
ABSEAMED 8000I.E./0.8ML 4000735 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 8000 6I.E.X0.8 ml Fertigspritzen 448,34
ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,74
ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,42
ACC 200 4789763 HEXAL AG Acetylcystein 200 mg 20 St Brausetabletten 11,01
Seite 1 von 1083
  EOS
  assert_equal(wanted.strip, collector.out.strip)
end
1628
# Runs text extraction on the content stream stored in
# data/stream_kerning_bug.txt, on a page with :rotate set to '90', and
# expects a single line of output.
def test_text_kerning_bug
  content = Stream.new
  fixture = File.expand_path('data/stream_kerning_bug.txt', File.dirname(__FILE__))
  content.decoded_stream = File.read(fixture)
  leaf = PageLeaf.new
  leaf.attributes.store(:rotate, '90')
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  wanted = "RATIOPHARM GMBH 20 St"
  assert_equal(wanted.strip, collector.out.strip)
end
1642
# Runs text extraction on the content stream stored in
# data/stream_kerning_bug2.txt, on a page with :rotate set to '90', and
# expects a single line of output.
def test_text_kerning_bug2
  content = Stream.new
  fixture = File.expand_path('data/stream_kerning_bug2.txt', File.dirname(__FILE__))
  content.decoded_stream = File.read(fixture)
  leaf = PageLeaf.new
  leaf.attributes.store(:rotate, '90')
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  wanted = "HEUMANN PH GMBH&CO. KG 20 St"
  assert_equal(wanted.strip, collector.out.strip)
end
1656
+ =begin
1657
+ def test_text_space_bug2
1658
+ stream = Stream.new
1659
+ path = File.expand_path('data/space_bug_stream2.txt',
1660
+ File.dirname(__FILE__))
1661
+ fontsrc15 = <<-EOS
1662
+ 327 0 obj
1663
+ EOS
1664
+ font15 = Font.new(fontsrc15)
1665
+ stream.decoded_stream = File.read path
1666
+ page = PageLeaf.new
1667
+ page.resources = resource = Resource.new
1668
+ resource.instance_variable_get('@fonts').store(:r15, font15)
1669
+ handler = SimpleHandler.new
1670
+ page.contents = [stream]
1671
+ page.text(handler)
1672
+ expected = "Inhalt / Table des mati\303\250res"
1673
+ assert_equal(expected.strip, handler.out.strip[0,28])
1674
+ expected = '10 mg, 20 mg und 40 mg'
1675
+ assert_equal(expected.strip, handler.out.strip[346,22])
1676
+ end
1677
+ =end
1678
+ end
1679
# Tests Rpdf2txt::Encoding using a minimal /Encoding object whose
# /Differences array starts at code 1.
class TestEncoding < Test::Unit::TestCase
  # Builds the encoding under test from an inline PDF object source.
  def setup
    @encoding = Rpdf2txt::Encoding.new(<<-EOS)
252 0 obj
<<
/Type /Encoding
/Differences [ 1 /space /beta /alpha ]
>>
endobj
    EOS
  end

  # The glyph names listed in /Differences are exposed keyed by their
  # consecutive character codes.
  def test_differences
    by_code = { 1 => 'space', 2 => 'beta', 3 => 'alpha' }
    assert_equal(by_code, @encoding.differences)
  end

  # Converting a code yields the mapped character and must leave the
  # argument string itself unmodified.
  def test_convert_symbol
    input = "\003"
    converted = @encoding.convert_symbol(input)
    assert_equal("a", converted)
    assert_equal("\003", input)
  end
end
1705
# Image decoding tests. Each image case builds an Image from a stored PDF
# object under data/, checks that decoding does not raise, and compares the
# decoded result with a reference PNG read through RMagick.
class TestImage < Test::Unit::TestCase
  def test_png
    obj = Image.new(read_sample('png.pdfobj'))
    assert_nothing_raised { obj.image }
    assert_equal(reference_png('logo.png'), obj.image)
  end

  def test_indexed
    check_indexed_image('index.pdfobj', 'indexed.pdfobj', 51, 'pdf_50.png')
  end

  def test_indexed_2bit
    check_indexed_image('index_2bit.pdfobj', 'indexed_2bit.pdfobj', 21,
                        'pdf_20.png')
  end

  def test_indexed_masked
    check_indexed_image('index_masked.pdfobj', 'indexed_masked.pdfobj', 21,
                        'pdf_21.png')
  end

  # Sample bytes and expected output are taken from the PDF manual.
  def test_lzw_decode
    encoded = "\x80\x0B\x60\x50\x22\x0C\x0C\x85\x01"
    decoder = Stream.new(encoded)
    assert_equal("-----A---B", decoder.lzw_decode(encoded))
  end

  def test_lzw_image
    check_indexed_image('lzw_index.pdfobj', 'lzw.pdfobj', 21, 'pdf_22.png')
  end

  private

  # Absolute path of a file inside the data directory next to this test.
  def sample_path(name)
    File.expand_path("data/#{name}", File.dirname(__FILE__))
  end

  # Raw contents of a file from the data directory.
  def read_sample(name)
    File.read(sample_path(name))
  end

  # First image RMagick reads from the given reference file.
  def reference_png(name)
    first, = Magick::Image.read(sample_path(name))
    first
  end

  # Shared body of the image tests that need a second object: the stream in
  # +index_file+ is handed to Image#build_tree under the key +index_id+
  # before the image from +image_file+ is decoded and compared with
  # +png_file+.
  def check_indexed_image(index_file, image_file, index_id, png_file)
    index = Stream.new(read_sample(index_file))
    obj = Image.new(read_sample(image_file))
    obj.build_tree(index_id => index)
    assert_nothing_raised { obj.image }
    assert_equal(reference_png(png_file), obj.image)
  end
end
1774
# Tests decoding of an inline image (attribute dictionary plus raw data).
class TestInlineImage < Test::Unit::TestCase
  # Decodes a small inline image, writes it to a temporary PNG, reads it
  # back through RMagick and compares it with the reference file
  # data/inline.png.
  #
  # Errors and assertion failures are deliberately NOT rescued here: the
  # previous `rescue StandardError => e; p e` only printed them, so the test
  # could never be reported as failing.
  def test_inline_img
    tmp_path = nil
    attrs = <<-EOS
/W 113
/CS /DeviceGray
/BPC 8
/DP << /Predictor 15
/Columns 113
>>
/F /Fl
/H 1

    EOS
    data = "x\234cd\2407\000\000\000\344\000\002"
    obj = InlineImage.new(attrs, data)
    assert_nothing_raised { obj.image }
    path = File.expand_path('data/inline.png', File.dirname(__FILE__))
    good = Magick::Image.read path
    # The '.png' suffix is appended to the temp file's path; presumably so
    # RMagick picks the PNG format from the extension -- TODO confirm.
    tmp_path = Tempfile.new('test').path + '.png'
    obj.image.write tmp_path
    tmp = Magick::Image.read tmp_path
    assert_equal(good, tmp)
  ensure
    # tmp_path is still nil when something failed before it was assigned;
    # File.exist?(nil) would raise TypeError and hide the original error.
    File.delete(tmp_path) if tmp_path && File.exist?(tmp_path)
  end
end
1802
+ end