rpdf2txt 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. data/History.txt +5 -0
  2. data/LICENCE +515 -0
  3. data/Manifest.txt +126 -0
  4. data/README.txt +30 -0
  5. data/Rakefile +24 -0
  6. data/bin/rpdf2txt +58 -0
  7. data/config.save +12 -0
  8. data/install.rb +1098 -0
  9. data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
  10. data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
  11. data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
  12. data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
  13. data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
  14. data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
  15. data/lib/rpdf2txt-rockit/grammar.rb +644 -0
  16. data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
  17. data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
  18. data/lib/rpdf2txt-rockit/indexable.rb +53 -0
  19. data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
  20. data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
  21. data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
  22. data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
  23. data/lib/rpdf2txt-rockit/profiler.rb +168 -0
  24. data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
  25. data/lib/rpdf2txt-rockit/rockit.rb +76 -0
  26. data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
  27. data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
  28. data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
  29. data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
  30. data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
  31. data/lib/rpdf2txt-rockit/token.rb +364 -0
  32. data/lib/rpdf2txt-rockit/version.rb +3 -0
  33. data/lib/rpdf2txt/attributesparser.rb +42 -0
  34. data/lib/rpdf2txt/cmapparser.rb +65 -0
  35. data/lib/rpdf2txt/data/_cmap.grammar +11 -0
  36. data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
  37. data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
  38. data/lib/rpdf2txt/data/cmap.grammar +11 -0
  39. data/lib/rpdf2txt/data/cmap.rb +37 -0
  40. data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
  41. data/lib/rpdf2txt/data/cmap_range.rb +43 -0
  42. data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
  43. data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
  44. data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
  45. data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
  46. data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
  47. data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
  48. data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
  49. data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
  50. data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
  51. data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
  52. data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
  53. data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
  54. data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
  55. data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
  56. data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
  57. data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
  58. data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
  59. data/lib/rpdf2txt/data/pdftext.grammar +102 -0
  60. data/lib/rpdf2txt/data/pdftext.rb +146 -0
  61. data/lib/rpdf2txt/default_handler.rb +352 -0
  62. data/lib/rpdf2txt/lzw.rb +69 -0
  63. data/lib/rpdf2txt/object.rb +1114 -0
  64. data/lib/rpdf2txt/parser.rb +169 -0
  65. data/lib/rpdf2txt/symbol.rb +408 -0
  66. data/lib/rpdf2txt/text.rb +182 -0
  67. data/lib/rpdf2txt/text_state.rb +434 -0
  68. data/lib/rpdf2txt/textparser.rb +42 -0
  69. data/test/data/3392_obj +0 -0
  70. data/test/data/397_decrypted +15 -0
  71. data/test/data/450_decrypted +153 -0
  72. data/test/data/450_obj +0 -0
  73. data/test/data/452_decrypted +125 -0
  74. data/test/data/454_decrypted +108 -0
  75. data/test/data/456_decrypted +106 -0
  76. data/test/data/458_decrypted +111 -0
  77. data/test/data/458_obj +0 -0
  78. data/test/data/460_decrypted +118 -0
  79. data/test/data/460_obj +0 -0
  80. data/test/data/463_decrypted +117 -0
  81. data/test/data/465_decrypted +107 -0
  82. data/test/data/465_obj +0 -0
  83. data/test/data/90_obj +0 -0
  84. data/test/data/90_obj_comp +1 -0
  85. data/test/data/decrypted +0 -0
  86. data/test/data/encrypt_obj +0 -0
  87. data/test/data/encrypt_string +0 -0
  88. data/test/data/encrypt_string_128bit +0 -0
  89. data/test/data/encrypted_object_stream.pdf +0 -0
  90. data/test/data/firststream +1 -0
  91. data/test/data/index.pdfobj +0 -0
  92. data/test/data/index_2bit.pdfobj +0 -0
  93. data/test/data/index_masked.pdfobj +0 -0
  94. data/test/data/indexed.pdfobj +0 -0
  95. data/test/data/indexed_2bit.pdfobj +0 -0
  96. data/test/data/indexed_masked.pdfobj +0 -0
  97. data/test/data/inline.png +0 -0
  98. data/test/data/logo.png +0 -0
  99. data/test/data/lzw.pdfobj +0 -0
  100. data/test/data/lzw_index.pdfobj +0 -0
  101. data/test/data/page_tree.pdf +148 -0
  102. data/test/data/pdf_20.png +0 -0
  103. data/test/data/pdf_21.png +0 -0
  104. data/test/data/pdf_22.png +0 -0
  105. data/test/data/pdf_50.png +0 -0
  106. data/test/data/png.pdfobj +0 -0
  107. data/test/data/space_bug_stream.txt +119 -0
  108. data/test/data/stream.txt +292 -0
  109. data/test/data/stream_kerning_bug.txt +13 -0
  110. data/test/data/stream_kerning_bug2.txt +6 -0
  111. data/test/data/test.pdf +0 -0
  112. data/test/data/test.txt +8 -0
  113. data/test/data/test_text.txt +42 -0
  114. data/test/data/working_obj +0 -0
  115. data/test/data/working_obj2 +0 -0
  116. data/test/mock.rb +149 -0
  117. data/test/suite.rb +30 -0
  118. data/test/test_pdf_object.rb +1802 -0
  119. data/test/test_pdf_parser.rb +1340 -0
  120. data/test/test_pdf_text.rb +789 -0
  121. data/test/test_space_bug_05_2004.rb +87 -0
  122. data/test/test_stream.rb +194 -0
  123. data/test/test_text_state.rb +315 -0
  124. data/usage-en.txt +112 -0
  125. data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
  126. data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
  127. metadata +220 -0
@@ -0,0 +1,13 @@
1
+ BT
2
+ 0 8.003 -7.9999 0 348.36 176.4505 Tm
3
+ -0.0014 Tc
4
+ -16.6438 0 TD
5
+ 0.0009 Tw
6
+ 67.3249 0 TD
7
+ -0.001 Tc
8
+ 0 Tw
9
+ -45.0132 0 TD
10
+ -0.0106 Tc
11
+ 0.0177 Tw
12
+ [(RA)-18.4(TI)-17.7(OP)-18.4(HA)-18.4(RM)-9.8( GM)-17.3(BH)-28385.1(2)-9.2(0)-1343.7(S)-10.9(t)]TJ
13
+ ET
@@ -0,0 +1,6 @@
1
+ BT
2
+ 0 8.003 -7.9999 0 382.32 176.4505 Tm
3
+ 0.0008 Tc
4
+ -0.0012 Tw
5
+ [(H)10.7(E)-7(U)10.7(M)1.6(ANN PH )7.5(GM)9.1(B)-7(H)10.7(&)-7(C)10.7(O.)8.7( )-7.5(K)8(G)-24157.1(20)-1332.3(St)]TJ
6
+ ET
Binary file
@@ -0,0 +1,8 @@
1
+ q Q q 18 40 576 734 re W n /Cs1 cs 0 0 0 sc q 1 0 0 -1 18 774 cm BT
2
+ 10 0 0 -10 510 12 Tm /F1.0 1 Tf (Page 1 of 1) Tj ET Q q 1 0 0 -1 18
3
+ 774 cm BT 10 0 0 -10 0 12 Tm /F1.0 1 Tf (untitled text) Tj ET Q q 1
4
+ 0 0 -1 18 774 cm BT 10 0 0 -10 0 24 Tm /F1.0 1 Tf (Printed: Donnerstag, 14. November 2002 14:04:29 Uhr)
5
+ Tj ET Q 0.25 w /Cs1 CS 0 0 0 SC q 1 0 0 -1 18 774 cm 0 36.125 m 576.25
6
+ 36.125 l S Q q 1 0 0 -1 18 774 cm 0 34.125 m 576.25 34.125 l S Q Q
7
+ q 18 40 576 694 re W n /Cs1 cs 0 0 0 sc q 1 0 0 -1 18 774 cm BT 10
8
+ 0 0 -10 1 52 Tm /F2.0 1 Tf (testpdf) Tj ET Q Q
@@ -0,0 +1,42 @@
1
+ BT
2
+ 10 0 0 10 42.7953 670.6528 Tm
3
+ -0.0002 Tc
4
+ 0 Tw
5
+ [(Zul.-Nr)91.8(.: )]TJ
6
+ /F8 1 Tf
7
+ 3.8772 0 TD
8
+ -0.0001 Tc
9
+ (55921)Tj
10
+ /F3 1 Tf
11
+ 8.8787 0 TD
12
+ (Abgabekategorie: )Tj
13
+ /F8 1 Tf
14
+ 8.6923 0 TD
15
+ 0 Tc
16
+ (D)Tj
17
+ /F3 1 Tf
18
+ 4.0636 0 TD
19
+ -0.0001 Tc
20
+ -0.0305 Tw
21
+ [(Index: 02.98.0.)-9563.3(18.10.2002)]TJ
22
+ -25.5118 -2.2428 TD
23
+ 0 Tw
24
+ [(Zusammensetzung:)-921(01)]TJ
25
+ 8.3 0 0 8.3 156.1811 648.2247 Tm
26
+ 0.1715 Tw
27
+ [(CRA)73.6(T)54.8(AEGI FOLII cum FLORE EXTRACTUM ETHANOLICUM SICCUM 450)-278.1(mg, EXCIPIENS pro)]TJ
28
+ 0 -1.3661 TD
29
+ -0.0306 Tw
30
+ (COMPRESSO OBDUCTO.)Tj
31
+ 10 0 0 10 42.7953 622.7121 Tm
32
+ -0.0304 Tw
33
+ [(Anwendung:)-5285.1(Bei nerv�sen Herzbeschwerden)]TJ
34
+ 0 -1.4174 TD
35
+ 0 Tw
36
+ [(Packungen:)-4532.5(01)-305.3(002)-7141.5(50)-566.7(Filmtabletten)-12429.9(D)]TJ
37
+ 11.3386 -1.4174 TD
38
+ [(042)-6585.6(100)-566.7(Filmtabletten)-12429.9(D)]TJ
39
+ -11.3386 -1.4174 TD
40
+ -0.0304 Tw
41
+ [(G�ltig bis:)-6647.6(17. Oktober 2007)]TJ
42
+ ET
Binary file
Binary file
@@ -0,0 +1,149 @@
1
+ # Ruby/Mock version 1.0
2
+ #
3
+ # A class for conveniently building mock objects in RUnit test cases.
4
+ # Copyright (c) 2001 Nat Pryce, all rights reserved
5
+ #
6
+ # This program is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program; if not, write to the Free Software
17
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
+
19
+ require 'runit/error'
20
+
21
+
22
# A scripted mock object: the test queues the calls it expects, in order,
# with __next; any other method sent to the mock is routed through
# method_missing and checked against the next queued expectation.
#
# Subclasses may define ordinary methods to act as preconditions: when a
# method is added, Mock.method_added renames it to its precondition name
# (see Mock.__pre) and removes the original, so the call still reaches
# method_missing and the renamed method is run as a check first.
class Mock
  # Creates a new, named mock object. The name is reported in exceptions
  # raised by the mock object when method invocations are incorrect.
  #
  def initialize( mock_name = self.to_s )
    @mock_calls = []
    @next_call = 0
    @name = mock_name
  end

  # Mock the next method call to be made to this mock object.
  #
  # A mock method is defined by the method name (a symbol) and a block
  # that defines the arity of the method and the mocked behaviour for
  # this call. The mocked behaviour should assert preconditions and
  # return a value. Mocked behaviour should rarely be any more complex
  # than that. If it is, that's probably an indication that the tests
  # need some restructuring or that the tested code needs refactoring.
  #
  # If no block is given and preconditions have been defined for the named
  # method, a block is created for the mocked method that accepts any
  # number of arguments and returns self.
  #
  # Raises RuntimeError when no block is given and no precondition exists.
  #
  def __next( name, &test )
    if test.nil?
      if respond_to?( Mock.__pre(name) )
        test = proc { |*args| self }
      else
        raise "no block given for mocked method #{name}"
      end
    end
    @mock_calls.push( [name,test] )
  end

  # Call this at the end of a test to ensure that all scheduled calls
  # have been made to the mock.
  #
  # Raises RUNIT::AssertionFailedError when expectations are left over.
  #
  def __verify
    if @next_call != @mock_calls.length
      raise RUNIT::AssertionFailedError,
        "not all expected method calls were made to #{@name}",
        caller
    end
  end


  private

  # Dispatches arbitrary method calls to the next mocked behaviour.
  #
  # The caller's block is captured through an explicit &block parameter:
  # calling `proc` without a block is not supported on current Ruby.
  #
  def method_missing( name, *args, &block )
    __mock_call( name, args, block )
  end

  # Implements a method call using the next mocked behaviour and asserts
  # that the expected method is called with the expected number of
  # arguments. A block passed by the caller counts as one extra argument
  # and is handed to the mocked behaviour as its first argument.
  #
  # Raises RUNIT::AssertionFailedError on an unexpected call, a wrong
  # method name, an arity mismatch or a violated precondition.
  #
  def __mock_call( name, args, block )
    if @next_call >= @mock_calls.length
      raise RUNIT::AssertionFailedError,
        "unexpected call to #{name} method of #{@name}",
        caller(2)
    end

    expected_name,body = @mock_calls[@next_call]
    @next_call += 1

    if name != expected_name
      raise RUNIT::AssertionFailedError,
        "wrong method called on #{@name}; " +
        "expected #{expected_name}, was #{name}",
        caller(2)
    end

    args_length = args.length + (block ? 1 : 0)

    if body.arity < 0
      # Negative arity -(n+1) means n required arguments plus optional ones.
      if (body.arity+1).abs > args_length
        raise RUNIT::AssertionFailedError,
          "too few arguments to #{name} method of #{@name}; " +
          "require #{(body.arity+1).abs}, got #{args.length}",
          caller(2)
      end
    else
      if body.arity != args_length
        raise RUNIT::AssertionFailedError,
          "wrong number of arguments to " +
          "#{name} method of #{@name}; " +
          "require #{body.arity}, got #{args.length}",
          caller(2)
      end
    end

    if respond_to? Mock.__pre(name)
      if block
        precondition_ok = __send__( Mock.__pre(name), *args, &block )
      else
        precondition_ok = __send__( Mock.__pre(name), *args )
      end

      if not precondition_ok
        raise RUNIT::AssertionFailedError,
          "precondition of #{name} method violated",
          caller(2)
      end
    end

    if block
      instance_eval { body.call( block, *args ) }
    else
      instance_eval { body.call( *args ) }
    end
  end

  # The name of the precondition method for +method+, as a symbol.
  #
  # Built from the method's name (to_s) rather than Symbol#to_i, which
  # does not exist on Ruby 1.9 and later.
  #
  def Mock.__pre( method )
    "__pre_#{method.to_s}".intern
  end


  # Hook run whenever an instance method is defined on Mock or a subclass
  # after this point: the new method is renamed to its precondition name
  # and the original name is undefined so that calls fall through to
  # method_missing. Methods already carrying the prefix are left alone.
  #
  def Mock.method_added( name )
    unless(/^__pre_/.match(name.to_s))
      pre = self.__pre(name)
      alias_method( pre, name )
      undef_method(name)
    end
  end
end
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rpdf2txt -- PDF to Text Parser
4
+ # Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com, aschrafl@ywesee.com
22
+ #
23
+ # TestSuite -- Rpdf2txt -- 27.11.2002 -- aschrafl@ywesee.com
24
+
25
# Test-suite entry point: put this directory on the load path, then
# require every file in it whose name looks like test_*.rb.
suite_dir = File.dirname(File.expand_path(__FILE__))
$LOAD_PATH.push(suite_dir)
$KCODE = 'u'

Dir.foreach(File.dirname(__FILE__)) do |entry|
  next unless entry =~ /^test_.*\.rb$/
  require entry
end
@@ -0,0 +1,1802 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Rpdf2txt -- PDF to Text Parser
4
+ # Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # zdvatz@ywesee.com, mhatakeyama@ywesee.com
22
+ #
23
+ # TestPdfObject -- Rpdf2txt -- 21.11.2002 -- aschrafl@ywesee.com
24
+
25
+ $KCODE = 'u'
26
+ $: << File.expand_path('../lib', File.dirname(__FILE__))
27
+
28
+ require 'test/unit'
29
+ require 'tempfile'
30
+ require 'rpdf2txt/object'
31
+ require 'rpdf2txt/default_handler'
32
+
33
+ module Rpdf2txt
34
+ class PdfObject
35
+ attr_accessor :attributes
36
+ end
37
+ class TrailerDictionary
38
+ public
39
+ attr_accessor :attributes
40
+ end
41
+ class PageLeaf < TreeNode
42
+ attr_accessor :contents, :resources
43
+ public :join_snippets
44
+ end
45
+ class CMap < Stream
46
+ public :extract_bfchar, :extract_bfrange
47
+ end
48
+ class TestCmap < Test::Unit::TestCase
49
+ def setup
50
+ @input_bfchar = <<-EOS
51
+ 25 0 obj
52
+ <</Length 357
53
+ >>
54
+ stream
55
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
56
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
57
+ /CMapName /TT11+0 def
58
+ /CMapType 2 def
59
+ 1 begincodespacerange <004a> <0074> endcodespacerange
60
+ 3 beginbfchar
61
+ <004a> <03B3>
62
+ <0064> <2264>
63
+ <0074> <2265>
64
+ endbfchar
65
+ endcmap CMapName currentdict /CMap defineresource pop end end
66
+
67
+ endstream
68
+ endobj
69
+ EOS
70
+ @input_bfrange = <<-EOS
71
+ 75 0 obj
72
+ <</Length 338
73
+ >>
74
+ stream
75
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
76
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
77
+ /CMapName /TT11+0 def
78
+ /CMapType 2 def
79
+ 1 begincodespacerange <0044> <0045> endcodespacerange
80
+ 1 beginbfrange
81
+ <0044> <0045> <03B1>
82
+ endbfrange
83
+ endcmap CMapName currentdict /CMap defineresource pop end end
84
+
85
+ endstream
86
+ endobj
87
+ EOS
88
+ end
89
+ def test_parser_grammar_bfchar
90
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
91
+ assert_nothing_raised{
92
+ ast= Rpdf2txt.cmap_parser.parse(cmap.extract_bfchar)
93
+ }
94
+ end
95
+ def test_extract_attributes_bfchar
96
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
97
+ expected = {:length => "357"}
98
+ assert_equal(expected, cmap.attributes)
99
+ end
100
+ def test_cmap_bfchar
101
+ cmap = Rpdf2txt::CMap.new(@input_bfchar)
102
+ assert_equal(8805, cmap.map[116])
103
+ assert_equal(8804, cmap.map[100])
104
+ assert_equal(947, cmap.map[74])
105
+ end
106
+ def test_parser_grammar_bfrange
107
+ cmap = Rpdf2txt::CMap.new(@input_bfrange)
108
+ assert_nothing_raised{
109
+ ast= Rpdf2txt.cmap_range_parser.parse(cmap.extract_bfrange)
110
+ }
111
+ end
112
+ def test_cmap_bfrange
113
+ cmap = Rpdf2txt::CMap.new(@input_bfrange)
114
+ assert_equal(945, cmap.map[68])
115
+ assert_equal(946, cmap.map[69])
116
+ end
117
+ def test_cmap_bfrange_array
118
+ input_bfrange = <<-EOS
119
+ 75 0 obj
120
+ <</Length 338
121
+ >>
122
+ stream
123
+ /CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
124
+ /Registry (TT11+0) /Ordering (T42UV) /Supplement 0 >> def
125
+ /CMapName /TT11+0 def
126
+ /CMapType 2 def
127
+ 1 begincodespacerange <0094> <0095> endcodespacerange
128
+ 1 beginbfrange
129
+ <0094> <0095> [ <2264> <2265> ] <0024> <0025> [ <2224> <2225> ]
130
+ endbfrange
131
+ endcmap CMapName currentdict /CMap defineresource pop end end
132
+
133
+ endstream
134
+ endobj
135
+ EOS
136
+ cmap = Rpdf2txt::CMap.new(input_bfrange)
137
+ assert_equal(8804, cmap.map[148])
138
+ assert_equal(8805, cmap.map[149])
139
+ assert_equal(8740, cmap.map[36])
140
+ assert_equal(8741, cmap.map[37])
141
+ end
142
+ end
143
+ class TestPdfObject < Test::Unit::TestCase
144
+ def setup
145
+ input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj'
146
+ @tree_node = Rpdf2txt::TreeNode.new(input)
147
+ end
148
+ def test_tree_node1
149
+ input = '4 0 obj << /Type /Pages /Kids [ 7 0 R 8 0 R ] /Count 2 >> endobj'
150
+ node = Rpdf2txt::TreeNode.new(input)
151
+ assert_equal(4, node.oid)
152
+ assert_equal(["7 0 R", "8 0 R"], node.attributes[:kids])
153
+ assert_equal(nil, node.attributes[:contents])
154
+ assert_equal(nil, node.attributes[:parent])
155
+ assert_equal(true, node.root?)
156
+ end
157
+ def test_tree_node2
158
+ input = '3 0 obj << /Type /Page /Parent 2 0 R /Contents 6 0 R >> endobj'
159
+ node = Rpdf2txt::TreeNode.new(input)
160
+ assert_equal(3, node.oid)
161
+ assert_equal(nil, node.attributes[:kids])
162
+ assert_equal('6 0 R', node.attributes[:contents])
163
+ assert_equal('2 0 R', node.attributes[:parent])
164
+ assert_equal(false, node.root?)
165
+ end
166
+ def test_tree_node3
167
+ input = '3 0 obj << /Type /Page /Parent 2 0 R / 2 0 R >> endobj'
168
+ node = nil
169
+ assert_nothing_raised {
170
+ node = Rpdf2txt::TreeNode.new(input)
171
+ }
172
+ end
173
+ def test_tree_node4
174
+ src = '
175
+ 400 0 obj
176
+ <<
177
+ /Title (���\)����\\��P�T#/��-&��;S��O�A)
178
+ /Parent 399 0 R
179
+ /A 436 0 R
180
+ /Next 433 0 R
181
+ >>
182
+ endobj
183
+ '
184
+ node = Rpdf2txt::TreeNode.new(src)
185
+ assert_equal(400, node.oid)
186
+ assert_equal('433 0 R', node.attributes[:next])
187
+ end
188
+ def test_tree_node5
189
+ src = '
190
+ 124 0 obj
191
+ <<
192
+ /Type /Font
193
+ /Subtype /CIDFontType2
194
+ /BaseFont /HAGNPN+SymbolMT
195
+ /FontDescriptor 122 0 R
196
+ /CIDSystemInfo << /Registry (y�>�)/Ordering (q�4�6ZB)/Supplement 0 >>
197
+ /DW 1000
198
+ /W [ 74 [ 411 ] 100 [ 548 ] 116 [ 548 ] ]
199
+ >>
200
+ endobj
201
+ '
202
+ node = Rpdf2txt::TreeNode.new(src)
203
+ assert_equal(124, node.oid)
204
+ end
205
+ def test_tree_node6
206
+ src = '
207
+ 198 0 obj
208
+ <<
209
+ /S /Standard#20#28Web#29
210
+ /C /Standard#20#28Web#29
211
+ /Pg 11 0 R
212
+ /P 346 0 R
213
+ /K [ 13 << /Type /MCR /Pg 21 0 R /MCID 0 >> ]
214
+ >>
215
+ endobj
216
+ '
217
+ node = Rpdf2txt::TreeNode.new(src)
218
+ assert_equal(198, node.oid)
219
+ end
220
+ def test_tree_node7
221
+ src = '
222
+ 345 0 obj
223
+ <<
224
+ /S /Standard
225
+ /C /Standard
226
+ /Pg 111 0 R
227
+ /K 17
228
+ /P 346 0 R
229
+ >>
230
+ endobj'
231
+ node = Rpdf2txt::TreeNode.new(src)
232
+ assert_equal(345, node.oid)
233
+ end
234
+ def test_tree_node8
235
+ src = '
236
+ 346 0 obj
237
+ <<
238
+ /S /Sect
239
+ /P 396 0 R
240
+ /K [ 347 0 R 143 0 R 144 0 R 352 0 R 149 0 R 150 0 R 151 0 R 153 0 R 154 0 R
241
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
242
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
243
+ 171 0 R 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R
244
+ 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R 186 0 R
245
+ 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R
246
+ 195 0 R 196 0 R 197 0 R 198 0 R 199 0 R 200 0 R 201 0 R 202 0 R
247
+ 203 0 R 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R
248
+ 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R 217 0 R 218 0 R
249
+ 219 0 R 220 0 R 221 0 R 222 0 R 223 0 R 224 0 R 225 0 R 226 0 R
250
+ 227 0 R 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R
251
+ 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R
252
+ 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R
253
+ 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R
254
+ 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R
255
+ 267 0 R 268 0 R 269 0 R 270 0 R 271 0 R 272 0 R 273 0 R 274 0 R
256
+ 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 281 0 R 282 0 R
257
+ 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R 289 0 R 290 0 R
258
+ 291 0 R 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R 297 0 R 298 0 R
259
+ 299 0 R 300 0 R 301 0 R 302 0 R 303 0 R 304 0 R 305 0 R 306 0 R
260
+ 307 0 R 308 0 R 309 0 R 310 0 R 311 0 R 312 0 R 313 0 R 314 0 R
261
+ 315 0 R 316 0 R 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R
262
+ 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R 329 0 R 330 0 R 331 0 R
263
+ 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R
264
+ 340 0 R 360 0 R 344 0 R 345 0 R ]
265
+ >>
266
+ endobj'
267
+ node = Rpdf2txt::TreeNode.new(src)
268
+ assert_equal(346, node.oid)
269
+ end
270
+ def test_tree_node9
271
+ src = '
272
+ 346 0 obj
273
+ <<
274
+ /S /Sect
275
+ /P 396 0 R
276
+ /K [
277
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
278
+ 155 0 R 156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R
279
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
280
+ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R
281
+ 190 ]
282
+ >>
283
+ endobj'
284
+ node = Rpdf2txt::TreeNode.new(src)
285
+ assert_equal(346, node.oid)
286
+ end
287
+ def test_tree_node10
288
+ src = '
289
+ 198 0 obj
290
+ <<
291
+ /S /Standard#20#28Web#29
292
+ /C /Standard#20#28Web#29
293
+ /Pg 11 0 R
294
+ /P 346 0 R
295
+ /K [ ]
296
+ >>
297
+ endobj'
298
+ node = Rpdf2txt::TreeNode.new(src)
299
+ assert_equal(198, node.oid)
300
+ end
301
+ def test_extract_oids
302
+ input = '6 0 R'
303
+ assert_equal(@tree_node.extract_oids(input), [6])
304
+ input = ["7 0 R", "8 0 R"]
305
+ assert_equal(@tree_node.extract_oids(input), [7,8])
306
+ end
307
+ def test_parse_content_from_complex_attributes
308
+ src = <<-ENDOFSRC
309
+ 46 0 obj
310
+ <<
311
+ /Type /Page
312
+ /Parent 543 0 R
313
+ /Resources << /Font << /F2 575 0 R /T1_0 504 0 R /F4 573 0 R /T1_4 512 0 R /T1_3 511 0 R >>
314
+ /Shading << /S12 508 0 R >> /XObject << /Im4 51 0 R >> /ExtGState 47 0 R
315
+ /ProcSet [ /PDF /Text /ImageB ] /ColorSpace 534 0 R >>
316
+ /Contents 48 0 R
317
+ /BleedBox [ 0 0 651 898 ]
318
+ /MediaBox [ 0 0 651 898 ]
319
+ /TrimBox [ 28 28 623 870 ]
320
+ /CropBox [ 28 28 623 870 ]
321
+ /ArtBox [ 28 28 623 870 ]
322
+ /LastModified (D:20021210105029+01')
323
+ /Rotate 0
324
+ >>
325
+ endobj
326
+ ENDOFSRC
327
+ obj = Rpdf2txt::PdfObject.new(src)
328
+ attributes = obj.attributes
329
+ assert_equal(Hash, attributes.class)
330
+ assert_equal(11, attributes.size)
331
+ assert_equal(0, obj.revision_id)
332
+ end
333
+ def test_parse_content_from_complex_attributes2
334
+ src = <<-ENDOFSRC
335
+ 568 0 obj
336
+ <<
337
+ /Linearized 1
338
+ /O 570
339
+ /H [ 1049 1249 ]
340
+ /L 910845
341
+ /E 169588
342
+ /N 108
343
+ /T 899366
344
+ >>
345
+ endobj
346
+
347
+ ENDOFSRC
348
+ obj = Rpdf2txt::PdfObject.new(src)
349
+ attributes = obj.attributes
350
+ assert_equal(Hash, attributes.class)
351
+ assert_equal(7, attributes.size)
352
+ end
353
+ def test_parse_pantone
354
+ src = <<-ENDOFSRC
355
+ 2 0 obj
356
+ <<
357
+ /JT 150 0 R
358
+ /AGFA_NORN_V (ES15.101 V03)
359
+ /AGFA_PSE_V (Apogee Norm PSE 1.1 23 )
360
+ /AGFA_CMYKCCN << /PANTONE#20379#20CV [ 0.08501 0 0.60001 0 ] /PANTONE#20192#20CV [ 0 0.94 0.64999 0 ]
361
+ /PANTONE#20199#20CV [ 0 1 0.64999 0 ] /PANTONE#20383#20CV [ 0.185 0 1 0.185 ]
362
+ /PANTONE#20375#20CV [ 0.42999 0 0.78999 0 ] /PANTONE#20100#20CV [ 0 0 0.50999 0 ]
363
+ /PANTONE#20281#20CV [ 1 0.72 0 0.38 ] /PANTONE#20185#20CV [ 0 0.91 0.75999 0 ]
364
+ /PANTONE#20377#20CV [ 0.42999 0 1 0.235 ] /PANTONE#203015#20CV [ 1 0.235 0 0.185 ]
365
+ /PANTONE#20195#20CV [ 0 0.75999 0.56 0.56 ] /PANTONE#20381#20CV [ 0.185 0 0.91 0 ]
366
+ /PANTONE#20Cl#20Gy#207#20CV [ 0 0 0 0.47 ] /PANTONE#20137#20CV [ 0 0.34 0.91 0 ]
367
+ /PANTONE#20397#20CV [ 0.11501 0 1 0.11501 ] /PANTONE#20322#20CV [ 1 0 0.38 0.30499 ]
368
+ /PANTONE#20382#20CV [ 0.30499 0 0.94 0 ] /PANTONE#20376#20CV [ 0.56 0 1 0 ] >>
369
+ /Type /Catalog
370
+ /Pages 55 0 R
371
+ /Outlines 15 1 R
372
+ >>
373
+ endobj
374
+ ENDOFSRC
375
+ obj = Rpdf2txt::PdfObject.new(src)
376
+ attributes = obj.attributes
377
+ assert_equal(Hash, attributes.class)
378
+ assert_equal(7, attributes.size)
379
+ end
380
+ def test_parse_escaped
381
+ src =
382
+ '<<
383
+ /O (foo\\)
384
+ >>'
385
+ obj = nil
386
+ assert_nothing_raised {
387
+ obj = Rpdf2txt::PdfObject.new(src)
388
+ }
389
+ assert_equal({:o => 'foo\\'}, obj.attributes)
390
+ end
391
+ def test_parse_limits
392
+ src = <<-EOS
393
+ 31 0 obj
394
+ <<
395
+ /Limits [ <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
396
+ 007000740069006F006E0073> <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
397
+ 007000740069006F006E0073> ]
398
+ /Names [ <FEFF00530077006900730073006D0065006400690063002E006A006F0062006F
399
+ 007000740069006F006E0073> 141 0 R ]
400
+ >>
401
+ endobj
402
+ EOS
403
+ obj = nil
404
+ assert_nothing_raised {
405
+ obj = Rpdf2txt::PdfObject.new(src)
406
+ }
407
+ expected = {
408
+ :names => ["<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>",
409
+ "141 0 R"],
410
+ :limits => ["<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>",
411
+ "<FEFF00530077006900730073006D0065006400690063002E006A006F0062006F\n007000740069006F006E0073>"]}
412
+ assert_equal expected, obj.attributes
413
+ end
414
+ end
415
+ class TestText < Test::Unit::TestCase
416
+ def test_get_font
417
+ font_src = <<-EOS
418
+ 580 0 obj
419
+ <<
420
+ /Type /Font
421
+ /Subtype /Type1
422
+ /FirstChar 32
423
+ /LastChar 240
424
+ /Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278
425
+ 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500
426
+ 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778
427
+ 556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500
428
+ 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611
429
+ 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278
430
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611
431
+ 0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611
432
+ 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0
433
+ 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
434
+ 0 0 0 0 0 278 ]
435
+ /Encoding /MacRomanEncoding
436
+ /BaseFont /Frutiger-BoldItalic
437
+ /FontDescriptor 579 0 R
438
+ >>
439
+ endobj
440
+ EOS
441
+ page_src = <<-EOS
442
+ 570 0 obj
443
+ <<
444
+ /Type /Page
445
+ /Parent 540 0 R
446
+ /Resources 571 0 R
447
+ /Contents 576 0 R
448
+ /BleedBox [ 0 0 651 898 ]
449
+ /MediaBox [ 0 0 651 898 ]
450
+ /TrimBox [ 28 28 623 870 ]
451
+ /CropBox [ 28 28 623 870 ]
452
+ /ArtBox [ 28 28 623 870 ]
453
+ /Rotate 0
454
+ >>
455
+ endobj
456
+ EOS
457
+ rsrc_src = <<-EOS
458
+ 571 0 obj
459
+ <<
460
+ /ProcSet [ /PDF /Text /ImageC ]
461
+ /Font << /F1 580 0 R /F2 575 0 R /F3 578 0 R /F4 573 0 R >>
462
+ /XObject << /Im1 587 0 R >>
463
+ /ExtGState << /GS2 585 0 R /GS3 584 0 R >>
464
+ /Shading << /Sh1 590 0 R >>
465
+ >>
466
+ endobj
467
+ EOS
468
+ font = Rpdf2txt::Font.new(font_src)
469
+ page = Rpdf2txt::PageLeaf.new(page_src)
470
+ rsrc = Rpdf2txt::Resource.new(rsrc_src)
471
+ page.build_tree({580=>font,571=>rsrc})
472
+ text = Rpdf2txt::Text.new("(Hello World)")
473
+ text.current_page = page
474
+ get_font = text.get_font("F1")
475
+ assert_equal(Rpdf2txt::Font, get_font.class)
476
+ assert_equal(font, get_font)
477
+ assert_equal(true, font.bold?)
478
+ assert_equal(true, font.italic?)
479
+ assert_equal("/Frutiger-BoldItalic", font.basefont_name)
480
+ end
481
+ def test_font_no_width
482
+ font_src = <<-EOS
483
+ 327 0 obj
484
+ <<
485
+ /Type /Font
486
+ /Subtype /Type1
487
+ /Encoding 370 0 R
488
+ /BaseFont /Symbol
489
+ >>
490
+ endobj
491
+ EOS
492
+ page_src = <<-EOS
493
+ 10 0 obj
494
+ <<
495
+ /Type /Page
496
+ /Parent 390 0 R
497
+ /Resources 11 0 R
498
+ /Contents 12 0 R
499
+ /MediaBox [ 0 0 595 841 ]
500
+ /CropBox [ 0 0 595 841 ]
501
+ /Rotate 0
502
+ >>
503
+ endobj
504
+ EOS
505
+ rsrc_src = <<-EOS
506
+ 11 0 obj
507
+ <<
508
+ /ProcSet [ /PDF /Text ]
509
+ /Font << /F1 416 0 R /F2 408 0 R /F4 410 0 R /F6 325 0 R /F8 327 0 R >>
510
+ /ExtGState << /GS1 422 0 R >>
511
+ >>
512
+ endobj
513
+ EOS
514
+ txt_src = <<-EOS
515
+ BT
516
+ /F8 1 Tf
517
+ (Hello World) Tj
518
+ ET
519
+ EOS
520
+ font = Rpdf2txt::Font.new(font_src)
521
+ page = Rpdf2txt::PageLeaf.new(page_src)
522
+ rsrc = Rpdf2txt::Resource.new(rsrc_src)
523
+ page.build_tree({327=>font,11=>rsrc})
524
+ text = Rpdf2txt::Text.new(txt_src)
525
+ text.current_page = page
526
+ assert_nothing_raised {
527
+ text.scan
528
+ }
529
+ end
530
+ end
531
# Exercises Rpdf2txt::PdfEncrypt against fixture objects stored in the data/
# directory next to this file. #setup builds the shared @encrypt from
# data/encrypt_string with a fixed file id.
class TestEncrypt < Test::Unit::TestCase
  def setup
    @encrypt = encrypt_from('encrypt_string')
  end
  def test_decrypt
    assert_decryptable('working_obj', "dc08b36009e48618f99c")
  end
  def test_decrypt2
    assert_decryptable('90_obj', "7617ca1ac5babcf09cdf")
  end
  def test_decrypt3
    assert_decryptable('working_obj2', "a9a666959bd64a96551b")
  end
  # For the next three fixtures the key assertion was already disabled in the
  # original source (it compared against "1aaeedd5d5304b79709b"); only the
  # "decrypted stream inflates" check is performed.
  def test_decrypt5
    assert_decryptable('458_obj')
  end
  def test_decrypt6
    assert_decryptable('450_obj')
  end
  def test_decrypt7
    assert_decryptable('465_obj')
  end
  def test_decrypt_key
    # byte position important! do not indent these lines!!!
    obj_src = <<-EOS
473 0 obj
<< /N 3 /Alternate /DeviceRGB /Length 2575 /Filter /FlateDecode >>
endobj
    EOS
    pdf_obj = Rpdf2txt::Stream.new(obj_src)
    encrypt = encrypt_from('encrypt_obj')
    assert_equal("dc08b36009e48618f99c", encrypt.decrypt_key(pdf_obj).unpack('h*').first)
  end
  # The fixture holds the compressed stream as hex text; it is packed back to
  # bytes before being inflated.
  def test_inflate_obj
    input = [fixture('90_obj_comp')].pack('H*')
    assert_nothing_raised {
      Zlib::Inflate.inflate(input)
    }
  end
  def test_parse_encrypt
    encrypt = encrypt_from('encrypt_obj')
    assert_equal("00ecc7a7bf8d68c564a21b98258b1dbff2aaf8d24bfdbaa74a9a073467d896b6", encrypt.user_key.unpack("H*").first)
    assert_equal("2055c756c72e1ad702608e8196acad447ad32d17cff583235f6dd15fed7dab67", encrypt.owner_key.unpack("H*").first)
    assert_nothing_raised {
      encrypt.encryption_key
    }
  end
  # Smoke test only: no file id is set here, and the result of big_endian? is
  # platform dependent, so the test just pins "does not raise". The original
  # called the method without any assertion.
  def test_endianess
    encrypt = Rpdf2txt::PdfEncrypt.new(fixture('encrypt_obj'))
    assert_nothing_raised {
      encrypt.big_endian?
    }
  end

  private

  # Returns the contents of data/<name>, resolved relative to this file.
  def fixture(name)
    File.read(File.expand_path("./data/#{name}", File.dirname(__FILE__)))
  end

  # Builds a PdfEncrypt from the named fixture and assigns the file id that
  # all encrypted fixtures in this class were produced with.
  def encrypt_from(name)
    encrypt = Rpdf2txt::PdfEncrypt.new(fixture(name))
    encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18'
    encrypt
  end

  # Wraps the named fixture in a Stream, optionally checks the per-object key
  # (nibble-swapped hex via unpack('h*')), then verifies that the decrypted
  # payload inflates.
  def assert_decryptable(name, expected_key = nil)
    pdf_obj = Rpdf2txt::Stream.new(fixture(name))
    if expected_key
      assert_equal(expected_key, @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
    end
    # if the stream could be inflated, the decryption is ok!
    # NOTE(review): decrypt is deliberately run twice, as in the original --
    # presumably to show a second call on the same object still works; confirm.
    assert_nothing_raised {
      Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj))
      Zlib::Inflate.inflate(@encrypt.decrypt(pdf_obj))
    }
  end
end
647
# Same decrypt-then-inflate check as the other encryption tests, but driven by
# the data/encrypt_string_128bit fixture and its own file id.
class TestEncrypt128bit < Test::Unit::TestCase
  def setup
    here = File.dirname(__FILE__)
    encrypt_src = File.read(File.expand_path('./data/encrypt_string_128bit', here))
    @encrypt = Rpdf2txt::PdfEncrypt.new(encrypt_src)
    @encrypt.file_id = 'D816A5E838D50653C19DB62504229EB6'
  end
  def test_decrypt8
    raw = File.read(File.expand_path('./data/3392_obj', File.dirname(__FILE__)))
    encrypted = Rpdf2txt::Stream.new(raw)
    #if the stream could be inflated, the decryption is ok!
    assert_nothing_raised do
      # decrypt + inflate is performed twice on the same object
      2.times { Zlib::Inflate.inflate(@encrypt.decrypt(encrypted)) }
    end
  end
end
666
# Checks that TrailerDictionary#file_id returns the first entry of the /ID
# array exactly as written between the angle brackets.
class TestTrailer < Test::Unit::TestCase
  def test_parse_trail
    # Joined with "\n": the source starts with an empty line and has no
    # newline after "startxref".
    trailer_src = [
      '',
      'trailer',
      '<<',
      '/Size 476',
      '/Info 388 0 R',
      '/Encrypt 395 0 R',
      '/Root 394 0 R',
      '/Prev 203754',
      '/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]',
      '>>',
      'startxref',
    ].join("\n")
    trailer = Rpdf2txt::TrailerDictionary.new(trailer_src)
    # The trailing "v" is part of the fixture and is expected back verbatim.
    assert_equal("8664e6986751f2a49dccc9a4b40a4f18v", trailer.file_id)
  end
end
684
# Tests for Rpdf2txt::Font: encoding detection and glyph widths taken from an
# explicit /Widths array, from the built-in Symbol font, and through an
# /Encoding object with /Differences.
class TestFont < Test::Unit::TestCase
  def test_encoding
    frutiger = Rpdf2txt::Font.new(frutiger_font_src)
    assert_equal("mac", frutiger.encoding)
  end
  def test_width
    frutiger = Rpdf2txt::Font.new(frutiger_font_src)
    # /FirstChar is 32, so ' ' maps to the first /Widths entry.
    assert_equal(278, frutiger.width(' '))
    assert_equal(556, frutiger.width('a'))
  end
  # No /Widths array here; the expected numbers are the ones this test has
  # always asserted for /BaseFont /Symbol.
  def test_width__builtin
    symbol = Rpdf2txt::Font.new(<<-EOS)
580 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 32
/LastChar 240
/Encoding /MacRomanEncoding
/BaseFont /Symbol
/FontDescriptor 579 0 R
>>
endobj
    EOS
    assert_equal(250, symbol.width(' '))
    assert_equal(763, symbol.width(70))
    assert_equal(631, symbol.width('alpha'))
  end
  def test_width__differences
    encoding_src = <<-EOS
252 0 obj
<<
/Type /Encoding
/Differences [ 1 /space /beta /alpha ]
>>
endobj
    EOS
    font_src = <<-EOS
219 0 obj
<<
/Type /Font
/Subtype /Type1
/Encoding 252 0 R
/BaseFont /Symbol
/ToUnicode 253 0 R
>>
endobj
    EOS
    tounicode_src = <<-EOS
253 0 obj
<< /Filter /FlateDecode /Length 227 >>
stream
H�TP�n� ���-��[�"���%�8I�am!a\��9)#v���^Ʒљ���0�b����=*�W�w��J���� @�x:��vt��a �37�8M�������~D�ѸN_��'��-� �� ��� ��bn7i�y�5nA*�ҭ�"C/���#�C1/��=��:&2ѕ{���+D��
�ɞ�b_��L��s𺐚��4�; >�P�_Eo�
endstream
endobj
    EOS
    symbol = Rpdf2txt::Font.new(font_src)
    # The indirect references are wired up by hand instead of via a document.
    symbol.attributes[:encoding] = Rpdf2txt::Encoding.new(encoding_src)
    symbol.attributes[:to_unicode] = Rpdf2txt::Stream.new(tounicode_src).to_cmap
    # Code 3 is /alpha according to /Differences, so both lookups agree.
    assert_equal(631, symbol.width('alpha'))
    assert_equal(631, symbol.width(3))
  end

  private

  # Source of font object 580 (Frutiger-BoldItalic, MacRomanEncoding, explicit
  # /Widths for chars 32..240). A new String is built on every call, so each
  # test gets its own copy.
  def frutiger_font_src
    <<-EOS
580 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 32
/LastChar 240
/Widths [ 278 389 500 556 556 1000 722 278 333 333 556 600 278 389 278 278
556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500
800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778
556 778 611 556 556 722 667 1000 667 667 556 389 278 389 600 500
278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611
611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278
0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611
0 0 0 0 556 556 0 0 0 0 0 800 0 0 0 278 0 0 278 600 278 278 0 611
278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0
0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 278 ]
/Encoding /MacRomanEncoding
/BaseFont /Frutiger-BoldItalic
/FontDescriptor 579 0 R
>>
endobj
    EOS
  end
end
797
+ class TestPageLeaf < Test::Unit::TestCase
798
# Feeds a content stream that interleaves `cm` transformations with text
# objects (Tm / Td positioning) to PageLeaf#text and expects the six text
# fragments to come out one per line, in stream order.
def test_text_cm
  content = Stream.new
  content.decoded_stream = <<-'EOS'
q
1 0 0 -1 70.866 841.89 cm
0 J
1 1 1 RG
q
-1.5 -1.5 m
455.043 -1.5 l
452.043 1.5 l
1.5 1.5 l
W*
n
-0.5 0 m
454.043 0 l
S
Q
q
455.043 -1.5 m
455.043 32.5 l
452.043 29.5 l
452.043 1.5 l
W*
n
453.543 -0.5 m
453.543 31.5 l
S
Q
q
455.043 32.5 m
-1.5 32.5 l
1.5 29.5 l
452.043 29.5 l
W*
n
454.043 31 m
-0.5 31 l
S
Q
q
-1.5 32.5 m
-1.5 -1.5 l
1.5 1.5 l
1.5 29.5 l
W*
n
0 31.5 m
0 -0.5 l
S
Q
1 0 0 1 0.5 31.5 cm
0 0 0 rg
BT
/F0 8 Tf
1 0 0 -1 232.336 7.573 Tm
[(1. position: 7.573 offset: 31.5)] TJ
ET
1 0 0 1 -0.5 -0.5 cm
q
455.043 -1.5 m
455.043 12.1 l
452.043 9.1 l
452.043 1.5 l
W*
n
453.543 -0.5 m
453.543 11.1 l
S
Q
q
455.043 12.1 m
-1.5 12.1 l
1.5 9.1 l
452.043 9.1 l
W*
n
454.043 10.6 m
-0.5 10.6 l
S
Q
q
-1.5 12.1 m
-1.5 -1.5 l
1.5 1.5 l
1.5 9.1 l
W*
n
0 11.1 m
0 -0.5 l
S
Q
1 0 0 1 0 39.866 cm
BT
/F1 16 Tf
1 0 0 -1 0 14.347 Tm
(2. position: 14.347 offset: 39.866) Tj
0 0 0 RG
ET
0 30.173 m
453.543 30.173 l S
1 0 0 1 0 32.2 cm
BT
/F2 11 Tf
1 0 0 -1 314.813 10.413 Tm
(3. position: 10.413 offset: 32.2) Tj
ET
1 0 0 1 0 -32.2 cm
0 46.7 m
453.543 46.7 l
S
BT
/F2 8 Tf
1 0 0 -1 0 62.573 Tm
(4. position: 62.573 offset:-32.2) Tj
/F3 9 Tf
0 -14.547 Td
(5. moved by: -14.547) Tj
/F2 8 Tf
0 -15.853 Td
(6. moved by: -15.853) Tj
ET
  EOS
  leaf = PageLeaf.new
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  # Scratch table kept from the original author. For the first three
  # fragments: a = y value of Tm, b = y offset of the preceding cm,
  # f = font size.
  ##     a+b      a-b
  # 1.  39.073  -23.927
  # 2.  54.213  -25.519
  # 3.  42.613  -21.787
  ## fonts
  # 1.  F0  8
  # 2.  F1 16
  # 3.  F2 11
  ##    a+b+f   a+b-f    a-b-f    a-b+f
  # 1.  47.073  31.073  -31.927  -15.927
  # 2.  70.213  38.213  -41.519  - 9.519
  # 3.  53.613  31.613  -32.787  -10.787
  ##     a+f     a-f
  # 1.  15.573  -0.427
  # 2.  30.347  -1.653
  # 3.  21.413  -0.587
  #
  # Original note: "5 hrs -> 5 x newline" (the expected text below has six
  # lines, i.e. five line breaks).
  wanted = <<-EOS
1. position: 7.573 offset: 31.5
2. position: 14.347 offset: 39.866
3. position: 10.413 offset: 32.2
4. position: 62.573 offset:-32.2
5. moved by: -14.547
6. moved by: -15.853
  EOS
  assert_equal(wanted.strip, collector.out.strip)
end
954
+ def test_text__fixed_double_lead_bug
955
+ stream = Stream.new
956
+ stream.decoded_stream = <<-'EOS'
957
+ q
958
+ 1 i
959
+ 0.059998 34.407 618 -34.5 re
960
+ W* n
961
+ 0 864.567 617.94 -864.54 re
962
+ W* n
963
+ /GS1 gs
964
+ q
965
+ 324.71994 0 0 25.199999 -0.720012 10.166975 cm
966
+ /Im112 Do
967
+ Q
968
+ Q
969
+ q
970
+ 1 i
971
+ 617.04 11.127 0.89996 0.23999 re
972
+ W n
973
+ /GS1 gs
974
+ q
975
+ 1.44 0 0 0.24 617.039978 11.126974 cm
976
+ /Im17 Do
977
+ Q
978
+ Q
979
+ q
980
+ 1 i
981
+ 0.059998 34.407 618 -34.5 re
982
+ W* n
983
+ 0 864.567 617.94 -864.54 re
984
+ W* n
985
+ /GS1 gs
986
+ q
987
+ 1.44 0 0 0.24 0.239988 10.886974 cm
988
+ /Im18 Do
989
+ Q
990
+ q
991
+ 27.359999 0 0 0.24 295.679962 10.886974 cm
992
+ /Im16 Do
993
+ Q
994
+ Q
995
+ q
996
+ 1 i
997
+ 617.28 10.887 0.65997 0.24002 re
998
+ W n
999
+ /GS1 gs
1000
+ q
1001
+ 0.96 0 0 0.24 617.279968 10.886974 cm
1002
+ /Im14 Do
1003
+ Q
1004
+ Q
1005
+ q
1006
+ 1 i
1007
+ 0.059998 34.407 618 -34.5 re
1008
+ W* n
1009
+ 0 864.567 617.94 -864.54 re
1010
+ W* n
1011
+ /GS1 gs
1012
+ q
1013
+ 597.599976 0 0 12.719999 10.319989 -0.873026 cm
1014
+ /Im113 Do
1015
+ Q
1016
+ Q
1017
+ q
1018
+ 1 i
1019
+ 11.28 0.026978 0.47998 0.059998 re
1020
+ W n
1021
+ /GS1 gs
1022
+ q
1023
+ 0.48 0 0 0.24 11.279988 -0.153026 cm
1024
+ /Im2 Do
1025
+ Q
1026
+ Q
1027
+ q
1028
+ 1 i
1029
+ 606.48 0.026978 0.47998 0.059998 re
1030
+ W n
1031
+ /GS1 gs
1032
+ q
1033
+ 0.48 0 0 0.24 606.47998 -0.153026 cm
1034
+ /Im2 Do
1035
+ Q
1036
+ Q
1037
+ q
1038
+ 1 i
1039
+ 0 864.567 617.94 -50.94 re
1040
+ W* n
1041
+ /GS1 gs
1042
+ q
1043
+ 608.399963 0 0 13.200012 -0.960012 852.326965 cm
1044
+ /Im93 Do
1045
+ Q
1046
+ Q
1047
+ q
1048
+ 1 i
1049
+ 616.08 853.287 1.86 0.23999 re
1050
+ W n
1051
+ /GS1 gs
1052
+ q
1053
+ 1.92 0 0 0.24 616.079956 853.286987 cm
1054
+ /Im44 Do
1055
+ Q
1056
+ Q
1057
+ q
1058
+ 1 i
1059
+ 0 864.567 617.94 -50.94 re
1060
+ W* n
1061
+ /GS1 gs
1062
+ q
1063
+ 1.92 0 0 0.24 -0.000012 853.046936 cm
1064
+ /Im87 Do
1065
+ Q
1066
+ q
1067
+ 240 0 0 0.24 7.439988 853.046936 cm
1068
+ /Im85 Do
1069
+ Q
1070
+ Q
1071
+ q
1072
+ 1 i
1073
+ 615.84 853.047 2.1 0.24005 re
1074
+ W n
1075
+ /GS1 gs
1076
+ q
1077
+ 2.4 0 0 0.24 615.839966 853.046936 cm
1078
+ /Im59 Do
1079
+ Q
1080
+ Q
1081
+ q
1082
+ 1 i
1083
+ 0 864.567 617.94 -50.94 re
1084
+ W* n
1085
+ /GS1 gs
1086
+ q
1087
+ 241.920013 0 0 41.040039 6.479988 812.966919 cm
1088
+ /Im88 Do
1089
+ Q
1090
+ q
1091
+ 572.639954 0 0 0.24 39.359989 813.686951 cm
1092
+ /Im109 Do
1093
+ Q
1094
+ q
1095
+ 572.639954 0 0 0.24 39.359989 813.44696 cm
1096
+ /Im109 Do
1097
+ Q
1098
+ Q
1099
+ /GS1 gs
1100
+ BT
1101
+ /F1 1 Tf
1102
+ 10.02 0 0 10.02 48.24 821.187 Tm
1103
+ 0 g
1104
+ -0.0006 Tc
1105
+ -0.002 Tw
1106
+ [(Arzneimittel Nachrichten )5.9(/ M�dicamen)5.6(t)-0.8(s )]TJ
1107
+ /F2 1 Tf
1108
+ 7.02 0 0 7.02 87.9 24.987 Tm
1109
+ 0.0023 Tc
1110
+ 0.0017 Tw
1111
+ [(S)6.6(w)6.2(iss)6.7(m)2.4(ed)6.5(ic)10.4( Jo)6.5(u)6.5(r)-1.9(n)6.5(a)11.3(l 03)11.3(/200)11.3(6)11.3( )]TJ
1112
+ 1 g
1113
+ 30.6752 0 TD
1114
+ -0.0004 Tc
1115
+ 0 Tw
1116
+ (226)Tj
1117
+ 0 g
1118
+ 1.6667 0 TD
1119
+ 0 Tc
1120
+ ( )Tj
1121
+ /F1 1 Tf
1122
+ 11.52 0 0 11.52 96.42 773.3669 Tm
1123
+ -0.0006 Tc
1124
+ -0.0014 Tw
1125
+ [(Autorisa)3.3(tion d�un m�dicament co)6.2(ntenant un)6.2( nouveau principe actif: )]TJ
1126
+ 0 -1.125 TD
1127
+ 0 Tc
1128
+ 0 Tw
1129
+ (M)Tj
1130
+ /F2 1 Tf
1131
+ 10.02 0 0 10.02 96.42 742.527 Tm
1132
+ 0 Tc
1133
+ 0 Tw
1134
+ ( )Tj
1135
+ /F1 1 Tf
1136
+ 0 -1.1976 TD
1137
+ -0.0002 Tc
1138
+ 0.1054 Tw
1139
+ [(En f�vrier 2)4.9(006, la pr�p)6(aration Mac)6.7(ugen)6(�)-2.6(, une )]TJ
1140
+ T*
1141
+ -0.0004 Tc
1142
+ 0.1116 Tw
1143
+ [(solution injectable conte)4.7(n)-0.2(ant un nou)5.8(veau prin)5.8(-)]TJ
1144
+ T*
1145
+ -0.0002 Tc
1146
+ 0.1893 Tw
1147
+ [(cipe actif, le pegapta)4.9(n)0(ib, a �t� autor)5.6(i)2.4(s�e dans)6.7( )]TJ
1148
+ T*
1149
+ -0.0005 Tc
1150
+ 0.1357 Tw
1151
+ [(l�indication suivante )137.8(: � )137.8(T)4.6(r)-0.7(aitement )6(d)5.7(e)-1.4( la form)8.3(e )]TJ
1152
+ T*
1153
+ -0.0007 Tc
1154
+ 0.3514 Tw
1155
+ [(n�ovasc)6.2(u)-0.5(laire \(h)5.5(umide\) )5.9(de la d)5.5(�)-1.6(g�n�rescence )]TJ
1156
+ T*
1157
+ -0.0019 Tw
1158
+ (maculaire li�e � l��ge�. )Tj
1159
+ T*
1160
+ -0.0001 Tc
1161
+ 0.0395 Tw
1162
+ [(La dos)6.8(e auto)6.1(ris�e de 0,3 )47.9(mg de pegaptanib doit)5.7( )]TJ
1163
+ T*
1164
+ -0.0003 Tc
1165
+ 0.4409 Tw
1166
+ [(�tre administr�e par )-6(i)8.2(n)-0.1(jection )-6(int)5.5(r)-0.5(avitr�)4.8(enne)4.8( )]TJ
1167
+ T*
1168
+ -0.0005 Tc
1169
+ -0.002 Tw
1170
+ [(toutes les six semaines \(9 )6(injections par an\). )]TJ
1171
+ T*
1172
+ 0 Tc
1173
+ 0 Tw
1174
+ ( )Tj
1175
+ /F2 1 Tf
1176
+ T*
1177
+ -0.001 Tc
1178
+ 0.4595 Tw
1179
+ [(Le )6(pegaptanib sodique est)4.8( un oli)7.5(gonucl�ide )]TJ
1180
+ T*
1181
+ -0.0014 Tc
1182
+ 0.0767 Tw
1183
+ [(modifi� p�gy)-4.4(l� qui )6(se lie � l�isoforme VEGF)]TJ
1184
+ -21.4132 -1.1976 TD
1185
+ -0.001 Tc
1186
+ 0.3158 Tw
1187
+ [(facteur)4.8( de c)5.9(r)-1.2(oissance de l�endoth�li)7.5(um vascu-)]TJ
1188
+ T*
1189
+ -0.0004 Tc
1190
+ 0.1595 Tw
1191
+ [(laire \(VEGF\) )6(et inhibe so)5.8(n activit�. Le VEGF est)5.4( )]TJ
1192
+ T*
1193
+ -0.0006 Tc
1194
+ 0.1957 Tw
1195
+ [(une prot)5.2(�in)5.6(e)-1.5( qui induit une angiog)5.6(en�se, un)5.6(e )]TJ
1196
+ T*
1197
+ -0.0009 Tc
1198
+ 0.4295 Tw
1199
+ [(perm�abilit� v)-3.9(a)-1.8(sculaire )6(et une inflammation. )]TJ
1200
+ T*
1201
+ ( )Tj
1202
+ ET
1203
+ EOS
1204
+ page = PageLeaf.new
1205
+ page.resources = Resource.new
1206
+ handler = SimpleHandler.new
1207
+ page.contents = [stream]
1208
+ page.text(handler)
1209
+ expected = <<-EOS.strip
1210
+ Arzneimittel Nachrichten / M\351dicaments
1211
+ Autorisation d\222un m\351dicament contenant un nouveau principe actif:
1212
+ M
1213
+
1214
+ En f\351vrier 2006, la pr\351paration Macugen\256, une
1215
+ solution injectable contenant un nouveau prin-
1216
+ cipe actif, le pegaptanib, a \351t\351 autoris\351e dans
1217
+ l\222indication suivante : \253 Traitement de la forme
1218
+ n\351ovasculaire (humide) de la d\351g\351n\351rescence
1219
+ maculaire li\351e \340 l\222\342ge\273.
1220
+ La dose autoris\351e de 0,3 mg de pegaptanib doit
1221
+ \352tre administr\351e par injection intravitr\351enne
1222
+ toutes les six semaines (9 injections par an).
1223
+
1224
+ Le pegaptanib sodique est un oligonucl\351ide
1225
+ modifi\351 p\351gyl\351 qui se lie \340 l\222isoforme VEGF
1226
+ facteur de croissance de l\222endoth\351lium vascu-
1227
+ laire (VEGF) et inhibe son activit\351. Le VEGF est
1228
+ une prot\351ine qui induit une angiogen\350se, une
1229
+ perm\351abilit\351 vasculaire et une inflammation.
1230
+
1231
+ Swissmedic Journal 03/2006 226
1232
+ EOS
1233
+ result = handler.out.strip
1234
+ =begin
1235
+ [expected.size, result.size].max.times do |idx|
1236
+ unless result[idx] == expected[idx]
1237
+ flunk "unexpected result: (#{result[idx]}/#{expected[idx]} at #{idx}) ...#{expected[idx-10,20].inspect}..."
1238
+ end
1239
+ end
1240
+ =end
1241
+ assert_equal(expected, result)
1242
+ end
1243
+ def test_text_landscape
1244
+ stream = Stream.new
1245
+ stream.decoded_stream = <<-'EOS'
1246
+ /GS1 gs
1247
+ BT
1248
+ /TT2 1 Tf
1249
+ 0 14.0053 -13.9999 0 59.64 43.2505 Tm
1250
+ /Cs6 cs 0 0 0 scn
1251
+ -0.0002 Tc
1252
+ 0.0008 Tw
1253
+ (Zuzahlungsbefreite Arzneimittel nach � 31 Abs. 3 Satz 4 SGB V)Tj
1254
+ /TT4 1 Tf
1255
+ 0 9.0035 -9 0 117 176.4505 Tm
1256
+ 0.0009 Tc
1257
+ 0 Tw
1258
+ (PZN)Tj
1259
+ -14.7942 0 TD
1260
+ -0.0016 Tc
1261
+ [(Arzneimit)-3.7(t)-3.7(e)1.4(lname)]TJ
1262
+ 59.8165 0 TD
1263
+ 0.0016 Tc
1264
+ [(D)4(a)11.3(rrei)10.5(c)1.8(hu)4.6(n)11.3(g)-2(sf)6.2(orm)]TJ
1265
+ -39.9843 0 TD
1266
+ -0.0013 Tc
1267
+ [(He)8.4(rst)-10(e)8.4(l)-5.7(l)7.6(e)-4.9(r)]TJ
1268
+ 52.2861 0 TD
1269
+ 0.0001 Tc
1270
+ [(Apo)9.8(t)-8.6(h)9.8(e)-3.5(ke)9.8(nverka)9.8(ufspre)9.8(is)]TJ
1271
+ 3.1321 -1.14 TD
1272
+ -0.0006 Tc
1273
+ 0.0027 Tw
1274
+ [( in)-4.2(kl)8.3(.)-9.3(M)-0.6(w)21.8(S)0(t)]TJ
1275
+ ET
1276
+ 129.3 42.531 1.98 751.68 re
1277
+ f
1278
+ BT
1279
+ 0 9.0035 -9 0 117 519.6505 Tm
1280
+ -0.0017 Tc
1281
+ 0 Tw
1282
+ (Packungs-)Tj
1283
+ 1.0662 -1.14 TD
1284
+ -0.0022 Tc
1285
+ [(gr��)-4.4(e)]TJ
1286
+ -20.6119 1.14 TD
1287
+ 0 Tc
1288
+ [(Wirkstoff)-8.7(\()-0.2(e)9.7(\))-5598(Wirkst�rke)]TJ
1289
+ ET
1290
+ q
1291
+ 1 i
1292
+ 108.9 440.091 9.96 53.46 re
1293
+ W n
1294
+ BT
1295
+ 0 9.0035 -9 0 117 482.5705 Tm
1296
+ (\()Tj
1297
+ ET
1298
+ Q
1299
+ BT
1300
+ 0 9.0035 -9 0 117 485.5705 Tm
1301
+ (n)Tj
1302
+ ET
1303
+ q
1304
+ 1 i
1305
+ 108.9 440.091 9.96 53.46 re
1306
+ W n
1307
+ BT
1308
+ 0 9.0035 -9 0 117 490.5505 Tm
1309
+ (\))Tj
1310
+ ET
1311
+ Q
1312
+ BT
1313
+ /TT2 1 Tf
1314
+ 0 14.0053 -13.9999 0 79.5 43.2505 Tm
1315
+ -0.0008 Tc
1316
+ (Produktstand)Tj
1317
+ 0 -1.2129 TD
1318
+ -0.0001 Tc
1319
+ 0.0007 Tw
1320
+ (sortiert nach Arzneimittelname)Tj
1321
+ 7.2915 1.2129 TD
1322
+ 0.0001 Tc
1323
+ 0 Tw
1324
+ [(01)-94.9(.)-231.9(0)-0.7(8)-35(.)-0.5(2009)]TJ
1325
+ /TT4 1 Tf
1326
+ 0 8.003 -7.9999 0 144.36 176.4505 Tm
1327
+ -0.0014 Tc
1328
+ (4000741)Tj
1329
+ -16.6438 0 TD
1330
+ -0.0006 Tc
1331
+ 0.0002 Tw
1332
+ (ABSEAMED 10000I.E./1ML)Tj
1333
+ 67.3249 0 TD
1334
+ -0.0008 Tc
1335
+ 0 Tw
1336
+ [(Fertigspritzen)-12242.4(611,53)]TJ
1337
+ -45.0132 0 TD
1338
+ 0.0045 Tc
1339
+ -0.0049 Tw
1340
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ
1341
+ 15.2268 0 TD
1342
+ -0.0006 Tc
1343
+ 0.0077 Tw
1344
+ [(E)-8.4(p)0.8(o)8.3(e)0.8(t)-7.7(i)-3.3(n)8.3( alf)7.3(a)-8118.7(10000)-831.4(I.E.)]TJ
1345
+ ET
1346
+ 169.56 42.531 0.48 748.26 re
1347
+ f
1348
+ BT
1349
+ 0 8.003 -7.9999 0 178.32 176.4505 Tm
1350
+ -0.0014 Tc
1351
+ 0 Tw
1352
+ (4000646)Tj
1353
+ -16.6438 0 TD
1354
+ -0.0006 Tc
1355
+ 0.0002 Tw
1356
+ (ABSEAMED 1000I.E./0.5ML)Tj
1357
+ 67.3249 0 TD
1358
+ 0 Tw
1359
+ [(Fertigspritzen)-12519.7(64,20)]TJ
1360
+ -45.0132 0 TD
1361
+ 0.0045 Tc
1362
+ -0.0049 Tw
1363
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ
1364
+ 15.2268 0 TD
1365
+ -0.0005 Tc
1366
+ 0.0076 Tw
1367
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(1000)-831.3(I.E.)]TJ
1368
+ ET
1369
+ 203.46 42.531 0.54001 748.26 re
1370
+ f
1371
+ BT
1372
+ 0 8.003 -7.9999 0 212.34 176.4505 Tm
1373
+ -0.0014 Tc
1374
+ 0 Tw
1375
+ (4000652)Tj
1376
+ -16.6438 0 TD
1377
+ -0.0006 Tc
1378
+ 0.0002 Tw
1379
+ (ABSEAMED 2000I.E./1ML)Tj
1380
+ 67.3249 0 TD
1381
+ -0.0008 Tc
1382
+ 0 Tw
1383
+ [(Fertigspritzen)-12242.4(119,04)]TJ
1384
+ -45.0132 0 TD
1385
+ 0.0045 Tc
1386
+ -0.0049 Tw
1387
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-23621.1(6)-1.6(X1)-1298.7(m)5.3(l)]TJ
1388
+ 15.2268 0 TD
1389
+ -0.0005 Tc
1390
+ 0.0076 Tw
1391
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(2000)-831.3(I.E.)]TJ
1392
+ ET
1393
+ 237.48 42.531 0.53999 748.26 re
1394
+ f
1395
+ BT
1396
+ 0 8.003 -7.9999 0 246.36 176.4505 Tm
1397
+ -0.0014 Tc
1398
+ 0 Tw
1399
+ (4000669)Tj
1400
+ -16.6438 0 TD
1401
+ -0.0006 Tc
1402
+ 0.0002 Tw
1403
+ (ABSEAMED 3000I.E./0.3ML)Tj
1404
+ 67.3249 0 TD
1405
+ -0.0008 Tc
1406
+ 0 Tw
1407
+ [(Fertigspritzen)-12242.4(173,94)]TJ
1408
+ -45.0132 0 TD
1409
+ 0.0045 Tc
1410
+ -0.0049 Tw
1411
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(3)-1313.6(m)5.3(l)]TJ
1412
+ 15.2268 0 TD
1413
+ -0.0005 Tc
1414
+ 0.0076 Tw
1415
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(3000)-831.3(I.E.)]TJ
1416
+ ET
1417
+ 271.56 42.531 0.48001 748.26 re
1418
+ f
1419
+ BT
1420
+ 0 8.003 -7.9999 0 280.32 176.4505 Tm
1421
+ -0.0014 Tc
1422
+ 0 Tw
1423
+ (4000681)Tj
1424
+ -16.6438 0 TD
1425
+ -0.0006 Tc
1426
+ 0.0002 Tw
1427
+ (ABSEAMED 4000I.E./0.4ML)Tj
1428
+ 67.3249 0 TD
1429
+ -0.0008 Tc
1430
+ 0 Tw
1431
+ [(Fertigspritzen)-12242.4(228,83)]TJ
1432
+ -45.0132 0 TD
1433
+ 0.0045 Tc
1434
+ -0.0049 Tw
1435
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(4)-1313.6(m)5.3(l)]TJ
1436
+ 15.2268 0 TD
1437
+ -0.0005 Tc
1438
+ 0.0076 Tw
1439
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(4000)-831.3(I.E.)]TJ
1440
+ ET
1441
+ 305.46 42.531 0.53998 748.26 re
1442
+ f
1443
+ BT
1444
+ 0 8.003 -7.9999 0 314.34 176.4505 Tm
1445
+ -0.0014 Tc
1446
+ 0 Tw
1447
+ (4000698)Tj
1448
+ -16.6438 0 TD
1449
+ -0.0006 Tc
1450
+ 0.0002 Tw
1451
+ (ABSEAMED 5000I.E./0.5ML)Tj
1452
+ 67.3249 0 TD
1453
+ -0.0008 Tc
1454
+ 0 Tw
1455
+ [(Fertigspritzen)-12242.4(283,70)]TJ
1456
+ -45.0132 0 TD
1457
+ 0.0045 Tc
1458
+ -0.0049 Tw
1459
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(5)-1313.6(m)5.3(l)]TJ
1460
+ 15.2268 0 TD
1461
+ -0.0005 Tc
1462
+ 0.0076 Tw
1463
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(5000)-831.3(I.E.)]TJ
1464
+ ET
1465
+ 339.48 42.531 0.54001 748.26 re
1466
+ f
1467
+ BT
1468
+ 0 8.003 -7.9999 0 348.36 176.4505 Tm
1469
+ -0.0014 Tc
1470
+ 0 Tw
1471
+ (4000729)Tj
1472
+ -16.6438 0 TD
1473
+ -0.0006 Tc
1474
+ 0.0002 Tw
1475
+ (ABSEAMED 6000I.E./0.6ML)Tj
1476
+ 67.3249 0 TD
1477
+ -0.0008 Tc
1478
+ 0 Tw
1479
+ [(Fertigspritzen)-12242.4(338,57)]TJ
1480
+ -45.0132 0 TD
1481
+ 0.0045 Tc
1482
+ -0.0049 Tw
1483
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(6)-1313.6(m)5.3(l)]TJ
1484
+ 15.2268 0 TD
1485
+ -0.0005 Tc
1486
+ 0.0076 Tw
1487
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(6000)-831.3(I.E.)]TJ
1488
+ ET
1489
+ 373.56 42.531 0.47998 748.26 re
1490
+ f
1491
+ BT
1492
+ 0 8.003 -7.9999 0 382.32 176.4505 Tm
1493
+ -0.0014 Tc
1494
+ 0 Tw
1495
+ (4000735)Tj
1496
+ -16.6438 0 TD
1497
+ -0.0006 Tc
1498
+ 0.0002 Tw
1499
+ (ABSEAMED 8000I.E./0.8ML)Tj
1500
+ 67.3249 0 TD
1501
+ -0.0008 Tc
1502
+ 0 Tw
1503
+ [(Fertigspritzen)-12242.4(448,34)]TJ
1504
+ -45.0132 0 TD
1505
+ 0.0045 Tc
1506
+ -0.0049 Tw
1507
+ [(M)12.8(E)-3.3(D)14.4(I)-2.6(C)6.9(E)4.2( AR)6.9(Z)15.6(N)6.9(.G)17.6(MB)11.7(H&)11.7(CO)17.6(.K)11.7(G)-22781.4(6)5.9(X0)5.9(.)4.9(8)-1313.6(m)5.3(l)]TJ
1508
+ 15.2268 0 TD
1509
+ -0.0005 Tc
1510
+ 0.0076 Tw
1511
+ [(E)-8.3(p)0.9(o)8.4(e)0.9(t)-7.6(i)-3.2(n)8.4( alf)7.4(a)-8673.4(8000)-831.3(I.E.)]TJ
1512
+ ET
1513
+ 407.46 42.531 0.54001 748.26 re
1514
+ f
1515
+ BT
1516
+ 0 8.003 -7.9999 0 416.34 176.4505 Tm
1517
+ -0.0014 Tc
1518
+ 0 Tw
1519
+ (3867219)Tj
1520
+ -16.6438 0 TD
1521
+ -0.0016 Tc
1522
+ 0.0012 Tw
1523
+ (ACC 200)Tj
1524
+ 67.3249 0 TD
1525
+ -0.0008 Tc
1526
+ 0 Tw
1527
+ [(Brausetabletten)-11575.1(12,74)]TJ
1528
+ -45.0132 0 TD
1529
+ 0.0003 Tc
1530
+ 0.0068 Tw
1531
+ [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
1532
+ 38.2882 0 TD
1533
+ -0.0006 Tc
1534
+ 0 Tw
1535
+ [(50)-1333.7(St)]TJ
1536
+ -23.0614 0 TD
1537
+ -0.0021 Tc
1538
+ [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
1539
+ ET
1540
+ 441.48 42.531 0.54001 748.26 re
1541
+ f
1542
+ BT
1543
+ 0 8.003 -7.9999 0 450.36 176.4505 Tm
1544
+ -0.0014 Tc
1545
+ (3867225)Tj
1546
+ -16.6438 0 TD
1547
+ -0.0016 Tc
1548
+ 0.0012 Tw
1549
+ (ACC 200)Tj
1550
+ 67.3249 0 TD
1551
+ -0.0008 Tc
1552
+ 0 Tw
1553
+ [(Brausetabletten)-11575.1(15,42)]TJ
1554
+ -45.0132 0 TD
1555
+ 0.0003 Tc
1556
+ 0.0068 Tw
1557
+ [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
1558
+ 37.7335 0 TD
1559
+ -0.0007 Tc
1560
+ 0 Tw
1561
+ [(100)-1333.8(St)]TJ
1562
+ -22.5066 0 TD
1563
+ -0.0021 Tc
1564
+ [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
1565
+ ET
1566
+ 475.56 42.531 0.47998 748.26 re
1567
+ f
1568
+ BT
1569
+ 0 8.003 -7.9999 0 484.32 176.4505 Tm
1570
+ -0.0014 Tc
1571
+ (4789763)Tj
1572
+ -16.6438 0 TD
1573
+ -0.0016 Tc
1574
+ 0.0012 Tw
1575
+ (ACC 200)Tj
1576
+ 67.3249 0 TD
1577
+ -0.0008 Tc
1578
+ 0 Tw
1579
+ [(Brausetabletten)-11575.1(11,01)]TJ
1580
+ -45.0132 0 TD
1581
+ 0.0003 Tc
1582
+ 0.0068 Tw
1583
+ [(H)10.2(E)-7.5(XAL)9.2( AG)]TJ
1584
+ 38.2882 0 TD
1585
+ -0.0006 Tc
1586
+ 0 Tw
1587
+ [(20)-1333.7(St)]TJ
1588
+ -23.0614 0 TD
1589
+ -0.0021 Tc
1590
+ [(A)-9.9(c)3.1(ety)10.6(l)-4.9(c)-11.9(y)10.6(st)-9.2(e)6.8(i)-4.8(n)-8690(2)-8.2(00)-825.4(mg)]TJ
1591
+ ET
1592
+ 509.46 42.531 0.53998 748.26 re
1593
+ f
1594
+ BT
1595
+ 0 8.003 -7.9999 0 548.8199 376.6105 Tm
1596
+ -0.0007 Tc
1597
+ -0.0072 Tw
1598
+ [(Seite 1)-6.8( v)-10.5(o)0.7(n)8.2( )-15(10)8.2(83)]TJ
1599
+ ET
1600
+ EOS
1601
+ page = PageLeaf.new
1602
+ page.attributes.store :rotate, '90'
1603
+ page.resources = Resource.new
1604
+ handler = SimpleHandler.new
1605
+ page.contents = [stream]
1606
+ page.text(handler)
1607
+ expected = <<-EOS
1608
+ Zuzahlungsbefreite Arzneimittel nach \247 31 Abs. 3 Satz 4 SGB V
1609
+ Produktstand 01.08.2009
1610
+ sortiert nach Arzneimittelname
1611
+ Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkst�rke (n) Packungs- Darreichungsform Apothekenverkaufspreis
1612
+ gr��e inkl.MwSt
1613
+ ABSEAMED 10000I.E./1ML 4000741 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 10000 I.E.6X1 ml Fertigspritzen 611,53
1614
+ ABSEAMED 1000I.E./0.5ML 4000646 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 1000 6I.E.X0.5 ml Fertigspritzen 64,20
1615
+ ABSEAMED 2000I.E./1ML 4000652 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 2000 I.E.6X1 ml Fertigspritzen 119,04
1616
+ ABSEAMED 3000I.E./0.3ML 4000669 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 3000 6I.E.X0.3 ml Fertigspritzen 173,94
1617
+ ABSEAMED 4000I.E./0.4ML 4000681 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 4000 6I.E.X0.4 ml Fertigspritzen 228,83
1618
+ ABSEAMED 5000I.E./0.5ML 4000698 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 5000 6I.E.X0.5 ml Fertigspritzen 283,70
1619
+ ABSEAMED 6000I.E./0.6ML 4000729 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 6000 6I.E.X0.6 ml Fertigspritzen 338,57
1620
+ ABSEAMED 8000I.E./0.8ML 4000735 MEDICE ARZN.GMBH&CO.KG Epoetin alfa 8000 6I.E.X0.8 ml Fertigspritzen 448,34
1621
+ ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,74
1622
+ ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,42
1623
+ ACC 200 4789763 HEXAL AG Acetylcystein 200 mg 20 St Brausetabletten 11,01
1624
+ Seite 1 von 1083
1625
+ EOS
1626
+ assert_equal(expected.strip, handler.out.strip)
1627
+ end
1628
# Regression test: the content stream stored in data/stream_kerning_bug.txt,
# rendered on a page whose :rotate attribute is '90', must yield exactly
# "RATIOPHARM GMBH 20 St" (compared after stripping both sides).
def test_text_kerning_bug
  fixture = File.expand_path('data/stream_kerning_bug.txt',
                             File.dirname(__FILE__))
  content = Stream.new
  content.decoded_stream = File.read(fixture)
  leaf = PageLeaf.new
  leaf.attributes.store(:rotate, '90')
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  assert_equal("RATIOPHARM GMBH 20 St".strip, collector.out.strip)
end
1642
# Second kerning regression: data/stream_kerning_bug2.txt on a page whose
# :rotate attribute is '90' must yield "HEUMANN PH GMBH&CO. KG 20 St"
# (compared after stripping both sides).
def test_text_kerning_bug2
  fixture = File.expand_path('data/stream_kerning_bug2.txt',
                             File.dirname(__FILE__))
  content = Stream.new
  content.decoded_stream = File.read(fixture)
  leaf = PageLeaf.new
  leaf.attributes.store(:rotate, '90')
  leaf.resources = Resource.new
  collector = SimpleHandler.new
  leaf.contents = [content]
  leaf.text(collector)
  assert_equal("HEUMANN PH GMBH&CO. KG 20 St".strip, collector.out.strip)
end
1656
+ =begin
1657
+ def test_text_space_bug2
1658
+ stream = Stream.new
1659
+ path = File.expand_path('data/space_bug_stream2.txt',
1660
+ File.dirname(__FILE__))
1661
+ fontsrc15 = <<-EOS
1662
+ 327 0 obj
1663
+ EOS
1664
+ font15 = Font.new(fontsrc15)
1665
+ stream.decoded_stream = File.read path
1666
+ page = PageLeaf.new
1667
+ page.resources = resource = Resource.new
1668
+ resource.instance_variable_get('@fonts').store(:r15, font15)
1669
+ handler = SimpleHandler.new
1670
+ page.contents = [stream]
1671
+ page.text(handler)
1672
+ expected = "Inhalt / Table des mati\303\250res"
1673
+ assert_equal(expected.strip, handler.out.strip[0,28])
1674
+ expected = '10 mg, 20 mg und 40 mg'
1675
+ assert_equal(expected.strip, handler.out.strip[346,22])
1676
+ end
1677
+ =end
1678
+ end
1679
# Tests for Rpdf2txt::Encoding built from a small /Encoding object that
# declares a /Differences array starting at code 1.
class TestEncoding < Test::Unit::TestCase
  # Builds the encoding under test from an inline PDF object source.
  def setup
    @encoding = Rpdf2txt::Encoding.new(<<-EOS)
252 0 obj
<<
/Type /Encoding
/Differences [ 1 /space /beta /alpha ]
>>
endobj
    EOS
  end

  # The /Differences array is expected to map consecutive codes, starting
  # at 1, to the listed glyph names.
  def test_differences
    mapping = { 1 => 'space', 2 => 'beta', 3 => 'alpha' }
    assert_equal(mapping, @encoding.differences)
  end

  # Converting code 3 is expected to yield "a"; the second assertion checks
  # that the argument string still equals "\003" afterwards.
  def test_convert_symbol
    input = "\003"
    converted = @encoding.convert_symbol(input)
    assert_equal("a", converted)
    assert_equal("\003", input)
  end
end
1705
# Tests for Image objects: each case builds an Image from a fixture PDF
# object and compares the rendered result with a reference PNG loaded
# through Magick::Image.read.
class TestImage < Test::Unit::TestCase
  def test_png
    obj = Image.new(File.read(fixture_path('data/png.pdfobj')))
    assert_nothing_raised { obj.image }
    good, = Magick::Image.read(fixture_path('data/logo.png'))
    assert_equal(good, obj.image)
  end

  def test_indexed
    assert_indexed_image('data/index.pdfobj', 'data/indexed.pdfobj',
                         51, 'data/pdf_50.png')
  end

  def test_indexed_2bit
    assert_indexed_image('data/index_2bit.pdfobj', 'data/indexed_2bit.pdfobj',
                         21, 'data/pdf_20.png')
  end

  def test_indexed_masked
    assert_indexed_image('data/index_masked.pdfobj',
                         'data/indexed_masked.pdfobj',
                         21, 'data/pdf_21.png')
  end

  def test_lzw_decode ## from the PDF-Manual
    data = "\x80\x0B\x60\x50\x22\x0C\x0C\x85\x01"
    stream = Stream.new(data)
    assert_equal("-----A---B", stream.lzw_decode(data))
  end

  def test_lzw_image
    assert_indexed_image('data/lzw_index.pdfobj', 'data/lzw.pdfobj',
                         21, 'data/pdf_22.png')
  end

  private

  # Resolves a path relative to the directory containing this test file.
  def fixture_path(relative)
    File.expand_path(relative, File.dirname(__FILE__))
  end

  # Shared body of the indexed-image tests: loads the index stream, builds
  # the image, hands the index to build_tree under +index_id+, then checks
  # that rendering does not raise and equals the reference image.
  def assert_indexed_image(index_file, image_file, index_id, reference_file)
    index = Stream.new(File.read(fixture_path(index_file)))
    obj = Image.new(File.read(fixture_path(image_file)))
    obj.build_tree(index_id => index)
    assert_nothing_raised { obj.image }
    good, = Magick::Image.read(fixture_path(reference_file))
    assert_equal(good, obj.image)
  end
end
1774
# Tests for InlineImage: builds an image from inline attributes plus raw
# data, writes it out as PNG and compares it with a reference file.
class TestInlineImage < Test::Unit::TestCase
  # Renders the inline image, round-trips it through a temporary PNG file
  # and compares the result of Magick::Image.read for both files.
  #
  # Errors and assertion failures are deliberately NOT rescued here: the
  # previous `rescue StandardError => e; p e` swallowed them, so the test
  # could never fail.
  def test_inline_img
    attrs = <<-EOS
/W 113
/CS /DeviceGray
/BPC 8
/DP << /Predictor 15
/Columns 113
>>
/F /Fl
/H 1

    EOS
    data = "x\234cd\2407\000\000\000\344\000\002"
    obj = InlineImage.new(attrs, data)
    assert_nothing_raised { obj.image }
    path = File.expand_path('data/inline.png', File.dirname(__FILE__))
    good = Magick::Image.read path
    # Keep a reference to the Tempfile so it can be removed deterministically;
    # the image itself is written next to it with a '.png' suffix.
    tempfile = Tempfile.new('test')
    tmp_path = tempfile.path + '.png'
    obj.image.write tmp_path
    tmp = Magick::Image.read tmp_path
    assert_equal(good, tmp)
  ensure
    # tmp_path/tempfile are nil when the test failed before creating them;
    # guard so cleanup never masks the original error with a TypeError.
    File.delete tmp_path if tmp_path && File.exist?(tmp_path)
    tempfile.close! if tempfile
  end
end
1802
+ end