rpdf2txt 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/LICENCE +515 -0
- data/Manifest.txt +126 -0
- data/README.txt +30 -0
- data/Rakefile +24 -0
- data/bin/rpdf2txt +58 -0
- data/config.save +12 -0
- data/install.rb +1098 -0
- data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
- data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
- data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
- data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
- data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
- data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
- data/lib/rpdf2txt-rockit/grammar.rb +644 -0
- data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
- data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
- data/lib/rpdf2txt-rockit/indexable.rb +53 -0
- data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
- data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
- data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
- data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
- data/lib/rpdf2txt-rockit/profiler.rb +168 -0
- data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
- data/lib/rpdf2txt-rockit/rockit.rb +76 -0
- data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
- data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
- data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
- data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
- data/lib/rpdf2txt-rockit/token.rb +364 -0
- data/lib/rpdf2txt-rockit/version.rb +3 -0
- data/lib/rpdf2txt/attributesparser.rb +42 -0
- data/lib/rpdf2txt/cmapparser.rb +65 -0
- data/lib/rpdf2txt/data/_cmap.grammar +11 -0
- data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/cmap.grammar +11 -0
- data/lib/rpdf2txt/data/cmap.rb +37 -0
- data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/cmap_range.rb +43 -0
- data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
- data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
- data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
- data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
- data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
- data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
- data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
- data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
- data/lib/rpdf2txt/data/pdftext.grammar +102 -0
- data/lib/rpdf2txt/data/pdftext.rb +146 -0
- data/lib/rpdf2txt/default_handler.rb +352 -0
- data/lib/rpdf2txt/lzw.rb +69 -0
- data/lib/rpdf2txt/object.rb +1114 -0
- data/lib/rpdf2txt/parser.rb +169 -0
- data/lib/rpdf2txt/symbol.rb +408 -0
- data/lib/rpdf2txt/text.rb +182 -0
- data/lib/rpdf2txt/text_state.rb +434 -0
- data/lib/rpdf2txt/textparser.rb +42 -0
- data/test/data/3392_obj +0 -0
- data/test/data/397_decrypted +15 -0
- data/test/data/450_decrypted +153 -0
- data/test/data/450_obj +0 -0
- data/test/data/452_decrypted +125 -0
- data/test/data/454_decrypted +108 -0
- data/test/data/456_decrypted +106 -0
- data/test/data/458_decrypted +111 -0
- data/test/data/458_obj +0 -0
- data/test/data/460_decrypted +118 -0
- data/test/data/460_obj +0 -0
- data/test/data/463_decrypted +117 -0
- data/test/data/465_decrypted +107 -0
- data/test/data/465_obj +0 -0
- data/test/data/90_obj +0 -0
- data/test/data/90_obj_comp +1 -0
- data/test/data/decrypted +0 -0
- data/test/data/encrypt_obj +0 -0
- data/test/data/encrypt_string +0 -0
- data/test/data/encrypt_string_128bit +0 -0
- data/test/data/encrypted_object_stream.pdf +0 -0
- data/test/data/firststream +1 -0
- data/test/data/index.pdfobj +0 -0
- data/test/data/index_2bit.pdfobj +0 -0
- data/test/data/index_masked.pdfobj +0 -0
- data/test/data/indexed.pdfobj +0 -0
- data/test/data/indexed_2bit.pdfobj +0 -0
- data/test/data/indexed_masked.pdfobj +0 -0
- data/test/data/inline.png +0 -0
- data/test/data/logo.png +0 -0
- data/test/data/lzw.pdfobj +0 -0
- data/test/data/lzw_index.pdfobj +0 -0
- data/test/data/page_tree.pdf +148 -0
- data/test/data/pdf_20.png +0 -0
- data/test/data/pdf_21.png +0 -0
- data/test/data/pdf_22.png +0 -0
- data/test/data/pdf_50.png +0 -0
- data/test/data/png.pdfobj +0 -0
- data/test/data/space_bug_stream.txt +119 -0
- data/test/data/stream.txt +292 -0
- data/test/data/stream_kerning_bug.txt +13 -0
- data/test/data/stream_kerning_bug2.txt +6 -0
- data/test/data/test.pdf +0 -0
- data/test/data/test.txt +8 -0
- data/test/data/test_text.txt +42 -0
- data/test/data/working_obj +0 -0
- data/test/data/working_obj2 +0 -0
- data/test/mock.rb +149 -0
- data/test/suite.rb +30 -0
- data/test/test_pdf_object.rb +1802 -0
- data/test/test_pdf_parser.rb +1340 -0
- data/test/test_pdf_text.rb +789 -0
- data/test/test_space_bug_05_2004.rb +87 -0
- data/test/test_stream.rb +194 -0
- data/test/test_text_state.rb +315 -0
- data/usage-en.txt +112 -0
- data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
- data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
- metadata +220 -0
@@ -0,0 +1,1340 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Rpdf2txt -- PDF to Text Parser
|
4
|
+
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2.1 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
|
21
|
+
# zdavatz@ywesee.com, mhatakeymama@ywesee.com
|
22
|
+
#
|
23
|
+
# TestParser-- Rpdf2txt -- 28.11.2002 -- aschrafl@ywesee.com
|
24
|
+
|
25
|
+
$: << File.expand_path('../lib', File.dirname(__FILE__))
|
26
|
+
$: << File.dirname(__FILE__)
|
27
|
+
|
28
|
+
$KCODE = "UTF8"
|
29
|
+
|
30
|
+
require 'test/unit'
|
31
|
+
require 'rpdf2txt/parser'
|
32
|
+
require 'mock'
|
33
|
+
|
34
|
+
# Test-only re-opening of the Rpdf2txt namespace. It marks a handful of
# Parser/PageLeaf methods as public and adds attribute accessors so that the
# test cases in this file can call them and inspect internal state.
module Rpdf2txt
  class Parser
    # Methods the tests call directly on a parser instance.
    public :page_tree_root, :build_object, :build_trailer_dictionary,
           :rebuild_object_catalogue
    attr_accessor :src, :encrypt_id
  end

  class PdfObject
    attr_reader :attributes, :src
    # attr_accessor :oid
  end

  class TrailerDictionary
    attr_reader :attributes
  end

  class ReferenceArray < TreeNode
    attr_reader :references, :contents
  end

  class PdfArray < TreeNode
    attr_reader :references, :contents
  end

  class PageLeaf < TreeNode
    # join_snippets is exercised directly by the test_join_snippets* cases.
    public :join_snippets
    attr_writer :resources
  end

  class CatalogNode < TreeNode
    attr_accessor :pages
  end
end
|
60
|
+
# Minimal stand-in used as a text object's current page: it hands out fonts
# from a lookup table and exposes an (initially empty) attributes hash.
class FontDonorStub
  attr_reader :attributes

  # fonts:: lookup table (anything responding to #[]) keyed by font name.
  def initialize(fonts)
    @fonts = fonts
    @attributes = {}
  end

  # Returns whatever the lookup table holds under +font_name+.
  def font(font_name)
    @fonts[font_name]
  end
end
|
70
|
+
|
71
|
+
class TestParser < Test::Unit::TestCase
|
72
|
+
# Stub object whose #root_id always answers nil.
class RootDonorStub
  # The original method body is empty, so the result is nil; spelled out here.
  def root_id
    nil
  end
end
|
76
|
+
# Reads the data/page_tree.pdf fixture (resolved relative to this file) and
# builds the parser stored in @parser.
def setup
  fixture = File.expand_path('./data/page_tree.pdf', File.dirname(__FILE__))
  @parser = Rpdf2txt::Parser.new(File.read(fixture))
end
|
81
|
+
# The object catalogue of the page_tree.pdf fixture is a Hash with 16 entries,
# each object id mapping to an instance of the class listed below.
def test_object_catalogue
  cat = @parser.object_catalogue
  assert_equal(Hash, cat.class)
  assert_equal(16, cat.size)
  expected_classes = {
    1  => Rpdf2txt::CatalogNode,
    2  => Rpdf2txt::PageNode,
    3  => Rpdf2txt::PageLeaf,
    4  => Rpdf2txt::PageNode,
    5  => Rpdf2txt::PageNode,
    6  => Rpdf2txt::Stream,
    7  => Rpdf2txt::PageNode,
    8  => Rpdf2txt::PageNode,
    9  => Rpdf2txt::PageLeaf,
    10 => Rpdf2txt::PageLeaf,
    11 => Rpdf2txt::PageLeaf,
    12 => Rpdf2txt::PageLeaf,
    13 => Rpdf2txt::PageLeaf,
    14 => Rpdf2txt::PageLeaf,
    15 => Rpdf2txt::Stream,
    16 => Rpdf2txt::Font,
  }
  expected_classes.each do |oid, klass|
    # The message names the object id so a failure is easy to locate.
    assert_equal(klass, cat[oid].class, "class of object #{oid}")
  end
end
|
102
|
+
# Parses the encrypted_object_stream.pdf fixture, checks the three objects
# initially catalogued, then checks that rebuild_object_catalogue adds
# object 2530 to the same catalogue.
def test_rebuild_object_catalogue
  file = File.expand_path('./data/encrypted_object_stream.pdf', File.dirname(__FILE__))
  parser = Rpdf2txt::Parser.new(File.read(file))
  cat = parser.object_catalogue
  assert_equal(3, cat.length)
  # assert_equal takes (expected, actual); the expected class goes first so
  # failure messages read correctly.
  assert_equal(Rpdf2txt::ObjStream, cat[2545].class)
  assert_equal(Rpdf2txt::TrailerDictionary, cat[3166].class)
  assert_equal(Rpdf2txt::PdfHash, cat[2544].class)
  # NOTE(review): return value unused; presumably called for a side effect
  # needed before rebuilding - confirm against Parser.
  parser.trailer_dictionary
  parser.rebuild_object_catalogue
  assert_equal(4, cat.length)
  assert_equal(Rpdf2txt::PdfHash, cat[2530].class)
end
|
116
|
+
# The page tree root is the catalogue entry with object id 1.
def test_tree_root
  cat = @parser.object_catalogue
  assert_equal(cat[1], @parser.page_tree_root)
end
|
120
|
+
# Walks the page tree of the fixture level by level and compares every node
# (and, for some leaves, its contents) with the object catalogue entries.
def test_page_tree
  tree = @parser.page_tree
  cat = @parser.object_catalogue
  assert_equal(cat[1], tree)

  # First level below the catalogue's pages node.
  firstlevel = tree.pages.kids
  assert_equal(cat[3], firstlevel[0])
  assert_equal([cat[6]], firstlevel[0].contents)
  assert_equal(cat[4], firstlevel[1])
  assert_equal(cat[5], firstlevel[2])
  # firstlevel[0] does not respond to #kids.
  assert_raises(NoMethodError) { firstlevel[0].kids }

  # Children of firstlevel[1].
  secondlevel = firstlevel[1].kids
  assert_equal(cat[13], secondlevel[0])
  assert_equal([cat[6]], secondlevel[0].contents)
  assert_equal(cat[14], secondlevel[1])
  assert_equal([cat[6]], secondlevel[1].contents)

  # Children of firstlevel[2].
  secondlevel = firstlevel[2].kids
  assert_equal(cat[7], secondlevel[0])
  assert_equal(cat[8], secondlevel[1])

  thirdlevel = secondlevel[0].kids
  assert_equal(cat[9], thirdlevel[0])
  assert_equal([cat[6]], thirdlevel[0].contents)
  assert_equal(cat[10], thirdlevel[1])
  assert_equal([cat[6]], thirdlevel[1].contents)

  thirdlevel = secondlevel[1].kids
  assert_equal(cat[11], thirdlevel[0])
  assert_equal(cat[12], thirdlevel[1])
end
|
147
|
+
# Iterating the page tree yields nodes whose object ids come in this order.
def test_tree_each
  expected = [3, 13, 14, 9, 10, 11, 12]
  assert_equal(expected, @parser.page_tree.collect { |node| node.oid })
end
|
151
|
+
# The first six nodes yielded by the page tree have object 6 as their only
# content; the seventh has objects 6 and 15.
def test_contents
  expected = Array.new(6, [@parser.object_catalogue[6]])
  expected.push([@parser.object_catalogue[6],
                 @parser.object_catalogue[15]])
  result = @parser.page_tree.collect { |node| node.contents }
  assert_equal(expected, result)
end
|
158
|
+
# Regression test: this was added after a parse error on the line
#   /BaseFont /CKGGCC+TimesNewRoman,Italic
# (the ",Italic" part was the problem). The object must build as a Font.
def test_font_token
  input = <<-EOS
48 0 obj
<<
/Type /Font
/Subtype /Type1
/FirstChar 1
/LastChar 41
/Widths [ 722 444 389 250 333 500 389 278 278 500 500 500 500 444 500 667 389
722 278 722 333 333 444 500 333 556 278 250 611 444 500 500 500
556 667 333 500 444 500 444 611 ]
/Encoding 413 0 R
/BaseFont /CKGGCC+TimesNewRoman,Italic
/FontDescriptor 395 0 R
/ToUnicode 414 0 R
>>
endobj
  EOS
  assert_instance_of(Rpdf2txt::Font, @parser.build_object(input))
end
|
181
|
+
# An object dictionary with /ProcSet, /Font and /ExtGState entries builds as
# a Resource.
def test_resource
  # Assembled with join instead of appending to a string literal; the
  # resulting text is the same.
  input = [
    "17 0 obj\n",
    "<< \n",
    "/ProcSet [ /PDF /Text ] \n",
    "/Font << /F2 304 0 R /F4 306 0 R /F6 275 0 R /F8 277 0 R >> \n",
    "/ExtGState << /GS1 318 0 R >> \n",
    ">> \n",
    "endobj\n",
    "1\n",
  ].join
  assert_instance_of(Rpdf2txt::Resource, @parser.build_object(input))
end
|
192
|
+
# An array object holding only indirect references builds as a
# ReferenceArray whose references are the referenced object ids.
def test_reference_array
  input = <<-END
76 0 obj
[
535 0 R 78 0 R
]
endobj
  END
  obj = @parser.build_object(input)
  assert_instance_of(Rpdf2txt::ReferenceArray, obj)
  assert_equal([535, 78], obj.references)
end
|
204
|
+
# An array object of plain numbers (one per line, written here with \n
# escapes inside the heredoc) builds as a PdfArray with 221 entries.
def test_array
  input = <<-END
65 0 obj \n[ \n278 \n0 \n355 \n0 \n0 \n889 \n0 \n191 \n333 \n333 \n0 \n584 \n278 \n333 \n278 \n278 \n556 \n556 \n556 \n556 \n556 \n556 \n556 \n556 \n556 \n556 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n667 \n667 \n722 \n722 \n667 \n611 \n778 \n722 \n278 \n500 \n667 \n556 \n833 \n722 \n778 \n667 \n778 \n722 \n667 \n611 \n722 \n667 \n944 \n667 \n667 \n611 \n0 \n0 \n0 \n0 \n0 \n0 \n556 \n556 \n500 \n556 \n556 \n278 \n556 \n556 \n222 \n222 \n500 \n222 \n833 \n556 \n556 \n556 \n556 \n333 \n500 \n278 \n556 \n500 \n722 \n500 \n500 \n500 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n278 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n556 \n0 \n0 \n0 \n556 \n0 \n0 \n0 \n556 \n556 \n556 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n0 \n556 \n0 \n556 \n0 \n0 \n0 \n0 \n0 \n556 \n] \nendobj \n
  END
  obj = @parser.build_object(input)
  assert_instance_of(Rpdf2txt::PdfArray, obj)
  assert_equal(221, obj.contents.size)
end
|
212
|
+
# An array written directly after "obj" and closed without whitespace before
# "]" still builds as a PdfArray, here with 197 entries.
def test_array__no_trailing_whitespace
  input = <<-END
146 0 obj[278 0 0 0 0 0 0 0 0 0 0 0 0 333 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 722 722 722 0 0 0 0 722 0 0 0 0 833 0 0 667 0 0 667 611 0 0 0 0 0 611 0 0 0 0 0 0 556 611 556 611 556 333 611 611 278 0 556 278 889 611 611 611 0 389 556 333 611 0 0 0 556 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556]\rendobj
  END
  obj = @parser.build_object(input)
  assert_instance_of(Rpdf2txt::PdfArray, obj)
  assert_equal(197, obj.contents.size)
end
|
220
|
+
# A bracketed object whose content is a name followed by a dictionary must
# not raise while being built, and is classified as Unknown.
def test_no_array
  input = <<-END
185 0 obj\r[ \r/CalRGB << /WhitePoint [ 0.9505 1 1.089 ] /Gamma [ 2.22221 2.22221 2.22221 ] \r/Matrix [ 0.4124 0.2126 0.0193 0.3576 0.71519 0.1192 0.1805 0.0722 0.9505 ] >> \r\r]\rendobj\r
  END
  obj = nil
  assert_nothing_raised do
    obj = @parser.build_object(input)
  end
  assert_instance_of(Rpdf2txt::Unknown, obj)
end
|
230
|
+
# Joins the snippets of a text block that mixes literal strings in font /F0
# with hex strings (<012e>, <0215>) in font /F4, whose character map
# translates the hex codes, and compares the handler output with the
# expected three lines of text.
def test_join_snippets__hex_chars
  # NOTE(review): the U+FFFD characters in this stream sit where the expected
  # text has \344 and \374; presumably single Latin-1 bytes that were lost in
  # transcription - verify against the original fixture.
  input = <<-'EOS'
BT
/F0 11 Tf
1 0 0 -1 0 10.413 Tm
(Paroxetin besitzt eine selektive Wirkung; in-vitro Studien haben gezeigt, dass es, im Gegensatz zu) Tj
0 -13.2 Td
(trizyklischen Antidepressiva, eine geringe Affinit�t f�r ) Tj
/F4 11 Tf
260.436 0 Td
[ -2<012e>] TJ
/F0 11 Tf
6.384 -2.2 Td
(1) Tj
6.118 2.2 Td
(-, ) Tj
/F4 11 Tf
9.779 0 Td
<012e> Tj
/F0 11 Tf
6.356 -2.2 Td
(2) Tj
6.118 2.2 Td
(- und ) Tj
/F4 11 Tf
28.127 0 Td
<0215> Tj
/F0 11 Tf
6.325 0 Td
(-Adrenozeptoren sowie f�r) Tj
-329.642 -15.4 Td
(Dopamin \(D) Tj
58.685 -2.2 Td
(2) Tj
6.118 2.2 Td
(\)-, 5-HT) Tj
37.882 -2.2 Td
(1) Tj
6.118 2.2 Td(-artige, 5-HT) Tj 61.735 -2.2 Td(2) Tj 6.118
2.2 Td( und Histamin \(H) Tj 81.915 -2.2 Td(1) Tj 6.118 2.2 Td(\)-Rezeptoren aufweist. Das Fehlen einer)
Tj ET
  EOS
  txt = Rpdf2txt::Text.new(input, 'latin1',
                           Matrix[[1, 0, 0], [0, 1, 0], [0, 39.866, 0]])
  fontsrc0 = <<-'EOS'
6 0 obj
<<
/Type /Font
/Name /F0
/Subtype /TrueType
/BaseFont /ArialMT
/FirstChar 32
/LastChar 252
/Widths[ 278 0 0 0 0 889 0 191 333 333 0 0 278 333 278 278 556 556 556 556 556 556
556 556 556 556 278 278 584 0 584 0 0 667 667 722 722 667 611 778 722 278 500 667
556 833 722 778 667 778 722 667 611 722 667 944 667 667 611 278 0 278 0 0 0 556 556
500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722
500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 737 0 556 0 0 737 0 400 0 0 0 0 0 0 0 0 0 0 556 0 834 0 0 0
0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 778 0 0 0 0 0 722 0 0 0 0 0 0 0 556 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 83 0 R
>>
endobj
  EOS
  font0 = Rpdf2txt::Font.new(fontsrc0)
  fontsrc4 = <<-'EOS'
31 0 obj
<<
/Type /Font
/Name /F4
/Subtype /Type0
/BaseFont /CRASED+ArialMT
/Encoding /Identity-H
/DescendantFonts[ 87 0 R]
/ToUnicode 88 0 R
>>
endobj
  EOS
  font4 = Rpdf2txt::Font.new(fontsrc4)
  cmap = Rpdf2txt::CMap.new('<< >>')
  # 302 == 0x012e and 533 == 0x0215, the hex strings used in the stream above.
  cmap.map = {
    302 => 945,
    533 => 946,
  }
  font4.cmap = cmap
  fonts = {
    :f0 => font0,
    :f4 => font4,
  }
  txt.current_page = FontDonorStub.new(fonts)
  leaf = Rpdf2txt::PageLeaf.new
  expected = <<-EOS
Paroxetin besitzt eine selektive Wirkung; in-vitro Studien haben gezeigt, dass es, im Gegensatz zu
trizyklischen Antidepressiva, eine geringe Affinit\344t f\374r a1-, a2- und b-Adrenozeptoren sowie f\374r
Dopamin (D2)-, 5-HT1-artige, 5-HT2 und Histamin (H1)-Rezeptoren aufweist. Das Fehlen einer
  EOS
  handler = Rpdf2txt::SimpleHandler.new
  leaf.join_snippets(txt.scan, handler)
  result = handler.out
  # The heredoc ends with a newline, so one is appended before comparing.
  result << "\n"
  assert_equal(expected, result)
end
|
334
|
+
# Joins the snippets of a kerned (TJ array) text block that switches between
# the fonts /TT0, /TT1 and /TT2, and compares the handler output with the
# expected five lines of text.
def test_join_snippets5
  input = <<-EOS
BT
/TT1 1 Tf
-0.00031 Tc -0.00211 Tw 10.02 0 0 10.02 70.86 736.40054 Tm
[(Der Prozentsatz der mit Humira b)5(eha)5(nd)5(elt)-5(en Patiente)5(n)-1(, welche ACR20)5(-)-3(, ACR50)5(-)-3( und ACR7)5(0-)]TJ
0.0002 Tc -0.00259 Tw 0 -1.14371 TD
[(Ansp)6(re)6(chrate)6(n erreichten, war )6(kon)6(s)-3(i)7(s)-3(ten)6(t)3( \374ber die )]TJ
-0.00011 Tc -0.0023 Tw 22.3054 0 Td
[(Stu)5(d)-1(ien 1, 2, 3 und 4. Die Erg)5(ebni)7(sse f\374r 4)5(0)-1( mg )]TJ
0.00011 Tc -0.0025 Tw -22.3054 -1.1497 Td
[(Humi)7(ra alle )6(zwei Wochen )6(sind in Ta)5(bell)7(e)-1( 1 zu)5(samm)7(e)5(ngefa)5(sst. )]TJ
/TT0 1 Tf
0 Tc 10.02 0 0 10.02 70.86 687.86049 Tm
[(Tabelle 1: ACR-An)5(sp)5(re)5(ch)5(raten bei Placebo-ko)5(nt)]TJ
0.00011 Tc [(rollie)5(rten Pr\374fun)5(ge)5(n \\(in Pro)5(z)-3(ent )6(der Patiente)5(n)5(\\))]TJ
/TT1 1 Tf
0 Tc 0 Tw 41.6647 0 Td
( )Tj
/TT2 1 Tf
-0.0013 Tc 10.02 0 0 10.02 70.86 663.14011 Tm
(---------------------------------------------------- )Tj
ET
  EOS
  txt = Rpdf2txt::Text.new(input, 'latin1')

  fontsrc0 = <<-EOS
5 0 obj <</LastChar 252 /BaseFont /ArialMT
/Subtype /TrueType
/Widths [278 0 0 0 0 889 0 0 333 333 389 0 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 584 584 584 0 0 667 667 722 722 667 611 778 722 278 500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 611 278 0 278 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 737 0 400 0 333 333 0 576 0 0 0 333 0 556 0 0 0 0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 778 584 0 0 0 0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556]
/FontDescriptor 144 0 R
/Encoding /WinAnsiEncoding
/Type /Font
/FirstChar 32
>> endobj
  EOS
  font0 = Rpdf2txt::Font.new(fontsrc0)
  fontsrc1 = <<-EOS
5 0 obj <</LastChar 252 /BaseFont /ArialMT
/Subtype /TrueType
/Widths [278 0 0 0 0 889 0 0 333 333 389 0 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 584 584 584 0 0 667 667 722 722 667 611 778 722 278 500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 611 278 0 278 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 737 0 400 0 333 333 0 576 0 0 0 333 0 556 0 0 0 0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 778 584 0 0 0 0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556]
/FontDescriptor 144 0 R
/Encoding /WinAnsiEncoding
/Type /Font
/FirstChar 32
>> endobj
  EOS
  # Previously built from fontsrc0, which left fontsrc1 unused; both sources
  # carry the same text, so the resulting font is unchanged.
  font1 = Rpdf2txt::Font.new(fontsrc1)
  fontsrc2 = <<-EOS
4 0 obj
<</LastChar 252
/BaseFont /Arial-ItalicMT
/Subtype /TrueType
/Widths [278 0 0 0 0 0 0 0 333 333 0 0 278 333 0 0 556 556 556 556 556 556 556 556 556 0 278 0 0 0 0 0 0 667 667 722 722 667 611 778 722 278 500 667 556 833 722 0 667 778 722 667 611 722 667 944 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556]
/FontDescriptor 143 0 R
/Encoding /WinAnsiEncoding
/Type /Font
/FirstChar 32
>>
endobj
  EOS
  font2 = Rpdf2txt::Font.new(fontsrc2)

  cmap = Rpdf2txt::CMap.new('<< >>')
  cmap.map = {
    36 => 8805,
  }
  font2.cmap = cmap

  fonts = {
    :tt0 => font0,
    :tt1 => font1,
    :tt2 => font2,
  }
  txt.current_page = FontDonorStub.new(fonts)

  leaf = Rpdf2txt::PageLeaf.new
  expected = <<-EOS
Der Prozentsatz der mit Humira behandelten Patienten, welche ACR20-, ACR50- und ACR70-
Ansprechraten erreichten, war konsistent \374ber die Studien 1, 2, 3 und 4. Die Ergebnisse f\374r 40 mg
Humira alle zwei Wochen sind in Tabelle 1 zusammengefasst.
Tabelle 1: ACR-Ansprechraten bei Placebo-kontrollierten Pr\374fungen (in Prozent der Patienten)
----------------------------------------------------
  EOS
  handler = Rpdf2txt::SimpleHandler.new
  leaf.join_snippets(txt.scan, handler)
  result = handler.out
  # The heredoc ends with a newline, so one is appended before comparing.
  result << "\n"
  assert_equal(expected, result)
end
|
424
|
+
# Joins the snippets of a text block in which a single "$" is drawn in font
# /TT2 (whose character map translates code 36, the code of "$") between
# runs of /TT0 text, and compares the handler output with the expected text.
def test_join_snippets6
  input = <<-EOS
BT
/TT0 1 Tf
-0.0002 Tc -0.0022 Tw 10.02 0 0 10.02 70.86 627.92047 Tm
[(In Studie 1 evaluierte m)7(an )6(271 Patiente)5(n)-1( mit einer m)7(\344)-1(ssige)5(n)-1( bis )6(sch)5(w)-3(eren a)5(k)-3(tiven rhe)5(u)-1(matoi)6(den )]TJ
-0.00031 Tc -0.00211 Tw 0 -1.22749 TD
[(Arthritis)-3(,)2( die )]TJ
/TT2 1 Tf
0 Tc 0 Tw 5.5509 0 Td
($)Tj
/TT0 1 Tf
-0.00011 Tc -0.0023 Tw 0.54491 0 Td
[(18 Ja)5(hre alt )6(waren, bei de)5(nen die Th)5(erapie mit mind)5(esten)5(s)-3( ein)5(e)-1(m)7(,)2( aber mit nicht mehr )]TJ
0 Tc -0.0024 Tw -6.09579 -1.14371 Td
ET
  EOS
  txt = Rpdf2txt::Text.new(input, 'latin1')
  fontsrc0 = <<-EOS
5 0 obj <</LastChar 252 /BaseFont /ArialMT
/Subtype /TrueType
/Widths [278 0 0 0 0 889 0 0 333 333 389 0 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 584 584 584 0 0 667 667 722 722 667 611 778 722 278 500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 611 278 0 278 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 737 0 400 0 333 333 0 576 0 0 0 333 0 556 0 0 0 0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 778 584 0 0 0 0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556]
/FontDescriptor 144 0 R
/Encoding /WinAnsiEncoding
/Type /Font
/FirstChar 32
>> endobj
  EOS
  font0 = Rpdf2txt::Font.new(fontsrc0)
  fontsrc2 = <<-EOS
3 0 obj
<</LastChar 252
/BaseFont /Arial-BoldItalicMT /Subtype /TrueType /Widths [278 0 0 0 0 0 0 0 0 0 0 0 0 0 278 278 556 556 556 0 0 0 0 0 0 556 0 0 0 0 0 0 0 722 722 0 722 667 611 778 722 278 0 722 0 833 0 778 667 0 0 667 611 722 667 944 0 0 611 0 0 0 0 0 0 556 611 556 611 556 333 611 611 278 0 556 278 889 611 611 611 0 389 556 333 611 556 778 0 0 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 0 611]
/FontDescriptor 146 0 R /Encoding /WinAnsiEncoding /Type /Font /FirstChar 32 >>
endobj
  EOS
  font2 = Rpdf2txt::Font.new(fontsrc2)

  cmap = Rpdf2txt::CMap.new('<< >>')
  cmap.map = {
    36 => 8805,
  }
  font2.cmap = cmap

  fonts = {
    :tt0 => font0,
    :tt2 => font2,
  }
  txt.current_page = FontDonorStub.new(fonts)
  leaf = Rpdf2txt::PageLeaf.new
  expected = "In Studie 1 evaluierte man 271 Patienten mit einer m\344ssigen bis schweren aktiven rheumatoiden \nArthritis, die \26318 Jahre alt waren, bei denen die Therapie mit mindestens einem, aber mit nicht mehr \n"
  handler = Rpdf2txt::SimpleHandler.new
  leaf.join_snippets(txt.scan, handler)
  result = handler.out
  # The expected text ends with a newline, so one is appended before comparing.
  result << "\n"
  assert_equal(expected, result)
end
|
481
|
+
# Joins the snippets of a two-line text block drawn with two copies of the
# same MacRomanEncoding font - /F9 without a character map, /F3 with one -
# and compares the handler output with the expected two lines.
def test_join_snippets7
  # NOTE(review): the U+FFFD character in "K?nzle" (stream and expected text)
  # is presumably a single non-ASCII byte lost in transcription - verify
  # against the original fixture.
  input = "BT
/F9 1 Tf
10 0 0 10 70.7953 393.3369 Tm
-0.0336 Tw
[(01)-1044.2(K�nzle tisana per i nervi e per dormir)17.7(e, erbe medicinali smin)]TJ
33.3976 0 TD
0.0000 Tw
(uzzate)Tj
/F3 1 Tf
-33.3976 -1.4174 TD
-0.0306 Tw
(* Parroco Erborista K�nzle SA, , 6573 Magadino)Tj
ET"
  fontsrc = <<-EOS
150 0 obj
<</BaseFont /Frutiger-Black
/LastChar 240
/Subtype /Type1
/FontDescriptor 151 0 R
/Widths [306 444 611 612 612 1000 778 333 389 389 611 600 306 333 306 278 612 612 612 612 612 612 612 612 612 612 306 306 600 600 600 556 800 778 667 667 778 611 556 778 722 334 444 722 556 1000 778 778 611 778 667 611 556 722 722 1000 722 722 611 389 278 389 600 500 333 611 668 500 668 611 444 668 667 334 334 611 334 1000 667 668 668 668 444 500 444 667 611 944 611 611 500 389 222 389 600 306 778 0 0 0 0 0 0 0 611 611 611 0 0 0 611 611 0 0 0 0 0 0 0 0 0 0 668 0 0 0 0 667 0 400 612 612 0 0 0 0 800 800 0 0 0 306 0 0 306 600 306 306 0 667 306 306 306 306 306 0 0 306 0 0 0 0 0 306 0 306 306 611 611 0 306 0 0 0 0 0 0 0 0 0 0 333 0 306 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 306]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  EOS
  txt = Rpdf2txt::Text.new(input, 'latin1')
  font = Rpdf2txt::Font.new(fontsrc)
  cmap = Rpdf2txt::CMap.new('<< >>')
  font.cmap = cmap
  font.cmap.map = {
    36 => 8805,
  }
  # The copy is made after the cmap is attached, then has its cmap removed.
  font_simple = font.dup
  font_simple.cmap = nil
  fonts = {
    :f9 => font_simple,
    :f3 => font
  }
  txt.current_page = FontDonorStub.new(fonts)
  leaf = Rpdf2txt::PageLeaf.new
  expected = "01 K�nzle tisana per i nervi e per dormire, erbe medicinali sminuzzate
* Parroco Erborista K�nzle SA, , 6573 Magadino"
  handler = Rpdf2txt::SimpleHandler.new
  leaf.join_snippets(txt.scan, handler)
  result = handler.out
  assert_equal(expected, result)
end
|
530
|
+
# Runs PageLeaf#join_snippets over a content stream that switches between
# five fonts (/F2, /F3, /F4, /F6, /F9) and moves the text matrix around the
# page in an order that differs from reading order.
#
# What the fixture demonstrates:
# * The wanted text follows the y value of the Tm/TD positions from high to
#   low (headline at 839.78 first, page number at 35.77 last) rather than
#   the order in which the strings occur in the stream.
# * Strings shown on the same baseline with different fonts
#   ("Zul.-Nr.: " / "56749" / "Verkaufskategorie: " / "B" / "Index: ...")
#   end up on one output line.
# * Inside TJ arrays, small adjustments (98.8, -111.2, -130.1) leave the
#   neighbouring strings glued together, whereas larger negative ones
#   (-278.1, -306, -1044.2) are rendered as whitespace.
#
# NOTE(review): the U+FFFD characters in the content stream look like
# extraction damage of single non-ASCII bytes; the original byte values
# cannot be determined from this view -- confirm against the upstream fixture.
def test_join_snippets8
  content_stream = <<-PDF
BT
/F6 1 Tf
12.5 0 0 12.5 62.4488 839.7822 Tm
0.000 0.000 0.000 1.000 k
/GS3 gs
-0.0001 Tc
-0.0286 Tw
[(Arzneimittel Statistik)-130.1(/)-130(Miscellan�es)]TJ
/F4 1 Tf
10 0 0 10 314.5196 35.7671 Tm
0.000 0.000 0.000 0.000 k
/GS2 gs
-0.0001 Tc
0.0000 Tw
(1182)Tj
/F2 1 Tf
7 0 0 7 63.4331 45.5374 Tm
0.000 0.000 0.000 1.000 k
/GS3 gs
-0.0306 Tw
[(Swissmedic Jour)-17.9(nal)-1111.9(1)55.6(1)-55.6(/)-111.1(2004)]TJ
/F9 1 Tf
10 0 0 10 113.0394 749.1141 Tm
-0.0336 Tw
[(01)-1044.2(Carsol CR 200)-306(mg, T)98.8(abletten)]TJ
0 -1.2835 TD
[(02)-1044.2(Carsol CR 400)-306(mg, T)98.8(abletten)]TJ
/F3 1 Tf
0 -1.4174 TD
-0.0306 Tw
(Ecosol AG, , 6312 Steinhausen)Tj
10 0 0 10 113.0394 701.7668 Tm
-0.0002 Tc
0.0000 Tw
[(Zul.-Nr)91.6(.: )]TJ
/F9 1 Tf
3.8771 0 TD
(56749)Tj
/F3 1 Tf
8.8787 0 TD
-0.0001 Tc
[(V)36.8(erkaufskategorie: )]TJ
/F9 1 Tf
9.1015 0 TD
0.0000 Tc
(B)Tj
/F3 1 Tf
3.6544 0 TD
-0.0001 Tc
-0.0306 Tw
[(Index: 01.07.1.)-9563.5(18.11.2004)]TJ
-25.5118 -2.2428 TD
0.0000 Tw
[(Zusammensetzung:)-921.1(01)]TJ
8.3 0 0 8.3 226.4252 679.3388 Tm
-0.0306 Tw
[(CARBAMAZEPINUM 200)-278.1(mg, EXCIPIENS pro COMPRESSO.)]TJ
10 0 0 10 212.252 665.1652 Tm
-0.0002 Tc
0.0000 Tw
(02)Tj
8.3 0 0 8.3 226.4252 665.1652 Tm
-0.0001 Tc
-0.0306 Tw
[(CARBAMAZEPINUM 400)-278.1(mg, EXCIPIENS pro COMPRESSO.)]TJ
10 0 0 10 113.0394 650.9915 Tm
0.0000 Tw
/F9 1 Tf
11.5 0 0 11.5 113.0394 790.1339 Tm
-0.0336 Tw
[(Neuzulassungen /)-122.3(Nouvelles autorisations)]TJ
/F4 1 Tf
10 0 0 10 113.0394 766.1339 Tm
-0.0306 Tw
[(Humanpr�parate)-111.2(/)-111.2(P)0.1(r)17.7(oduits � usage humain)]TJ
ET
  PDF
  text = Rpdf2txt::Text.new(content_stream, 'latin1')

  # Font dictionaries, one per resource name used by the stream above.
  frutiger_light = <<-FONT
13 0 obj
<</BaseFont /Frutiger-Light
/LastChar 240
/Subtype /Type1
/FontDescriptor 18 0 R
/Widths [278 389 556 556 556 1000 667 278 278 278 556 600 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 667 556 667 667 500 444 722 667 222 333 611 444 889 667 722 500 722 556 500 500 667 611 944 611 611 500 278 278 278 600 500 222 500 556 444 556 500 333 556 556 222 222 500 222 833 556 556 556 556 333 389 333 556 444 778 444 444 444 278 222 278 600 278 667 0 0 0 0 722 667 0 500 500 500 0 0 444 500 500 500 500 0 0 222 222 0 0 0 556 556 0 0 556 556 556 0 400 556 556 0 500 0 0 800 800 0 0 0 278 0 0 278 600 278 278 0 556 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 556 556 0 278 0 0 0 0 889 500 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_roman = <<-FONT
16 0 obj
<</BaseFont /Frutiger-Roman
/LastChar 240
/Subtype /Type1
/FontDescriptor 19 0 R
/Widths [278 389 556 556 556 1000 722 278 333 333 556 600 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 500 556 722 667 1000 667 667 556 333 278 333 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 389 389 611 500 833 500 500 500 333 222 333 600 278 722 0 0 0 0 0 0 0 556 556 556 0 0 0 556 556 0 0 0 0 278 0 0 0 0 0 611 0 0 0 0 611 0 400 556 556 0 0 0 0 800 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 500 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_bold = <<-FONT
15 0 obj
<</BaseFont /Frutiger-Bold
/LastChar 240
/Subtype /Type1
/FontDescriptor 20 0 R
/Widths [278 389 481 556 556 1000 722 278 333 333 556 600 278 333 278 389 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 556 556 722 667 1000 667 667 556 333 389 333 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 444 389 611 556 889 556 556 500 333 222 333 600 278 0 0 0 0 0 0 0 0 556 556 556 0 0 444 556 556 556 0 0 0 0 0 0 0 0 611 611 0 0 0 0 611 0 0 556 556 0 0 0 0 800 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 556 556 0 278 0 0 0 0 944 500 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_black_cn = <<-FONT
36 0 obj
<</BaseFont /Frutiger-BlackCn
/LastChar 240
/Subtype /Type1
/FontDescriptor 39 0 R
/Widths [260 388 520 520 520 852 704 260 315 315 519 600 260 333 260 296 520 520 520 520 520 520 520 520 520 520 260 260 600 600 600 463 800 648 574 574 667 519 481 667 630 296 389 611 481 852 667 685 556 685 593 537 500 630 611 852 611 592 519 315 296 315 600 500 222 537 574 444 574 537 370 574 556 278 278 537 278 834 556 574 574 574 389 426 389 556 519 796 519 519 426 333 222 333 600 260 0 0 0 0 0 0 0 0 0 0 0 0 0 0 537 537 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 520 520 0 0 0 0 0 800 0 0 0 260 0 0 260 600 260 260 0 556 260 260 260 260 260 0 0 260 0 0 0 0 0 260 0 260 260 0 0 0 260 0 0 0 0 0 0 0 0 0 0 0 0 260 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 260]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_black = <<-FONT
150 0 obj
<</BaseFont /Frutiger-Black
/LastChar 240
/Subtype /Type1
/FontDescriptor 151 0 R
/Widths [306 444 611 612 612 1000 778 333 389 389 611 600 306 333 306 278 612 612 612 612 612 612 612 612 612 612 306 306 600 600 600 556 800 778 667 667 778 611 556 778 722 334 444 722 556 1000 778 778 611 778 667 611 556 722 722 1000 722 722 611 389 278 389 600 500 333 611 668 500 668 611 444 668 667 334 334 611 334 1000 667 668 668 668 444 500 444 667 611 944 611 611 500 389 222 389 600 306 778 0 0 0 0 0 0 0 611 611 611 0 0 0 611 611 0 0 0 0 0 0 0 0 0 0 668 0 0 0 0 667 0 400 612 612 0 0 0 0 800 800 0 0 0 306 0 0 306 600 306 306 0 667 306 306 306 306 306 0 0 306 0 0 0 0 0 306 0 306 306 611 611 0 306 0 0 0 0 0 0 0 0 0 0 333 0 306 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 306]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT

  # The wanted text; \351, \344 and \340 are octal escapes for single bytes.
  wanted = <<-TEXT
Arzneimittel Statistik/Miscellan\351es
Neuzulassungen /Nouvelles autorisations
Humanpr\344parate/Produits \340 usage humain
01 Carsol CR 200 mg, Tabletten
02 Carsol CR 400 mg, Tabletten
Ecosol AG, , 6312 Steinhausen
Zul.-Nr.: 56749 Verkaufskategorie: B Index: 01.07.1. 18.11.2004
Zusammensetzung: 01 CARBAMAZEPINUM 200 mg, EXCIPIENS pro COMPRESSO.
02 CARBAMAZEPINUM 400 mg, EXCIPIENS pro COMPRESSO.
Swissmedic Journal 11/2004
1182
  TEXT

  font_table = {
    :f2 => Rpdf2txt::Font.new(frutiger_light),
    :f3 => Rpdf2txt::Font.new(frutiger_roman),
    :f4 => Rpdf2txt::Font.new(frutiger_bold),
    :f6 => Rpdf2txt::Font.new(frutiger_black_cn),
    :f9 => Rpdf2txt::Font.new(frutiger_black),
  }
  text.current_page = FontDonorStub.new(font_table)

  page_leaf = Rpdf2txt::PageLeaf.new
  collector = Rpdf2txt::SimpleHandler.new
  page_leaf.join_snippets(text.scan, collector)

  # The heredoc ends with a newline, hence the strip before comparing.
  assert_equal(wanted.strip, collector.out)
end
|
707
|
+
# Runs PageLeaf#join_snippets over a two-font (/F3, /F9) content stream in
# which long lines are emitted in two pieces: the main TJ array followed by a
# horizontal TD move and a short trailing string.
#
# What the fixture demonstrates:
# * The trailing pieces are appended to their line: "... NATRII CITRAS, A" +
#   "CIDUM HYDRO-" becomes "... NATRII CITRAS, ACIDUM HYDRO-", and
#   "(neue" + "r Wirkstoff)" becomes "(neuer Wirkstoff)".
# * The heredoc writes "\\(" and "\\)", so the stream handed to the parser
#   contains the PDF escapes "\(" and "\)"; the wanted text shows them as
#   plain parentheses.
# * The actual output is also passed as the failure message of assert_equal,
#   so it is reported verbatim when the comparison fails.
#
# NOTE(review): the U+FFFD characters in the content stream look like
# extraction damage of single non-ASCII bytes; the original byte values
# cannot be determined from this view -- confirm against the upstream fixture.
def test_join_snippets9
  content_stream = <<-PDF
BT
/F3 1 Tf
10 0 0 10 113.0394 744.2866 Tm
-0.0002 Tc
0.0000 Tw
[(Zul.-Nr)91.6(.: )]TJ
/F9 1 Tf
3.8771 0 TD
(56120)Tj
/F3 1 Tf
8.8787 0 TD
-0.0001 Tc
[(V)36.8(erkaufskategorie: )]TJ
/F9 1 Tf
9.1015 0 TD
0.0000 Tc
(B)Tj
/F3 1 Tf
3.6544 0 TD
-0.0001 Tc
-0.0306 Tw
[(Index: 02.07.1.)-9563.5(26.11.2004)]TJ
-25.5118 -2.2428 TD
0.0000 Tw
[(Zusammensetzung:)-921.1(01)]TJ
8.3 0 0 8.3 226.4252 721.8585 Tm
0.1192 Tw
[(TREPROSTINILUM 1)-278.1(mg ut TREPROSTINILUM NA)73.7(TRICUM, NA)73.7(TRII CITRAS, A)]TJ
35.1847 0 TD
(CIDUM HYDRO-)Tj
-35.1847 -1.3661 TD
-0.0304 Tw
[(CHLORICUM DILUTUM, NA)73.7(TRII HYDROXIDUM, NA)73.7(TRII CHLORIDUM, CONSER)17.7(V)]TJ
35.7763 0 TD
[(.: MET)54.7(ACRESO-)]TJ
-35.7763 -1.3661 TD
-0.0306 Tw
[(LUM 3)-278.1(mg, EXCIPIENS ad SOLUTIONEM pro 1)-278.1(mL. )]TJ
10 0 0 10 212.252 685.0069 Tm
-0.0002 Tc
0.0000 Tw
(02)Tj
8.3 0 0 8.3 226.4252 685.0069 Tm
-0.0001 Tc
0.0150 Tw
[(TREPROSTINILUM 2.5)-278.1(mg ut TREPROSTINILUM NA)73.7(TRICUM, NA)73.7(TRII CITRAS,)]TJ
34.2741 0 TD
[( ACIDUM HYDRO-)]TJ
-34.2741 -1.3662 TD
-0.0304 Tw
[(CHLORICUM DILUTUM, NA)73.7(TRII HYDROXIDUM, NA)73.7(TRII CHLORIDUM, CONSER)17.7(V)]TJ
35.7763 0 TD
[(.: MET)54.7(ACRESO-)]TJ
-35.7763 -1.3662 TD
-0.0306 Tw
[(LUM 3)-278.1(mg, EXCIPIENS ad SOLUTIONEM pro 1)-278.1(mL. )]TJ
10 0 0 10 113.0394 648.1553 Tm
[(Anwendung:)-5285.2(Prim�re und sekund�re pulmonale Hypertonie)]TJ
0 -1.4174 TD
[(Packungen:)-4532.7(01)-305.6(001)-5591.4(1 x 20)-567(mL)-17319.8(B)]TJ
9.9213 -1.4174 TD
[(02)-305.6(003)-5591.4(1 x 20)-567(mL)-17319.8(B)]TJ
-9.9213 -1.4174 TD
[(Bemerkung:)-5671.9(TREPROSTINILUM \\(ut natrii T)73.9(r)0(eprostinilum\\) = NAS \\(neue)]TJ
37.1235 0 TD
[(r Wirkstof)17.7(f\\))]TJ
-37.1235 -1.4174 TD
[(G�ltig bis:)-6647.8(25. November 2009)]TJ
ET
  PDF
  text = Rpdf2txt::Text.new(content_stream, 'latin1')

  # Font dictionaries for the two resource names used by the stream above.
  frutiger_roman = <<-FONT
16 0 obj
<</BaseFont /Frutiger-Roman
/LastChar 240
/Subtype /Type1
/FontDescriptor 19 0 R
/Widths [278 389 556 556 556 1000 722 278 333 333 556 600 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 500 556 722 667 1000 667 667 556 333 278 333 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 389 389 611 500 833 500 500 500 333 222 333 600 278 722 0 0 0 0 0 0 0 556 556 556 0 0 0 556 556 0 0 0 0 278 0 0 0 0 0 611 0 0 0 0 611 0 400 556 556 0 0 0 0 800 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 500 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_black = <<-FONT
150 0 obj
<</BaseFont /Frutiger-Black
/LastChar 240
/Subtype /Type1
/FontDescriptor 151 0 R
/Widths [306 444 611 612 612 1000 778 333 389 389 611 600 306 333 306 278 612 612 612 612 612 612 612 612 612 612 306 306 600 600 600 556 800 778 667 667 778 611 556 778 722 334 444 722 556 1000 778 778 611 778 667 611 556 722 722 1000 722 722 611 389 278 389 600 500 333 611 668 500 668 611 444 668 667 334 334 611 334 1000 667 668 668 668 444 500 444 667 611 944 611 611 500 389 222 389 600 306 778 0 0 0 0 0 0 0 611 611 611 0 0 0 611 611 0 0 0 0 0 0 0 0 0 0 668 0 0 0 0 667 0 400 612 612 0 0 0 0 800 800 0 0 0 306 0 0 306 600 306 306 0 667 306 306 306 306 306 0 0 306 0 0 0 0 0 306 0 306 306 611 611 0 306 0 0 0 0 0 0 0 0 0 0 333 0 306 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 306]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT

  # The wanted text; \344 and \374 are octal escapes for single bytes.
  wanted = <<-TEXT
Zul.-Nr.: 56120 Verkaufskategorie: B Index: 02.07.1. 26.11.2004
Zusammensetzung: 01 TREPROSTINILUM 1 mg ut TREPROSTINILUM NATRICUM, NATRII CITRAS, ACIDUM HYDRO-
CHLORICUM DILUTUM, NATRII HYDROXIDUM, NATRII CHLORIDUM, CONSERV.: METACRESO-
LUM 3 mg, EXCIPIENS ad SOLUTIONEM pro 1 mL.
02 TREPROSTINILUM 2.5 mg ut TREPROSTINILUM NATRICUM, NATRII CITRAS, ACIDUM HYDRO-
CHLORICUM DILUTUM, NATRII HYDROXIDUM, NATRII CHLORIDUM, CONSERV.: METACRESO-
LUM 3 mg, EXCIPIENS ad SOLUTIONEM pro 1 mL.
Anwendung: Prim\344re und sekund\344re pulmonale Hypertonie
Packungen: 01 001 1 x 20 mL B
02 003 1 x 20 mL B
Bemerkung: TREPROSTINILUM (ut natrii Treprostinilum) = NAS (neuer Wirkstoff)
G\374ltig bis: 25. November 2009
  TEXT

  font_table = {
    :f3 => Rpdf2txt::Font.new(frutiger_roman),
    :f9 => Rpdf2txt::Font.new(frutiger_black),
  }
  text.current_page = FontDonorStub.new(font_table)

  page_leaf = Rpdf2txt::PageLeaf.new
  collector = Rpdf2txt::SimpleHandler.new
  page_leaf.join_snippets(text.scan, collector)
  actual = collector.out

  # Third argument: the raw output doubles as the failure message.
  assert_equal(wanted.strip, actual, actual)
end
|
833
|
+
# Runs PageLeaf#join_snippets over a two-font (/F3, /F9) content stream that
# advances lines with the T* operator in addition to TD/Tm positioning.
#
# What the fixture demonstrates:
# * Text shown after a T* appears on a new output line
#   ("01 Imigran, ..." / "GlaxoSmithKline AG, ...").
# * A line emitted in two pieces around a horizontal TD move is joined
#   ("... NATRII CHLORI" + "DUM, AQUA ad IN-", and the "Bemerkungen:" line
#   with its trailing "(Verzicht Fertigspritze)").
# * The wanted text has an ASCII "-" in "3-5" where the stream carries a
#   single non-ASCII character.
#
# NOTE(review): the U+FFFD characters in the content stream look like
# extraction damage of single non-ASCII bytes; the original byte values
# cannot be determined from this view -- confirm against the upstream fixture.
def test_join_snippets10
  content_stream = <<-PDF
BT
/F9 1 Tf
10 0 0 10 113.0394 585.3986 Tm
-0.0336 Tw
[(01)-1044.2(Imigran, Injektionsl�sung)]TJ
/F3 1 Tf
T*
-0.0306 Tw
[(GlaxoSmithKline AG, T)73.9(alstrasse 3�5, 3053 M�nchenbuchsee)]TJ
10 0 0 10 113.0394 550.8859 Tm
-0.0002 Tc
0.0000 Tw
[(Zul.-Nr)91.6(.: )]TJ
/F9 1 Tf
3.8771 0 TD
(51684)Tj
/F3 1 Tf
8.8787 0 TD
-0.0001 Tc
[(V)36.8(erkaufskategorie: )]TJ
/F9 1 Tf
9.1015 0 TD
0.0000 Tc
(B)Tj
/F3 1 Tf
3.6544 0 TD
-0.0001 Tc
-0.0306 Tw
[(Index: 02.05.1.)-9563.5(17.11.2004)]TJ
-25.5118 -2.2428 TD
0.0000 Tw
[(Zusammensetzung:)-921.1(01)]TJ
8.3 0 0 8.3 226.4252 528.4578 Tm
-0.0582 Tw
[(SUMA)73.7(TRIPT)54.7(ANUM 6)-278.1(mg ut SUMA)73.7(TRIPT)54.7(ANI SUCCINAS \\(1:1\\), NA)73.7(TRII CHLORI)]TJ
33.9231 0 TD
(DUM, AQUA ad IN-)Tj
-33.9231 -1.3662 TD
-0.0306 Tw
[(IECT)54.7(ABILIA q.s. ad SOLUTIONEM pro 0.5)-278.1(mL. )]TJ
10 0 0 10 113.0394 502.9452 Tm
[(Anwendung:)-5285.2(Behandlung akuter Migr�neanf�lle)]TJ
0 -1.4174 TD
[(* Packung:)-4896.3(01)-305.6(044)-3172.4(2 Patronen)-14173.3(B)]TJ
T*
[(Bemerkungen:)-4505.2(Ersetzt die Zulassungsbescheinigung vom 01.05.2002 )]TJ
35.9654 0 TD
[(\\(V)36.8(erzicht Fertigspritze\\))]TJ
-35.9654 -1.4174 TD
[(G�ltig bis:)-6647.8(31. Dezember 2006)]TJ
ET
  PDF
  text = Rpdf2txt::Text.new(content_stream, 'latin1')

  # Font dictionaries for the two resource names used by the stream above.
  frutiger_roman = <<-FONT
16 0 obj
<</BaseFont /Frutiger-Roman
/LastChar 240
/Subtype /Type1
/FontDescriptor 19 0 R
/Widths [278 389 556 556 556 1000 722 278 333 333 556 600 278 333 278 278 556 556 556 556 556 556 556 556 556 556 278 278 600 600 600 500 800 722 611 611 722 556 500 722 722 278 389 667 500 944 722 778 556 778 611 500 556 722 667 1000 667 667 556 333 278 333 600 500 278 556 611 444 611 556 389 611 611 278 278 556 278 889 611 611 611 611 389 389 389 611 500 833 500 500 500 333 222 333 600 278 722 0 0 0 0 0 0 0 556 556 556 0 0 0 556 556 0 0 0 0 278 0 0 0 0 0 611 0 0 0 0 611 0 400 556 556 0 0 0 0 800 800 0 0 0 278 0 0 278 600 278 278 0 611 278 278 278 278 278 0 0 278 0 0 0 0 0 278 0 278 278 0 0 0 278 0 0 0 0 0 500 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 278]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT
  frutiger_black = <<-FONT
150 0 obj
<</BaseFont /Frutiger-Black
/LastChar 240
/Subtype /Type1
/FontDescriptor 151 0 R
/Widths [306 444 611 612 612 1000 778 333 389 389 611 600 306 333 306 278 612 612 612 612 612 612 612 612 612 612 306 306 600 600 600 556 800 778 667 667 778 611 556 778 722 334 444 722 556 1000 778 778 611 778 667 611 556 722 722 1000 722 722 611 389 278 389 600 500 333 611 668 500 668 611 444 668 667 334 334 611 334 1000 667 668 668 668 444 500 444 667 611 944 611 611 500 389 222 389 600 306 778 0 0 0 0 0 0 0 611 611 611 0 0 0 611 611 0 0 0 0 0 0 0 0 0 0 668 0 0 0 0 667 0 400 612 612 0 0 0 0 800 800 0 0 0 306 0 0 306 600 306 306 0 667 306 306 306 306 306 0 0 306 0 0 0 0 0 306 0 306 306 611 611 0 306 0 0 0 0 0 0 0 0 0 0 333 0 306 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 306]
/Encoding /MacRomanEncoding
/Type /Font
/FirstChar 32
>>
endobj
  FONT

  # The wanted text; \366, \374 and \344 are octal escapes for single bytes.
  wanted = <<-TEXT
01 Imigran, Injektionsl\366sung
GlaxoSmithKline AG, Talstrasse 3-5, 3053 M\374nchenbuchsee
Zul.-Nr.: 51684 Verkaufskategorie: B Index: 02.05.1. 17.11.2004
Zusammensetzung: 01 SUMATRIPTANUM 6 mg ut SUMATRIPTANI SUCCINAS (1:1), NATRII CHLORIDUM, AQUA ad IN-
IECTABILIA q.s. ad SOLUTIONEM pro 0.5 mL.
Anwendung: Behandlung akuter Migr\344neanf\344lle
* Packung: 01 044 2 Patronen B
Bemerkungen: Ersetzt die Zulassungsbescheinigung vom 01.05.2002 (Verzicht Fertigspritze)
G\374ltig bis: 31. Dezember 2006
  TEXT

  font_table = {
    :f3 => Rpdf2txt::Font.new(frutiger_roman),
    :f9 => Rpdf2txt::Font.new(frutiger_black),
  }
  text.current_page = FontDonorStub.new(font_table)

  page_leaf = Rpdf2txt::PageLeaf.new
  collector = Rpdf2txt::SimpleHandler.new
  page_leaf.join_snippets(text.scan, collector)

  # The heredoc ends with a newline, hence the strip before comparing.
  assert_equal(wanted.strip, collector.out)
end
|
938
|
+
def test_join_snippets11
|
939
|
+
stream = Rpdf2txt::Stream.new('', 'latin1')
|
940
|
+
stream.decoded_stream = <<-'EOS'
|
941
|
+
q 1 0 0 -1 70.866 841.89 cm 0 J 1 1 1 RG q -1.5 -1.5 m 455.043 -1.5 l 452.043 1.5
|
942
|
+
l 1.5 1.5 l W* n -0.5 0 m 454.043 0 l S Q q 455.043 -1.5 m 455.043 32.5 l 452.043
|
943
|
+
29.5 l 452.043 1.5 l W* n 453.543 -0.5 m 453.543 31.5 l S Q q 455.043 32.5 m -1.5
|
944
|
+
32.5 l 1.5 29.5 l 452.043 29.5 l W* n 454.043 31 m -0.5 31 l S Q q -1.5 32.5 m -1.5
|
945
|
+
-1.5 l 1.5 1.5 l 1.5 29.5 l W* n 0 31.5 m 0 -0.5 l S Q 1 0 0 1 0.5 31.5 cm 0 0 0
|
946
|
+
rg
|
947
|
+
BT /F0 8 Tf 1 0 0 -1 232.336 7.573 Tm[(Fachinformation)
|
948
|
+
-3( )] TJ ET
|
949
|
+
1 0 0 1 -0.5 -0.5 cm q 455.043 -1.5 m 455.043 12.1 l 452.043 9.1 l 452.043 1.5 l
|
950
|
+
W* n 453.543 -0.5 m 453.543 11.1 l S Q q 455.043 12.1 m -1.5 12.1 l 1.5 9.1 l 452.043
|
951
|
+
9.1 l W* n 454.043 10.6 m -0.5 10.6 l S Q q -1.5 12.1 m -1.5 -1.5 l 1.5 1.5 l 1.5
|
952
|
+
9.1 l W* n 0 11.1 m 0 -0.5 l S Q 1 0 0 1 0 39.866 cm
|
953
|
+
BT /F1 16 Tf 1 0 0 -1 0 15.147 Tm(Ciprofloxacin Sandoz� i.v.) Tj 0 0 0 RG ET
|
954
|
+
0 31.773 m 453.543 31.773 l S
|
955
|
+
BT /F2 11 Tf 1 0 0 -1 407.099 44.213 Tm(SANDOZ) Tj ET
|
956
|
+
0 48.3 m 453.543 48.3 l S
|
957
|
+
BT /F2 8 Tf 1 0 0 -1 0 70.173 Tm(AMZV 9.11.2001) Tj 0 -15.6 Td(Zusammensetzung) Tj
|
958
|
+
/F3 8 Tf 0 -9.6 Td(Wirkstoff:) Tj /F0 8 Tf 33.332 0 Td( 1 Cyclopropyl-6-fluor-1,4-dihydro-4-oxo-7-\(1-piperazinyl\)-3-chinolincarbons�ure \(Ciprofloxacinum ut Ciprofloxacini)
|
959
|
+
Tj -33.332 -9.6 Td(hydrochloridum\).) Tj /F3 8 Tf 0 -11.6 Td(Hilfsstoffe:) Tj /F0
|
960
|
+
8 Tf 37.344 0 Td( Acidum lacticum, Natrii chloridum, Aqua ad inject.) Tj /F2 8 Tf
|
961
|
+
-37.344 -17.6 Td(Galenische Form und Wirkstoffmenge pro Einheit) Tj /F0 8 Tf 0 -9.6
|
962
|
+
Td(L�sung zur Infusion.) Tj 0 -11.6 Td(Ciprofloxacinum 200 mg/100 ml ut Ciprofloxacini hydrochloridum.)
|
963
|
+
Tj 0 -11.6 Td(Ciprofloxacinum 400 mg/200 ml ut Ciprofloxacini hydrochloridum.) Tj
|
964
|
+
/F2 8 Tf 0 -17.6 Td(Indikationen/Anwendungsm�glichkeiten) Tj /F0 8 Tf -0.311 Tw 0
|
965
|
+
-9.6 Td(Ciprofloxacin Sandoz i.v. eignet sich zur Behandlung von Infektionen, die durch Ciprofloxacin-empfindliche Erreger hervorgerufen)
|
966
|
+
Tj 0 Tw 0 -9.6 Td(werden:) Tj 0 -11.6 Td(Infektionen der Atemwege.) Tj 0 -11.6 Td(Bei den im ambulanten Bereich h�ufigen Pneumokokken-Pneumonien ist Ciprofloxacin Sandoz i.v. nicht das Mittel der ersten)
|
967
|
+
Tj 0 -9.6 Td(Wahl. Ciprofloxacin Sandoz i.v. kann aber bei Pneumonien, verursacht durch z.B. Klebsiella, Enterobacter, Proteus,)
|
968
|
+
Tj 0 -9.6 Td(Pseudomonas, E. coli, Haemophilus, Branhamella, Legionella, Staphylococcus, angezeigt sein.)
|
969
|
+
Tj -0.036 Tw 0 -11.6 Td(Bei akuten, durch P. aeruginosa verursachten Infektionssch�ben bei Kindern und Jugendlichen \(5�17 Jahre\) mit Mukoviszidose.)
|
970
|
+
Tj 0 Tw 0 -9.6 Td(Die Behandlung betr�gt 10�14 Tage.) Tj 0 -11.6 Td(Hals-Nasen-Ohren-Infektionen. Insbesondere wenn sie durch gramnegative Keime einschliesslich Pseudomonas oder durch)
|
971
|
+
Tj 0 -9.6 Td(Staphylococcus verursacht sind.) Tj 0 -11.6 Td(Mund-Zahn-Kiefer-Infektionen.)
|
972
|
+
Tj 0 -11.6 Td(Infektionen der Nieren und/oder der ableitenden Harnwege.) Tj 0 -11.6
|
973
|
+
Td(Infektionen der Geschlechtsorgane, einschliesslich Gonorrh� und Adnexitis.) Tj
|
974
|
+
-0.631 Tw 0 -11.6 Td(Bei einer begleiteten Infektion durch Chlamydien/Mykoplasmen \(nicht- resp. postgonorrhoische Urethritis\) ist Ciprofloxacin Sandoz)
|
975
|
+
Tj 0 Tw 0 -9.6 Td(i.v. nicht das Mittel der 1. Wahl \(siehe �Spezielle Dosieranweisung�\). Eine begleitende Lues wird nicht beeinflusst.)
|
976
|
+
Tj 0 -11.6 Td(Infektionen des Magen-Darm-Traktes.) Tj 0 -11.6 Td(Infektionen der Gallenwege.)
|
977
|
+
Tj 0 -11.6 Td(Wund- und Weichteilinfektionen.) Tj 0 -11.6 Td(Infektionen der Knochen und Gelenke.)
|
978
|
+
Tj 0 -11.6 Td(Infektionen in Gyn�kologie und Geburtshilfe.) Tj 0 -11.6 Td(Sepsis.)
|
979
|
+
Tj 0 -11.6 Td(Infektionen des Bauchfells \(Peritonitis\).) Tj 0 -11.6 Td(Infektionen der Augen.)
|
980
|
+
Tj 0 -11.6 Td(Infektionen oder drohende Infektionsgefahr \(Prophylaxe\) bei Patienten mit geschw�chter k�rpereigener Abwehr \(z.B. unter)
|
981
|
+
Tj 0 -9.6 Td(Behandlung mit Immunsuppressiva bzw. im neutropenischen Zustand\).)
|
982
|
+
Tj 0 -11.6 Td(Anwendung zur selektiven Darmdekontamination bei immunsuppressiv behandelten Patienten \(oral\).)
|
983
|
+
Tj /F3 8 Tf 0 -11.6 Td(Bei Milzbrand:) Tj /F0 8 Tf 50.688 0 Td( Zur Postexpositionsprophylaxe und zur Behandlung des Milzbrandes nach Inhalation des Erregers Bacillus)
|
984
|
+
Tj -50.688 -9.6 Td(anthracis. Die Wirksamkeit von Ciprofloxacin bei Milzbrand wurde tierexperimentell belegt \(siehe Kapitel �Eigenschaften/)
|
985
|
+
Tj -0.67 Tw 0 -9.6 Td(Wirkungen�\). Bei Kindern, Heranwachsenden, Schwangeren und stillenden Frauen sollte nach Feststellung des Resistenzmusters)
|
986
|
+
Tj 0 Tw 0 -9.6 Td(des beteiligten Bacillus anthracis-Stammes die M�glichkeit einer Umstellung der Therapie auf \(Amino-\) penicilline �berpr�ft)
|
987
|
+
Tj 0 -9.6 Td(werden.) Tj /F2 8 Tf 0 -17.6 Td(Dosierung/Anwendung) Tj /F3 8 Tf 0 -9.6
|
988
|
+
Td(�bliche Dosierung) Tj ET
|
989
|
+
1 0 0 1 0 551.4 cm 0.1 w q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l
|
990
|
+
W* n 0 2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935
|
991
|
+
l 452.443 3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035
|
992
|
+
l 1.1 1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1
|
993
|
+
1.835 l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 5.869
|
994
|
+
cm
|
995
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm( Einzel-/Tagesdosen)
|
996
|
+
Tj ET
|
997
|
+
1 0 0 1 0 9.6 cm
|
998
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm( bei Erwachsenen )
|
999
|
+
Tj ET
|
1000
|
+
1 0 0 1 0 9.6 cm q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l W* n 0
|
1001
|
+
2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935 l 452.443
|
1002
|
+
3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035 l 1.1
|
1003
|
+
1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1 1.835
|
1004
|
+
l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 5.869 cm
|
1005
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(Einfache Infektionen der 2� 200 mg )
|
1006
|
+
Tj ET
|
1007
|
+
1 0 0 1 0 9.6 cm
|
1008
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(unteren und oberen Harnwege )
|
1009
|
+
Tj ET
|
1010
|
+
1 0 0 1 0 9.6 cm q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l W* n 0
|
1011
|
+
2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935 l 452.443
|
1012
|
+
3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035 l 1.1
|
1013
|
+
1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1 1.835
|
1014
|
+
l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 5.869 cm
|
1015
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(Schwere Infektionen der Harnwege 2� 200 mg bis )
|
1016
|
+
Tj ET
|
1017
|
+
1 0 0 1 0 9.6 cm
|
1018
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm( 2� 400 mg )
|
1019
|
+
Tj ET
|
1020
|
+
1 0 0 1 0 9.6 cm q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l W* n 0
|
1021
|
+
2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935 l 452.443
|
1022
|
+
3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035 l 1.1
|
1023
|
+
1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1 1.835
|
1024
|
+
l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 5.869 cm
|
1025
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(Infektionen der Atemwege 2� 200 mg bis )
|
1026
|
+
Tj ET
|
1027
|
+
1 0 0 1 0 9.6 cm
|
1028
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(\(je nach Schweregrad und Keim\) 2� 400 mg )
|
1029
|
+
Tj ET
|
1030
|
+
1 0 0 1 0 9.6 cm q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l W* n 0
|
1031
|
+
2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935 l 452.443
|
1032
|
+
3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035 l 1.1
|
1033
|
+
1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1 1.835
|
1034
|
+
l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 5.869 cm
|
1035
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(Andere Infektionen 2� 400 mg )
|
1036
|
+
Tj ET
|
1037
|
+
1 0 0 1 0 9.6 cm
|
1038
|
+
BT /F4 8 Tf 1 0 0 -1 0 6.929 Tm(\(vergleiche Indikationen\) )
|
1039
|
+
Tj ET
|
1040
|
+
1 0 0 1 0 9.6 cm q -1 1.835 m 454.543 1.835 l 452.443 3.935 l 1.1 3.935 l W* n 0
|
1041
|
+
2.885 m 453.543 2.885 l S Q q 454.543 1.835 m 454.543 4.035 l 452.443 1.935 l 452.443
|
1042
|
+
3.935 l W* n 453.493 2.835 m 453.493 3.035 l S Q q 454.543 4.035 m -1 4.035 l 1.1
|
1043
|
+
1.935 l 452.443 1.935 l W* n 453.543 2.985 m 0 2.985 l S Q q -1 4.035 m -1 1.835
|
1044
|
+
l 1.1 3.935 l 1.1 1.935 l W* n 0.05 3.035 m 0.05 2.835 l S Q 1 0 0 1 0 -676.746 cm
|
1045
|
+
BT /F0 8 Tf 1 0 0 -1 0 690.189 Tm(Bei akuter, unkomplizierter Gonorrh� der Frau und des Mannes \(Urethritis\) und bei einer unkomplizierten Zystitis der Frau reicht)
|
1046
|
+
Tj 0 -9.6 Td(die einmalige Infusion von 200 mg.) Tj ET
|
1047
|
+
1 0 0 1 0 714.331 cm 1 1 1 RG 1 w q -1.5 -1.5 m 455.043 -1.5 l 452.043 1.5 l 1.5
|
1048
|
+
1.5 l W* n -0.5 0 m 454.043 0 l S Q q 455.043 -1.5 m 455.043 32.5 l 452.043 29.5
|
1049
|
+
l 452.043 1.5 l W* n 453.543 -0.5 m 453.543 31.5 l S Q q -1.5 32.5 m -1.5 -1.5 l
|
1050
|
+
1.5 1.5 l 1.5 29.5 l W* n 0 31.5 m 0 -0.5 l S Q 1 0 0 1 0 31 cm q -1.5 -1.5 m 305.043
|
1051
|
+
-1.5 l 302.043 1.5 l 1.5 1.5 l W* n -0.5 0 m 304.043 0 l S Q q 305.043 -1.5 m 305.043
|
1052
|
+
12.1 l 302.043 9.1 l 302.043 1.5 l W* n 303.543 -0.5 m 303.543 11.1 l S Q q 305.043
|
1053
|
+
12.1 m -1.5 12.1 l 1.5 9.1 l 302.043 9.1 l W* n 304.043 10.6 m -0.5 10.6 l S Q q
|
1054
|
+
-1.5 12.1 m -1.5 -1.5 l 1.5 1.5 l 1.5 9.1 l W* n 0 11.1 m 0 -0.5 l S Q 1 0 0 1 304.043
|
1055
|
+
0.5 cm
|
1056
|
+
BT /F0 8 Tf 1 0 0 -1 124.094 7.573 Tm(Seite 1) Tj ET
|
1057
|
+
1 0 0 1 -304.043 -0.5 cm q 302.043 -1.5 m 455.043 -1.5 l 452.043 1.5 l 305.043 1.5
|
1058
|
+
l W* n 303.043 0 m 454.043 0 l S Q q 455.043 -1.5 m 455.043 12.1 l 452.043 9.1 l
|
1059
|
+
452.043 1.5 l W* n 453.543 -0.5 m 453.543 11.1 l S Q q 455.043 12.1 m 302.043 12.1
|
1060
|
+
l 305.043 9.1 l 452.043 9.1 l W* n 454.043 10.6 m 303.043 10.6 l S Q 1 0 0 1 -70.866
|
1061
|
+
-816.197 cm Q
|
1062
|
+
EOS
|
1063
|
+
fontsrc0 = <<-EOS
|
1064
|
+
6 0 obj
|
1065
|
+
<<
|
1066
|
+
/Type /Font
|
1067
|
+
/Name /F0
|
1068
|
+
/Subtype /TrueType
|
1069
|
+
/BaseFont /ArialMT
|
1070
|
+
/FirstChar 32
|
1071
|
+
/LastChar 252
|
1072
|
+
/Widths[ 278 0 0 0 0 889 0 0 333 333 0 0 278 333 278 278 556 556 556 556 556 556
|
1073
|
+
556 556 556 556 278 278 584 584 0 0 0 667 667 722 722 667 611 778 722 278 500 667
|
1074
|
+
556 833 722 778 667 0 722 667 611 722 667 944 667 667 611 278 0 278 0 0 0 556 556
|
1075
|
+
500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722
|
1076
|
+
500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0
|
1077
|
+
0 0 0 0 0 0 0 0 0 0 0 0 737 0 556 0 0 737 0 400 0 333 0 0 576 0 0 0 0 0 556 0 0 0
|
1078
|
+
0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 584 0 0 0 0 722 0 0 0 0 0 0 0 556
|
1079
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556 ]
|
1080
|
+
/Encoding /WinAnsiEncoding
|
1081
|
+
/FontDescriptor 72 0 R
|
1082
|
+
>>
|
1083
|
+
endobj
|
1084
|
+
EOS
|
1085
|
+
font0 = Rpdf2txt::Font.new(fontsrc0)
|
1086
|
+
fontsrc1 = <<-EOS
|
1087
|
+
7 0 obj
|
1088
|
+
<<
|
1089
|
+
/Type /Font
|
1090
|
+
/Name /F1
|
1091
|
+
/Subtype /TrueType
|
1092
|
+
/BaseFont /Arial-BoldMT
|
1093
|
+
/FirstChar 32
|
1094
|
+
/LastChar 174
|
1095
|
+
/Widths[ 278 0 0 0 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1096
|
+
0 722 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 667 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 556 611 0
|
1097
|
+
333 0 0 278 0 0 278 0 611 611 611 0 389 0 0 0 556 0 556 0 500 0 0 0 0 0 0 0 0 0 0
|
1098
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1099
|
+
737 ]
|
1100
|
+
/Encoding /WinAnsiEncoding
|
1101
|
+
/FontDescriptor 73 0 R
|
1102
|
+
>>
|
1103
|
+
endobj
|
1104
|
+
EOS
|
1105
|
+
font1 = Rpdf2txt::Font.new(fontsrc1)
|
1106
|
+
fontsrc2 = <<-EOS
|
1107
|
+
8 0 obj
|
1108
|
+
<<
|
1109
|
+
/Type /Font
|
1110
|
+
/Name /F2
|
1111
|
+
/Subtype /TrueType
|
1112
|
+
/BaseFont /Arial-BoldItalicMT
|
1113
|
+
/FirstChar 32
|
1114
|
+
/LastChar 252
|
1115
|
+
/Widths[ 278 0 0 0 0 0 0 0 0 0 0 0 0 0 278 278 556 556 556 0 0 0 0 0 0 556 0 0 0
|
1116
|
+
0 0 0 0 722 722 0 722 667 611 778 722 278 0 722 0 833 722 778 667 0 0 667 0 722 667
|
1117
|
+
944 0 0 611 0 0 0 0 0 0 556 611 556 611 556 333 611 611 278 0 556 278 889 611 611
|
1118
|
+
611 0 389 556 333 611 556 778 0 0 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1119
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1120
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 722 0 0 0 0 0
|
1121
|
+
0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611 0 0 0 0 0 611 ]
|
1122
|
+
/Encoding /WinAnsiEncoding
|
1123
|
+
/FontDescriptor 74 0 R
|
1124
|
+
>>
|
1125
|
+
endobj
|
1126
|
+
EOS
|
1127
|
+
font2 = Rpdf2txt::Font.new(fontsrc2)
|
1128
|
+
fontsrc3 = <<-EOS
|
1129
|
+
9 0 obj
|
1130
|
+
<<
|
1131
|
+
/Type /Font
|
1132
|
+
/Name /F3
|
1133
|
+
/Subtype /TrueType
|
1134
|
+
/BaseFont /Arial-ItalicMT
|
1135
|
+
/FirstChar 32
|
1136
|
+
/LastChar 252
|
1137
|
+
/Widths[ 278 0 0 0 0 0 0 0 333 333 0 0 0 333 0 278 556 556 0 0 556 0 0 0 0 556 278
|
1138
|
+
0 584 584 584 0 0 667 667 722 722 667 0 778 722 0 500 667 556 833 722 0 667 0 722
|
1139
|
+
667 0 722 0 944 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 0 500 222 833
|
1140
|
+
556 556 556 0 333 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1141
|
+
0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1142
|
+
0 0 576 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1143
|
+
0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556 0 0 0 0 0 556 ]
|
1144
|
+
/Encoding /WinAnsiEncoding
|
1145
|
+
/FontDescriptor 75 0 R
|
1146
|
+
>>
|
1147
|
+
endobj
|
1148
|
+
EOS
|
1149
|
+
font3 = Rpdf2txt::Font.new(fontsrc3)
|
1150
|
+
fontsrc4 = <<-EOS
|
1151
|
+
10 0 obj
|
1152
|
+
<<
|
1153
|
+
/Type /Font
|
1154
|
+
/Name /F4
|
1155
|
+
/Subtype /TrueType
|
1156
|
+
/BaseFont /CourierNewPSMT
|
1157
|
+
/FirstChar 32
|
1158
|
+
/LastChar 252
|
1159
|
+
/Widths[ 600 0 0 0 0 600 0 0 600 600 0 0 600 600 600 600 600 600 600 600 600 600
|
1160
|
+
600 600 600 600 0 0 600 0 600 0 0 600 600 600 600 600 600 600 600 600 600 600 600
|
1161
|
+
600 600 600 600 0 600 600 600 600 600 0 0 600 600 0 0 0 0 0 0 600 600 600 600 600
|
1162
|
+
600 600 600 600 600 600 600 600 600 600 600 0 600 600 600 600 600 600 600 600 600
|
1163
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 600 0 0 0 0 0 0 0 0 0 0 0 0
|
1164
|
+
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 600 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1165
|
+
0 0 0 0 0 0 0 0 0 0 0 0 600 0 0 0 0 0 0 0 0 0 0 0 0 600 0 0 0 0 0 0 0 0 0 0 0 0 0
|
1166
|
+
0 0 0 0 0 0 0 0 0 0 600 ]
|
1167
|
+
/Encoding /WinAnsiEncoding
|
1168
|
+
/FontDescriptor 76 0 R
|
1169
|
+
>>
|
1170
|
+
endobj
|
1171
|
+
EOS
|
1172
|
+
font4 = Rpdf2txt::Font.new(fontsrc4)
|
1173
|
+
fonts = {
|
1174
|
+
:f0 => font0,
|
1175
|
+
:f1 => font1,
|
1176
|
+
:f2 => font2,
|
1177
|
+
:f3 => font3,
|
1178
|
+
:f4 => font4,
|
1179
|
+
}
|
1180
|
+
text_state = Rpdf2txt::TextState.new('latin1')
|
1181
|
+
font_donor = FontDonorStub.new(fonts)
|
1182
|
+
text_snippets = stream.extract_text_objects(font_donor, text_state)
|
1183
|
+
expected = <<-EOS
|
1184
|
+
Fachinformation
|
1185
|
+
Ciprofloxacin Sandoz\256 i.v.
|
1186
|
+
SANDOZ
|
1187
|
+
AMZV 9.11.2001
|
1188
|
+
Zusammensetzung
|
1189
|
+
Wirkstoff: 1 Cyclopropyl-6-fluor-1,4-dihydro-4-oxo-7-(1-piperazinyl)-3-chinolincarbons\344ure (Ciprofloxacinum ut Ciprofloxacini
|
1190
|
+
hydrochloridum).
|
1191
|
+
Hilfsstoffe: Acidum lacticum, Natrii chloridum, Aqua ad inject.
|
1192
|
+
Galenische Form und Wirkstoffmenge pro Einheit
|
1193
|
+
L\366sung zur Infusion.
|
1194
|
+
Ciprofloxacinum 200 mg/100 ml ut Ciprofloxacini hydrochloridum.
|
1195
|
+
Ciprofloxacinum 400 mg/200 ml ut Ciprofloxacini hydrochloridum.
|
1196
|
+
Indikationen/Anwendungsm\366glichkeiten
|
1197
|
+
Ciprofloxacin Sandoz i.v. eignet sich zur Behandlung von Infektionen, die durch Ciprofloxacin-empfindliche Erreger hervorgerufen
|
1198
|
+
werden:
|
1199
|
+
Infektionen der Atemwege.
|
1200
|
+
Bei den im ambulanten Bereich h\344ufigen Pneumokokken-Pneumonien ist Ciprofloxacin Sandoz i.v. nicht das Mittel der ersten
|
1201
|
+
Wahl. Ciprofloxacin Sandoz i.v. kann aber bei Pneumonien, verursacht durch z.B. Klebsiella, Enterobacter, Proteus,
|
1202
|
+
Pseudomonas, E. coli, Haemophilus, Branhamella, Legionella, Staphylococcus, angezeigt sein.
|
1203
|
+
Bei akuten, durch P. aeruginosa verursachten Infektionssch\374ben bei Kindern und Jugendlichen (5-17 Jahre) mit Mukoviszidose.
|
1204
|
+
Die Behandlung betr\344gt 10-14 Tage.
|
1205
|
+
Hals-Nasen-Ohren-Infektionen. Insbesondere wenn sie durch gramnegative Keime einschliesslich Pseudomonas oder durch
|
1206
|
+
Staphylococcus verursacht sind.
|
1207
|
+
Mund-Zahn-Kiefer-Infektionen.
|
1208
|
+
Infektionen der Nieren und/oder der ableitenden Harnwege.
|
1209
|
+
Infektionen der Geschlechtsorgane, einschliesslich Gonorrh\366 und Adnexitis.
|
1210
|
+
Bei einer begleiteten Infektion durch Chlamydien/Mykoplasmen (nicht- resp. postgonorrhoische Urethritis) ist Ciprofloxacin Sandoz
|
1211
|
+
i.v. nicht das Mittel der 1. Wahl (siehe \253Spezielle Dosieranweisung\273). Eine begleitende Lues wird nicht beeinflusst.
|
1212
|
+
Infektionen des Magen-Darm-Traktes.
|
1213
|
+
Infektionen der Gallenwege.
|
1214
|
+
Wund- und Weichteilinfektionen.
|
1215
|
+
Infektionen der Knochen und Gelenke.
|
1216
|
+
Infektionen in Gyn\344kologie und Geburtshilfe.
|
1217
|
+
Sepsis.
|
1218
|
+
Infektionen des Bauchfells (Peritonitis).
|
1219
|
+
Infektionen der Augen.
|
1220
|
+
Infektionen oder drohende Infektionsgefahr (Prophylaxe) bei Patienten mit geschw\344chter k\366rpereigener Abwehr (z.B. unter
|
1221
|
+
Behandlung mit Immunsuppressiva bzw. im neutropenischen Zustand).
|
1222
|
+
Anwendung zur selektiven Darmdekontamination bei immunsuppressiv behandelten Patienten (oral).
|
1223
|
+
Bei Milzbrand: Zur Postexpositionsprophylaxe und zur Behandlung des Milzbrandes nach Inhalation des Erregers Bacillus
|
1224
|
+
anthracis. Die Wirksamkeit von Ciprofloxacin bei Milzbrand wurde tierexperimentell belegt (siehe Kapitel \253Eigenschaften/
|
1225
|
+
Wirkungen\273). Bei Kindern, Heranwachsenden, Schwangeren und stillenden Frauen sollte nach Feststellung des Resistenzmusters
|
1226
|
+
des beteiligten Bacillus anthracis-Stammes die M\366glichkeit einer Umstellung der Therapie auf (Amino-) penicilline \374berpr\374ft
|
1227
|
+
werden.
|
1228
|
+
Dosierung/Anwendung
|
1229
|
+
\334bliche Dosierung
|
1230
|
+
Einzel-/Tagesdosen
|
1231
|
+
bei Erwachsenen
|
1232
|
+
Einfache Infektionen der 2\327 200 mg
|
1233
|
+
unteren und oberen Harnwege
|
1234
|
+
Schwere Infektionen der Harnwege 2\327 200 mg bis
|
1235
|
+
2\327 400 mg
|
1236
|
+
Infektionen der Atemwege 2\327 200 mg bis
|
1237
|
+
(je nach Schweregrad und Keim) 2\327 400 mg
|
1238
|
+
Andere Infektionen 2\327 400 mg
|
1239
|
+
(vergleiche Indikationen)
|
1240
|
+
Bei akuter, unkomplizierter Gonorrh\366 der Frau und des Mannes (Urethritis) und bei einer unkomplizierten Zystitis der Frau reicht
|
1241
|
+
die einmalige Infusion von 200 mg.
|
1242
|
+
Seite 1
|
1243
|
+
EOS
|
1244
|
+
leaf = Rpdf2txt::PageLeaf.new
|
1245
|
+
handler = Rpdf2txt::SimpleHandler.new
|
1246
|
+
leaf.join_snippets(text_snippets, handler)
|
1247
|
+
result = handler.out
|
1248
|
+
assert_equal(expected.strip, result.strip)
|
1249
|
+
end
|
1250
|
+
# Verifies that a PDF octal escape for a non-ASCII character survives text
# extraction: the two text snippets "(\334bli)" and "(che Ta)" are expected to
# be joined into a single run whose first byte is 0xDC (U-umlaut in Latin-1).
def test_umlaut
  # `\\334` inside the heredoc produces a literal backslash followed by 334,
  # i.e. the octal escape as it appears in the raw PDF content stream.
  input = <<-EOS
BT
/TT9 1 Tf
0.0009 Tc -0.0033 Tw 10.02 0 0 10.02 70.86 279.2013 Tm
(\\334bli)Tj
10.02 0 0 10.02 88.0754 279.2013 Tm
(che Ta)Tj
10.02 0 0 10.02 118.6655 279.2013 Tm
ET
  EOS
  # Font object referenced by the /TT9 selector in the text object above.
  fontsrc = <<-EOS
17 0 obj
<</BaseFont /CAIIOF+Arial,Italic
/LastChar 252
/Subtype /TrueType
/FontDescriptor 242 0 R
/Widths [278 0 0 0 0 0 0 0 0 0 0 0 0 333 0 0 0 556 556 0 0 0 556 0 0 0 278 0 0 0 0 0 0 667 667 0 722 667 0 0 722 278 500 667 556 0 0 0 667 0 0 667 611 0 0 944 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 0 500 222 833 556 556 556 0 333 500 278 556 500 722 0 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 722 0 0 0 0 0 0 0 556 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 556]
/Encoding /WinAnsiEncoding
/Type /Font
/FirstChar 32
>>
endobj
  EOS
  txt = Rpdf2txt::Text.new(input, 'latin1')
  font = Rpdf2txt::Font.new(fontsrc, 'latin1')
  fonts = {
    :tt9 => font
  }
  txt.current_page = FontDonorStub.new(fonts)
  leaf = Rpdf2txt::PageLeaf.new
  # Written as an octal escape so the expected Latin-1 byte (0xDC) does not
  # depend on the encoding the source file happens to be saved or viewed in.
  expected = "\334bliche Ta"
  handler = Rpdf2txt::SimpleHandler.new
  leaf.join_snippets(txt.scan, handler)
  result = handler.out
  assert_equal(expected, result)
end
|
1287
|
+
# Feeds the parser a source containing two trailer sections and checks the
# resulting trailer dictionary: :size is expected from the later trailer
# ("500"), while :info and the encrypt id are only present in the earlier one.
def test_trailer_dictionary
  # The surrounding junk lines ("SDSdASDASd", "adfadfadf") stand in for
  # arbitrary file content around the trailers.
  input ='
SDSdASDASd
trailer
<<
/Size 476
/Info 388 0 R
/Encrypt 395 0 R
/Root 394 0 R
/Prev 203754
/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]
>>
startxref
adfadfadf
trailer
<<
/Size 500
/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]
>>
startxref'
  @parser.src = input
  @parser.object_catalogue
  @parser.build_trailer_dictionary
  assert_equal("500", @parser.trailer_dictionary.attributes[:size])
  assert_equal("388 0 R", @parser.trailer_dictionary.attributes[:info])
  assert_equal(395, @parser.trailer_dictionary.encrypt_id)
end
|
1314
|
+
# Uses the same two-trailer fixture as the trailer dictionary test and checks
# that the parser itself exposes the encrypt object id (395) found in the
# earlier trailer, while :size still reflects the later trailer ("500").
def test_encrypt
  input ='
SDSdASDASd
trailer
<<
/Size 476
/Info 388 0 R
/Encrypt 395 0 R
/Root 394 0 R
/Prev 203754
/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]
>>
startxref
adfadfadf
trailer
<<
/Size 500
/ID[<8664e6986751f2a49dccc9a4b40a4f18v><e720b2184372f5e3f4edd86673b81dfd>]
>>
startxref'
  @parser.src = input
  @parser.object_catalogue
  # NOTE(review): called only for its effect here - presumably this builds the
  # trailer dictionary on first access; confirm against the parser.
  @parser.trailer_dictionary
  assert_equal(395, @parser.encrypt_id)
  assert_equal("500", @parser.trailer_dictionary.attributes[:size])
end
|
1340
|
+
end
|