rpdf2txt 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.txt +4 -0
- data/Manifest.txt +0 -4
- data/README.txt +16 -3
- data/bin/rpdf2txt +4 -1
- data/lib/rpdf2txt/data/cmap.rb +10 -9
- data/lib/rpdf2txt/data/cmap_range.rb +13 -12
- data/lib/rpdf2txt/data/pdfattributes.rb +14 -13
- data/lib/rpdf2txt/data/pdftext.rb +19 -18
- data/lib/rpdf2txt/object.rb +68 -13
- data/lib/rpdf2txt/parser.rb +6 -2
- data/lib/rpdf2txt/text.rb +1 -2
- data/lib/rpdf2txt/text_state.rb +10 -2
- data/lib/rpdf2txt-rockit/rockit.rb +1 -1
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +1 -0
- data/lib/rpdf2txt-rockit/token.rb +1 -0
- data/test/mock.rb +19 -11
- data/test/test_object.rb +33 -0
- data/test/test_pdf_object.rb +25 -24
- data/test/test_pdf_parser.rb +8 -5
- data/test/test_pdf_text.rb +11 -10
- data/test/test_space_bug_05_2004.rb +2 -1
- data/test/test_stream.rb +6 -5
- data/test/test_text_state.rb +220 -219
- metadata +13 -14
- data/config.save +0 -12
- data/lib/rpdf2txt/data/_cmap.grammar +0 -11
- data/lib/rpdf2txt/data/_cmap_range.grammar +0 -15
- data/lib/rpdf2txt/data/_pdfattributes.grammar +0 -32
data/test/mock.rb
CHANGED
@@ -16,7 +16,8 @@
|
|
16
16
|
# along with this program; if not, write to the Free Software
|
17
17
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
18
18
|
|
19
|
-
require 'runit/error'
|
19
|
+
#require 'runit/error'
|
20
|
+
require 'test/unit'
|
20
21
|
|
21
22
|
|
22
23
|
class Mock
|
@@ -58,7 +59,8 @@ class Mock
|
|
58
59
|
#
|
59
60
|
def __verify
|
60
61
|
if @next_call != @mock_calls.length
|
61
|
-
raise RUNIT::AssertionFailedError,
|
62
|
+
#raise RUNIT::AssertionFailedError,
|
63
|
+
raise Test::Unit::AssertionFailedError,
|
62
64
|
"not all expected method calls were made to #{@name}",
|
63
65
|
caller
|
64
66
|
end
|
@@ -78,33 +80,37 @@ private
|
|
78
80
|
#
|
79
81
|
def __mock_call( name, args, block )
|
80
82
|
if @next_call >= @mock_calls.length
|
81
|
-
raise RUNIT::AssertionFailedError,
|
83
|
+
#raise RUNIT::AssertionFailedError,
|
84
|
+
raise Test::Unit::AssertionFailedError,
|
82
85
|
"unexpected call to #{name} method of #{@name}",
|
83
86
|
caller(2)
|
84
87
|
end
|
85
|
-
|
86
88
|
expected_name,body = @mock_calls[@next_call]
|
87
89
|
@next_call += 1
|
88
90
|
|
89
91
|
if name != expected_name
|
90
|
-
raise RUNIT::AssertionFailedError,
|
92
|
+
#raise RUNIT::AssertionFailedError,
|
93
|
+
raise Test::Unit::AssertionFailedError,
|
91
94
|
"wrong method called on #{@name}; " +
|
92
95
|
"expected #{expected_name}, was #{name}",
|
93
96
|
caller(2)
|
94
97
|
end
|
95
|
-
|
98
|
+
|
96
99
|
args_length = args.length + (block ? 1 : 0)
|
97
|
-
|
100
|
+
|
98
101
|
if body.arity < 0
|
99
102
|
if (body.arity+1).abs > args_length
|
100
|
-
raise RUNIT::AssertionFailedError,
|
103
|
+
#raise RUNIT::AssertionFailedError,
|
104
|
+
raise Test::Unit::AssertionFailedError,
|
101
105
|
"too few arguments to #{name} method of #{@name}; " +
|
102
106
|
"require #{(body.arity+1).abs}, got #{args.length}",
|
103
107
|
caller(2)
|
104
108
|
end
|
105
109
|
else
|
110
|
+
|
106
111
|
if body.arity != args_length
|
107
|
-
raise RUNIT::AssertionFailedError,
|
112
|
+
#raise RUNIT::AssertionFailedError,
|
113
|
+
raise Test::Unit::AssertionFailedError,
|
108
114
|
"wrong number of arguments to " +
|
109
115
|
"#{name} method of #{@name}; " +
|
110
116
|
"require #{body.arity}, got #{args.length}",
|
@@ -120,7 +126,8 @@ private
|
|
120
126
|
end
|
121
127
|
|
122
128
|
if not precondition_ok
|
123
|
-
raise RUNIT::AssertionFailedError,
|
129
|
+
#raise RUNIT::AssertionFailedError,
|
130
|
+
raise Test::Unit::AssertionFailedError,
|
124
131
|
"precondition of #{name} method violated",
|
125
132
|
caller(2)
|
126
133
|
end
|
@@ -135,7 +142,8 @@ private
|
|
135
142
|
|
136
143
|
# The name of a precondition for a method
|
137
144
|
def Mock.__pre( method )
|
138
|
-
"__pre_#{method.to_i}".intern
|
145
|
+
#"__pre_#{method.to_i}".intern
|
146
|
+
"__pre_#{method}".intern
|
139
147
|
end
|
140
148
|
|
141
149
|
|
data/test/test_object.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# TestObject -- rpdf2txt -- 26.05.2011 -- mhatakeyama@ywesee.com
|
4
|
+
|
5
|
+
$: << File.expand_path('../lib', File.dirname(__FILE__))
|
6
|
+
|
7
|
+
require 'test/unit'
|
8
|
+
require 'flexmock'
|
9
|
+
require 'rpdf2txt/object'
|
10
|
+
|
11
|
+
module Rpdf2txt
|
12
|
+
class TestPageLeaf < Test::Unit::TestCase
|
13
|
+
include FlexMock::TestCase
|
14
|
+
def test_merge_snippets
|
15
|
+
pageleaf = Rpdf2txt::PageLeaf.new
|
16
|
+
snippet1 = flexmock('snippet1',
|
17
|
+
:txt => 'txt1',
|
18
|
+
:txt= => nil
|
19
|
+
)
|
20
|
+
snippet2 = flexmock('snippet2',
|
21
|
+
:txt => 'txt2',
|
22
|
+
:txt= => nil
|
23
|
+
)
|
24
|
+
|
25
|
+
text_snippets = [snippet1, snippet2, snippet2]
|
26
|
+
result = pageleaf.merge_snippets(text_snippets)
|
27
|
+
assert_equal(2, result.length)
|
28
|
+
assert_kind_of(snippet1.class, result[0])
|
29
|
+
assert_kind_of(snippet2.class, result[1])
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
data/test/test_pdf_object.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
#
|
3
4
|
# Rpdf2txt -- PDF to Text Parser
|
4
5
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
|
@@ -531,13 +532,13 @@ ET
|
|
531
532
|
class TestEncrypt < Test::Unit::TestCase
|
532
533
|
def setup
|
533
534
|
file = File.expand_path('./data/encrypt_string', File.dirname(__FILE__))
|
534
|
-
src_encrypt_obj =
|
535
|
+
src_encrypt_obj = open(file, 'rb'){|file| file.read}
|
535
536
|
@encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj)
|
536
537
|
@encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18'
|
537
538
|
end
|
538
539
|
def test_decrypt
|
539
540
|
file = File.expand_path('./data/working_obj', File.dirname(__FILE__))
|
540
|
-
input =
|
541
|
+
input = open(file, 'rb'){|file| file.read}
|
541
542
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
542
543
|
assert_equal("dc08b36009e48618f99c", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
543
544
|
#if the stream could be inflated, the decryption is ok!
|
@@ -548,7 +549,7 @@ ET
|
|
548
549
|
end
|
549
550
|
def test_decrypt2
|
550
551
|
file = File.expand_path('./data/90_obj', File.dirname(__FILE__))
|
551
|
-
input =
|
552
|
+
input = open(file, 'rb'){|file| file.read}
|
552
553
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
553
554
|
assert_equal("7617ca1ac5babcf09cdf", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
554
555
|
#if the stream could be inflated, the decryption is ok!
|
@@ -559,7 +560,7 @@ ET
|
|
559
560
|
end
|
560
561
|
def test_decrypt3
|
561
562
|
file = File.expand_path('./data/working_obj2', File.dirname(__FILE__))
|
562
|
-
input =
|
563
|
+
input = open(file, 'rb'){|file| file.read}
|
563
564
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
564
565
|
assert_equal("a9a666959bd64a96551b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
565
566
|
#if the stream could be inflated, the decryption is ok!
|
@@ -570,7 +571,7 @@ ET
|
|
570
571
|
end
|
571
572
|
def test_decrypt5
|
572
573
|
file = File.expand_path('./data/458_obj', File.dirname(__FILE__))
|
573
|
-
input =
|
574
|
+
input = open(file, 'rb'){|file| file.read}
|
574
575
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
575
576
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
576
577
|
#if the stream could be inflated, the decryption is ok!
|
@@ -581,7 +582,7 @@ ET
|
|
581
582
|
end
|
582
583
|
def test_decrypt6
|
583
584
|
file = File.expand_path('./data/450_obj', File.dirname(__FILE__))
|
584
|
-
input =
|
585
|
+
input = open(file, 'rb'){|file| file.read}
|
585
586
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
586
587
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
587
588
|
#if the stream could be inflated, the decryption is ok!
|
@@ -592,7 +593,7 @@ ET
|
|
592
593
|
end
|
593
594
|
def test_decrypt7
|
594
595
|
file = File.expand_path('./data/465_obj', File.dirname(__FILE__))
|
595
|
-
input =
|
596
|
+
input = open(file, 'rb'){|file| file.read}
|
596
597
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
597
598
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
598
599
|
#if the stream could be inflated, the decryption is ok!
|
@@ -603,7 +604,7 @@ ET
|
|
603
604
|
end
|
604
605
|
def test_decrypt_key
|
605
606
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
606
|
-
src =
|
607
|
+
src = open(file, 'rb'){|file| file.read}
|
607
608
|
#byte position important! do not indent these lines!!!
|
608
609
|
obj_src = <<-EOS
|
609
610
|
473 0 obj
|
@@ -618,7 +619,7 @@ endobj
|
|
618
619
|
end
|
619
620
|
def test_inflate_obj
|
620
621
|
file = File.expand_path('./data/90_obj_comp', File.dirname(__FILE__))
|
621
|
-
input =
|
622
|
+
input = open(file, 'rb'){|file| file.read}
|
622
623
|
input = [input].pack('H*')
|
623
624
|
# puts input
|
624
625
|
assert_nothing_raised{
|
@@ -628,7 +629,7 @@ endobj
|
|
628
629
|
end
|
629
630
|
def test_parse_encrypt
|
630
631
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
631
|
-
src =
|
632
|
+
src = open(file, 'rb'){|file| file.read}
|
632
633
|
encrypt = Rpdf2txt::PdfEncrypt.new(src)
|
633
634
|
encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18'
|
634
635
|
assert_equal("00ecc7a7bf8d68c564a21b98258b1dbff2aaf8d24bfdbaa74a9a073467d896b6", encrypt.user_key.unpack("H*").first)
|
@@ -639,7 +640,7 @@ endobj
|
|
639
640
|
end
|
640
641
|
def test_endianess
|
641
642
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
642
|
-
src =
|
643
|
+
src = open(file, 'rb'){|file| file.read}
|
643
644
|
encrypt = Rpdf2txt::PdfEncrypt.new(src)
|
644
645
|
encrypt.big_endian?
|
645
646
|
end
|
@@ -648,13 +649,13 @@ endobj
|
|
648
649
|
def setup
|
649
650
|
file = File.expand_path('./data/encrypt_string_128bit',
|
650
651
|
File.dirname(__FILE__))
|
651
|
-
src_encrypt_obj =
|
652
|
+
src_encrypt_obj = open(file, 'rb'){|file| file.read}
|
652
653
|
@encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj)
|
653
654
|
@encrypt.file_id = 'D816A5E838D50653C19DB62504229EB6'
|
654
655
|
end
|
655
656
|
def test_decrypt8
|
656
657
|
file = File.expand_path('./data/3392_obj', File.dirname(__FILE__))
|
657
|
-
input =
|
658
|
+
input = open(file, 'rb'){|file| file.read}
|
658
659
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
659
660
|
#if the stream could be inflated, the decryption is ok!
|
660
661
|
assert_nothing_raised{
|
@@ -1231,7 +1232,7 @@ perm\351abilit\351 vasculaire et une inflammation.
|
|
1231
1232
|
Swissmedic Journal 03/2006 226
|
1232
1233
|
EOS
|
1233
1234
|
result = handler.out.strip
|
1234
|
-
=begin
|
1235
|
+
=begin keep
|
1235
1236
|
[expected.size, result.size].max.times do |idx|
|
1236
1237
|
unless result[idx] == expected[idx]
|
1237
1238
|
flunk "unexpected result: (#{result[idx]}/#{expected[idx]} at #{idx}) ...#{expected[idx-10,20].inspect}..."
|
@@ -1653,7 +1654,7 @@ Seite 1 von 1083
|
|
1653
1654
|
expected = "HEUMANN PH GMBH&CO. KG 20 St"
|
1654
1655
|
assert_equal(expected.strip, handler.out.strip)
|
1655
1656
|
end
|
1656
|
-
=begin
|
1657
|
+
=begin keep
|
1657
1658
|
def test_text_space_bug2
|
1658
1659
|
stream = Stream.new
|
1659
1660
|
path = File.expand_path('data/space_bug_stream2.txt',
|
@@ -1705,7 +1706,7 @@ endobj
|
|
1705
1706
|
class TestImage < Test::Unit::TestCase
|
1706
1707
|
def test_png
|
1707
1708
|
path = File.expand_path('data/png.pdfobj', File.dirname(__FILE__))
|
1708
|
-
src =
|
1709
|
+
src = open(path, 'rb'){|file| file.read}
|
1709
1710
|
obj = Image.new(src)
|
1710
1711
|
assert_nothing_raised { obj.image }
|
1711
1712
|
path = File.expand_path('data/logo.png', File.dirname(__FILE__))
|
@@ -1714,10 +1715,10 @@ endobj
|
|
1714
1715
|
end
|
1715
1716
|
def test_indexed
|
1716
1717
|
path = File.expand_path('data/index.pdfobj', File.dirname(__FILE__))
|
1717
|
-
src =
|
1718
|
+
src = open(path, 'rb'){|file| file.read}
|
1718
1719
|
index = Stream.new(src)
|
1719
1720
|
path = File.expand_path('data/indexed.pdfobj', File.dirname(__FILE__))
|
1720
|
-
src =
|
1721
|
+
src = open(path, 'rb'){|file| file.read}
|
1721
1722
|
obj = Image.new(src)
|
1722
1723
|
obj.build_tree(51 => index)
|
1723
1724
|
assert_nothing_raised { obj.image }
|
@@ -1727,10 +1728,10 @@ endobj
|
|
1727
1728
|
end
|
1728
1729
|
def test_indexed_2bit
|
1729
1730
|
path = File.expand_path('data/index_2bit.pdfobj', File.dirname(__FILE__))
|
1730
|
-
src =
|
1731
|
+
src = open(path, 'rb'){|file| file.read}
|
1731
1732
|
index = Stream.new(src)
|
1732
1733
|
path = File.expand_path('data/indexed_2bit.pdfobj', File.dirname(__FILE__))
|
1733
|
-
src =
|
1734
|
+
src = open(path, 'rb'){|file| file.read}
|
1734
1735
|
obj = Image.new(src)
|
1735
1736
|
obj.build_tree(21 => index)
|
1736
1737
|
assert_nothing_raised { obj.image }
|
@@ -1740,10 +1741,10 @@ endobj
|
|
1740
1741
|
end
|
1741
1742
|
def test_indexed_masked
|
1742
1743
|
path = File.expand_path('data/index_masked.pdfobj', File.dirname(__FILE__))
|
1743
|
-
src =
|
1744
|
+
src = open(path, 'rb'){|file| file.read}
|
1744
1745
|
index = Stream.new(src)
|
1745
1746
|
path = File.expand_path('data/indexed_masked.pdfobj', File.dirname(__FILE__))
|
1746
|
-
src =
|
1747
|
+
src = open(path, 'rb'){|file| file.read}
|
1747
1748
|
obj = Image.new(src)
|
1748
1749
|
obj.build_tree(21 => index)
|
1749
1750
|
assert_nothing_raised { obj.image }
|
@@ -1759,10 +1760,10 @@ endobj
|
|
1759
1760
|
end
|
1760
1761
|
def test_lzw_image
|
1761
1762
|
path = File.expand_path('data/lzw_index.pdfobj', File.dirname(__FILE__))
|
1762
|
-
src =
|
1763
|
+
src = open(path, 'rb'){|file| file.read}
|
1763
1764
|
index = Stream.new(src)
|
1764
1765
|
path = File.expand_path('data/lzw.pdfobj', File.dirname(__FILE__))
|
1765
|
-
src =
|
1766
|
+
src = open(path, 'rb'){|file| file.read}
|
1766
1767
|
obj = Image.new(src)
|
1767
1768
|
obj.build_tree(21 => index)
|
1768
1769
|
assert_nothing_raised { obj.image }
|
data/test/test_pdf_parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
2
|
+
# encoding: ascii-8bit
|
3
3
|
# Rpdf2txt -- PDF to Text Parser
|
4
4
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
|
5
5
|
#
|
@@ -75,7 +75,7 @@ class TestParser < Test::Unit::TestCase
|
|
75
75
|
end
|
76
76
|
def setup
|
77
77
|
file = File.expand_path('./data/page_tree.pdf', File.dirname(__FILE__))
|
78
|
-
input =
|
78
|
+
input = open(file, 'rb'){|file| file.read}
|
79
79
|
@parser = Rpdf2txt::Parser.new(input)
|
80
80
|
end
|
81
81
|
def test_object_catalogue
|
@@ -101,7 +101,7 @@ class TestParser < Test::Unit::TestCase
|
|
101
101
|
end
|
102
102
|
def test_rebuild_object_catalogue
|
103
103
|
file = File.expand_path('./data/encrypted_object_stream.pdf', File.dirname(__FILE__))
|
104
|
-
input =
|
104
|
+
input = open(file, 'rb'){|file| file.read}
|
105
105
|
parser = Rpdf2txt::Parser.new(input)
|
106
106
|
cat = parser.object_catalogue
|
107
107
|
assert_equal(3, cat.length)
|
@@ -322,7 +322,7 @@ endobj
|
|
322
322
|
leaf = Rpdf2txt::PageLeaf.new
|
323
323
|
expected = <<-EOS
|
324
324
|
Paroxetin besitzt eine selektive Wirkung; in-vitro Studien haben gezeigt, dass es, im Gegensatz zu
|
325
|
-
trizyklischen Antidepressiva, eine geringe Affinit\344t f\374r
|
325
|
+
trizyklischen Antidepressiva, eine geringe Affinit\344t f\374r a1-, a2- und b-Adrenozeptoren sowie f\374r
|
326
326
|
Dopamin (D2)-, 5-HT1-artige, 5-HT2 und Histamin (H1)-Rezeptoren aufweist. Das Fehlen einer
|
327
327
|
EOS
|
328
328
|
handler = Rpdf2txt::SimpleHandler.new
|
@@ -464,7 +464,9 @@ endobj
|
|
464
464
|
36 => 8805,
|
465
465
|
}
|
466
466
|
font2.cmap = cmap
|
467
|
-
|
467
|
+
#require 'pp'
|
468
|
+
#print "font2="
|
469
|
+
#pp font2
|
468
470
|
fonts = {
|
469
471
|
:tt0 => font0,
|
470
472
|
:tt2 => font2,
|
@@ -472,6 +474,7 @@ endobj
|
|
472
474
|
txt.current_page = FontDonorStub.new(fonts)
|
473
475
|
leaf = Rpdf2txt::PageLeaf.new
|
474
476
|
expected = "In Studie 1 evaluierte man 271 Patienten mit einer m\344ssigen bis schweren aktiven rheumatoiden \nArthritis, die \26318 Jahre alt waren, bei denen die Therapie mit mindestens einem, aber mit nicht mehr \n"
|
477
|
+
#expected = "In Studie 1 evaluierte man 271 Patienten mit einer m\344ssigen bis schweren aktiven rheumatoiden \nArthritis, die 18 Jahre alt waren, bei denen die Therapie mit mindestens einem, aber mit nicht mehr \n"
|
475
478
|
handler = Rpdf2txt::SimpleHandler.new
|
476
479
|
leaf.join_snippets(txt.scan, handler)
|
477
480
|
result = handler.out
|
data/test/test_pdf_text.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
#
|
3
4
|
# Rpdf2txt -- PDF to Text Parser
|
4
5
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
|
@@ -36,7 +37,7 @@ module Rpdf2txt
|
|
36
37
|
class TestText < Test::Unit::TestCase
|
37
38
|
def setup
|
38
39
|
path = File.expand_path("./data/test_text.txt", File.dirname(__FILE__))
|
39
|
-
src =
|
40
|
+
src = open(path, 'rb'){|file| file.read}
|
40
41
|
#@handler = Rpdf2txt::HTMLHandler.new
|
41
42
|
@text=Rpdf2txt::Text.new(src)
|
42
43
|
end
|
@@ -78,7 +79,7 @@ ET
|
|
78
79
|
ast = Rpdf2txt.text_parser.parse(@text.src)
|
79
80
|
assert_equal("-0.0002", ast.values.first.charspace.value)
|
80
81
|
text_state = Mock.new("text_state")
|
81
|
-
text_state.__next(:transformation_matrix=) {}
|
82
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
82
83
|
@text.text_state = text_state
|
83
84
|
text_state.__next(:set_char_spacing){|value|
|
84
85
|
assert_equal("-0.0002", value)
|
@@ -94,7 +95,7 @@ BT
|
|
94
95
|
ET
|
95
96
|
EOS
|
96
97
|
text_state = Mock.new("text_state")
|
97
|
-
text_state.__next(:transformation_matrix=) {}
|
98
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
98
99
|
@text.text_state = text_state
|
99
100
|
text_state.__next(:update_x){ |x|
|
100
101
|
assert_equal(-36.7896, x)
|
@@ -116,10 +117,10 @@ BT
|
|
116
117
|
ET
|
117
118
|
EOS
|
118
119
|
text_state = Mock.new("text_state")
|
119
|
-
text_state.__next(:transformation_matrix=) {}
|
120
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
120
121
|
@text.text_state = text_state
|
121
|
-
text_state.__next(:update_x){}
|
122
|
-
text_state.__next(:update_y){}
|
122
|
+
text_state.__next(:update_x){|*x|}
|
123
|
+
text_state.__next(:update_y){|*x|}
|
123
124
|
@text.scan
|
124
125
|
text_state.__verify
|
125
126
|
end
|
@@ -132,7 +133,7 @@ BT
|
|
132
133
|
ET
|
133
134
|
EOS
|
134
135
|
text_state = Mock.new("text_state")
|
135
|
-
text_state.__next(:transformation_matrix=) {}
|
136
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
136
137
|
@text.text_state = text_state
|
137
138
|
current_page.__next(:font){ |font|
|
138
139
|
assert_equal(:f16, font)
|
@@ -151,7 +152,7 @@ BT
|
|
151
152
|
ET
|
152
153
|
EOS
|
153
154
|
text_state = Mock.new("text_state")
|
154
|
-
text_state.__next(:transformation_matrix=) {}
|
155
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
155
156
|
@text.text_state = text_state
|
156
157
|
current_page.__next(:font){ |font|
|
157
158
|
assert_equal(:c2_0, font)
|
@@ -169,7 +170,7 @@ BT
|
|
169
170
|
ET
|
170
171
|
EOS
|
171
172
|
text_state = Mock.new("text_state")
|
172
|
-
text_state.__next(:transformation_matrix=) {}
|
173
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
173
174
|
@text.text_state = text_state
|
174
175
|
text_state.__next(:set_xscale){|x|
|
175
176
|
assert_equal("10", x)
|
@@ -199,7 +200,7 @@ BT
|
|
199
200
|
ET
|
200
201
|
EOS
|
201
202
|
text_state = Mock.new("text_state")
|
202
|
-
text_state.__next(:transformation_matrix=) {}
|
203
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
203
204
|
@text.text_state = text_state
|
204
205
|
text_state.__next(:set_word_spacing){ |wordspace|
|
205
206
|
assert_equal('0.0000', wordspace)
|
@@ -70,7 +70,8 @@ endobj
|
|
70
70
|
font30 = Font.new(font30_src) # WinAnsi Encoded
|
71
71
|
path = File.expand_path('data/space_bug_stream.txt',
|
72
72
|
File.dirname(__FILE__))
|
73
|
-
|
73
|
+
stream = open(path, 'rb'){|file| file.read}
|
74
|
+
stream = Stream.new(stream)
|
74
75
|
page = FontDonor.new
|
75
76
|
page.fonts = {
|
76
77
|
:f3 => font3,
|
data/test/test_stream.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
# TestStream -- rpdf2txt -- 01.06.2005 -- hwyss@ywesee.com
|
3
4
|
|
4
5
|
$: << File.expand_path('../lib', File.dirname(__FILE__))
|
@@ -32,13 +33,13 @@ BT /F1 16 Tf 1 0 0 -1 0 14.347 Tm(Ponstan
|
|
32
33
|
def test_decode_raw_stream
|
33
34
|
file = File.expand_path('./data/firststream',
|
34
35
|
File.dirname(__FILE__))
|
35
|
-
deflated =
|
36
|
+
deflated = open(file, 'rb'){|file| file.read}
|
36
37
|
src = "stream\n#{deflated}endstream"
|
37
38
|
stream = Rpdf2txt::Stream.new(src)
|
38
39
|
stream.attributes.store(:filter, '/FlateDecode')
|
39
40
|
file = File.expand_path('./data/test.txt',
|
40
41
|
File.dirname(__FILE__))
|
41
|
-
expected =
|
42
|
+
expected = open(file, 'rb'){|file| file.read}
|
42
43
|
assert_equal(expected, stream.decode_raw_stream)
|
43
44
|
end
|
44
45
|
def test_raw_stream
|
@@ -51,15 +52,15 @@ BT /F1 16 Tf 1 0 0 -1 0 14.347 Tm(Ponstan
|
|
51
52
|
end
|
52
53
|
def test_decoded_stream2
|
53
54
|
file = File.expand_path('./data/firststream', File.dirname(__FILE__))
|
54
|
-
@stream.raw_stream =
|
55
|
+
@stream.raw_stream = open(file, 'rb'){|file| file.read}
|
55
56
|
@stream.attributes[:filter] = "/FlateDecode"
|
56
57
|
file = File.expand_path('./data/test.txt', File.dirname(__FILE__))
|
57
|
-
expected =
|
58
|
+
expected = open(file, 'rb'){|file| file.read}
|
58
59
|
assert_equal(expected, @stream.decoded_stream)
|
59
60
|
end
|
60
61
|
def test_extract_text_objects
|
61
62
|
file = File.expand_path('./data/stream.txt', File.dirname(__FILE__))
|
62
|
-
@stream.decoded_stream =
|
63
|
+
@stream.decoded_stream = open(file, 'rb'){|file| file.read}
|
63
64
|
result = @stream.extract_text_objects(nil, TextState.new).select { |res|
|
64
65
|
res.is_a?(TextState)
|
65
66
|
}
|