rpdf2txt 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.txt +4 -0
- data/Manifest.txt +0 -4
- data/README.txt +16 -3
- data/bin/rpdf2txt +4 -1
- data/lib/rpdf2txt/data/cmap.rb +10 -9
- data/lib/rpdf2txt/data/cmap_range.rb +13 -12
- data/lib/rpdf2txt/data/pdfattributes.rb +14 -13
- data/lib/rpdf2txt/data/pdftext.rb +19 -18
- data/lib/rpdf2txt/object.rb +68 -13
- data/lib/rpdf2txt/parser.rb +6 -2
- data/lib/rpdf2txt/text.rb +1 -2
- data/lib/rpdf2txt/text_state.rb +10 -2
- data/lib/rpdf2txt-rockit/rockit.rb +1 -1
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +1 -0
- data/lib/rpdf2txt-rockit/token.rb +1 -0
- data/test/mock.rb +19 -11
- data/test/test_object.rb +33 -0
- data/test/test_pdf_object.rb +25 -24
- data/test/test_pdf_parser.rb +8 -5
- data/test/test_pdf_text.rb +11 -10
- data/test/test_space_bug_05_2004.rb +2 -1
- data/test/test_stream.rb +6 -5
- data/test/test_text_state.rb +220 -219
- metadata +13 -14
- data/config.save +0 -12
- data/lib/rpdf2txt/data/_cmap.grammar +0 -11
- data/lib/rpdf2txt/data/_cmap_range.grammar +0 -15
- data/lib/rpdf2txt/data/_pdfattributes.grammar +0 -32
data/test/mock.rb
CHANGED
@@ -16,7 +16,8 @@
|
|
16
16
|
# along with this program; if not, write to the Free Software
|
17
17
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
18
18
|
|
19
|
-
require 'runit/error'
|
19
|
+
#require 'runit/error'
|
20
|
+
require 'test/unit'
|
20
21
|
|
21
22
|
|
22
23
|
class Mock
|
@@ -58,7 +59,8 @@ class Mock
|
|
58
59
|
#
|
59
60
|
def __verify
|
60
61
|
if @next_call != @mock_calls.length
|
61
|
-
raise RUNIT::AssertionFailedError,
|
62
|
+
#raise RUNIT::AssertionFailedError,
|
63
|
+
raise Test::Unit::AssertionFailedError,
|
62
64
|
"not all expected method calls were made to #{@name}",
|
63
65
|
caller
|
64
66
|
end
|
@@ -78,33 +80,37 @@ private
|
|
78
80
|
#
|
79
81
|
def __mock_call( name, args, block )
|
80
82
|
if @next_call >= @mock_calls.length
|
81
|
-
raise RUNIT::AssertionFailedError,
|
83
|
+
#raise RUNIT::AssertionFailedError,
|
84
|
+
raise Test::Unit::AssertionFailedError,
|
82
85
|
"unexpected call to #{name} method of #{@name}",
|
83
86
|
caller(2)
|
84
87
|
end
|
85
|
-
|
86
88
|
expected_name,body = @mock_calls[@next_call]
|
87
89
|
@next_call += 1
|
88
90
|
|
89
91
|
if name != expected_name
|
90
|
-
raise RUNIT::AssertionFailedError,
|
92
|
+
#raise RUNIT::AssertionFailedError,
|
93
|
+
raise Test::Unit::AssertionFailedError,
|
91
94
|
"wrong method called on #{@name}; " +
|
92
95
|
"expected #{expected_name}, was #{name}",
|
93
96
|
caller(2)
|
94
97
|
end
|
95
|
-
|
98
|
+
|
96
99
|
args_length = args.length + (block ? 1 : 0)
|
97
|
-
|
100
|
+
|
98
101
|
if body.arity < 0
|
99
102
|
if (body.arity+1).abs > args_length
|
100
|
-
raise RUNIT::AssertionFailedError,
|
103
|
+
#raise RUNIT::AssertionFailedError,
|
104
|
+
raise Test::Unit::AssertionFailedError,
|
101
105
|
"too few arguments to #{name} method of #{@name}; " +
|
102
106
|
"require #{(body.arity+1).abs}, got #{args.length}",
|
103
107
|
caller(2)
|
104
108
|
end
|
105
109
|
else
|
110
|
+
|
106
111
|
if body.arity != args_length
|
107
|
-
raise RUNIT::AssertionFailedError,
|
112
|
+
#raise RUNIT::AssertionFailedError,
|
113
|
+
raise Test::Unit::AssertionFailedError,
|
108
114
|
"wrong number of arguments to " +
|
109
115
|
"#{name} method of #{@name}; " +
|
110
116
|
"require #{body.arity}, got #{args.length}",
|
@@ -120,7 +126,8 @@ private
|
|
120
126
|
end
|
121
127
|
|
122
128
|
if not precondition_ok
|
123
|
-
raise RUNIT::AssertionFailedError,
|
129
|
+
#raise RUNIT::AssertionFailedError,
|
130
|
+
raise Test::Unit::AssertionFailedError,
|
124
131
|
"precondition of #{name} method violated",
|
125
132
|
caller(2)
|
126
133
|
end
|
@@ -135,7 +142,8 @@ private
|
|
135
142
|
|
136
143
|
# The name of a precondition for a method
|
137
144
|
def Mock.__pre( method )
|
138
|
-
"__pre_#{method.to_i}".intern
|
145
|
+
#"__pre_#{method.to_i}".intern
|
146
|
+
"__pre_#{method}".intern
|
139
147
|
end
|
140
148
|
|
141
149
|
|
data/test/test_object.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
# TestObject -- rpdf2txt -- 26.05.2011 -- mhatakeyama@ywesee.com
|
4
|
+
|
5
|
+
$: << File.expand_path('../lib', File.dirname(__FILE__))
|
6
|
+
|
7
|
+
require 'test/unit'
|
8
|
+
require 'flexmock'
|
9
|
+
require 'rpdf2txt/object'
|
10
|
+
|
11
|
+
module Rpdf2txt
|
12
|
+
class TestPageLeaf < Test::Unit::TestCase
|
13
|
+
include FlexMock::TestCase
|
14
|
+
def test_merge_snippets
|
15
|
+
pageleaf = Rpdf2txt::PageLeaf.new
|
16
|
+
snippet1 = flexmock('snippet1',
|
17
|
+
:txt => 'txt1',
|
18
|
+
:txt= => nil
|
19
|
+
)
|
20
|
+
snippet2 = flexmock('snippet2',
|
21
|
+
:txt => 'txt2',
|
22
|
+
:txt= => nil
|
23
|
+
)
|
24
|
+
|
25
|
+
text_snippets = [snippet1, snippet2, snippet2]
|
26
|
+
result = pageleaf.merge_snippets(text_snippets)
|
27
|
+
assert_equal(2, result.length)
|
28
|
+
assert_kind_of(snippet1.class, result[0])
|
29
|
+
assert_kind_of(snippet2.class, result[1])
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
data/test/test_pdf_object.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
#
|
3
4
|
# Rpdf2txt -- PDF to Text Parser
|
4
5
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
|
@@ -531,13 +532,13 @@ ET
|
|
531
532
|
class TestEncrypt < Test::Unit::TestCase
|
532
533
|
def setup
|
533
534
|
file = File.expand_path('./data/encrypt_string', File.dirname(__FILE__))
|
534
|
-
src_encrypt_obj =
|
535
|
+
src_encrypt_obj = open(file, 'rb'){|file| file.read}
|
535
536
|
@encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj)
|
536
537
|
@encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18'
|
537
538
|
end
|
538
539
|
def test_decrypt
|
539
540
|
file = File.expand_path('./data/working_obj', File.dirname(__FILE__))
|
540
|
-
input =
|
541
|
+
input = open(file, 'rb'){|file| file.read}
|
541
542
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
542
543
|
assert_equal("dc08b36009e48618f99c", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
543
544
|
#if the stream could be inflated, the decryption is ok!
|
@@ -548,7 +549,7 @@ ET
|
|
548
549
|
end
|
549
550
|
def test_decrypt2
|
550
551
|
file = File.expand_path('./data/90_obj', File.dirname(__FILE__))
|
551
|
-
input =
|
552
|
+
input = open(file, 'rb'){|file| file.read}
|
552
553
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
553
554
|
assert_equal("7617ca1ac5babcf09cdf", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
554
555
|
#if the stream could be inflated, the decryption is ok!
|
@@ -559,7 +560,7 @@ ET
|
|
559
560
|
end
|
560
561
|
def test_decrypt3
|
561
562
|
file = File.expand_path('./data/working_obj2', File.dirname(__FILE__))
|
562
|
-
input =
|
563
|
+
input = open(file, 'rb'){|file| file.read}
|
563
564
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
564
565
|
assert_equal("a9a666959bd64a96551b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
565
566
|
#if the stream could be inflated, the decryption is ok!
|
@@ -570,7 +571,7 @@ ET
|
|
570
571
|
end
|
571
572
|
def test_decrypt5
|
572
573
|
file = File.expand_path('./data/458_obj', File.dirname(__FILE__))
|
573
|
-
input =
|
574
|
+
input = open(file, 'rb'){|file| file.read}
|
574
575
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
575
576
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
576
577
|
#if the stream could be inflated, the decryption is ok!
|
@@ -581,7 +582,7 @@ ET
|
|
581
582
|
end
|
582
583
|
def test_decrypt6
|
583
584
|
file = File.expand_path('./data/450_obj', File.dirname(__FILE__))
|
584
|
-
input =
|
585
|
+
input = open(file, 'rb'){|file| file.read}
|
585
586
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
586
587
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
587
588
|
#if the stream could be inflated, the decryption is ok!
|
@@ -592,7 +593,7 @@ ET
|
|
592
593
|
end
|
593
594
|
def test_decrypt7
|
594
595
|
file = File.expand_path('./data/465_obj', File.dirname(__FILE__))
|
595
|
-
input =
|
596
|
+
input = open(file, 'rb'){|file| file.read}
|
596
597
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
597
598
|
#assert_equal("1aaeedd5d5304b79709b", @encrypt.decrypt_key(pdf_obj).unpack('h*').first)
|
598
599
|
#if the stream could be inflated, the decryption is ok!
|
@@ -603,7 +604,7 @@ ET
|
|
603
604
|
end
|
604
605
|
def test_decrypt_key
|
605
606
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
606
|
-
src =
|
607
|
+
src = open(file, 'rb'){|file| file.read}
|
607
608
|
#byte position important! do not indent these lines!!!
|
608
609
|
obj_src = <<-EOS
|
609
610
|
473 0 obj
|
@@ -618,7 +619,7 @@ endobj
|
|
618
619
|
end
|
619
620
|
def test_inflate_obj
|
620
621
|
file = File.expand_path('./data/90_obj_comp', File.dirname(__FILE__))
|
621
|
-
input =
|
622
|
+
input = open(file, 'rb'){|file| file.read}
|
622
623
|
input = [input].pack('H*')
|
623
624
|
# puts input
|
624
625
|
assert_nothing_raised{
|
@@ -628,7 +629,7 @@ endobj
|
|
628
629
|
end
|
629
630
|
def test_parse_encrypt
|
630
631
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
631
|
-
src =
|
632
|
+
src = open(file, 'rb'){|file| file.read}
|
632
633
|
encrypt = Rpdf2txt::PdfEncrypt.new(src)
|
633
634
|
encrypt.file_id = '8664e6986751f2a49dccc9a4b40a4f18'
|
634
635
|
assert_equal("00ecc7a7bf8d68c564a21b98258b1dbff2aaf8d24bfdbaa74a9a073467d896b6", encrypt.user_key.unpack("H*").first)
|
@@ -639,7 +640,7 @@ endobj
|
|
639
640
|
end
|
640
641
|
def test_endianess
|
641
642
|
file = File.expand_path('./data/encrypt_obj', File.dirname(__FILE__))
|
642
|
-
src =
|
643
|
+
src = open(file, 'rb'){|file| file.read}
|
643
644
|
encrypt = Rpdf2txt::PdfEncrypt.new(src)
|
644
645
|
encrypt.big_endian?
|
645
646
|
end
|
@@ -648,13 +649,13 @@ endobj
|
|
648
649
|
def setup
|
649
650
|
file = File.expand_path('./data/encrypt_string_128bit',
|
650
651
|
File.dirname(__FILE__))
|
651
|
-
src_encrypt_obj =
|
652
|
+
src_encrypt_obj = open(file, 'rb'){|file| file.read}
|
652
653
|
@encrypt = Rpdf2txt::PdfEncrypt.new(src_encrypt_obj)
|
653
654
|
@encrypt.file_id = 'D816A5E838D50653C19DB62504229EB6'
|
654
655
|
end
|
655
656
|
def test_decrypt8
|
656
657
|
file = File.expand_path('./data/3392_obj', File.dirname(__FILE__))
|
657
|
-
input =
|
658
|
+
input = open(file, 'rb'){|file| file.read}
|
658
659
|
pdf_obj = Rpdf2txt::Stream.new(input)
|
659
660
|
#if the stream could be inflated, the decryption is ok!
|
660
661
|
assert_nothing_raised{
|
@@ -1231,7 +1232,7 @@ perm\351abilit\351 vasculaire et une inflammation.
|
|
1231
1232
|
Swissmedic Journal 03/2006 226
|
1232
1233
|
EOS
|
1233
1234
|
result = handler.out.strip
|
1234
|
-
=begin
|
1235
|
+
=begin keep
|
1235
1236
|
[expected.size, result.size].max.times do |idx|
|
1236
1237
|
unless result[idx] == expected[idx]
|
1237
1238
|
flunk "unexpected result: (#{result[idx]}/#{expected[idx]} at #{idx}) ...#{expected[idx-10,20].inspect}..."
|
@@ -1653,7 +1654,7 @@ Seite 1 von 1083
|
|
1653
1654
|
expected = "HEUMANN PH GMBH&CO. KG 20 St"
|
1654
1655
|
assert_equal(expected.strip, handler.out.strip)
|
1655
1656
|
end
|
1656
|
-
=begin
|
1657
|
+
=begin keep
|
1657
1658
|
def test_text_space_bug2
|
1658
1659
|
stream = Stream.new
|
1659
1660
|
path = File.expand_path('data/space_bug_stream2.txt',
|
@@ -1705,7 +1706,7 @@ endobj
|
|
1705
1706
|
class TestImage < Test::Unit::TestCase
|
1706
1707
|
def test_png
|
1707
1708
|
path = File.expand_path('data/png.pdfobj', File.dirname(__FILE__))
|
1708
|
-
src =
|
1709
|
+
src = open(path, 'rb'){|file| file.read}
|
1709
1710
|
obj = Image.new(src)
|
1710
1711
|
assert_nothing_raised { obj.image }
|
1711
1712
|
path = File.expand_path('data/logo.png', File.dirname(__FILE__))
|
@@ -1714,10 +1715,10 @@ endobj
|
|
1714
1715
|
end
|
1715
1716
|
def test_indexed
|
1716
1717
|
path = File.expand_path('data/index.pdfobj', File.dirname(__FILE__))
|
1717
|
-
src =
|
1718
|
+
src = open(path, 'rb'){|file| file.read}
|
1718
1719
|
index = Stream.new(src)
|
1719
1720
|
path = File.expand_path('data/indexed.pdfobj', File.dirname(__FILE__))
|
1720
|
-
src =
|
1721
|
+
src = open(path, 'rb'){|file| file.read}
|
1721
1722
|
obj = Image.new(src)
|
1722
1723
|
obj.build_tree(51 => index)
|
1723
1724
|
assert_nothing_raised { obj.image }
|
@@ -1727,10 +1728,10 @@ endobj
|
|
1727
1728
|
end
|
1728
1729
|
def test_indexed_2bit
|
1729
1730
|
path = File.expand_path('data/index_2bit.pdfobj', File.dirname(__FILE__))
|
1730
|
-
src =
|
1731
|
+
src = open(path, 'rb'){|file| file.read}
|
1731
1732
|
index = Stream.new(src)
|
1732
1733
|
path = File.expand_path('data/indexed_2bit.pdfobj', File.dirname(__FILE__))
|
1733
|
-
src =
|
1734
|
+
src = open(path, 'rb'){|file| file.read}
|
1734
1735
|
obj = Image.new(src)
|
1735
1736
|
obj.build_tree(21 => index)
|
1736
1737
|
assert_nothing_raised { obj.image }
|
@@ -1740,10 +1741,10 @@ endobj
|
|
1740
1741
|
end
|
1741
1742
|
def test_indexed_masked
|
1742
1743
|
path = File.expand_path('data/index_masked.pdfobj', File.dirname(__FILE__))
|
1743
|
-
src =
|
1744
|
+
src = open(path, 'rb'){|file| file.read}
|
1744
1745
|
index = Stream.new(src)
|
1745
1746
|
path = File.expand_path('data/indexed_masked.pdfobj', File.dirname(__FILE__))
|
1746
|
-
src =
|
1747
|
+
src = open(path, 'rb'){|file| file.read}
|
1747
1748
|
obj = Image.new(src)
|
1748
1749
|
obj.build_tree(21 => index)
|
1749
1750
|
assert_nothing_raised { obj.image }
|
@@ -1759,10 +1760,10 @@ endobj
|
|
1759
1760
|
end
|
1760
1761
|
def test_lzw_image
|
1761
1762
|
path = File.expand_path('data/lzw_index.pdfobj', File.dirname(__FILE__))
|
1762
|
-
src =
|
1763
|
+
src = open(path, 'rb'){|file| file.read}
|
1763
1764
|
index = Stream.new(src)
|
1764
1765
|
path = File.expand_path('data/lzw.pdfobj', File.dirname(__FILE__))
|
1765
|
-
src =
|
1766
|
+
src = open(path, 'rb'){|file| file.read}
|
1766
1767
|
obj = Image.new(src)
|
1767
1768
|
obj.build_tree(21 => index)
|
1768
1769
|
assert_nothing_raised { obj.image }
|
data/test/test_pdf_parser.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
2
|
+
# encoding: ascii-8bit
|
3
3
|
# Rpdf2txt -- PDF to Text Parser
|
4
4
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss, Masaomi Hatakeyama
|
5
5
|
#
|
@@ -75,7 +75,7 @@ class TestParser < Test::Unit::TestCase
|
|
75
75
|
end
|
76
76
|
def setup
|
77
77
|
file = File.expand_path('./data/page_tree.pdf', File.dirname(__FILE__))
|
78
|
-
input =
|
78
|
+
input = open(file, 'rb'){|file| file.read}
|
79
79
|
@parser = Rpdf2txt::Parser.new(input)
|
80
80
|
end
|
81
81
|
def test_object_catalogue
|
@@ -101,7 +101,7 @@ class TestParser < Test::Unit::TestCase
|
|
101
101
|
end
|
102
102
|
def test_rebuild_object_catalogue
|
103
103
|
file = File.expand_path('./data/encrypted_object_stream.pdf', File.dirname(__FILE__))
|
104
|
-
input =
|
104
|
+
input = open(file, 'rb'){|file| file.read}
|
105
105
|
parser = Rpdf2txt::Parser.new(input)
|
106
106
|
cat = parser.object_catalogue
|
107
107
|
assert_equal(3, cat.length)
|
@@ -322,7 +322,7 @@ endobj
|
|
322
322
|
leaf = Rpdf2txt::PageLeaf.new
|
323
323
|
expected = <<-EOS
|
324
324
|
Paroxetin besitzt eine selektive Wirkung; in-vitro Studien haben gezeigt, dass es, im Gegensatz zu
|
325
|
-
trizyklischen Antidepressiva, eine geringe Affinit\344t f\374r
|
325
|
+
trizyklischen Antidepressiva, eine geringe Affinit\344t f\374r a1-, a2- und b-Adrenozeptoren sowie f\374r
|
326
326
|
Dopamin (D2)-, 5-HT1-artige, 5-HT2 und Histamin (H1)-Rezeptoren aufweist. Das Fehlen einer
|
327
327
|
EOS
|
328
328
|
handler = Rpdf2txt::SimpleHandler.new
|
@@ -464,7 +464,9 @@ endobj
|
|
464
464
|
36 => 8805,
|
465
465
|
}
|
466
466
|
font2.cmap = cmap
|
467
|
-
|
467
|
+
#require 'pp'
|
468
|
+
#print "font2="
|
469
|
+
#pp font2
|
468
470
|
fonts = {
|
469
471
|
:tt0 => font0,
|
470
472
|
:tt2 => font2,
|
@@ -472,6 +474,7 @@ endobj
|
|
472
474
|
txt.current_page = FontDonorStub.new(fonts)
|
473
475
|
leaf = Rpdf2txt::PageLeaf.new
|
474
476
|
expected = "In Studie 1 evaluierte man 271 Patienten mit einer m\344ssigen bis schweren aktiven rheumatoiden \nArthritis, die \26318 Jahre alt waren, bei denen die Therapie mit mindestens einem, aber mit nicht mehr \n"
|
477
|
+
#expected = "In Studie 1 evaluierte man 271 Patienten mit einer m\344ssigen bis schweren aktiven rheumatoiden \nArthritis, die 18 Jahre alt waren, bei denen die Therapie mit mindestens einem, aber mit nicht mehr \n"
|
475
478
|
handler = Rpdf2txt::SimpleHandler.new
|
476
479
|
leaf.join_snippets(txt.scan, handler)
|
477
480
|
result = handler.out
|
data/test/test_pdf_text.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
#
|
3
4
|
# Rpdf2txt -- PDF to Text Parser
|
4
5
|
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
|
@@ -36,7 +37,7 @@ module Rpdf2txt
|
|
36
37
|
class TestText < Test::Unit::TestCase
|
37
38
|
def setup
|
38
39
|
path = File.expand_path("./data/test_text.txt", File.dirname(__FILE__))
|
39
|
-
src =
|
40
|
+
src = open(path, 'rb'){|file| file.read}
|
40
41
|
#@handler = Rpdf2txt::HTMLHandler.new
|
41
42
|
@text=Rpdf2txt::Text.new(src)
|
42
43
|
end
|
@@ -78,7 +79,7 @@ ET
|
|
78
79
|
ast = Rpdf2txt.text_parser.parse(@text.src)
|
79
80
|
assert_equal("-0.0002", ast.values.first.charspace.value)
|
80
81
|
text_state = Mock.new("text_state")
|
81
|
-
text_state.__next(:transformation_matrix=) {}
|
82
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
82
83
|
@text.text_state = text_state
|
83
84
|
text_state.__next(:set_char_spacing){|value|
|
84
85
|
assert_equal("-0.0002", value)
|
@@ -94,7 +95,7 @@ BT
|
|
94
95
|
ET
|
95
96
|
EOS
|
96
97
|
text_state = Mock.new("text_state")
|
97
|
-
text_state.__next(:transformation_matrix=) {}
|
98
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
98
99
|
@text.text_state = text_state
|
99
100
|
text_state.__next(:update_x){ |x|
|
100
101
|
assert_equal(-36.7896, x)
|
@@ -116,10 +117,10 @@ BT
|
|
116
117
|
ET
|
117
118
|
EOS
|
118
119
|
text_state = Mock.new("text_state")
|
119
|
-
text_state.__next(:transformation_matrix=) {}
|
120
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
120
121
|
@text.text_state = text_state
|
121
|
-
text_state.__next(:update_x){}
|
122
|
-
text_state.__next(:update_y){}
|
122
|
+
text_state.__next(:update_x){|*x|}
|
123
|
+
text_state.__next(:update_y){|*x|}
|
123
124
|
@text.scan
|
124
125
|
text_state.__verify
|
125
126
|
end
|
@@ -132,7 +133,7 @@ BT
|
|
132
133
|
ET
|
133
134
|
EOS
|
134
135
|
text_state = Mock.new("text_state")
|
135
|
-
text_state.__next(:transformation_matrix=) {}
|
136
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
136
137
|
@text.text_state = text_state
|
137
138
|
current_page.__next(:font){ |font|
|
138
139
|
assert_equal(:f16, font)
|
@@ -151,7 +152,7 @@ BT
|
|
151
152
|
ET
|
152
153
|
EOS
|
153
154
|
text_state = Mock.new("text_state")
|
154
|
-
text_state.__next(:transformation_matrix=) {}
|
155
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
155
156
|
@text.text_state = text_state
|
156
157
|
current_page.__next(:font){ |font|
|
157
158
|
assert_equal(:c2_0, font)
|
@@ -169,7 +170,7 @@ BT
|
|
169
170
|
ET
|
170
171
|
EOS
|
171
172
|
text_state = Mock.new("text_state")
|
172
|
-
text_state.__next(:transformation_matrix=) {}
|
173
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
173
174
|
@text.text_state = text_state
|
174
175
|
text_state.__next(:set_xscale){|x|
|
175
176
|
assert_equal("10", x)
|
@@ -199,7 +200,7 @@ BT
|
|
199
200
|
ET
|
200
201
|
EOS
|
201
202
|
text_state = Mock.new("text_state")
|
202
|
-
text_state.__next(:transformation_matrix=) {}
|
203
|
+
text_state.__next(:transformation_matrix=) {|*x|}
|
203
204
|
@text.text_state = text_state
|
204
205
|
text_state.__next(:set_word_spacing){ |wordspace|
|
205
206
|
assert_equal('0.0000', wordspace)
|
data/test/test_space_bug_05_2004.rb
CHANGED
@@ -70,7 +70,8 @@ endobj
|
|
70
70
|
font30 = Font.new(font30_src) # WinAnsi Encoded
|
71
71
|
path = File.expand_path('data/space_bug_stream.txt',
|
72
72
|
File.dirname(__FILE__))
|
73
|
-
|
73
|
+
stream = open(path, 'rb'){|file| file.read}
|
74
|
+
stream = Stream.new(stream)
|
74
75
|
page = FontDonor.new
|
75
76
|
page.fonts = {
|
76
77
|
:f3 => font3,
|
data/test/test_stream.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: ascii-8bit
|
2
3
|
# TestStream -- rpdf2txt -- 01.06.2005 -- hwyss@ywesee.com
|
3
4
|
|
4
5
|
$: << File.expand_path('../lib', File.dirname(__FILE__))
|
@@ -32,13 +33,13 @@ BT /F1 16 Tf 1 0 0 -1 0 14.347 Tm(Ponstan
|
|
32
33
|
def test_decode_raw_stream
|
33
34
|
file = File.expand_path('./data/firststream',
|
34
35
|
File.dirname(__FILE__))
|
35
|
-
deflated =
|
36
|
+
deflated = open(file, 'rb'){|file| file.read}
|
36
37
|
src = "stream\n#{deflated}endstream"
|
37
38
|
stream = Rpdf2txt::Stream.new(src)
|
38
39
|
stream.attributes.store(:filter, '/FlateDecode')
|
39
40
|
file = File.expand_path('./data/test.txt',
|
40
41
|
File.dirname(__FILE__))
|
41
|
-
expected =
|
42
|
+
expected = open(file, 'rb'){|file| file.read}
|
42
43
|
assert_equal(expected, stream.decode_raw_stream)
|
43
44
|
end
|
44
45
|
def test_raw_stream
|
@@ -51,15 +52,15 @@ BT /F1 16 Tf 1 0 0 -1 0 14.347 Tm(Ponstan
|
|
51
52
|
end
|
52
53
|
def test_decoded_stream2
|
53
54
|
file = File.expand_path('./data/firststream', File.dirname(__FILE__))
|
54
|
-
@stream.raw_stream =
|
55
|
+
@stream.raw_stream = open(file, 'rb'){|file| file.read}
|
55
56
|
@stream.attributes[:filter] = "/FlateDecode"
|
56
57
|
file = File.expand_path('./data/test.txt', File.dirname(__FILE__))
|
57
|
-
expected =
|
58
|
+
expected = open(file, 'rb'){|file| file.read}
|
58
59
|
assert_equal(expected, @stream.decoded_stream)
|
59
60
|
end
|
60
61
|
def test_extract_text_objects
|
61
62
|
file = File.expand_path('./data/stream.txt', File.dirname(__FILE__))
|
62
|
-
@stream.decoded_stream =
|
63
|
+
@stream.decoded_stream = open(file, 'rb'){|file| file.read}
|
63
64
|
result = @stream.extract_text_objects(nil, TextState.new).select { |res|
|
64
65
|
res.is_a?(TextState)
|
65
66
|
}
|