pdf-reader 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -2
- data/{README → README.rdoc} +27 -47
- data/Rakefile +5 -4
- data/TODO +3 -1
- data/bin/pdf_list_callbacks +1 -5
- data/bin/pdf_object +43 -0
- data/bin/pdf_text +1 -0
- data/lib/pdf/reader.rb +25 -7
- data/lib/pdf/reader/buffer.rb +3 -1
- data/lib/pdf/reader/content.rb +56 -48
- data/lib/pdf/reader/encoding.rb +82 -1088
- data/lib/pdf/reader/encodings/mac_expert.txt +159 -0
- data/lib/pdf/reader/encodings/mac_roman.txt +128 -0
- data/lib/pdf/reader/encodings/pdf_doc.txt +40 -0
- data/lib/pdf/reader/encodings/standard.txt +47 -0
- data/lib/pdf/reader/encodings/symbol.txt +154 -0
- data/lib/pdf/reader/encodings/win_ansi.txt +29 -0
- data/lib/pdf/reader/encodings/zapf_dingbats.txt +201 -0
- data/lib/pdf/reader/error.rb +1 -0
- data/lib/pdf/reader/font.rb +4 -3
- data/lib/pdf/reader/parser.rb +1 -0
- data/lib/pdf/reader/print_receiver.rb +19 -0
- data/lib/pdf/reader/xref.rb +12 -0
- metadata +26 -17
- data/lib/pdf/reader/parser.rb.rej +0 -29
@@ -0,0 +1,29 @@
|
|
1
|
+
# A mapping of WinAnsi (win-1252) characters to unicode. Anything
|
2
|
+
# not specified is left unchanged
|
3
|
+
80;20AC
|
4
|
+
82;201A
|
5
|
+
83;0192
|
6
|
+
84;201E
|
7
|
+
85;2026
|
8
|
+
86;2020
|
9
|
+
87;2021
|
10
|
+
88;02C6
|
11
|
+
89;2030
|
12
|
+
8A;0160
|
13
|
+
8B;2039
|
14
|
+
8C;0152
|
15
|
+
8E;017D
|
16
|
+
91;2018
|
17
|
+
92;2019
|
18
|
+
93;201C
|
19
|
+
94;201D
|
20
|
+
95;2022
|
21
|
+
96;2013
|
22
|
+
97;2014
|
23
|
+
98;02DC
|
24
|
+
99;2122
|
25
|
+
9A;0161
|
26
|
+
9B;203A
|
27
|
+
9C;0153
|
28
|
+
9E;017E
|
29
|
+
9F;0178
|
@@ -0,0 +1,201 @@
|
|
1
|
+
21;2701
|
2
|
+
22;2702
|
3
|
+
23;2703
|
4
|
+
24;2704
|
5
|
+
25;260E
|
6
|
+
26;2706
|
7
|
+
27;2707
|
8
|
+
28;2708
|
9
|
+
29;2709
|
10
|
+
2A;261B
|
11
|
+
2B;261E
|
12
|
+
2C;270C
|
13
|
+
2D;270D
|
14
|
+
2E;270E
|
15
|
+
2F;270F
|
16
|
+
30;2710
|
17
|
+
31;2711
|
18
|
+
32;2712
|
19
|
+
33;2713
|
20
|
+
34;2714
|
21
|
+
35;2715
|
22
|
+
36;2716
|
23
|
+
37;2717
|
24
|
+
38;2718
|
25
|
+
39;2719
|
26
|
+
3A;271A
|
27
|
+
3B;271B
|
28
|
+
3C;271C
|
29
|
+
3D;271D
|
30
|
+
3E;271E
|
31
|
+
3F;271F
|
32
|
+
40;2720
|
33
|
+
41;2721
|
34
|
+
42;2722
|
35
|
+
43;2723
|
36
|
+
44;2724
|
37
|
+
45;2725
|
38
|
+
46;2726
|
39
|
+
47;2727
|
40
|
+
48;2605
|
41
|
+
49;2729
|
42
|
+
4A;272A
|
43
|
+
4B;272B
|
44
|
+
4C;272C
|
45
|
+
4D;272D
|
46
|
+
4E;272E
|
47
|
+
4F;272F
|
48
|
+
50;2730
|
49
|
+
51;2731
|
50
|
+
52;2732
|
51
|
+
53;2733
|
52
|
+
54;2734
|
53
|
+
55;2735
|
54
|
+
56;2736
|
55
|
+
57;2737
|
56
|
+
58;2738
|
57
|
+
59;2739
|
58
|
+
5A;273A
|
59
|
+
5B;273B
|
60
|
+
5C;273C
|
61
|
+
5D;273D
|
62
|
+
5E;273E
|
63
|
+
5F;273F
|
64
|
+
60;2740
|
65
|
+
61;2741
|
66
|
+
62;2742
|
67
|
+
63;2743
|
68
|
+
64;2744
|
69
|
+
65;2745
|
70
|
+
66;2746
|
71
|
+
67;2747
|
72
|
+
68;2748
|
73
|
+
69;2749
|
74
|
+
6A;274A
|
75
|
+
6B;274B
|
76
|
+
6C;25CF
|
77
|
+
6D;274D
|
78
|
+
6E;25A0
|
79
|
+
6F;274F
|
80
|
+
70;2750
|
81
|
+
71;2751
|
82
|
+
72;2752
|
83
|
+
73;25B2
|
84
|
+
74;25BC
|
85
|
+
75;25C6
|
86
|
+
76;2756
|
87
|
+
77;25D7
|
88
|
+
78;2758
|
89
|
+
79;2759
|
90
|
+
7A;275A
|
91
|
+
7B;275B
|
92
|
+
7C;275C
|
93
|
+
7D;275D
|
94
|
+
7E;275E
|
95
|
+
80;F8D7
|
96
|
+
81;F8D8
|
97
|
+
82;F8D9
|
98
|
+
83;F8DA
|
99
|
+
84;F8DB
|
100
|
+
85;F8DC
|
101
|
+
86;F8DD
|
102
|
+
87;F8DE
|
103
|
+
88;F8DF
|
104
|
+
89;F8E0
|
105
|
+
8A;F8E1
|
106
|
+
8B;F8E2
|
107
|
+
8C;F8E3
|
108
|
+
8D;F8E4
|
109
|
+
A1;2761
|
110
|
+
A2;2762
|
111
|
+
A3;2763
|
112
|
+
A4;2764
|
113
|
+
A5;2765
|
114
|
+
A6;2766
|
115
|
+
A7;2767
|
116
|
+
A8;2663
|
117
|
+
A9;2666
|
118
|
+
AA;2665
|
119
|
+
AB;2660
|
120
|
+
AC;2460
|
121
|
+
AD;2461
|
122
|
+
AE;2462
|
123
|
+
AF;2463
|
124
|
+
B0;2464
|
125
|
+
B1;2465
|
126
|
+
B2;2466
|
127
|
+
B3;2467
|
128
|
+
B4;2468
|
129
|
+
B5;2469
|
130
|
+
B6;2776
|
131
|
+
B7;2777
|
132
|
+
B8;2778
|
133
|
+
B9;2779
|
134
|
+
BA;277A
|
135
|
+
BB;277B
|
136
|
+
BC;277C
|
137
|
+
BD;277D
|
138
|
+
BE;277E
|
139
|
+
BF;277F
|
140
|
+
C0;2780
|
141
|
+
C1;2781
|
142
|
+
C2;2782
|
143
|
+
C3;2783
|
144
|
+
C4;2784
|
145
|
+
C5;2785
|
146
|
+
C6;2786
|
147
|
+
C7;2787
|
148
|
+
C8;2788
|
149
|
+
C9;2789
|
150
|
+
CA;278A
|
151
|
+
CB;278B
|
152
|
+
CC;278C
|
153
|
+
CD;278D
|
154
|
+
CE;278E
|
155
|
+
CF;278F
|
156
|
+
D0;2790
|
157
|
+
D1;2791
|
158
|
+
D2;2792
|
159
|
+
D3;2793
|
160
|
+
D4;2794
|
161
|
+
D5;2192
|
162
|
+
D6;2194
|
163
|
+
D7;2195
|
164
|
+
D8;2798
|
165
|
+
D9;2799
|
166
|
+
DA;279A
|
167
|
+
DB;279B
|
168
|
+
DC;279C
|
169
|
+
DD;279D
|
170
|
+
DE;279E
|
171
|
+
DF;279F
|
172
|
+
E0;27A0
|
173
|
+
E1;27A1
|
174
|
+
E2;27A2
|
175
|
+
E3;27A3
|
176
|
+
E4;27A4
|
177
|
+
E5;27A5
|
178
|
+
E6;27A6
|
179
|
+
E7;27A7
|
180
|
+
E8;27A8
|
181
|
+
E9;27A9
|
182
|
+
EA;27AA
|
183
|
+
EB;27AB
|
184
|
+
EC;27AC
|
185
|
+
ED;27AD
|
186
|
+
EE;27AE
|
187
|
+
EF;27AF
|
188
|
+
F1;27B1
|
189
|
+
F2;27B2
|
190
|
+
F3;27B3
|
191
|
+
F4;27B4
|
192
|
+
F5;27B5
|
193
|
+
F6;27B6
|
194
|
+
F7;27B7
|
195
|
+
F8;27B8
|
196
|
+
F9;27B9
|
197
|
+
FA;27BA
|
198
|
+
FB;27BB
|
199
|
+
FC;27BC
|
200
|
+
FD;27BD
|
201
|
+
FE;27BE
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -48,6 +48,7 @@ class PDF::Reader
|
|
48
48
|
end
|
49
49
|
################################################################################
|
50
50
|
class MalformedPDFError < RuntimeError; end
|
51
|
+
class InvalidObjectError < MalformedPDFError; end
|
51
52
|
class UnsupportedFeatureError < RuntimeError; end
|
52
53
|
end
|
53
54
|
################################################################################
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -52,10 +52,11 @@ class PDF::Reader
|
|
52
52
|
# with encoding= if required
|
53
53
|
case font
|
54
54
|
when "Symbol" then
|
55
|
-
self.encoding = PDF::Reader::Encoding.
|
55
|
+
self.encoding = PDF::Reader::Encoding.new("SymbolEncoding")
|
56
56
|
when "ZapfDingbats" then
|
57
|
-
self.encoding = PDF::Reader::Encoding.
|
57
|
+
self.encoding = PDF::Reader::Encoding.new("ZapfDingbatsEncoding")
|
58
58
|
end
|
59
|
+
@basefont = font
|
59
60
|
end
|
60
61
|
|
61
62
|
def to_utf8(params)
|
@@ -65,7 +66,7 @@ class PDF::Reader
|
|
65
66
|
# translate the bytestream into a UTF-8 string.
|
66
67
|
# If an encoding hasn't been specified, assume the text using this
|
67
68
|
# font is in Adobe Standard Encoding.
|
68
|
-
(encoding || PDF::Reader::Encoding
|
69
|
+
(encoding || PDF::Reader::Encoding.new(:StandardEncoding)).to_utf8(params, tounicode)
|
69
70
|
elsif params.class == Array
|
70
71
|
params.collect { |param| self.to_utf8(param) }
|
71
72
|
else
|
data/lib/pdf/reader/parser.rb
CHANGED
data/lib/pdf/reader/print_receiver.rb
@@ -0,0 +1,19 @@
|
|
1
|
+
class PDF::Reader
|
2
|
+
class PrintReceiver
|
3
|
+
|
4
|
+
attr_accessor :callbacks
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@callbacks = []
|
8
|
+
end
|
9
|
+
|
10
|
+
def respond_to?(meth)
|
11
|
+
return false if [:begin_inline_image_data].include?(meth)
|
12
|
+
true
|
13
|
+
end
|
14
|
+
|
15
|
+
def method_missing(methodname, *args)
|
16
|
+
puts "#{methodname} => #{args.inspect}"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -37,6 +37,16 @@ class PDF::Reader
|
|
37
37
|
@xref = {}
|
38
38
|
end
|
39
39
|
################################################################################
|
40
|
+
# returns the PDF version of the current document. Technically this isn't part of the XRef
|
41
|
+
# table, but it is one of the lowest level data items in the file, so we've lumped it in
|
42
|
+
# with the cross reference code.
|
43
|
+
def pdf_version
|
44
|
+
@buffer.seek(0)
|
45
|
+
m, version = *@buffer.read(8).match(/%PDF-(\d.\d)/)
|
46
|
+
raise MalformedPDFError, 'invalid PDF version' if version.nil?
|
47
|
+
return version.to_f
|
48
|
+
end
|
49
|
+
################################################################################
|
40
50
|
# Read the xref table from the underlying buffer. If offset is specified the table
|
41
51
|
# will be loaded from there, otherwise the default offset will be located and used.
|
42
52
|
#
|
@@ -122,6 +132,8 @@ class PDF::Reader
|
|
122
132
|
# ref - a PDF::Reader::Reference object containing an object ID and revision number
|
123
133
|
def offset_for (ref)
|
124
134
|
@xref[ref.id][ref.gen]
|
135
|
+
rescue
|
136
|
+
raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
|
125
137
|
end
|
126
138
|
################################################################################
|
127
139
|
# Stores an offset value for a particular PDF object ID and revision number
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Jones
|
@@ -9,44 +9,53 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-06-11 00:00:00 +10:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
16
|
description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
|
17
17
|
email: pjones@pmade.com
|
18
18
|
executables:
|
19
|
+
- pdf_object
|
19
20
|
- pdf_text
|
20
21
|
- pdf_list_callbacks
|
21
22
|
extensions: []
|
22
23
|
|
23
24
|
extra_rdoc_files:
|
24
|
-
- README
|
25
|
+
- README.rdoc
|
25
26
|
- TODO
|
26
27
|
- CHANGELOG
|
27
28
|
files:
|
28
29
|
- lib/pdf
|
30
|
+
- lib/pdf/reader.rb
|
29
31
|
- lib/pdf/reader
|
30
|
-
- lib/pdf/reader/explore.rb
|
31
|
-
- lib/pdf/reader/reference.rb
|
32
|
-
- lib/pdf/reader/xref.rb
|
33
|
-
- lib/pdf/reader/token.rb
|
34
|
-
- lib/pdf/reader/filter.rb
|
35
|
-
- lib/pdf/reader/text_receiver.rb
|
36
32
|
- lib/pdf/reader/buffer.rb
|
37
|
-
- lib/pdf/reader/error.rb
|
38
|
-
- lib/pdf/reader/content.rb
|
39
|
-
- lib/pdf/reader/parser.rb
|
40
33
|
- lib/pdf/reader/cmap.rb
|
34
|
+
- lib/pdf/reader/content.rb
|
41
35
|
- lib/pdf/reader/encoding.rb
|
42
|
-
- lib/pdf/reader/
|
36
|
+
- lib/pdf/reader/error.rb
|
37
|
+
- lib/pdf/reader/explore.rb
|
38
|
+
- lib/pdf/reader/filter.rb
|
43
39
|
- lib/pdf/reader/font.rb
|
44
40
|
- lib/pdf/reader/glyphlist.txt
|
41
|
+
- lib/pdf/reader/parser.rb
|
42
|
+
- lib/pdf/reader/xref.rb
|
43
|
+
- lib/pdf/reader/reference.rb
|
44
|
+
- lib/pdf/reader/register_receiver.rb
|
45
|
+
- lib/pdf/reader/text_receiver.rb
|
46
|
+
- lib/pdf/reader/token.rb
|
47
|
+
- lib/pdf/reader/encodings
|
48
|
+
- lib/pdf/reader/encodings/mac_expert.txt
|
49
|
+
- lib/pdf/reader/encodings/mac_roman.txt
|
50
|
+
- lib/pdf/reader/encodings/pdf_doc.txt
|
51
|
+
- lib/pdf/reader/encodings/standard.txt
|
52
|
+
- lib/pdf/reader/encodings/symbol.txt
|
53
|
+
- lib/pdf/reader/encodings/win_ansi.txt
|
54
|
+
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
45
55
|
- lib/pdf/reader/stream.rb
|
46
|
-
- lib/pdf/reader/
|
47
|
-
- lib/pdf/reader.rb
|
56
|
+
- lib/pdf/reader/print_receiver.rb
|
48
57
|
- Rakefile
|
49
|
-
- README
|
58
|
+
- README.rdoc
|
50
59
|
- TODO
|
51
60
|
- CHANGELOG
|
52
61
|
has_rdoc: true
|
@@ -56,7 +65,7 @@ rdoc_options:
|
|
56
65
|
- --title
|
57
66
|
- PDF::Reader Documentation
|
58
67
|
- --main
|
59
|
-
- README
|
68
|
+
- README.rdoc
|
60
69
|
- -q
|
61
70
|
require_paths:
|
62
71
|
- lib
|
@@ -1,29 +0,0 @@
|
|
1
|
-
***************
|
2
|
-
*** 173,178 ****
|
3
|
-
|
4
|
-
obj = parse_token
|
5
|
-
post_obj = parse_token
|
6
|
-
case post_obj
|
7
|
-
when "endobj" then return [obj,nil]
|
8
|
-
when "stream" then return [obj, stream(obj)]
|
9
|
-
--- 173,192 ----
|
10
|
-
|
11
|
-
obj = parse_token
|
12
|
-
post_obj = parse_token
|
13
|
-
+
|
14
|
-
+ if obj.class == Array
|
15
|
-
+ newobj = Array.new
|
16
|
-
+ obj.each_index {|idx|
|
17
|
-
+ if obj[idx].class == PDF::Reader::Reference
|
18
|
-
+ xo, xs = @xref.object(obj[idx])
|
19
|
-
+ if xs
|
20
|
-
+ newobj << xs
|
21
|
-
+ end
|
22
|
-
+ end
|
23
|
-
+ }
|
24
|
-
+ return newobj.flatten
|
25
|
-
+ end
|
26
|
-
+
|
27
|
-
case post_obj
|
28
|
-
when "endobj" then return [obj,nil]
|
29
|
-
when "stream" then return [obj, stream(obj)]
|