fireinc-pdf-reader 0.11.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +168 -0
- data/MIT-LICENSE +21 -0
- data/README.rdoc +137 -0
- data/Rakefile +34 -0
- data/TODO +45 -0
- data/bin/pdf_list_callbacks +15 -0
- data/bin/pdf_object +48 -0
- data/bin/pdf_text +15 -0
- data/examples/callbacks.rb +21 -0
- data/examples/extract_bates.rb +49 -0
- data/examples/extract_images.rb +108 -0
- data/examples/hash.rb +12 -0
- data/examples/metadata.rb +25 -0
- data/examples/page_counter_improved.rb +23 -0
- data/examples/page_counter_naive.rb +24 -0
- data/examples/rspec.rb +57 -0
- data/examples/text.rb +40 -0
- data/examples/version.rb +25 -0
- data/lib/pdf/hash.rb +15 -0
- data/lib/pdf/reader/abstract_strategy.rb +81 -0
- data/lib/pdf/reader/buffer.rb +346 -0
- data/lib/pdf/reader/cmap.rb +138 -0
- data/lib/pdf/reader/encoding.rb +190 -0
- data/lib/pdf/reader/encodings/mac_expert.txt +159 -0
- data/lib/pdf/reader/encodings/mac_roman.txt +128 -0
- data/lib/pdf/reader/encodings/pdf_doc.txt +40 -0
- data/lib/pdf/reader/encodings/standard.txt +47 -0
- data/lib/pdf/reader/encodings/symbol.txt +154 -0
- data/lib/pdf/reader/encodings/win_ansi.txt +29 -0
- data/lib/pdf/reader/encodings/zapf_dingbats.txt +201 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/filter.rb +219 -0
- data/lib/pdf/reader/font.rb +133 -0
- data/lib/pdf/reader/form_xobject.rb +83 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/lzw.rb +123 -0
- data/lib/pdf/reader/metadata_strategy.rb +56 -0
- data/lib/pdf/reader/object_cache.rb +85 -0
- data/lib/pdf/reader/object_hash.rb +289 -0
- data/lib/pdf/reader/object_stream.rb +51 -0
- data/lib/pdf/reader/page.rb +185 -0
- data/lib/pdf/reader/page_text_receiver.rb +278 -0
- data/lib/pdf/reader/pages_strategy.rb +475 -0
- data/lib/pdf/reader/parser.rb +225 -0
- data/lib/pdf/reader/print_receiver.rb +18 -0
- data/lib/pdf/reader/reference.rb +66 -0
- data/lib/pdf/reader/register_receiver.rb +95 -0
- data/lib/pdf/reader/stream.rb +69 -0
- data/lib/pdf/reader/text_receiver.rb +264 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +220 -0
- data/lib/pdf/reader.rb +296 -0
- data/lib/pdf-reader.rb +1 -0
- metadata +211 -0
@@ -0,0 +1,154 @@
|
|
1
|
+
22;2200
|
2
|
+
24;2203
|
3
|
+
27;220B
|
4
|
+
2A;2217
|
5
|
+
2D;2212
|
6
|
+
40;2245
|
7
|
+
41;0391
|
8
|
+
42;0392
|
9
|
+
43;03A7
|
10
|
+
44;0394
|
11
|
+
45;0395
|
12
|
+
46;03A6
|
13
|
+
47;0393
|
14
|
+
48;0397
|
15
|
+
49;0399
|
16
|
+
4A;03D1
|
17
|
+
4B;039A
|
18
|
+
4C;039B
|
19
|
+
4D;039C
|
20
|
+
4E;039D
|
21
|
+
4F;039F
|
22
|
+
50;03A0
|
23
|
+
51;0398
|
24
|
+
52;03A1
|
25
|
+
53;03A3
|
26
|
+
54;03A4
|
27
|
+
55;03A5
|
28
|
+
56;03C2
|
29
|
+
57;03A9
|
30
|
+
58;039E
|
31
|
+
59;03A8
|
32
|
+
5A;0396
|
33
|
+
5C;2234
|
34
|
+
5E;22A5
|
35
|
+
60;F8E5
|
36
|
+
61;03B1
|
37
|
+
62;03B2
|
38
|
+
63;03C7
|
39
|
+
64;03B4
|
40
|
+
65;03B5
|
41
|
+
66;03C6
|
42
|
+
67;03B3
|
43
|
+
68;03B7
|
44
|
+
69;03B9
|
45
|
+
6A;03D5
|
46
|
+
6B;03BA
|
47
|
+
6C;03BB
|
48
|
+
6D;03BC
|
49
|
+
6E;03BD
|
50
|
+
6F;03BF
|
51
|
+
70;03C0
|
52
|
+
71;03B8
|
53
|
+
72;03C1
|
54
|
+
73;03C3
|
55
|
+
74;03C4
|
56
|
+
75;03C5
|
57
|
+
76;03D6
|
58
|
+
77;03C9
|
59
|
+
78;03BE
|
60
|
+
79;03C8
|
61
|
+
7A;03B6
|
62
|
+
7E;223C
|
63
|
+
A0;20AC
|
64
|
+
A1;03D2
|
65
|
+
A2;2032
|
66
|
+
A3;2264
|
67
|
+
A4;2215
|
68
|
+
A5;221E
|
69
|
+
A6;0192
|
70
|
+
A7;2663
|
71
|
+
A8;2666
|
72
|
+
A9;2665
|
73
|
+
AA;2660
|
74
|
+
AB;2194
|
75
|
+
AC;2190
|
76
|
+
AD;2191
|
77
|
+
AE;2192
|
78
|
+
AF;2193
|
79
|
+
B2;2033
|
80
|
+
B3;2265
|
81
|
+
B4;00D7
|
82
|
+
B5;221D
|
83
|
+
B6;2202
|
84
|
+
B7;2022
|
85
|
+
B8;00F7
|
86
|
+
B9;2260
|
87
|
+
BA;2261
|
88
|
+
BB;2248
|
89
|
+
BC;2026
|
90
|
+
BD;F8E6
|
91
|
+
BE;F8E7
|
92
|
+
BF;21B5
|
93
|
+
C0;2135
|
94
|
+
C1;2111
|
95
|
+
C2;211C
|
96
|
+
C3;2118
|
97
|
+
C4;2297
|
98
|
+
C5;2295
|
99
|
+
C6;2205
|
100
|
+
C7;2229
|
101
|
+
C8;222A
|
102
|
+
C9;2283
|
103
|
+
CA;2287
|
104
|
+
CB;2284
|
105
|
+
CC;2282
|
106
|
+
CD;2286
|
107
|
+
CE;2208
|
108
|
+
CF;2209
|
109
|
+
D0;2220
|
110
|
+
D1;2207
|
111
|
+
D2;F6DA
|
112
|
+
D3;F6D9
|
113
|
+
D4;F6DB
|
114
|
+
D5;220F
|
115
|
+
D6;221A
|
116
|
+
D7;22C5
|
117
|
+
D8;00AC
|
118
|
+
D9;2227
|
119
|
+
DA;2228
|
120
|
+
DB;21D4
|
121
|
+
DC;21D0
|
122
|
+
DD;21D1
|
123
|
+
DE;21D2
|
124
|
+
DF;21D3
|
125
|
+
E0;25CA
|
126
|
+
E1;2329
|
127
|
+
E2;F8E8
|
128
|
+
E3;F8E9
|
129
|
+
E4;F8EA
|
130
|
+
E5;2211
|
131
|
+
E6;F8EB
|
132
|
+
E7;F8EC
|
133
|
+
E8;F8ED
|
134
|
+
E9;F8EE
|
135
|
+
EA;F8EF
|
136
|
+
EB;F8F0
|
137
|
+
EC;F8F1
|
138
|
+
ED;F8F2
|
139
|
+
EE;F8F3
|
140
|
+
EF;F8F4
|
141
|
+
F1;232A
|
142
|
+
F2;222B
|
143
|
+
F3;2320
|
144
|
+
F4;F8F5
|
145
|
+
F5;2321
|
146
|
+
F6;F8F6
|
147
|
+
F7;F8F7
|
148
|
+
F8;F8F8
|
149
|
+
F9;F8F9
|
150
|
+
FA;F8FA
|
151
|
+
FB;F8FB
|
152
|
+
FC;F8FC
|
153
|
+
FD;F8FD
|
154
|
+
FE;F8FE
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# A mapping of WinAnsi (win-1252) characters to unicode. Anything
|
2
|
+
# not specified is left unchanged
|
3
|
+
80;20AC
|
4
|
+
82;201A
|
5
|
+
83;0192
|
6
|
+
84;201E
|
7
|
+
85;2026
|
8
|
+
86;2020
|
9
|
+
87;2021
|
10
|
+
88;02C6
|
11
|
+
89;2030
|
12
|
+
8A;0160
|
13
|
+
8B;2039
|
14
|
+
8C;0152
|
15
|
+
8E;017D
|
16
|
+
91;2018
|
17
|
+
92;2019
|
18
|
+
93;201C
|
19
|
+
94;201D
|
20
|
+
95;2022
|
21
|
+
96;2013
|
22
|
+
97;2014
|
23
|
+
98;02DC
|
24
|
+
99;2122
|
25
|
+
9A;0161
|
26
|
+
9B;203A
|
27
|
+
9C;0153
|
28
|
+
9E;017E
|
29
|
+
9F;0178
|
@@ -0,0 +1,201 @@
|
|
1
|
+
21;2701
|
2
|
+
22;2702
|
3
|
+
23;2703
|
4
|
+
24;2704
|
5
|
+
25;260E
|
6
|
+
26;2706
|
7
|
+
27;2707
|
8
|
+
28;2708
|
9
|
+
29;2709
|
10
|
+
2A;261B
|
11
|
+
2B;261E
|
12
|
+
2C;270C
|
13
|
+
2D;270D
|
14
|
+
2E;270E
|
15
|
+
2F;270F
|
16
|
+
30;2710
|
17
|
+
31;2711
|
18
|
+
32;2712
|
19
|
+
33;2713
|
20
|
+
34;2714
|
21
|
+
35;2715
|
22
|
+
36;2716
|
23
|
+
37;2717
|
24
|
+
38;2718
|
25
|
+
39;2719
|
26
|
+
3A;271A
|
27
|
+
3B;271B
|
28
|
+
3C;271C
|
29
|
+
3D;271D
|
30
|
+
3E;271E
|
31
|
+
3F;271F
|
32
|
+
40;2720
|
33
|
+
41;2721
|
34
|
+
42;2722
|
35
|
+
43;2723
|
36
|
+
44;2724
|
37
|
+
45;2725
|
38
|
+
46;2726
|
39
|
+
47;2727
|
40
|
+
48;2605
|
41
|
+
49;2729
|
42
|
+
4A;272A
|
43
|
+
4B;272B
|
44
|
+
4C;272C
|
45
|
+
4D;272D
|
46
|
+
4E;272E
|
47
|
+
4F;272F
|
48
|
+
50;2730
|
49
|
+
51;2731
|
50
|
+
52;2732
|
51
|
+
53;2733
|
52
|
+
54;2734
|
53
|
+
55;2735
|
54
|
+
56;2736
|
55
|
+
57;2737
|
56
|
+
58;2738
|
57
|
+
59;2739
|
58
|
+
5A;273A
|
59
|
+
5B;273B
|
60
|
+
5C;273C
|
61
|
+
5D;273D
|
62
|
+
5E;273E
|
63
|
+
5F;273F
|
64
|
+
60;2740
|
65
|
+
61;2741
|
66
|
+
62;2742
|
67
|
+
63;2743
|
68
|
+
64;2744
|
69
|
+
65;2745
|
70
|
+
66;2746
|
71
|
+
67;2747
|
72
|
+
68;2748
|
73
|
+
69;2749
|
74
|
+
6A;274A
|
75
|
+
6B;274B
|
76
|
+
6C;25CF
|
77
|
+
6D;274D
|
78
|
+
6E;25A0
|
79
|
+
6F;274F
|
80
|
+
70;2750
|
81
|
+
71;2751
|
82
|
+
72;2752
|
83
|
+
73;25B2
|
84
|
+
74;25BC
|
85
|
+
75;25C6
|
86
|
+
76;2756
|
87
|
+
77;25D7
|
88
|
+
78;2758
|
89
|
+
79;2759
|
90
|
+
7A;275A
|
91
|
+
7B;275B
|
92
|
+
7C;275C
|
93
|
+
7D;275D
|
94
|
+
7E;275E
|
95
|
+
80;F8D7
|
96
|
+
81;F8D8
|
97
|
+
82;F8D9
|
98
|
+
83;F8DA
|
99
|
+
84;F8DB
|
100
|
+
85;F8DC
|
101
|
+
86;F8DD
|
102
|
+
87;F8DE
|
103
|
+
88;F8DF
|
104
|
+
89;F8E0
|
105
|
+
8A;F8E1
|
106
|
+
8B;F8E2
|
107
|
+
8C;F8E3
|
108
|
+
8D;F8E4
|
109
|
+
A1;2761
|
110
|
+
A2;2762
|
111
|
+
A3;2763
|
112
|
+
A4;2764
|
113
|
+
A5;2765
|
114
|
+
A6;2766
|
115
|
+
A7;2767
|
116
|
+
A8;2663
|
117
|
+
A9;2666
|
118
|
+
AA;2665
|
119
|
+
AB;2660
|
120
|
+
AC;2460
|
121
|
+
AD;2461
|
122
|
+
AE;2462
|
123
|
+
AF;2463
|
124
|
+
B0;2464
|
125
|
+
B1;2465
|
126
|
+
B2;2466
|
127
|
+
B3;2467
|
128
|
+
B4;2468
|
129
|
+
B5;2469
|
130
|
+
B6;2776
|
131
|
+
B7;2777
|
132
|
+
B8;2778
|
133
|
+
B9;2779
|
134
|
+
BA;277A
|
135
|
+
BB;277B
|
136
|
+
BC;277C
|
137
|
+
BD;277D
|
138
|
+
BE;277E
|
139
|
+
BF;277F
|
140
|
+
C0;2780
|
141
|
+
C1;2781
|
142
|
+
C2;2782
|
143
|
+
C3;2783
|
144
|
+
C4;2784
|
145
|
+
C5;2785
|
146
|
+
C6;2786
|
147
|
+
C7;2787
|
148
|
+
C8;2788
|
149
|
+
C9;2789
|
150
|
+
CA;278A
|
151
|
+
CB;278B
|
152
|
+
CC;278C
|
153
|
+
CD;278D
|
154
|
+
CE;278E
|
155
|
+
CF;278F
|
156
|
+
D0;2790
|
157
|
+
D1;2791
|
158
|
+
D2;2792
|
159
|
+
D3;2793
|
160
|
+
D4;2794
|
161
|
+
D5;2192
|
162
|
+
D6;2194
|
163
|
+
D7;2195
|
164
|
+
D8;2798
|
165
|
+
D9;2799
|
166
|
+
DA;279A
|
167
|
+
DB;279B
|
168
|
+
DC;279C
|
169
|
+
DD;279D
|
170
|
+
DE;279E
|
171
|
+
DF;279F
|
172
|
+
E0;27A0
|
173
|
+
E1;27A1
|
174
|
+
E2;27A2
|
175
|
+
E3;27A3
|
176
|
+
E4;27A4
|
177
|
+
E5;27A5
|
178
|
+
E6;27A6
|
179
|
+
E7;27A7
|
180
|
+
E8;27A8
|
181
|
+
E9;27A9
|
182
|
+
EA;27AA
|
183
|
+
EB;27AB
|
184
|
+
EC;27AC
|
185
|
+
ED;27AD
|
186
|
+
EE;27AE
|
187
|
+
EF;27AF
|
188
|
+
F1;27B1
|
189
|
+
F2;27B2
|
190
|
+
F3;27B3
|
191
|
+
F4;27B4
|
192
|
+
F5;27B5
|
193
|
+
F6;27B6
|
194
|
+
F7;27B7
|
195
|
+
F8;27B8
|
196
|
+
F9;27B9
|
197
|
+
FA;27BA
|
198
|
+
FB;27BB
|
199
|
+
FC;27BC
|
200
|
+
FD;27BD
|
201
|
+
FE;27BE
|
@@ -0,0 +1,53 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
|
25
|
+
class PDF::Reader
|
26
|
+
################################################################################
|
27
|
+
# An internal PDF::Reader class that helps to verify various parts of the PDF file
|
28
|
+
# are valid
|
29
|
+
class Error # :nodoc:
|
30
|
+
################################################################################
|
31
|
+
def self.str_assert (lvalue, rvalue, chars=nil)
|
32
|
+
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
33
|
+
lvalue = lvalue[0,chars] if chars
|
34
|
+
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
35
|
+
end
|
36
|
+
################################################################################
|
37
|
+
def self.str_assert_not (lvalue, rvalue, chars=nil)
|
38
|
+
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
39
|
+
lvalue = lvalue[0,chars] if chars
|
40
|
+
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
41
|
+
end
|
42
|
+
################################################################################
|
43
|
+
def self.assert_equal (lvalue, rvalue)
|
44
|
+
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
45
|
+
end
|
46
|
+
################################################################################
|
47
|
+
end
|
48
|
+
################################################################################
|
49
|
+
# Raised when the content of a PDF file does not have the expected structure.
class MalformedPDFError < RuntimeError
end

# A more specific MalformedPDFError; rescuing MalformedPDFError also catches it.
class InvalidObjectError < MalformedPDFError
end

# Raised when a feature is requested that this library does not implement
# (e.g. an unknown stream filter).
class UnsupportedFeatureError < RuntimeError
end
|
52
|
+
end
|
53
|
+
################################################################################
|
@@ -0,0 +1,219 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'zlib'
|
26
|
+
|
27
|
+
class PDF::Reader
|
28
|
+
################################################################################
|
29
|
+
# Various parts of a PDF file can be passed through a filter before being stored to provide
|
30
|
+
# support for features like compression and encryption. This class is for decoding that
|
31
|
+
# content.
|
32
|
+
#
|
33
|
+
class Filter # :nodoc:
  ################################################################################
  # Creates a new filter for decoding content.
  #
  # name    - the PDF filter name (anything that responds to to_sym), e.g.
  #           :FlateDecode
  # options - optional Hash of decode parameters (:Predictor, :Columns, :Colors,
  #           :BitsPerComponent) or nil
  #
  # Filters that are only used to encode image data are accepted, but the data is
  # returned untouched. At this stage PDF::Reader has no need to decode images.
  #
  # Raises UnsupportedFeatureError when the filter name is not recognised.
  #
  def initialize(name, options = nil)
    @options = options

    case name.to_sym
    when :ASCII85Decode  then @filter = :ascii85
    when :ASCIIHexDecode then @filter = :asciihex
    when :CCITTFaxDecode then @filter = nil
    when :DCTDecode      then @filter = nil
    when :FlateDecode    then @filter = :flate
    when :JBIG2Decode    then @filter = nil
    when :JPXDecode      then @filter = nil # JPEG2000, image data only
    when :LZWDecode      then @filter = :lzw
    else raise UnsupportedFeatureError, "Unknown filter: #{name}"
    end
  end
  ################################################################################
  # Attempts to decode the specified data with the current filter.
  #
  # Filters that are only used to encode image data are accepted, but the data is
  # returned untouched. At this stage PDF::Reader has no need to decode images.
  #
  # Returns the decoded String.
  #
  def filter(data)
    # leave the data untouched if we don't support the required filter
    return data if @filter.nil?

    # decode the data
    send(@filter, data)
  end
  ################################################################################
  # Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
  # rubygem, which must already be loaded by the caller.
  #
  # Raises MalformedPDFError when decoding fails.
  #
  def ascii85(data)
    data = "<~#{data}" unless data.to_s[0,2] == "<~"
    Ascii85::decode(data)
  rescue StandardError => e
    # Oops, there was a problem decoding the stream
    raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
  end
  ################################################################################
  # Decode the specified data using the AsciiHex algorithm.
  #
  # The argument is never modified, so frozen or shared strings are safe to pass.
  #
  # Raises MalformedPDFError when decoding fails.
  #
  def asciihex(data)
    # dropping every non-hex character also removes the optional "<" and ">"
    # delimiters and any embedded whitespace; gsub returns a fresh String
    digits = data.gsub(/[^A-Fa-f0-9]/, "")
    # a missing final digit is treated as zero
    digits << "0" if digits.size % 2 == 1
    digits.scan(/.{2}/).map { |pair| pair.hex.chr }.join("")
  rescue StandardError => e
    # Oops, there was a problem decoding the stream
    raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
  end
  ################################################################################
  # Decode the specified data with the Zlib compression algorithm, then undo any
  # predictor named in the filter options.
  #
  # Raises MalformedPDFError when inflating or de-predicting fails.
  #
  def flate(data)
    inflated = begin
      inflate_stream(data, Zlib::MAX_WBITS)
    rescue Zlib::DataError
      # by default, Ruby's Zlib assumes the data it's inflating
      # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
      # If that fails, then use an undocumented 'feature' to attempt to inflate
      # the data as a raw RFC1951 stream.
      #
      # See
      # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
      # - http://www.gzip.org/zlib/zlib_faq.html#faq38
      inflate_stream(data, -Zlib::MAX_WBITS)
    end
    depredict(inflated, @options)
  rescue StandardError => e
    # Oops, there was a problem inflating the stream
    raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
  end
  ################################################################################
  # Decode the specified data with the LZW compression algorithm, then undo any
  # predictor named in the filter options.
  #
  def lzw(data)
    data = PDF::Reader::LZW.decode(data)
    depredict(data, @options)
  end
  ################################################################################
  # Reverses the predictor function named by opts[:Predictor].
  #
  # data - the decompressed String
  # opts - decode parameters Hash, or nil (treated as "no predictor")
  #
  # Returns the de-predicted String. Raises MalformedPDFError for an unknown
  # predictor value and UnsupportedFeatureError for the TIFF predictor.
  #
  def depredict(data, opts = {})
    predictor = (opts || {})[:Predictor].to_i

    case predictor
    when 0, 1 then
      data
    when 2 then
      tiff_depredict(data, opts)
    when 10, 11, 12, 13, 14, 15 then
      png_depredict(data, opts)
    else
      raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
    end
  end
  ################################################################################
  # TIFF predictor (Predictor 2) is not implemented; always raises
  # UnsupportedFeatureError.
  #
  def tiff_depredict(data, opts = {})
    raise UnsupportedFeatureError, "TIFF predictor not supported"
  end
  ################################################################################
  # Reverses PNG row filtering (Predictor 10-15).
  #
  # data - String in which every scanline is prefixed by a filter-type byte
  # opts - decode parameters Hash:
  #          :Predictor        - values below 10 return data untouched
  #          :Columns          - samples per row (default 1)
  #          :Colors           - components per sample (default 1)
  #          :BitsPerComponent - bits per component (default 8)
  #
  # Returns the unfiltered bytes as a String, without the filter-type bytes.
  # Raises ArgumentError when a row names an unknown filter type.
  #
  def png_depredict(data, opts = {})
    return data if opts.nil? || opts[:Predictor].to_i < 10

    colors  = (opts[:Colors] || 1).to_i
    bits    = (opts[:BitsPerComponent] || 8).to_i
    columns = (opts[:Columns] || 1).to_i

    # PNG filters work on bytes: "left" is the corresponding byte of the previous
    # pixel, i.e. one whole pixel back, rounded up to at least one byte
    pixel_bytes = [(colors * bits) / 8, 1].max
    # every scanline is one filter-type byte followed by the packed row bytes
    scanline_length = ((columns * colors * bits) + 7) / 8 + 1

    previous = [] # decoded bytes of the row above; empty for the first row
    decoded  = []

    data.unpack("C*").each_slice(scanline_length) do |row_data|
      algorithm = row_data.shift

      case algorithm
      when 0 then nil # None: bytes are stored as-is
      when 1 # Sub
        row_data.each_index do |index|
          left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
          row_data[index] = (row_data[index] + left) % 256
        end
      when 2 # Up
        row_data.each_index do |index|
          upper = previous[index] || 0
          row_data[index] = (row_data[index] + upper) % 256
        end
      when 3 # Average
        row_data.each_index do |index|
          left  = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
          upper = previous[index] || 0
          row_data[index] = (row_data[index] + (left + upper) / 2) % 256
        end
      when 4 # Paeth
        row_data.each_index do |index|
          if index < pixel_bytes
            left = upper_left = 0
          else
            left       = row_data[index - pixel_bytes]
            upper_left = previous[index - pixel_bytes] || 0
          end
          upper = previous[index] || 0
          row_data[index] = (row_data[index] + paeth_predictor(left, upper, upper_left)) % 256
        end
      else
        raise ArgumentError, "Invalid filter algorithm #{algorithm}"
      end

      decoded << row_data
      previous = row_data
    end

    decoded.map { |bytes| bytes.pack("C*") }.join("")
  end

  private

  # Inflates data with a Zlib stream opened with the given window bits, making
  # sure the native stream is released even when inflation fails.
  def inflate_stream(data, window_bits)
    zstream = Zlib::Inflate.new(window_bits)
    begin
      zstream.inflate(data)
    ensure
      zstream.close
    end
  end

  # Picks whichever of left, upper and upper_left is closest to the linear
  # estimate left + upper - upper_left; ties are resolved in that order.
  def paeth_predictor(left, upper, upper_left)
    estimate        = left + upper - upper_left
    left_dist       = (estimate - left).abs
    upper_dist      = (estimate - upper).abs
    upper_left_dist = (estimate - upper_left).abs

    if left_dist <= upper_dist && left_dist <= upper_left_dist
      left
    elsif upper_dist <= upper_left_dist
      upper
    else
      upper_left
    end
  end
end
|
218
|
+
end
|
219
|
+
################################################################################
|