fireinc-pdf-reader 0.11.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +168 -0
- data/MIT-LICENSE +21 -0
- data/README.rdoc +137 -0
- data/Rakefile +34 -0
- data/TODO +45 -0
- data/bin/pdf_list_callbacks +15 -0
- data/bin/pdf_object +48 -0
- data/bin/pdf_text +15 -0
- data/examples/callbacks.rb +21 -0
- data/examples/extract_bates.rb +49 -0
- data/examples/extract_images.rb +108 -0
- data/examples/hash.rb +12 -0
- data/examples/metadata.rb +25 -0
- data/examples/page_counter_improved.rb +23 -0
- data/examples/page_counter_naive.rb +24 -0
- data/examples/rspec.rb +57 -0
- data/examples/text.rb +40 -0
- data/examples/version.rb +25 -0
- data/lib/pdf/hash.rb +15 -0
- data/lib/pdf/reader/abstract_strategy.rb +81 -0
- data/lib/pdf/reader/buffer.rb +346 -0
- data/lib/pdf/reader/cmap.rb +138 -0
- data/lib/pdf/reader/encoding.rb +190 -0
- data/lib/pdf/reader/encodings/mac_expert.txt +159 -0
- data/lib/pdf/reader/encodings/mac_roman.txt +128 -0
- data/lib/pdf/reader/encodings/pdf_doc.txt +40 -0
- data/lib/pdf/reader/encodings/standard.txt +47 -0
- data/lib/pdf/reader/encodings/symbol.txt +154 -0
- data/lib/pdf/reader/encodings/win_ansi.txt +29 -0
- data/lib/pdf/reader/encodings/zapf_dingbats.txt +201 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/filter.rb +219 -0
- data/lib/pdf/reader/font.rb +133 -0
- data/lib/pdf/reader/form_xobject.rb +83 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/lzw.rb +123 -0
- data/lib/pdf/reader/metadata_strategy.rb +56 -0
- data/lib/pdf/reader/object_cache.rb +85 -0
- data/lib/pdf/reader/object_hash.rb +289 -0
- data/lib/pdf/reader/object_stream.rb +51 -0
- data/lib/pdf/reader/page.rb +185 -0
- data/lib/pdf/reader/page_text_receiver.rb +278 -0
- data/lib/pdf/reader/pages_strategy.rb +475 -0
- data/lib/pdf/reader/parser.rb +225 -0
- data/lib/pdf/reader/print_receiver.rb +18 -0
- data/lib/pdf/reader/reference.rb +66 -0
- data/lib/pdf/reader/register_receiver.rb +95 -0
- data/lib/pdf/reader/stream.rb +69 -0
- data/lib/pdf/reader/text_receiver.rb +264 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +220 -0
- data/lib/pdf/reader.rb +296 -0
- data/lib/pdf-reader.rb +1 -0
- metadata +211 -0
@@ -0,0 +1,154 @@
|
|
1
|
+
22;2200
|
2
|
+
24;2203
|
3
|
+
27;220B
|
4
|
+
2A;2217
|
5
|
+
2D;2212
|
6
|
+
40;2245
|
7
|
+
41;0391
|
8
|
+
42;0392
|
9
|
+
43;03A7
|
10
|
+
44;0394
|
11
|
+
45;0395
|
12
|
+
46;03A6
|
13
|
+
47;0393
|
14
|
+
48;0397
|
15
|
+
49;0399
|
16
|
+
4A;03D1
|
17
|
+
4B;039A
|
18
|
+
4C;039B
|
19
|
+
4D;039C
|
20
|
+
4E;039D
|
21
|
+
4F;039F
|
22
|
+
50;03A0
|
23
|
+
51;0398
|
24
|
+
52;03A1
|
25
|
+
53;03A3
|
26
|
+
54;03A4
|
27
|
+
55;03A5
|
28
|
+
56;03C2
|
29
|
+
57;03A9
|
30
|
+
58;039E
|
31
|
+
59;03A8
|
32
|
+
5A;0396
|
33
|
+
5C;2234
|
34
|
+
5E;22A5
|
35
|
+
60;F8E5
|
36
|
+
61;03B1
|
37
|
+
62;03B2
|
38
|
+
63;03C7
|
39
|
+
64;03B4
|
40
|
+
65;03B5
|
41
|
+
66;03C6
|
42
|
+
67;03B3
|
43
|
+
68;03B7
|
44
|
+
69;03B9
|
45
|
+
6A;03D5
|
46
|
+
6B;03BA
|
47
|
+
6C;03BB
|
48
|
+
6D;03BC
|
49
|
+
6E;03BD
|
50
|
+
6F;03BF
|
51
|
+
70;03C0
|
52
|
+
71;03B8
|
53
|
+
72;03C1
|
54
|
+
73;03C3
|
55
|
+
74;03C4
|
56
|
+
75;03C5
|
57
|
+
76;03D6
|
58
|
+
77;03C9
|
59
|
+
78;03BE
|
60
|
+
79;03C8
|
61
|
+
7A;03B6
|
62
|
+
7E;223C
|
63
|
+
A0;20AC
|
64
|
+
A1;03D2
|
65
|
+
A2;2032
|
66
|
+
A3;2264
|
67
|
+
A4;2215
|
68
|
+
A5;221E
|
69
|
+
A6;0192
|
70
|
+
A7;2663
|
71
|
+
A8;2666
|
72
|
+
A9;2665
|
73
|
+
AA;2660
|
74
|
+
AB;2194
|
75
|
+
AC;2190
|
76
|
+
AD;2191
|
77
|
+
AE;2192
|
78
|
+
AF;2193
|
79
|
+
B2;2033
|
80
|
+
B3;2265
|
81
|
+
B4;00D7
|
82
|
+
B5;221D
|
83
|
+
B6;2202
|
84
|
+
B7;2022
|
85
|
+
B8;00F7
|
86
|
+
B9;2260
|
87
|
+
BA;2261
|
88
|
+
BB;2248
|
89
|
+
BC;2026
|
90
|
+
BD;F8E6
|
91
|
+
BE;F8E7
|
92
|
+
BF;21B5
|
93
|
+
C0;2135
|
94
|
+
C1;2111
|
95
|
+
C2;211C
|
96
|
+
C3;2118
|
97
|
+
C4;2297
|
98
|
+
C5;2295
|
99
|
+
C6;2205
|
100
|
+
C7;2229
|
101
|
+
C8;222A
|
102
|
+
C9;2283
|
103
|
+
CA;2287
|
104
|
+
CB;2284
|
105
|
+
CC;2282
|
106
|
+
CD;2286
|
107
|
+
CE;2208
|
108
|
+
CF;2209
|
109
|
+
D0;2220
|
110
|
+
D1;2207
|
111
|
+
D2;F6DA
|
112
|
+
D3;F6D9
|
113
|
+
D4;F6DB
|
114
|
+
D5;220F
|
115
|
+
D6;221A
|
116
|
+
D7;22C5
|
117
|
+
D8;00AC
|
118
|
+
D9;2227
|
119
|
+
DA;2228
|
120
|
+
DB;21D4
|
121
|
+
DC;21D0
|
122
|
+
DD;21D1
|
123
|
+
DE;21D2
|
124
|
+
DF;21D3
|
125
|
+
E0;25CA
|
126
|
+
E1;2329
|
127
|
+
E2;F8E8
|
128
|
+
E3;F8E9
|
129
|
+
E4;F8EA
|
130
|
+
E5;2211
|
131
|
+
E6;F8EB
|
132
|
+
E7;F8EC
|
133
|
+
E8;F8ED
|
134
|
+
E9;F8EE
|
135
|
+
EA;F8EF
|
136
|
+
EB;F8F0
|
137
|
+
EC;F8F1
|
138
|
+
ED;F8F2
|
139
|
+
EE;F8F3
|
140
|
+
EF;F8F4
|
141
|
+
F1;232A
|
142
|
+
F2;222B
|
143
|
+
F3;2320
|
144
|
+
F4;F8F5
|
145
|
+
F5;2321
|
146
|
+
F6;F8F6
|
147
|
+
F7;F8F7
|
148
|
+
F8;F8F8
|
149
|
+
F9;F8F9
|
150
|
+
FA;F8FA
|
151
|
+
FB;F8FB
|
152
|
+
FC;F8FC
|
153
|
+
FD;F8FD
|
154
|
+
FE;F8FE
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# A mapping of WinAnsi (win-1252) characters to unicode. Anything
|
2
|
+
# not specified is left unchanged
|
3
|
+
80;20AC
|
4
|
+
82;201A
|
5
|
+
83;0192
|
6
|
+
84;201E
|
7
|
+
85;2026
|
8
|
+
86;2020
|
9
|
+
87;2021
|
10
|
+
88;02C6
|
11
|
+
89;2030
|
12
|
+
8A;0160
|
13
|
+
8B;2039
|
14
|
+
8C;0152
|
15
|
+
8E;017D
|
16
|
+
91;2018
|
17
|
+
92;2019
|
18
|
+
93;201C
|
19
|
+
94;201D
|
20
|
+
95;2022
|
21
|
+
96;2013
|
22
|
+
97;2014
|
23
|
+
98;02DC
|
24
|
+
99;2122
|
25
|
+
9A;0161
|
26
|
+
9B;203A
|
27
|
+
9C;0153
|
28
|
+
9E;017E
|
29
|
+
9F;0178
|
@@ -0,0 +1,201 @@
|
|
1
|
+
21;2701
|
2
|
+
22;2702
|
3
|
+
23;2703
|
4
|
+
24;2704
|
5
|
+
25;260E
|
6
|
+
26;2706
|
7
|
+
27;2707
|
8
|
+
28;2708
|
9
|
+
29;2709
|
10
|
+
2A;261B
|
11
|
+
2B;261E
|
12
|
+
2C;270C
|
13
|
+
2D;270D
|
14
|
+
2E;270E
|
15
|
+
2F;270F
|
16
|
+
30;2710
|
17
|
+
31;2711
|
18
|
+
32;2712
|
19
|
+
33;2713
|
20
|
+
34;2714
|
21
|
+
35;2715
|
22
|
+
36;2716
|
23
|
+
37;2717
|
24
|
+
38;2718
|
25
|
+
39;2719
|
26
|
+
3A;271A
|
27
|
+
3B;271B
|
28
|
+
3C;271C
|
29
|
+
3D;271D
|
30
|
+
3E;271E
|
31
|
+
3F;271F
|
32
|
+
40;2720
|
33
|
+
41;2721
|
34
|
+
42;2722
|
35
|
+
43;2723
|
36
|
+
44;2724
|
37
|
+
45;2725
|
38
|
+
46;2726
|
39
|
+
47;2727
|
40
|
+
48;2605
|
41
|
+
49;2729
|
42
|
+
4A;272A
|
43
|
+
4B;272B
|
44
|
+
4C;272C
|
45
|
+
4D;272D
|
46
|
+
4E;272E
|
47
|
+
4F;272F
|
48
|
+
50;2730
|
49
|
+
51;2731
|
50
|
+
52;2732
|
51
|
+
53;2733
|
52
|
+
54;2734
|
53
|
+
55;2735
|
54
|
+
56;2736
|
55
|
+
57;2737
|
56
|
+
58;2738
|
57
|
+
59;2739
|
58
|
+
5A;273A
|
59
|
+
5B;273B
|
60
|
+
5C;273C
|
61
|
+
5D;273D
|
62
|
+
5E;273E
|
63
|
+
5F;273F
|
64
|
+
60;2740
|
65
|
+
61;2741
|
66
|
+
62;2742
|
67
|
+
63;2743
|
68
|
+
64;2744
|
69
|
+
65;2745
|
70
|
+
66;2746
|
71
|
+
67;2747
|
72
|
+
68;2748
|
73
|
+
69;2749
|
74
|
+
6A;274A
|
75
|
+
6B;274B
|
76
|
+
6C;25CF
|
77
|
+
6D;274D
|
78
|
+
6E;25A0
|
79
|
+
6F;274F
|
80
|
+
70;2750
|
81
|
+
71;2751
|
82
|
+
72;2752
|
83
|
+
73;25B2
|
84
|
+
74;25BC
|
85
|
+
75;25C6
|
86
|
+
76;2756
|
87
|
+
77;25D7
|
88
|
+
78;2758
|
89
|
+
79;2759
|
90
|
+
7A;275A
|
91
|
+
7B;275B
|
92
|
+
7C;275C
|
93
|
+
7D;275D
|
94
|
+
7E;275E
|
95
|
+
80;F8D7
|
96
|
+
81;F8D8
|
97
|
+
82;F8D9
|
98
|
+
83;F8DA
|
99
|
+
84;F8DB
|
100
|
+
85;F8DC
|
101
|
+
86;F8DD
|
102
|
+
87;F8DE
|
103
|
+
88;F8DF
|
104
|
+
89;F8E0
|
105
|
+
8A;F8E1
|
106
|
+
8B;F8E2
|
107
|
+
8C;F8E3
|
108
|
+
8D;F8E4
|
109
|
+
A1;2761
|
110
|
+
A2;2762
|
111
|
+
A3;2763
|
112
|
+
A4;2764
|
113
|
+
A5;2765
|
114
|
+
A6;2766
|
115
|
+
A7;2767
|
116
|
+
A8;2663
|
117
|
+
A9;2666
|
118
|
+
AA;2665
|
119
|
+
AB;2660
|
120
|
+
AC;2460
|
121
|
+
AD;2461
|
122
|
+
AE;2462
|
123
|
+
AF;2463
|
124
|
+
B0;2464
|
125
|
+
B1;2465
|
126
|
+
B2;2466
|
127
|
+
B3;2467
|
128
|
+
B4;2468
|
129
|
+
B5;2469
|
130
|
+
B6;2776
|
131
|
+
B7;2777
|
132
|
+
B8;2778
|
133
|
+
B9;2779
|
134
|
+
BA;277A
|
135
|
+
BB;277B
|
136
|
+
BC;277C
|
137
|
+
BD;277D
|
138
|
+
BE;277E
|
139
|
+
BF;277F
|
140
|
+
C0;2780
|
141
|
+
C1;2781
|
142
|
+
C2;2782
|
143
|
+
C3;2783
|
144
|
+
C4;2784
|
145
|
+
C5;2785
|
146
|
+
C6;2786
|
147
|
+
C7;2787
|
148
|
+
C8;2788
|
149
|
+
C9;2789
|
150
|
+
CA;278A
|
151
|
+
CB;278B
|
152
|
+
CC;278C
|
153
|
+
CD;278D
|
154
|
+
CE;278E
|
155
|
+
CF;278F
|
156
|
+
D0;2790
|
157
|
+
D1;2791
|
158
|
+
D2;2792
|
159
|
+
D3;2793
|
160
|
+
D4;2794
|
161
|
+
D5;2192
|
162
|
+
D6;2194
|
163
|
+
D7;2195
|
164
|
+
D8;2798
|
165
|
+
D9;2799
|
166
|
+
DA;279A
|
167
|
+
DB;279B
|
168
|
+
DC;279C
|
169
|
+
DD;279D
|
170
|
+
DE;279E
|
171
|
+
DF;279F
|
172
|
+
E0;27A0
|
173
|
+
E1;27A1
|
174
|
+
E2;27A2
|
175
|
+
E3;27A3
|
176
|
+
E4;27A4
|
177
|
+
E5;27A5
|
178
|
+
E6;27A6
|
179
|
+
E7;27A7
|
180
|
+
E8;27A8
|
181
|
+
E9;27A9
|
182
|
+
EA;27AA
|
183
|
+
EB;27AB
|
184
|
+
EC;27AC
|
185
|
+
ED;27AD
|
186
|
+
EE;27AE
|
187
|
+
EF;27AF
|
188
|
+
F1;27B1
|
189
|
+
F2;27B2
|
190
|
+
F3;27B3
|
191
|
+
F4;27B4
|
192
|
+
F5;27B5
|
193
|
+
F6;27B6
|
194
|
+
F7;27B7
|
195
|
+
F8;27B8
|
196
|
+
F9;27B9
|
197
|
+
FA;27BA
|
198
|
+
FB;27BB
|
199
|
+
FC;27BC
|
200
|
+
FD;27BD
|
201
|
+
FE;27BE
|
@@ -0,0 +1,53 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
|
25
|
+
class PDF::Reader
|
26
|
+
################################################################################
|
27
|
+
# An internal PDF::Reader class that helps to verify various parts of the PDF file
|
28
|
+
# are valid
|
29
|
+
class Error # :nodoc:
|
30
|
+
################################################################################
|
31
|
+
# Verify that lvalue equals rvalue, raising MalformedPDFError otherwise.
#
# lvalue - the value found in the file
# rvalue - the value that was expected
# chars  - optional; when given, lvalue must be a String and only its
#          first +chars+ characters are compared against rvalue
#
# Returns nil when the values match.
def self.str_assert(lvalue, rvalue, chars=nil)
  if chars
    unless lvalue.kind_of?(String)
      raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead"
    end
    # only the leading portion of the string takes part in the comparison
    lvalue = lvalue[0,chars]
  end

  if lvalue != rvalue
    raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead"
  end
end
|
36
|
+
################################################################################
|
37
|
+
# Verify that lvalue does NOT equal rvalue, raising MalformedPDFError if it
# does.
#
# lvalue - the value found in the file
# rvalue - the value that must not be present
# chars  - optional; when given, lvalue must be a String and only its
#          first +chars+ characters are compared against rvalue
#
# Returns nil when the values differ.
def self.str_assert_not(lvalue, rvalue, chars=nil)
  if chars
    unless lvalue.kind_of?(String)
      raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead"
    end
    # only the leading portion of the string takes part in the comparison
    lvalue = lvalue[0,chars]
  end

  # The failure here is that the forbidden value WAS found, so the message
  # must say so rather than claim something else was expected.
  if lvalue == rvalue
    raise MalformedPDFError, "PDF malformed, did not expect to find '#{rvalue}'"
  end
end
|
42
|
+
################################################################################
|
43
|
+
# Verify that lvalue equals rvalue.
#
# Raises MalformedPDFError when the two values differ, returns nil otherwise.
def self.assert_equal(lvalue, rvalue)
  if lvalue != rvalue
    raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead"
  end
end
|
46
|
+
################################################################################
|
47
|
+
end
|
48
|
+
################################################################################
|
49
|
+
# Raised when the content of a PDF file does not match what was expected.
class MalformedPDFError < RuntimeError; end
|
50
|
+
# A more specific kind of MalformedPDFError.
# NOTE(review): presumably raised for invalid PDF objects - confirm against callers.
class InvalidObjectError < MalformedPDFError; end
|
51
|
+
# Raised when a file uses a feature this library does not implement. Not a
# subclass of MalformedPDFError.
class UnsupportedFeatureError < RuntimeError; end
|
52
|
+
end
|
53
|
+
################################################################################
|
@@ -0,0 +1,219 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'zlib'
|
26
|
+
|
27
|
+
class PDF::Reader
|
28
|
+
################################################################################
|
29
|
+
# Various parts of a PDF file can be passed through a filter before being stored to provide
|
30
|
+
# support for features like compression and encryption. This class is for decoding that
|
31
|
+
# content.
|
32
|
+
#
|
33
|
+
class Filter # :nodoc:
|
34
|
+
################################################################################
|
35
|
+
# creates a new filter for decoding content.
|
36
|
+
#
|
37
|
+
# Filters that are only used to encode image data are accepted, but the data is
|
38
|
+
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
39
|
+
#
|
40
|
+
# name    - the PDF filter name; anything responding to to_sym
# options - optional decode parameters, stored and handed to the predictor
#           step by the flate and lzw decoders
#
# Raises UnsupportedFeatureError when the filter name is not recognised.
def initialize(name, options = nil)
  @options = options

  # Filter name => decoding method. A nil value marks an accepted filter
  # whose data is passed through untouched.
  decoders = {
    :ASCII85Decode  => :ascii85,
    :ASCIIHexDecode => :asciihex,
    :CCITTFaxDecode => nil,
    :DCTDecode      => nil,
    :FlateDecode    => :flate,
    :JBIG2Decode    => nil,
    :LZWDecode      => :lzw
  }

  filter_name = name.to_sym
  unless decoders.has_key?(filter_name)
    raise UnsupportedFeatureError, "Unknown filter: #{name}"
  end

  @filter = decoders[filter_name]
end
|
54
|
+
################################################################################
|
55
|
+
# attempts to decode the specified data with the current filter
|
56
|
+
#
|
57
|
+
# Filters that are only used to encode image data are accepted, but the data is
|
58
|
+
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
59
|
+
#
|
60
|
+
# data - the encoded content
#
# Returns data unchanged when no decoder is configured (@filter is nil),
# otherwise the result of the decoding method named by @filter.
def filter(data)
  @filter.nil? ? data : send(@filter, data)
end
|
67
|
+
################################################################################
|
68
|
+
# Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
|
69
|
+
# rubygem.
|
70
|
+
#
|
71
|
+
# data - the Ascii85 encoded content; a leading "<~" marker is added when
#        the data does not already start with one
#
# Returns whatever Ascii85::decode returns for the data.
# Raises MalformedPDFError when decoding fails.
def ascii85(data)
  data = "<~#{data}" unless data.to_s[0,2] == "<~"
  Ascii85::decode(data)
rescue StandardError => e
  # Only ordinary errors are wrapped; Interrupt, SystemExit, NoMemoryError
  # and friends must propagate untouched rather than be reported as a
  # malformed PDF.
  raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
end
|
78
|
+
################################################################################
|
79
|
+
# Decode the specified data using the AsciiHex algorithm.
|
80
|
+
#
|
81
|
+
# data - the hex encoded content, optionally wrapped in "<" and ">". Any
#        character that is not a hex digit is ignored, and an odd number of
#        digits is padded with a trailing "0".
#
# The argument is never modified, so frozen strings and strings the caller
# still needs are safe to pass in.
#
# Returns the decoded String.
# Raises MalformedPDFError when decoding fails.
def asciihex(data)
  hex = data
  # every step below builds a new string instead of editing data in place
  hex = hex[0...-1] if hex[-1,1] == ">"
  hex = hex[1..-1] if hex[0,1] == "<"
  hex = hex.gsub(/[^A-Fa-f0-9]/,"")
  hex += "0" if hex.size % 2 == 1
  hex.scan(/.{2}/).map { |pair| pair.hex.chr }.join("")
rescue StandardError => e
  # Only ordinary errors are wrapped; Interrupt, SystemExit and friends
  # must propagate untouched.
  raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
end
|
91
|
+
################################################################################
|
92
|
+
# Decode the specified data with the Zlib compression algorithm
|
93
|
+
# data - the compressed content
#
# Returns the inflated data after it has been passed through depredict
# together with the options given to the constructor.
# Raises MalformedPDFError when inflating or de-predicting fails.
def flate(data)
  inflated = nil
  zstream = Zlib::Inflate.new
  begin
    inflated = zstream.inflate(data)
  rescue Zlib::DataError
    # by default, Ruby's Zlib assumes the data it's inflating
    # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
    # If that fails, then use an undocumented 'feature' to attempt to inflate
    # the data as a raw RFC1951 stream.
    #
    # See
    # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
    # - http://www.gzip.org/zlib/zlib_faq.html#faq38
    raw_stream = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    begin
      inflated = raw_stream.inflate(data)
    ensure
      # release the native zlib state straight away instead of waiting for GC
      raw_stream.close
    end
  ensure
    zstream.close
  end
  depredict(inflated, @options)
rescue StandardError => e
  # Only ordinary errors are wrapped; Interrupt, SystemExit, NoMemoryError
  # and friends must propagate untouched.
  raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
end
|
113
|
+
################################################################################
|
114
|
+
# Decode the specified data with the LZW compression algorithm
|
115
|
+
# data - the LZW compressed content
#
# Returns the output of PDF::Reader::LZW.decode after it has been passed
# through depredict together with the options given to the constructor.
def lzw(data)
  depredict(PDF::Reader::LZW.decode(data), @options)
end
|
119
|
+
################################################################################
|
120
|
+
# Undo the prediction step selected by opts[:Predictor].
#
# data - the decoded content
# opts - decode parameters; nil is treated like an empty Hash
#
# Predictor 0 or 1 returns data untouched, 2 delegates to tiff_depredict
# and 10 to 15 delegate to png_depredict.
#
# Raises MalformedPDFError for any other predictor value.
def depredict(data, opts = {})
  predictor = (opts || {})[:Predictor].to_i

  return data if predictor == 0 || predictor == 1
  return tiff_depredict(data, opts) if predictor == 2
  return png_depredict(data, opts) if [10, 11, 12, 13, 14, 15].include?(predictor)

  raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
end
|
134
|
+
################################################################################
|
135
|
+
# Placeholder for the TIFF predictor. Both arguments are ignored.
#
# Always raises UnsupportedFeatureError.
def tiff_depredict(data, opts = {})
  raise UnsupportedFeatureError.new("TIFF predictor not supported")
end
|
138
|
+
################################################################################
|
139
|
+
# Undo PNG row prediction.
#
# data - the predicted content: a sequence of rows, each made of one filter
#        type byte followed by the row's bytes
# opts - decode parameters. :Predictor must be 10 or more for any work to
#        happen. :Columns (default 1), :Colors (default 1) and
#        :BitsPerComponent (default 8) give the row layout.
#
# Returns data untouched when opts is nil or the predictor is below 10,
# otherwise a String holding the reconstructed rows without their filter
# bytes.
#
# Raises ArgumentError when a row starts with an unknown filter type.
def png_depredict(data, opts = {})
  return data if opts.nil? || opts[:Predictor].to_i < 10

  columns = (opts[:Columns] || 1).to_i
  colors  = (opts[:Colors] || 1).to_i
  bits    = (opts[:BitsPerComponent] || 8).to_i

  # Distance in bytes back to the matching byte of the pixel on the left;
  # never less than one, even for pixels narrower than a byte.
  pixel_bytes = [(colors * bits + 7) / 8, 1].max
  # Row width in bytes, rounded up to a whole byte.
  row_bytes = (columns * colors * bits + 7) / 8

  previous = nil   # the already reconstructed row above the current one
  decoded  = []

  data.unpack("C*").each_slice(row_bytes + 1) do |scanline|
    filter = scanline.shift
    unless [0, 1, 2, 3, 4].include?(filter)
      raise ArgumentError, "Invalid filter algorithm #{filter}"
    end

    # Bytes are rewritten in place from left to right, so any lookup to the
    # left of the current index already sees reconstructed values.
    scanline.each_with_index do |byte, index|
      left       = index < pixel_bytes ? 0 : scanline[index - pixel_bytes]
      upper      = previous ? previous[index] : 0
      upper_left = (previous && index >= pixel_bytes) ? previous[index - pixel_bytes] : 0

      predicted = case filter
                  when 0 then 0                    # None
                  when 1 then left                 # Sub
                  when 2 then upper                # Up
                  when 3 then (left + upper) / 2   # Average
                  else                             # Paeth
                    estimate   = left + upper - upper_left
                    dist_left  = (estimate - left).abs
                    dist_upper = (estimate - upper).abs
                    dist_diag  = (estimate - upper_left).abs
                    if dist_left <= dist_upper && dist_left <= dist_diag
                      left
                    elsif dist_upper <= dist_diag
                      upper
                    else
                      upper_left
                    end
                  end

      scanline[index] = (byte + predicted) % 256
    end

    decoded.concat(scanline)
    previous = scanline
  end

  decoded.pack("C*")
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
################################################################################
|