pdf-reader 1.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'ascii85'
|
5
|
+
|
6
|
+
class PDF::Reader
|
7
|
+
module Filter # :nodoc:
|
8
|
+
# implementation of the Ascii85 filter
|
9
|
+
class Ascii85
|
10
|
+
def initialize(options = {})
|
11
|
+
@options = options
|
12
|
+
end
|
13
|
+
|
14
|
+
################################################################################
|
15
|
+
# Decode the specified data using the Ascii85 algorithm. Relies on the AScii85
|
16
|
+
# rubygem.
|
17
|
+
#
|
18
|
+
def filter(data)
|
19
|
+
data = "<~#{data}" unless data.to_s[0,2] == "<~"
|
20
|
+
::Ascii85::decode(data)
|
21
|
+
rescue Exception => e
|
22
|
+
# Oops, there was a problem decoding the stream
|
23
|
+
raise MalformedPDFError,
|
24
|
+
"Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
class PDF::Reader
|
6
|
+
module Filter # :nodoc:
|
7
|
+
# implementation of the AsciiHex stream filter
|
8
|
+
class AsciiHex
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
################################################################################
|
14
|
+
# Decode the specified data using the AsciiHex algorithm.
|
15
|
+
#
|
16
|
+
def filter(data)
|
17
|
+
data.chop! if data[-1,1] == ">"
|
18
|
+
data = data[1,data.size] if data[0,1] == "<"
|
19
|
+
data.gsub!(/[^A-Fa-f0-9]/,"")
|
20
|
+
data << "0" if data.size % 2 == 1
|
21
|
+
data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
|
22
|
+
rescue Exception => e
|
23
|
+
# Oops, there was a problem decoding the stream
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
module Filter # :nodoc:
|
6
|
+
# some filter implementations support preprocessing of the data to
|
7
|
+
# improve compression
|
8
|
+
class Depredict
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options || {}
|
11
|
+
end
|
12
|
+
|
13
|
+
################################################################################
|
14
|
+
# Streams can be preprocessed to improve compression. This reverses the
|
15
|
+
# preprocessing
|
16
|
+
#
|
17
|
+
def filter(data)
|
18
|
+
predictor = @options[:Predictor].to_i
|
19
|
+
|
20
|
+
case predictor
|
21
|
+
when 0, 1 then
|
22
|
+
data
|
23
|
+
when 2 then
|
24
|
+
tiff_depredict(data)
|
25
|
+
when 10, 11, 12, 13, 14, 15 then
|
26
|
+
png_depredict(data)
|
27
|
+
else
|
28
|
+
raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
################################################################################
|
35
|
+
def tiff_depredict(data)
|
36
|
+
data = data.unpack("C*")
|
37
|
+
unfiltered = []
|
38
|
+
bpc = @options[:BitsPerComponent] || 8
|
39
|
+
pixel_bits = bpc * @options[:Colors]
|
40
|
+
pixel_bytes = pixel_bits / 8
|
41
|
+
line_len = (pixel_bytes * @options[:Columns])
|
42
|
+
pos = 0
|
43
|
+
|
44
|
+
if bpc != 8
|
45
|
+
raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
|
46
|
+
end
|
47
|
+
|
48
|
+
until pos > data.size
|
49
|
+
row_data = data[pos, line_len]
|
50
|
+
row_data.each_with_index do |byte, index|
|
51
|
+
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
52
|
+
row_data[index] = (byte + left) % 256
|
53
|
+
end
|
54
|
+
unfiltered += row_data
|
55
|
+
pos += line_len
|
56
|
+
end
|
57
|
+
|
58
|
+
unfiltered.pack("C*")
|
59
|
+
end
|
60
|
+
################################################################################
|
61
|
+
def png_depredict(data)
|
62
|
+
return data if @options[:Predictor].to_i < 10
|
63
|
+
|
64
|
+
data = data.unpack("C*")
|
65
|
+
|
66
|
+
pixel_bytes = @options[:Colors] || 1
|
67
|
+
scanline_length = (pixel_bytes * @options[:Columns]) + 1
|
68
|
+
row = 0
|
69
|
+
pixels = []
|
70
|
+
paeth, pa, pb, pc = nil
|
71
|
+
until data.empty? do
|
72
|
+
row_data = data.slice! 0, scanline_length
|
73
|
+
filter = row_data.shift
|
74
|
+
case filter
|
75
|
+
when 0 # None
|
76
|
+
when 1 # Sub
|
77
|
+
row_data.each_with_index do |byte, index|
|
78
|
+
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
79
|
+
row_data[index] = (byte + left) % 256
|
80
|
+
#p [byte, left, row_data[index]]
|
81
|
+
end
|
82
|
+
when 2 # Up
|
83
|
+
row_data.each_with_index do |byte, index|
|
84
|
+
col = index / pixel_bytes
|
85
|
+
upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
|
86
|
+
row_data[index] = (upper + byte) % 256
|
87
|
+
end
|
88
|
+
when 3 # Average
|
89
|
+
row_data.each_with_index do |byte, index|
|
90
|
+
col = index / pixel_bytes
|
91
|
+
upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
|
92
|
+
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
93
|
+
|
94
|
+
row_data[index] = (byte + ((left + upper)/2).floor) % 256
|
95
|
+
end
|
96
|
+
when 4 # Paeth
|
97
|
+
left = upper = upper_left = nil
|
98
|
+
row_data.each_with_index do |byte, index|
|
99
|
+
col = index / pixel_bytes
|
100
|
+
|
101
|
+
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
102
|
+
if row.zero?
|
103
|
+
upper = upper_left = 0
|
104
|
+
else
|
105
|
+
upper = pixels[row-1][col][index % pixel_bytes]
|
106
|
+
upper_left = col.zero? ? 0 :
|
107
|
+
pixels[row-1][col-1][index % pixel_bytes]
|
108
|
+
end
|
109
|
+
|
110
|
+
p = left + upper - upper_left
|
111
|
+
pa = (p - left).abs
|
112
|
+
pb = (p - upper).abs
|
113
|
+
pc = (p - upper_left).abs
|
114
|
+
|
115
|
+
paeth = if pa <= pb && pa <= pc
|
116
|
+
left
|
117
|
+
elsif pb <= pc
|
118
|
+
upper
|
119
|
+
else
|
120
|
+
upper_left
|
121
|
+
end
|
122
|
+
|
123
|
+
row_data[index] = (byte + paeth) % 256
|
124
|
+
end
|
125
|
+
else
|
126
|
+
raise ArgumentError, "Invalid filter algorithm #{filter}"
|
127
|
+
end
|
128
|
+
|
129
|
+
s = []
|
130
|
+
row_data.each_slice pixel_bytes do |slice|
|
131
|
+
s << slice
|
132
|
+
end
|
133
|
+
pixels << s
|
134
|
+
row += 1
|
135
|
+
end
|
136
|
+
|
137
|
+
pixels.map { |bytes| bytes.flatten.pack("C*") }.join("")
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
|
5
|
+
require 'zlib'
|
6
|
+
|
7
|
+
class PDF::Reader
|
8
|
+
module Filter # :nodoc:
|
9
|
+
# implementation of the Flate (zlib) stream filter
|
10
|
+
class Flate
|
11
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
12
|
+
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
|
14
|
+
def initialize(options = {})
|
15
|
+
@options = options
|
16
|
+
end
|
17
|
+
|
18
|
+
################################################################################
|
19
|
+
# Decode the specified data with the Zlib compression algorithm
|
20
|
+
def filter(data)
|
21
|
+
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
22
|
+
|
23
|
+
if deflated.nil?
|
24
|
+
raise MalformedPDFError,
|
25
|
+
"Error while inflating a compressed stream (no suitable inflation algorithm found)"
|
26
|
+
end
|
27
|
+
Depredict.new(@options).filter(deflated)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def zlib_inflate(data)
|
33
|
+
begin
|
34
|
+
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
35
|
+
rescue Zlib::DataError
|
36
|
+
# by default, Ruby's Zlib assumes the data it's inflating
|
37
|
+
# is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
|
38
|
+
# fails, swallow the exception and attempt to inflate the data as a raw
|
39
|
+
# RFC1951 stream.
|
40
|
+
end
|
41
|
+
|
42
|
+
begin
|
43
|
+
return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
|
44
|
+
rescue StandardError
|
45
|
+
# swallow this one too, so we can try some other fallback options
|
46
|
+
end
|
47
|
+
|
48
|
+
nil
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
class PDF::Reader
|
6
|
+
module Filter # :nodoc:
|
7
|
+
# implementation of the LZW stream filter
|
8
|
+
class Lzw
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
################################################################################
|
14
|
+
# Decode the specified data with the LZW compression algorithm
|
15
|
+
def filter(data)
|
16
|
+
data = PDF::Reader::LZW.decode(data)
|
17
|
+
Depredict.new(@options).filter(data)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
class PDF::Reader
|
6
|
+
module Filter # :nodoc:
|
7
|
+
# implementation of the null stream filter
|
8
|
+
class Null
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
def filter(data)
|
14
|
+
data
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
class PDF::Reader # :nodoc:
|
6
|
+
module Filter # :nodoc:
|
7
|
+
# implementation of the run length stream filter
|
8
|
+
class RunLength
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
################################################################################
|
14
|
+
# Decode the specified data with the RunLengthDecode compression algorithm
|
15
|
+
def filter(data)
|
16
|
+
pos = 0
|
17
|
+
out = "".dup
|
18
|
+
|
19
|
+
while pos < data.length
|
20
|
+
length = data.getbyte(pos)
|
21
|
+
pos += 1
|
22
|
+
|
23
|
+
case
|
24
|
+
when length == 128
|
25
|
+
break
|
26
|
+
when length < 128
|
27
|
+
# When the length is < 128, we copy the following length+1 bytes
|
28
|
+
# literally.
|
29
|
+
out << data[pos, length + 1]
|
30
|
+
pos += length
|
31
|
+
else
|
32
|
+
# When the length is > 128, we copy the next byte (257 - length)
|
33
|
+
# times; i.e., "\xFA\x00" ([250, 0]) will expand to
|
34
|
+
# "\x00\x00\x00\x00\x00\x00\x00".
|
35
|
+
out << data[pos, 1] * (257 - length)
|
36
|
+
end
|
37
|
+
|
38
|
+
pos += 1
|
39
|
+
end
|
40
|
+
|
41
|
+
Depredict.new(@options).filter(out)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -22,7 +25,6 @@
|
|
22
25
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
26
|
#
|
24
27
|
################################################################################
|
25
|
-
require 'zlib'
|
26
28
|
|
27
29
|
class PDF::Reader
|
28
30
|
################################################################################
|
@@ -30,7 +32,7 @@ class PDF::Reader
|
|
30
32
|
# support for features like compression and encryption. This class is for decoding that
|
31
33
|
# content.
|
32
34
|
#
|
33
|
-
|
35
|
+
module Filter # :nodoc:
|
34
36
|
|
35
37
|
################################################################################
|
36
38
|
# creates a new filter for decoding content.
|
@@ -38,242 +40,21 @@ class PDF::Reader
|
|
38
40
|
# Filters that are only used to encode image data are accepted, but the data is
|
39
41
|
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
40
42
|
#
|
41
|
-
def
|
42
|
-
@options = options
|
43
|
-
|
43
|
+
def self.with(name, options = {})
|
44
44
|
case name.to_sym
|
45
|
-
when :ASCII85Decode then
|
46
|
-
when :ASCIIHexDecode then
|
47
|
-
when :CCITTFaxDecode then
|
48
|
-
when :DCTDecode then
|
49
|
-
when :FlateDecode then
|
50
|
-
when :
|
51
|
-
when :
|
52
|
-
when :
|
53
|
-
when :
|
45
|
+
when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
|
46
|
+
when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
|
47
|
+
when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
|
48
|
+
when :DCTDecode then PDF::Reader::Filter::Null.new(options)
|
49
|
+
when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
|
50
|
+
when :Fl then PDF::Reader::Filter::Flate.new(options)
|
51
|
+
when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
|
52
|
+
when :JPXDecode then PDF::Reader::Filter::Null.new(options)
|
53
|
+
when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
|
54
|
+
when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
|
54
55
|
else
|
55
56
|
raise UnsupportedFeatureError, "Unknown filter: #{name}"
|
56
57
|
end
|
57
58
|
end
|
58
|
-
################################################################################
|
59
|
-
# attempts to decode the specified data with the current filter
|
60
|
-
#
|
61
|
-
# Filters that are only used to encode image data are accepted, but the data is
|
62
|
-
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
63
|
-
#
|
64
|
-
def filter (data)
|
65
|
-
# leave the data untouched if we don't support the required filter
|
66
|
-
return data if @filter.nil?
|
67
|
-
|
68
|
-
# decode the data
|
69
|
-
self.send(@filter, data)
|
70
|
-
end
|
71
|
-
################################################################################
|
72
|
-
# Decode the specified data using the Ascii85 algorithm. Relies on the AScii85
|
73
|
-
# rubygem.
|
74
|
-
#
|
75
|
-
def ascii85(data)
|
76
|
-
data = "<~#{data}" unless data.to_s[0,2] == "<~"
|
77
|
-
Ascii85::decode(data)
|
78
|
-
rescue Exception => e
|
79
|
-
# Oops, there was a problem decoding the stream
|
80
|
-
raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
|
81
|
-
end
|
82
|
-
################################################################################
|
83
|
-
# Decode the specified data using the AsciiHex algorithm.
|
84
|
-
#
|
85
|
-
def asciihex(data)
|
86
|
-
data.chop! if data[-1,1] == ">"
|
87
|
-
data = data[1,data.size] if data[0,1] == "<"
|
88
|
-
data.gsub!(/[^A-Fa-f0-9]/,"")
|
89
|
-
data << "0" if data.size % 2 == 1
|
90
|
-
data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
|
91
|
-
rescue Exception => e
|
92
|
-
# Oops, there was a problem decoding the stream
|
93
|
-
raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
|
94
|
-
end
|
95
|
-
################################################################################
|
96
|
-
# Decode the specified data with the Zlib compression algorithm
|
97
|
-
def flate (data)
|
98
|
-
deflated = nil
|
99
|
-
begin
|
100
|
-
deflated = Zlib::Inflate.new.inflate(data)
|
101
|
-
rescue Zlib::DataError => e
|
102
|
-
# by default, Ruby's Zlib assumes the data it's inflating
|
103
|
-
# is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
|
104
|
-
# If that fails, then use an undocumented 'feature' to attempt to inflate
|
105
|
-
# the data as a raw RFC1951 stream.
|
106
|
-
#
|
107
|
-
# See
|
108
|
-
# - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
|
109
|
-
# - http://www.gzip.org/zlib/zlib_faq.html#faq38
|
110
|
-
deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
|
111
|
-
end
|
112
|
-
depredict(deflated, @options)
|
113
|
-
rescue Exception => e
|
114
|
-
# Oops, there was a problem inflating the stream
|
115
|
-
raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
|
116
|
-
end
|
117
|
-
################################################################################
|
118
|
-
# Decode the specified data with the LZW compression algorithm
|
119
|
-
def lzw(data)
|
120
|
-
data = PDF::Reader::LZW.decode(data)
|
121
|
-
depredict(data, @options)
|
122
|
-
end
|
123
|
-
################################################################################
|
124
|
-
# Decode the specified data with the RunLengthDecode compression algorithm
|
125
|
-
def runlength(data)
|
126
|
-
pos = 0
|
127
|
-
out = ""
|
128
|
-
|
129
|
-
while pos < data.length
|
130
|
-
if data.respond_to?(:getbyte)
|
131
|
-
length = data.getbyte(pos)
|
132
|
-
else
|
133
|
-
length = data[pos]
|
134
|
-
end
|
135
|
-
pos += 1
|
136
|
-
|
137
|
-
case
|
138
|
-
when length == 128
|
139
|
-
break
|
140
|
-
when length < 128
|
141
|
-
# When the length is < 128, we copy the following length+1 bytes
|
142
|
-
# literally.
|
143
|
-
out << data[pos, length + 1]
|
144
|
-
pos += length
|
145
|
-
else
|
146
|
-
# When the length is > 128, we copy the next byte (257 - length)
|
147
|
-
# times; i.e., "\xFA\x00" ([250, 0]) will expand to
|
148
|
-
# "\x00\x00\x00\x00\x00\x00\x00".
|
149
|
-
out << data[pos, 1] * (257 - length)
|
150
|
-
end
|
151
|
-
|
152
|
-
pos += 1
|
153
|
-
end
|
154
|
-
|
155
|
-
out
|
156
|
-
end
|
157
|
-
################################################################################
|
158
|
-
def depredict(data, opts = {})
|
159
|
-
predictor = (opts || {})[:Predictor].to_i
|
160
|
-
|
161
|
-
case predictor
|
162
|
-
when 0, 1 then
|
163
|
-
data
|
164
|
-
when 2 then
|
165
|
-
tiff_depredict(data, opts)
|
166
|
-
when 10, 11, 12, 13, 14, 15 then
|
167
|
-
png_depredict(data, opts)
|
168
|
-
else
|
169
|
-
raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
|
170
|
-
end
|
171
|
-
end
|
172
|
-
################################################################################
|
173
|
-
def tiff_depredict(data, opts = {})
|
174
|
-
data = data.unpack("C*")
|
175
|
-
unfiltered = []
|
176
|
-
bpc = opts[:BitsPerComponent] || 8
|
177
|
-
pixel_bits = bpc * opts[:Colors]
|
178
|
-
pixel_bytes = pixel_bits / 8
|
179
|
-
line_len = (pixel_bytes * opts[:Columns])
|
180
|
-
pos = 0
|
181
|
-
|
182
|
-
if bpc != 8
|
183
|
-
raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
|
184
|
-
end
|
185
|
-
|
186
|
-
until pos > data.size
|
187
|
-
row_data = data[pos, line_len]
|
188
|
-
row_data.each_with_index do |byte, index|
|
189
|
-
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
190
|
-
row_data[index] = (byte + left) % 256
|
191
|
-
end
|
192
|
-
unfiltered += row_data
|
193
|
-
pos += line_len
|
194
|
-
end
|
195
|
-
|
196
|
-
unfiltered.pack("C*")
|
197
|
-
end
|
198
|
-
################################################################################
|
199
|
-
def png_depredict(data, opts = {})
|
200
|
-
return data if opts.nil? || opts[:Predictor].to_i < 10
|
201
|
-
|
202
|
-
data = data.unpack("C*")
|
203
|
-
|
204
|
-
pixel_bytes = opts[:Colors] || 1
|
205
|
-
scanline_length = (pixel_bytes * opts[:Columns]) + 1
|
206
|
-
row = 0
|
207
|
-
pixels = []
|
208
|
-
paeth, pa, pb, pc = nil
|
209
|
-
until data.empty? do
|
210
|
-
row_data = data.slice! 0, scanline_length
|
211
|
-
filter = row_data.shift
|
212
|
-
case filter
|
213
|
-
when 0 # None
|
214
|
-
when 1 # Sub
|
215
|
-
row_data.each_with_index do |byte, index|
|
216
|
-
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
217
|
-
row_data[index] = (byte + left) % 256
|
218
|
-
#p [byte, left, row_data[index]]
|
219
|
-
end
|
220
|
-
when 2 # Up
|
221
|
-
row_data.each_with_index do |byte, index|
|
222
|
-
col = index / pixel_bytes
|
223
|
-
upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
|
224
|
-
row_data[index] = (upper + byte) % 256
|
225
|
-
end
|
226
|
-
when 3 # Average
|
227
|
-
row_data.each_with_index do |byte, index|
|
228
|
-
col = index / pixel_bytes
|
229
|
-
upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
|
230
|
-
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
231
|
-
|
232
|
-
row_data[index] = (byte + ((left + upper)/2).floor) % 256
|
233
|
-
end
|
234
|
-
when 4 # Paeth
|
235
|
-
left = upper = upper_left = nil
|
236
|
-
row_data.each_with_index do |byte, index|
|
237
|
-
col = index / pixel_bytes
|
238
|
-
|
239
|
-
left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
|
240
|
-
if row.zero?
|
241
|
-
upper = upper_left = 0
|
242
|
-
else
|
243
|
-
upper = pixels[row-1][col][index % pixel_bytes]
|
244
|
-
upper_left = col.zero? ? 0 :
|
245
|
-
pixels[row-1][col-1][index % pixel_bytes]
|
246
|
-
end
|
247
|
-
|
248
|
-
p = left + upper - upper_left
|
249
|
-
pa = (p - left).abs
|
250
|
-
pb = (p - upper).abs
|
251
|
-
pc = (p - upper_left).abs
|
252
|
-
|
253
|
-
paeth = if pa <= pb && pa <= pc
|
254
|
-
left
|
255
|
-
elsif pb <= pc
|
256
|
-
upper
|
257
|
-
else
|
258
|
-
upper_left
|
259
|
-
end
|
260
|
-
|
261
|
-
row_data[index] = (byte + paeth) % 256
|
262
|
-
end
|
263
|
-
else
|
264
|
-
raise ArgumentError, "Invalid filter algorithm #{filter}"
|
265
|
-
end
|
266
|
-
|
267
|
-
s = []
|
268
|
-
row_data.each_slice pixel_bytes do |slice|
|
269
|
-
s << slice
|
270
|
-
end
|
271
|
-
pixels << s
|
272
|
-
row += 1
|
273
|
-
end
|
274
|
-
|
275
|
-
pixels.map { |bytes| bytes.flatten.pack("C*") }.join("")
|
276
|
-
end
|
277
59
|
end
|
278
60
|
end
|
279
|
-
################################################################################
|