pdf-reader 1.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -0,0 +1,63 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
# A Hash-like object that wraps the array of glyph widths in a CID font
|
10
|
+
# and gives us a nice way to query it for specific widths.
|
11
|
+
#
|
12
|
+
# there are two ways to calculate a cidfont_glyph_width, that are defined
|
13
|
+
# in Section 9.7.4.3 PDF 32000-1:2008 pp 271, the differences are remarked
|
14
|
+
# on below. because of these differences, which may be contained within the
|
15
|
+
# same array, it is a bit difficult to parse this array.
|
16
|
+
class CidWidths
|
17
|
+
extend Forwardable
|
18
|
+
|
19
|
+
# Delegate hash-style lookups ([] and fetch) to the parsed widths hash
|
20
|
+
def_delegators :@widths, :[], :fetch
|
21
|
+
|
22
|
+
def initialize(default, array)
|
23
|
+
@widths = parse_array(default, array.dup)
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def parse_array(default, array)
|
29
|
+
widths = Hash.new(default)
|
30
|
+
params = []
|
31
|
+
while array.size > 0
|
32
|
+
params << array.shift
|
33
|
+
|
34
|
+
if params.size == 2 && params.last.is_a?(Array)
|
35
|
+
widths.merge! parse_first_form(params.first, params.last)
|
36
|
+
params = []
|
37
|
+
elsif params.size == 3
|
38
|
+
widths.merge! parse_second_form(params[0], params[1], params[2])
|
39
|
+
params = []
|
40
|
+
end
|
41
|
+
end
|
42
|
+
widths
|
43
|
+
end
|
44
|
+
|
45
|
+
# this is the form 10 [234 63 234 346 47 234] where width of index 10 is
|
46
|
+
# 234, index 11 is 63, etc
|
47
|
+
def parse_first_form(first, widths)
|
48
|
+
widths.inject({}) { |accum, glyph_width|
|
49
|
+
accum[first + accum.size] = glyph_width
|
50
|
+
accum
|
51
|
+
}
|
52
|
+
end
|
53
|
+
|
54
|
+
# this is the form 10 20 123 where all index between 10 and 20 have width 123
|
55
|
+
def parse_second_form(first, final, width)
|
56
|
+
(first..final).inject({}) { |accum, index|
|
57
|
+
accum[index] = width
|
58
|
+
accum
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
@@ -24,7 +27,22 @@
|
|
24
27
|
################################################################################
|
25
28
|
|
26
29
|
class PDF::Reader
|
30
|
+
|
31
|
+
# wraps a string containing a PDF CMap and provides convenience methods for
|
32
|
+
# extracting various useful information.
|
33
|
+
#
|
27
34
|
class CMap # :nodoc:
|
35
|
+
CMAP_KEYWORDS = {
|
36
|
+
"begincodespacerange" => 1,
|
37
|
+
"endcodespacerange" => 1,
|
38
|
+
"beginbfchar" => 1,
|
39
|
+
"endbfchar" => 1,
|
40
|
+
"beginbfrange" => 1,
|
41
|
+
"endbfrange" => 1,
|
42
|
+
"begin" => 1,
|
43
|
+
"begincmap" => 1,
|
44
|
+
"def" => 1
|
45
|
+
}
|
28
46
|
|
29
47
|
attr_reader :map
|
30
48
|
|
@@ -34,24 +52,25 @@ class PDF::Reader
|
|
34
52
|
end
|
35
53
|
|
36
54
|
def process_data(data)
|
55
|
+
parser = build_parser(data)
|
37
56
|
mode = nil
|
38
|
-
instructions =
|
57
|
+
instructions = []
|
39
58
|
|
40
|
-
|
41
|
-
if
|
59
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
60
|
+
if token == "beginbfchar"
|
42
61
|
mode = :char
|
43
|
-
elsif
|
62
|
+
elsif token == "endbfchar"
|
44
63
|
process_bfchar_instructions(instructions)
|
45
|
-
instructions =
|
64
|
+
instructions = []
|
46
65
|
mode = nil
|
47
|
-
elsif
|
66
|
+
elsif token == "beginbfrange"
|
48
67
|
mode = :range
|
49
|
-
elsif
|
68
|
+
elsif token == "endbfrange"
|
50
69
|
process_bfrange_instructions(instructions)
|
51
|
-
instructions =
|
70
|
+
instructions = []
|
52
71
|
mode = nil
|
53
72
|
elsif mode == :char || mode == :range
|
54
|
-
instructions <<
|
73
|
+
instructions << token
|
55
74
|
end
|
56
75
|
end
|
57
76
|
end
|
@@ -60,9 +79,13 @@ class PDF::Reader
|
|
60
79
|
@map.size
|
61
80
|
end
|
62
81
|
|
82
|
+
# Convert a glyph code into one or more Codepoints.
|
83
|
+
#
|
84
|
+
# Returns an array of Integers.
|
85
|
+
#
|
63
86
|
def decode(c)
|
64
87
|
# TODO: implement the conversion
|
65
|
-
return c unless
|
88
|
+
return c unless Integer === c
|
66
89
|
@map[c]
|
67
90
|
end
|
68
91
|
|
@@ -73,33 +96,46 @@ class PDF::Reader
|
|
73
96
|
Parser.new(buffer)
|
74
97
|
end
|
75
98
|
|
99
|
+
# The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
|
100
|
+
# theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
|
101
|
+
#
|
102
|
+
# str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
|
103
|
+
#
|
104
|
+
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
105
|
+
# exception when we try converting broken UTF-16 to UTF-8
|
106
|
+
#
|
76
107
|
def str_to_int(str)
  # Returns an Array of Integer codepoints, or nil when str is nil or empty.
  return nil if str.nil? || str.size == 0

  # A single byte is read as one 8-bit code; anything longer is read as
  # big-endian 16-bit units (UTF-16BE).
  units = str.bytesize == 1 ? str.unpack("C*") : str.unpack("n*")

  result = []
  until units.empty?
    # A high surrogate is any unit in 0xD800..0xDBFF *inclusive*; it combines
    # with the following unit (the low surrogate, nominally 0xDC00..0xDFFF)
    # into one codepoint above U+FFFF, e.g. U+1D44E is (0xD835, 0xDC4E).
    # See the Unicode Standard, chapter 3. The following unit is consumed
    # without validation, so broken pairs yield a value rather than raising.
    if units.size >= 2 && units[0] >= 0xD800 && units[0] <= 0xDBFF
      high = units.shift
      low = units.shift
      result << (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
    else
      result << units.shift
    end
  end
  result
end
|
85
128
|
|
86
129
|
def process_bfchar_instructions(instructions)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
@map[find] = replace
|
92
|
-
find = str_to_int(parser.parse_token)
|
93
|
-
replace = str_to_int(parser.parse_token)
|
130
|
+
instructions.each_slice(2) do |one, two|
|
131
|
+
find = str_to_int(one)
|
132
|
+
replace = str_to_int(two)
|
133
|
+
@map[find.first] = replace
|
94
134
|
end
|
95
135
|
end
|
96
136
|
|
97
137
|
def process_bfrange_instructions(instructions)
|
98
|
-
|
99
|
-
start = parser.parse_token
|
100
|
-
finish = parser.parse_token
|
101
|
-
to = parser.parse_token
|
102
|
-
while start && finish && to
|
138
|
+
instructions.each_slice(3) do |start, finish, to|
|
103
139
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
104
140
|
bfrange_type_one(start, finish, to)
|
105
141
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -107,28 +143,23 @@ class PDF::Reader
|
|
107
143
|
else
|
108
144
|
raise "invalid bfrange section"
|
109
145
|
end
|
110
|
-
start = parser.parse_token
|
111
|
-
finish = parser.parse_token
|
112
|
-
to = parser.parse_token
|
113
146
|
end
|
114
147
|
end
|
115
148
|
|
116
149
|
# Handle a bfrange entry of the form <start> <end> <dst>, where all three
# are strings. Every code in start..end is mapped to the codepoints of dst,
# with the final codepoint advanced by the code's offset from start.
#
# Writes the resulting Arrays of Integers into @map.
def bfrange_type_one(start_code, end_code, dst)
  start_code = str_to_int(start_code)[0]
  end_code   = str_to_int(end_code)[0]
  dst        = str_to_int(dst)

  # add all values in the range to our mapping
  (start_code..end_code).each_with_index do |val, idx|
    # Only the last codepoint of the destination is incremented; any leading
    # codepoints are shared by every code in the range.
    @map[val] = dst[0...-1] + [dst[-1] + idx]
  end
end
|
128
159
|
|
129
160
|
def bfrange_type_two(start_code, end_code, dst)
|
130
|
-
start_code = str_to_int(start_code)
|
131
|
-
end_code = str_to_int(end_code)
|
161
|
+
start_code = str_to_int(start_code)[0]
|
162
|
+
end_code = str_to_int(end_code)[0]
|
132
163
|
from_range = (start_code..end_code)
|
133
164
|
|
134
165
|
# add all values in the range to our mapping
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
@@ -24,6 +27,8 @@
|
|
24
27
|
################################################################################
|
25
28
|
|
26
29
|
class PDF::Reader
|
30
|
+
# Util class for working with string encodings in PDF files. Mostly used to
|
31
|
+
# convert strings of various PDF-dialect encodings into UTF-8.
|
27
32
|
class Encoding # :nodoc:
|
28
33
|
CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
|
29
34
|
24,25,26,27,28,29,30,31]
|
@@ -32,19 +37,25 @@ class PDF::Reader
|
|
32
37
|
attr_reader :unpack
|
33
38
|
|
34
39
|
def initialize(enc)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
enc
|
40
|
+
@mapping = default_mapping # maps from character codes to Unicode codepoints
|
41
|
+
@string_cache = {} # maps from character codes to UTF-8 strings.
|
42
|
+
|
43
|
+
@enc_name = if enc.kind_of?(Hash)
|
44
|
+
enc[:Encoding] || enc[:BaseEncoding]
|
45
|
+
elsif enc && enc.respond_to?(:to_sym)
|
46
|
+
enc.to_sym
|
40
47
|
else
|
41
|
-
|
48
|
+
:StandardEncoding
|
42
49
|
end
|
43
50
|
|
44
|
-
@
|
45
|
-
@
|
46
|
-
|
51
|
+
@unpack = get_unpack(@enc_name)
|
52
|
+
@map_file = get_mapping_file(@enc_name)
|
53
|
+
|
47
54
|
load_mapping(@map_file) if @map_file
|
55
|
+
|
56
|
+
if enc.is_a?(Hash) && enc[:Differences]
|
57
|
+
self.differences = enc[:Differences]
|
58
|
+
end
|
48
59
|
end
|
49
60
|
|
50
61
|
# set the differences table for this encoding. should be an array in the following format:
|
@@ -66,6 +77,7 @@ class PDF::Reader
|
|
66
77
|
byte = val.to_i
|
67
78
|
else
|
68
79
|
@differences[byte] = val
|
80
|
+
@mapping[byte] = glyphlist.name_to_unicode(val)
|
69
81
|
byte += 1
|
70
82
|
end
|
71
83
|
end
|
@@ -73,6 +85,7 @@ class PDF::Reader
|
|
73
85
|
end
|
74
86
|
|
75
87
|
def differences
|
88
|
+
# this method is only used by the spec tests
|
76
89
|
@differences ||= {}
|
77
90
|
end
|
78
91
|
|
@@ -95,8 +108,52 @@ class PDF::Reader
|
|
95
108
|
end
|
96
109
|
end
|
97
110
|
|
111
|
+
def int_to_utf8_string(glyph_code)
|
112
|
+
@string_cache[glyph_code] ||= internal_int_to_utf8_string(glyph_code)
|
113
|
+
end
|
114
|
+
|
115
|
+
# convert an integer glyph code into an Adobe glyph name.
|
116
|
+
#
|
117
|
+
# int_to_name(65)
|
118
|
+
# => [:A]
|
119
|
+
#
|
120
|
+
def int_to_name(glyph_code)
  # Identity-H / Identity-V have no glyph names. The encoding name is stored
  # as a Symbol (see utf8_conversion_impossible?), so compare by its string
  # form; comparing the Symbol against a String literal never matches.
  return [] if %w[Identity-H Identity-V].include?(@enc_name.to_s)

  if differences[glyph_code]
    # an explicit /Differences entry wins over the base encoding
    [differences[glyph_code]]
  elsif @mapping[glyph_code]
    glyphlist.unicode_to_name(@mapping[glyph_code])
  else
    []
  end
end
|
131
|
+
|
98
132
|
private
|
99
133
|
|
134
|
+
# returns a hash that:
|
135
|
+
# - maps control chars and nil to the unicode "unknown character"
|
136
|
+
# - leaves all other bytes <= 255 unchanged
|
137
|
+
#
|
138
|
+
# Each specific encoding will change this default as required for their glyphs
|
139
|
+
def default_mapping
|
140
|
+
all_bytes = (0..255).to_a
|
141
|
+
tuples = all_bytes.map {|i|
|
142
|
+
CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
|
143
|
+
}
|
144
|
+
mapping = Hash[tuples]
|
145
|
+
mapping[nil] = UNKNOWN_CHAR
|
146
|
+
mapping
|
147
|
+
end
|
148
|
+
|
149
|
+
def internal_int_to_utf8_string(glyph_code)
|
150
|
+
ret = [
|
151
|
+
@mapping[glyph_code.to_i] || glyph_code.to_i
|
152
|
+
].pack("U*")
|
153
|
+
ret.force_encoding("UTF-8")
|
154
|
+
ret
|
155
|
+
end
|
156
|
+
|
100
157
|
def utf8_conversion_impossible?
|
101
158
|
@enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
102
159
|
end
|
@@ -104,33 +161,13 @@ class PDF::Reader
|
|
104
161
|
def little_boxes(times)
|
105
162
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
106
163
|
ret = codepoints.pack("U*")
|
107
|
-
ret.force_encoding("UTF-8")
|
164
|
+
ret.force_encoding("UTF-8")
|
108
165
|
ret
|
109
166
|
end
|
110
167
|
|
111
168
|
def convert_to_utf8(str)
|
112
|
-
ret = str.unpack(unpack).map { |c|
|
113
|
-
|
114
|
-
}.map { |c|
|
115
|
-
mapping[c] || c
|
116
|
-
}.map { |c|
|
117
|
-
names_to_unicode[c] || c
|
118
|
-
}.map { |c|
|
119
|
-
if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
|
120
|
-
PDF::Reader::Encoding::UNKNOWN_CHAR
|
121
|
-
else
|
122
|
-
c
|
123
|
-
end
|
124
|
-
}.map { |c|
|
125
|
-
if c.nil? || !c.is_a?(Fixnum)
|
126
|
-
PDF::Reader::Encoding::UNKNOWN_CHAR
|
127
|
-
else
|
128
|
-
c
|
129
|
-
end
|
130
|
-
}.pack("U*")
|
131
|
-
|
132
|
-
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
|
133
|
-
|
169
|
+
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
170
|
+
ret.force_encoding("UTF-8")
|
134
171
|
ret
|
135
172
|
end
|
136
173
|
|
@@ -164,26 +201,15 @@ class PDF::Reader
|
|
164
201
|
end
|
165
202
|
end
|
166
203
|
|
167
|
-
def
|
168
|
-
@
|
169
|
-
end
|
170
|
-
|
171
|
-
def has_mapping?
|
172
|
-
mapping.size > 0
|
173
|
-
end
|
174
|
-
|
175
|
-
def names_to_unicode
|
176
|
-
@names_to_unicode ||= PDF::Reader::GlyphHash.new
|
204
|
+
def glyphlist
|
205
|
+
@glyphlist ||= PDF::Reader::GlyphHash.new
|
177
206
|
end
|
178
207
|
|
179
208
|
def load_mapping(file)
|
180
|
-
|
181
|
-
|
182
|
-
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
183
|
-
File.open(file, mode) do |f|
|
209
|
+
File.open(file, "r:BINARY") do |f|
|
184
210
|
f.each do |l|
|
185
|
-
|
186
|
-
mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
211
|
+
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
212
|
+
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
187
213
|
end
|
188
214
|
end
|
189
215
|
end
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -21,34 +24,51 @@
|
|
21
24
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
25
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
26
|
#
|
24
|
-
|
25
27
|
class PDF::Reader
|
26
28
|
################################################################################
|
27
29
|
# An internal PDF::Reader class that helps to verify various parts of the PDF file
|
28
30
|
# are valid
|
29
31
|
class Error # :nodoc:
|
30
32
|
################################################################################
|
31
|
-
def self.str_assert
|
33
|
+
def self.str_assert(lvalue, rvalue, chars=nil)
|
32
34
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
33
35
|
lvalue = lvalue[0,chars] if chars
|
34
36
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
35
37
|
end
|
36
38
|
################################################################################
|
37
|
-
def self.str_assert_not
|
39
|
+
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
38
40
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
39
41
|
lvalue = lvalue[0,chars] if chars
|
40
42
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
41
43
|
end
|
42
44
|
################################################################################
|
43
|
-
def self.assert_equal
|
45
|
+
def self.assert_equal(lvalue, rvalue)
|
44
46
|
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
45
47
|
end
|
46
48
|
################################################################################
|
47
49
|
end
|
50
|
+
|
48
51
|
################################################################################
|
52
|
+
# an exception that is raised when we believe the current PDF is not following
|
53
|
+
# the PDF spec and cannot be recovered
|
49
54
|
class MalformedPDFError < RuntimeError; end
|
55
|
+
|
56
|
+
################################################################################
|
57
|
+
# an exception that is raised when an invalid page number is used
|
58
|
+
class InvalidPageError < ArgumentError; end
|
59
|
+
|
60
|
+
################################################################################
|
61
|
+
# an exception that is raised when a PDF object appears to be invalid
|
50
62
|
class InvalidObjectError < MalformedPDFError; end
|
63
|
+
|
64
|
+
################################################################################
|
65
|
+
# an exception that is raised when a PDF follows the specs but uses a feature
|
66
|
+
# that we don't support just yet
|
51
67
|
class UnsupportedFeatureError < RuntimeError; end
|
68
|
+
|
69
|
+
################################################################################
|
70
|
+
# an exception that is raised when a PDF is encrypted and we don't have the
|
71
|
+
# necessary data to decrypt it
|
52
72
|
class EncryptedPDFError < UnsupportedFeatureError; end
|
53
73
|
end
|
54
74
|
################################################################################
|