pdf-reader 1.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
@@ -0,0 +1,63 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
# A Hash-like object that wraps the array of glyph widths in a CID font
|
10
|
+
# and gives us a nice way to query it for specific widths.
|
11
|
+
#
|
12
|
+
# there are two ways to calculate a cidfont_glyph_width, that are defined
|
13
|
+
# in Section 9.7.4.3 PDF 32000-1:2008 pp 271, the differences are remarked
|
14
|
+
# on below. because both forms may be contained within the
|
15
|
+
# same array, it is a bit difficult to parse this array.
|
16
|
+
class CidWidths
|
17
|
+
extend Forwardable
|
18
|
+
|
19
|
+
# Delegate Hash-style lookups to the parsed widths hash
|
20
|
+
def_delegators :@widths, :[], :fetch
|
21
|
+
|
22
|
+
def initialize(default, array)
|
23
|
+
@widths = parse_array(default, array.dup)
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def parse_array(default, array)
|
29
|
+
widths = Hash.new(default)
|
30
|
+
params = []
|
31
|
+
while array.size > 0
|
32
|
+
params << array.shift
|
33
|
+
|
34
|
+
if params.size == 2 && params.last.is_a?(Array)
|
35
|
+
widths.merge! parse_first_form(params.first, params.last)
|
36
|
+
params = []
|
37
|
+
elsif params.size == 3
|
38
|
+
widths.merge! parse_second_form(params[0], params[1], params[2])
|
39
|
+
params = []
|
40
|
+
end
|
41
|
+
end
|
42
|
+
widths
|
43
|
+
end
|
44
|
+
|
45
|
+
# this is the form 10 [234 63 234 346 47 234] where width of index 10 is
|
46
|
+
# 234, index 11 is 63, etc
|
47
|
+
def parse_first_form(first, widths)
|
48
|
+
widths.inject({}) { |accum, glyph_width|
|
49
|
+
accum[first + accum.size] = glyph_width
|
50
|
+
accum
|
51
|
+
}
|
52
|
+
end
|
53
|
+
|
54
|
+
# this is the form 10 20 123 where all indexes between 10 and 20 have width 123
|
55
|
+
def parse_second_form(first, final, width)
|
56
|
+
(first..final).inject({}) { |accum, index|
|
57
|
+
accum[index] = width
|
58
|
+
accum
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
@@ -24,7 +27,22 @@
|
|
24
27
|
################################################################################
|
25
28
|
|
26
29
|
class PDF::Reader
|
30
|
+
|
31
|
+
# wraps a string containing a PDF CMap and provides convenience methods for
|
32
|
+
# extracting various useful information.
|
33
|
+
#
|
27
34
|
class CMap # :nodoc:
|
35
|
+
CMAP_KEYWORDS = {
|
36
|
+
"begincodespacerange" => 1,
|
37
|
+
"endcodespacerange" => 1,
|
38
|
+
"beginbfchar" => 1,
|
39
|
+
"endbfchar" => 1,
|
40
|
+
"beginbfrange" => 1,
|
41
|
+
"endbfrange" => 1,
|
42
|
+
"begin" => 1,
|
43
|
+
"begincmap" => 1,
|
44
|
+
"def" => 1
|
45
|
+
}
|
28
46
|
|
29
47
|
attr_reader :map
|
30
48
|
|
@@ -34,24 +52,25 @@ class PDF::Reader
|
|
34
52
|
end
|
35
53
|
|
36
54
|
def process_data(data)
|
55
|
+
parser = build_parser(data)
|
37
56
|
mode = nil
|
38
|
-
instructions =
|
57
|
+
instructions = []
|
39
58
|
|
40
|
-
|
41
|
-
if
|
59
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
60
|
+
if token == "beginbfchar"
|
42
61
|
mode = :char
|
43
|
-
elsif
|
62
|
+
elsif token == "endbfchar"
|
44
63
|
process_bfchar_instructions(instructions)
|
45
|
-
instructions =
|
64
|
+
instructions = []
|
46
65
|
mode = nil
|
47
|
-
elsif
|
66
|
+
elsif token == "beginbfrange"
|
48
67
|
mode = :range
|
49
|
-
elsif
|
68
|
+
elsif token == "endbfrange"
|
50
69
|
process_bfrange_instructions(instructions)
|
51
|
-
instructions =
|
70
|
+
instructions = []
|
52
71
|
mode = nil
|
53
72
|
elsif mode == :char || mode == :range
|
54
|
-
instructions <<
|
73
|
+
instructions << token
|
55
74
|
end
|
56
75
|
end
|
57
76
|
end
|
@@ -60,9 +79,13 @@ class PDF::Reader
|
|
60
79
|
@map.size
|
61
80
|
end
|
62
81
|
|
82
|
+
# Convert a glyph code into one or more Codepoints.
|
83
|
+
#
|
84
|
+
# Returns an array of Integers.
|
85
|
+
#
|
63
86
|
def decode(c)
|
64
87
|
# TODO: implement the conversion
|
65
|
-
return c unless
|
88
|
+
return c unless Integer === c
|
66
89
|
@map[c]
|
67
90
|
end
|
68
91
|
|
@@ -73,33 +96,46 @@ class PDF::Reader
|
|
73
96
|
Parser.new(buffer)
|
74
97
|
end
|
75
98
|
|
99
|
+
# The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
|
100
|
+
# theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
|
101
|
+
#
|
102
|
+
# str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
|
103
|
+
#
|
104
|
+
# However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
|
105
|
+
# exception when we try converting broken UTF-16 to UTF-8
|
106
|
+
#
|
76
107
|
def str_to_int(str)
|
77
|
-
return nil if str.nil? || str.size == 0
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
108
|
+
return nil if str.nil? || str.size == 0
|
109
|
+
unpacked_string = if str.bytesize == 1 # UTF-8
|
110
|
+
str.unpack("C*")
|
111
|
+
else # UTF-16
|
112
|
+
str.unpack("n*")
|
113
|
+
end
|
114
|
+
result = []
|
115
|
+
while unpacked_string.any? do
|
116
|
+
if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
|
117
|
+
# this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
|
118
|
+
# let's convert to a UTF-32. (the high surrogate is between 0xD800-0xDBFF, the
|
119
|
+
# low surrogate is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
|
120
|
+
points = [unpacked_string.shift, unpacked_string.shift]
|
121
|
+
result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
|
122
|
+
else
|
123
|
+
result << unpacked_string.shift
|
124
|
+
end
|
83
125
|
end
|
126
|
+
result
|
84
127
|
end
|
85
128
|
|
86
129
|
def process_bfchar_instructions(instructions)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
@map[find] = replace
|
92
|
-
find = str_to_int(parser.parse_token)
|
93
|
-
replace = str_to_int(parser.parse_token)
|
130
|
+
instructions.each_slice(2) do |one, two|
|
131
|
+
find = str_to_int(one)
|
132
|
+
replace = str_to_int(two)
|
133
|
+
@map[find.first] = replace
|
94
134
|
end
|
95
135
|
end
|
96
136
|
|
97
137
|
def process_bfrange_instructions(instructions)
|
98
|
-
|
99
|
-
start = parser.parse_token
|
100
|
-
finish = parser.parse_token
|
101
|
-
to = parser.parse_token
|
102
|
-
while start && finish && to
|
138
|
+
instructions.each_slice(3) do |start, finish, to|
|
103
139
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
104
140
|
bfrange_type_one(start, finish, to)
|
105
141
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -107,28 +143,23 @@ class PDF::Reader
|
|
107
143
|
else
|
108
144
|
raise "invalid bfrange section"
|
109
145
|
end
|
110
|
-
start = parser.parse_token
|
111
|
-
finish = parser.parse_token
|
112
|
-
to = parser.parse_token
|
113
146
|
end
|
114
147
|
end
|
115
148
|
|
116
149
|
def bfrange_type_one(start_code, end_code, dst)
|
117
|
-
start_code = str_to_int(start_code)
|
118
|
-
end_code = str_to_int(end_code)
|
150
|
+
start_code = str_to_int(start_code)[0]
|
151
|
+
end_code = str_to_int(end_code)[0]
|
119
152
|
dst = str_to_int(dst)
|
120
153
|
|
121
154
|
# add all values in the range to our mapping
|
122
155
|
(start_code..end_code).each_with_index do |val, idx|
|
123
|
-
@map[val] = dst + idx
|
124
|
-
# ensure a single range does not exceed 255 chars
|
125
|
-
raise PDF::Reader::MalformedPDFError, "a CMap bfrange cann't exceed 255 chars" if idx > 255
|
156
|
+
@map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
|
126
157
|
end
|
127
158
|
end
|
128
159
|
|
129
160
|
def bfrange_type_two(start_code, end_code, dst)
|
130
|
-
start_code = str_to_int(start_code)
|
131
|
-
end_code = str_to_int(end_code)
|
161
|
+
start_code = str_to_int(start_code)[0]
|
162
|
+
end_code = str_to_int(end_code)[0]
|
132
163
|
from_range = (start_code..end_code)
|
133
164
|
|
134
165
|
# add all values in the range to our mapping
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2008 James Healy (jimmy@deefa.com)
|
@@ -24,6 +27,8 @@
|
|
24
27
|
################################################################################
|
25
28
|
|
26
29
|
class PDF::Reader
|
30
|
+
# Util class for working with string encodings in PDF files. Mostly used to
|
31
|
+
# convert strings of various PDF-dialect encodings into UTF-8.
|
27
32
|
class Encoding # :nodoc:
|
28
33
|
CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
|
29
34
|
24,25,26,27,28,29,30,31]
|
@@ -32,19 +37,25 @@ class PDF::Reader
|
|
32
37
|
attr_reader :unpack
|
33
38
|
|
34
39
|
def initialize(enc)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
enc
|
40
|
+
@mapping = default_mapping # maps from character codes to Unicode codepoints
|
41
|
+
@string_cache = {} # maps from character codes to UTF-8 strings.
|
42
|
+
|
43
|
+
@enc_name = if enc.kind_of?(Hash)
|
44
|
+
enc[:Encoding] || enc[:BaseEncoding]
|
45
|
+
elsif enc && enc.respond_to?(:to_sym)
|
46
|
+
enc.to_sym
|
40
47
|
else
|
41
|
-
|
48
|
+
:StandardEncoding
|
42
49
|
end
|
43
50
|
|
44
|
-
@
|
45
|
-
@
|
46
|
-
|
51
|
+
@unpack = get_unpack(@enc_name)
|
52
|
+
@map_file = get_mapping_file(@enc_name)
|
53
|
+
|
47
54
|
load_mapping(@map_file) if @map_file
|
55
|
+
|
56
|
+
if enc.is_a?(Hash) && enc[:Differences]
|
57
|
+
self.differences = enc[:Differences]
|
58
|
+
end
|
48
59
|
end
|
49
60
|
|
50
61
|
# set the differences table for this encoding. should be an array in the following format:
|
@@ -66,6 +77,7 @@ class PDF::Reader
|
|
66
77
|
byte = val.to_i
|
67
78
|
else
|
68
79
|
@differences[byte] = val
|
80
|
+
@mapping[byte] = glyphlist.name_to_unicode(val)
|
69
81
|
byte += 1
|
70
82
|
end
|
71
83
|
end
|
@@ -73,6 +85,7 @@ class PDF::Reader
|
|
73
85
|
end
|
74
86
|
|
75
87
|
def differences
|
88
|
+
# this method is only used by the spec tests
|
76
89
|
@differences ||= {}
|
77
90
|
end
|
78
91
|
|
@@ -95,8 +108,52 @@ class PDF::Reader
|
|
95
108
|
end
|
96
109
|
end
|
97
110
|
|
111
|
+
def int_to_utf8_string(glyph_code)
|
112
|
+
@string_cache[glyph_code] ||= internal_int_to_utf8_string(glyph_code)
|
113
|
+
end
|
114
|
+
|
115
|
+
# convert an integer glyph code into an Adobe glyph name.
|
116
|
+
#
|
117
|
+
# int_to_name(65)
|
118
|
+
# => [:A]
|
119
|
+
#
|
120
|
+
def int_to_name(glyph_code)
|
121
|
+
if @enc_name == "Identity-H" || @enc_name == "Identity-V"
|
122
|
+
[]
|
123
|
+
elsif differences[glyph_code]
|
124
|
+
[differences[glyph_code]]
|
125
|
+
elsif @mapping[glyph_code]
|
126
|
+
glyphlist.unicode_to_name(@mapping[glyph_code])
|
127
|
+
else
|
128
|
+
[]
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
98
132
|
private
|
99
133
|
|
134
|
+
# returns a hash that:
|
135
|
+
# - maps control chars and nil to the unicode "unknown character"
|
136
|
+
# - leaves all other bytes <= 255 unchanged
|
137
|
+
#
|
138
|
+
# Each specific encoding will change this default as required for their glyphs
|
139
|
+
def default_mapping
|
140
|
+
all_bytes = (0..255).to_a
|
141
|
+
tuples = all_bytes.map {|i|
|
142
|
+
CONTROL_CHARS.include?(i) ? [i, UNKNOWN_CHAR] : [i,i]
|
143
|
+
}
|
144
|
+
mapping = Hash[tuples]
|
145
|
+
mapping[nil] = UNKNOWN_CHAR
|
146
|
+
mapping
|
147
|
+
end
|
148
|
+
|
149
|
+
def internal_int_to_utf8_string(glyph_code)
|
150
|
+
ret = [
|
151
|
+
@mapping[glyph_code.to_i] || glyph_code.to_i
|
152
|
+
].pack("U*")
|
153
|
+
ret.force_encoding("UTF-8")
|
154
|
+
ret
|
155
|
+
end
|
156
|
+
|
100
157
|
def utf8_conversion_impossible?
|
101
158
|
@enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
102
159
|
end
|
@@ -104,33 +161,13 @@ class PDF::Reader
|
|
104
161
|
def little_boxes(times)
|
105
162
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
106
163
|
ret = codepoints.pack("U*")
|
107
|
-
ret.force_encoding("UTF-8")
|
164
|
+
ret.force_encoding("UTF-8")
|
108
165
|
ret
|
109
166
|
end
|
110
167
|
|
111
168
|
def convert_to_utf8(str)
|
112
|
-
ret = str.unpack(unpack).map { |c|
|
113
|
-
|
114
|
-
}.map { |c|
|
115
|
-
mapping[c] || c
|
116
|
-
}.map { |c|
|
117
|
-
names_to_unicode[c] || c
|
118
|
-
}.map { |c|
|
119
|
-
if PDF::Reader::Encoding::CONTROL_CHARS.include?(c)
|
120
|
-
PDF::Reader::Encoding::UNKNOWN_CHAR
|
121
|
-
else
|
122
|
-
c
|
123
|
-
end
|
124
|
-
}.map { |c|
|
125
|
-
if c.nil? || !c.is_a?(Fixnum)
|
126
|
-
PDF::Reader::Encoding::UNKNOWN_CHAR
|
127
|
-
else
|
128
|
-
c
|
129
|
-
end
|
130
|
-
}.pack("U*")
|
131
|
-
|
132
|
-
ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
|
133
|
-
|
169
|
+
ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
|
170
|
+
ret.force_encoding("UTF-8")
|
134
171
|
ret
|
135
172
|
end
|
136
173
|
|
@@ -164,26 +201,15 @@ class PDF::Reader
|
|
164
201
|
end
|
165
202
|
end
|
166
203
|
|
167
|
-
def
|
168
|
-
@
|
169
|
-
end
|
170
|
-
|
171
|
-
def has_mapping?
|
172
|
-
mapping.size > 0
|
173
|
-
end
|
174
|
-
|
175
|
-
def names_to_unicode
|
176
|
-
@names_to_unicode ||= PDF::Reader::GlyphHash.new
|
204
|
+
def glyphlist
|
205
|
+
@glyphlist ||= PDF::Reader::GlyphHash.new
|
177
206
|
end
|
178
207
|
|
179
208
|
def load_mapping(file)
|
180
|
-
|
181
|
-
|
182
|
-
RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
|
183
|
-
File.open(file, mode) do |f|
|
209
|
+
File.open(file, "r:BINARY") do |f|
|
184
210
|
f.each do |l|
|
185
|
-
|
186
|
-
mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
211
|
+
_m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
|
212
|
+
@mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
|
187
213
|
end
|
188
214
|
end
|
189
215
|
end
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -21,34 +24,51 @@
|
|
21
24
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
25
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
26
|
#
|
24
|
-
|
25
27
|
class PDF::Reader
|
26
28
|
################################################################################
|
27
29
|
# An internal PDF::Reader class that helps to verify various parts of the PDF file
|
28
30
|
# are valid
|
29
31
|
class Error # :nodoc:
|
30
32
|
################################################################################
|
31
|
-
def self.str_assert
|
33
|
+
def self.str_assert(lvalue, rvalue, chars=nil)
|
32
34
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
33
35
|
lvalue = lvalue[0,chars] if chars
|
34
36
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue != rvalue
|
35
37
|
end
|
36
38
|
################################################################################
|
37
|
-
def self.str_assert_not
|
39
|
+
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
38
40
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
39
41
|
lvalue = lvalue[0,chars] if chars
|
40
42
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found #{lvalue} instead" if lvalue == rvalue
|
41
43
|
end
|
42
44
|
################################################################################
|
43
|
-
def self.assert_equal
|
45
|
+
def self.assert_equal(lvalue, rvalue)
|
44
46
|
raise MalformedPDFError, "PDF malformed, expected #{rvalue} but found #{lvalue} instead" if lvalue != rvalue
|
45
47
|
end
|
46
48
|
################################################################################
|
47
49
|
end
|
50
|
+
|
48
51
|
################################################################################
|
52
|
+
# an exception that is raised when we believe the current PDF is not following
|
53
|
+
# the PDF spec and cannot be recovered
|
49
54
|
class MalformedPDFError < RuntimeError; end
|
55
|
+
|
56
|
+
################################################################################
|
57
|
+
# an exception that is raised when an invalid page number is used
|
58
|
+
class InvalidPageError < ArgumentError; end
|
59
|
+
|
60
|
+
################################################################################
|
61
|
+
# an exception that is raised when a PDF object appears to be invalid
|
50
62
|
class InvalidObjectError < MalformedPDFError; end
|
63
|
+
|
64
|
+
################################################################################
|
65
|
+
# an exception that is raised when a PDF follows the specs but uses a feature
|
66
|
+
# that we don't support just yet
|
51
67
|
class UnsupportedFeatureError < RuntimeError; end
|
68
|
+
|
69
|
+
################################################################################
|
70
|
+
# an exception that is raised when a PDF is encrypted and we don't have the
|
71
|
+
# necessary data to decrypt it
|
52
72
|
class EncryptedPDFError < UnsupportedFeatureError; end
|
53
73
|
end
|
54
74
|
################################################################################
|