pdf-reader 1.4.1 → 2.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_object +4 -1
- data/lib/pdf/reader.rb +7 -112
- data/lib/pdf/reader/buffer.rb +2 -1
- data/lib/pdf/reader/cmap.rb +26 -24
- data/lib/pdf/reader/encoding.rb +4 -5
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +1 -5
- data/lib/pdf/reader/font.rb +1 -11
- data/lib/pdf/reader/glyph_hash.rb +6 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +35 -16
- data/lib/pdf/reader/page_layout.rb +6 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -304
- data/lib/pdf/reader/parser.rb +6 -4
- data/lib/pdf/reader/standard_security_handler.rb +18 -14
- data/lib/pdf/reader/text_run.rb +3 -9
- metadata +14 -47
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4ea96ce79d9f4cc65a0a026ea7e50da7b33cd19
|
4
|
+
data.tar.gz: e2302c2d18cdc64cd81654f30658b5cf1e8ae3c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31da13f8b8e38dffbb19a33855beb1c85a14c2633137fd8e1957db14f3bac16a434c174e79bab6092c72954e6a6f87f0ab585562e79d39d5ede8f9398fd63f7b
|
7
|
+
data.tar.gz: e097e8aed8bbeb918676bacded748a538e109fdcf1e8740cfe1ce3d63105d891cbe2a90ad228bb87fb494cd796bd7e1c66138592ff21d62c97b87fa270eb0899
|
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
|
+
v2.0.0.beta1 (15th February 2017)
|
2
|
+
- BREAKING CHANGE: remove all methods that were deprecated in 1.0.0
|
3
|
+
- Bug: Support extra encrypted PDF variants (thanks to Gyuchang Jun)
|
4
|
+
- various bug fixes
|
5
|
+
|
1
6
|
v1.4.1 (2nd January 2017)
|
2
|
-
- improve
|
7
|
+
- improve compatibility with ruby 2.4 (thanks Akira Matsuda)
|
3
8
|
- various bug fixes
|
4
9
|
|
5
10
|
v1.4.0 (22nd February 2016)
|
@@ -91,10 +96,10 @@ v0.9.2 (24th April 2011)
|
|
91
96
|
|
92
97
|
v0.9.1 (21st December 2010)
|
93
98
|
- force gem to only install on ruby 1.8.7 or higher
|
94
|
-
- maintaining
|
99
|
+
- maintaining support for earlier versions takes more time than I have
|
95
100
|
available at the moment
|
96
101
|
- bug: fix parsing of obscure pdf name format
|
97
|
-
- bug: fix behaviour when loaded in
|
102
|
+
- bug: fix behaviour when loaded in conjunction with htmldoc gem
|
98
103
|
|
99
104
|
v0.9.0 (19th November 2010)
|
100
105
|
- support for pdf 1.5+ files that use object and xref streams
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# Release Notes
|
2
2
|
|
3
3
|
The PDF::Reader library implements a PDF parser conforming as much as possible
|
4
4
|
to the PDF specification from Adobe.
|
@@ -15,46 +15,55 @@ higher level functionality - it's not going to render a PDF for you. There are
|
|
15
15
|
a few exceptions to support very common use cases like extracting text from a
|
16
16
|
page.
|
17
17
|
|
18
|
-
|
18
|
+
# Installation
|
19
19
|
|
20
20
|
The recommended installation method is via Rubygems.
|
21
21
|
|
22
|
+
```ruby
|
22
23
|
gem install pdf-reader
|
24
|
+
```
|
23
25
|
|
24
|
-
|
26
|
+
# Usage
|
25
27
|
|
26
28
|
Begin by creating a PDF::Reader instance that points to a PDF file. Document
|
27
29
|
level information (metadata, page count, bookmarks, etc) is available via
|
28
30
|
this object.
|
29
31
|
|
32
|
+
```ruby
|
30
33
|
reader = PDF::Reader.new("somefile.pdf")
|
31
34
|
|
32
35
|
puts reader.pdf_version
|
33
36
|
puts reader.info
|
34
37
|
puts reader.metadata
|
35
38
|
puts reader.page_count
|
39
|
+
```
|
36
40
|
|
37
41
|
PDF::Reader.new accepts an IO stream or a filename. Here's an example with
|
38
42
|
an IO stream:
|
39
43
|
|
44
|
+
```ruby
|
40
45
|
require 'open-uri'
|
41
46
|
|
42
47
|
io = open('http://example.com/somefile.pdf')
|
43
48
|
reader = PDF::Reader.new(io)
|
44
49
|
puts reader.info
|
50
|
+
```
|
45
51
|
|
46
52
|
If you open a PDF with File#open or IO#open, I strongly recommend using "rb"
|
47
53
|
mode to ensure the file isn't mangled by ruby being 'helpful'. This is
|
48
54
|
particularly important on windows and MRI >= 1.9.2.
|
49
55
|
|
56
|
+
```ruby
|
50
57
|
File.open("somefile.pdf", "rb") do |io|
|
51
58
|
reader = PDF::Reader.new(io)
|
52
59
|
puts reader.info
|
53
60
|
end
|
61
|
+
```
|
54
62
|
|
55
63
|
PDF is a page based file format, so most visible information is available via
|
56
64
|
page-based iteration
|
57
65
|
|
66
|
+
```ruby
|
58
67
|
reader = PDF::Reader.new("somefile.pdf")
|
59
68
|
|
60
69
|
reader.pages.each do |page|
|
@@ -62,10 +71,12 @@ page-based iteration
|
|
62
71
|
puts page.text
|
63
72
|
puts page.raw_content
|
64
73
|
end
|
74
|
+
```
|
65
75
|
|
66
76
|
If you need to access the full program for rendering a page, use the walk() method
|
67
77
|
of PDF::Reader::Page.
|
68
78
|
|
79
|
+
```ruby
|
69
80
|
class RedGreenBlue
|
70
81
|
def set_rgb_color_for_nonstroking(r, g, b)
|
71
82
|
puts "R: #{r}, G: #{g}, B: #{b}"
|
@@ -76,31 +87,32 @@ of PDF::Reader::Page.
|
|
76
87
|
page = reader.page(1)
|
77
88
|
receiver = RedGreenBlue.new
|
78
89
|
page.walk(receiver)
|
90
|
+
```
|
79
91
|
|
80
92
|
For low level access to the objects in a PDF file, use the ObjectHash class like
|
81
93
|
so:
|
82
94
|
|
95
|
+
```ruby
|
83
96
|
reader = PDF::Reader.new("somefile.pdf")
|
84
97
|
puts reader.objects.inspect
|
98
|
+
```
|
85
99
|
|
86
|
-
|
100
|
+
# Text Encoding
|
87
101
|
|
88
102
|
Regardless of the internal encoding used in the PDF all text will be converted
|
89
103
|
to UTF-8 before it is passed back from PDF::Reader.
|
90
104
|
|
91
|
-
Strings that contain binary data (like font blobs) will be marked as such
|
92
|
-
M17N aware VMs.
|
105
|
+
Strings that contain binary data (like font blobs) will be marked as such.
|
93
106
|
|
94
|
-
|
107
|
+
# Former API
|
95
108
|
|
96
109
|
Version 1.0.0 of PDF::Reader introduced a new page-based API that provides
|
97
110
|
efficient and easy access to any page.
|
98
111
|
|
99
|
-
The
|
100
|
-
|
101
|
-
warnings before it is completely removed in version 2.0.0.
|
112
|
+
The pre-1.0 API was deprecated during the 1.x release series, and has been
|
113
|
+
removed from 2.0.0.
|
102
114
|
|
103
|
-
|
115
|
+
# Exceptions
|
104
116
|
|
105
117
|
There are two key exceptions that you will need to watch out for when processing a
|
106
118
|
PDF file:
|
@@ -120,7 +132,7 @@ don't, 'rescue MalformedPDFError' will catch all the subclassed errors as well.
|
|
120
132
|
Any other exceptions should be considered bugs in PDF::Reader (please
|
121
133
|
report it!).
|
122
134
|
|
123
|
-
|
135
|
+
# PDF Integrity
|
124
136
|
|
125
137
|
Windows developers may run into problems when running specs due to MalformedPDFErrors.
|
126
138
|
This is usually because CRLF characters are automatically added to some of the PDF's in
|
@@ -128,18 +140,20 @@ the spec folder when you checkout a branch from Git.
|
|
128
140
|
|
129
141
|
To remove any invalid CRLF characters added while checking out a branch from Git, run:
|
130
142
|
|
143
|
+
```ruby
|
131
144
|
rake fix_integrity
|
145
|
+
```
|
132
146
|
|
133
|
-
|
147
|
+
# Maintainers
|
134
148
|
|
135
|
-
|
149
|
+
* James Healy <mailto:jimmy@deefa.com>
|
136
150
|
|
137
|
-
|
151
|
+
# Licensing
|
138
152
|
|
139
153
|
This library is distributed under the terms of the MIT License. See the included file for
|
140
154
|
more detail.
|
141
155
|
|
142
|
-
|
156
|
+
# Mailing List
|
143
157
|
|
144
158
|
Any questions or feedback should be sent to the PDF::Reader google group. It's
|
145
159
|
better that any answers be available for others instead of hiding in someone's
|
@@ -147,20 +161,23 @@ inbox.
|
|
147
161
|
|
148
162
|
http://groups.google.com/group/pdf-reader
|
149
163
|
|
150
|
-
|
164
|
+
# Examples
|
151
165
|
|
152
166
|
The easiest way to explain how this works in practice is to show some examples.
|
153
167
|
Check out the examples/ directory for a few files.
|
154
168
|
|
155
|
-
|
169
|
+
# Known Limitations
|
156
170
|
|
157
171
|
Occasionally some text cannot be extracted properly due to the way it has been
|
158
172
|
stored, or the use of invalid bytes. In these cases PDF::Reader will output a
|
159
173
|
little UTF-8 friendly box to indicate an unrecognisable character.
|
160
174
|
|
161
|
-
|
175
|
+
# Resources
|
162
176
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
177
|
+
* PDF::Reader Code Repository: http://github.com/yob/pdf-reader
|
178
|
+
|
179
|
+
* PDF Specification: http://www.adobe.com/devnet/pdf/pdf_reference.html
|
180
|
+
|
181
|
+
* PDF Tutorial Slide Presentations: http://home.comcast.net/~jk05/presentations/PDFTutorials.html
|
182
|
+
|
183
|
+
* Developing with PDF (book): http://shop.oreilly.com/product/0636920025269.do
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
|
|
14
14
|
Cane::RakeTask.new(:quality) do |cane|
|
15
15
|
cane.abc_max = 20
|
16
16
|
cane.style_measure = 100
|
17
|
-
cane.max_violations =
|
17
|
+
cane.max_violations = 31
|
18
18
|
|
19
19
|
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
20
20
|
end
|
@@ -41,7 +41,7 @@ end
|
|
41
41
|
desc "Create a YAML file of integrity info for PDFs in the spec suite"
|
42
42
|
task :integrity_yaml do
|
43
43
|
data = {}
|
44
|
-
Dir.glob("spec/data/**/*.*").each do |path|
|
44
|
+
Dir.glob("spec/data/**/*.*").sort.each do |path|
|
45
45
|
path_without_spec = path.gsub("spec/","")
|
46
46
|
data[path_without_spec] = {
|
47
47
|
:bytes => File.size(path),
|
data/bin/pdf_object
CHANGED
@@ -25,7 +25,10 @@ gen = gen.to_i
|
|
25
25
|
|
26
26
|
# make magic happen
|
27
27
|
begin
|
28
|
-
obj =
|
28
|
+
obj = nil
|
29
|
+
PDF::Reader.open(filename) do |pdf|
|
30
|
+
obj = pdf.objects[PDF::Reader::Reference.new(id, gen)]
|
31
|
+
end
|
29
32
|
|
30
33
|
case obj
|
31
34
|
when Hash, Array
|
data/lib/pdf/reader.rb
CHANGED
@@ -110,16 +110,10 @@ module PDF
|
|
110
110
|
#
|
111
111
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
112
112
|
#
|
113
|
-
def initialize(input
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
118
|
-
else
|
119
|
-
msg = "Calling PDF::Reader#new with no arguments is deprecated and will be removed "
|
120
|
-
msg += "in the 2.0 release"
|
121
|
-
$stderr.puts(msg)
|
122
|
-
end
|
113
|
+
def initialize(input, opts = {})
|
114
|
+
@cache = PDF::Reader::ObjectCache.new
|
115
|
+
opts.merge!(:cache => @cache)
|
116
|
+
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
123
117
|
end
|
124
118
|
|
125
119
|
def info
|
@@ -133,7 +127,7 @@ module PDF
|
|
133
127
|
nil
|
134
128
|
else
|
135
129
|
xml = stream.unfiltered_data
|
136
|
-
xml.force_encoding("utf-8")
|
130
|
+
xml.force_encoding("utf-8")
|
137
131
|
xml
|
138
132
|
end
|
139
133
|
end
|
@@ -164,61 +158,6 @@ module PDF
|
|
164
158
|
yield PDF::Reader.new(input, opts)
|
165
159
|
end
|
166
160
|
|
167
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
168
|
-
# eventually be removed
|
169
|
-
#
|
170
|
-
#
|
171
|
-
# Parse the file with the given name, sending events to the given receiver.
|
172
|
-
#
|
173
|
-
def self.file(name, receivers, opts = {})
|
174
|
-
msg = "PDF::Reader#file is deprecated and will be removed in the 2.0 release"
|
175
|
-
$stderr.puts(msg)
|
176
|
-
File.open(name,"rb") do |f|
|
177
|
-
new.parse(f, receivers, opts)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
182
|
-
# eventually be removed
|
183
|
-
#
|
184
|
-
# Parse the given string, sending events to the given receiver.
|
185
|
-
#
|
186
|
-
def self.string(str, receivers, opts = {})
|
187
|
-
msg = "PDF::Reader#string is deprecated and will be removed in the 2.0 release"
|
188
|
-
$stderr.puts(msg)
|
189
|
-
StringIO.open(str) do |s|
|
190
|
-
new.parse(s, receivers, opts)
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
195
|
-
# eventually be removed
|
196
|
-
#
|
197
|
-
# Parse the file with the given name, returning an unmarshalled ruby version of
|
198
|
-
# represents the requested pdf object
|
199
|
-
#
|
200
|
-
def self.object_file(name, id, gen = 0)
|
201
|
-
msg = "PDF::Reader#object_file is deprecated and will be removed in the 2.0 release"
|
202
|
-
$stderr.puts(msg)
|
203
|
-
File.open(name,"rb") { |f|
|
204
|
-
new.object(f, id.to_i, gen.to_i)
|
205
|
-
}
|
206
|
-
end
|
207
|
-
|
208
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
209
|
-
# eventually be removed
|
210
|
-
#
|
211
|
-
# Parse the given string, returning an unmarshalled ruby version of represents
|
212
|
-
# the requested pdf object
|
213
|
-
#
|
214
|
-
def self.object_string(str, id, gen = 0)
|
215
|
-
msg = "PDF::Reader#object_string is deprecated and will be removed in the 2.0 release"
|
216
|
-
$stderr.puts(msg)
|
217
|
-
StringIO.open(str) { |s|
|
218
|
-
new.object(s, id.to_i, gen.to_i)
|
219
|
-
}
|
220
|
-
end
|
221
|
-
|
222
161
|
# returns an array of PDF::Reader::Page objects, one for each
|
223
162
|
# page in the source PDF.
|
224
163
|
#
|
@@ -259,40 +198,6 @@ module PDF
|
|
259
198
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
260
199
|
end
|
261
200
|
|
262
|
-
|
263
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
264
|
-
# eventually be removed
|
265
|
-
#
|
266
|
-
# Given an IO object that contains PDF data, parse it.
|
267
|
-
#
|
268
|
-
def parse(io, receivers, opts = {})
|
269
|
-
msg = "PDF::Reader#parse is deprecated and will be removed in the 2.0 release"
|
270
|
-
$stderr.puts(msg)
|
271
|
-
ohash = ObjectHash.new(io)
|
272
|
-
|
273
|
-
options = {:pages => true, :raw_text => false, :metadata => true}
|
274
|
-
options.merge!(opts)
|
275
|
-
|
276
|
-
strategies.each do |s|
|
277
|
-
s.new(ohash, receivers, options).process
|
278
|
-
end
|
279
|
-
|
280
|
-
self
|
281
|
-
end
|
282
|
-
|
283
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
284
|
-
# eventually be removed
|
285
|
-
#
|
286
|
-
# Given an IO object that contains PDF data, return the contents of a single object
|
287
|
-
#
|
288
|
-
def object(io, id, gen)
|
289
|
-
msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
|
290
|
-
$stderr.puts(msg)
|
291
|
-
@objects = ObjectHash.new(io)
|
292
|
-
|
293
|
-
@objects.deref(Reference.new(id, gen))
|
294
|
-
end
|
295
|
-
|
296
201
|
private
|
297
202
|
|
298
203
|
# recursively convert strings from outside a content stream into UTF-8
|
@@ -321,7 +226,7 @@ module PDF
|
|
321
226
|
# TODO find a PDF I can use to spec this behaviour
|
322
227
|
#
|
323
228
|
def pdfdoc_to_utf8(obj)
|
324
|
-
obj.force_encoding("utf-8")
|
229
|
+
obj.force_encoding("utf-8")
|
325
230
|
obj
|
326
231
|
end
|
327
232
|
|
@@ -331,17 +236,10 @@ module PDF
|
|
331
236
|
def utf16_to_utf8(obj)
|
332
237
|
str = obj[2, obj.size]
|
333
238
|
str = str.unpack("n*").pack("U*")
|
334
|
-
str.force_encoding("utf-8")
|
239
|
+
str.force_encoding("utf-8")
|
335
240
|
str
|
336
241
|
end
|
337
242
|
|
338
|
-
def strategies
|
339
|
-
@strategies ||= [
|
340
|
-
::PDF::Reader::MetadataStrategy,
|
341
|
-
::PDF::Reader::PagesStrategy
|
342
|
-
]
|
343
|
-
end
|
344
|
-
|
345
243
|
def root
|
346
244
|
@root ||= @objects.deref(@objects.trailer[:Root])
|
347
245
|
end
|
@@ -351,7 +249,6 @@ end
|
|
351
249
|
################################################################################
|
352
250
|
|
353
251
|
require 'pdf/reader/resource_methods'
|
354
|
-
require 'pdf/reader/abstract_strategy'
|
355
252
|
require 'pdf/reader/buffer'
|
356
253
|
require 'pdf/reader/cid_widths'
|
357
254
|
require 'pdf/reader/cmap'
|
@@ -370,7 +267,6 @@ require 'pdf/reader/font_descriptor'
|
|
370
267
|
require 'pdf/reader/form_xobject'
|
371
268
|
require 'pdf/reader/glyph_hash'
|
372
269
|
require 'pdf/reader/lzw'
|
373
|
-
require 'pdf/reader/metadata_strategy'
|
374
270
|
require 'pdf/reader/object_cache'
|
375
271
|
require 'pdf/reader/object_hash'
|
376
272
|
require 'pdf/reader/object_stream'
|
@@ -381,7 +277,6 @@ require 'pdf/reader/reference'
|
|
381
277
|
require 'pdf/reader/register_receiver'
|
382
278
|
require 'pdf/reader/standard_security_handler'
|
383
279
|
require 'pdf/reader/stream'
|
384
|
-
require 'pdf/reader/text_receiver'
|
385
280
|
require 'pdf/reader/text_run'
|
386
281
|
require 'pdf/reader/page_state'
|
387
282
|
require 'pdf/reader/page_text_receiver'
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -37,6 +37,7 @@ class PDF::Reader
|
|
37
37
|
#
|
38
38
|
class Buffer
|
39
39
|
TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
|
40
|
+
TOKEN_DELIMITER=[0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F]
|
40
41
|
|
41
42
|
# some strings for comparisons. Declaring them here avoids creating new
|
42
43
|
# strings that need GC over and over
|
@@ -366,7 +367,7 @@ class PDF::Reader
|
|
366
367
|
# PDF name, start of new token
|
367
368
|
@tokens << tok if tok.size > 0
|
368
369
|
@tokens << byte.chr
|
369
|
-
@tokens << "" if byte == 0x2F && [nil, 0x20, 0x0A].include?(peek_byte)
|
370
|
+
@tokens << "" if byte == 0x2F && ([nil, 0x20, 0x0A] + TOKEN_DELIMITER).include?(peek_byte)
|
370
371
|
tok = ""
|
371
372
|
break
|
372
373
|
else
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -31,6 +31,17 @@ class PDF::Reader
|
|
31
31
|
# extracting various useful information.
|
32
32
|
#
|
33
33
|
class CMap # :nodoc:
|
34
|
+
CMAP_KEYWORDS = {
|
35
|
+
"begincodespacerange" => 1,
|
36
|
+
"endcodespacerange" => 1,
|
37
|
+
"beginbfchar" => 1,
|
38
|
+
"endbfchar" => 1,
|
39
|
+
"beginbfrange" => 1,
|
40
|
+
"endbfrange" => 1,
|
41
|
+
"begin" => 1,
|
42
|
+
"begincmap" => 1,
|
43
|
+
"def" => 1
|
44
|
+
}
|
34
45
|
|
35
46
|
attr_reader :map
|
36
47
|
|
@@ -40,24 +51,25 @@ class PDF::Reader
|
|
40
51
|
end
|
41
52
|
|
42
53
|
def process_data(data)
|
54
|
+
parser = build_parser(data)
|
43
55
|
mode = nil
|
44
|
-
instructions =
|
56
|
+
instructions = []
|
45
57
|
|
46
|
-
|
47
|
-
if
|
58
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
59
|
+
if token == "beginbfchar"
|
48
60
|
mode = :char
|
49
|
-
elsif
|
61
|
+
elsif token == "endbfchar"
|
50
62
|
process_bfchar_instructions(instructions)
|
51
|
-
instructions =
|
63
|
+
instructions = []
|
52
64
|
mode = nil
|
53
|
-
elsif
|
65
|
+
elsif token == "beginbfrange"
|
54
66
|
mode = :range
|
55
|
-
elsif
|
67
|
+
elsif token == "endbfrange"
|
56
68
|
process_bfrange_instructions(instructions)
|
57
|
-
instructions =
|
69
|
+
instructions = []
|
58
70
|
mode = nil
|
59
71
|
elsif mode == :char || mode == :range
|
60
|
-
instructions <<
|
72
|
+
instructions << token
|
61
73
|
end
|
62
74
|
end
|
63
75
|
end
|
@@ -105,22 +117,15 @@ class PDF::Reader
|
|
105
117
|
end
|
106
118
|
|
107
119
|
def process_bfchar_instructions(instructions)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@map[find[0]] = replace
|
113
|
-
find = str_to_int(parser.parse_token)
|
114
|
-
replace = str_to_int(parser.parse_token)
|
120
|
+
instructions.each_slice(2) do |one, two|
|
121
|
+
find = str_to_int(one)
|
122
|
+
replace = str_to_int(two)
|
123
|
+
@map[find.first] = replace
|
115
124
|
end
|
116
125
|
end
|
117
126
|
|
118
127
|
def process_bfrange_instructions(instructions)
|
119
|
-
|
120
|
-
start = parser.parse_token
|
121
|
-
finish = parser.parse_token
|
122
|
-
to = parser.parse_token
|
123
|
-
while start && finish && to
|
128
|
+
instructions.each_slice(3) do |start, finish, to|
|
124
129
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
125
130
|
bfrange_type_one(start, finish, to)
|
126
131
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -128,9 +133,6 @@ class PDF::Reader
|
|
128
133
|
else
|
129
134
|
raise "invalid bfrange section"
|
130
135
|
end
|
131
|
-
start = parser.parse_token
|
132
|
-
finish = parser.parse_token
|
133
|
-
to = parser.parse_token
|
134
136
|
end
|
135
137
|
end
|
136
138
|
|