origamindee 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +89 -0
- data/COPYING.LESSER +165 -0
- data/README.md +131 -0
- data/bin/config/pdfcop.conf.yml +236 -0
- data/bin/pdf2pdfa +87 -0
- data/bin/pdf2ruby +333 -0
- data/bin/pdfcop +476 -0
- data/bin/pdfdecompress +97 -0
- data/bin/pdfdecrypt +91 -0
- data/bin/pdfencrypt +113 -0
- data/bin/pdfexplode +223 -0
- data/bin/pdfextract +277 -0
- data/bin/pdfmetadata +143 -0
- data/bin/pdfsh +12 -0
- data/bin/shell/console.rb +128 -0
- data/bin/shell/hexdump.rb +59 -0
- data/bin/shell/irbrc +69 -0
- data/examples/README.md +34 -0
- data/examples/attachments/attachment.rb +38 -0
- data/examples/attachments/nested_document.rb +51 -0
- data/examples/encryption/encryption.rb +28 -0
- data/examples/events/events.rb +72 -0
- data/examples/flash/flash.rb +37 -0
- data/examples/flash/helloworld.swf +0 -0
- data/examples/forms/javascript.rb +54 -0
- data/examples/forms/xfa.rb +115 -0
- data/examples/javascript/hello_world.rb +22 -0
- data/examples/javascript/js_emulation.rb +54 -0
- data/examples/loop/goto.rb +32 -0
- data/examples/loop/named.rb +33 -0
- data/examples/signature/signature.rb +65 -0
- data/examples/uri/javascript.rb +56 -0
- data/examples/uri/open-uri.rb +21 -0
- data/examples/uri/submitform.rb +47 -0
- data/lib/origami/3d.rb +364 -0
- data/lib/origami/acroform.rb +321 -0
- data/lib/origami/actions.rb +318 -0
- data/lib/origami/annotations.rb +711 -0
- data/lib/origami/array.rb +242 -0
- data/lib/origami/boolean.rb +90 -0
- data/lib/origami/catalog.rb +418 -0
- data/lib/origami/collections.rb +144 -0
- data/lib/origami/compound.rb +161 -0
- data/lib/origami/destinations.rb +252 -0
- data/lib/origami/dictionary.rb +192 -0
- data/lib/origami/encryption.rb +1084 -0
- data/lib/origami/extensions/fdf.rb +347 -0
- data/lib/origami/extensions/ppklite.rb +422 -0
- data/lib/origami/filespec.rb +197 -0
- data/lib/origami/filters/ascii.rb +211 -0
- data/lib/origami/filters/ccitt/tables.rb +267 -0
- data/lib/origami/filters/ccitt.rb +357 -0
- data/lib/origami/filters/crypt.rb +38 -0
- data/lib/origami/filters/dct.rb +54 -0
- data/lib/origami/filters/flate.rb +69 -0
- data/lib/origami/filters/jbig2.rb +57 -0
- data/lib/origami/filters/jpx.rb +47 -0
- data/lib/origami/filters/lzw.rb +170 -0
- data/lib/origami/filters/predictors.rb +292 -0
- data/lib/origami/filters/runlength.rb +129 -0
- data/lib/origami/filters.rb +364 -0
- data/lib/origami/font.rb +196 -0
- data/lib/origami/functions.rb +79 -0
- data/lib/origami/graphics/colors.rb +230 -0
- data/lib/origami/graphics/instruction.rb +98 -0
- data/lib/origami/graphics/path.rb +182 -0
- data/lib/origami/graphics/patterns.rb +174 -0
- data/lib/origami/graphics/render.rb +62 -0
- data/lib/origami/graphics/state.rb +149 -0
- data/lib/origami/graphics/text.rb +225 -0
- data/lib/origami/graphics/xobject.rb +918 -0
- data/lib/origami/graphics.rb +38 -0
- data/lib/origami/header.rb +75 -0
- data/lib/origami/javascript.rb +713 -0
- data/lib/origami/linearization.rb +330 -0
- data/lib/origami/metadata.rb +172 -0
- data/lib/origami/name.rb +135 -0
- data/lib/origami/null.rb +65 -0
- data/lib/origami/numeric.rb +181 -0
- data/lib/origami/obfuscation.rb +245 -0
- data/lib/origami/object.rb +760 -0
- data/lib/origami/optionalcontent.rb +183 -0
- data/lib/origami/outline.rb +54 -0
- data/lib/origami/outputintents.rb +85 -0
- data/lib/origami/page.rb +722 -0
- data/lib/origami/parser.rb +269 -0
- data/lib/origami/parsers/fdf.rb +56 -0
- data/lib/origami/parsers/pdf/lazy.rb +176 -0
- data/lib/origami/parsers/pdf/linear.rb +122 -0
- data/lib/origami/parsers/pdf.rb +118 -0
- data/lib/origami/parsers/ppklite.rb +57 -0
- data/lib/origami/pdf.rb +1108 -0
- data/lib/origami/reference.rb +134 -0
- data/lib/origami/signature.rb +702 -0
- data/lib/origami/stream.rb +705 -0
- data/lib/origami/string.rb +444 -0
- data/lib/origami/template/patterns.rb +56 -0
- data/lib/origami/template/widgets.rb +151 -0
- data/lib/origami/trailer.rb +190 -0
- data/lib/origami/tree.rb +62 -0
- data/lib/origami/version.rb +23 -0
- data/lib/origami/webcapture.rb +100 -0
- data/lib/origami/xfa/config.rb +453 -0
- data/lib/origami/xfa/connectionset.rb +146 -0
- data/lib/origami/xfa/datasets.rb +49 -0
- data/lib/origami/xfa/localeset.rb +42 -0
- data/lib/origami/xfa/package.rb +59 -0
- data/lib/origami/xfa/pdf.rb +73 -0
- data/lib/origami/xfa/signature.rb +42 -0
- data/lib/origami/xfa/sourceset.rb +43 -0
- data/lib/origami/xfa/stylesheet.rb +44 -0
- data/lib/origami/xfa/template.rb +1691 -0
- data/lib/origami/xfa/xdc.rb +42 -0
- data/lib/origami/xfa/xfa.rb +146 -0
- data/lib/origami/xfa/xfdf.rb +43 -0
- data/lib/origami/xfa/xmpmeta.rb +43 -0
- data/lib/origami/xfa.rb +62 -0
- data/lib/origami/xreftable.rb +557 -0
- data/lib/origami.rb +47 -0
- data/test/dataset/calc.pdf +85 -0
- data/test/dataset/crypto.pdf +36 -0
- data/test/dataset/empty.pdf +49 -0
- data/test/test_actions.rb +27 -0
- data/test/test_annotations.rb +68 -0
- data/test/test_forms.rb +30 -0
- data/test/test_native_types.rb +83 -0
- data/test/test_object_tree.rb +33 -0
- data/test/test_pages.rb +60 -0
- data/test/test_pdf.rb +20 -0
- data/test/test_pdf_attachment.rb +34 -0
- data/test/test_pdf_create.rb +24 -0
- data/test/test_pdf_encrypt.rb +102 -0
- data/test/test_pdf_parse.rb +134 -0
- data/test/test_pdf_parse_lazy.rb +69 -0
- data/test/test_pdf_sign.rb +97 -0
- data/test/test_streams.rb +184 -0
- data/test/test_xrefs.rb +67 -0
- metadata +280 -0
@@ -0,0 +1,269 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
require 'rainbow'
|
22
|
+
require 'strscan'
|
23
|
+
|
24
|
+
module Origami
|
25
|
+
|
26
|
+
class Parser #:nodoc:
|
27
|
+
|
28
|
+
class ParsingError < Error #:nodoc:
|
29
|
+
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# Do not output debug information.
|
33
|
+
#
|
34
|
+
VERBOSE_QUIET = 0
|
35
|
+
|
36
|
+
#
|
37
|
+
# Output some useful information.
|
38
|
+
#
|
39
|
+
VERBOSE_INFO = 1
|
40
|
+
|
41
|
+
#
|
42
|
+
# Output debug information.
|
43
|
+
#
|
44
|
+
VERBOSE_DEBUG = 2
|
45
|
+
|
46
|
+
#
|
47
|
+
# Output every object read.
|
48
|
+
#
|
49
|
+
VERBOSE_TRACE = 3
|
50
|
+
|
51
|
+
attr_accessor :options
|
52
|
+
|
53
|
+
#
# Creates a parser. Entries of _options_ override the defaults listed below.
#
def initialize(options = {}) #:nodoc:
  # Type hints recorded for indirect objects, keyed by reference.
  @deferred_casts = {}

  # No data is loaded until #parse is called.
  @data = nil

  defaults = {
    verbosity: VERBOSE_INFO, # Verbose level.
    ignore_errors: true,     # Try to keep on parsing when errors occur.
    callback: Proc.new {},   # Callback procedure whenever a structure is read.
    logger: STDERR,          # Where to output parser messages.
    colorize_log: true       # Colorize parser output?
  }

  @options = defaults.update(options)
  @logger = @options[:logger]
end
|
71
|
+
|
72
|
+
#
# Returns the current offset of the scanner.
# Raises RuntimeError when no data has been loaded.
#
def pos
  if @data.nil?
    raise RuntimeError, "Cannot get position, parser has no loaded data."
  end

  @data.pos
end
|
77
|
+
|
78
|
+
#
# Moves the scanner to _offset_.
# Raises RuntimeError when no data has been loaded.
#
def pos=(offset)
  if @data.nil?
    raise RuntimeError, "Cannot set position, parser has no loaded data."
  end

  @data.pos = offset
end
|
83
|
+
|
84
|
+
#
# Loads the data to be parsed and rewinds the scanner to offset 0.
#
# _stream_ may be:
# - an object responding to +read+: its content is read and handled as binary,
# - a String: taken as the path of a file, which is read in binary mode,
# - a StringScanner: used as is.
#
# Raises TypeError for any other argument; the previously loaded data, if any,
# is left untouched in that case.
#
def parse(stream)
  @data =
    if stream.respond_to?(:read)
      StringScanner.new(stream.read.force_encoding('binary'))
    elsif stream.is_a?(::String)
      # A plain String is a file path, not the document content.
      @filename = stream
      StringScanner.new(File.binread(@filename))
    elsif stream.is_a?(StringScanner)
      stream
    else
      raise TypeError, "Cannot parse data from #{stream.class}"
    end

  @data.pos = 0
end
|
100
|
+
|
101
|
+
#
# Parses the indirect object located at _pos_ (the current scanner position
# by default) and passes it to the :callback option.
#
# Returns the object, or nil when Object.parse returns nil.
#
# An UnterminatedObjectError is logged and the object carried by the exception
# is returned. On any other error the exception is re-raised unless
# :ignore_errors is set; otherwise the scanner skips to the next object and
# parsing is attempted again.
#
def parse_object(pos = @data.pos) #:nodoc:
  @data.pos = pos

  begin
    parsed = Object.parse(@data, self)
    return if parsed.nil?

    parsed = try_object_promotion(parsed)
    trace "Read #{parsed.type} object, #{parsed.reference}"

    @options[:callback].call(parsed)
    parsed

  rescue UnterminatedObjectError => unterminated
    error unterminated.message
    partial = unterminated.obj

    Object.skip_until_next_obj(@data)
    @options[:callback].call(partial)
    partial

  rescue => failure
    error "Breaking on: #{(@data.peek(10) + "...").inspect} at offset 0x#{@data.pos.to_s(16)}"
    error "Last exception: [#{failure.class}] #{failure.message}"
    unless @options[:ignore_errors]
      error "Manually fix the file or set :ignore_errors parameter."
      raise
    end

    debug 'Skipping this indirect object.'
    # Give up if there is no further object to resume from.
    raise unless Object.skip_until_next_obj(@data)

    # Only the begin block is re-run, so the scanner keeps its new position.
    retry
  end
end
|
136
|
+
|
137
|
+
#
# Parses the cross-reference table located at _pos_ (the current scanner
# position by default) and passes it to the :callback option.
#
# Returns the table, or nil if it could not be parsed. In the latter case the
# scanner is placed on the next "trailer" keyword when there is one.
#
def parse_xreftable(pos = @data.pos) #:nodoc:
  @data.pos = pos

  begin
    info "...Parsing xref table..."
    section = XRef::Section.parse(@data)
    @options[:callback].call(section)

    section

  rescue => failure
    debug "Exception caught while parsing xref table : " + failure.message
    warn "Unable to parse xref table! Xrefs might be stored into an XRef stream."

    # skip_until stops right after the keyword: step back onto its first byte.
    if @data.skip_until(/trailer/)
      @data.pos -= 'trailer'.length
    end

    nil
  end
end
|
156
|
+
|
157
|
+
#
# Parses the trailer located at _pos_ (the current scanner position by
# default), passes it to the :callback option and returns it.
#
# Any parsing error is logged, then re-raised.
#
def parse_trailer(pos = @data.pos) #:nodoc:
  @data.pos = pos

  begin
    info "...Parsing trailer..."
    parsed_trailer = Trailer.parse(@data, self)
    @options[:callback].call(parsed_trailer)

    parsed_trailer

  rescue => failure
    debug "Exception caught while parsing trailer : " + failure.message
    warn "Unable to parse trailer!"

    raise
  end
end
|
174
|
+
|
175
|
+
#
# Records _type_ as the type hint for the object designated by _reference_.
# An existing hint for the same reference is replaced.
#
def defer_type_cast(reference, type) #:nodoc:
  @deferred_casts.store(reference, type)
end
|
178
|
+
|
179
|
+
#
# Path given to #parse when the input was a String; nil otherwise.
#
def target_filename
  @filename
end
|
182
|
+
|
183
|
+
#
# Size of the loaded data, or nil when nothing has been loaded.
#
def target_filesize
  return unless @data

  @data.string.size
end
|
186
|
+
|
187
|
+
#
# Copy of the loaded data, or nil when nothing has been loaded.
#
def target_data
  return unless @data

  @data.string.dup
end
|
190
|
+
|
191
|
+
#
# Logs _msg_ as an error, at the VERBOSE_QUIET threshold.
#
def error(msg = "") #:nodoc:
  log VERBOSE_QUIET, 'error', :red, msg
end
|
194
|
+
|
195
|
+
#
# Logs _msg_ as a warning, at the VERBOSE_INFO threshold.
#
def warn(msg = "") #:nodoc:
  log VERBOSE_INFO, 'warn ', :yellow, msg
end
|
198
|
+
|
199
|
+
#
# Logs _msg_ as an informational message, at the VERBOSE_INFO threshold.
#
def info(msg = "") #:nodoc:
  log VERBOSE_INFO, 'info ', :green, msg
end
|
202
|
+
|
203
|
+
#
# Logs _msg_ as a debug message, at the VERBOSE_DEBUG threshold.
#
def debug(msg = "") #:nodoc:
  log VERBOSE_DEBUG, 'debug', :magenta, msg
end
|
206
|
+
|
207
|
+
#
# Logs _msg_ as a trace message, at the VERBOSE_TRACE threshold.
#
def trace(msg = "") #:nodoc:
  log VERBOSE_TRACE, 'trace', :cyan, msg
end
|
210
|
+
|
211
|
+
#
# Returns a StringScanner for _stream_.
#
# A StringScanner is returned unchanged; an object responding to +to_str+ is
# wrapped into a new scanner. Anything else raises TypeError.
#
def self.init_scanner(stream)
  return stream if stream.is_a?(StringScanner)

  unless stream.respond_to?(:to_str)
    raise TypeError, "Cannot initialize scanner from #{stream.class}"
  end

  StringScanner.new(stream.to_str)
end
|
220
|
+
|
221
|
+
private
|
222
|
+
|
223
|
+
#
# Attempt to promote an object using the deferred casts.
#
# Returns _obj_ unchanged when type propagation is disabled, when no hint is
# recorded for its reference, or when no hint is a subclass of its class.
#
def try_object_promotion(obj)
  return obj unless Origami::OPTIONS[:enable_type_propagation]
  return obj unless @deferred_casts.key?(obj.reference)

  hints = @deferred_casts[obj.reference]
  hints = [ hints ] unless hints.is_a?(::Array)

  # The first hint that is a strict subclass of the object's class wins.
  target = hints.find { |candidate| candidate < obj.class }

  target ? obj.cast_to(target, self) : obj
end
|
240
|
+
|
241
|
+
#
# Writes _message_ to the logger, prefixed with "[prefix]", provided the
# configured verbosity is at least _level_. The prefix is painted with
# _color_ when the :colorize_log option is set.
#
def log(level, prefix, color, message) #:nodoc:
  return unless @options[:verbosity] >= level

  tag = "[#{prefix}]"

  unless @options[:colorize_log]
    return @logger.puts("#{tag} #{message}")
  end

  @logger.print Rainbow(tag).color(color)
  @logger.puts " #{message}"
end
|
251
|
+
|
252
|
+
#
# Applies the recorded type hints to _document_.
#
# Every hint is submitted through document.cast_object; for a reference with
# several hints, submission stops at the first one for which cast_object
# returns a truthy value. The pass is repeated until it leaves the set of
# recorded hints unchanged.
#
def propagate_types(document)
  info "...Propagating types..."

  previous_casts = nil
  until previous_casts == @deferred_casts
    # Iterate over a copy: the hints may be modified while the pass runs.
    previous_casts = @deferred_casts.clone

    previous_casts.each_pair do |ref, type|
      hints = type.is_a?(::Array) ? type : [ type ]
      hints.any? { |hint| document.cast_object(ref, hint) }
    end
  end
end
|
267
|
+
end
|
268
|
+
|
269
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
require 'origami/parser'
|
22
|
+
|
23
|
+
module Origami
|
24
|
+
|
25
|
+
class FDF
|
26
|
+
class Parser < Origami::Parser
|
27
|
+
#
# Parses _stream_ as an FDF document: header, indirect objects, xref table
# and trailer, in that order. Returns the resulting FDF object.
#
def parse(stream) #:nodoc:
  super(stream)

  fdf = FDF.new(self)
  fdf.header = FDF::Header.parse(@data)
  @options[:callback].call(fdf.header)

  # Read objects until parse_object has nothing more to return.
  until (object = parse_object).nil?
    fdf.insert(object)
  end

  fdf.revisions.first.xreftable = parse_xreftable
  fdf.revisions.first.trailer = parse_trailer

  return fdf unless Origami::OPTIONS[:enable_type_propagation]

  trailer = fdf.revisions.first.trailer

  # Type the catalog first, then apply the recorded hints.
  fdf.cast_object(trailer[:Root], FDF::Catalog) if trailer[:Root].is_a?(Reference)
  propagate_types(fdf)

  fdf
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
|
22
|
+
require 'origami/parsers/pdf'
|
23
|
+
|
24
|
+
module Origami
|
25
|
+
|
26
|
+
class PDF
|
27
|
+
|
28
|
+
#
|
29
|
+
# Create a new PDF lazy Parser.
|
30
|
+
#
|
31
|
+
class LazyParser < Parser
|
32
|
+
#
# Parses _stream_ by following the chain of cross-reference sections, from
# the last one in the file back through the Prev entries, and returns the
# resulting PDF. Revisions are stored oldest first.
#
# The walk stops with a warning if a Prev entry leads back to an offset that
# was already visited, instead of looping forever on a malformed file.
#
def parse(stream)
  super

  pdf = parse_initialize
  revisions = []

  # Offsets already walked, used to detect a Prev chain that loops.
  visited_offsets = {}

  # Locate the last xref offset at the end of the file.
  xref_offset = locate_last_xref_offset

  while xref_offset and xref_offset != 0
    offset_key = xref_offset.to_i
    if visited_offsets.key?(offset_key)
      warn "Circular xref chain detected at offset #{offset_key}, ignoring previous revisions."
      break
    end
    visited_offsets[offset_key] = true

    # Create a new revision based on the xref section offset.
    revision = parse_revision(pdf, xref_offset)

    # Locate the previous xref section.
    if revision.xrefstm? and revision.xrefstm[:Prev].is_a?(Integer)
      xref_offset = revision.xrefstm[:Prev].to_i
    elsif revision.trailer[:Prev].is_a?(Integer)
      xref_offset = revision.trailer[:Prev].to_i
    else
      xref_offset = nil
    end

    # Prepend the revision.
    revisions.unshift(revision)
  end

  pdf.revisions.clear
  revisions.each do |rev|
    pdf.revisions.push(rev)
    pdf.insert(rev.xrefstm) if rev.xrefstm?
  end

  parse_finalize(pdf)

  pdf
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
#
# The document is scanned starting from the end, by locating the last startxref token.
#
# Returns the offset read from the trailer found there. Raises ParsingError
# when no token is found or when that offset is zero.
#
def locate_last_xref_offset
  # Built once: an interpolated regexp literal is rebuilt each time it is evaluated.
  startxref_token = /#{Trailer::XREF_TOKEN}/

  # Set the scanner position at the end.
  @data.terminate

  # Step backwards one byte at a time until the token matches.
  until @data.match?(startxref_token)
    raise ParsingError, "No xref token found" if @data.pos == 0
    @data.pos -= 1
  end

  # Extract the offset of the last xref section.
  trailer = Trailer.parse(@data, self)
  raise ParsingError, "Cannot locate xref section" if trailer.startxref.zero?

  trailer.startxref
end
|
91
|
+
|
92
|
+
#
# Builds and returns the revision whose cross-references are located at _offset_.
# In the LazyParser, revisions are reached by jumping through the
# cross-references (table or streams) rather than by reading the whole file.
#
# Raises ParsingError when _offset_ lies outside the loaded data.
#
def parse_revision(pdf, offset)
  unless offset.between?(0, @data.string.size - 1)
    raise ParsingError, "Invalid xref offset"
  end

  @data.pos = offset
  revision = PDF::Revision.new(pdf)

  # A regular xref section starts with its token; otherwise the xrefs are
  # expected to be stored in a stream.
  regular_section = @data.match?(/#{XRef::Section::TOKEN}/)
  if regular_section
    parse_revision_from_xreftable(revision)
  else
    parse_revision_from_xrefstm(revision)
  end

  revision
end
|
114
|
+
|
115
|
+
#
# Assumes the scanner is positioned on the xref table of the revision.
# What is expected there:
# - a regular xref table, starting with xref
# - the trailer of the revision
#
# With hybrid references the trailer also holds an XRefStm entry; the object
# at that offset is attached to the revision if it is an XRefStream.
# A failure to read it only produces a warning.
#
# Raises ParsingError when the xref table itself cannot be parsed.
#
def parse_revision_from_xreftable(revision)
  table = parse_xreftable
  raise ParsingError, "Cannot parse xref section" if table.nil?

  revision.xreftable = table
  revision.trailer = parse_trailer

  # Done, unless the trailer declares hybrid cross-references.
  return unless revision.trailer[:XRefStm].is_a?(Integer)

  begin
    offset = revision.trailer[:XRefStm].to_i
    candidate = parse_object(offset)

    if candidate.is_a?(XRefStream)
      revision.xrefstm = candidate
    else
      warn "Invalid xref stream at offset #{offset}"
    end

  rescue
    warn "Cannot parse xref stream at offset #{offset}"
  end
end
|
147
|
+
|
148
|
+
#
# Assumes the scanner is positioned on the xref stream of the revision.
#
# The XRefStream normally sits at the end of the revision, so the data
# following the object is searched for a trailer token.
#
# A revision may have no trailer at all: an empty Trailer is then assigned,
# and the stream dictionary is meant to serve as the trailer dictionary.
#
# Raises ParsingError when the object read is not an XRefStream.
#
def parse_revision_from_xrefstm(revision)
  stream_object = parse_object
  raise ParsingError, "Invalid xref stream" unless stream_object.is_a?(XRefStream)

  revision.xrefstm = stream_object

  # Search for the trailer.
  trailer_marker = Regexp.union(Trailer::XREF_TOKEN, *Trailer::TOKENS)

  revision.trailer =
    if @data.skip_until(trailer_marker)
      # Step back onto the first byte of the matched token.
      @data.pos -= @data.matched_size

      parse_trailer
    else
      warn "No trailer found."
      Trailer.new
    end
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
|
22
|
+
require 'origami/parsers/pdf'
|
23
|
+
|
24
|
+
module Origami
|
25
|
+
|
26
|
+
class PDF
|
27
|
+
|
28
|
+
#
|
29
|
+
# Create a new PDF linear Parser.
|
30
|
+
#
|
31
|
+
class LinearParser < Parser
|
32
|
+
#
# Parses _stream_ sequentially, one revision after the other, until the end
# of the data is reached or a revision cannot be read.
#
# A failure while reading a revision is reported through the parser logger
# (message and backtrace) and stops the loop; what was read so far is kept.
#
# Returns the result of parse_finalize for the document.
#
def parse(stream)
  super

  pdf = parse_initialize

  #
  # Parse each revision
  #
  revision = 0
  until @data.eos? do
    begin
      # The first revision is already present in pdf.revisions.
      pdf.add_new_revision unless revision.zero?

      parse_revision(pdf, revision)
      revision += 1

    rescue => failure
      error "Cannot read : " + (@data.peek(10) + "...").inspect
      error "Stopped on exception : " + failure.message
      # Honour the :logger option instead of writing straight to STDERR.
      @logger.puts failure.backtrace.join($/)

      break
    end
  end

  pdf.loaded!

  parse_finalize(pdf)
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
#
# Reads revision number _revision_no_ (zero-based) from the current position:
# its objects, then its xref table and trailer, and finally tries to match
# xref streams to it. Returns the revision.
#
def parse_revision(pdf, revision_no)
  revision = pdf.revisions[revision_no]

  info "...Parsing revision #{revision_no + 1}..."

  # Insert objects until parse_object has nothing more to return.
  until (object = parse_object).nil?
    pdf.insert(object)
  end

  revision.xreftable = parse_xreftable
  revision.trailer = parse_trailer

  locate_xref_streams(pdf, revision_no)

  revision
end
|
80
|
+
|
81
|
+
#
# Attaches to revision _revision_no_ the XRefStream designated by its trailer.
#
# The object at the startxref offset is tried first. If that is not an
# XRefStream (as in a hybrid-reference file, where startxref designates the
# regular xref table), the object at the XRefStm entry of the trailer is
# tried next. When a stream is found and has a Prev entry, the streams of
# the previous revisions are looked up as well.
#
def locate_xref_streams(pdf, revision_no)
  revision = pdf.revisions[revision_no]
  trailer = revision.trailer
  xrefstm = nil

  # Try to match the location of the last startxref with an XRefStream.
  if trailer.startxref != 0
    xrefstm = pdf.get_object_by_offset(trailer.startxref)
  end

  # Fall back to the XRefStm entry used by hybrid references.
  if !xrefstm.is_a?(XRefStream) and trailer.key?(:XRefStm)
    xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
  end

  return unless xrefstm.is_a?(XRefStream)

  warn "Found a XRefStream for revision #{revision_no + 1} at #{xrefstm.reference}"
  revision.xrefstm = xrefstm

  locate_prev_xref_streams(pdf, revision_no, xrefstm) if xrefstm.key?(:Prev)
end
|
102
|
+
|
103
|
+
#
# Follows the Prev entry of _xrefstm_ and attaches the XRefStream found at
# that offset to the revision preceding _revision_no_, then continues
# backwards as long as the stream found has a Prev entry.
#
# Does nothing for the first revision or when Prev is not an Integer.
#
def locate_prev_xref_streams(pdf, revision_no, xrefstm)
  return unless revision_no > 0
  return unless xrefstm.Prev.is_a?(Integer)

  earlier_revision = pdf.revisions[revision_no - 1]
  earlier_stream = pdf.get_object_by_offset(xrefstm.Prev.to_i)

  return unless earlier_stream.is_a?(XRefStream)

  warn "Found a previous XRefStream for revision #{revision_no} at #{earlier_stream.reference}"
  earlier_revision.xrefstm = earlier_stream

  locate_prev_xref_streams(pdf, revision_no - 1, earlier_stream) if earlier_stream.key?(:Prev)
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|