origamindee 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README.md +131 -0
  5. data/bin/config/pdfcop.conf.yml +236 -0
  6. data/bin/pdf2pdfa +87 -0
  7. data/bin/pdf2ruby +333 -0
  8. data/bin/pdfcop +476 -0
  9. data/bin/pdfdecompress +97 -0
  10. data/bin/pdfdecrypt +91 -0
  11. data/bin/pdfencrypt +113 -0
  12. data/bin/pdfexplode +223 -0
  13. data/bin/pdfextract +277 -0
  14. data/bin/pdfmetadata +143 -0
  15. data/bin/pdfsh +12 -0
  16. data/bin/shell/console.rb +128 -0
  17. data/bin/shell/hexdump.rb +59 -0
  18. data/bin/shell/irbrc +69 -0
  19. data/examples/README.md +34 -0
  20. data/examples/attachments/attachment.rb +38 -0
  21. data/examples/attachments/nested_document.rb +51 -0
  22. data/examples/encryption/encryption.rb +28 -0
  23. data/examples/events/events.rb +72 -0
  24. data/examples/flash/flash.rb +37 -0
  25. data/examples/flash/helloworld.swf +0 -0
  26. data/examples/forms/javascript.rb +54 -0
  27. data/examples/forms/xfa.rb +115 -0
  28. data/examples/javascript/hello_world.rb +22 -0
  29. data/examples/javascript/js_emulation.rb +54 -0
  30. data/examples/loop/goto.rb +32 -0
  31. data/examples/loop/named.rb +33 -0
  32. data/examples/signature/signature.rb +65 -0
  33. data/examples/uri/javascript.rb +56 -0
  34. data/examples/uri/open-uri.rb +21 -0
  35. data/examples/uri/submitform.rb +47 -0
  36. data/lib/origami/3d.rb +364 -0
  37. data/lib/origami/acroform.rb +321 -0
  38. data/lib/origami/actions.rb +318 -0
  39. data/lib/origami/annotations.rb +711 -0
  40. data/lib/origami/array.rb +242 -0
  41. data/lib/origami/boolean.rb +90 -0
  42. data/lib/origami/catalog.rb +418 -0
  43. data/lib/origami/collections.rb +144 -0
  44. data/lib/origami/compound.rb +161 -0
  45. data/lib/origami/destinations.rb +252 -0
  46. data/lib/origami/dictionary.rb +192 -0
  47. data/lib/origami/encryption.rb +1084 -0
  48. data/lib/origami/extensions/fdf.rb +347 -0
  49. data/lib/origami/extensions/ppklite.rb +422 -0
  50. data/lib/origami/filespec.rb +197 -0
  51. data/lib/origami/filters/ascii.rb +211 -0
  52. data/lib/origami/filters/ccitt/tables.rb +267 -0
  53. data/lib/origami/filters/ccitt.rb +357 -0
  54. data/lib/origami/filters/crypt.rb +38 -0
  55. data/lib/origami/filters/dct.rb +54 -0
  56. data/lib/origami/filters/flate.rb +69 -0
  57. data/lib/origami/filters/jbig2.rb +57 -0
  58. data/lib/origami/filters/jpx.rb +47 -0
  59. data/lib/origami/filters/lzw.rb +170 -0
  60. data/lib/origami/filters/predictors.rb +292 -0
  61. data/lib/origami/filters/runlength.rb +129 -0
  62. data/lib/origami/filters.rb +364 -0
  63. data/lib/origami/font.rb +196 -0
  64. data/lib/origami/functions.rb +79 -0
  65. data/lib/origami/graphics/colors.rb +230 -0
  66. data/lib/origami/graphics/instruction.rb +98 -0
  67. data/lib/origami/graphics/path.rb +182 -0
  68. data/lib/origami/graphics/patterns.rb +174 -0
  69. data/lib/origami/graphics/render.rb +62 -0
  70. data/lib/origami/graphics/state.rb +149 -0
  71. data/lib/origami/graphics/text.rb +225 -0
  72. data/lib/origami/graphics/xobject.rb +918 -0
  73. data/lib/origami/graphics.rb +38 -0
  74. data/lib/origami/header.rb +75 -0
  75. data/lib/origami/javascript.rb +713 -0
  76. data/lib/origami/linearization.rb +330 -0
  77. data/lib/origami/metadata.rb +172 -0
  78. data/lib/origami/name.rb +135 -0
  79. data/lib/origami/null.rb +65 -0
  80. data/lib/origami/numeric.rb +181 -0
  81. data/lib/origami/obfuscation.rb +245 -0
  82. data/lib/origami/object.rb +760 -0
  83. data/lib/origami/optionalcontent.rb +183 -0
  84. data/lib/origami/outline.rb +54 -0
  85. data/lib/origami/outputintents.rb +85 -0
  86. data/lib/origami/page.rb +722 -0
  87. data/lib/origami/parser.rb +269 -0
  88. data/lib/origami/parsers/fdf.rb +56 -0
  89. data/lib/origami/parsers/pdf/lazy.rb +176 -0
  90. data/lib/origami/parsers/pdf/linear.rb +122 -0
  91. data/lib/origami/parsers/pdf.rb +118 -0
  92. data/lib/origami/parsers/ppklite.rb +57 -0
  93. data/lib/origami/pdf.rb +1108 -0
  94. data/lib/origami/reference.rb +134 -0
  95. data/lib/origami/signature.rb +702 -0
  96. data/lib/origami/stream.rb +705 -0
  97. data/lib/origami/string.rb +444 -0
  98. data/lib/origami/template/patterns.rb +56 -0
  99. data/lib/origami/template/widgets.rb +151 -0
  100. data/lib/origami/trailer.rb +190 -0
  101. data/lib/origami/tree.rb +62 -0
  102. data/lib/origami/version.rb +23 -0
  103. data/lib/origami/webcapture.rb +100 -0
  104. data/lib/origami/xfa/config.rb +453 -0
  105. data/lib/origami/xfa/connectionset.rb +146 -0
  106. data/lib/origami/xfa/datasets.rb +49 -0
  107. data/lib/origami/xfa/localeset.rb +42 -0
  108. data/lib/origami/xfa/package.rb +59 -0
  109. data/lib/origami/xfa/pdf.rb +73 -0
  110. data/lib/origami/xfa/signature.rb +42 -0
  111. data/lib/origami/xfa/sourceset.rb +43 -0
  112. data/lib/origami/xfa/stylesheet.rb +44 -0
  113. data/lib/origami/xfa/template.rb +1691 -0
  114. data/lib/origami/xfa/xdc.rb +42 -0
  115. data/lib/origami/xfa/xfa.rb +146 -0
  116. data/lib/origami/xfa/xfdf.rb +43 -0
  117. data/lib/origami/xfa/xmpmeta.rb +43 -0
  118. data/lib/origami/xfa.rb +62 -0
  119. data/lib/origami/xreftable.rb +557 -0
  120. data/lib/origami.rb +47 -0
  121. data/test/dataset/calc.pdf +85 -0
  122. data/test/dataset/crypto.pdf +36 -0
  123. data/test/dataset/empty.pdf +49 -0
  124. data/test/test_actions.rb +27 -0
  125. data/test/test_annotations.rb +68 -0
  126. data/test/test_forms.rb +30 -0
  127. data/test/test_native_types.rb +83 -0
  128. data/test/test_object_tree.rb +33 -0
  129. data/test/test_pages.rb +60 -0
  130. data/test/test_pdf.rb +20 -0
  131. data/test/test_pdf_attachment.rb +34 -0
  132. data/test/test_pdf_create.rb +24 -0
  133. data/test/test_pdf_encrypt.rb +102 -0
  134. data/test/test_pdf_parse.rb +134 -0
  135. data/test/test_pdf_parse_lazy.rb +69 -0
  136. data/test/test_pdf_sign.rb +97 -0
  137. data/test/test_streams.rb +184 -0
  138. data/test/test_xrefs.rb +67 -0
  139. metadata +280 -0
@@ -0,0 +1,269 @@
1
+ =begin
2
+
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ =end
20
+
21
+ require 'rainbow'
22
+ require 'strscan'
23
+
24
+ module Origami
25
+
26
+ class Parser #:nodoc:
27
+
28
# Error type raised by the parsers when a document structure cannot be read.
ParsingError = Class.new(Error) #:nodoc:
30
+
31
+ #
32
+ # Do not output debug information.
33
+ #
34
+ VERBOSE_QUIET = 0
35
+
36
+ #
37
+ # Output some useful information.
38
+ #
39
+ VERBOSE_INFO = 1
40
+
41
+ #
42
+ # Output debug information.
43
+ #
44
+ VERBOSE_DEBUG = 2
45
+
46
+ #
47
+ # Output every object read.
48
+ #
49
+ VERBOSE_TRACE = 3
50
+
51
+ attr_accessor :options
52
+
53
#
# Creates a new parser.
# _options_:: Hash of settings overriding the defaults listed below.
#
def initialize(options = {}) #:nodoc:
    defaults =
    {
        verbosity: VERBOSE_INFO,    # Verbose level.
        ignore_errors: true,        # Try to keep on parsing when errors occur.
        callback: Proc.new {},      # Callback procedure whenever a structure is read.
        logger: STDERR,             # Where to output parser messages.
        colorize_log: true          # Colorize parser output?
    }

    # Caller-supplied values take precedence over the defaults.
    @options = defaults.merge(options)
    @logger = @options[:logger]

    # Type information for indirect objects.
    @deferred_casts = {}

    # No data is loaded until #parse is called.
    @data = nil
end
71
+
72
#
# Returns the current position of the underlying scanner.
# Raises a RuntimeError if no data has been loaded yet.
#
def pos
    return @data.pos unless @data.nil?

    raise RuntimeError, "Cannot get position, parser has no loaded data."
end
77
+
78
#
# Moves the underlying scanner to _offset_.
# Raises a RuntimeError if no data has been loaded yet.
#
def pos=(offset)
    if @data.nil?
        raise RuntimeError, "Cannot set position, parser has no loaded data."
    end

    @data.pos = offset
end
83
+
84
#
# Loads the data to be parsed and rewinds the scanner to its beginning.
# _stream_:: Either an object responding to +read+ (its whole content is read
#            and tagged as binary), a String holding the path of a file to
#            read, or an existing StringScanner (used as is).
#
# Raises a TypeError for any other kind of input.
#
def parse(stream)
    scanner, filename =
        if stream.respond_to?(:read)
            [ StringScanner.new(stream.read.force_encoding('binary')), nil ]
        elsif stream.is_a?(::String)
            [ StringScanner.new(File.binread(stream)), stream ]
        elsif stream.is_a?(StringScanner)
            [ stream, nil ]
        else
            raise TypeError, "Cannot parse input of type #{stream.class}"
        end

    # Only remember a file name when the data actually comes from a path,
    # so that a previous parse does not leave a stale name behind.
    @filename = filename

    @data = scanner
    @data.pos = 0
end
100
+
101
#
# Reads the next object starting at _pos_ (defaults to the current position).
# Returns the object read, or nil when Object.parse yields nothing.
#
# An unterminated object is reported, handed to the callback and returned.
# On any other failure the error is logged; it is re-raised unless
# :ignore_errors is set, in which case the parser skips to the next object
# and tries again.
#
def parse_object(pos = @data.pos) #:nodoc:
    @data.pos = pos

    begin
        object = Object.parse(@data, self)
        return if object.nil?

        object = try_object_promotion(object)
        trace "Read #{object.type} object, #{object.reference}"

        @options[:callback].call(object)
        object

    rescue UnterminatedObjectError => unterminated
        error unterminated.message
        partial = unterminated.obj

        Object.skip_until_next_obj(@data)
        @options[:callback].call(partial)
        partial

    rescue => failure
        error "Breaking on: #{(@data.peek(10) + "...").inspect} at offset 0x#{@data.pos.to_s(16)}"
        error "Last exception: [#{failure.class}] #{failure.message}"
        unless @options[:ignore_errors]
            error "Manually fix the file or set :ignore_errors parameter."
            raise
        end

        debug 'Skipping this indirect object.'

        # Give up if there is no further object to resume from.
        raise unless Object.skip_until_next_obj(@data)

        # Resume parsing from the position the scanner was moved to.
        retry
    end
end
136
+
137
#
# Reads a cross-reference section starting at _pos_ (defaults to the current position).
# Returns the parsed section, or nil if it could not be parsed.
#
def parse_xreftable(pos = @data.pos) #:nodoc:
    @data.pos = pos

    begin
        info "...Parsing xref table..."
        section = XRef::Section.parse(@data)
        @options[:callback].call(section)

        section

    rescue => failure
        debug "Exception caught while parsing xref table : " + failure.message
        warn "Unable to parse xref table! Xrefs might be stored into an XRef stream."

        # Move to the next "trailer" keyword, if any, and step back onto its first byte.
        if @data.skip_until(/trailer/)
            @data.pos -= 'trailer'.length
        end

        nil
    end
end
156
+
157
#
# Reads a trailer starting at _pos_ (defaults to the current position).
# Returns the parsed trailer; any parsing failure is logged and re-raised.
#
def parse_trailer(pos = @data.pos) #:nodoc:
    @data.pos = pos

    begin
        info "...Parsing trailer..."
        parsed = Trailer.parse(@data, self)

        @options[:callback].call(parsed)
        parsed

    rescue => failure
        debug "Exception caught while parsing trailer : " + failure.message
        warn "Unable to parse trailer!"

        raise
    end
end
174
+
175
#
# Records the _type_ (or list of types) that the object designated by
# _reference_ should later be cast to.
#
def defer_type_cast(reference, type) #:nodoc:
    @deferred_casts.store(reference, type)
end
178
+
179
#
# Returns the path given to #parse, if the data was loaded from a file name.
#
def target_filename
    @filename
end

#
# Returns the size in bytes of the loaded data, or nil if nothing is loaded.
#
def target_filesize
    # Scanner positions are byte offsets, so count bytes rather than characters.
    @data.string.bytesize if @data
end

#
# Returns a copy of the loaded data, or nil if nothing is loaded.
#
def target_data
    @data.string.dup if @data
end
190
+
191
# Logs _msg_ with the "error" tag at the VERBOSE_QUIET level.
def error(msg = "") #:nodoc:
    log VERBOSE_QUIET, 'error', :red, msg
end

# Logs _msg_ with the "warn" tag at the VERBOSE_INFO level.
def warn(msg = "") #:nodoc:
    log VERBOSE_INFO, 'warn ', :yellow, msg
end

# Logs _msg_ with the "info" tag at the VERBOSE_INFO level.
def info(msg = "") #:nodoc:
    log VERBOSE_INFO, 'info ', :green, msg
end

# Logs _msg_ with the "debug" tag at the VERBOSE_DEBUG level.
def debug(msg = "") #:nodoc:
    log VERBOSE_DEBUG, 'debug', :magenta, msg
end

# Logs _msg_ with the "trace" tag at the VERBOSE_TRACE level.
def trace(msg = "") #:nodoc:
    log VERBOSE_TRACE, 'trace', :cyan, msg
end
210
+
211
#
# Returns a StringScanner for _stream_.
# A StringScanner is returned unchanged; an object responding to +to_str+
# is wrapped in a new scanner. Anything else raises a TypeError.
#
def self.init_scanner(stream)
    return stream if stream.is_a?(StringScanner)
    return StringScanner.new(stream.to_str) if stream.respond_to?(:to_str)

    raise TypeError, "Cannot initialize scanner from #{stream.class}"
end
220
+
221
+ private
222
+
223
+ #
224
+ # Attempt to promote an object using the deferred casts.
225
+ #
226
#
# Returns _obj_ cast to the first deferred type that is a strict subclass of
# its class, or _obj_ itself when type propagation is disabled, when no cast
# was deferred for its reference, or when no deferred type is compatible.
#
def try_object_promotion(obj)
    promotable = Origami::OPTIONS[:enable_type_propagation] && @deferred_casts.key?(obj.reference)
    return obj unless promotable

    candidates = @deferred_casts[obj.reference]
    candidates = [ candidates ] unless candidates.is_a?(::Array)

    # Promote object if a compatible type is found.
    target = candidates.find { |candidate| candidate < obj.class }
    return obj unless target

    obj.cast_to(target, self)
end
240
+
241
#
# Writes _message_ to the logger, preceded by "[_prefix_]", provided the
# configured verbosity is at least _level_.
# The prefix is colored with _color_ when the :colorize_log option is set.
#
def log(level, prefix, color, message) #:nodoc:
    return unless @options[:verbosity] >= level

    tag = "[#{prefix}]"
    plain = !@options[:colorize_log]

    if plain
        @logger.puts "#{tag} #{message}"
    else
        @logger.print Rainbow(tag).color(color)
        @logger.puts " #{message}"
    end
end
251
+
252
#
# Applies the deferred casts to _document_, repeating the pass until a full
# pass leaves the set of deferred casts unchanged.
# For each reference, the hints are tried in order until one cast succeeds.
#
def propagate_types(document)
    info "...Propagating types..."

    loop do
        snapshot = @deferred_casts.clone

        snapshot.each_pair do |ref, type|
            hints = type.is_a?(::Array) ? type : [ type ]

            # Stops at the first hint accepted by the document.
            hints.any? { |hint| document.cast_object(ref, hint) }
        end

        break if snapshot == @deferred_casts
    end
end
267
+ end
268
+
269
+ end
@@ -0,0 +1,56 @@
1
+ =begin
2
+
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ =end
20
+
21
+ require 'origami/parser'
22
+
23
+ module Origami
24
+
25
+ class FDF
26
+ class Parser < Origami::Parser
27
#
# Parses _stream_ and returns the resulting FDF document:
# header, then every object, then the xref table and trailer of the
# first revision. Types are propagated from the trailer Root when the
# :enable_type_propagation option is set.
#
def parse(stream) #:nodoc:
    super(stream)

    document = FDF.new(self)
    document.header = FDF::Header.parse(@data)
    @options[:callback].call(document.header)

    # Read objects until the parser finds no more of them.
    until (object = parse_object).nil?
        document.insert(object)
    end

    revision = document.revisions.first
    revision.xreftable = parse_xreftable
    revision.trailer = parse_trailer

    if Origami::OPTIONS[:enable_type_propagation]
        root = revision.trailer[:Root]
        document.cast_object(root, FDF::Catalog) if root.is_a?(Reference)

        propagate_types(document)
    end

    document
end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,176 @@
1
+ =begin
2
+
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ =end
20
+
21
+
22
+ require 'origami/parsers/pdf'
23
+
24
+ module Origami
25
+
26
+ class PDF
27
+
28
+ #
29
+ # Create a new PDF lazy Parser.
30
+ #
31
+ class LazyParser < Parser
32
#
# Parses _stream_ lazily and returns the PDF document.
#
# Revisions are discovered from the end of the file, by following the
# /Prev entries of the cross-reference streams or trailers, and are stored
# from the oldest to the most recent.
#
def parse(stream)
    super

    pdf = parse_initialize
    revisions = []

    # Offsets already processed, to survive documents whose /Prev entries loop.
    visited_offsets = {}

    # Locate the last xref offset at the end of the file.
    xref_offset = locate_last_xref_offset

    while xref_offset && xref_offset != 0
        if visited_offsets.key?(xref_offset)
            warn "Cyclic xref chain detected at offset #{xref_offset}, ignoring earlier revisions."
            break
        end
        visited_offsets[xref_offset] = true

        # Create a new revision based on the xref section offset.
        revision = parse_revision(pdf, xref_offset)

        # Locate the previous xref section.
        xref_offset =
            if revision.xrefstm? && revision.xrefstm[:Prev].is_a?(Integer)
                revision.xrefstm[:Prev].to_i
            elsif revision.trailer[:Prev].is_a?(Integer)
                revision.trailer[:Prev].to_i
            end

        # Prepend the revision, since the file is walked backwards.
        revisions.unshift(revision)
    end

    pdf.revisions.clear
    revisions.each do |rev|
        pdf.revisions.push(rev)
        pdf.insert(rev.xrefstm) if rev.xrefstm?
    end

    parse_finalize(pdf)

    pdf
end
69
+
70
+ private
71
+
72
+ #
73
+ # The document is scanned starting from the end, by locating the last startxref token.
74
+ #
75
#
# Returns the offset announced by the last startxref token of the document.
# Raises a ParsingError if no token is found or if the announced offset is zero.
#
def locate_last_xref_offset
    # Built once: an interpolated regexp literal is otherwise rebuilt on each step.
    xref_token = /#{Trailer::XREF_TOKEN}/

    # Set the scanner position at the end.
    @data.terminate

    # Walk backwards until the scanner sits on the startxref token.
    until @data.match?(xref_token)
        raise ParsingError, "No xref token found" if @data.pos == 0
        @data.pos -= 1
    end

    # Extract the offset of the last xref section.
    trailer = Trailer.parse(@data, self)
    raise ParsingError, "Cannot locate xref section" if trailer.startxref.zero?

    trailer.startxref
end
91
+
92
+ #
93
+ # In the LazyParser, the revisions are parsed by jumping through the cross-references (table or streams).
94
+ #
95
#
# Builds the revision whose cross-references start at byte _offset_.
# Raises a ParsingError if _offset_ lies outside of the loaded data.
#
def parse_revision(pdf, offset)
    # Scanner positions are byte offsets, so the bound is the byte size of the data.
    last_offset = @data.string.bytesize - 1
    raise ParsingError, "Invalid xref offset" unless offset.between?(0, last_offset)

    @data.pos = offset

    # Create a new revision.
    revision = PDF::Revision.new(pdf)

    if @data.match?(/#{XRef::Section::TOKEN}/)
        # Regular xref section.
        parse_revision_from_xreftable(revision)
    else
        # The xrefs are stored in a stream.
        parse_revision_from_xrefstm(revision)
    end

    revision
end
114
+
115
+ #
116
+ # Assume the current pointer is at the xreftable of the revision.
117
+ # We are expecting:
118
+ # - a regular xref table, starting with xref
119
+ # - a revision trailer
120
+ #
121
+ # The trailer may hold a XRefStm entry in case of hybrid references.
122
+ #
123
#
# Fills _revision_ from the xref table and trailer found at the current position.
# Raises a ParsingError if the xref table cannot be parsed.
# A failure to read the optional hybrid xref stream only produces a warning.
#
def parse_revision_from_xreftable(revision)
    table = parse_xreftable
    raise ParsingError, "Cannot parse xref section" if table.nil?

    revision.xreftable = table
    revision.trailer = parse_trailer

    # Nothing more to do unless the trailer announces hybrid cross-references.
    return unless revision.trailer[:XRefStm].is_a?(Integer)

    begin
        offset = revision.trailer[:XRefStm].to_i
        stream = parse_object(offset)

        if stream.is_a?(XRefStream)
            revision.xrefstm = stream
        else
            warn "Invalid xref stream at offset #{offset}"
        end

    rescue
        warn "Cannot parse xref stream at offset #{offset}"
    end
end
147
+
148
+ #
149
+ # Assume the current pointer is at the xref stream of the revision.
150
+ #
151
+ # The XRefStream should normally be at the end of the revision.
152
+ # We scan after the object for a trailer token.
153
+ #
154
+ # The revision is allowed not to have a trailer, and the stream
155
+ # dictionary will be used as the trailer dictionary in that case.
156
+ #
157
#
# Fills _revision_ from the xref stream found at the current position.
# Raises a ParsingError if the object read is not an XRefStream.
# When no trailer token follows, an empty Trailer is assigned instead.
#
def parse_revision_from_xrefstm(revision)
    stream = parse_object
    raise ParsingError, "Invalid xref stream" unless stream.is_a?(XRefStream)

    revision.xrefstm = stream

    # Search for the trailer.
    trailer_tokens = Regexp.union(Trailer::XREF_TOKEN, *Trailer::TOKENS)

    if @data.skip_until(trailer_tokens).nil?
        warn "No trailer found."
        revision.trailer = Trailer.new
    else
        # Step back onto the first byte of the matched token.
        @data.pos -= @data.matched_size

        revision.trailer = parse_trailer
    end
end
173
+ end
174
+ end
175
+
176
+ end
@@ -0,0 +1,122 @@
1
+ =begin
2
+
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ =end
20
+
21
+
22
+ require 'origami/parsers/pdf'
23
+
24
+ module Origami
25
+
26
+ class PDF
27
+
28
+ #
29
+ # Create a new PDF linear Parser.
30
+ #
31
+ class LinearParser < Parser
32
#
# Parses _stream_ sequentially, one revision after the other, until the end
# of the data, and returns the result of parse_finalize for the document.
#
# If a revision cannot be read, the error and its backtrace are reported
# through the parser logger and the revisions read so far are kept.
#
def parse(stream)
    super

    pdf = parse_initialize

    #
    # Parse each revision
    #
    revision = 0
    until @data.eos?
        begin
            pdf.add_new_revision unless revision.zero?

            parse_revision(pdf, revision)
            revision += 1

        rescue => failure
            error "Cannot read : " + (@data.peek(10) + "...").inspect
            error "Stopped on exception : " + failure.message

            # Use the parser logger rather than STDERR, to honor the :logger option.
            error Array(failure.backtrace).join("\n")

            break
        end
    end

    pdf.loaded!

    parse_finalize(pdf)
end
61
+
62
+ private
63
+
64
#
# Reads the objects, xref table and trailer of revision number _revision_no_
# (zero-based) into _pdf_, then looks for its xref streams.
# Returns the revision.
#
def parse_revision(pdf, revision_no)
    revision = pdf.revisions[revision_no]

    info "...Parsing revision #{revision_no + 1}..."

    # Read objects until the parser finds no more of them.
    until (object = parse_object).nil?
        pdf.insert(object)
    end

    revision.xreftable = parse_xreftable
    revision.trailer = parse_trailer

    locate_xref_streams(pdf, revision_no)

    revision
end
80
+
81
#
# Attaches to revision number _revision_no_ the XRefStream located at the
# offset given by its trailer (startxref first, XRefStm otherwise), then
# follows its Prev entry if it has one.
#
def locate_xref_streams(pdf, revision_no)
    revision = pdf.revisions[revision_no]
    trailer = revision.trailer

    # Try to match the location of the last startxref / XRefStm with an XRefStream.
    candidate =
        if trailer.startxref != 0
            pdf.get_object_by_offset(trailer.startxref)
        elsif trailer.key?(:XRefStm)
            pdf.get_object_by_offset(trailer[:XRefStm])
        end

    return unless candidate.is_a?(XRefStream)

    warn "Found a XRefStream for revision #{revision_no + 1} at #{candidate.reference}"
    revision.xrefstm = candidate

    locate_prev_xref_streams(pdf, revision_no, candidate) if candidate.key?(:Prev)
end
102
+
103
#
# Walks back through the Prev entries of _xrefstm_, attaching each
# XRefStream found to the revision preceding the one it was reached from.
# Stops at the first revision, at a non-integer Prev, or when the object at
# the Prev offset is not an XRefStream.
#
def locate_prev_xref_streams(pdf, revision_no, xrefstm)
    current_no = revision_no
    current_stm = xrefstm

    loop do
        break unless current_no > 0 && current_stm.Prev.is_a?(Integer)

        older_revision = pdf.revisions[current_no - 1]
        older_offset = current_stm.Prev.to_i
        older_stm = pdf.get_object_by_offset(older_offset)

        break unless older_stm.is_a?(XRefStream)

        warn "Found a previous XRefStream for revision #{current_no} at #{older_stm.reference}"
        older_revision.xrefstm = older_stm

        break unless older_stm.key?(:Prev)

        # Continue from the stream just attached, one revision earlier.
        current_no -= 1
        current_stm = older_stm
    end

    nil
end
119
+ end
120
+ end
121
+
122
+ end