origami 1.2.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/README.md +112 -0
  4. data/bin/config/pdfcop.conf.yml +232 -233
  5. data/bin/gui/about.rb +27 -37
  6. data/bin/gui/config.rb +108 -117
  7. data/bin/gui/file.rb +416 -365
  8. data/bin/gui/gtkhex.rb +1138 -1153
  9. data/bin/gui/hexview.rb +55 -57
  10. data/bin/gui/imgview.rb +48 -51
  11. data/bin/gui/menu.rb +388 -386
  12. data/bin/gui/properties.rb +114 -130
  13. data/bin/gui/signing.rb +571 -617
  14. data/bin/gui/textview.rb +77 -95
  15. data/bin/gui/treeview.rb +382 -387
  16. data/bin/gui/walker.rb +227 -232
  17. data/bin/gui/xrefs.rb +56 -60
  18. data/bin/pdf2pdfa +53 -57
  19. data/bin/pdf2ruby +212 -228
  20. data/bin/pdfcop +338 -348
  21. data/bin/pdfdecompress +58 -65
  22. data/bin/pdfdecrypt +56 -60
  23. data/bin/pdfencrypt +75 -80
  24. data/bin/pdfexplode +185 -182
  25. data/bin/pdfextract +201 -218
  26. data/bin/pdfmetadata +83 -82
  27. data/bin/pdfsh +4 -5
  28. data/bin/pdfwalker +1 -2
  29. data/bin/shell/.irbrc +45 -82
  30. data/bin/shell/console.rb +105 -130
  31. data/bin/shell/hexdump.rb +40 -64
  32. data/examples/README.md +34 -0
  33. data/examples/attachments/attachment.rb +38 -0
  34. data/examples/attachments/nested_document.rb +51 -0
  35. data/examples/encryption/encryption.rb +28 -0
  36. data/{samples/actions/triggerevents/trigger.rb → examples/events/events.rb} +13 -16
  37. data/examples/flash/flash.rb +37 -0
  38. data/{samples → examples}/flash/helloworld.swf +0 -0
  39. data/examples/forms/javascript.rb +54 -0
  40. data/examples/forms/xfa.rb +115 -0
  41. data/examples/javascript/hello_world.rb +22 -0
  42. data/examples/javascript/js_emulation.rb +54 -0
  43. data/examples/loop/goto.rb +32 -0
  44. data/examples/loop/named.rb +33 -0
  45. data/examples/signature/signature.rb +65 -0
  46. data/examples/uri/javascript.rb +56 -0
  47. data/examples/uri/open-uri.rb +21 -0
  48. data/examples/uri/submitform.rb +47 -0
  49. data/lib/origami.rb +29 -42
  50. data/lib/origami/3d.rb +350 -225
  51. data/lib/origami/acroform.rb +262 -288
  52. data/lib/origami/actions.rb +268 -288
  53. data/lib/origami/annotations.rb +697 -722
  54. data/lib/origami/array.rb +258 -184
  55. data/lib/origami/boolean.rb +74 -84
  56. data/lib/origami/catalog.rb +397 -434
  57. data/lib/origami/collections.rb +144 -0
  58. data/lib/origami/destinations.rb +233 -194
  59. data/lib/origami/dictionary.rb +253 -232
  60. data/lib/origami/encryption.rb +1274 -1243
  61. data/lib/origami/export.rb +232 -268
  62. data/lib/origami/extensions/fdf.rb +307 -220
  63. data/lib/origami/extensions/ppklite.rb +368 -435
  64. data/lib/origami/filespec.rb +197 -0
  65. data/lib/origami/filters.rb +301 -295
  66. data/lib/origami/filters/ascii.rb +177 -180
  67. data/lib/origami/filters/ccitt.rb +528 -535
  68. data/lib/origami/filters/crypt.rb +26 -35
  69. data/lib/origami/filters/dct.rb +46 -52
  70. data/lib/origami/filters/flate.rb +95 -94
  71. data/lib/origami/filters/jbig2.rb +49 -55
  72. data/lib/origami/filters/jpx.rb +38 -44
  73. data/lib/origami/filters/lzw.rb +189 -183
  74. data/lib/origami/filters/predictors.rb +221 -235
  75. data/lib/origami/filters/runlength.rb +103 -104
  76. data/lib/origami/font.rb +173 -186
  77. data/lib/origami/functions.rb +67 -81
  78. data/lib/origami/graphics.rb +25 -21
  79. data/lib/origami/graphics/colors.rb +178 -187
  80. data/lib/origami/graphics/instruction.rb +79 -85
  81. data/lib/origami/graphics/path.rb +142 -148
  82. data/lib/origami/graphics/patterns.rb +160 -167
  83. data/lib/origami/graphics/render.rb +43 -50
  84. data/lib/origami/graphics/state.rb +138 -153
  85. data/lib/origami/graphics/text.rb +188 -205
  86. data/lib/origami/graphics/xobject.rb +819 -815
  87. data/lib/origami/header.rb +63 -78
  88. data/lib/origami/javascript.rb +596 -597
  89. data/lib/origami/linearization.rb +285 -290
  90. data/lib/origami/metadata.rb +139 -148
  91. data/lib/origami/name.rb +112 -148
  92. data/lib/origami/null.rb +53 -62
  93. data/lib/origami/numeric.rb +162 -175
  94. data/lib/origami/obfuscation.rb +186 -174
  95. data/lib/origami/object.rb +593 -573
  96. data/lib/origami/outline.rb +42 -47
  97. data/lib/origami/outputintents.rb +73 -82
  98. data/lib/origami/page.rb +703 -592
  99. data/lib/origami/parser.rb +238 -290
  100. data/lib/origami/parsers/fdf.rb +41 -33
  101. data/lib/origami/parsers/pdf.rb +75 -95
  102. data/lib/origami/parsers/pdf/lazy.rb +137 -0
  103. data/lib/origami/parsers/pdf/linear.rb +64 -66
  104. data/lib/origami/parsers/ppklite.rb +34 -70
  105. data/lib/origami/pdf.rb +1030 -1005
  106. data/lib/origami/reference.rb +102 -102
  107. data/lib/origami/signature.rb +591 -609
  108. data/lib/origami/stream.rb +668 -551
  109. data/lib/origami/string.rb +397 -373
  110. data/lib/origami/template/patterns.rb +56 -0
  111. data/lib/origami/template/widgets.rb +151 -0
  112. data/lib/origami/trailer.rb +144 -158
  113. data/lib/origami/tree.rb +62 -0
  114. data/lib/origami/version.rb +23 -0
  115. data/lib/origami/webcapture.rb +88 -79
  116. data/lib/origami/xfa.rb +2863 -2882
  117. data/lib/origami/xreftable.rb +472 -384
  118. data/test/dataset/calc.pdf +85 -0
  119. data/test/dataset/crypto.pdf +82 -0
  120. data/test/dataset/empty.pdf +49 -0
  121. data/test/test_actions.rb +27 -0
  122. data/test/test_annotations.rb +90 -0
  123. data/test/test_pages.rb +31 -0
  124. data/test/test_pdf.rb +16 -0
  125. data/test/test_pdf_attachment.rb +34 -0
  126. data/test/test_pdf_create.rb +24 -0
  127. data/test/test_pdf_encrypt.rb +95 -0
  128. data/test/test_pdf_parse.rb +96 -0
  129. data/test/test_pdf_sign.rb +58 -0
  130. data/test/test_streams.rb +182 -0
  131. data/test/test_xrefs.rb +67 -0
  132. metadata +88 -58
  133. data/README +0 -67
  134. data/bin/pdf2graph +0 -121
  135. data/bin/pdfcocoon +0 -104
  136. data/lib/origami/file.rb +0 -233
  137. data/samples/README.txt +0 -45
  138. data/samples/actions/launch/calc.rb +0 -87
  139. data/samples/actions/launch/winparams.rb +0 -22
  140. data/samples/actions/loop/loopgoto.rb +0 -24
  141. data/samples/actions/loop/loopnamed.rb +0 -21
  142. data/samples/actions/named/named.rb +0 -31
  143. data/samples/actions/samba/smbrelay.rb +0 -26
  144. data/samples/actions/webbug/submitform.js +0 -26
  145. data/samples/actions/webbug/webbug-browser.rb +0 -68
  146. data/samples/actions/webbug/webbug-js.rb +0 -67
  147. data/samples/actions/webbug/webbug-reader.rb +0 -90
  148. data/samples/attachments/attach.rb +0 -40
  149. data/samples/attachments/attached.txt +0 -1
  150. data/samples/crypto/crypto.rb +0 -28
  151. data/samples/digsig/signed.rb +0 -46
  152. data/samples/exploits/cve-2008-2992-utilprintf.rb +0 -87
  153. data/samples/exploits/cve-2009-0927-geticon.rb +0 -65
  154. data/samples/exploits/exploit_customdictopen.rb +0 -55
  155. data/samples/exploits/getannots.rb +0 -69
  156. data/samples/flash/flash.rb +0 -31
  157. data/samples/javascript/attached.txt +0 -1
  158. data/samples/javascript/js.rb +0 -52
  159. data/templates/patterns.rb +0 -66
  160. data/templates/widgets.rb +0 -173
  161. data/templates/xdp.rb +0 -92
  162. data/test/ts_pdf.rb +0 -50
@@ -1,21 +1,20 @@
1
1
  =begin
2
2
 
3
- = File
4
- parsers/ppklite.rb
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
5
 
6
- = Info
7
- Origami is free software: you can redistribute it and/or modify
8
- it under the terms of the GNU Lesser General Public License as published by
9
- the Free Software Foundation, either version 3 of the License, or
10
- (at your option) any later version.
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
11
10
 
12
- Origami is distributed in the hope that it will be useful,
13
- but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- GNU Lesser General Public License for more details.
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
16
15
 
17
- You should have received a copy of the GNU Lesser General Public License
18
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
18
 
20
19
  =end
21
20
 
@@ -23,71 +22,36 @@ require 'origami/parser'
23
22
 
24
23
  module Origami
25
24
 
26
- module Adobe
27
-
28
25
  class PPKLite
29
26
 
30
- class Parser < Origami::Parser
31
- def parse(stream) #:nodoc:
32
- super
27
+ class Parser < Origami::Parser
28
+ def parse(stream) #:nodoc:
29
+ super
30
+
31
+ address_book = PPKLite.new(self)
32
+ address_book.header = PPKLite::Header.parse(@data)
33
+ @options[:callback].call(address_book.header)
33
34
 
34
- addrbk = Adobe::PPKLite.new
35
- addrbk.header = Adobe::PPKLite::Header.parse(stream)
36
- @options[:callback].call(addrbk.header)
37
-
38
- loop do
39
- break if (object = parse_object).nil?
40
- addrbk << object
41
- end
35
+ loop do
36
+ break if (object = parse_object).nil?
37
+ address_book.insert(object)
38
+ end
42
39
 
43
- addrbk.revisions.first.xreftable = parse_xreftable
44
- addrbm.revisions.first.trailer = parse_trailer
45
- book_specialize_entries(addrbk)
40
+ address_book.revisions.first.xreftable = parse_xreftable
41
+ address_book.revisions.first.trailer = parse_trailer
46
42
 
47
- addrbk
48
- end
49
-
50
- def book_specialize_entries(addrbk) #:nodoc:
51
- addrbk.revisions.first.body.each_pair do |ref, obj|
52
-
53
- if obj.is_a?(Dictionary)
54
-
55
- if obj[:Type] == :Catalog
56
-
57
- o = Adobe::PPKLite::Catalog.new(obj)
58
- o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
59
-
60
- if o.PPK.is_a?(Dictionary) and o.PPK[:Type] == :PPK
61
- o.PPK = Adobe::PPKLite::PPK.new(o.PPK)
62
-
63
- if o.PPK.User.is_a?(Dictionary) and o.PPK.User[:Type] == :User
64
- o.PPK.User = Adobe::PPKLite::UserList.new(o.PPK.User)
65
- end
66
-
67
- if o.PPK.AddressBook.is_a?(Dictionary) and o.PPK.AddressBook[:Type] == :AddressBook
68
- o.PPK.AddressBook = Adobe::PPKLite::AddressList.new(o.PPK.AddressBook)
69
- end
43
+ if Origami::OPTIONS[:enable_type_propagation]
44
+ trailer = address_book.revisions.first.trailer
45
+
46
+ if trailer[:Root].is_a?(Reference)
47
+ address_book.cast_object(trailer[:Root], PPKLite::Catalog, self)
48
+ end
49
+
50
+ propagate_types(address_book)
70
51
  end
71
-
72
- addrbk.revisions.first.body[ref] = o
73
-
74
- elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::USER
75
- o = Adobe::PPKLite::User.new(obj)
76
- o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
77
-
78
- addrbk.revisions.first.body[ref] = o
79
- elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::CERTIFICATE
80
- o = Adobe::PPKLite::Certificate.new(obj)
81
- o.generation, o.no, o.file_offset = obj.generation, obj.no, obj.file_offset
82
-
83
- addrbk.revisions.first.body[ref] = o
84
- end
85
52
 
53
+ address_book
86
54
  end
87
- end
88
55
  end
89
- end
90
56
  end
91
- end
92
57
  end
93
-
@@ -1,25 +1,20 @@
1
1
  =begin
2
2
 
3
- = File
4
- pdf.rb
5
-
6
- = Info
7
- This file is part of Origami, PDF manipulation framework for Ruby
8
- Copyright (C) 2010 Guillaume Delugré <guillaume AT security-labs DOT org>
9
- All right reserved.
10
-
11
- Origami is free software: you can redistribute it and/or modify
12
- it under the terms of the GNU Lesser General Public License as published by
13
- the Free Software Foundation, either version 3 of the License, or
14
- (at your option) any later version.
15
-
16
- Origami is distributed in the hope that it will be useful,
17
- but WITHOUT ANY WARRANTY; without even the implied warranty of
18
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
- GNU Lesser General Public License for more details.
20
-
21
- You should have received a copy of the GNU Lesser General Public License
22
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
23
18
 
24
19
  =end
25
20
 
@@ -33,1106 +28,1136 @@ require 'origami/numeric'
33
28
  require 'origami/string'
34
29
  require 'origami/array'
35
30
  require 'origami/stream'
31
+ require 'origami/tree'
36
32
  require 'origami/filters'
37
- require 'origami/trailer'
38
- require 'origami/xreftable'
39
33
  require 'origami/header'
34
+ require 'origami/metadata'
40
35
  require 'origami/functions'
41
- require 'origami/catalog'
42
- require 'origami/font'
43
36
  require 'origami/page'
37
+ require 'origami/font'
44
38
  require 'origami/graphics'
45
39
  require 'origami/destinations'
46
- require 'origami/outline'
47
- require 'origami/actions'
48
- require 'origami/file'
40
+ require 'origami/filespec'
41
+ require 'origami/xfa'
49
42
  require 'origami/acroform'
50
43
  require 'origami/annotations'
44
+ require 'origami/actions'
51
45
  require 'origami/3d'
52
46
  require 'origami/signature'
53
47
  require 'origami/webcapture'
54
- require 'origami/metadata'
55
48
  require 'origami/export'
56
49
  require 'origami/webcapture'
57
50
  require 'origami/encryption'
58
51
  require 'origami/linearization'
59
52
  require 'origami/obfuscation'
60
- require 'origami/xfa'
61
53
  require 'origami/javascript'
54
+ require 'origami/outline'
62
55
  require 'origami/outputintents'
56
+ require 'origami/collections'
57
+ require 'origami/catalog'
58
+ require 'origami/xreftable'
59
+ require 'origami/trailer'
63
60
 
64
- require 'origami/parsers/pdf'
61
+ require 'origami/parsers/pdf/linear'
62
+ require 'origami/parsers/pdf/lazy'
65
63
 
66
64
  module Origami
67
65
 
68
- class InvalidPDFError < Exception #:nodoc:
69
- end
70
-
71
- #
72
- # Main class representing a PDF file and its inner contents.
73
- # A PDF file contains a set of Revision.
74
- #
75
- class PDF
76
-
77
- #
78
- # Class representing a particular revision in a PDF file.
79
- # Revision contains :
80
- # * A Body, which is a sequence of Object.
81
- # * A XRef::Section, holding XRef information about objects in body.
82
- # * A Trailer.
83
- #
84
- class Revision
85
- attr_accessor :pdf
86
- attr_accessor :body, :xreftable, :xrefstm, :trailer
87
-
88
- def initialize(pdf)
89
- @pdf = pdf
90
- @body = {}
91
- @xreftable = nil
92
- @xrefstm = nil
93
- @trailer = nil
94
- end
95
-
96
- def trailer=(trl)
97
- trl.pdf = @pdf
98
- @trailer = trl
99
- end
100
-
101
- def has_xreftable?
102
- not @xreftable.nil?
103
- end
104
-
105
- def has_xrefstm?
106
- not @xrefstm.nil?
107
- end
108
-
109
- def objects
110
- @body.values
111
- end
66
+ class InvalidPDFError < Error #:nodoc:
112
67
  end
113
68
 
114
- attr_accessor :header, :revisions
115
-
116
- class << self
117
-
118
- #
119
- # Reads and parses a PDF file from disk.
120
- #
121
- def read(filename, options = {})
122
- filename = File.expand_path(filename) if filename.is_a?(::String)
123
- PDF::LinearParser.new(options).parse(filename)
124
- end
125
-
126
- #
127
- # Creates a new PDF and saves it.
128
- # If a block is passed, the PDF instance can be processed before saving.
129
- #
130
- def create(output, options = {})
131
- pdf = PDF.new
132
- yield(pdf) if block_given?
133
- pdf.save(output, options)
134
- end
135
- alias write create
136
-
137
- #
138
- # Deserializes a PDF dump.
139
- #
140
- def deserialize(filename)
141
- Zlib::GzipReader.open(filename) { |gz|
142
- pdf = Marshal.load(gz.read)
143
- }
144
-
145
- pdf
146
- end
147
- end
148
-
149
- #
150
- # Creates a new PDF instance.
151
- # _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
152
- #
153
- def initialize(parser = nil)
154
- @header = PDF::Header.new
155
- @revisions = []
156
-
157
- add_new_revision
158
- @revisions.first.trailer = Trailer.new
159
-
160
- if parser
161
- @parser = parser
162
- else
163
- init
164
- end
165
- end
166
-
167
69
  #
168
- # Original file name if parsed from disk, nil otherwise.
70
+ # Main class representing a PDF file and its inner contents.
71
+ # A PDF file contains a set of Revision.
169
72
  #
170
- def original_filename
171
- @parser.target_filename if @parser
172
- end
73
+ class PDF
173
74
 
174
- #
175
- # Original file size if parsed from a data stream, nil otherwise.
176
- #
177
- def original_filesize
178
- @parser.target_filesize if @parser
179
- end
75
+ #
76
+ # Class representing a particular revision in a PDF file.
77
+ # Revision contains :
78
+ # * A Body, which is a sequence of Object.
79
+ # * A XRef::Section, holding XRef information about objects in body.
80
+ # * A Trailer.
81
+ #
82
+ class Revision
83
+ attr_accessor :pdf
84
+ attr_accessor :body, :xreftable, :xrefstm
85
+ attr_reader :trailer
86
+
87
+ def initialize(doc)
88
+ @document = doc
89
+ @body = {}
90
+ @xreftable = nil
91
+ @xrefstm = nil
92
+ @trailer = nil
93
+ end
180
94
 
181
- #
182
- # Original data parsed to create this document, nil if created from scratch.
183
- #
184
- def original_data
185
- @parser.target_data if @parser
186
- end
187
-
188
- #
189
- # Serializes the current PDF.
190
- #
191
- def serialize(filename)
192
- parser = @parser
193
- @parser = nil # do not serialize the parser
194
-
195
- Zlib::GzipWriter.open(filename) { |gz|
196
- gz.write Marshal.dump(self)
197
- }
198
-
199
- @parser = parser
200
- self
201
- end
202
-
203
- #
204
- # Saves the current document.
205
- # _filename_:: The path where to save this PDF.
206
- #
207
- def save(path, params = {})
208
-
209
- options =
210
- {
211
- :delinearize => true,
212
- :recompile => true,
213
- :decrypt => false
214
- }
215
- options.update(params)
216
-
217
- if self.frozen? # incompatible flags with frozen doc (signed)
218
- options[:recompile] =
219
- options[:rebuildxrefs] =
220
- options[:noindent] =
221
- options[:obfuscate] = false
222
- end
223
-
224
- if path.respond_to?(:write)
225
- fd = path
226
- else
227
- path = File.expand_path(path)
228
- fd = File.open(path, 'w').binmode
229
- end
230
-
231
- intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
232
- self.delinearize! if options[:delinearize] and self.is_linearized?
233
- compile(options) if options[:recompile]
234
-
235
- fd.write output(options)
236
- fd.close
237
-
238
- self
239
- end
240
- alias write save
241
-
242
- #
243
- # Saves the file up to given revision number.
244
- # This can be useful to visualize the modifications over different incremental updates.
245
- # _revision_:: The revision number to save.
246
- # _filename_:: The path where to save this PDF.
247
- #
248
- def save_upto(revision, filename)
249
- save(filename, :up_to_revision => revision)
250
- end
95
+ def trailer=(trl)
96
+ trl.document = @document
251
97
 
252
- #
253
- # Returns an array of Objects whose content is matching _pattern_.
254
- #
255
- # def grep(*patterns)
256
- #
257
- # patterns.map! do |pattern|
258
- # pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
259
- # end
260
- #
261
- # unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
262
- # raise TypeError, "Expected a String or Regexp"
263
- # end
264
- #
265
- # result = []
266
- # objects.each do |obj|
267
- # begin
268
- # case obj
269
- # when String, Name
270
- # result << obj if patterns.any?{|pattern| obj.value.to_s.match(pattern)}
271
- # when Stream
272
- # result << obj if patterns.any?{|pattern| obj.data.match(pattern)}
273
- # end
274
- # rescue Exception => e
275
- # puts "[#{e.class}] #{e.message}"
276
- #
277
- # next
278
- # end
279
- # end
280
- #
281
- # result
282
- # end
98
+ @trailer = trl
99
+ end
283
100
 
284
- #
285
- # Returns an array of strings and streams matching the given pattern.
286
- #
287
- def grep(*patterns) #:nodoc:
288
- patterns.map! do |pattern|
289
- if pattern.is_a?(::String)
290
- Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE)
291
- else
292
- pattern
293
- end
294
- end
295
-
296
- unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
297
- raise TypeError, "Expected a String or Regexp"
298
- end
299
-
300
- objset = []
301
- self.indirect_objects.each do |indobj|
302
- case indobj
303
- when Stream then
304
- objset.push indobj
305
- objset.concat(indobj.dictionary.strings_cache)
306
- objset.concat(indobj.dictionary.names_cache)
307
- when Name,String then objset.push indobj
308
- when Dictionary,Array then
309
- objset.concat(indobj.strings_cache)
310
- objset.concat(indobj.names_cache)
311
- end
312
- end
313
-
314
- objset.delete_if do |obj|
315
- begin
316
- case obj
317
- when String, Name
318
- not patterns.any?{|pattern| obj.value.to_s.match(pattern)}
319
- when Stream
320
- not patterns.any?{|pattern| obj.data.match(pattern)}
321
- end
322
- rescue Exception => e
323
- true
101
+ def has_xreftable?
102
+ not @xreftable.nil?
103
+ end
104
+
105
+ def has_xrefstm?
106
+ not @xrefstm.nil?
107
+ end
108
+
109
+ def each_object(&b)
110
+ @body.each_value(&b)
111
+ end
112
+
113
+ def objects
114
+ @body.values
115
+ end
324
116
  end
325
- end
326
- end
327
117
 
328
- #
329
- # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
330
- #
331
- def ls(*patterns)
332
- return objects(:include_keys => false) if patterns.empty?
118
+ #
119
+ # Document header and revisions.
120
+ #
121
+ attr_accessor :header, :revisions
122
+
123
+ class << self
124
+ #
125
+ # Reads and parses a PDF file from disk.
126
+ #
127
+ def read(path, options = {})
128
+ path = File.expand_path(path) if path.is_a?(::String)
129
+ lazy = options[:lazy]
130
+
131
+ if lazy
132
+ parser_class = PDF::LazyParser
133
+ else
134
+ parser_class = PDF::LinearParser
135
+ end
333
136
 
334
- result = []
137
+ parser_class.new(options).parse(path)
138
+ end
335
139
 
336
- patterns.map! do |pattern|
337
- pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
338
- end
140
+ #
141
+ # Creates a new PDF and saves it.
142
+ # If a block is passed, the PDF instance can be processed before saving.
143
+ #
144
+ def create(output, options = {})
145
+ pdf = PDF.new
146
+ yield(pdf) if block_given?
147
+ pdf.save(output, options)
148
+ end
149
+ alias write create
339
150
 
340
- objects(:only_keys => true).each do |key|
341
- if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
342
- value = key.parent[key]
343
- result << ( value.is_a?(Reference) ? value.solve : value )
151
+ #
152
+ # Deserializes a PDF dump.
153
+ #
154
+ def deserialize(filename)
155
+ Zlib::GzipReader.open(filename) { |gz|
156
+ return Marshal.load(gz.read)
157
+ }
158
+ end
344
159
  end
345
- end
346
160
 
347
- result
348
- end
161
+ #
162
+ # Creates a new PDF instance.
163
+ # _parser_:: The Parser object creating the document.
164
+ # If none is specified, some default structures are automatically created to get a minimal working document.
165
+ #
166
+ def initialize(parser = nil)
167
+ @header = PDF::Header.new
168
+ @revisions = []
349
169
 
350
- #
351
- # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
352
- # Do not follow references.
353
- #
354
- def ls_no_follow(*patterns)
355
- return objects(:include_keys => false) if patterns.empty?
170
+ add_new_revision
171
+ @revisions.first.trailer = Trailer.new
172
+
173
+ if parser
174
+ @loaded = false
175
+ @parser = parser
176
+ else
177
+ init
178
+ end
179
+ end
356
180
 
357
- result = []
181
+ #
182
+ # Original file name if parsed from disk, nil otherwise.
183
+ #
184
+ def original_filename
185
+ @parser.target_filename if @parser
186
+ end
358
187
 
359
- patterns.map! do |pattern|
360
- pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
361
- end
188
+ #
189
+ # Original file size if parsed from a data stream, nil otherwise.
190
+ #
191
+ def original_filesize
192
+ @parser.target_filesize if @parser
193
+ end
362
194
 
363
- objects(:only_keys => true).each do |key|
364
- if patterns.any?{ |pattern| key.value.to_s.match(pattern) }
365
- value = key.parent[key]
366
- result << value
195
+ #
196
+ # Original data parsed to create this document, nil if created from scratch.
197
+ #
198
+ def original_data
199
+ @parser.target_data if @parser
367
200
  end
368
- end
369
201
 
370
- result
371
- end
202
+ #
203
+ # Serializes the current PDF.
204
+ #
205
+ def serialize(filename)
206
+ parser = @parser
207
+ @parser = nil # do not serialize the parser
372
208
 
373
- #
374
- # Returns an array of objects matching specified block.
375
- #
376
- def find(params = {}, &b)
377
-
378
- options =
379
- {
380
- :only_indirect => false
381
- }
382
- options.update(params)
383
-
384
- objset = (options[:only_indirect] == true) ?
385
- self.indirect_objects : self.objects
386
-
387
- objset.find_all(&b)
388
- end
389
-
390
- #
391
- # Returns an array of objects embedded in the PDF body.
392
- # _include_objstm_:: Whether it shall return objects embedded in object streams.
393
- # Note : Shall return to an iterator for Ruby 1.9 comp.
394
- #
395
- def objects(params = {})
396
-
397
- def append_subobj(root, objset, opts)
398
-
399
- if objset.find{ |o| root.equal?(o) }.nil?
400
- objset << root unless opts[:only_keys]
401
-
402
- if root.is_a?(Dictionary)
403
- root.each_pair { |name, value|
404
- objset << name if opts[:only_keys]
405
-
406
- append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
407
- append_subobj(value, objset, opts)
209
+ Zlib::GzipWriter.open(filename) { |gz|
210
+ gz.write Marshal.dump(self)
408
211
  }
409
- elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
410
- root.each { |subobj| append_subobj(subobj, objset, opts) }
411
- end
212
+
213
+ @parser = parser
214
+ self
412
215
  end
413
- end
414
-
415
- options =
416
- {
417
- :include_objectstreams => true,
418
- :include_keys => true,
419
- :only_keys => false
420
- }
421
- options.update(params)
422
-
423
- options[:include_keys] |= options[:only_keys]
424
-
425
- objset = []
426
- @revisions.each do |revision|
427
- revision.objects.each do |object|
428
- append_subobj(object, objset, options)
216
+
217
+ #
218
+ # Saves the current document.
219
+ # _filename_:: The path where to save this PDF.
220
+ #
221
+ def save(path, params = {})
222
+ options =
223
+ {
224
+ delinearize: true,
225
+ recompile: true,
226
+ decrypt: false
227
+ }
228
+ options.update(params)
229
+
230
+ if self.frozen? # incompatible flags with frozen doc (signed)
231
+ options[:recompile] =
232
+ options[:rebuild_xrefs] =
233
+ options[:noindent] =
234
+ options[:obfuscate] = false
235
+ end
236
+
237
+ if path.respond_to?(:write)
238
+ fd = path
239
+ else
240
+ path = File.expand_path(path)
241
+ fd = File.open(path, 'w').binmode
242
+ close = true
243
+ end
244
+
245
+ load_all_objects unless @loaded
246
+
247
+ intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
248
+ self.delinearize! if options[:delinearize] and self.linearized?
249
+ compile(options) if options[:recompile]
250
+
251
+ fd.write output(options)
252
+ fd.close if close
253
+
254
+ self
429
255
  end
430
- end
431
-
432
- objset
433
- end
434
-
435
- #
436
- # Return an array of indirect objects.
437
- #
438
- def indirect_objects
439
- @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
440
- end
441
- alias :root_objects :indirect_objects
442
-
443
- #
444
- # Adds a new object to the PDF file.
445
- # If this object has no version number, then a new one will be automatically computed and assignated to him.
446
- # It returns a Reference to this Object.
447
- # _object_:: The object to add.
448
- #
449
- def <<(object)
450
- owner = object.pdf
451
-
452
- #
453
- # Does object belongs to another PDF ?
454
- #
455
- if owner and not owner.equal?(self)
456
- import object
457
- else
458
- add_to_revision(object, @revisions.last)
459
- end
460
- end
461
- alias :insert :<<
462
-
463
- #
464
- # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
465
- # Object will be recursively copied and new version numbers will be assigned.
466
- # Returns the new reference to the imported object.
467
- # _object_:: The object to import.
468
- #
469
- def import(object)
470
- self.insert(object.export)
471
- end
256
+ alias write save
472
257
 
473
- #
474
- # Adds a new object to a specific revision.
475
- # If this object has no version number, then a new one will be automatically computed and assignated to him.
476
- # It returns a Reference to this Object.
477
- # _object_:: The object to add.
478
- # _revision_:: The revision to add the object to.
479
- #
480
- def add_to_revision(object, revision)
481
-
482
- object.set_indirect(true)
483
- object.set_pdf(self)
484
-
485
- object.no, object.generation = alloc_new_object_number if object.no == 0
486
-
487
- revision.body[object.reference] = object
488
-
489
- object.reference
490
- end
258
+ #
259
+ # Saves the file up to given revision number.
260
+ # This can be useful to visualize the modifications over different incremental updates.
261
+ # _revision_:: The revision number to save.
262
+ # _filename_:: The path where to save this PDF.
263
+ #
264
+ def save_upto(revision, filename)
265
+ save(filename, up_to_revision: revision)
266
+ end
491
267
 
492
- #
493
- # Ends the current Revision, and starts a new one.
494
- #
495
- def add_new_revision
496
-
497
- root = @revisions.last.trailer[:Root] unless @revisions.empty?
268
+ #
269
+ # Returns an array of strings, names and streams matching the given pattern.
270
+ # _streams_: Search into decoded stream data.
271
+ # _object_streams_: Search into objects inside object streams.
272
+ #
273
+ def grep(pattern, streams: true, object_streams: true) #:nodoc:
498
274
 
499
- @revisions << Revision.new(self)
500
- @revisions.last.trailer = Trailer.new
501
- @revisions.last.trailer.Root = root
275
+ pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
276
+ raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
502
277
 
503
- self
504
- end
278
+ result = []
505
279
 
506
- #
507
- # Removes a whole document revision.
508
- # _index_:: Revision index, first is 0.
509
- #
510
- def remove_revision(index)
511
- if index < 0 or index > @revisions.size
512
- raise IndexError, "Not a valid revision index"
513
- end
280
+ search_object = -> (object) do
281
+ case object
282
+ when Stream
283
+ result.concat object.dictionary.strings_cache.select{|str| pattern === str}
284
+ result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
514
285
 
515
- if @revisions.size == 1
516
- raise InvalidPDFError, "Cannot remove last revision"
517
- end
286
+ begin
287
+ result.push object if streams and object.data.match(pattern)
288
+ rescue Filter::Error
289
+ next # Skip object if a decoding error occured.
290
+ end
518
291
 
519
- @revisions.delete_at(index)
520
- self
521
- end
522
-
523
- #
524
- # Looking for an object present at a specified file offset.
525
- #
526
- def get_object_by_offset(offset) #:nodoc:
527
- self.indirect_objects.find { |obj| obj.file_offset == offset }
528
- end
292
+ next if object.is_a?(ObjectStream) and not object_streams
529
293
 
530
- #
531
- # Remove an object.
532
- #
533
- def delete_object(no, generation = 0)
534
-
535
- case no
536
- when Reference
537
- target = no
538
- when ::Integer
539
- target = Reference.new(no, generation)
540
- else
541
- raise TypeError, "Invalid parameter type : #{no.class}"
542
- end
543
-
544
- @revisions.each do |rev|
545
- rev.body.delete(target)
546
- end
294
+ object.each do |subobject|
295
+ search_object.call(subobject)
296
+ end
547
297
 
548
- end
298
+ when Name, String
299
+ result.push object if object.value.match(pattern)
300
+
301
+ when Dictionary, Array then
302
+ result.concat object.strings_cache.select{|str| pattern === str}
303
+ result.concat object.names_cache.select{|name| pattern === name.value}
304
+ end
305
+ end
306
+
307
+ self.indirect_objects.each do |object|
308
+ search_object.call(object)
309
+ end
310
+
311
+ result
312
+ end
313
+
314
+ #
315
+ # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
316
+ #
317
+ def ls(pattern, follow_references: true)
318
+
319
+ pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
320
+ raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
321
+
322
+ self.grep(pattern, streams: false, object_streams: true)
323
+ .select {|object| object.is_a?(Name) and object.parent.is_a?(Dictionary) and object.parent.key?(object) }
324
+ .collect {|object| result = object.parent[object]; follow_references ? result.solve : result }
325
+ end
326
+
327
+ #
328
+ # Iterates over the objects of the document.
329
+ # _compressed_: iterates over the objects inside object streams.
330
+ # _recursive_: iterates recursively inside objects like arrays and dictionaries.
331
+ #
332
+ def each_object(compressed: false, recursive: false)
333
+ return enum_for(__method__, compressed: compressed,
334
+ recursive: recursive
335
+ ) unless block_given?
336
+
337
+ walk_object = -> (object) do
338
+ case object
339
+ when Dictionary
340
+ object.each_value do |value|
341
+ yield(value)
342
+ walk_object.call(value)
343
+ end
344
+
345
+ when Array
346
+ object.each do |child|
347
+ yield(child)
348
+ walk_object.call(child)
349
+ end
350
+
351
+ when Stream
352
+ yield(object.dictionary)
353
+ walk_object.call(object.dictionary)
354
+ end
355
+ end
356
+
357
+ @revisions.each do |revision|
358
+ revision.each_object do |object|
359
+ yield(object)
360
+
361
+ walk_object.call(object) if recursive
362
+
363
+ if object.is_a?(ObjectStream) and compressed
364
+ object.each do |child_obj|
365
+ yield(child_obj)
366
+
367
+ walk_object.call(child_obj) if recursive
368
+ end
369
+ end
370
+ end
371
+ end
372
+ end
373
+
374
+
375
+ #
376
+ # Return an array of indirect objects.
377
+ #
378
+ def indirect_objects
379
+ @revisions.inject([]) do |set, rev| set.concat(rev.objects) end
380
+ end
381
+ alias root_objects indirect_objects
382
+
383
+ #
384
+ # Adds a new object to the PDF file.
385
+ # If this object has no version number, then a new one will be automatically
386
+ # computed and assignated to him.
387
+ #
388
+ # It returns a Reference to this Object.
389
+ # _object_:: The object to add.
390
+ #
391
+ def <<(object)
392
+ owner = object.document
549
393
 
550
- #
551
- # Search for an indirect object in the document.
552
- # _no_:: Reference or number of the object.
553
- # _generation_:: Object generation.
554
- #
555
- def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
556
- case no
557
- when Reference
558
- target = no
559
- when ::Integer
560
- target = Reference.new(no, generation)
561
- when Origami::Object
562
- return no
563
- else
564
- raise TypeError, "Invalid parameter type : #{no.class}"
565
- end
566
-
567
- #
568
- # Search through accessible indirect objects.
569
- #
570
- @revisions.each do |rev|
571
- return rev.body[target] if rev.body.include?(target)
572
- end
573
-
574
- # Look into XRef streams.
575
- if use_xrefstm == true
576
-
577
- if @revisions.last.has_xrefstm?
578
- xrefstm = @revisions.last.xrefstm
579
-
580
- done = []
581
- while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
582
- xref = xrefstm.find(target.refno)
583
-
584
394
  #
585
- # We found a matching XRef.
395
+ # Does object belongs to another PDF ?
586
396
  #
587
- if xref.is_a?(XRefToCompressedObj)
588
- objstm = get_object(xref.objstmno, 0, false)
589
-
590
- object = objstm.extract_by_index(xref.index)
591
- if object.is_a?(Origami::Object) and object.no == target.refno
592
- return object
593
- else
594
- return objstm.extract(target.refno)
595
- end
596
- elsif xrefstm.has_field?(:Prev)
597
- done << xrefstm
598
- xrefstm = get_object_by_offset(xrefstm.Prev)
397
+ if owner and not owner.equal?(self)
398
+ import object
599
399
  else
600
- break
400
+ add_to_revision(object, @revisions.last)
601
401
  end
602
- end
603
402
  end
403
+ alias insert <<
604
404
 
605
405
  #
606
- # Lastly search directly into Object streams (might be very slow).
406
+ # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
407
+ # Object will be recursively copied and new version numbers will be assigned.
408
+ # Returns the new reference to the imported object.
409
+ # _object_:: The object to import.
607
410
  #
608
- @revisions.each do |rev|
609
- streams = rev.objects.find_all{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
610
- return streams.first.extract(target.refno) unless streams.empty?
411
+ def import(object)
412
+ self.insert(object.export)
611
413
  end
612
414
 
613
- nil
614
- end
615
-
616
- end
415
+ #
416
+ # Adds a new object to a specific revision.
417
+ # If this object has no version number, then a new one will be automatically
418
+ # computed and assignated to him.
419
+ #
420
+ # It returns a Reference to this Object.
421
+ # _object_:: The object to add.
422
+ # _revision_:: The revision to add the object to.
423
+ #
424
+ def add_to_revision(object, revision)
425
+ object.set_indirect(true)
426
+ object.set_document(self)
427
+
428
+ object.no, object.generation = allocate_new_object_number if object.no == 0
617
429
 
618
- alias :[] :get_object
430
+ revision.body[object.reference] = object
619
431
 
620
- def cast_object(reference, type) #:nodoc:
621
- @revisions.each do |rev|
622
- if rev.body.include?(reference) and type < rev.body[reference].class
623
- rev.body[reference] = rev.body[reference].cast_to(type)
432
+ object.reference
624
433
  end
625
- end
626
- end
627
-
628
- #
629
- # Returns a new number/generation for future object.
630
- #
631
- def alloc_new_object_number
632
- no = 1
633
434
 
634
- # Deprecated number allocation policy (first available)
635
- #no = no + 1 while get_object(no)
435
+ #
436
+ # Ends the current Revision, and starts a new one.
437
+ #
438
+ def add_new_revision
439
+ root = @revisions.last.trailer[:Root] unless @revisions.empty?
636
440
 
637
- objset = self.indirect_objects
638
- self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
639
- objstm.each{|obj| objset << obj}
640
- end
441
+ @revisions << Revision.new(self)
442
+ @revisions.last.trailer = Trailer.new
443
+ @revisions.last.trailer.Root = root
641
444
 
642
- allocated = objset.collect{|obj| obj.no}.compact
643
- no = allocated.max + 1 unless allocated.empty?
445
+ self
446
+ end
644
447
 
645
- [ no, 0 ]
646
- end
647
-
648
- ##########################
649
- private
650
- ##########################
651
-
652
- #
653
- # Compute and update XRef::Section for each Revision.
654
- #
655
- def rebuildxrefs
656
-
657
- size = 0
658
- startxref = @header.to_s.size
659
-
660
- @revisions.each do |revision|
661
-
662
- revision.objects.each do |object|
663
- startxref += object.to_s.size
448
+ #
449
+ # Removes a whole document revision.
450
+ # _index_:: Revision index, first is 0.
451
+ #
452
+ def remove_revision(index)
453
+ if index < 0 or index > @revisions.size
454
+ raise IndexError, "Not a valid revision index"
455
+ end
456
+
457
+ if @revisions.size == 1
458
+ raise InvalidPDFError, "Cannot remove last revision"
459
+ end
460
+
461
+ @revisions.delete_at(index)
462
+ self
664
463
  end
665
-
666
- size += revision.body.size
667
- revision.xreftable = buildxrefs(revision.objects)
668
-
669
- revision.trailer ||= Trailer.new
670
- revision.trailer.Size = size + 1
671
- revision.trailer.startxref = startxref
672
-
673
- startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
674
- end
675
-
676
- self
677
- end
678
-
679
- #
680
- # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
681
- # * Allocates objects references.
682
- # * Sets some objects missing required values.
683
- #
684
- def compile(options = {})
685
-
686
- #
687
- # A valid document must have at least one page.
688
- #
689
- append_page if pages.empty?
690
-
691
- #
692
- # Allocates object numbers and creates references.
693
- # Invokes object finalization methods.
694
- #
695
- if self.is_a?(Encryption::EncryptedDocument)
696
- physicalize(options)
697
- else
698
- physicalize
699
- end
700
-
701
- #
702
- # Sets the PDF version header.
703
- #
704
- version, level = version_required
705
- @header.majorversion = version[0,1].to_i
706
- @header.minorversion = version[2,1].to_i
707
-
708
- set_extension_level(version, level) if level > 0
709
-
710
- self
711
- end
712
-
713
- #
714
- # Cleans the document from its references.
715
- # Indirects objects are made direct whenever possible.
716
- # TODO: Circuit-checking to avoid infinite induction
717
- #
718
- def logicalize #:nodoc:
719
-
720
- fail "Not yet supported"
721
-
722
- processed = []
723
-
724
- def convert(root) #:nodoc:
725
-
726
- replaced = []
727
- if root.is_a?(Dictionary) or root.is_a?(Array)
728
-
729
- root.each { |obj|
730
- convert(obj)
731
- }
732
-
733
- root.map! { |obj|
734
- if obj.is_a?(Reference)
735
- target = obj.solve
736
- # Streams can't be direct objects
737
- if target.is_a?(Stream)
738
- obj
739
- else
740
- replaced << obj
741
- target
742
- end
464
+
465
+ #
466
+ # Looking for an object present at a specified file offset.
467
+ #
468
+ def get_object_by_offset(offset) #:nodoc:
469
+ self.indirect_objects.find { |obj| obj.file_offset == offset }
470
+ end
471
+
472
+ #
473
+ # Remove an object.
474
+ #
475
+ def delete_object(no, generation = 0)
476
+ case no
477
+ when Reference
478
+ target = no
479
+ when ::Integer
480
+ target = Reference.new(no, generation)
481
+ else
482
+ raise TypeError, "Invalid parameter type : #{no.class}"
483
+ end
484
+
485
+ @revisions.each do |rev|
486
+ rev.body.delete(target)
487
+ end
488
+ end
489
+
490
+ #
491
+ # Search for an indirect object in the document.
492
+ # _no_:: Reference or number of the object.
493
+ # _generation_:: Object generation.
494
+ #
495
+ def get_object(no, generation = 0, use_xrefstm: true) #:nodoc:
496
+ case no
497
+ when Reference
498
+ target = no
499
+ when ::Integer
500
+ target = Reference.new(no, generation)
501
+ when Origami::Object
502
+ return no
743
503
  else
744
- obj
504
+ raise TypeError, "Invalid parameter type : #{no.class}"
505
+ end
506
+
507
+ #
508
+ # Search through accessible indirect objects.
509
+ #
510
+ @revisions.reverse_each do |rev|
511
+ return rev.body[target] if rev.body.include?(target)
512
+ end
513
+
514
+ #
515
+ # Search through xref sections.
516
+ #
517
+ @revisions.reverse_each do |rev|
518
+ next unless rev.has_xreftable?
519
+
520
+ xref = rev.xreftable.find(target.refno)
521
+ next if xref.nil? or xref.free?
522
+
523
+ # Try loading the object if it is not present.
524
+ object = load_object_at_offset(rev, xref.offset)
525
+ return object unless object.nil?
526
+ end
527
+
528
+ return nil unless use_xrefstm
529
+
530
+ # Search through xref streams.
531
+ @revisions.reverse_each do |rev|
532
+ next unless rev.has_xrefstm?
533
+
534
+ xrefstm = rev.xrefstm
535
+
536
+ xref = xrefstm.find(target.refno)
537
+ next if xref.nil?
538
+
539
+ #
540
+ # We found a matching XRef.
541
+ #
542
+ if xref.is_a?(XRefToCompressedObj)
543
+ objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
544
+
545
+ object = objstm.extract_by_index(xref.index)
546
+ if object.is_a?(Origami::Object) and object.no == target.refno
547
+ return object
548
+ else
549
+ return objstm.extract(target.refno)
550
+ end
551
+ elsif xref.is_a?(XRef)
552
+ object = load_object_at_offset(rev, xref.offset)
553
+ return object unless object.nil?
554
+ end
555
+ end
556
+
557
+ #
558
+ # Lastly search directly into Object streams (might be very slow).
559
+ #
560
+ @revisions.reverse_each do |rev|
561
+ stream = rev.objects.find{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
562
+ return stream.extract(target.refno) unless stream.nil?
563
+ end
564
+
565
+ nil
566
+ end
567
+ alias [] get_object
568
+
569
+ #
570
+ # Casts a PDF object into another object type.
571
+ # The target type must be a subtype of the original type.
572
+ #
573
+ def cast_object(reference, type, parser = nil) #:nodoc:
574
+ @revisions.each do |rev|
575
+ if rev.body.include?(reference) and type < rev.body[reference].class
576
+ rev.body[reference] = rev.body[reference].cast_to(type, parser)
577
+
578
+ rev.body[reference]
579
+ else
580
+ nil
581
+ end
745
582
  end
746
- }
747
-
748
583
  end
749
584
 
750
- replaced
751
- end
585
+ #
586
+ # Returns a new number/generation for future object.
587
+ #
588
+ def allocate_new_object_number
589
+ no = 1
590
+
591
+ # Deprecated number allocation policy (first available)
592
+ #no = no + 1 while get_object(no)
752
593
 
753
- @revisions.each do |revision|
754
- revision.objects.each do |obj|
755
- processed.concat(convert(obj))
594
+ objset = self.indirect_objects
595
+ self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
596
+ objstm.each{|obj| objset << obj}
597
+ end
598
+
599
+ allocated = objset.collect{|obj| obj.no}.compact
600
+ no = allocated.max + 1 unless allocated.empty?
601
+
602
+ [ no, 0 ]
756
603
  end
757
- end
758
604
 
759
- end
760
-
761
- #
762
- # Converts a logical PDF view into a physical view ready for writing.
763
- #
764
- def physicalize
765
-
766
- #
767
- # Indirect objects are added to the revision and assigned numbers.
768
- #
769
- def build(obj, revision) #:nodoc:
605
+ #
606
+ # Mark the document as complete.
607
+ # No more objects needs to be fetched by the parser.
608
+ #
609
+ def loaded!
610
+ @loaded = true
611
+ end
612
+
613
+ ##########################
614
+ private
615
+ ##########################
770
616
 
771
617
  #
772
- # Finalize any subobjects before building the stream.
618
+ # Load an object from its given file offset.
619
+ # The document must have an associated Parser.
773
620
  #
774
- if obj.is_a?(ObjectStream)
775
- obj.each do |subobj|
776
- build(subobj, revision)
777
- end
621
+ def load_object_at_offset(revision, offset)
622
+ return nil if @loaded or @parser.nil?
623
+ pos = @parser.pos
624
+
625
+ begin
626
+ object = @parser.parse_object(offset)
627
+ return nil if object.nil?
628
+
629
+ if self.is_a?(Encryption::EncryptedDocument)
630
+ case object
631
+ when String
632
+ object.extend(Encryption::EncryptedString)
633
+ object.decrypted = false
634
+ when Stream
635
+ object.extend(Encryption::EncryptedStream)
636
+ object.decrypted = false
637
+ when Dictionary, Array
638
+ object.strings_cache.each do |string|
639
+ string.extend(Encryption::EncryptedString)
640
+ string.decrypted = false
641
+ end
642
+ end
643
+ end
644
+
645
+ add_to_revision(object, revision)
646
+ ensure
647
+ @parser.pos = pos
648
+ end
649
+
650
+ object
778
651
  end
779
-
780
- obj.pre_build
781
-
782
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
783
-
784
- obj.map! do |subobj|
785
- if subobj.is_indirect?
786
- if get_object(subobj.reference)
787
- subobj.reference
652
+
653
+ #
654
+ # Force the loading of all objects in the document.
655
+ #
656
+ def load_all_objects
657
+ return if @loaded or @parser.nil?
658
+
659
+ @revisions.each do |revision|
660
+ if revision.has_xreftable?
661
+ xrefs = revision.xreftable
662
+ elsif revision.has_xrefstm?
663
+ xrefs = revision.xrefstm
788
664
  else
789
- ref = add_to_revision(subobj, revision)
790
- build(subobj, revision)
791
- ref
665
+ next
666
+ end
667
+
668
+ xrefs.each_with_number do |_, no|
669
+ self.get_object(no)
792
670
  end
793
- else
794
- subobj
795
- end
796
671
  end
797
-
798
- obj.each do |subobj|
799
- build(subobj, revision)
672
+
673
+ @loaded = true
674
+ end
675
+
676
+ #
677
+ # Compute and update XRef::Section for each Revision.
678
+ #
679
+ def rebuild_xrefs
680
+ size = 0
681
+ startxref = @header.to_s.size
682
+
683
+ @revisions.each do |revision|
684
+ revision.objects.each do |object|
685
+ startxref += object.to_s.size
686
+ end
687
+
688
+ size += revision.body.size
689
+ revision.xreftable = build_xrefs(revision.objects)
690
+
691
+ revision.trailer ||= Trailer.new
692
+ revision.trailer.Size = size + 1
693
+ revision.trailer.startxref = startxref
694
+
695
+ startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
800
696
  end
801
-
802
- elsif obj.is_a?(Stream)
803
- build(obj.dictionary, revision)
697
+
698
+ self
804
699
  end
805
700
 
806
- obj.post_build
807
-
808
- end
809
-
810
- indirect_objects_by_rev.each do |obj, revision|
811
- build(obj, revision)
812
- end
813
-
814
- self
815
- end
701
+ #
702
+ # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
703
+ # * Allocates objects references.
704
+ # * Sets some objects missing required values.
705
+ #
706
+ def compile(options = {})
816
707
 
817
- #
818
- # Returns the final binary representation of the current document.
819
- #
820
- def output(params = {})
821
-
822
- has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
823
-
824
- options =
825
- {
826
- :rebuildxrefs => true,
827
- :noindent => false,
828
- :obfuscate => false,
829
- :use_xrefstm => has_objstm,
830
- :use_xreftable => (not has_objstm),
831
- :up_to_revision => @revisions.size
832
- }
833
- options.update(params)
834
-
835
- options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
836
-
837
- # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
838
- if options[:use_xrefstm] == options[:use_xreftable]
839
- options[:use_xrefstm] = has_objstm
840
- options[:use_xreftable] = (not has_objstm)
841
- end
842
-
843
- # Get trailer dictionary
844
- trailer_info = get_trailer_info
845
- if trailer_info.nil?
846
- raise InvalidPDFError, "No trailer information found"
847
- end
848
- trailer_dict = trailer_info.dictionary
849
-
850
- prev_xref_offset = nil
851
- xrefstm_offset = nil
852
- xreftable_offset = nil
853
-
854
- # Header
855
- bin = ""
856
- bin << @header.to_s
857
-
858
- # For each revision
859
- @revisions[0, options[:up_to_revision]].each do |rev|
860
-
861
- # Create xref table/stream.
862
- if options[:rebuildxrefs] == true
863
- lastno_table, lastno_stm = 0, 0
864
- brange_table, brange_stm = 0, 0
865
-
866
- xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
867
- xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
868
-
869
- if options[:use_xreftable] == true
870
- xrefsection = XRef::Section.new
871
- end
708
+ load_all_objects unless @loaded
872
709
 
873
- if options[:use_xrefstm] == true
874
- xrefstm = rev.xrefstm || XRefStream.new
875
- if xrefstm == rev.xrefstm
876
- xrefstm.clear
710
+ #
711
+ # A valid document must have at least one page.
712
+ #
713
+ append_page if pages.empty?
714
+
715
+ #
716
+ # Allocates object numbers and creates references.
717
+ # Invokes object finalization methods.
718
+ #
719
+ if self.is_a?(Encryption::EncryptedDocument)
720
+ physicalize(options)
877
721
  else
878
- add_to_revision(xrefstm, rev)
722
+ physicalize
879
723
  end
880
- end
724
+
725
+ #
726
+ # Sets the PDF version header.
727
+ #
728
+ version, level = version_required
729
+ @header.major_version = version[0,1].to_i
730
+ @header.minor_version = version[2,1].to_i
731
+
732
+ set_extension_level(version, level) if level > 0
733
+
734
+ self
881
735
  end
882
-
883
- objset = rev.objects
884
-
885
- objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
886
- objset.concat objstm.objects
887
- end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true
888
-
889
- # For each object, in number order
890
- objset.sort.each do |obj|
891
-
892
- # Create xref entry.
893
- if options[:rebuildxrefs] == true
894
-
895
- # Adding subsections if needed
896
- if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
897
- xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
898
-
899
- xrefs_table.clear
900
- brange_table = obj.no
736
+
737
+ #
738
+ # Cleans the document from its references.
739
+ # Indirects objects are made direct whenever possible.
740
+ # TODO: Circuit-checking to avoid infinite induction
741
+ #
742
+ def logicalize #:nodoc:
743
+ raise NotImplementedError
744
+
745
+ processed = []
746
+
747
+ convert = -> (root) do
748
+ replaced = []
749
+ if root.is_a?(Dictionary) or root.is_a?(Array)
750
+ root.each do |obj|
751
+ convert[obj]
752
+ end
753
+
754
+ root.map! do |obj|
755
+ if obj.is_a?(Reference)
756
+ target = obj.solve
757
+ # Streams can't be direct objects
758
+ if target.is_a?(Stream)
759
+ obj
760
+ else
761
+ replaced << obj
762
+ target
763
+ end
764
+ else
765
+ obj
766
+ end
767
+ end
768
+ end
769
+
770
+ replaced
901
771
  end
902
- if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
903
- xrefs_stm.each do |xref| xrefstm << xref end
904
- xrefstm.Index ||= []
905
- xrefstm.Index << brange_stm << xrefs_stm.length
906
772
 
907
- xrefs_stm.clear
908
- brange_stm = obj.no
773
+ @revisions.each do |revision|
774
+ revision.objects.each do |obj|
775
+ processed.concat(convert[obj])
776
+ end
909
777
  end
778
+ end
910
779
 
911
- # Process embedded objects
912
- if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
913
- index = obj.parent.index(obj.no)
914
-
915
- xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
916
-
917
- lastno_stm = obj.no
918
- else
919
- xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
920
- xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
780
+ #
781
+ # Converts a logical PDF view into a physical view ready for writing.
782
+ #
783
+ def physicalize
784
+
785
+ #
786
+ # Indirect objects are added to the revision and assigned numbers.
787
+ #
788
+ build = -> (obj, revision) do
789
+ #
790
+ # Finalize any subobjects before building the stream.
791
+ #
792
+ if obj.is_a?(ObjectStream)
793
+ obj.each do |subobj|
794
+ build.call(subobj, revision)
795
+ end
796
+ end
921
797
 
922
- lastno_table = lastno_stm = obj.no
798
+ obj.pre_build
799
+
800
+ if obj.is_a?(Dictionary) or obj.is_a?(Array)
801
+
802
+ obj.map! do |subobj|
803
+ if subobj.indirect?
804
+ if get_object(subobj.reference)
805
+ subobj.reference
806
+ else
807
+ ref = add_to_revision(subobj, revision)
808
+ build.call(subobj, revision)
809
+ ref
810
+ end
811
+ else
812
+ subobj
813
+ end
814
+ end
815
+
816
+ obj.each do |subobj|
817
+ build.call(subobj, revision)
818
+ end
819
+
820
+ elsif obj.is_a?(Stream)
821
+ build.call(obj.dictionary, revision)
822
+ end
823
+
824
+ obj.post_build
923
825
  end
924
826
 
827
+ indirect_objects_by_rev.each do |obj, revision|
828
+ build.call(obj, revision)
925
829
  end
926
-
927
- if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
928
-
929
- # Finalize XRefStm
930
- if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
931
- xrefstm_offset = bin.size
932
-
933
- xrefs_stm.each do |xref| xrefstm << xref end
934
-
935
- xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
936
- if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
937
- xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
938
- end
939
-
940
- xrefstm.Index ||= []
941
- xrefstm.Index << brange_stm << xrefs_stm.size
942
-
943
- xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
944
- xrefstm.Prev = prev_xref_offset
945
- rev.trailer.dictionary = nil
946
-
947
- add_to_revision(xrefstm, rev)
948
-
949
- xrefstm.pre_build
950
- xrefstm.post_build
830
+
831
+ self
832
+ end
833
+
834
+ #
835
+ # Returns the final binary representation of the current document.
836
+ #
837
+ def output(params = {})
838
+
839
+ has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
840
+
841
+ options =
842
+ {
843
+ rebuild_xrefs: true,
844
+ noindent: false,
845
+ obfuscate: false,
846
+ use_xrefstm: has_objstm,
847
+ use_xreftable: (not has_objstm),
848
+ up_to_revision: @revisions.size
849
+ }
850
+ options.update(params)
851
+
852
+ options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
853
+
854
+ # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
855
+ if options[:use_xrefstm] == options[:use_xreftable]
856
+ options[:use_xrefstm] = has_objstm
857
+ options[:use_xreftable] = (not has_objstm)
951
858
  end
952
859
 
953
- # Output object code
954
- if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
955
- bin << obj.to_s(0)
956
- else
957
- bin << obj.to_s
860
+ # Get trailer dictionary
861
+ trailer_info = get_trailer_info
862
+ raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
863
+ trailer_dict = trailer_info.dictionary
864
+
865
+ prev_xref_offset = nil
866
+ xrefstm_offset = nil
867
+
868
+ # Header
869
+ bin = ""
870
+ bin << @header.to_s
871
+
872
+ # For each revision
873
+ @revisions[0, options[:up_to_revision]].each do |rev|
874
+
875
+ # Create xref table/stream.
876
+ if options[:rebuild_xrefs] == true
877
+ lastno_table, lastno_stm = 0, 0
878
+ brange_table, brange_stm = 0, 0
879
+
880
+ xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
881
+ xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
882
+
883
+ if options[:use_xreftable] == true
884
+ xrefsection = XRef::Section.new
885
+ end
886
+
887
+ if options[:use_xrefstm] == true
888
+ xrefstm = rev.xrefstm || XRefStream.new
889
+ if xrefstm == rev.xrefstm
890
+ xrefstm.clear
891
+ else
892
+ add_to_revision(xrefstm, rev)
893
+ end
894
+ end
895
+ end
896
+
897
+ objset = rev.objects
898
+
899
+ objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
900
+ objset.concat objstm.objects
901
+ end if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true
902
+
903
+ previous_obj = nil
904
+
905
+ # For each object, in number order
906
+ # Move any XRefStream to the end of the revision.
907
+ objset.sort_by {|obj| [obj.is_a?(XRefStream) ? 1 : 0, obj.no, obj.generation] }
908
+ .each do |obj|
909
+
910
+ # Ensures that every object has a unique reference number.
911
+ # Duplicates should never happen in a well-formed revision and will cause breakage of xrefs.
912
+ if previous_obj and previous_obj.reference == obj.reference
913
+ raise InvalidPDFError, "Duplicate object detected, reference #{obj.reference}"
914
+ else
915
+ previous_obj = obj
916
+ end
917
+
918
+ # Create xref entry.
919
+ if options[:rebuild_xrefs] == true
920
+
921
+ # Adding subsections if needed
922
+ if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
923
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
924
+
925
+ xrefs_table.clear
926
+ brange_table = obj.no
927
+ end
928
+
929
+ if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
930
+ xrefs_stm.each do |xref| xrefstm << xref end
931
+ xrefstm.Index ||= []
932
+ xrefstm.Index << brange_stm << xrefs_stm.length
933
+
934
+ xrefs_stm.clear
935
+ brange_stm = obj.no
936
+ end
937
+
938
+ # Process embedded objects
939
+ if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
940
+ index = obj.parent.index(obj.no)
941
+
942
+ xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
943
+
944
+ lastno_stm = obj.no
945
+ else
946
+ xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
947
+ xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
948
+
949
+ lastno_table = lastno_stm = obj.no
950
+ end
951
+ end
952
+
953
+ if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
954
+
955
+ # Finalize XRefStm
956
+ if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
957
+ xrefstm_offset = bin.size
958
+
959
+ xrefs_stm.each do |xref| xrefstm << xref end
960
+
961
+ xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
962
+ if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
963
+ xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
964
+ end
965
+
966
+ xrefstm.Index ||= []
967
+ xrefstm.Index << brange_stm << xrefs_stm.size
968
+
969
+ xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
970
+ xrefstm.Prev = prev_xref_offset
971
+ rev.trailer.dictionary = nil
972
+
973
+ add_to_revision(xrefstm, rev)
974
+
975
+ xrefstm.pre_build
976
+ xrefstm.post_build
977
+ end
978
+
979
+ # Output object code
980
+ if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
981
+ bin << obj.to_s(indent: 0)
982
+ else
983
+ bin << obj.to_s
984
+ end
985
+ end
986
+ end # end each object
987
+
988
+ rev.trailer ||= Trailer.new
989
+
990
+ # XRef table
991
+ if options[:rebuild_xrefs] == true
992
+
993
+ if options[:use_xreftable] == true
994
+ table_offset = bin.size
995
+
996
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
997
+ rev.xreftable = xrefsection
998
+
999
+ rev.trailer.dictionary = trailer_dict
1000
+ rev.trailer.Size = objset.size + 1
1001
+ rev.trailer.Prev = prev_xref_offset
1002
+
1003
+ rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
1004
+ end
1005
+
1006
+ startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
1007
+ rev.trailer.startxref = prev_xref_offset = startxref
1008
+
1009
+ end
1010
+
1011
+ # Trailer
1012
+ bin << rev.xreftable.to_s if options[:use_xreftable] == true
1013
+ bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
1014
+
1015
+ end # end each revision
1016
+
1017
+ bin
1018
+ end
1019
+
1020
+ #
1021
+ # Instanciates basic structures required for a valid PDF file.
1022
+ #
1023
+ def init
1024
+ catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
1025
+ catalog.Pages = PageTreeNode.new.set_indirect(true)
1026
+ @revisions.last.trailer.Root = catalog.reference
1027
+
1028
+ @loaded = true
1029
+
1030
+ self
1031
+ end
1032
+
1033
+ def filesize #:nodoc:
1034
+ output(rebuild_xrefs: false).size
1035
+ end
1036
+
1037
+ def version_required #:nodoc:
1038
+ max = [ 1.0, 0 ]
1039
+ @revisions.each do |revision|
1040
+ revision.objects.each do |object|
1041
+ current = object.version_required
1042
+ max = current if (current <=> max) > 0
1043
+ end
958
1044
  end
959
- end
1045
+
1046
+ max[0] = max[0].to_s
1047
+
1048
+ max
960
1049
  end
961
-
962
- rev.trailer ||= Trailer.new
963
-
964
- # XRef table
965
- if options[:rebuildxrefs] == true
966
-
967
- if options[:use_xreftable] == true
968
- table_offset = bin.size
969
-
970
- xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
971
- rev.xreftable = xrefsection
972
-
973
- rev.trailer.dictionary = trailer_dict
974
- rev.trailer.Size = objset.size + 1
975
- rev.trailer.Prev = prev_xref_offset
976
-
977
- rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
978
- end
979
1050
 
980
- startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
981
- rev.trailer.startxref = prev_xref_offset = startxref
982
-
983
- end # end each rev
984
-
985
- # Trailer
986
- bin << rev.xreftable.to_s if options[:use_xreftable] == true
987
- bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
988
-
989
- end
990
-
991
- bin
992
- end
993
-
994
- #
995
- # Instanciates basic structures required for a valid PDF file.
996
- #
997
- def init
998
- catalog = (self.Catalog = (get_doc_attr(:Root) || Catalog.new))
999
- catalog.Pages = PageTreeNode.new.set_indirect(true)
1000
- @revisions.last.trailer.Root = catalog.reference
1051
def indirect_objects_by_rev #:nodoc:
  # Produce a flat list of [object, revision] pairs, in revision order
  # and, within a revision, in the order given by revision.objects.
  @revisions.each_with_object([]) do |revision, pairs|
    revision.objects.each do |object|
      pairs << [ object, revision ]
    end
  end
end
1057
+
1058
+ #
1059
+ # Compute and update XRef::Section for each Revision.
1060
+ #
1061
def rebuild_dummy_xrefs #:nodoc:

  # Builds a placeholder XRef::Section for a set of objects: one
  # FREE entry (offset 0, generation 0) per object, grouped into
  # subsections. A new subsection is started whenever an object number
  # is more than one away from the previous one.
  dummy_section_for = lambda do |objects|
    previous_no = 0
    first_no = 0

    entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

    section = XRef::Section.new
    objects.sort.each do |object|
      unless (object.no - previous_no).abs <= 1
        # Flush the entries gathered so far before reusing the array.
        section << XRef::Subsection.new(first_no, entries)
        first_no = object.no
        entries.clear
      end

      entries << XRef.new(0, 0, XRef::FREE)

      previous_no = object.no
    end

    section << XRef::Subsection.new(first_no, entries)

    section
  end

  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|
    # Advance past the serialized form of every object of the revision.
    offset = revision.objects.inject(offset) do |position, object|
      position + object.to_s.size
    end

    object_count += revision.body.size
    revision.xreftable = dummy_section_for.call(revision.objects)

    # Size accumulates over all revisions processed so far, plus one.
    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = offset

    # Skip this revision's xref table and trailer before the next one.
    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
1107
+
1108
+ #
1109
+ # Build a xref section from a set of objects.
1110
+ #
1111
def build_xrefs(objects) #:nodoc:

  previous_no = 0
  first_no = 0

  # Every section starts with the FIRSTFREE/FREE entry for object 0.
  entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

  section = XRef::Section.new
  objects.sort.each do |object|
    # An object number more than one away from the previous one closes
    # the current subsection and opens a new one at this number.
    unless (object.no - previous_no).abs <= 1
      section << XRef::Subsection.new(first_no, entries)
      first_no = object.no
      entries.clear
    end

    # USED entry carrying the offset reported by get_object_offset.
    offset = get_object_offset(object.no, object.generation)
    entries << XRef.new(offset, object.generation, XRef::USED)

    previous_no = object.no
  end

  # Flush the last (possibly only) subsection.
  section << XRef::Subsection.new(first_no, entries)

  section
end
1067
-
1068
- size += revision.body.size
1069
- revision.xreftable = build_dummy_xrefs(revision.objects)
1070
-
1071
- revision.trailer ||= Trailer.new
1072
- revision.trailer.Size = size + 1
1073
- revision.trailer.startxref = startxref
1074
-
1075
- startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
1076
- end
1077
-
1078
- self
1079
- end
1080
-
1081
- #
1082
- # Build a xref section from a set of objects.
1083
- #
1084
- def buildxrefs(objects) #:nodoc:
1085
-
1086
- lastno = 0
1087
- brange = 0
1088
-
1089
- xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
1090
-
1091
- xrefsection = XRef::Section.new
1092
- objects.sort.each { |object|
1093
- if (object.no - lastno).abs > 1
1094
- xrefsection << XRef::Subsection.new(brange, xrefs)
1095
- brange = object.no
1096
- xrefs.clear
1135
+
1136
#
# Removes the revision stored at index _ngen_ from the revision list.
# Returns the removed revision, or nil when no revision exists at
# that index.
#
def delete_revision(ngen) #:nodoc:
  # delete_at must be *called* with the index. The previous form,
  # delete_at[ngen], invoked delete_at without arguments and raised
  # ArgumentError before the index was ever applied.
  @revisions.delete_at(ngen)
end
1098
-
1099
- xrefs << XRef.new(get_object_offset(object.no, object.generation), object.generation, XRef::USED)
1100
-
1101
- lastno = object.no
1102
- }
1103
-
1104
- xrefsection << XRef::Subsection.new(brange, xrefs)
1105
-
1106
- xrefsection
1107
- end
1108
-
1109
- def delete_revision(ngen) #:nodoc:
1110
- @revisions.delete_at[ngen]
1111
- end
1112
-
1113
- def get_revision(ngen) #:nodoc:
1114
- @revisions[ngen].body
1115
- end
1116
-
1117
- def get_object_offset(no,generation) #:nodoc:
1118
- objectoffset = @header.to_s.size
1119
-
1120
- @revisions.each do |revision|
1121
- revision.objects.sort.each do |object|
1122
- if object.no == no and object.generation == generation then return objectoffset
1123
- else
1124
- objectoffset += object.to_s.size
1125
- end
1139
+
1140
def get_revision(ngen) #:nodoc:
  # Look up the revision at index ngen, then hand back its body.
  revision = @revisions[ngen]
  revision.body
end
1127
-
1128
- objectoffset += revision.xreftable.to_s.size
1129
- objectoffset += revision.trailer.to_s.size
1130
- end
1131
-
1132
- nil
1133
- end
1134
1143
 
1135
- end
1144
def get_object_offset(no, generation) #:nodoc:
  # Running position, starting right after the serialized header.
  position = @header.to_s.size

  @revisions.each do |revision|
    revision.objects.sort.each do |candidate|
      # Found it: the position reached so far is where this object starts.
      return position if candidate.no == no && candidate.generation == generation

      position += candidate.to_s.size
    end

    # Account for this revision's xref table and trailer.
    position += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  # No object with that number and generation in any revision.
  nil
end
1161
+ end
1162
+
1163
+ end