origamindee 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +89 -0
- data/COPYING.LESSER +165 -0
- data/README.md +131 -0
- data/bin/config/pdfcop.conf.yml +236 -0
- data/bin/pdf2pdfa +87 -0
- data/bin/pdf2ruby +333 -0
- data/bin/pdfcop +476 -0
- data/bin/pdfdecompress +97 -0
- data/bin/pdfdecrypt +91 -0
- data/bin/pdfencrypt +113 -0
- data/bin/pdfexplode +223 -0
- data/bin/pdfextract +277 -0
- data/bin/pdfmetadata +143 -0
- data/bin/pdfsh +12 -0
- data/bin/shell/console.rb +128 -0
- data/bin/shell/hexdump.rb +59 -0
- data/bin/shell/irbrc +69 -0
- data/examples/README.md +34 -0
- data/examples/attachments/attachment.rb +38 -0
- data/examples/attachments/nested_document.rb +51 -0
- data/examples/encryption/encryption.rb +28 -0
- data/examples/events/events.rb +72 -0
- data/examples/flash/flash.rb +37 -0
- data/examples/flash/helloworld.swf +0 -0
- data/examples/forms/javascript.rb +54 -0
- data/examples/forms/xfa.rb +115 -0
- data/examples/javascript/hello_world.rb +22 -0
- data/examples/javascript/js_emulation.rb +54 -0
- data/examples/loop/goto.rb +32 -0
- data/examples/loop/named.rb +33 -0
- data/examples/signature/signature.rb +65 -0
- data/examples/uri/javascript.rb +56 -0
- data/examples/uri/open-uri.rb +21 -0
- data/examples/uri/submitform.rb +47 -0
- data/lib/origami/3d.rb +364 -0
- data/lib/origami/acroform.rb +321 -0
- data/lib/origami/actions.rb +318 -0
- data/lib/origami/annotations.rb +711 -0
- data/lib/origami/array.rb +242 -0
- data/lib/origami/boolean.rb +90 -0
- data/lib/origami/catalog.rb +418 -0
- data/lib/origami/collections.rb +144 -0
- data/lib/origami/compound.rb +161 -0
- data/lib/origami/destinations.rb +252 -0
- data/lib/origami/dictionary.rb +192 -0
- data/lib/origami/encryption.rb +1084 -0
- data/lib/origami/extensions/fdf.rb +347 -0
- data/lib/origami/extensions/ppklite.rb +422 -0
- data/lib/origami/filespec.rb +197 -0
- data/lib/origami/filters/ascii.rb +211 -0
- data/lib/origami/filters/ccitt/tables.rb +267 -0
- data/lib/origami/filters/ccitt.rb +357 -0
- data/lib/origami/filters/crypt.rb +38 -0
- data/lib/origami/filters/dct.rb +54 -0
- data/lib/origami/filters/flate.rb +69 -0
- data/lib/origami/filters/jbig2.rb +57 -0
- data/lib/origami/filters/jpx.rb +47 -0
- data/lib/origami/filters/lzw.rb +170 -0
- data/lib/origami/filters/predictors.rb +292 -0
- data/lib/origami/filters/runlength.rb +129 -0
- data/lib/origami/filters.rb +364 -0
- data/lib/origami/font.rb +196 -0
- data/lib/origami/functions.rb +79 -0
- data/lib/origami/graphics/colors.rb +230 -0
- data/lib/origami/graphics/instruction.rb +98 -0
- data/lib/origami/graphics/path.rb +182 -0
- data/lib/origami/graphics/patterns.rb +174 -0
- data/lib/origami/graphics/render.rb +62 -0
- data/lib/origami/graphics/state.rb +149 -0
- data/lib/origami/graphics/text.rb +225 -0
- data/lib/origami/graphics/xobject.rb +918 -0
- data/lib/origami/graphics.rb +38 -0
- data/lib/origami/header.rb +75 -0
- data/lib/origami/javascript.rb +713 -0
- data/lib/origami/linearization.rb +330 -0
- data/lib/origami/metadata.rb +172 -0
- data/lib/origami/name.rb +135 -0
- data/lib/origami/null.rb +65 -0
- data/lib/origami/numeric.rb +181 -0
- data/lib/origami/obfuscation.rb +245 -0
- data/lib/origami/object.rb +760 -0
- data/lib/origami/optionalcontent.rb +183 -0
- data/lib/origami/outline.rb +54 -0
- data/lib/origami/outputintents.rb +85 -0
- data/lib/origami/page.rb +722 -0
- data/lib/origami/parser.rb +269 -0
- data/lib/origami/parsers/fdf.rb +56 -0
- data/lib/origami/parsers/pdf/lazy.rb +176 -0
- data/lib/origami/parsers/pdf/linear.rb +122 -0
- data/lib/origami/parsers/pdf.rb +118 -0
- data/lib/origami/parsers/ppklite.rb +57 -0
- data/lib/origami/pdf.rb +1108 -0
- data/lib/origami/reference.rb +134 -0
- data/lib/origami/signature.rb +702 -0
- data/lib/origami/stream.rb +705 -0
- data/lib/origami/string.rb +444 -0
- data/lib/origami/template/patterns.rb +56 -0
- data/lib/origami/template/widgets.rb +151 -0
- data/lib/origami/trailer.rb +190 -0
- data/lib/origami/tree.rb +62 -0
- data/lib/origami/version.rb +23 -0
- data/lib/origami/webcapture.rb +100 -0
- data/lib/origami/xfa/config.rb +453 -0
- data/lib/origami/xfa/connectionset.rb +146 -0
- data/lib/origami/xfa/datasets.rb +49 -0
- data/lib/origami/xfa/localeset.rb +42 -0
- data/lib/origami/xfa/package.rb +59 -0
- data/lib/origami/xfa/pdf.rb +73 -0
- data/lib/origami/xfa/signature.rb +42 -0
- data/lib/origami/xfa/sourceset.rb +43 -0
- data/lib/origami/xfa/stylesheet.rb +44 -0
- data/lib/origami/xfa/template.rb +1691 -0
- data/lib/origami/xfa/xdc.rb +42 -0
- data/lib/origami/xfa/xfa.rb +146 -0
- data/lib/origami/xfa/xfdf.rb +43 -0
- data/lib/origami/xfa/xmpmeta.rb +43 -0
- data/lib/origami/xfa.rb +62 -0
- data/lib/origami/xreftable.rb +557 -0
- data/lib/origami.rb +47 -0
- data/test/dataset/calc.pdf +85 -0
- data/test/dataset/crypto.pdf +36 -0
- data/test/dataset/empty.pdf +49 -0
- data/test/test_actions.rb +27 -0
- data/test/test_annotations.rb +68 -0
- data/test/test_forms.rb +30 -0
- data/test/test_native_types.rb +83 -0
- data/test/test_object_tree.rb +33 -0
- data/test/test_pages.rb +60 -0
- data/test/test_pdf.rb +20 -0
- data/test/test_pdf_attachment.rb +34 -0
- data/test/test_pdf_create.rb +24 -0
- data/test/test_pdf_encrypt.rb +102 -0
- data/test/test_pdf_parse.rb +134 -0
- data/test/test_pdf_parse_lazy.rb +69 -0
- data/test/test_pdf_sign.rb +97 -0
- data/test/test_streams.rb +184 -0
- data/test/test_xrefs.rb +67 -0
- metadata +280 -0
@@ -0,0 +1,330 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
module Origami
|
22
|
+
|
23
|
+
class PDF
|
24
|
+
|
25
|
+
class LinearizationError < Error #:nodoc:
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Returns whether the current document is linearized.
|
30
|
+
#
|
31
|
+
def linearized?
|
32
|
+
begin
|
33
|
+
first_obj = @revisions.first.objects.min_by{|obj| obj.file_offset}
|
34
|
+
rescue
|
35
|
+
return false
|
36
|
+
end
|
37
|
+
|
38
|
+
@revisions.size > 1 and first_obj.is_a?(Dictionary) and first_obj.has_key? :Linearized
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Tries to delinearize the document if it has been linearized.
|
43
|
+
# This operation destroys the existing xref tables; it should eventually merge them instead.
|
44
|
+
#
|
45
|
+
def delinearize!
|
46
|
+
raise LinearizationError, 'Not a linearized document' unless self.linearized?
|
47
|
+
|
48
|
+
#
|
49
|
+
# Saves the first trailer.
|
50
|
+
#
|
51
|
+
prev_trailer = @revisions.first.trailer
|
52
|
+
|
53
|
+
linear_dict = @revisions.first.objects.min_by{|obj| obj.file_offset}
|
54
|
+
|
55
|
+
#
|
56
|
+
# Removes hint streams used by linearization.
|
57
|
+
#
|
58
|
+
delete_hint_streams(linear_dict)
|
59
|
+
|
60
|
+
#
|
61
|
+
# Update the trailer.
|
62
|
+
#
|
63
|
+
last_trailer = (@revisions.last.trailer ||= Trailer.new)
|
64
|
+
last_trailer.dictionary ||= Dictionary.new
|
65
|
+
|
66
|
+
if prev_trailer.dictionary?
|
67
|
+
last_trailer.dictionary =
|
68
|
+
last_trailer.dictionary.merge(prev_trailer.dictionary)
|
69
|
+
else
|
70
|
+
xrefstm = @revisions.last.xrefstm
|
71
|
+
raise LinearizationError,
|
72
|
+
'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)
|
73
|
+
|
74
|
+
last_trailer.dictionary[:Root] = xrefstm[:Root]
|
75
|
+
last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
|
76
|
+
last_trailer.dictionary[:Info] = xrefstm[:Info]
|
77
|
+
last_trailer.dictionary[:ID] = xrefstm[:ID]
|
78
|
+
end
|
79
|
+
|
80
|
+
#
|
81
|
+
# Remove all xrefs.
|
82
|
+
# Fix: Should be merged instead.
|
83
|
+
#
|
84
|
+
remove_xrefs
|
85
|
+
|
86
|
+
#
|
87
|
+
# Remove the linearization revision.
|
88
|
+
#
|
89
|
+
@revisions.first.body.delete(linear_dict.reference)
|
90
|
+
@revisions.last.body.merge! @revisions.first.body
|
91
|
+
|
92
|
+
remove_revision(0)
|
93
|
+
|
94
|
+
self
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
#
|
100
|
+
# Strips the hint streams referenced by a linearization dictionary from the document.
|
101
|
+
#
|
102
|
+
def delete_hint_streams(linearization_dict)
|
103
|
+
hints = linearization_dict[:H]
|
104
|
+
return unless hints.is_a?(Array)
|
105
|
+
|
106
|
+
hints.each_slice(2) do |offset, _length|
|
107
|
+
next unless offset.is_a?(Integer)
|
108
|
+
|
109
|
+
stream = get_object_by_offset(offset)
|
110
|
+
delete_object(stream.reference) if stream.is_a?(Stream)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
#
|
116
|
+
# Class representing a linearization dictionary.
|
117
|
+
#
|
118
|
+
class Linearization < Dictionary
|
119
|
+
include StandardObject
|
120
|
+
|
121
|
+
field :Linearized, :Type => Real, :Default => 1.0, :Required => true
|
122
|
+
field :L, :Type => Integer, :Required => true
|
123
|
+
field :H, :Type => Array.of(Integer), :Required => true
|
124
|
+
field :O, :Type => Integer, :Required => true
|
125
|
+
field :E, :Type => Integer, :Required => true
|
126
|
+
field :N, :Type => Integer, :Required => true
|
127
|
+
field :T, :Type => Integer, :Required => true
|
128
|
+
field :P, :Type => Integer, :Default => 0
|
129
|
+
|
130
|
+
def initialize(hash = {}, parser = nil)
|
131
|
+
super(hash, parser)
|
132
|
+
|
133
|
+
set_indirect(true)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
class InvalidHintTableError < Error #:nodoc:
|
138
|
+
end
|
139
|
+
|
140
|
+
module HintTable
|
141
|
+
module ClassMethods
|
142
|
+
def header_item_size(number, size)
|
143
|
+
@header_items_size[number] = size
|
144
|
+
end
|
145
|
+
|
146
|
+
def get_header_item_size(number)
|
147
|
+
@header_items_size[number]
|
148
|
+
end
|
149
|
+
|
150
|
+
def entry_item_size(number, size)
|
151
|
+
@entry_items_size[number] = size
|
152
|
+
end
|
153
|
+
|
154
|
+
def get_entry_item_size(number)
|
155
|
+
@entry_items_size[number]
|
156
|
+
end
|
157
|
+
|
158
|
+
def nb_header_items
|
159
|
+
@header_items_size.size
|
160
|
+
end
|
161
|
+
|
162
|
+
def nb_entry_items
|
163
|
+
@entry_items_size.size
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def self.included(receiver)
|
168
|
+
receiver.instance_variable_set(:@header_items_size, {})
|
169
|
+
receiver.instance_variable_set(:@entry_items_size, {})
|
170
|
+
receiver.extend(ClassMethods)
|
171
|
+
end
|
172
|
+
|
173
|
+
attr_accessor :header_items
|
174
|
+
attr_accessor :entries
|
175
|
+
|
176
|
+
def initialize
|
177
|
+
@header_items = {}
|
178
|
+
@entries = []
|
179
|
+
end
|
180
|
+
|
181
|
+
def to_s
|
182
|
+
data = ""
|
183
|
+
|
184
|
+
nitems = self.class.nb_header_items
|
185
|
+
for no in (1..nitems)
|
186
|
+
unless @header_items.include?(no)
|
187
|
+
raise InvalidHintTableError, "Missing item #{no} in header section of #{self.class}"
|
188
|
+
end
|
189
|
+
|
190
|
+
value = @header_items[no]
|
191
|
+
item_size = self.class.get_header_item_size(no)
|
192
|
+
|
193
|
+
item_size = ((item_size + 7) >> 3) << 3
|
194
|
+
item_data = value.to_s(2)
|
195
|
+
item_data = "0" * (item_size - item_data.size) + item_data
|
196
|
+
|
197
|
+
data << [ item_data ].pack("B*")
|
198
|
+
end
|
199
|
+
|
200
|
+
nitems = self.class.nb_entry_items
|
201
|
+
@entries.each_with_index do |entry, i|
|
202
|
+
for no in (1..nitems)
|
203
|
+
unless entry.include?(no)
|
204
|
+
raise InvalidHintTableError, "Missing item #{no} in entry #{i} of #{self.class}"
|
205
|
+
end
|
206
|
+
|
207
|
+
value = entry[no]
|
208
|
+
item_size = self.class.get_entry_item_size(no)
|
209
|
+
|
210
|
+
item_size = ((item_size + 7) >> 3) << 3
|
211
|
+
item_data = value.to_s(2)
|
212
|
+
item_data = "0" * (item_size - item_data.size) + item_data
|
213
|
+
|
214
|
+
data << [ item_data ].pack("B*")
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
data
|
219
|
+
end
|
220
|
+
|
221
|
+
class PageOffsetTable
|
222
|
+
include HintTable
|
223
|
+
|
224
|
+
header_item_size 1, 32
|
225
|
+
header_item_size 2, 32
|
226
|
+
header_item_size 3, 16
|
227
|
+
header_item_size 4, 32
|
228
|
+
header_item_size 5, 16
|
229
|
+
header_item_size 6, 32
|
230
|
+
header_item_size 7, 16
|
231
|
+
header_item_size 8, 32
|
232
|
+
header_item_size 9, 16
|
233
|
+
header_item_size 10, 16
|
234
|
+
header_item_size 11, 16
|
235
|
+
header_item_size 12, 16
|
236
|
+
header_item_size 13, 16
|
237
|
+
|
238
|
+
entry_item_size 1, 16
|
239
|
+
entry_item_size 2, 16
|
240
|
+
entry_item_size 3, 16
|
241
|
+
entry_item_size 4, 16
|
242
|
+
entry_item_size 5, 16
|
243
|
+
entry_item_size 6, 16
|
244
|
+
entry_item_size 7, 16
|
245
|
+
end
|
246
|
+
|
247
|
+
class SharedObjectTable
|
248
|
+
include HintTable
|
249
|
+
|
250
|
+
header_item_size 1, 32
|
251
|
+
header_item_size 2, 32
|
252
|
+
header_item_size 3, 32
|
253
|
+
header_item_size 4, 32
|
254
|
+
header_item_size 5, 16
|
255
|
+
header_item_size 6, 32
|
256
|
+
header_item_size 7, 16
|
257
|
+
|
258
|
+
entry_item_size 1, 16
|
259
|
+
entry_item_size 2, 1
|
260
|
+
entry_item_size 3, 128
|
261
|
+
entry_item_size 4, 16
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
class InvalidHintStreamObjectError < InvalidStreamObjectError #:nodoc:
|
266
|
+
end
|
267
|
+
|
268
|
+
class HintStream < Stream
|
269
|
+
attr_accessor :page_offset_table
|
270
|
+
attr_accessor :shared_objects_table
|
271
|
+
attr_accessor :thumbnails_table
|
272
|
+
attr_accessor :outlines_table
|
273
|
+
attr_accessor :threads_table
|
274
|
+
attr_accessor :named_destinations_table
|
275
|
+
attr_accessor :interactive_forms_table
|
276
|
+
attr_accessor :information_dictionary_table
|
277
|
+
attr_accessor :logical_structure_table
|
278
|
+
attr_accessor :page_labels_table
|
279
|
+
attr_accessor :renditions_table
|
280
|
+
attr_accessor :embedded_files_table
|
281
|
+
|
282
|
+
field :S, :Type => Integer, :Required => true # Shared objects
|
283
|
+
field :T, :Type => Integer # Thumbnails
|
284
|
+
field :O, :Type => Integer # Outlines
|
285
|
+
field :A, :Type => Integer # Threads
|
286
|
+
field :E, :Type => Integer # Named destinations
|
287
|
+
field :V, :Type => Integer # Interactive forms
|
288
|
+
field :I, :Type => Integer # Information dictionary
|
289
|
+
field :C, :Type => Integer # Logical structure
|
290
|
+
field :L, :Type => Integer # Page labels
|
291
|
+
field :R, :Type => Integer # Renditions
|
292
|
+
field :B, :Type => Integer # Embedded files
|
293
|
+
|
294
|
+
def pre_build
|
295
|
+
if @page_offset_table.nil?
|
296
|
+
raise InvalidHintStreamObjectError, "No page offset hint table"
|
297
|
+
end
|
298
|
+
|
299
|
+
if @shared_objects_table.nil?
|
300
|
+
raise InvalidHintStreamObjectError, "No shared objects hint table"
|
301
|
+
end
|
302
|
+
|
303
|
+
@data = ""
|
304
|
+
save_table(@page_offset_table)
|
305
|
+
save_table(@shared_objects_table, :S)
|
306
|
+
save_table(@thumbnails_table, :T)
|
307
|
+
save_table(@outlines_table, :O)
|
308
|
+
save_table(@threads_table, :A)
|
309
|
+
save_table(@named_destinations_table, :E)
|
310
|
+
save_table(@interactive_forms_table, :V)
|
311
|
+
save_table(@information_dictionary_table, :I)
|
312
|
+
save_table(@logical_structure_table, :C)
|
313
|
+
save_table(@page_labels_table, :L)
|
314
|
+
save_table(@renditions_table, :R)
|
315
|
+
save_table(@embedded_files_table, :B)
|
316
|
+
|
317
|
+
super
|
318
|
+
end
|
319
|
+
|
320
|
+
private
|
321
|
+
|
322
|
+
def save_table(table, name = nil)
|
323
|
+
unless table.nil?
|
324
|
+
self[name] = @data.size if name
|
325
|
+
@data << table.to_s
|
326
|
+
end
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
require 'rexml/document'
|
22
|
+
|
23
|
+
module Origami
|
24
|
+
|
25
|
+
class PDF
|
26
|
+
#
|
27
|
+
# Returns true if the document has a document information dictionary.
|
28
|
+
#
|
29
|
+
def document_info?
|
30
|
+
trailer_key? :Info
|
31
|
+
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# Returns the document information dictionary if present.
|
35
|
+
#
|
36
|
+
def document_info
|
37
|
+
trailer_key :Info
|
38
|
+
end
|
39
|
+
|
40
|
+
def title; get_document_info_field(:Title) end
|
41
|
+
def author; get_document_info_field(:Author) end
|
42
|
+
def subject; get_document_info_field(:Subject) end
|
43
|
+
def keywords; get_document_info_field(:Keywords) end
|
44
|
+
def creator; get_document_info_field(:Creator) end
|
45
|
+
def producer; get_document_info_field(:Producer) end
|
46
|
+
def creation_date; get_document_info_field(:CreationDate) end
|
47
|
+
def mod_date; get_document_info_field(:ModDate) end
|
48
|
+
|
49
|
+
#
|
50
|
+
# Returns true if the document has a catalog metadata stream.
|
51
|
+
#
|
52
|
+
def metadata?
|
53
|
+
self.Catalog.Metadata.is_a?(Stream)
|
54
|
+
end
|
55
|
+
|
56
|
+
#
|
57
|
+
# Returns a Hash of the information found in the metadata stream
|
58
|
+
#
|
59
|
+
def metadata
|
60
|
+
metadata_stm = self.Catalog.Metadata
|
61
|
+
|
62
|
+
if metadata_stm.is_a?(Stream)
|
63
|
+
doc = REXML::Document.new(metadata_stm.data)
|
64
|
+
info = {}
|
65
|
+
|
66
|
+
doc.elements.each('*/*/rdf:Description') do |description|
|
67
|
+
|
68
|
+
description.attributes.each_attribute do |attr|
|
69
|
+
case attr.prefix
|
70
|
+
when 'pdf','xap'
|
71
|
+
info[attr.name] = attr.value
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
description.elements.each('*') do |element|
|
76
|
+
value = (element.elements['.//rdf:li'] || element).text
|
77
|
+
info[element.name] = value.to_s
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
info
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# Modifies or creates a metadata stream.
|
87
|
+
#
|
88
|
+
def create_metadata(info = {})
|
89
|
+
skeleton = <<-XMP
|
90
|
+
<?packet begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>
|
91
|
+
<x:xmpmeta xmlns:x="adobe:ns:meta/">
|
92
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
93
|
+
<rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
|
94
|
+
</rdf:Description>
|
95
|
+
</rdf:RDF>
|
96
|
+
</x:xmpmeta>
|
97
|
+
<?xpacket end="w"?>
|
98
|
+
XMP
|
99
|
+
|
100
|
+
xml =
|
101
|
+
if self.Catalog.Metadata.is_a?(Stream)
|
102
|
+
self.Catalog.Metadata.data
|
103
|
+
else
|
104
|
+
skeleton
|
105
|
+
end
|
106
|
+
|
107
|
+
doc = REXML::Document.new(xml)
|
108
|
+
desc = doc.elements['*/*/rdf:Description']
|
109
|
+
|
110
|
+
info.each do |name, value|
|
111
|
+
elt = REXML::Element.new "pdf:#{name}"
|
112
|
+
elt.text = value
|
113
|
+
|
114
|
+
desc.elements << elt
|
115
|
+
end
|
116
|
+
|
117
|
+
xml = ""; doc.write(xml, 4)
|
118
|
+
|
119
|
+
if self.Catalog.Metadata.is_a?(Stream)
|
120
|
+
self.Catalog.Metadata.data = xml
|
121
|
+
else
|
122
|
+
self.Catalog.Metadata = Stream.new(xml)
|
123
|
+
end
|
124
|
+
|
125
|
+
self.Catalog.Metadata
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
def get_document_info_field(field) #:nodoc:
|
131
|
+
if self.document_info?
|
132
|
+
doc_info = self.document_info
|
133
|
+
|
134
|
+
if doc_info.key?(field)
|
135
|
+
case obj = doc_info[field].solve
|
136
|
+
when String then obj.value
|
137
|
+
when Stream then obj.data
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
#
|
145
|
+
# Class representing an information Dictionary, containing title, author, date of creation and the like.
|
146
|
+
#
|
147
|
+
class Metadata < Dictionary
|
148
|
+
include StandardObject
|
149
|
+
|
150
|
+
field :Title, :Type => String, :Version => "1.1"
|
151
|
+
field :Author, :Type => String
|
152
|
+
field :Subject, :Type => String, :Version => "1.1"
|
153
|
+
field :Keywords, :Type => String, :Version => "1.1"
|
154
|
+
field :Creator, :Type => String
|
155
|
+
field :Producer, :Type => String
|
156
|
+
field :CreationDate, :Type => String
|
157
|
+
field :ModDate, :Type => String, :Version => "1.1"
|
158
|
+
field :Trapped, :Type => Name, :Default => :Unknown, :Version => "1.3"
|
159
|
+
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# Class representing a metadata Stream.
|
163
|
+
# This stream can contain the same information as the Metadata dictionary, but stores it as XML data.
|
164
|
+
#
|
165
|
+
class MetadataStream < Stream
|
166
|
+
include StandardObject
|
167
|
+
|
168
|
+
field :Type, :Type => Name, :Default => :Metadata, :Required => true
|
169
|
+
field :Subtype, :Type => Name, :Default =>:XML, :Required => true
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
data/lib/origami/name.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
module Origami
|
22
|
+
|
23
|
+
REGULARCHARS = "([^ \\t\\r\\n\\0\\[\\]<>()%\\/]|#[a-fA-F0-9][a-fA-F0-9])*" #:nodoc:
|
24
|
+
|
25
|
+
class InvalidNameObjectError < InvalidObjectError #:nodoc:
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Class representing a Name Object.
|
30
|
+
# Name objects are strings which identify some PDF file inner structures.
|
31
|
+
#
|
32
|
+
class Name
|
33
|
+
include Origami::Object
|
34
|
+
include Comparable
|
35
|
+
|
36
|
+
TOKENS = %w{ / } #:nodoc:
|
37
|
+
|
38
|
+
@@regexp = Regexp.new(WHITESPACES + TOKENS.first + "(?<name>#{REGULARCHARS})" + WHITESPACES) #:nodoc
|
39
|
+
|
40
|
+
#
|
41
|
+
# Creates a new Name.
|
42
|
+
# _name_:: A symbol representing the new Name value.
|
43
|
+
#
|
44
|
+
def initialize(name = "")
|
45
|
+
unless name.is_a?(Symbol) or name.is_a?(::String)
|
46
|
+
raise TypeError, "Expected type Symbol or String, received #{name.class}."
|
47
|
+
end
|
48
|
+
|
49
|
+
@value = name.to_s
|
50
|
+
|
51
|
+
super()
|
52
|
+
end
|
53
|
+
|
54
|
+
def value
|
55
|
+
@value.to_sym
|
56
|
+
end
|
57
|
+
alias to_sym value
|
58
|
+
|
59
|
+
def <=>(name)
|
60
|
+
return unless name.is_a?(Name)
|
61
|
+
|
62
|
+
self.value <=> name.value
|
63
|
+
end
|
64
|
+
|
65
|
+
def ==(object) #:nodoc:
|
66
|
+
self.eql?(object) or @value.to_sym == object
|
67
|
+
end
|
68
|
+
|
69
|
+
def eql?(object) #:nodoc:
|
70
|
+
object.is_a?(Name) and self.value.eql?(object.value)
|
71
|
+
end
|
72
|
+
|
73
|
+
def hash #:nodoc:
|
74
|
+
@value.hash
|
75
|
+
end
|
76
|
+
|
77
|
+
def to_s(eol: $/) #:nodoc:
|
78
|
+
super(TOKENS.first + Name.expand(@value), eol: eol)
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.parse(stream, _parser = nil) #:nodoc:
|
82
|
+
scanner = Parser.init_scanner(stream)
|
83
|
+
offset = scanner.pos
|
84
|
+
|
85
|
+
name =
|
86
|
+
if scanner.scan(@@regexp).nil?
|
87
|
+
raise InvalidNameObjectError, "Bad name format"
|
88
|
+
else
|
89
|
+
value = scanner['name']
|
90
|
+
|
91
|
+
Name.new(value.include?('#') ? contract(value) : value)
|
92
|
+
end
|
93
|
+
|
94
|
+
name.file_offset = offset
|
95
|
+
|
96
|
+
name
|
97
|
+
end
|
98
|
+
|
99
|
+
def self.contract(name) #:nodoc:
|
100
|
+
i = 0
|
101
|
+
name = name.dup
|
102
|
+
|
103
|
+
while i < name.length
|
104
|
+
if name[i] == "#"
|
105
|
+
digits = name[i+1, 2]
|
106
|
+
|
107
|
+
unless digits =~ /^[A-Za-z0-9]{2}$/
|
108
|
+
raise InvalidNameObjectError, "Irregular use of # token"
|
109
|
+
end
|
110
|
+
|
111
|
+
char = digits.hex.chr
|
112
|
+
|
113
|
+
if char == "\0"
|
114
|
+
raise InvalidNameObjectError, "Null byte forbidden inside name definition"
|
115
|
+
end
|
116
|
+
|
117
|
+
name[i, 3] = char
|
118
|
+
end
|
119
|
+
|
120
|
+
i = i + 1
|
121
|
+
end
|
122
|
+
|
123
|
+
name
|
124
|
+
end
|
125
|
+
|
126
|
+
def self.expand(name) #:nodoc:
|
127
|
+
forbiddenchars = /[ #\t\r\n\0\[\]<>()%\/]/
|
128
|
+
|
129
|
+
name.gsub(forbiddenchars) do |c|
|
130
|
+
"#" + c.ord.to_s(16).rjust(2,"0")
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
data/lib/origami/null.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
=begin
|
2
|
+
|
3
|
+
This file is part of Origami, PDF manipulation framework for Ruby
|
4
|
+
Copyright (C) 2016 Guillaume Delugré.
|
5
|
+
|
6
|
+
Origami is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU Lesser General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Origami is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public License
|
17
|
+
along with Origami. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
=end
|
20
|
+
|
21
|
+
module Origami
|
22
|
+
|
23
|
+
class InvalidNullObjectError < InvalidObjectError #:nodoc:
|
24
|
+
end
|
25
|
+
|
26
|
+
#
|
27
|
+
# Class representing Null Object.
|
28
|
+
#
|
29
|
+
class Null
|
30
|
+
include Origami::Object
|
31
|
+
|
32
|
+
TOKENS = %w{ null } #:nodoc:
|
33
|
+
@@regexp = Regexp.new(WHITESPACES + TOKENS.first)
|
34
|
+
|
35
|
+
def initialize
|
36
|
+
super
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.parse(stream, _parser = nil) #:nodoc:
|
40
|
+
scanner = Parser.init_scanner(stream)
|
41
|
+
offset = scanner.pos
|
42
|
+
|
43
|
+
if scanner.skip(@@regexp).nil?
|
44
|
+
raise InvalidNullObjectError
|
45
|
+
end
|
46
|
+
|
47
|
+
null = Null.new
|
48
|
+
null.file_offset = offset
|
49
|
+
|
50
|
+
null
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Returns *nil*.
|
55
|
+
#
|
56
|
+
def value
|
57
|
+
nil
|
58
|
+
end
|
59
|
+
|
60
|
+
def to_s(eol: $/) #:nodoc:
|
61
|
+
super(TOKENS.first, eol: eol)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|