origami 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/pdf2ruby +6 -17
- data/bin/pdfcop +46 -23
- data/lib/origami/graphics/xobject.rb +4 -4
- data/lib/origami/parser.rb +20 -20
- data/lib/origami/pdf.rb +4 -4
- data/lib/origami/trailer.rb +2 -2
- data/lib/origami/version.rb +1 -1
- data/test/test_objects.rb +33 -0
- data/test/test_pdf.rb +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67f206ada90cab96c6551107f6e1b7c882608a9c
|
4
|
+
data.tar.gz: c64e6272dd9512390470c9e5c56454f569162913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5335917cb27a2fba1c7e36cd805b924d225d436b62be76411d43c51102c651eb5cbdc931cf12bfd4de206e93a421afcf01edae11b20816a4d6815d06b7aba3f
|
7
|
+
data.tar.gz: aa3eaf290b1c5ece1b06630dcb1440e81403865d753355dbccac57ba7f395486eb2bd3b14ba0a96baa8b15c2d4666072d37dead6a54de7c44a40d212ebb67bb8
|
data/bin/pdf2ruby
CHANGED
@@ -96,10 +96,8 @@ def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
|
|
96
96
|
case obj
|
97
97
|
when Origami::Null
|
98
98
|
"Null.new"
|
99
|
-
when Origami::Boolean, Origami::Number
|
100
|
-
obj
|
101
|
-
when Origami::String
|
102
|
-
obj.inspect
|
99
|
+
when Origami::Boolean, Origami::Number, Origami::Name, Origami::String
|
100
|
+
literalToRuby(obj)
|
103
101
|
when Origami::Dictionary
|
104
102
|
customclass = nil
|
105
103
|
if obj.class != Origami::Dictionary
|
@@ -111,8 +109,6 @@ def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
|
|
111
109
|
arrayToRuby(obj, inclevel, internalname)
|
112
110
|
when Origami::Stream
|
113
111
|
streamToRuby(obj, internalname) unless obj.is_a?(ObjectStream) or obj.is_a?(XRefStream)
|
114
|
-
when Origami::Name
|
115
|
-
nameToRuby(obj)
|
116
112
|
when Origami::Reference
|
117
113
|
referenceToRuby(obj, internalname)
|
118
114
|
else
|
@@ -144,15 +140,8 @@ def referenceToRuby(ref, internalname)
|
|
144
140
|
end
|
145
141
|
end
|
146
142
|
|
147
|
-
def
|
148
|
-
|
149
|
-
valid = (name.value.to_s =~ /[+.:-]/).nil?
|
150
|
-
|
151
|
-
code << '"' unless valid
|
152
|
-
code << name.value.to_s
|
153
|
-
code << '"' unless valid
|
154
|
-
|
155
|
-
code
|
143
|
+
def literalToRuby(obj)
|
144
|
+
obj.value.inspect
|
156
145
|
end
|
157
146
|
|
158
147
|
def arrayToRuby(arr, inclevel, internalname)
|
@@ -180,7 +169,7 @@ def dictionaryToRuby(dict, inclevel, internalname, customtype = nil)
|
|
180
169
|
else
|
181
170
|
code << "{\n"
|
182
171
|
dict.each_pair do |key, val|
|
183
|
-
rubyname =
|
172
|
+
rubyname = literalToRuby(key)
|
184
173
|
subintname = "#{internalname}[#{rubyname}]"
|
185
174
|
|
186
175
|
if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
|
@@ -212,7 +201,7 @@ def dictionaryToHashMap(dict, inclevel, internalname)
|
|
212
201
|
i = 0
|
213
202
|
code = "\n"
|
214
203
|
dict.each_pair do |key, val|
|
215
|
-
rubyname =
|
204
|
+
rubyname = literalToRuby(key)
|
216
205
|
subintname = "#{internalname}[#{rubyname}]"
|
217
206
|
|
218
207
|
if val.is_a?(Origami::Reference) and @var_hash[val] and @var_hash[val][0,3] == "obj"
|
data/bin/pdfcop
CHANGED
@@ -36,7 +36,8 @@ end
|
|
36
36
|
require 'optparse'
|
37
37
|
require 'yaml'
|
38
38
|
require 'rexml/document'
|
39
|
-
require 'digest/
|
39
|
+
require 'digest/sha2'
|
40
|
+
require 'fileutils'
|
40
41
|
require 'colorize'
|
41
42
|
|
42
43
|
DEFAULT_CONFIG_FILE = "#{File.dirname(__FILE__)}/config/pdfcop.conf.yml"
|
@@ -74,6 +75,10 @@ USAGE
|
|
74
75
|
options[:policy] = policy
|
75
76
|
end
|
76
77
|
|
78
|
+
opts.on("-m", "--move PATH", "Move rejected documents to the specified directory.") do |dir|
|
79
|
+
options[:move_dir] = dir
|
80
|
+
end
|
81
|
+
|
77
82
|
opts.on("-P", "--password PASSWORD", "Password to use if the document is encrypted") do |passwd|
|
78
83
|
options[:password] = passwd
|
79
84
|
end
|
@@ -95,20 +100,24 @@ USAGE
|
|
95
100
|
end
|
96
101
|
|
97
102
|
@options = OptParser.parse(ARGV)
|
98
|
-
if @options.
|
103
|
+
if @options.key?(:output_log)
|
99
104
|
LOGGER = File.open(@options[:output_log], "a+")
|
100
105
|
else
|
101
106
|
LOGGER = STDOUT
|
102
107
|
end
|
103
108
|
|
104
|
-
if not @options.
|
109
|
+
if not @options.key?(:policy)
|
105
110
|
@options[:policy] = DEFAULT_POLICY
|
106
111
|
end
|
107
112
|
|
113
|
+
if @options.key?(:move_dir) and not File.directory?(@options[:move_dir])
|
114
|
+
abort "Error: #{@options[:move_dir]} is not a valid directory."
|
115
|
+
end
|
116
|
+
|
108
117
|
String.disable_colorization @options[:disable_colors]
|
109
118
|
|
110
119
|
load_config_file(@options[:config_file] || DEFAULT_CONFIG_FILE)
|
111
|
-
unless SECURITY_POLICIES.
|
120
|
+
unless SECURITY_POLICIES.key?("POLICY_#{@options[:policy].upcase}")
|
112
121
|
abort "Undeclared policy `#{@options[:policy]}'"
|
113
122
|
end
|
114
123
|
|
@@ -124,9 +133,23 @@ end
|
|
124
133
|
|
125
134
|
def reject(cause)
|
126
135
|
log("Document rejected by policy `#{@options[:policy]}', caused by #{cause.inspect}.", :red)
|
136
|
+
|
137
|
+
if @options.key?(:move_dir)
|
138
|
+
quarantine(TARGET, @options[:move_dir])
|
139
|
+
end
|
140
|
+
|
127
141
|
abort
|
128
142
|
end
|
129
143
|
|
144
|
+
def quarantine(file, quarantine_folder)
|
145
|
+
digest = Digest::SHA256.file(TARGET)
|
146
|
+
ext = File.extname(TARGET)
|
147
|
+
dest_name = "#{File.basename(TARGET, ext)}_#{digest}#{ext}"
|
148
|
+
dest_path = File.join(@options[:move_dir], dest_name)
|
149
|
+
|
150
|
+
FileUtils.move(TARGET, dest_path)
|
151
|
+
end
|
152
|
+
|
130
153
|
def check_rights(*required_rights)
|
131
154
|
current_rights = SECURITY_POLICIES["POLICY_#{@options[:policy].upcase}"]
|
132
155
|
|
@@ -165,7 +188,7 @@ end
|
|
165
188
|
def analyze_annotation(annot, _level = 0)
|
166
189
|
check_rights(:allowAnnotations)
|
167
190
|
|
168
|
-
if annot.is_a?(Origami::Dictionary) and annot.
|
191
|
+
if annot.is_a?(Origami::Dictionary) and annot.key?(:Subtype)
|
169
192
|
case annot[:Subtype].solve.value
|
170
193
|
when :FileAttachment
|
171
194
|
check_rights(:allowAttachments, :allowFileAttachmentAnnotation)
|
@@ -186,7 +209,7 @@ def analyze_annotation(annot, _level = 0)
|
|
186
209
|
check_rights(:allow3DAnnotation)
|
187
210
|
|
188
211
|
# 3D annotation might pull in JavaScript for real-time driven behavior.
|
189
|
-
if annot.
|
212
|
+
if annot.key?(:"3DD")
|
190
213
|
dd = annot[:"3DD"].solve
|
191
214
|
u3dstream = nil
|
192
215
|
|
@@ -200,7 +223,7 @@ def analyze_annotation(annot, _level = 0)
|
|
200
223
|
if u3dstream and u3dstream.key?(:OnInstantiate)
|
201
224
|
check_rights(:allowJS)
|
202
225
|
|
203
|
-
if annot.
|
226
|
+
if annot.key?(:"3DA") # is 3d view instantiated automatically?
|
204
227
|
u3dactiv = annot[:"3DA"].solve
|
205
228
|
|
206
229
|
check_rights(:allowJSAtOpening) if u3dactiv.is_a?(Origami::Dictionary) and (u3dactiv[:A] == :PO or u3dactiv[:A] == :PV)
|
@@ -223,13 +246,13 @@ def analyze_page(page, level = 0)
|
|
223
246
|
#
|
224
247
|
# Checking page additional actions.
|
225
248
|
#
|
226
|
-
if page.
|
249
|
+
if page.key?(:AA)
|
227
250
|
if page.AA.is_a?(Origami::Dictionary)
|
228
251
|
log(text_prefix + " Page has an action dictionary.")
|
229
252
|
|
230
253
|
aa = Origami::Page::AdditionalActions.new(page.AA); aa.parent = page.AA.parent
|
231
|
-
analyze_action(aa.O, true, level + 1) if aa.
|
232
|
-
analyze_action(aa.C, false, level + 1) if aa.
|
254
|
+
analyze_action(aa.O, true, level + 1) if aa.key?(:O)
|
255
|
+
analyze_action(aa.C, false, level + 1) if aa.key?(:C)
|
233
256
|
end
|
234
257
|
end
|
235
258
|
|
@@ -280,7 +303,7 @@ def analyze_action(action, triggered_at_opening, level = 0)
|
|
280
303
|
check_rights(:allowGoToRAction)
|
281
304
|
|
282
305
|
when :Thread
|
283
|
-
check_rights(:allowGoToRAction) if action.
|
306
|
+
check_rights(:allowGoToRAction) if action.key?(:F)
|
284
307
|
|
285
308
|
when :URI
|
286
309
|
check_rights(:allowURIAction)
|
@@ -307,7 +330,7 @@ def analyze_action(action, triggered_at_opening, level = 0)
|
|
307
330
|
check_rights(:allow3DAnnotation,:allowGoTo3DAction)
|
308
331
|
end
|
309
332
|
|
310
|
-
if action.
|
333
|
+
if action.key?(:Next)
|
311
334
|
log(text_prefix + "This action is chained to another action!")
|
312
335
|
check_rights(:allowChainedActions)
|
313
336
|
analyze_action(action.Next)
|
@@ -329,7 +352,7 @@ end
|
|
329
352
|
begin
|
330
353
|
log("PDFcop is running on target `#{TARGET}', policy = `#{@options[:policy]}'", :green)
|
331
354
|
log(" File size: #{File.size(TARGET)} bytes", :magenta)
|
332
|
-
log("
|
355
|
+
log(" SHA256: #{Digest::SHA256.file(TARGET)}", :magenta)
|
333
356
|
|
334
357
|
@pdf = Origami::PDF.read(TARGET,
|
335
358
|
verbosity: Origami::Parser::VERBOSE_QUIET,
|
@@ -349,31 +372,31 @@ begin
|
|
349
372
|
catalog = @pdf.Catalog
|
350
373
|
reject("Invalid document catalog") unless catalog.is_a?(Origami::Catalog)
|
351
374
|
|
352
|
-
if catalog.
|
375
|
+
if catalog.key?(:OpenAction)
|
353
376
|
log(" . OpenAction entry = YES")
|
354
377
|
check_rights(:allowOpenAction)
|
355
378
|
action = catalog.OpenAction
|
356
379
|
analyze_action(action, true, 1)
|
357
380
|
end
|
358
381
|
|
359
|
-
if catalog.
|
382
|
+
if catalog.key?(:AA)
|
360
383
|
if catalog.AA.is_a?(Origami::Dictionary)
|
361
384
|
aa = Origami::CatalogAdditionalActions.new(catalog.AA); aa.parent = catalog;
|
362
385
|
log(" . Additional actions dictionary = YES")
|
363
|
-
analyze_action(aa.WC, false, 1) if aa.
|
364
|
-
analyze_action(aa.WS, false, 1) if aa.
|
365
|
-
analyze_action(aa.DS, false, 1) if aa.
|
366
|
-
analyze_action(aa.WP, false, 1) if aa.
|
367
|
-
analyze_action(aa.DP, false, 1) if aa.
|
386
|
+
analyze_action(aa.WC, false, 1) if aa.key?(:WC)
|
387
|
+
analyze_action(aa.WS, false, 1) if aa.key?(:WS)
|
388
|
+
analyze_action(aa.DS, false, 1) if aa.key?(:DS)
|
389
|
+
analyze_action(aa.WP, false, 1) if aa.key?(:WP)
|
390
|
+
analyze_action(aa.DP, false, 1) if aa.key?(:DP)
|
368
391
|
end
|
369
392
|
end
|
370
393
|
|
371
|
-
if catalog.
|
394
|
+
if catalog.key?(:AcroForm)
|
372
395
|
acroform = catalog.AcroForm
|
373
396
|
if acroform.is_a?(Origami::Dictionary)
|
374
397
|
log(" . AcroForm = YES")
|
375
398
|
check_rights(:allowAcroForms)
|
376
|
-
if acroform.
|
399
|
+
if acroform.key?(:XFA)
|
377
400
|
log(" . XFA = YES")
|
378
401
|
check_rights(:allowXFAForms)
|
379
402
|
|
@@ -400,7 +423,7 @@ begin
|
|
400
423
|
|
401
424
|
log("> Inspecting document streams...", :yellow)
|
402
425
|
@pdf.indirect_objects.find_all{|obj| obj.is_a?(Origami::Stream)}.each do |stream|
|
403
|
-
if stream.dictionary.
|
426
|
+
if stream.dictionary.key?(:Filter)
|
404
427
|
filters = stream.Filter
|
405
428
|
filters = [ filters ] if filters.is_a?(Origami::Name)
|
406
429
|
|
data/lib/origami/graphics/xobject.rb
CHANGED
@@ -659,22 +659,22 @@ module Origami
|
|
659
659
|
data = fd.read
|
660
660
|
else
|
661
661
|
data = File.binread(File.expand_path(path))
|
662
|
-
format ||= File.extname(path)
|
662
|
+
format ||= File.extname(path)[1..-1]
|
663
663
|
end
|
664
664
|
|
665
665
|
image = ImageXObject.new
|
666
666
|
|
667
667
|
raise ArgumentError, "Missing file format" if format.nil?
|
668
668
|
case format.downcase
|
669
|
-
when '
|
669
|
+
when 'jpg', 'jpeg', 'jpe', 'jif', 'jfif', 'jfi'
|
670
670
|
image.setFilter :DCTDecode
|
671
671
|
image.encoded_data = data
|
672
672
|
|
673
|
-
when '
|
673
|
+
when 'jp2','jpx','j2k','jpf','jpm','mj2'
|
674
674
|
image.setFilter :JPXDecode
|
675
675
|
image.encoded_data = data
|
676
676
|
|
677
|
-
when '.
|
677
|
+
when '.b2', 'jbig', 'jbig2'
|
678
678
|
image.setFilter :JBIG2Decode
|
679
679
|
image.encoded_data = data
|
680
680
|
else
|
data/lib/origami/parser.rb
CHANGED
@@ -188,26 +188,6 @@ module Origami
|
|
188
188
|
@data.string.dup if @data
|
189
189
|
end
|
190
190
|
|
191
|
-
private
|
192
|
-
|
193
|
-
#
|
194
|
-
# Attempt to promote an object using the deferred casts.
|
195
|
-
#
|
196
|
-
def try_object_promotion(obj)
|
197
|
-
return obj unless Origami::OPTIONS[:enable_type_propagation] and @deferred_casts.key?(obj.reference)
|
198
|
-
|
199
|
-
types = @deferred_casts[obj.reference]
|
200
|
-
types = [ types ] unless types.is_a?(::Array)
|
201
|
-
|
202
|
-
# Promote object if a compatible type is found.
|
203
|
-
cast_type = types.find {|type| type < obj.class }
|
204
|
-
if cast_type
|
205
|
-
obj = obj.cast_to(cast_type, self)
|
206
|
-
else
|
207
|
-
obj
|
208
|
-
end
|
209
|
-
end
|
210
|
-
|
211
191
|
def error(msg = "") #:nodoc:
|
212
192
|
log(VERBOSE_QUIET, 'error', :red, msg.red)
|
213
193
|
end
|
@@ -228,6 +208,26 @@ module Origami
|
|
228
208
|
log(VERBOSE_TRACE, 'trace', :cyan, msg)
|
229
209
|
end
|
230
210
|
|
211
|
+
private
|
212
|
+
|
213
|
+
#
|
214
|
+
# Attempt to promote an object using the deferred casts.
|
215
|
+
#
|
216
|
+
def try_object_promotion(obj)
|
217
|
+
return obj unless Origami::OPTIONS[:enable_type_propagation] and @deferred_casts.key?(obj.reference)
|
218
|
+
|
219
|
+
types = @deferred_casts[obj.reference]
|
220
|
+
types = [ types ] unless types.is_a?(::Array)
|
221
|
+
|
222
|
+
# Promote object if a compatible type is found.
|
223
|
+
cast_type = types.find {|type| type < obj.class }
|
224
|
+
if cast_type
|
225
|
+
obj = obj.cast_to(cast_type, self)
|
226
|
+
else
|
227
|
+
obj
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
231
|
def log(level, prefix, color, message) #:nodoc:
|
232
232
|
return unless @options[:verbosity] >= level
|
233
233
|
|
data/lib/origami/pdf.rb
CHANGED
@@ -543,7 +543,7 @@ module Origami
|
|
543
543
|
#
|
544
544
|
# Iterates over the children of an object, avoiding cycles.
|
545
545
|
#
|
546
|
-
def walk_object(object, excludes: [])
|
546
|
+
def walk_object(object, excludes: [], &block)
|
547
547
|
return enum_for(__method__, object, excludes: excludes) unless block_given?
|
548
548
|
|
549
549
|
return if excludes.include?(object)
|
@@ -553,18 +553,18 @@ module Origami
|
|
553
553
|
when Dictionary
|
554
554
|
object.each_value do |value|
|
555
555
|
yield(value)
|
556
|
-
walk_object(value, excludes: excludes)
|
556
|
+
walk_object(value, excludes: excludes, &block)
|
557
557
|
end
|
558
558
|
|
559
559
|
when Array
|
560
560
|
object.each do |child|
|
561
561
|
yield(child)
|
562
|
-
walk_object(child, excludes: excludes)
|
562
|
+
walk_object(child, excludes: excludes, &block)
|
563
563
|
end
|
564
564
|
|
565
565
|
when Stream
|
566
566
|
yield(object.dictionary)
|
567
|
-
walk_object(object.dictionary, excludes: excludes)
|
567
|
+
walk_object(object.dictionary, excludes: excludes, &block)
|
568
568
|
end
|
569
569
|
end
|
570
570
|
|
data/lib/origami/trailer.rb
CHANGED
@@ -123,13 +123,13 @@ module Origami
|
|
123
123
|
end
|
124
124
|
|
125
125
|
if not stream.scan(@@regexp_xref)
|
126
|
-
|
126
|
+
raise InvalidTrailerError, "Cannot get startxref value"
|
127
127
|
end
|
128
128
|
|
129
129
|
startxref = stream['startxref'].to_i
|
130
130
|
|
131
131
|
if not stream.scan(@@regexp_close)
|
132
|
-
|
132
|
+
parser.warn("No %%EOF token found") if parser
|
133
133
|
end
|
134
134
|
|
135
135
|
Trailer.new(startxref, dictionary)
|
data/lib/origami/version.rb
CHANGED
data/test/test_objects.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
class TestPDFObjects < Minitest::Test
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@pdf = PDF.new.append_page
|
8
|
+
@contents = ContentStream.new("abc")
|
9
|
+
@pdf.pages.first.Contents = @contents
|
10
|
+
@pdf.Catalog.Loop = @pdf.Catalog
|
11
|
+
@pdf.save StringIO.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_pdf_object_tree
|
15
|
+
assert_instance_of Catalog, @pdf.Catalog
|
16
|
+
assert_nil @pdf.Catalog.parent
|
17
|
+
|
18
|
+
@pdf.each_object(recursive: true) do |obj|
|
19
|
+
assert_kind_of Origami::Object, obj
|
20
|
+
assert_equal obj.document, @pdf
|
21
|
+
|
22
|
+
unless obj.indirect?
|
23
|
+
assert_kind_of Origami::Object, obj.parent
|
24
|
+
assert_equal obj.parent.document, @pdf
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
enum = @pdf.each_object(recursive: true)
|
29
|
+
assert_kind_of Enumerator, enum
|
30
|
+
assert enum.include?(@pdf.Catalog.Pages)
|
31
|
+
assert enum.include?(@contents.dictionary)
|
32
|
+
end
|
33
|
+
end
|
data/test/test_pdf.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: origami
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Guillaume Delugré
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '5.0'
|
41
41
|
description: Origami is a pure Ruby library to parse, modify and generate PDF documents.
|
42
|
-
email:
|
42
|
+
email: origami@subvert.technology
|
43
43
|
executables:
|
44
44
|
- pdfsh
|
45
45
|
- pdfwalker
|
@@ -194,6 +194,7 @@ files:
|
|
194
194
|
- test/test_actions.rb
|
195
195
|
- test/test_annotations.rb
|
196
196
|
- test/test_forms.rb
|
197
|
+
- test/test_objects.rb
|
197
198
|
- test/test_pages.rb
|
198
199
|
- test/test_pdf.rb
|
199
200
|
- test/test_pdf_attachment.rb
|
@@ -225,7 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
226
|
requirements:
|
226
227
|
- gtk2 to run the graphical interface
|
227
228
|
rubyforge_project:
|
228
|
-
rubygems_version: 2.6.
|
229
|
+
rubygems_version: 2.6.13
|
229
230
|
signing_key:
|
230
231
|
specification_version: 4
|
231
232
|
summary: Ruby framework to manipulate PDF documents
|