origami-docspring 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +70 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README.md +115 -0
  5. data/bin/config/pdfcop.conf.yml +236 -0
  6. data/bin/pdf2pdfa +87 -0
  7. data/bin/pdf2ruby +333 -0
  8. data/bin/pdfcop +474 -0
  9. data/bin/pdfdecompress +97 -0
  10. data/bin/pdfdecrypt +91 -0
  11. data/bin/pdfencrypt +113 -0
  12. data/bin/pdfexplode +223 -0
  13. data/bin/pdfextract +277 -0
  14. data/bin/pdfmetadata +144 -0
  15. data/bin/pdfsh +12 -0
  16. data/bin/shell/console.rb +128 -0
  17. data/bin/shell/hexdump.rb +59 -0
  18. data/bin/shell/irbrc +69 -0
  19. data/examples/README.md +34 -0
  20. data/examples/attachments/attachment.rb +38 -0
  21. data/examples/attachments/nested_document.rb +51 -0
  22. data/examples/encryption/encryption.rb +28 -0
  23. data/examples/events/events.rb +72 -0
  24. data/examples/flash/flash.rb +37 -0
  25. data/examples/flash/helloworld.swf +0 -0
  26. data/examples/forms/javascript.rb +54 -0
  27. data/examples/forms/xfa.rb +115 -0
  28. data/examples/javascript/hello_world.rb +22 -0
  29. data/examples/javascript/js_emulation.rb +54 -0
  30. data/examples/loop/goto.rb +32 -0
  31. data/examples/loop/named.rb +33 -0
  32. data/examples/signature/signature.rb +65 -0
  33. data/examples/uri/javascript.rb +56 -0
  34. data/examples/uri/open-uri.rb +21 -0
  35. data/examples/uri/submitform.rb +47 -0
  36. data/lib/origami/3d.rb +364 -0
  37. data/lib/origami/acroform.rb +321 -0
  38. data/lib/origami/actions.rb +318 -0
  39. data/lib/origami/annotations.rb +711 -0
  40. data/lib/origami/array.rb +242 -0
  41. data/lib/origami/boolean.rb +90 -0
  42. data/lib/origami/catalog.rb +418 -0
  43. data/lib/origami/collections.rb +144 -0
  44. data/lib/origami/compound.rb +161 -0
  45. data/lib/origami/destinations.rb +252 -0
  46. data/lib/origami/dictionary.rb +192 -0
  47. data/lib/origami/encryption.rb +1085 -0
  48. data/lib/origami/extensions/fdf.rb +347 -0
  49. data/lib/origami/extensions/ppklite.rb +422 -0
  50. data/lib/origami/filespec.rb +197 -0
  51. data/lib/origami/filters/ascii.rb +211 -0
  52. data/lib/origami/filters/ccitt/tables.rb +267 -0
  53. data/lib/origami/filters/ccitt.rb +357 -0
  54. data/lib/origami/filters/crypt.rb +38 -0
  55. data/lib/origami/filters/dct.rb +54 -0
  56. data/lib/origami/filters/flate.rb +69 -0
  57. data/lib/origami/filters/jbig2.rb +57 -0
  58. data/lib/origami/filters/jpx.rb +47 -0
  59. data/lib/origami/filters/lzw.rb +170 -0
  60. data/lib/origami/filters/predictors.rb +292 -0
  61. data/lib/origami/filters/runlength.rb +129 -0
  62. data/lib/origami/filters.rb +364 -0
  63. data/lib/origami/font.rb +196 -0
  64. data/lib/origami/functions.rb +79 -0
  65. data/lib/origami/graphics/colors.rb +230 -0
  66. data/lib/origami/graphics/instruction.rb +98 -0
  67. data/lib/origami/graphics/path.rb +182 -0
  68. data/lib/origami/graphics/patterns.rb +174 -0
  69. data/lib/origami/graphics/render.rb +62 -0
  70. data/lib/origami/graphics/state.rb +149 -0
  71. data/lib/origami/graphics/text.rb +225 -0
  72. data/lib/origami/graphics/xobject.rb +918 -0
  73. data/lib/origami/graphics.rb +38 -0
  74. data/lib/origami/header.rb +75 -0
  75. data/lib/origami/javascript.rb +713 -0
  76. data/lib/origami/linearization.rb +330 -0
  77. data/lib/origami/metadata.rb +172 -0
  78. data/lib/origami/name.rb +135 -0
  79. data/lib/origami/null.rb +65 -0
  80. data/lib/origami/numeric.rb +181 -0
  81. data/lib/origami/obfuscation.rb +245 -0
  82. data/lib/origami/object.rb +760 -0
  83. data/lib/origami/optionalcontent.rb +183 -0
  84. data/lib/origami/outline.rb +54 -0
  85. data/lib/origami/outputintents.rb +85 -0
  86. data/lib/origami/page.rb +722 -0
  87. data/lib/origami/parser.rb +269 -0
  88. data/lib/origami/parsers/fdf.rb +56 -0
  89. data/lib/origami/parsers/pdf/lazy.rb +176 -0
  90. data/lib/origami/parsers/pdf/linear.rb +122 -0
  91. data/lib/origami/parsers/pdf.rb +118 -0
  92. data/lib/origami/parsers/ppklite.rb +57 -0
  93. data/lib/origami/pdf.rb +1108 -0
  94. data/lib/origami/reference.rb +134 -0
  95. data/lib/origami/signature.rb +702 -0
  96. data/lib/origami/stream.rb +705 -0
  97. data/lib/origami/string.rb +444 -0
  98. data/lib/origami/template/patterns.rb +56 -0
  99. data/lib/origami/template/widgets.rb +151 -0
  100. data/lib/origami/trailer.rb +190 -0
  101. data/lib/origami/tree.rb +62 -0
  102. data/lib/origami/version.rb +23 -0
  103. data/lib/origami/webcapture.rb +100 -0
  104. data/lib/origami/xfa/config.rb +453 -0
  105. data/lib/origami/xfa/connectionset.rb +146 -0
  106. data/lib/origami/xfa/datasets.rb +49 -0
  107. data/lib/origami/xfa/localeset.rb +42 -0
  108. data/lib/origami/xfa/package.rb +59 -0
  109. data/lib/origami/xfa/pdf.rb +73 -0
  110. data/lib/origami/xfa/signature.rb +42 -0
  111. data/lib/origami/xfa/sourceset.rb +43 -0
  112. data/lib/origami/xfa/stylesheet.rb +44 -0
  113. data/lib/origami/xfa/template.rb +1691 -0
  114. data/lib/origami/xfa/xdc.rb +42 -0
  115. data/lib/origami/xfa/xfa.rb +146 -0
  116. data/lib/origami/xfa/xfdf.rb +43 -0
  117. data/lib/origami/xfa/xmpmeta.rb +43 -0
  118. data/lib/origami/xfa.rb +62 -0
  119. data/lib/origami/xreftable.rb +557 -0
  120. data/lib/origami.rb +47 -0
  121. data/test/dataset/calc.pdf +85 -0
  122. data/test/dataset/crypto.pdf +82 -0
  123. data/test/dataset/empty.pdf +49 -0
  124. data/test/test_actions.rb +27 -0
  125. data/test/test_annotations.rb +68 -0
  126. data/test/test_forms.rb +30 -0
  127. data/test/test_native_types.rb +83 -0
  128. data/test/test_object_tree.rb +33 -0
  129. data/test/test_pages.rb +60 -0
  130. data/test/test_pdf.rb +20 -0
  131. data/test/test_pdf_attachment.rb +34 -0
  132. data/test/test_pdf_create.rb +24 -0
  133. data/test/test_pdf_encrypt.rb +95 -0
  134. data/test/test_pdf_parse.rb +134 -0
  135. data/test/test_pdf_parse_lazy.rb +69 -0
  136. data/test/test_pdf_sign.rb +97 -0
  137. data/test/test_streams.rb +184 -0
  138. data/test/test_xrefs.rb +67 -0
  139. metadata +243 -0
data/bin/pdf2ruby ADDED
@@ -0,0 +1,333 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ =begin
4
+
5
+ = Info
6
+ Convert a PDF document to an Origami script.
7
+ Experimental.
8
+
9
+ = License:
10
+ Copyright (C) 2016 Guillaume Delugré.
11
+
12
+ Origami is free software: you can redistribute it and/or modify
13
+ it under the terms of the GNU Lesser General Public License as published by
14
+ the Free Software Foundation, either version 3 of the License, or
15
+ (at your option) any later version.
16
+
17
+ Origami is distributed in the hope that it will be useful,
18
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
+ GNU Lesser General Public License for more details.
21
+
22
+ You should have received a copy of the GNU Lesser General Public License
23
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
24
+
25
+ =end
26
+
27
+ require 'optparse'
28
+ require 'fileutils'
29
+ require 'colorize'
30
+
31
+ begin
32
+ require 'origami'
33
+ rescue LoadError
34
+ $: << File.join(__dir__, '../lib')
35
+ require 'origami'
36
+ end
37
+ include Origami
38
+
39
# Conversion state shared by the generator methods of this script.
@var_hash    = {}  # object reference => name of the Ruby variable standing for it
@code_hash   = {}  # variable name => generated code (:body, optional :afterDecl list)
@obj_route   = []  # variable names, in the order the objects are visited
@current_idx = nil # index in @obj_route of the object currently being converted
43
+
44
# Command-line handling for pdf2ruby.
class OptParser
  # Parses +args+ in place (recognised switches are removed from it) and
  # returns a Hash holding the :verbose and :xstreams flags, both false
  # unless the matching switch was given.
  def self.parse(args)
    settings = { xstreams: false, verbose: false }

    OptionParser.new do |cli|
      cli.banner = <<BANNER
Usage: #{$0} [-v] [-x] <PDF-file>
Convert a PDF document to an Origami script (experimental).

Options:
BANNER

      cli.on("-v", "--verbose", "Verbose mode") { settings[:verbose] = true }

      cli.on("-x", "--extract-streams", "Extract PDF streams to separate files") do
        settings[:xstreams] = true
      end

      # -h prints the usage text and terminates the program.
      cli.on_tail("-h", "--help", "Show this message") do
        puts cli
        exit
      end
    end.parse!(args)

    settings
  end
end
77
+
78
@options = OptParser.parse(ARGV)

# Whatever remains on the command line once the switches are consumed is the
# input document; without it there is nothing to convert.
abort "Error: No filename was specified. #{$0} --help for details." if ARGV.empty?
TARGET = ARGV.shift

# Turn on both Origami type options before the document is read.
%i[enable_type_propagation enable_type_guessing].each do |flag|
  Origami::OPTIONS[flag] = true
end

# The script is written to <basename>/<basename>.rb; extracted streams go to
# the STREAM_DIR sub-directory of that folder.
TARGET_DIR = File.basename(TARGET, '.pdf')
TARGET_FILE = File.join(TARGET_DIR, "#{TARGET_DIR}.rb")
STREAM_DIR = "streams"
91
+
92
# Converts an Origami object into the Ruby source text that rebuilds it.
#
# obj::          the object to convert.
# inclevel::     nesting depth, forwarded to the array/dictionary converters.
# internalname:: Ruby expression designating +obj+ inside the generated
#                script; forwarded so references can be patched in afterwards.
# do_convert::   when true, ".to_o" is appended to the code produced for
#                strings, names, arrays and dictionaries.
#
# Returns the generated code as a String.
# Raises RuntimeError when +obj+ is of an unsupported class.
def objectToRuby(obj, inclevel = 0, internalname = nil, do_convert = false)
  code = ""

  code <<
    case obj
    when Origami::Null
      "Null.new"
    when Origami::Boolean, Origami::Number, Origami::Name, Origami::String
      literalToRuby(obj)
    when Origami::Dictionary
      customclass = nil
      if obj.class != Origami::Dictionary
        # Strip the Origami prefix if there is no collision; Origami::Encoding
        # keeps its full name.
        first = (obj.class == Origami::Encoding) ? 0 : 1
        customclass = obj.class.to_s.split('::')[first..-1].join('::')
      end
      dictionaryToRuby(obj, inclevel, internalname, customclass)
    when Origami::Array
      arrayToRuby(obj, inclevel, internalname)
    when Origami::Stream
      if obj.is_a?(ObjectStream) || obj.is_a?(XRefStream)
        # Object streams and cross-reference streams are not converted. Emit
        # the literal nil (as referenceToRuby does for unresolved references)
        # instead of appending a Ruby nil, which raised TypeError.
        "nil"
      else
        streamToRuby(obj, internalname)
      end
    when Origami::Reference
      referenceToRuby(obj, internalname)
    else
      raise RuntimeError, "Unknown object type: #{obj.class}"
    end

  case obj
  when Origami::String, Origami::Dictionary, Origami::Array, Origami::Name
    code << ".to_o" if do_convert
  end

  code
end
125
+
126
# Returns the Ruby code standing for the target of +ref+.
#
# * Reference unknown to @var_hash: the literal "nil".
# * Target already on the route, up to and including the object currently
#   being converted: the literal "nil", and an assignment
#   "<internalname> = <variable>" is queued under :afterDecl so the link is
#   restored once the variable has been declared.
# * Otherwise: the variable name, which is appended to @obj_route if it is
#   not there yet.
def referenceToRuby(ref, internalname)
  target = @var_hash[ref]
  return "nil" if target.nil?

  unless @obj_route[0..@current_idx].include?(target)
    @obj_route << target unless @obj_route.include?(target)
    return target
  end

  entry = (@code_hash[target] ||= {})
  (entry[:afterDecl] ||= []) << "#{internalname} = #{target}"

  "nil"
end
142
+
143
# Returns the Ruby literal for +obj+: the #inspect form of its #value.
def literalToRuby(obj)
  native = obj.value
  native.inspect
end
146
+
147
# Generates the Ruby array literal for +arr+.
#
# Each element is converted with objectToRuby one nesting level deeper and is
# designated "<internalname>[<index>]" in the generated script. The literal
# starts on a new line, prefixed by +inclevel+ spaces.
def arrayToRuby(arr, inclevel, internalname)
  elements = []
  arr.each do |item|
    # elements.length is the index of the item about to be converted.
    elements << objectToRuby(item, inclevel + 1, "#{internalname}[#{elements.length}]").to_s
  end

  "\n" + " " * inclevel + "[" + elements.join(", ") + "]"
end
161
+
162
# Generates the Ruby code for the dictionary +dict+.
#
# With +customtype+ the result is "<customtype>.new(<entries>)", the entries
# coming from dictionaryToHashMap; otherwise a plain hash literal is built
# here. The code starts on a new line, prefixed by +inclevel+ spaces.
#
# While building the plain literal, a referenced object that still carries its
# default "obj_<n>" variable name and is not on the route yet is renamed to
# "<key>_<n>", the key being lower-cased with non-alphanumeric characters
# replaced by underscores.
def dictionaryToRuby(dict, inclevel, internalname, customtype = nil)
  indent = " " * inclevel
  code = "\n" + indent

  if customtype
    code << "#{customtype}.new(#{dictionaryToHashMap(dict, inclevel, internalname)}"
    code << indent + ")"
    return code
  end

  code << "{\n"
  position = 0
  dict.each_pair do |key, val|
    rubyname = literalToRuby(key)

    if val.is_a?(Origami::Reference) && @var_hash[val] && @var_hash[val][0, 3] == "obj"
      oldname = @var_hash[val]
      prefix = key.value.to_s.downcase.gsub(/[^[[:alnum:]]]/,'_')
      newname = (prefix + "_" + oldname[4..-1]).tr('.', '_')

      # Objects already on the route keep the name they were emitted under.
      unless @obj_route.include?(oldname)
        @var_hash[val] = newname
        @code_hash[newname] = @code_hash[oldname]
        @code_hash.delete(oldname)
      end
    end

    value_code = objectToRuby(val, inclevel + 2, "#{internalname}[#{rubyname}]")
    code << " " * (inclevel + 1) + "#{rubyname} => #{value_code}"
    code << ", " unless position == dict.length - 1
    code << "\n"

    position += 1
  end

  code << indent + "}"
end
199
+
200
# Generates the "key => value" lines of +dict+, one entry per line, without
# the enclosing braces. The result starts with a newline and every entry is
# indented by inclevel + 1 spaces.
#
# A referenced object that still carries its default "obj_<n>" variable name
# and is not on the route yet is renamed to "<key>_<n>". The key is lower-cased
# and its non-alphanumeric characters are replaced by underscores, exactly as
# dictionaryToRuby does, so that the new name is a valid Ruby identifier even
# for keys such as "Foo-Bar".
def dictionaryToHashMap(dict, inclevel, internalname)
  i = 0
  code = "\n"
  dict.each_pair do |key, val|
    rubyname = literalToRuby(key)
    subintname = "#{internalname}[#{rubyname}]"

    if val.is_a?(Origami::Reference) && @var_hash[val] && @var_hash[val][0, 3] == "obj"
      oldname = @var_hash[val]
      prefix = key.value.to_s.downcase.gsub(/[^[[:alnum:]]]/, '_')
      newname = (prefix + "_" + oldname[4..-1]).tr('.', '_')

      # Objects already on the route keep the name they were emitted under.
      unless @obj_route.include?(oldname)
        @var_hash[val] = newname
        @code_hash[newname] = @code_hash[oldname]
        @code_hash.delete(oldname)
      end
    end

    code << " " * (inclevel + 1) +
            "#{rubyname} => #{objectToRuby(val, inclevel + 2, subintname)}"
    code << ", " unless i == dict.length - 1
    i += 1
    code << "\n"
  end

  code
end
227
+
228
# Generates the "Stream.new(...)" expression rebuilding the stream +stm+.
#
# The stream dictionary is passed along minus its :Length entry. With the
# :xstreams option the stream data is written to
# <TARGET_DIR>/<STREAM_DIR>/stm_<refno>.data and the generated code reads it
# back with File.binread; otherwise the data is embedded as a string literal.
def streamToRuby(stm, internalname)
  dict = stm.dictionary.dup.delete_if {|k, _| k == :Length}

  code = "Stream.new("

  if @options[:xstreams]
    stmdir = File.join(TARGET_DIR, STREAM_DIR)
    Dir::mkdir(stmdir) unless File.directory? stmdir
    stmfile = File.join(stmdir, "stm_#{stm.reference.refno}.data")
    File.binwrite(stmfile, stm.data)

    # The path derives from the input file name: emit it through #inspect so
    # that quotes or backslashes in it cannot break the generated script.
    code << "File.binread(#{stmfile.inspect})"
  else
    code << stm.data.inspect << ".b"
  end

  code << ", #{dictionaryToHashMap(dict, 1, internalname)}" unless dict.empty?
  code << ")"

  code
end
249
+
250
#
# Main script: loads the document, walks the object graph starting at the
# Catalog and writes the Origami script that rebuilds it.
#
puts "[*] ".red + "Loading document '#{TARGET}'"

verbosity = @options[:verbose] ? Parser::VERBOSE_TRACE : Parser::VERBOSE_QUIET
target = PDF.read(TARGET, verbosity: verbosity)
puts "[*] ".red + "Document successfully loaded into Origami"

Dir::mkdir(TARGET_DIR) unless File.directory? TARGET_DIR

# Name of the variable holding the PDF object in the generated script.
DOCREF = "pdf"

# Block form: the script file is closed even if the conversion raises, and it
# is fully written before success is reported.
File.open(TARGET_FILE, 'w', 0700) do |fd|
  fd.puts <<RUBY
#!/usr/bin/env ruby

begin
require 'origami'
rescue LoadError
$: << "#{File.join(__dir__, '../lib')}"
require 'origami'
end
include Origami
using Origami::TypeConversion

# Disable automatic type casting.
Origami::OPTIONS[:enable_type_guessing] = false

OUTPUT = "\#{File.basename(__FILE__, '.rb')}.pdf"

#
# Creates the PDF object.
#
#{DOCREF} = PDF.new

RUBY

  # Every indirect object first gets a default "obj_<number>" variable name.
  puts "[*] ".red + "Retrieving all indirect objects..."
  target.each_object(compressed: true) do |obj|
    @var_hash[obj.reference] = "obj_#{obj.no}"
  end

  puts "[*] ".red + "Retrieving the document Catalog..."
  catalog = target.Catalog

  @var_hash[catalog.reference] = "#{DOCREF}.Catalog"
  @obj_route.push "#{DOCREF}.Catalog"

  # Converting an object may append newly discovered objects to @obj_route,
  # so iterate until the index catches up with the end of the route.
  puts "[*] ".red + "Processing the object hierarchy..."
  @current_idx = 0
  until @current_idx == @obj_route.size
    var_name = @obj_route[@current_idx]
    obj = target[@var_hash.key(var_name)]

    @code_hash[var_name] ||= {}
    @code_hash[var_name][:body] = objectToRuby(obj, 0, var_name, true)

    @current_idx += 1
  end

  # Declarations are emitted in reverse visiting order; each one is followed
  # by the assignments queued for it under :afterDecl.
  @obj_route.reverse_each do |varname|
    fd.puts "#{varname} = #{@code_hash[varname][:body]}"
    Array(@code_hash[varname][:afterDecl]).each { |decl| fd.puts decl }
    fd.puts
  end

  @obj_route.each do |varname|
    fd.puts "#{DOCREF}.insert(#{varname})" unless varname == "#{DOCREF}.Catalog"
  end
  fd.puts

  fd.puts <<RUBY
#
# Saves the document.
#
#{DOCREF}.save(OUTPUT)
RUBY
end

puts "[*] ".red + "Successfully generated script '#{TARGET_FILE}'"