origamindee 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README.md +131 -0
  5. data/bin/config/pdfcop.conf.yml +236 -0
  6. data/bin/pdf2pdfa +87 -0
  7. data/bin/pdf2ruby +333 -0
  8. data/bin/pdfcop +476 -0
  9. data/bin/pdfdecompress +97 -0
  10. data/bin/pdfdecrypt +91 -0
  11. data/bin/pdfencrypt +113 -0
  12. data/bin/pdfexplode +223 -0
  13. data/bin/pdfextract +277 -0
  14. data/bin/pdfmetadata +143 -0
  15. data/bin/pdfsh +12 -0
  16. data/bin/shell/console.rb +128 -0
  17. data/bin/shell/hexdump.rb +59 -0
  18. data/bin/shell/irbrc +69 -0
  19. data/examples/README.md +34 -0
  20. data/examples/attachments/attachment.rb +38 -0
  21. data/examples/attachments/nested_document.rb +51 -0
  22. data/examples/encryption/encryption.rb +28 -0
  23. data/examples/events/events.rb +72 -0
  24. data/examples/flash/flash.rb +37 -0
  25. data/examples/flash/helloworld.swf +0 -0
  26. data/examples/forms/javascript.rb +54 -0
  27. data/examples/forms/xfa.rb +115 -0
  28. data/examples/javascript/hello_world.rb +22 -0
  29. data/examples/javascript/js_emulation.rb +54 -0
  30. data/examples/loop/goto.rb +32 -0
  31. data/examples/loop/named.rb +33 -0
  32. data/examples/signature/signature.rb +65 -0
  33. data/examples/uri/javascript.rb +56 -0
  34. data/examples/uri/open-uri.rb +21 -0
  35. data/examples/uri/submitform.rb +47 -0
  36. data/lib/origami/3d.rb +364 -0
  37. data/lib/origami/acroform.rb +321 -0
  38. data/lib/origami/actions.rb +318 -0
  39. data/lib/origami/annotations.rb +711 -0
  40. data/lib/origami/array.rb +242 -0
  41. data/lib/origami/boolean.rb +90 -0
  42. data/lib/origami/catalog.rb +418 -0
  43. data/lib/origami/collections.rb +144 -0
  44. data/lib/origami/compound.rb +161 -0
  45. data/lib/origami/destinations.rb +252 -0
  46. data/lib/origami/dictionary.rb +192 -0
  47. data/lib/origami/encryption.rb +1084 -0
  48. data/lib/origami/extensions/fdf.rb +347 -0
  49. data/lib/origami/extensions/ppklite.rb +422 -0
  50. data/lib/origami/filespec.rb +197 -0
  51. data/lib/origami/filters/ascii.rb +211 -0
  52. data/lib/origami/filters/ccitt/tables.rb +267 -0
  53. data/lib/origami/filters/ccitt.rb +357 -0
  54. data/lib/origami/filters/crypt.rb +38 -0
  55. data/lib/origami/filters/dct.rb +54 -0
  56. data/lib/origami/filters/flate.rb +69 -0
  57. data/lib/origami/filters/jbig2.rb +57 -0
  58. data/lib/origami/filters/jpx.rb +47 -0
  59. data/lib/origami/filters/lzw.rb +170 -0
  60. data/lib/origami/filters/predictors.rb +292 -0
  61. data/lib/origami/filters/runlength.rb +129 -0
  62. data/lib/origami/filters.rb +364 -0
  63. data/lib/origami/font.rb +196 -0
  64. data/lib/origami/functions.rb +79 -0
  65. data/lib/origami/graphics/colors.rb +230 -0
  66. data/lib/origami/graphics/instruction.rb +98 -0
  67. data/lib/origami/graphics/path.rb +182 -0
  68. data/lib/origami/graphics/patterns.rb +174 -0
  69. data/lib/origami/graphics/render.rb +62 -0
  70. data/lib/origami/graphics/state.rb +149 -0
  71. data/lib/origami/graphics/text.rb +225 -0
  72. data/lib/origami/graphics/xobject.rb +918 -0
  73. data/lib/origami/graphics.rb +38 -0
  74. data/lib/origami/header.rb +75 -0
  75. data/lib/origami/javascript.rb +713 -0
  76. data/lib/origami/linearization.rb +330 -0
  77. data/lib/origami/metadata.rb +172 -0
  78. data/lib/origami/name.rb +135 -0
  79. data/lib/origami/null.rb +65 -0
  80. data/lib/origami/numeric.rb +181 -0
  81. data/lib/origami/obfuscation.rb +245 -0
  82. data/lib/origami/object.rb +760 -0
  83. data/lib/origami/optionalcontent.rb +183 -0
  84. data/lib/origami/outline.rb +54 -0
  85. data/lib/origami/outputintents.rb +85 -0
  86. data/lib/origami/page.rb +722 -0
  87. data/lib/origami/parser.rb +269 -0
  88. data/lib/origami/parsers/fdf.rb +56 -0
  89. data/lib/origami/parsers/pdf/lazy.rb +176 -0
  90. data/lib/origami/parsers/pdf/linear.rb +122 -0
  91. data/lib/origami/parsers/pdf.rb +118 -0
  92. data/lib/origami/parsers/ppklite.rb +57 -0
  93. data/lib/origami/pdf.rb +1108 -0
  94. data/lib/origami/reference.rb +134 -0
  95. data/lib/origami/signature.rb +702 -0
  96. data/lib/origami/stream.rb +705 -0
  97. data/lib/origami/string.rb +444 -0
  98. data/lib/origami/template/patterns.rb +56 -0
  99. data/lib/origami/template/widgets.rb +151 -0
  100. data/lib/origami/trailer.rb +190 -0
  101. data/lib/origami/tree.rb +62 -0
  102. data/lib/origami/version.rb +23 -0
  103. data/lib/origami/webcapture.rb +100 -0
  104. data/lib/origami/xfa/config.rb +453 -0
  105. data/lib/origami/xfa/connectionset.rb +146 -0
  106. data/lib/origami/xfa/datasets.rb +49 -0
  107. data/lib/origami/xfa/localeset.rb +42 -0
  108. data/lib/origami/xfa/package.rb +59 -0
  109. data/lib/origami/xfa/pdf.rb +73 -0
  110. data/lib/origami/xfa/signature.rb +42 -0
  111. data/lib/origami/xfa/sourceset.rb +43 -0
  112. data/lib/origami/xfa/stylesheet.rb +44 -0
  113. data/lib/origami/xfa/template.rb +1691 -0
  114. data/lib/origami/xfa/xdc.rb +42 -0
  115. data/lib/origami/xfa/xfa.rb +146 -0
  116. data/lib/origami/xfa/xfdf.rb +43 -0
  117. data/lib/origami/xfa/xmpmeta.rb +43 -0
  118. data/lib/origami/xfa.rb +62 -0
  119. data/lib/origami/xreftable.rb +557 -0
  120. data/lib/origami.rb +47 -0
  121. data/test/dataset/calc.pdf +85 -0
  122. data/test/dataset/crypto.pdf +36 -0
  123. data/test/dataset/empty.pdf +49 -0
  124. data/test/test_actions.rb +27 -0
  125. data/test/test_annotations.rb +68 -0
  126. data/test/test_forms.rb +30 -0
  127. data/test/test_native_types.rb +83 -0
  128. data/test/test_object_tree.rb +33 -0
  129. data/test/test_pages.rb +60 -0
  130. data/test/test_pdf.rb +20 -0
  131. data/test/test_pdf_attachment.rb +34 -0
  132. data/test/test_pdf_create.rb +24 -0
  133. data/test/test_pdf_encrypt.rb +102 -0
  134. data/test/test_pdf_parse.rb +134 -0
  135. data/test/test_pdf_parse_lazy.rb +69 -0
  136. data/test/test_pdf_sign.rb +97 -0
  137. data/test/test_streams.rb +184 -0
  138. data/test/test_xrefs.rb +67 -0
  139. metadata +280 -0
data/bin/pdfencrypt ADDED
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ =begin
4
+
5
+ = Info
6
+ Encrypts a PDF document.
7
+
8
+ = License
9
+ Copyright (C) 2016 Guillaume Delugré.
10
+
11
+ Origami is free software: you can redistribute it and/or modify
12
+ it under the terms of the GNU Lesser General Public License as published by
13
+ the Free Software Foundation, either version 3 of the License, or
14
+ (at your option) any later version.
15
+
16
+ Origami is distributed in the hope that it will be useful,
17
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ GNU Lesser General Public License for more details.
20
+
21
+ You should have received a copy of the GNU Lesser General Public License
22
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
23
+
24
+ =end
25
+
26
+ begin
27
+ require 'origami'
28
+ rescue LoadError
29
+ $: << File.join(__dir__, '../lib')
30
+ require 'origami'
31
+ end
32
+ include Origami
33
+
34
+ require 'optparse'
35
+
# Command-line option handling for the pdfencrypt script.
class OptParser
  # Text shown at the top of the --help output.
  BANNER = <<USAGE
Usage: #{$0} [<PDF-file>] [-p <password>] [-c <cipher>] [-s <key-size>] [--hardened] [-o <output-file>]
Encrypts a PDF document. Supports RC4 40 to 128 bits, AES128, AES256.
Bug reports or feature requests at: http://github.com/gdelugre/origami

Options:
USAGE

  # Builds the OptionParser for this tool.
  #
  # options:: Hash that receives the value of every switch found while parsing.
  #
  # Returns the configured OptionParser instance.
  def self.parser(options)
    OptionParser.new do |opts|
      opts.banner = BANNER

      opts.on("-o", "--output FILE", "Output PDF file (stdout by default)") do |file|
        options[:output] = file
      end

      opts.on("-p", "--password PASSWORD", "Password of the document") do |password|
        options[:password] = password
      end

      opts.on("-c", "--cipher CIPHER", "Cipher used to encrypt the document (Default: AES)") do |cipher|
        options[:cipher] = cipher
      end

      # Integer coercion makes OptionParser reject non-numeric sizes
      # (OptionParser::InvalidArgument) instead of silently turning them into 0.
      opts.on("-s", "--key-size KEYSIZE", Integer, "Key size in bits (Default: 256)") do |size|
        options[:key_size] = size
      end

      opts.on("--hardened", "Use stronger key validation scheme (only AES-256)") do
        options[:hardened] = true
      end

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end
  end

  # Parses +args+ in place: recognized switches are removed from +args+,
  # leaving only the positional arguments behind.
  #
  # Returns the options Hash, pre-filled with the defaults below.
  # Raises OptionParser::ParseError subclasses on invalid switches or values.
  def self.parse(args)
    options = {
      output: STDOUT,
      password: '',
      cipher: 'aes',
      key_size: 256,
      hardened: false
    }

    parser(options).parse!(args)

    options
  end
end
91
+
# Script body: read a PDF, encrypt it with the requested settings and save it.
# Any StandardError terminates the script with "<class>: <message>".
begin
  @options = OptParser.parse(ARGV)

  # OptParser.parse strips the switches from ARGV; a remaining argument is
  # the input file, and no argument means the document is read from STDIN.
  source = ARGV.empty? ? STDIN : ARGV.shift
  reader_params = { verbosity: Parser::VERBOSE_QUIET }

  document = PDF.read(source, reader_params)

  # The same password is used for both the user and the owner.
  passphrase = @options[:password]
  document.encrypt(
    user_passwd: passphrase,
    owner_passwd: passphrase,
    cipher: @options[:cipher],
    key_size: @options[:key_size],
    hardened: @options[:hardened]
  )

  document.save(@options[:output], noindent: true)
rescue => error
  abort "#{error.class}: #{error.message}"
end
data/bin/pdfexplode ADDED
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ =begin
4
+
5
+ = Info
6
+ Explodes a PDF into separate documents.
7
+
8
+ = License
9
+ Copyright (C) 2016 Guillaume Delugré.
10
+
11
+ Origami is free software: you can redistribute it and/or modify
12
+ it under the terms of the GNU Lesser General Public License as published by
13
+ the Free Software Foundation, either version 3 of the License, or
14
+ (at your option) any later version.
15
+
16
+ Origami is distributed in the hope that it will be useful,
17
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ GNU Lesser General Public License for more details.
20
+
21
+ You should have received a copy of the GNU Lesser General Public License
22
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
23
+
24
+ =end
25
+
26
+ begin
27
+ require 'origami'
28
+ rescue LoadError
29
+ $: << File.join(__dir__, '../lib')
30
+ require 'origami'
31
+ end
32
+ include Origami
33
+
34
+ require 'optparse'
35
+ require 'rexml/document'
36
+
# Command-line option handling for the pdfexplode script.
class OptParser
  # Text shown at the top of the --help output.
  BANNER = <<USAGE
Usage: #{$0} <PDF-file> [-r <range>] [-t pages|rsrc] [-d <output-directory>]
Explodes a document into separate documents.
Bug reports or feature requests at: http://github.com/gdelugre/origami

Options:
USAGE

  # Converts a 1-based page specification into a 0-based Range.
  #
  #   "5"   => 4..4
  #   "1-3" => 0..2
  #   "2-"  => 1..-1   (open end: up to the last page)
  #   "-3"  => 0..2    (open start: from the first page)
  #   "-"   => 0..-1   (every page)
  #
  # spec:: String given to the --range switch.
  def self.parse_page_range(spec)
    unless spec.include?('-')
      page = spec.to_i
      return Range.new(page - 1, page - 1)
    end

    # A limit of 2 keeps a trailing empty bound ("2-" => ["2", ""]), so both
    # missing bounds are detected the same way. Testing the converted integer
    # would not work: "".to_i is 0, which is truthy in Ruby.
    lower, upper = spec.split('-', 2)
    from = lower.empty? ? 1 : lower.to_i
    to = upper.empty? ? 0 : upper.to_i

    Range.new(from - 1, to - 1)
  end

  # Builds the OptionParser for this tool.
  #
  # options:: Hash that receives the value of every switch found while parsing.
  #
  # Returns the configured OptionParser instance.
  def self.parser(options)
    OptionParser.new do |opts|
      opts.banner = BANNER

      opts.on("-d", "--output-dir DIR", "Output directory.") do |dir|
        options[:output_dir] = dir
      end

      opts.on("-r", "--range PAGES", "Page range (e.g: 2-, 1-3, 5). Default to '-'.") do |spec|
        options[:page_range] = parse_page_range(spec)
      end

      opts.on("-t", "--type TYPE", "Split by type. Can be 'pages' or 'rsrc'. Default to 'pages'.") do |type|
        options[:split_by] = type
      end

      opts.on_tail("-h", "--help", "Show this message.") do
        puts opts
        exit
      end
    end
  end

  # Parses +args+ in place: recognized switches are removed from +args+,
  # leaving only the positional arguments behind.
  #
  # Returns the options Hash; :page_range defaults to every page (0..-1) and
  # :split_by to 'pages'. :output_dir is only present when -d was given.
  def self.parse(args)
    options = {
      page_range: (0..-1),
      split_by: 'pages'
    }

    parser(options).parse!(args)

    options
  end
end
91
+
92
+ begin
@options = OptParser.parse(ARGV)

# The input file is mandatory; abort exits the script, so the shift below is
# only reached when an argument is present.
abort "Error: No filename was specified. #{$0} --help for details." if ARGV.empty?
target = ARGV.shift

# Default output directory: next to the input file, named "<basename>.explode".
if @options[:output_dir].nil?
  stem = File.join(File.dirname(target), File.basename(target, '.pdf'))
  @options[:output_dir] = "#{stem}.explode"
end

Origami::OPTIONS[:ignore_bad_references] = true
OUTPUT_DIR = @options[:output_dir]
Dir.mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
108
+
# Writes one reduced single-page document to OUTPUT_DIR/+filename+.
#
# The page is copied, the caller's block adjusts the copy's resources, and
# every content-stream line mentioning one of +dropped_names+ is removed
# before the copy is appended to the new document.
#
# page::          page to copy (must respond to copy and each_content_stream).
# filename::      name of the output file, relative to OUTPUT_DIR.
# dropped_names:: resource names whose mentions are stripped from the streams.
def write_reduced_page(page, filename, dropped_names)
  output_file = File.join(OUTPUT_DIR, filename)
  # Names are matched literally (they used to be interpolated into a regex,
  # where metacharacters in a name changed or broke the match). Interpolation
  # is kept so each name is stringified exactly as before.
  needles = dropped_names.map { |res| "#{res}" }

  PDF.write(output_file) do |pdf|
    reduced = page.copy
    yield reduced

    reduced.each_content_stream do |stream|
      kept = stream.data.lines.reject do |line|
        needles.any? { |needle| line.include?(needle) }
      end
      stream.data = kept.join
    end

    STDERR.puts "Creating #{output_file}..."
    pdf.append_page(reduced)
  end
end

# Explodes the resources of one type on a page into separate documents.
#
# For the resource type as a whole, and then for each resource object of that
# type, two files are written to OUTPUT_DIR: a "keeponly" variant containing
# only that resource, and an "excluded" variant containing everything else.
# Nothing is written when the page has no resource of this type.
#
# n::    page number used in the output file names.
# page:: page object (must respond to resources, copy, Resources= and
#        each_content_stream, as used below).
# type:: resource type key, e.g. Resources::FONT.
def split_by_rsrc(n, page, type)
  all_rsrc = page.resources
  type_rsrc = page.resources(type)

  # Nothing to isolate when the page has no resource of this type.
  return if type_rsrc.empty?

  other_rsrc = all_rsrc.keys - type_rsrc.keys

  # Keep only specified resource type.
  write_reduced_page(page, "page_#{n}_keeponly_#{type}.pdf", other_rsrc) do |reduced|
    # New resource dictionary with only matching resources.
    reduced.Resources = Resources.new(type => type_rsrc)
  end

  # Remove all specified resource type.
  write_reduced_page(page, "page_#{n}_excluded_#{type}.pdf", type_rsrc.keys) do |reduced|
    # New resource dictionary with no resource of specified type.
    reduced.Resources = reduced.Resources.copy
    reduced.Resources.delete(type)
  end

  # Now treating each resource object separately.
  type_rsrc.each_pair do |name, rsrc|
    anyother_rsrc = all_rsrc.keys - [name]

    # Keep only specified resource object.
    write_reduced_page(page, "page_#{n}_keeponly_#{type}_#{name}.pdf", anyother_rsrc) do |reduced|
      # New resource dictionary with only specified resource object.
      reduced.Resources = Resources.new(type => { name => rsrc })
    end

    # Remove only specified resource object.
    write_reduced_page(page, "page_#{n}_excluded_#{type}_#{name}.pdf", [name]) do |reduced|
      # Copy both dictionary levels so the original page is left untouched.
      reduced.Resources = reduced.Resources.copy
      reduced.Resources[type] = reduced.Resources.send(type).copy
      reduced.Resources[type].delete(name)
    end
  end
end
187
+
params = { verbosity: Parser::VERBOSE_QUIET }
pdf = PDF.read(target, params)

page_range = @options[:page_range]
selected_pages = pdf.pages[page_range]

# NOTE(review): assumes pdf.pages behaves like an Array, whose range lookup
# returns nil when the range starts past the last page. Fail with an explicit
# message instead of a NoMethodError on nil.
raise ArgumentError, "Page range is out of bounds: #{page_range}" if selected_pages.nil?

# Output files are numbered with 1-based page numbers, starting at the first
# page of the requested range.
selected_pages.each.with_index(page_range.first + 1) do |page, i|
  case @options[:split_by]
  when 'pages'
    # One document per page.
    output_file = File.join(OUTPUT_DIR, "page_#{i}.pdf")
    PDF.write(output_file) do |doc|
      STDERR.puts "Creating #{output_file}..."
      doc.append_page(page)
    end

  when 'rsrc'
    # Several documents per page, one set for each resource type.
    [
      Resources::EXTGSTATE,
      Resources::COLORSPACE,
      Resources::PATTERN,
      Resources::SHADING,
      Resources::XOBJECT,
      Resources::FONT,
      Resources::PROPERTIES
    ].each { |type| split_by_rsrc(i, page, type) }

  else
    raise ArgumentError, "Unknown split option: #{@options[:split_by]}"
  end
end
220
+
221
+ rescue
222
+ abort "#{$!.class}: #{$!.message} #{$!.backtrace.join($/)}"
223
+ end
data/bin/pdfextract ADDED
@@ -0,0 +1,277 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ =begin
4
+
5
+ = Info
6
+ Extracts valuable data from a PDF document. Can extract:
7
+ - decoded streams
8
+ - JavaScript
9
+ - file attachments
10
+
11
+ = License
12
+ Copyright (C) 2016 Guillaume Delugré.
13
+
14
+ Origami is free software: you can redistribute it and/or modify
15
+ it under the terms of the GNU Lesser General Public License as published by
16
+ the Free Software Foundation, either version 3 of the License, or
17
+ (at your option) any later version.
18
+
19
+ Origami is distributed in the hope that it will be useful,
20
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
+ GNU Lesser General Public License for more details.
23
+
24
+ You should have received a copy of the GNU Lesser General Public License
25
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
26
+
27
+ =end
28
+
29
+ begin
30
+ require 'origami'
31
+ rescue LoadError
32
+ $: << File.join(__dir__, '../lib')
33
+ require 'origami'
34
+ end
35
+ include Origami
36
+
37
+ require 'optparse'
38
+ require 'rexml/document'
39
+
# Command-line option handling for the pdfextract script.
class OptParser
  # Text shown at the top of the --help output. The flag summary lists every
  # extraction switch defined in .parser, including -i.
  BANNER = <<USAGE
Usage: #{$0} <PDF-file> [-afijms] [-d <output-directory>]
Extracts various data out of a document (streams, scripts, images, fonts, metadata, attachments).
Bug reports or feature requests at: http://github.com/gdelugre/origami

Options:
USAGE

  # Builds the OptionParser for this tool.
  #
  # options:: Hash that receives the value of every switch found while parsing.
  #
  # Returns the configured OptionParser instance.
  def self.parser(options)
    OptionParser.new do |opts|
      opts.banner = BANNER

      opts.on("-d", "--output-dir DIR", "Output directory") do |dir|
        options[:output_dir] = dir
      end

      # Boolean switches: each one simply sets its key to true.
      [
        ["-s", "--streams", "Extracts all decoded streams", :streams],
        ["-a", "--attachments", "Extracts file attachments", :attachments],
        ["-f", "--fonts", "Extracts embedded font files", :fonts],
        ["-j", "--js", "Extracts JavaScript scripts", :javascript],
        ["-m", "--metadata", "Extracts metadata streams", :metadata],
        ["-i", "--images", "Extracts embedded images", :images]
      ].each do |short, long, description, key|
        opts.on(short, long, description) { options[key] = true }
      end

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end
  end

  # Parses +args+ in place: recognized switches are removed from +args+,
  # leaving only the positional arguments behind.
  #
  # Returns a Hash containing only the switches that were given.
  def self.parse(args)
    options = {}

    parser(options).parse!(args)

    options
  end
end
96
+
97
+ begin
@options = OptParser.parse(ARGV)

# The input file is mandatory; abort exits the script, so the shift below is
# only reached when an argument is present.
abort "Error: No filename was specified. #{$0} --help for details." if ARGV.empty?
target = ARGV.shift

# When no extraction switch was given, enable this default set
# (metadata is not part of it).
if %i[streams javascript attachments fonts metadata images].none? { |opt| @options[opt] }
  %i[images attachments fonts javascript streams].each { |opt| @options[opt] = true }
end

# Default output directory: "<basename>.dump", relative to the working directory.
@options[:output_dir] = "#{File.basename(target, '.pdf')}.dump" if @options[:output_dir].nil?

# Force data extraction, even for invalid FlateDecode streams.
Origami::OPTIONS[:ignore_zlib_errors] = true
Origami::OPTIONS[:ignore_png_errors] = true

OUTPUT_DIR = @options[:output_dir]
Dir.mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)

params = { verbosity: Parser::VERBOSE_QUIET }
pdf = PDF.read(target, params)
130
+
# Dump the data of every stream object into OUTPUT_DIR/streams.
if @options[:streams]
  stream_dir = File.join(OUTPUT_DIR, "streams")
  Dir.mkdir(stream_dir) unless File.directory?(stream_dir)

  all_streams = pdf.each_object.select { |obj| obj.is_a?(Stream) }

  # Count only the streams that were actually written; a failure is
  # reported on STDERR and the stream is skipped.
  nstreams = all_streams.count do |stream|
    stream_file = File.join(stream_dir, "stream_#{stream.reference.refno}.dmp")
    begin
      File.binwrite(stream_file, stream.data)
      true
    rescue => error
      STDERR.puts "Cannot decode stream #{stream.reference}: #{error.message}"
      false
    end
  end

  puts "Extracted #{nstreams} PDF streams to '#{stream_dir}'."
end
150
+
# Dump every JavaScript script into OUTPUT_DIR/scripts: first the objects
# stored under a JS key, then the <script> elements of the XFA form, if any.
if @options[:javascript]
  nscripts = 0
  js_dir = File.join(OUTPUT_DIR, "scripts")
  Dir.mkdir(js_dir) unless File.directory?(js_dir)

  pdf.ls(/^JS$/).each do |script|
    script_file = File.join(js_dir, "script_#{script.hash}.js")
    script_data =
      case script
      when Stream then script.data
      else script.value
      end

    File.binwrite(script_file, script_data)
    nscripts += 1
  end

  # Also checking for presence of JavaScript in XML forms.
  if pdf.form? && pdf.Catalog.AcroForm.has_key?(:XFA)
    xfa = pdf.Catalog.AcroForm.XFA

    xml =
      case xfa
      when Array
        # Only the odd-indexed entries are concatenated
        # (presumably the array alternates packet names and streams).
        packets = ""
        index = 0
        xfa.each do |packet|
          packets << packet.solve.data if index.odd?
          index += 1
        end
        packets
      when Stream
        xfa.data
      else
        # This branch used to call an undefined `reject` method. Report the
        # problem and skip the XFA scripts so the other extractions still run.
        STDERR.puts "Malformed XFA dictionary"
        nil
      end

    if xml
      xfadoc = REXML::Document.new(xml)
      REXML::XPath.match(xfadoc, "//script").each do |script|
        script_file = File.join(js_dir, "script_#{script.hash}.js")
        File.binwrite(script_file, script.text)
        nscripts += 1
      end
    end
  end

  puts "Extracted #{nscripts} scripts to '#{js_dir}'."
end
199
+
# Dump every embedded file into OUTPUT_DIR/attachments.
if @options[:attachments]
  attachments_dir = File.join(OUTPUT_DIR, "attachments")
  Dir.mkdir(attachments_dir) unless File.directory?(attachments_dir)
  nattach = 0

  pdf.each_attachment do |name, attachment|
    # Slashes and NUL bytes in the attachment name are replaced by underscores
    # before the name is used to build the output path.
    safe_name = name.to_utf8.tr("\/\x00", "_")
    attached_file = File.join(attachments_dir, "attached_#{File.basename(safe_name)}")

    # Only attachments carrying an embedded file stream are written.
    next unless attachment && attachment.EF && attachment.EF.F.is_a?(Stream)

    File.binwrite(attached_file, attachment.EF.F.data)
    nattach += 1
  end

  puts "Extracted #{nattach} attachments to '#{attachments_dir}'."
end
217
+
# Dump embedded font files into OUTPUT_DIR/fonts, one file per stream that is
# referenced by a font descriptor, named after the descriptor's FontName.
if @options[:fonts]
  fonts_dir = File.join(OUTPUT_DIR, "fonts")
  Dir.mkdir(fonts_dir) unless File.directory?(fonts_dir)
  nfonts = 0

  candidate_streams = pdf.each_object.select { |obj| obj.is_a?(Stream) }
  candidate_streams.each do |stream|
    descriptor = stream.xrefs.find { |obj| obj.is_a?(FontDescriptor) }
    next unless descriptor

    font_file = File.join(fonts_dir, File.basename(descriptor.FontName.value.to_s))
    File.binwrite(font_file, stream.data)
    nfonts += 1
  end

  puts "Extracted #{nfonts} fonts to '#{fonts_dir}'."
end
234
+
# Dump every metadata stream into OUTPUT_DIR/metadata as an .xml file named
# after the stream's object number.
if @options[:metadata]
  metadata_dir = File.join(OUTPUT_DIR, "metadata")
  Dir.mkdir(metadata_dir) unless File.directory?(metadata_dir)
  nmeta = 0

  metadata_streams = pdf.each_object.select { |obj| obj.is_a?(MetadataStream) }
  metadata_streams.each do |stream|
    refno = stream.reference.refno
    File.binwrite(File.join(metadata_dir, "metadata_#{refno}.xml"), stream.data)
    nmeta += 1
  end

  puts "Extracted #{nmeta} metadata streams to '#{metadata_dir}'."
end
248
+
# Dump every image XObject into OUTPUT_DIR/images. An image that cannot be
# converted is reported on STDERR (with backtrace) and skipped.
if @options[:images]
  image_dir = File.join(OUTPUT_DIR, "images")
  Dir.mkdir(image_dir) unless File.directory?(image_dir)
  nimages = 0

  image_streams = pdf.each_object.select { |obj| obj.is_a?(Graphics::ImageXObject) }
  image_streams.each do |stream|
    begin
      ext, image_data = stream.to_image_file
      image_file = File.join(image_dir, "image_#{stream.reference.refno}.#{ext}")

      # Non-PNG output keeps its CMYK color space, so warn the user about it.
      cmyk = ext != 'png' && stream.ColorSpace == Graphics::Color::Space::DEVICE_CMYK
      STDERR.puts "Warning: file '#{image_file}' is intended to be viewed in CMYK color space." if cmyk

      File.binwrite(image_file, image_data)
      nimages += 1
    rescue => error
      STDERR.puts "Unable to decode image (stream #{stream.reference.refno}). #{error.message}"
      STDERR.puts error.backtrace.join($/)
    end
  end

  puts "Extracted #{nimages} images to '#{image_dir}'."
end
273
+
274
+ rescue
275
+ STDERR.puts $!.backtrace.join($/)
276
+ abort "#{$!.class}: #{$!.message}"
277
+ end