origami 1.2.7 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/README.md +112 -0
  4. data/bin/config/pdfcop.conf.yml +232 -233
  5. data/bin/gui/about.rb +27 -37
  6. data/bin/gui/config.rb +108 -117
  7. data/bin/gui/file.rb +416 -365
  8. data/bin/gui/gtkhex.rb +1138 -1153
  9. data/bin/gui/hexview.rb +55 -57
  10. data/bin/gui/imgview.rb +48 -51
  11. data/bin/gui/menu.rb +388 -386
  12. data/bin/gui/properties.rb +114 -130
  13. data/bin/gui/signing.rb +571 -617
  14. data/bin/gui/textview.rb +77 -95
  15. data/bin/gui/treeview.rb +382 -387
  16. data/bin/gui/walker.rb +227 -232
  17. data/bin/gui/xrefs.rb +56 -60
  18. data/bin/pdf2pdfa +53 -57
  19. data/bin/pdf2ruby +212 -228
  20. data/bin/pdfcop +338 -348
  21. data/bin/pdfdecompress +58 -65
  22. data/bin/pdfdecrypt +56 -60
  23. data/bin/pdfencrypt +75 -80
  24. data/bin/pdfexplode +185 -182
  25. data/bin/pdfextract +201 -218
  26. data/bin/pdfmetadata +83 -82
  27. data/bin/pdfsh +4 -5
  28. data/bin/pdfwalker +1 -2
  29. data/bin/shell/.irbrc +45 -82
  30. data/bin/shell/console.rb +105 -130
  31. data/bin/shell/hexdump.rb +40 -64
  32. data/examples/README.md +34 -0
  33. data/examples/attachments/attachment.rb +38 -0
  34. data/examples/attachments/nested_document.rb +51 -0
  35. data/examples/encryption/encryption.rb +28 -0
  36. data/{samples/actions/triggerevents/trigger.rb → examples/events/events.rb} +13 -16
  37. data/examples/flash/flash.rb +37 -0
  38. data/{samples → examples}/flash/helloworld.swf +0 -0
  39. data/examples/forms/javascript.rb +54 -0
  40. data/examples/forms/xfa.rb +115 -0
  41. data/examples/javascript/hello_world.rb +22 -0
  42. data/examples/javascript/js_emulation.rb +54 -0
  43. data/examples/loop/goto.rb +32 -0
  44. data/examples/loop/named.rb +33 -0
  45. data/examples/signature/signature.rb +65 -0
  46. data/examples/uri/javascript.rb +56 -0
  47. data/examples/uri/open-uri.rb +21 -0
  48. data/examples/uri/submitform.rb +47 -0
  49. data/lib/origami.rb +29 -42
  50. data/lib/origami/3d.rb +350 -225
  51. data/lib/origami/acroform.rb +262 -288
  52. data/lib/origami/actions.rb +268 -288
  53. data/lib/origami/annotations.rb +697 -722
  54. data/lib/origami/array.rb +258 -184
  55. data/lib/origami/boolean.rb +74 -84
  56. data/lib/origami/catalog.rb +397 -434
  57. data/lib/origami/collections.rb +144 -0
  58. data/lib/origami/destinations.rb +233 -194
  59. data/lib/origami/dictionary.rb +253 -232
  60. data/lib/origami/encryption.rb +1274 -1243
  61. data/lib/origami/export.rb +232 -268
  62. data/lib/origami/extensions/fdf.rb +307 -220
  63. data/lib/origami/extensions/ppklite.rb +368 -435
  64. data/lib/origami/filespec.rb +197 -0
  65. data/lib/origami/filters.rb +301 -295
  66. data/lib/origami/filters/ascii.rb +177 -180
  67. data/lib/origami/filters/ccitt.rb +528 -535
  68. data/lib/origami/filters/crypt.rb +26 -35
  69. data/lib/origami/filters/dct.rb +46 -52
  70. data/lib/origami/filters/flate.rb +95 -94
  71. data/lib/origami/filters/jbig2.rb +49 -55
  72. data/lib/origami/filters/jpx.rb +38 -44
  73. data/lib/origami/filters/lzw.rb +189 -183
  74. data/lib/origami/filters/predictors.rb +221 -235
  75. data/lib/origami/filters/runlength.rb +103 -104
  76. data/lib/origami/font.rb +173 -186
  77. data/lib/origami/functions.rb +67 -81
  78. data/lib/origami/graphics.rb +25 -21
  79. data/lib/origami/graphics/colors.rb +178 -187
  80. data/lib/origami/graphics/instruction.rb +79 -85
  81. data/lib/origami/graphics/path.rb +142 -148
  82. data/lib/origami/graphics/patterns.rb +160 -167
  83. data/lib/origami/graphics/render.rb +43 -50
  84. data/lib/origami/graphics/state.rb +138 -153
  85. data/lib/origami/graphics/text.rb +188 -205
  86. data/lib/origami/graphics/xobject.rb +819 -815
  87. data/lib/origami/header.rb +63 -78
  88. data/lib/origami/javascript.rb +596 -597
  89. data/lib/origami/linearization.rb +285 -290
  90. data/lib/origami/metadata.rb +139 -148
  91. data/lib/origami/name.rb +112 -148
  92. data/lib/origami/null.rb +53 -62
  93. data/lib/origami/numeric.rb +162 -175
  94. data/lib/origami/obfuscation.rb +186 -174
  95. data/lib/origami/object.rb +593 -573
  96. data/lib/origami/outline.rb +42 -47
  97. data/lib/origami/outputintents.rb +73 -82
  98. data/lib/origami/page.rb +703 -592
  99. data/lib/origami/parser.rb +238 -290
  100. data/lib/origami/parsers/fdf.rb +41 -33
  101. data/lib/origami/parsers/pdf.rb +75 -95
  102. data/lib/origami/parsers/pdf/lazy.rb +137 -0
  103. data/lib/origami/parsers/pdf/linear.rb +64 -66
  104. data/lib/origami/parsers/ppklite.rb +34 -70
  105. data/lib/origami/pdf.rb +1030 -1005
  106. data/lib/origami/reference.rb +102 -102
  107. data/lib/origami/signature.rb +591 -609
  108. data/lib/origami/stream.rb +668 -551
  109. data/lib/origami/string.rb +397 -373
  110. data/lib/origami/template/patterns.rb +56 -0
  111. data/lib/origami/template/widgets.rb +151 -0
  112. data/lib/origami/trailer.rb +144 -158
  113. data/lib/origami/tree.rb +62 -0
  114. data/lib/origami/version.rb +23 -0
  115. data/lib/origami/webcapture.rb +88 -79
  116. data/lib/origami/xfa.rb +2863 -2882
  117. data/lib/origami/xreftable.rb +472 -384
  118. data/test/dataset/calc.pdf +85 -0
  119. data/test/dataset/crypto.pdf +82 -0
  120. data/test/dataset/empty.pdf +49 -0
  121. data/test/test_actions.rb +27 -0
  122. data/test/test_annotations.rb +90 -0
  123. data/test/test_pages.rb +31 -0
  124. data/test/test_pdf.rb +16 -0
  125. data/test/test_pdf_attachment.rb +34 -0
  126. data/test/test_pdf_create.rb +24 -0
  127. data/test/test_pdf_encrypt.rb +95 -0
  128. data/test/test_pdf_parse.rb +96 -0
  129. data/test/test_pdf_sign.rb +58 -0
  130. data/test/test_streams.rb +182 -0
  131. data/test/test_xrefs.rb +67 -0
  132. metadata +88 -58
  133. data/README +0 -67
  134. data/bin/pdf2graph +0 -121
  135. data/bin/pdfcocoon +0 -104
  136. data/lib/origami/file.rb +0 -233
  137. data/samples/README.txt +0 -45
  138. data/samples/actions/launch/calc.rb +0 -87
  139. data/samples/actions/launch/winparams.rb +0 -22
  140. data/samples/actions/loop/loopgoto.rb +0 -24
  141. data/samples/actions/loop/loopnamed.rb +0 -21
  142. data/samples/actions/named/named.rb +0 -31
  143. data/samples/actions/samba/smbrelay.rb +0 -26
  144. data/samples/actions/webbug/submitform.js +0 -26
  145. data/samples/actions/webbug/webbug-browser.rb +0 -68
  146. data/samples/actions/webbug/webbug-js.rb +0 -67
  147. data/samples/actions/webbug/webbug-reader.rb +0 -90
  148. data/samples/attachments/attach.rb +0 -40
  149. data/samples/attachments/attached.txt +0 -1
  150. data/samples/crypto/crypto.rb +0 -28
  151. data/samples/digsig/signed.rb +0 -46
  152. data/samples/exploits/cve-2008-2992-utilprintf.rb +0 -87
  153. data/samples/exploits/cve-2009-0927-geticon.rb +0 -65
  154. data/samples/exploits/exploit_customdictopen.rb +0 -55
  155. data/samples/exploits/getannots.rb +0 -69
  156. data/samples/flash/flash.rb +0 -31
  157. data/samples/javascript/attached.txt +0 -1
  158. data/samples/javascript/js.rb +0 -52
  159. data/templates/patterns.rb +0 -66
  160. data/templates/widgets.rb +0 -173
  161. data/templates/xdp.rb +0 -92
  162. data/test/ts_pdf.rb +0 -50
@@ -1,34 +1,33 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  =begin
4
4
 
5
- = Author:
6
- Guillaume Delugré <guillaume/at/security-labs.org>
5
+ = Info
6
+ Explodes a PDF into separate documents.
7
7
 
8
- = Info:
9
- Explodes a PDF into separate documents.
10
- = License:
11
- Origami is free software: you can redistribute it and/or modify
12
- it under the terms of the GNU Lesser General Public License as published by
13
- the Free Software Foundation, either version 3 of the License, or
14
- (at your option) any later version.
8
+ = License
9
+ Copyright (C) 2016 Guillaume Delugré.
15
10
 
16
- Origami is distributed in the hope that it will be useful,
17
- but WITHOUT ANY WARRANTY; without even the implied warranty of
18
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
- GNU Lesser General Public License for more details.
11
+ Origami is free software: you can redistribute it and/or modify
12
+ it under the terms of the GNU Lesser General Public License as published by
13
+ the Free Software Foundation, either version 3 of the License, or
14
+ (at your option) any later version.
20
15
 
21
- You should have received a copy of the GNU Lesser General Public License
22
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
16
+ Origami is distributed in the hope that it will be useful,
17
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ GNU Lesser General Public License for more details.
20
+
21
+ You should have received a copy of the GNU Lesser General Public License
22
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
23
23
 
24
24
  =end
25
25
 
26
26
  begin
27
- require 'origami'
27
+ require 'origami'
28
28
  rescue LoadError
29
- ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
30
- $: << ORIGAMIDIR
31
- require 'origami'
29
+ $: << File.join(__dir__, '../lib')
30
+ require 'origami'
32
31
  end
33
32
  include Origami
34
33
 
@@ -36,185 +35,189 @@ require 'optparse'
36
35
  require 'rexml/document'
37
36
 
38
37
  class OptParser
39
- BANNER = <<USAGE
38
+ BANNER = <<USAGE
40
39
  Usage: #{$0} <PDF-file> [-r <range>] [-t pages|rsrc] [-d <output-directory>]
41
40
  Explodes a document into separate documents.
42
- Bug reports or feature requests at: http://origami-pdf.googlecode.com/
41
+ Bug reports or feature requests at: http://github.com/gdelugre/origami
43
42
 
44
43
  Options:
45
44
  USAGE
46
45
 
47
- def self.parser(options)
48
- OptionParser.new do |opts|
49
- opts.banner = BANNER
50
-
51
- opts.on("-d", "--output-dir DIR", "Output directory.") do |d|
52
- options[:output_dir] = d
53
- end
54
-
55
- opts.on("-r", "--range PAGES", "Page range (e.g: 2-, 1-3, 5). Default to '-'.") do |r|
56
- range =
57
- if r.index('-').nil?
58
- page = r.to_i
59
- Range.new(page-1, page-1)
60
- else
61
- from, to = r.split('-').map{|bound| bound.to_i}
62
- from ||= 1
63
- to ||= 0
64
- Range.new(from-1, to-1)
65
- end
66
- options[:page_range] = range
67
- end
68
-
69
- opts.on("-t", "--type TYPE", "Split by type. Can be 'pages' or 'rsrc'. Default to 'pages'.") do |t|
70
- options[:split_by] = t
71
- end
72
-
73
- opts.on_tail("-h", "--help", "Show this message.") do
74
- puts opts
75
- exit
76
- end
46
+ def self.parser(options)
47
+ OptionParser.new do |opts|
48
+ opts.banner = BANNER
49
+
50
+ opts.on("-d", "--output-dir DIR", "Output directory.") do |d|
51
+ options[:output_dir] = d
52
+ end
53
+
54
+ opts.on("-r", "--range PAGES", "Page range (e.g: 2-, 1-3, 5). Default to '-'.") do |r|
55
+ range =
56
+ if r.index('-').nil?
57
+ page = r.to_i
58
+ Range.new(page-1, page-1)
59
+ else
60
+ from, to = r.split('-').map{|bound| bound.to_i}
61
+ from ||= 1
62
+ to ||= 0
63
+ Range.new(from-1, to-1)
64
+ end
65
+ options[:page_range] = range
66
+ end
67
+
68
+ opts.on("-t", "--type TYPE", "Split by type. Can be 'pages' or 'rsrc'. Default to 'pages'.") do |t|
69
+ options[:split_by] = t
70
+ end
71
+
72
+ opts.on_tail("-h", "--help", "Show this message.") do
73
+ puts opts
74
+ exit
75
+ end
76
+ end
77
77
  end
78
- end
79
78
 
80
- def self.parse(args)
81
- options =
82
- {
83
- :page_range => (0..-1),
84
- :split_by => 'pages'
85
- }
79
+ def self.parse(args)
80
+ options =
81
+ {
82
+ page_range: (0..-1),
83
+ split_by: 'pages'
84
+ }
86
85
 
87
- self.parser(options).parse!(args)
86
+ self.parser(options).parse!(args)
88
87
 
89
- options
90
- end
88
+ options
89
+ end
91
90
  end
92
91
 
93
92
  begin
94
- @options = OptParser.parse(ARGV)
95
-
96
- if ARGV.empty?
97
- STDERR.puts "Error: No filename was specified. #{$0} --help for details."
98
- exit 1
99
- else
100
- target = ARGV.shift
101
- end
102
-
103
- if @options[:output_dir].nil?
104
- @options[:output_dir] = "#{File.join(File.dirname(target), File.basename(target,'.pdf'))}.explode"
105
- end
106
-
107
- Origami::OPTIONS[:ignore_bad_references] = true
108
- OUTPUT_DIR = @options[:output_dir]
109
- Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
110
-
111
- def split_by_rsrc(n, page, type)
112
- all_rsrc = page.resources
113
- type_rsrc = page.ls_resources(type)
114
- other_rsrc = all_rsrc.keys - type_rsrc.keys
115
-
116
- unless type_rsrc.empty?
117
- # Keep only specified resource type.
118
- output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}.pdf")
119
- PDF.write(output_file) do |pdf|
120
- reduced = page.copy
121
- # New resource dictionary with only matching resources.
122
- reduced.Resources = Resources.new(type => type_rsrc)
123
- # Remove mention of other resources.
124
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
125
- delete_if {|line| other_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
126
-
127
- STDERR.puts "Creating #{output_file}..."
128
- pdf.append_page(reduced)
129
- end
130
-
131
- # Remove all specified resource type.
132
- output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}.pdf")
133
- PDF.write(output_file) do |pdf|
134
- reduced = page.copy
135
- # New resource dictionary with no resource of specified type.
136
- reduced.Resources = reduced.Resources.copy
137
- reduced.Resources.delete(type)
138
- # Remove mention this resource type.
139
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
140
- delete_if {|line| type_rsrc.keys.any?{|rsrc| line =~ /#{rsrc}/}}.join
141
-
142
- STDERR.puts "Creating #{output_file}..."
143
- pdf.append_page(reduced)
144
- end
145
-
146
- # Now treating each resource object separately.
147
- type_rsrc.each_pair do |name, rsrc|
148
- anyother_rsrc = all_rsrc.keys - [ name ]
149
- # Keey only specified resource object.
150
- output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}_#{name}.pdf")
151
- PDF.write(output_file) do |pdf|
152
- reduced = page.copy
153
- # New resource dictionary with only specified resource object.
154
- reduced.Resources = Resources.new(type => {name => rsrc})
155
- # Remove mention of all other resources.
156
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
157
- delete_if {|line| anyother_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
158
-
159
- STDERR.puts "Creating #{output_file}..."
160
- pdf.append_page(reduced)
161
- end
162
-
163
- # Remove only specified resource object.
164
- output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}_#{name}.pdf")
165
- PDF.write(output_file) do |pdf|
166
- reduced = page.copy
167
- # New resource dictionary with only specified resource object.
168
- reduced.Resources = reduced.Resources.copy
169
- reduced.Resources[type] = reduced.Resources.send(type).copy
170
- reduced.Resources[type].delete(name)
171
- # Remove mention of this resource only.
172
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
173
- delete_if {|line| line =~ /#{name}/}.join
174
-
175
- STDERR.puts "Creating #{output_file}..."
176
- pdf.append_page(reduced)
93
+ @options = OptParser.parse(ARGV)
94
+
95
+ if ARGV.empty?
96
+ abort "Error: No filename was specified. #{$0} --help for details."
97
+ else
98
+ target = ARGV.shift
99
+ end
100
+
101
+ if @options[:output_dir].nil?
102
+ @options[:output_dir] = "#{File.join(File.dirname(target), File.basename(target,'.pdf'))}.explode"
103
+ end
104
+
105
+ Origami::OPTIONS[:ignore_bad_references] = true
106
+ OUTPUT_DIR = @options[:output_dir]
107
+ Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
108
+
109
+ def split_by_rsrc(n, page, type)
110
+ all_rsrc = page.resources
111
+ type_rsrc = page.resources(type)
112
+ other_rsrc = all_rsrc.keys - type_rsrc.keys
113
+
114
+ unless type_rsrc.empty?
115
+ # Keep only specified resource type.
116
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}.pdf")
117
+ PDF.write(output_file) do |pdf|
118
+ reduced = page.copy
119
+ # New resource dictionary with only matching resources.
120
+ reduced.Resources = Resources.new(type => type_rsrc)
121
+ # Remove mention of other resources.
122
+ reduced.each_content_stream do |stream|
123
+ stream.data = stream.data.lines.
124
+ delete_if {|line| other_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
125
+ end
126
+
127
+ STDERR.puts "Creating #{output_file}..."
128
+ pdf.append_page(reduced)
129
+ end
130
+
131
+ # Remove all specified resource type.
132
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}.pdf")
133
+ PDF.write(output_file) do |pdf|
134
+ reduced = page.copy
135
+ # New resource dictionary with no resource of specified type.
136
+ reduced.Resources = reduced.Resources.copy
137
+ reduced.Resources.delete(type)
138
+ # Remove mention this resource type.
139
+ reduced.each_content_stream do |stream|
140
+ stream.data = stream.data.lines.
141
+ delete_if {|line| type_rsrc.keys.any?{|rsrc| line =~ /#{rsrc}/}}.join
142
+ end
143
+
144
+ STDERR.puts "Creating #{output_file}..."
145
+ pdf.append_page(reduced)
146
+ end
147
+
148
+ # Now treating each resource object separately.
149
+ type_rsrc.each_pair do |name, rsrc|
150
+ anyother_rsrc = all_rsrc.keys - [ name ]
151
+ # Keey only specified resource object.
152
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}_#{name}.pdf")
153
+ PDF.write(output_file) do |pdf|
154
+ reduced = page.copy
155
+ # New resource dictionary with only specified resource object.
156
+ reduced.Resources = Resources.new(type => {name => rsrc})
157
+ # Remove mention of all other resources.
158
+ reduced.each_content_stream do |stream|
159
+ stream.data = stream.data.lines.
160
+ delete_if {|line| anyother_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
161
+ end
162
+
163
+ STDERR.puts "Creating #{output_file}..."
164
+ pdf.append_page(reduced)
165
+ end
166
+
167
+ # Remove only specified resource object.
168
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}_#{name}.pdf")
169
+ PDF.write(output_file) do |pdf|
170
+ reduced = page.copy
171
+ # New resource dictionary with only specified resource object.
172
+ reduced.Resources = reduced.Resources.copy
173
+ reduced.Resources[type] = reduced.Resources.send(type).copy
174
+ reduced.Resources[type].delete(name)
175
+ # Remove mention of this resource only.
176
+ reduced.each_content_stream do |stream|
177
+ stream.data = stream.data.lines.
178
+ delete_if {|line| line =~ /#{name}/}.join
179
+ end
180
+
181
+ STDERR.puts "Creating #{output_file}..."
182
+ pdf.append_page(reduced)
183
+ end
184
+ end
177
185
  end
178
- end
179
186
  end
180
- end
181
-
182
- params =
183
- {
184
- :verbosity => Parser::VERBOSE_QUIET,
185
- }
186
- pdf = PDF.read(target, params)
187
-
188
- i = @options[:page_range].first + 1
189
- pdf.pages[@options[:page_range]].each do |page|
190
- case @options[:split_by]
191
- when 'pages'
192
- output_file = File.join(OUTPUT_DIR, "page_#{i}.pdf")
193
- PDF.write(output_file) do |pdf|
194
- STDERR.puts "Creating #{output_file}..."
195
- pdf.append_page(page)
187
+
188
+ params =
189
+ {
190
+ verbosity: Parser::VERBOSE_QUIET,
191
+ }
192
+ pdf = PDF.read(target, params)
193
+
194
+ i = @options[:page_range].first + 1
195
+ pdf.pages[@options[:page_range]].each do |page|
196
+ case @options[:split_by]
197
+ when 'pages'
198
+ output_file = File.join(OUTPUT_DIR, "page_#{i}.pdf")
199
+ PDF.write(output_file) do |pdf|
200
+ STDERR.puts "Creating #{output_file}..."
201
+ pdf.append_page(page)
202
+ end
203
+
204
+ when 'rsrc'
205
+ [ Resources::EXTGSTATE,
206
+ Resources::COLORSPACE,
207
+ Resources::PATTERN,
208
+ Resources::SHADING,
209
+ Resources::XOBJECT,
210
+ Resources::FONT,
211
+ Resources::PROPERTIES
212
+ ].each { |type| split_by_rsrc(i, page, type) }
213
+
214
+ else
215
+ raise ArgumentError, "Unknown split option: #{@options[:split_by]}"
196
216
  end
197
217
 
198
- when 'rsrc'
199
- [ Resources::EXTGSTATE,
200
- Resources::COLORSPACE,
201
- Resources::PATTERN,
202
- Resources::SHADING,
203
- Resources::XOBJECT,
204
- Resources::FONT,
205
- Resources::PROPERTIES
206
- ].each { |type| split_by_rsrc(i, page, type) }
207
-
208
- else
209
- raise ArgumentError, "Unknown split option: #{@options[:split_by]}"
218
+ i += 1
210
219
  end
211
220
 
212
- i += 1
213
- end
214
-
215
- rescue SystemExit
216
- rescue Exception => e
217
- STDERR.puts "#{e.class}: #{e.message} #{e.backtrace}"
218
- exit 1
221
+ rescue
222
+ abort "#{$!.class}: #{$!.message} #{$!.backtrace.join($/)}"
219
223
  end
220
-
@@ -1,38 +1,36 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  =begin
4
4
 
5
- = Author:
6
- Guillaume Delugré <guillaume/at/security-labs.org>
5
+ = Info
6
+ Extracts valuable data from a PDF document. Can extract:
7
+ - decoded streams
8
+ - JavaScript
9
+ - file attachments
7
10
 
8
- = Info:
9
- Extracts valuable data from a PDF document. Can extract:
10
- - decoded streams
11
- - JavaScript
12
- - file attachments
11
+ = License
12
+ Copyright (C) 2016 Guillaume Delugré.
13
13
 
14
- = License:
15
- Origami is free software: you can redistribute it and/or modify
16
- it under the terms of the GNU Lesser General Public License as published by
17
- the Free Software Foundation, either version 3 of the License, or
18
- (at your option) any later version.
14
+ Origami is free software: you can redistribute it and/or modify
15
+ it under the terms of the GNU Lesser General Public License as published by
16
+ the Free Software Foundation, either version 3 of the License, or
17
+ (at your option) any later version.
19
18
 
20
- Origami is distributed in the hope that it will be useful,
21
- but WITHOUT ANY WARRANTY; without even the implied warranty of
22
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
- GNU Lesser General Public License for more details.
19
+ Origami is distributed in the hope that it will be useful,
20
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
+ GNU Lesser General Public License for more details.
24
23
 
25
- You should have received a copy of the GNU Lesser General Public License
26
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
24
+ You should have received a copy of the GNU Lesser General Public License
25
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
27
26
 
28
27
  =end
29
28
 
30
29
  begin
31
- require 'origami'
30
+ require 'origami'
32
31
  rescue LoadError
33
- ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
34
- $: << ORIGAMIDIR
35
- require 'origami'
32
+ $: << File.join(__dir__, '../lib')
33
+ require 'origami'
36
34
  end
37
35
  include Origami
38
36
 
@@ -40,246 +38,231 @@ require 'optparse'
40
38
  require 'rexml/document'
41
39
 
42
40
  class OptParser
43
- BANNER = <<USAGE
41
+ BANNER = <<USAGE
44
42
  Usage: #{$0} <PDF-file> [-afjms] [-d <output-directory>]
45
43
  Extracts various data out of a document (streams, scripts, images, fonts, metadata, attachments).
46
- Bug reports or feature requests at: http://origami-pdf.googlecode.com/
44
+ Bug reports or feature requests at: http://github.com/gdelugre/origami
47
45
 
48
46
  Options:
49
47
  USAGE
50
48
 
51
- def self.parser(options)
52
- OptionParser.new do |opts|
53
- opts.banner = BANNER
49
+ def self.parser(options)
50
+ OptionParser.new do |opts|
51
+ opts.banner = BANNER
54
52
 
55
- opts.on("-d", "--output-dir DIR", "Output directory") do |d|
56
- options[:output_dir] = d
57
- end
53
+ opts.on("-d", "--output-dir DIR", "Output directory") do |d|
54
+ options[:output_dir] = d
55
+ end
58
56
 
59
- opts.on("-s", "--streams", "Extracts all decoded streams") do
60
- options[:streams] = true
61
- end
57
+ opts.on("-s", "--streams", "Extracts all decoded streams") do
58
+ options[:streams] = true
59
+ end
62
60
 
63
- opts.on("-a", "--attachments", "Extracts file attachments") do
64
- options[:attachments] = true
65
- end
61
+ opts.on("-a", "--attachments", "Extracts file attachments") do
62
+ options[:attachments] = true
63
+ end
66
64
 
67
- opts.on("-f", "--fonts", "Extracts embedded font files") do
68
- options[:fonts] = true
69
- end
65
+ opts.on("-f", "--fonts", "Extracts embedded font files") do
66
+ options[:fonts] = true
67
+ end
70
68
 
71
- opts.on("-j", "--js", "Extracts JavaScript scripts") do
72
- options[:javascript] = true
73
- end
69
+ opts.on("-j", "--js", "Extracts JavaScript scripts") do
70
+ options[:javascript] = true
71
+ end
74
72
 
75
- opts.on("-m", "--metadata", "Extracts metadata streams") do
76
- options[:metadata] = true
77
- end
73
+ opts.on("-m", "--metadata", "Extracts metadata streams") do
74
+ options[:metadata] = true
75
+ end
78
76
 
79
- opts.on("-i", "--images", "Extracts embedded images") do
80
- options[:images] = true
81
- end
77
+ opts.on("-i", "--images", "Extracts embedded images") do
78
+ options[:images] = true
79
+ end
82
80
 
83
- opts.on_tail("-h", "--help", "Show this message") do
84
- puts opts
85
- exit
86
- end
81
+ opts.on_tail("-h", "--help", "Show this message") do
82
+ puts opts
83
+ exit
84
+ end
85
+ end
87
86
  end
88
- end
89
87
 
90
- def self.parse(args)
91
- options =
92
- {
93
- }
88
+ def self.parse(args)
89
+ options = {}
94
90
 
95
- self.parser(options).parse!(args)
91
+ self.parser(options).parse!(args)
96
92
 
97
- options
98
- end
93
+ options
94
+ end
99
95
  end
100
96
 
101
97
  begin
102
- @options = OptParser.parse(ARGV)
103
-
104
- if ARGV.empty?
105
- STDERR.puts "Error: No filename was specified. #{$0} --help for details."
106
- exit 1
107
- else
108
- target = ARGV.shift
109
- end
110
-
111
- unless [:streams,:javascript,:attachments,:fonts,:metadata,:images].any? {|opt| @options[opt]}
112
- @options[:streams] =
113
- @options[:javascript] =
114
- @options[:fonts] =
115
- @options[:attachments] =
116
- @options[:images] = true
117
- end
118
-
119
- if @options[:output_dir].nil?
120
- @options[:output_dir] = "#{File.basename(target, '.pdf')}.dump"
121
- end
122
-
123
- # Force data extraction, even for invalid FlateDecode streams.
124
- Origami::OPTIONS[:ignore_zlib_errors] = true
125
-
126
- OUTPUT_DIR = @options[:output_dir]
127
- Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
128
-
129
- params =
130
- {
131
- :verbosity => Parser::VERBOSE_QUIET,
132
- }
133
- pdf = PDF.read(target, params)
134
-
135
- if @options[:streams]
136
- nstreams = 0
137
- Dir::mkdir("#{OUTPUT_DIR}/streams") unless File.directory?("#{OUTPUT_DIR}/streams")
138
-
139
- pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
140
- stream_file = "#{OUTPUT_DIR}/streams/stream_#{stream.reference.refno}.dmp"
141
- File.open(stream_file, "wb") do |fd|
142
- fd.write(stream.data)
143
- end
144
- nstreams += 1
98
+ @options = OptParser.parse(ARGV)
99
+
100
+ if ARGV.empty?
101
+ abort "Error: No filename was specified. #{$0} --help for details."
102
+ else
103
+ target = ARGV.shift
145
104
  end
146
105
 
147
- puts "Extracted #{nstreams} PDF streams to '#{OUTPUT_DIR}/streams'."
148
- end
106
+ unless %i[streams javascript attachments fonts metadata images].any? {|opt| @options[opt]}
107
+ @options[:streams] =
108
+ @options[:javascript] =
109
+ @options[:fonts] =
110
+ @options[:attachments] =
111
+ @options[:images] = true
112
+ end
149
113
 
150
- if @options[:javascript]
151
- nscripts = 0
152
- Dir::mkdir("#{OUTPUT_DIR}/scripts") unless File.directory?("#{OUTPUT_DIR}/scripts")
114
+ if @options[:output_dir].nil?
115
+ @options[:output_dir] = "#{File.basename(target, '.pdf')}.dump"
116
+ end
153
117
 
154
- pdf.ls(/^JS$/).each do |script|
155
- script_file = "#{OUTPUT_DIR}/scripts/script_#{script.hash}.js"
156
- File.open(script_file, "wb") do |fd|
157
- fd.write(
158
- case script
159
- when Stream then
160
- script.data
161
- else
162
- script.value
163
- end
164
- )
165
- end
166
- nscripts += 1
118
+ # Force data extraction, even for invalid FlateDecode streams.
119
+ Origami::OPTIONS[:ignore_zlib_errors] = true
120
+ Origami::OPTIONS[:ignore_png_errors] = true
121
+
122
+ OUTPUT_DIR = @options[:output_dir]
123
+ Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
124
+
125
+ params =
126
+ {
127
+ verbosity: Parser::VERBOSE_QUIET,
128
+ }
129
+ pdf = PDF.read(target, params)
130
+
131
+ if @options[:streams]
132
+ nstreams = 0
133
+ stream_dir = File.join(OUTPUT_DIR, "streams")
134
+ Dir::mkdir(stream_dir) unless File.directory?(stream_dir)
135
+
136
+ pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
137
+ stream_file = File.join(stream_dir, "stream_#{stream.reference.refno}.dmp")
138
+ File.binwrite(stream_file, stream.data)
139
+ nstreams += 1
140
+ end
141
+
142
+ puts "Extracted #{nstreams} PDF streams to '#{stream_dir}'."
167
143
  end
168
144
 
169
- # Also checking for presence of JavaScript in XML forms.
170
- if pdf.has_form? and pdf.Catalog.AcroForm.has_key?(:XFA)
171
- xfa = pdf.Catalog.AcroForm[:XFA].solve
172
-
173
- case xfa
174
- when Array then
175
- xml = ""
176
- i = 0
177
- xfa.each do |packet|
178
- if i % 2 == 1
179
- xml << packet.solve.data
145
+ if @options[:javascript]
146
+ nscripts = 0
147
+ js_dir = File.join(OUTPUT_DIR, "scripts")
148
+ Dir::mkdir(js_dir) unless File.directory?(js_dir)
149
+
150
+ pdf.ls(/^JS$/).each do |script|
151
+ script_file = File.join(js_dir, "script_#{script.hash}.js")
152
+ script_data =
153
+ case script
154
+ when Stream then script.data
155
+ else script.value
156
+ end
157
+
158
+ File.binwrite(script_file, script_data)
159
+ nscripts += 1
160
+ end
161
+
162
+ # Also checking for presence of JavaScript in XML forms.
163
+ if pdf.form? and pdf.Catalog.AcroForm.has_key?(:XFA)
164
+ xfa = pdf.Catalog.AcroForm[:XFA].solve
165
+
166
+ case xfa
167
+ when Array then
168
+ xml = ""
169
+ i = 0
170
+ xfa.each do |packet|
171
+ if i % 2 == 1
172
+ xml << packet.solve.data
173
+ end
174
+
175
+ i = i + 1
176
+ end
177
+ when Stream then
178
+ xml = xfa.data
179
+ else
180
+ reject("Malformed XFA dictionary")
180
181
  end
181
182
 
182
- i = i + 1
183
- end
184
- when Stream then
185
- xml = xfa.data
186
- else
187
- reject("Malformed XFA dictionary")
188
- end
189
-
190
- xfadoc = REXML::Document.new(xml)
191
- REXML::XPath.match(xfadoc, "//script").each do |script|
192
- script_file = "#{OUTPUT_DIR}/script_#{script.hash}.js"
193
- File.open(script_file, "wb") do |fd|
194
- fd.write(script.text)
183
+ xfadoc = REXML::Document.new(xml)
184
+ REXML::XPath.match(xfadoc, "//script").each do |script|
185
+ script_file = File.join(js_dir, "script_#{script.hash}.js")
186
+ File.binwrite(script_file, script.text)
187
+ nscripts += 1
188
+ end
195
189
  end
196
- nscripts += 1
197
- end
190
+
191
+ puts "Extracted #{nscripts} scripts to '#{js_dir}'."
198
192
  end
199
193
 
200
- puts "Extracted #{nscripts} scripts to '#{OUTPUT_DIR}/scripts'."
201
- end
202
-
203
- if @options[:attachments]
204
- nattach = 0
205
- Dir::mkdir("#{OUTPUT_DIR}/attachments") unless File.directory?("#{OUTPUT_DIR}/attachments")
206
-
207
- pdf.ls_names(Names::Root::EMBEDDEDFILES).each do |name, attachment|
208
- attached_file = "#{OUTPUT_DIR}/attachments/attached_#{File.basename(name)}"
209
- spec = attachment.solve
210
- if spec and spec.EF and f = spec.EF.F and f.is_a?(Stream)
211
- File.open(attached_file, "wb") do |fd|
212
- fd.write(f.data)
194
+ if @options[:attachments]
195
+ nattach = 0
196
+ attachments_dir = File.join(OUTPUT_DIR, "attachments")
197
+ Dir::mkdir(attachments_dir) unless File.directory?(attachments_dir)
198
+
199
+ pdf.each_attachment do |name, attachment|
200
+ attached_file = File.join(attachments_dir, "attached_#{File.basename(name)}")
201
+ spec = attachment.solve
202
+ if spec and spec.EF and f = spec.EF.F and f.is_a?(Stream)
203
+ File.binwrite(attached_file, f.data)
204
+ nattach += 1
205
+ end
213
206
  end
214
- nattach += 1
215
- end
207
+
208
+ puts "Extracted #{nattach} attachments to '#{attachments_dir}'."
216
209
  end
217
-
218
- puts "Extracted #{nattach} attachments to '#{OUTPUT_DIR}/attachments'."
219
- end
220
-
221
- if @options[:fonts]
222
- nfonts = 0
223
- Dir::mkdir("#{OUTPUT_DIR}/fonts") unless File.directory?("#{OUTPUT_DIR}/fonts")
224
-
225
- pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
226
- font = stream.xrefs.find{|obj| obj.is_a?(FontDescriptor)}
227
- if font
228
- font_file = "#{OUTPUT_DIR}/fonts/font_#{File.basename(font.FontName.value.to_s)}"
229
- File.open(font_file, "wb") do |fd|
230
- fd.write(stream.data)
210
+
211
+ if @options[:fonts]
212
+ nfonts = 0
213
+ fonts_dir = File.join(OUTPUT_DIR, "fonts")
214
+ Dir::mkdir(fonts_dir) unless File.directory?(fonts_dir)
215
+
216
+ pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
217
+ font = stream.xrefs.find{|obj| obj.is_a?(FontDescriptor)}
218
+ if font
219
+ font_file = File.join(fonts_dir, File.basename(font.FontName.value.to_s))
220
+ File.binwrite(font_file, stream.data)
221
+ nfonts += 1
222
+ end
231
223
  end
232
- nfonts += 1
233
- end
224
+
225
+ puts "Extracted #{nfonts} fonts to '#{fonts_dir}'."
234
226
  end
235
227
 
236
- puts "Extracted #{nfonts} fonts to '#{OUTPUT_DIR}/fonts'."
237
- end
228
+ if @options[:metadata]
229
+ nmeta = 0
230
+ metadata_dir = File.join(OUTPUT_DIR, "metadata")
231
+ Dir::mkdir(metadata_dir) unless File.directory?(metadata_dir)
238
232
 
239
- if @options[:metadata]
240
- nmeta = 0
241
- Dir::mkdir("#{OUTPUT_DIR}/metadata") unless File.directory?("#{OUTPUT_DIR}/metadata")
233
+ pdf.root_objects.find_all{|obj| obj.is_a?(MetadataStream)}.each do |stream|
234
+ metadata_file = File.join(metadata_dir, "metadata_#{stream.reference.refno}.xml")
235
+ File.binwrite(metadata_file, stream.data)
236
+ nmeta += 1
237
+ end
242
238
 
243
- pdf.root_objects.find_all{|obj| obj.is_a?(MetadataStream)}.each do |stream|
244
- metadata_file = "#{OUTPUT_DIR}/metadata/metadata_#{stream.reference.refno}.xml"
245
- File.open(metadata_file, "wb") do |fd|
246
- fd.write(stream.data)
247
- end
248
- nmeta += 1
239
+ puts "Extracted #{nmeta} metadata streams to '#{metadata_dir}'."
249
240
  end
250
241
 
251
- puts "Extracted #{nmeta} metadata streams to '#{OUTPUT_DIR}/metadata'."
252
- end
242
+ if @options[:images]
243
+ nimages = 0
244
+ image_dir = File.join(OUTPUT_DIR, "images")
245
+ Dir::mkdir(image_dir) unless File.directory?(image_dir)
253
246
 
254
- if @options[:images]
255
- nimages = 0
256
- Dir::mkdir("#{OUTPUT_DIR}/images") unless File.directory?("#{OUTPUT_DIR}/images")
257
-
258
- pdf.root_objects.find_all{|obj| obj.is_a?(Graphics::ImageXObject)}.each do |stream|
259
- begin
260
- ext, image_data = stream.to_image_file
261
- image_file = "#{OUTPUT_DIR}/images/image_#{stream.reference.refno}.#{ext}"
247
+ pdf.root_objects.find_all{|obj| obj.is_a?(Graphics::ImageXObject)}.each do |stream|
248
+ begin
249
+ ext, image_data = stream.to_image_file
250
+ image_file = File.join(image_dir, "image_#{stream.reference.refno}.#{ext}")
262
251
 
263
- if ext != 'png' and stream.ColorSpace == Graphics::Color::Space::DEVICE_CMYK
264
- STDERR.puts "Warning: file '#{image_file}' is intended to be viewed in CMYK color space."
265
- end
252
+ if ext != 'png' and stream.ColorSpace == Graphics::Color::Space::DEVICE_CMYK
253
+ STDERR.puts "Warning: file '#{image_file}' is intended to be viewed in CMYK color space."
254
+ end
266
255
 
267
- File.open(image_file, "wb") do |fd|
268
- fd.write(image_data)
256
+ File.binwrite(image_file, image_data)
257
+ nimages += 1
258
+ rescue
259
+ STDERR.puts "Unable to decode image (stream #{stream.reference.refno}). #{$!.message}"
260
+ end
269
261
  end
270
- nimages += 1
271
262
 
272
- rescue Exception => e
273
- STDERR.puts "Unable to decode image (stream #{stream.reference.refno}). #{e.message}"
274
- end
263
+ puts "Extracted #{nimages} images to '#{image_dir}'."
275
264
  end
276
-
277
- puts "Extracted #{nimages} images to '#{OUTPUT_DIR}/images'."
278
- end
279
-
280
- rescue SystemExit
281
- rescue Exception => e
282
- STDERR.puts "#{e.class}: #{e.message}"
283
- exit 1
284
- end
285
265
 
266
+ rescue
267
+ abort "#{$!.class}: #{$!.message}"
268
+ end