origami 1.2.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/README.md +112 -0
  4. data/bin/config/pdfcop.conf.yml +232 -233
  5. data/bin/gui/about.rb +27 -37
  6. data/bin/gui/config.rb +108 -117
  7. data/bin/gui/file.rb +416 -365
  8. data/bin/gui/gtkhex.rb +1138 -1153
  9. data/bin/gui/hexview.rb +55 -57
  10. data/bin/gui/imgview.rb +48 -51
  11. data/bin/gui/menu.rb +388 -386
  12. data/bin/gui/properties.rb +114 -130
  13. data/bin/gui/signing.rb +571 -617
  14. data/bin/gui/textview.rb +77 -95
  15. data/bin/gui/treeview.rb +382 -387
  16. data/bin/gui/walker.rb +227 -232
  17. data/bin/gui/xrefs.rb +56 -60
  18. data/bin/pdf2pdfa +53 -57
  19. data/bin/pdf2ruby +212 -228
  20. data/bin/pdfcop +338 -348
  21. data/bin/pdfdecompress +58 -65
  22. data/bin/pdfdecrypt +56 -60
  23. data/bin/pdfencrypt +75 -80
  24. data/bin/pdfexplode +185 -182
  25. data/bin/pdfextract +201 -218
  26. data/bin/pdfmetadata +83 -82
  27. data/bin/pdfsh +4 -5
  28. data/bin/pdfwalker +1 -2
  29. data/bin/shell/.irbrc +45 -82
  30. data/bin/shell/console.rb +105 -130
  31. data/bin/shell/hexdump.rb +40 -64
  32. data/examples/README.md +34 -0
  33. data/examples/attachments/attachment.rb +38 -0
  34. data/examples/attachments/nested_document.rb +51 -0
  35. data/examples/encryption/encryption.rb +28 -0
  36. data/{samples/actions/triggerevents/trigger.rb → examples/events/events.rb} +13 -16
  37. data/examples/flash/flash.rb +37 -0
  38. data/{samples → examples}/flash/helloworld.swf +0 -0
  39. data/examples/forms/javascript.rb +54 -0
  40. data/examples/forms/xfa.rb +115 -0
  41. data/examples/javascript/hello_world.rb +22 -0
  42. data/examples/javascript/js_emulation.rb +54 -0
  43. data/examples/loop/goto.rb +32 -0
  44. data/examples/loop/named.rb +33 -0
  45. data/examples/signature/signature.rb +65 -0
  46. data/examples/uri/javascript.rb +56 -0
  47. data/examples/uri/open-uri.rb +21 -0
  48. data/examples/uri/submitform.rb +47 -0
  49. data/lib/origami.rb +29 -42
  50. data/lib/origami/3d.rb +350 -225
  51. data/lib/origami/acroform.rb +262 -288
  52. data/lib/origami/actions.rb +268 -288
  53. data/lib/origami/annotations.rb +697 -722
  54. data/lib/origami/array.rb +258 -184
  55. data/lib/origami/boolean.rb +74 -84
  56. data/lib/origami/catalog.rb +397 -434
  57. data/lib/origami/collections.rb +144 -0
  58. data/lib/origami/destinations.rb +233 -194
  59. data/lib/origami/dictionary.rb +253 -232
  60. data/lib/origami/encryption.rb +1274 -1243
  61. data/lib/origami/export.rb +232 -268
  62. data/lib/origami/extensions/fdf.rb +307 -220
  63. data/lib/origami/extensions/ppklite.rb +368 -435
  64. data/lib/origami/filespec.rb +197 -0
  65. data/lib/origami/filters.rb +301 -295
  66. data/lib/origami/filters/ascii.rb +177 -180
  67. data/lib/origami/filters/ccitt.rb +528 -535
  68. data/lib/origami/filters/crypt.rb +26 -35
  69. data/lib/origami/filters/dct.rb +46 -52
  70. data/lib/origami/filters/flate.rb +95 -94
  71. data/lib/origami/filters/jbig2.rb +49 -55
  72. data/lib/origami/filters/jpx.rb +38 -44
  73. data/lib/origami/filters/lzw.rb +189 -183
  74. data/lib/origami/filters/predictors.rb +221 -235
  75. data/lib/origami/filters/runlength.rb +103 -104
  76. data/lib/origami/font.rb +173 -186
  77. data/lib/origami/functions.rb +67 -81
  78. data/lib/origami/graphics.rb +25 -21
  79. data/lib/origami/graphics/colors.rb +178 -187
  80. data/lib/origami/graphics/instruction.rb +79 -85
  81. data/lib/origami/graphics/path.rb +142 -148
  82. data/lib/origami/graphics/patterns.rb +160 -167
  83. data/lib/origami/graphics/render.rb +43 -50
  84. data/lib/origami/graphics/state.rb +138 -153
  85. data/lib/origami/graphics/text.rb +188 -205
  86. data/lib/origami/graphics/xobject.rb +819 -815
  87. data/lib/origami/header.rb +63 -78
  88. data/lib/origami/javascript.rb +596 -597
  89. data/lib/origami/linearization.rb +285 -290
  90. data/lib/origami/metadata.rb +139 -148
  91. data/lib/origami/name.rb +112 -148
  92. data/lib/origami/null.rb +53 -62
  93. data/lib/origami/numeric.rb +162 -175
  94. data/lib/origami/obfuscation.rb +186 -174
  95. data/lib/origami/object.rb +593 -573
  96. data/lib/origami/outline.rb +42 -47
  97. data/lib/origami/outputintents.rb +73 -82
  98. data/lib/origami/page.rb +703 -592
  99. data/lib/origami/parser.rb +238 -290
  100. data/lib/origami/parsers/fdf.rb +41 -33
  101. data/lib/origami/parsers/pdf.rb +75 -95
  102. data/lib/origami/parsers/pdf/lazy.rb +137 -0
  103. data/lib/origami/parsers/pdf/linear.rb +64 -66
  104. data/lib/origami/parsers/ppklite.rb +34 -70
  105. data/lib/origami/pdf.rb +1030 -1005
  106. data/lib/origami/reference.rb +102 -102
  107. data/lib/origami/signature.rb +591 -609
  108. data/lib/origami/stream.rb +668 -551
  109. data/lib/origami/string.rb +397 -373
  110. data/lib/origami/template/patterns.rb +56 -0
  111. data/lib/origami/template/widgets.rb +151 -0
  112. data/lib/origami/trailer.rb +144 -158
  113. data/lib/origami/tree.rb +62 -0
  114. data/lib/origami/version.rb +23 -0
  115. data/lib/origami/webcapture.rb +88 -79
  116. data/lib/origami/xfa.rb +2863 -2882
  117. data/lib/origami/xreftable.rb +472 -384
  118. data/test/dataset/calc.pdf +85 -0
  119. data/test/dataset/crypto.pdf +82 -0
  120. data/test/dataset/empty.pdf +49 -0
  121. data/test/test_actions.rb +27 -0
  122. data/test/test_annotations.rb +90 -0
  123. data/test/test_pages.rb +31 -0
  124. data/test/test_pdf.rb +16 -0
  125. data/test/test_pdf_attachment.rb +34 -0
  126. data/test/test_pdf_create.rb +24 -0
  127. data/test/test_pdf_encrypt.rb +95 -0
  128. data/test/test_pdf_parse.rb +96 -0
  129. data/test/test_pdf_sign.rb +58 -0
  130. data/test/test_streams.rb +182 -0
  131. data/test/test_xrefs.rb +67 -0
  132. metadata +88 -58
  133. data/README +0 -67
  134. data/bin/pdf2graph +0 -121
  135. data/bin/pdfcocoon +0 -104
  136. data/lib/origami/file.rb +0 -233
  137. data/samples/README.txt +0 -45
  138. data/samples/actions/launch/calc.rb +0 -87
  139. data/samples/actions/launch/winparams.rb +0 -22
  140. data/samples/actions/loop/loopgoto.rb +0 -24
  141. data/samples/actions/loop/loopnamed.rb +0 -21
  142. data/samples/actions/named/named.rb +0 -31
  143. data/samples/actions/samba/smbrelay.rb +0 -26
  144. data/samples/actions/webbug/submitform.js +0 -26
  145. data/samples/actions/webbug/webbug-browser.rb +0 -68
  146. data/samples/actions/webbug/webbug-js.rb +0 -67
  147. data/samples/actions/webbug/webbug-reader.rb +0 -90
  148. data/samples/attachments/attach.rb +0 -40
  149. data/samples/attachments/attached.txt +0 -1
  150. data/samples/crypto/crypto.rb +0 -28
  151. data/samples/digsig/signed.rb +0 -46
  152. data/samples/exploits/cve-2008-2992-utilprintf.rb +0 -87
  153. data/samples/exploits/cve-2009-0927-geticon.rb +0 -65
  154. data/samples/exploits/exploit_customdictopen.rb +0 -55
  155. data/samples/exploits/getannots.rb +0 -69
  156. data/samples/flash/flash.rb +0 -31
  157. data/samples/javascript/attached.txt +0 -1
  158. data/samples/javascript/js.rb +0 -52
  159. data/templates/patterns.rb +0 -66
  160. data/templates/widgets.rb +0 -173
  161. data/templates/xdp.rb +0 -92
  162. data/test/ts_pdf.rb +0 -50
@@ -1,34 +1,33 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  =begin
4
4
 
5
- = Author:
6
- Guillaume Delugré <guillaume/at/security-labs.org>
5
+ = Info
6
+ Explodes a PDF into separate documents.
7
7
 
8
- = Info:
9
- Explodes a PDF into separate documents.
10
- = License:
11
- Origami is free software: you can redistribute it and/or modify
12
- it under the terms of the GNU Lesser General Public License as published by
13
- the Free Software Foundation, either version 3 of the License, or
14
- (at your option) any later version.
8
+ = License
9
+ Copyright (C) 2016 Guillaume Delugré.
15
10
 
16
- Origami is distributed in the hope that it will be useful,
17
- but WITHOUT ANY WARRANTY; without even the implied warranty of
18
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
- GNU Lesser General Public License for more details.
11
+ Origami is free software: you can redistribute it and/or modify
12
+ it under the terms of the GNU Lesser General Public License as published by
13
+ the Free Software Foundation, either version 3 of the License, or
14
+ (at your option) any later version.
20
15
 
21
- You should have received a copy of the GNU Lesser General Public License
22
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
16
+ Origami is distributed in the hope that it will be useful,
17
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ GNU Lesser General Public License for more details.
20
+
21
+ You should have received a copy of the GNU Lesser General Public License
22
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
23
23
 
24
24
  =end
25
25
 
26
26
  begin
27
- require 'origami'
27
+ require 'origami'
28
28
  rescue LoadError
29
- ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
30
- $: << ORIGAMIDIR
31
- require 'origami'
29
+ $: << File.join(__dir__, '../lib')
30
+ require 'origami'
32
31
  end
33
32
  include Origami
34
33
 
@@ -36,185 +35,189 @@ require 'optparse'
36
35
  require 'rexml/document'
37
36
 
38
37
  class OptParser
39
- BANNER = <<USAGE
38
+ BANNER = <<USAGE
40
39
  Usage: #{$0} <PDF-file> [-r <range>] [-t pages|rsrc] [-d <output-directory>]
41
40
  Explodes a document into separate documents.
42
- Bug reports or feature requests at: http://origami-pdf.googlecode.com/
41
+ Bug reports or feature requests at: http://github.com/gdelugre/origami
43
42
 
44
43
  Options:
45
44
  USAGE
46
45
 
47
- def self.parser(options)
48
- OptionParser.new do |opts|
49
- opts.banner = BANNER
50
-
51
- opts.on("-d", "--output-dir DIR", "Output directory.") do |d|
52
- options[:output_dir] = d
53
- end
54
-
55
- opts.on("-r", "--range PAGES", "Page range (e.g: 2-, 1-3, 5). Default to '-'.") do |r|
56
- range =
57
- if r.index('-').nil?
58
- page = r.to_i
59
- Range.new(page-1, page-1)
60
- else
61
- from, to = r.split('-').map{|bound| bound.to_i}
62
- from ||= 1
63
- to ||= 0
64
- Range.new(from-1, to-1)
65
- end
66
- options[:page_range] = range
67
- end
68
-
69
- opts.on("-t", "--type TYPE", "Split by type. Can be 'pages' or 'rsrc'. Default to 'pages'.") do |t|
70
- options[:split_by] = t
71
- end
72
-
73
- opts.on_tail("-h", "--help", "Show this message.") do
74
- puts opts
75
- exit
76
- end
46
+ def self.parser(options)
47
+ OptionParser.new do |opts|
48
+ opts.banner = BANNER
49
+
50
+ opts.on("-d", "--output-dir DIR", "Output directory.") do |d|
51
+ options[:output_dir] = d
52
+ end
53
+
54
+ opts.on("-r", "--range PAGES", "Page range (e.g: 2-, 1-3, 5). Default to '-'.") do |r|
55
+ range =
56
+ if r.index('-').nil?
57
+ page = r.to_i
58
+ Range.new(page-1, page-1)
59
+ else
60
+ from, to = r.split('-').map{|bound| bound.to_i}
61
+ from ||= 1
62
+ to ||= 0
63
+ Range.new(from-1, to-1)
64
+ end
65
+ options[:page_range] = range
66
+ end
67
+
68
+ opts.on("-t", "--type TYPE", "Split by type. Can be 'pages' or 'rsrc'. Default to 'pages'.") do |t|
69
+ options[:split_by] = t
70
+ end
71
+
72
+ opts.on_tail("-h", "--help", "Show this message.") do
73
+ puts opts
74
+ exit
75
+ end
76
+ end
77
77
  end
78
- end
79
78
 
80
- def self.parse(args)
81
- options =
82
- {
83
- :page_range => (0..-1),
84
- :split_by => 'pages'
85
- }
79
+ def self.parse(args)
80
+ options =
81
+ {
82
+ page_range: (0..-1),
83
+ split_by: 'pages'
84
+ }
86
85
 
87
- self.parser(options).parse!(args)
86
+ self.parser(options).parse!(args)
88
87
 
89
- options
90
- end
88
+ options
89
+ end
91
90
  end
92
91
 
93
92
  begin
94
- @options = OptParser.parse(ARGV)
95
-
96
- if ARGV.empty?
97
- STDERR.puts "Error: No filename was specified. #{$0} --help for details."
98
- exit 1
99
- else
100
- target = ARGV.shift
101
- end
102
-
103
- if @options[:output_dir].nil?
104
- @options[:output_dir] = "#{File.join(File.dirname(target), File.basename(target,'.pdf'))}.explode"
105
- end
106
-
107
- Origami::OPTIONS[:ignore_bad_references] = true
108
- OUTPUT_DIR = @options[:output_dir]
109
- Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
110
-
111
- def split_by_rsrc(n, page, type)
112
- all_rsrc = page.resources
113
- type_rsrc = page.ls_resources(type)
114
- other_rsrc = all_rsrc.keys - type_rsrc.keys
115
-
116
- unless type_rsrc.empty?
117
- # Keep only specified resource type.
118
- output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}.pdf")
119
- PDF.write(output_file) do |pdf|
120
- reduced = page.copy
121
- # New resource dictionary with only matching resources.
122
- reduced.Resources = Resources.new(type => type_rsrc)
123
- # Remove mention of other resources.
124
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
125
- delete_if {|line| other_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
126
-
127
- STDERR.puts "Creating #{output_file}..."
128
- pdf.append_page(reduced)
129
- end
130
-
131
- # Remove all specified resource type.
132
- output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}.pdf")
133
- PDF.write(output_file) do |pdf|
134
- reduced = page.copy
135
- # New resource dictionary with no resource of specified type.
136
- reduced.Resources = reduced.Resources.copy
137
- reduced.Resources.delete(type)
138
- # Remove mention this resource type.
139
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
140
- delete_if {|line| type_rsrc.keys.any?{|rsrc| line =~ /#{rsrc}/}}.join
141
-
142
- STDERR.puts "Creating #{output_file}..."
143
- pdf.append_page(reduced)
144
- end
145
-
146
- # Now treating each resource object separately.
147
- type_rsrc.each_pair do |name, rsrc|
148
- anyother_rsrc = all_rsrc.keys - [ name ]
149
- # Keey only specified resource object.
150
- output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}_#{name}.pdf")
151
- PDF.write(output_file) do |pdf|
152
- reduced = page.copy
153
- # New resource dictionary with only specified resource object.
154
- reduced.Resources = Resources.new(type => {name => rsrc})
155
- # Remove mention of all other resources.
156
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
157
- delete_if {|line| anyother_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
158
-
159
- STDERR.puts "Creating #{output_file}..."
160
- pdf.append_page(reduced)
161
- end
162
-
163
- # Remove only specified resource object.
164
- output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}_#{name}.pdf")
165
- PDF.write(output_file) do |pdf|
166
- reduced = page.copy
167
- # New resource dictionary with only specified resource object.
168
- reduced.Resources = reduced.Resources.copy
169
- reduced.Resources[type] = reduced.Resources.send(type).copy
170
- reduced.Resources[type].delete(name)
171
- # Remove mention of this resource only.
172
- reduced.Contents.data = reduced.Contents.data.lines.to_a.
173
- delete_if {|line| line =~ /#{name}/}.join
174
-
175
- STDERR.puts "Creating #{output_file}..."
176
- pdf.append_page(reduced)
93
+ @options = OptParser.parse(ARGV)
94
+
95
+ if ARGV.empty?
96
+ abort "Error: No filename was specified. #{$0} --help for details."
97
+ else
98
+ target = ARGV.shift
99
+ end
100
+
101
+ if @options[:output_dir].nil?
102
+ @options[:output_dir] = "#{File.join(File.dirname(target), File.basename(target,'.pdf'))}.explode"
103
+ end
104
+
105
+ Origami::OPTIONS[:ignore_bad_references] = true
106
+ OUTPUT_DIR = @options[:output_dir]
107
+ Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
108
+
109
+ def split_by_rsrc(n, page, type)
110
+ all_rsrc = page.resources
111
+ type_rsrc = page.resources(type)
112
+ other_rsrc = all_rsrc.keys - type_rsrc.keys
113
+
114
+ unless type_rsrc.empty?
115
+ # Keep only specified resource type.
116
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}.pdf")
117
+ PDF.write(output_file) do |pdf|
118
+ reduced = page.copy
119
+ # New resource dictionary with only matching resources.
120
+ reduced.Resources = Resources.new(type => type_rsrc)
121
+ # Remove mention of other resources.
122
+ reduced.each_content_stream do |stream|
123
+ stream.data = stream.data.lines.
124
+ delete_if {|line| other_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
125
+ end
126
+
127
+ STDERR.puts "Creating #{output_file}..."
128
+ pdf.append_page(reduced)
129
+ end
130
+
131
+ # Remove all specified resource type.
132
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}.pdf")
133
+ PDF.write(output_file) do |pdf|
134
+ reduced = page.copy
135
+ # New resource dictionary with no resource of specified type.
136
+ reduced.Resources = reduced.Resources.copy
137
+ reduced.Resources.delete(type)
138
+ # Remove mentions of this resource type.
139
+ reduced.each_content_stream do |stream|
140
+ stream.data = stream.data.lines.
141
+ delete_if {|line| type_rsrc.keys.any?{|rsrc| line =~ /#{rsrc}/}}.join
142
+ end
143
+
144
+ STDERR.puts "Creating #{output_file}..."
145
+ pdf.append_page(reduced)
146
+ end
147
+
148
+ # Now treating each resource object separately.
149
+ type_rsrc.each_pair do |name, rsrc|
150
+ anyother_rsrc = all_rsrc.keys - [ name ]
151
+ # Keep only specified resource object.
152
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_keeponly_#{type}_#{name}.pdf")
153
+ PDF.write(output_file) do |pdf|
154
+ reduced = page.copy
155
+ # New resource dictionary with only specified resource object.
156
+ reduced.Resources = Resources.new(type => {name => rsrc})
157
+ # Remove mention of all other resources.
158
+ reduced.each_content_stream do |stream|
159
+ stream.data = stream.data.lines.
160
+ delete_if {|line| anyother_rsrc.any?{|rsrc| line =~ /#{rsrc}/}}.join
161
+ end
162
+
163
+ STDERR.puts "Creating #{output_file}..."
164
+ pdf.append_page(reduced)
165
+ end
166
+
167
+ # Remove only specified resource object.
168
+ output_file = File.join(OUTPUT_DIR, "page_#{n}_excluded_#{type}_#{name}.pdf")
169
+ PDF.write(output_file) do |pdf|
170
+ reduced = page.copy
171
+ # New resource dictionary without the specified resource object.
172
+ reduced.Resources = reduced.Resources.copy
173
+ reduced.Resources[type] = reduced.Resources.send(type).copy
174
+ reduced.Resources[type].delete(name)
175
+ # Remove mention of this resource only.
176
+ reduced.each_content_stream do |stream|
177
+ stream.data = stream.data.lines.
178
+ delete_if {|line| line =~ /#{name}/}.join
179
+ end
180
+
181
+ STDERR.puts "Creating #{output_file}..."
182
+ pdf.append_page(reduced)
183
+ end
184
+ end
177
185
  end
178
- end
179
186
  end
180
- end
181
-
182
- params =
183
- {
184
- :verbosity => Parser::VERBOSE_QUIET,
185
- }
186
- pdf = PDF.read(target, params)
187
-
188
- i = @options[:page_range].first + 1
189
- pdf.pages[@options[:page_range]].each do |page|
190
- case @options[:split_by]
191
- when 'pages'
192
- output_file = File.join(OUTPUT_DIR, "page_#{i}.pdf")
193
- PDF.write(output_file) do |pdf|
194
- STDERR.puts "Creating #{output_file}..."
195
- pdf.append_page(page)
187
+
188
+ params =
189
+ {
190
+ verbosity: Parser::VERBOSE_QUIET,
191
+ }
192
+ pdf = PDF.read(target, params)
193
+
194
+ i = @options[:page_range].first + 1
195
+ pdf.pages[@options[:page_range]].each do |page|
196
+ case @options[:split_by]
197
+ when 'pages'
198
+ output_file = File.join(OUTPUT_DIR, "page_#{i}.pdf")
199
+ PDF.write(output_file) do |pdf|
200
+ STDERR.puts "Creating #{output_file}..."
201
+ pdf.append_page(page)
202
+ end
203
+
204
+ when 'rsrc'
205
+ [ Resources::EXTGSTATE,
206
+ Resources::COLORSPACE,
207
+ Resources::PATTERN,
208
+ Resources::SHADING,
209
+ Resources::XOBJECT,
210
+ Resources::FONT,
211
+ Resources::PROPERTIES
212
+ ].each { |type| split_by_rsrc(i, page, type) }
213
+
214
+ else
215
+ raise ArgumentError, "Unknown split option: #{@options[:split_by]}"
196
216
  end
197
217
 
198
- when 'rsrc'
199
- [ Resources::EXTGSTATE,
200
- Resources::COLORSPACE,
201
- Resources::PATTERN,
202
- Resources::SHADING,
203
- Resources::XOBJECT,
204
- Resources::FONT,
205
- Resources::PROPERTIES
206
- ].each { |type| split_by_rsrc(i, page, type) }
207
-
208
- else
209
- raise ArgumentError, "Unknown split option: #{@options[:split_by]}"
218
+ i += 1
210
219
  end
211
220
 
212
- i += 1
213
- end
214
-
215
- rescue SystemExit
216
- rescue Exception => e
217
- STDERR.puts "#{e.class}: #{e.message} #{e.backtrace}"
218
- exit 1
221
+ rescue
222
+ abort "#{$!.class}: #{$!.message} #{$!.backtrace.join($/)}"
219
223
  end
220
-
@@ -1,38 +1,36 @@
1
- #!/usr/bin/env ruby
1
+ #!/usr/bin/env ruby
2
2
 
3
3
  =begin
4
4
 
5
- = Author:
6
- Guillaume Delugré <guillaume/at/security-labs.org>
5
+ = Info
6
+ Extracts valuable data from a PDF document. Can extract:
7
+ - decoded streams
8
+ - JavaScript
9
+ - file attachments
7
10
 
8
- = Info:
9
- Extracts valuable data from a PDF document. Can extract:
10
- - decoded streams
11
- - JavaScript
12
- - file attachments
11
+ = License
12
+ Copyright (C) 2016 Guillaume Delugré.
13
13
 
14
- = License:
15
- Origami is free software: you can redistribute it and/or modify
16
- it under the terms of the GNU Lesser General Public License as published by
17
- the Free Software Foundation, either version 3 of the License, or
18
- (at your option) any later version.
14
+ Origami is free software: you can redistribute it and/or modify
15
+ it under the terms of the GNU Lesser General Public License as published by
16
+ the Free Software Foundation, either version 3 of the License, or
17
+ (at your option) any later version.
19
18
 
20
- Origami is distributed in the hope that it will be useful,
21
- but WITHOUT ANY WARRANTY; without even the implied warranty of
22
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
- GNU Lesser General Public License for more details.
19
+ Origami is distributed in the hope that it will be useful,
20
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
+ GNU Lesser General Public License for more details.
24
23
 
25
- You should have received a copy of the GNU Lesser General Public License
26
- along with Origami. If not, see <http://www.gnu.org/licenses/>.
24
+ You should have received a copy of the GNU Lesser General Public License
25
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
27
26
 
28
27
  =end
29
28
 
30
29
  begin
31
- require 'origami'
30
+ require 'origami'
32
31
  rescue LoadError
33
- ORIGAMIDIR = "#{File.dirname(__FILE__)}/../lib"
34
- $: << ORIGAMIDIR
35
- require 'origami'
32
+ $: << File.join(__dir__, '../lib')
33
+ require 'origami'
36
34
  end
37
35
  include Origami
38
36
 
@@ -40,246 +38,231 @@ require 'optparse'
40
38
  require 'rexml/document'
41
39
 
42
40
  class OptParser
43
- BANNER = <<USAGE
41
+ BANNER = <<USAGE
44
42
  Usage: #{$0} <PDF-file> [-afjms] [-d <output-directory>]
45
43
  Extracts various data out of a document (streams, scripts, images, fonts, metadata, attachments).
46
- Bug reports or feature requests at: http://origami-pdf.googlecode.com/
44
+ Bug reports or feature requests at: http://github.com/gdelugre/origami
47
45
 
48
46
  Options:
49
47
  USAGE
50
48
 
51
- def self.parser(options)
52
- OptionParser.new do |opts|
53
- opts.banner = BANNER
49
+ def self.parser(options)
50
+ OptionParser.new do |opts|
51
+ opts.banner = BANNER
54
52
 
55
- opts.on("-d", "--output-dir DIR", "Output directory") do |d|
56
- options[:output_dir] = d
57
- end
53
+ opts.on("-d", "--output-dir DIR", "Output directory") do |d|
54
+ options[:output_dir] = d
55
+ end
58
56
 
59
- opts.on("-s", "--streams", "Extracts all decoded streams") do
60
- options[:streams] = true
61
- end
57
+ opts.on("-s", "--streams", "Extracts all decoded streams") do
58
+ options[:streams] = true
59
+ end
62
60
 
63
- opts.on("-a", "--attachments", "Extracts file attachments") do
64
- options[:attachments] = true
65
- end
61
+ opts.on("-a", "--attachments", "Extracts file attachments") do
62
+ options[:attachments] = true
63
+ end
66
64
 
67
- opts.on("-f", "--fonts", "Extracts embedded font files") do
68
- options[:fonts] = true
69
- end
65
+ opts.on("-f", "--fonts", "Extracts embedded font files") do
66
+ options[:fonts] = true
67
+ end
70
68
 
71
- opts.on("-j", "--js", "Extracts JavaScript scripts") do
72
- options[:javascript] = true
73
- end
69
+ opts.on("-j", "--js", "Extracts JavaScript scripts") do
70
+ options[:javascript] = true
71
+ end
74
72
 
75
- opts.on("-m", "--metadata", "Extracts metadata streams") do
76
- options[:metadata] = true
77
- end
73
+ opts.on("-m", "--metadata", "Extracts metadata streams") do
74
+ options[:metadata] = true
75
+ end
78
76
 
79
- opts.on("-i", "--images", "Extracts embedded images") do
80
- options[:images] = true
81
- end
77
+ opts.on("-i", "--images", "Extracts embedded images") do
78
+ options[:images] = true
79
+ end
82
80
 
83
- opts.on_tail("-h", "--help", "Show this message") do
84
- puts opts
85
- exit
86
- end
81
+ opts.on_tail("-h", "--help", "Show this message") do
82
+ puts opts
83
+ exit
84
+ end
85
+ end
87
86
  end
88
- end
89
87
 
90
- def self.parse(args)
91
- options =
92
- {
93
- }
88
+ def self.parse(args)
89
+ options = {}
94
90
 
95
- self.parser(options).parse!(args)
91
+ self.parser(options).parse!(args)
96
92
 
97
- options
98
- end
93
+ options
94
+ end
99
95
  end
100
96
 
101
97
  begin
102
- @options = OptParser.parse(ARGV)
103
-
104
- if ARGV.empty?
105
- STDERR.puts "Error: No filename was specified. #{$0} --help for details."
106
- exit 1
107
- else
108
- target = ARGV.shift
109
- end
110
-
111
- unless [:streams,:javascript,:attachments,:fonts,:metadata,:images].any? {|opt| @options[opt]}
112
- @options[:streams] =
113
- @options[:javascript] =
114
- @options[:fonts] =
115
- @options[:attachments] =
116
- @options[:images] = true
117
- end
118
-
119
- if @options[:output_dir].nil?
120
- @options[:output_dir] = "#{File.basename(target, '.pdf')}.dump"
121
- end
122
-
123
- # Force data extraction, even for invalid FlateDecode streams.
124
- Origami::OPTIONS[:ignore_zlib_errors] = true
125
-
126
- OUTPUT_DIR = @options[:output_dir]
127
- Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
128
-
129
- params =
130
- {
131
- :verbosity => Parser::VERBOSE_QUIET,
132
- }
133
- pdf = PDF.read(target, params)
134
-
135
- if @options[:streams]
136
- nstreams = 0
137
- Dir::mkdir("#{OUTPUT_DIR}/streams") unless File.directory?("#{OUTPUT_DIR}/streams")
138
-
139
- pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
140
- stream_file = "#{OUTPUT_DIR}/streams/stream_#{stream.reference.refno}.dmp"
141
- File.open(stream_file, "wb") do |fd|
142
- fd.write(stream.data)
143
- end
144
- nstreams += 1
98
+ @options = OptParser.parse(ARGV)
99
+
100
+ if ARGV.empty?
101
+ abort "Error: No filename was specified. #{$0} --help for details."
102
+ else
103
+ target = ARGV.shift
145
104
  end
146
105
 
147
- puts "Extracted #{nstreams} PDF streams to '#{OUTPUT_DIR}/streams'."
148
- end
106
+ unless %i[streams javascript attachments fonts metadata images].any? {|opt| @options[opt]}
107
+ @options[:streams] =
108
+ @options[:javascript] =
109
+ @options[:fonts] =
110
+ @options[:attachments] =
111
+ @options[:images] = true
112
+ end
149
113
 
150
- if @options[:javascript]
151
- nscripts = 0
152
- Dir::mkdir("#{OUTPUT_DIR}/scripts") unless File.directory?("#{OUTPUT_DIR}/scripts")
114
+ if @options[:output_dir].nil?
115
+ @options[:output_dir] = "#{File.basename(target, '.pdf')}.dump"
116
+ end
153
117
 
154
- pdf.ls(/^JS$/).each do |script|
155
- script_file = "#{OUTPUT_DIR}/scripts/script_#{script.hash}.js"
156
- File.open(script_file, "wb") do |fd|
157
- fd.write(
158
- case script
159
- when Stream then
160
- script.data
161
- else
162
- script.value
163
- end
164
- )
165
- end
166
- nscripts += 1
118
+ # Force data extraction, even for invalid FlateDecode streams.
119
+ Origami::OPTIONS[:ignore_zlib_errors] = true
120
+ Origami::OPTIONS[:ignore_png_errors] = true
121
+
122
+ OUTPUT_DIR = @options[:output_dir]
123
+ Dir::mkdir(OUTPUT_DIR) unless File.directory?(OUTPUT_DIR)
124
+
125
+ params =
126
+ {
127
+ verbosity: Parser::VERBOSE_QUIET,
128
+ }
129
+ pdf = PDF.read(target, params)
130
+
131
+ if @options[:streams]
132
+ nstreams = 0
133
+ stream_dir = File.join(OUTPUT_DIR, "streams")
134
+ Dir::mkdir(stream_dir) unless File.directory?(stream_dir)
135
+
136
+ pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
137
+ stream_file = File.join(stream_dir, "stream_#{stream.reference.refno}.dmp")
138
+ File.binwrite(stream_file, stream.data)
139
+ nstreams += 1
140
+ end
141
+
142
+ puts "Extracted #{nstreams} PDF streams to '#{stream_dir}'."
167
143
  end
168
144
 
169
- # Also checking for presence of JavaScript in XML forms.
170
- if pdf.has_form? and pdf.Catalog.AcroForm.has_key?(:XFA)
171
- xfa = pdf.Catalog.AcroForm[:XFA].solve
172
-
173
- case xfa
174
- when Array then
175
- xml = ""
176
- i = 0
177
- xfa.each do |packet|
178
- if i % 2 == 1
179
- xml << packet.solve.data
145
+ if @options[:javascript]
146
+ nscripts = 0
147
+ js_dir = File.join(OUTPUT_DIR, "scripts")
148
+ Dir::mkdir(js_dir) unless File.directory?(js_dir)
149
+
150
+ pdf.ls(/^JS$/).each do |script|
151
+ script_file = File.join(js_dir, "script_#{script.hash}.js")
152
+ script_data =
153
+ case script
154
+ when Stream then script.data
155
+ else script.value
156
+ end
157
+
158
+ File.binwrite(script_file, script_data)
159
+ nscripts += 1
160
+ end
161
+
162
+ # Also checking for presence of JavaScript in XML forms.
163
+ if pdf.form? and pdf.Catalog.AcroForm.has_key?(:XFA)
164
+ xfa = pdf.Catalog.AcroForm[:XFA].solve
165
+
166
+ case xfa
167
+ when Array then
168
+ xml = ""
169
+ i = 0
170
+ xfa.each do |packet|
171
+ if i % 2 == 1
172
+ xml << packet.solve.data
173
+ end
174
+
175
+ i = i + 1
176
+ end
177
+ when Stream then
178
+ xml = xfa.data
179
+ else
180
+ reject("Malformed XFA dictionary")
180
181
  end
181
182
 
182
- i = i + 1
183
- end
184
- when Stream then
185
- xml = xfa.data
186
- else
187
- reject("Malformed XFA dictionary")
188
- end
189
-
190
- xfadoc = REXML::Document.new(xml)
191
- REXML::XPath.match(xfadoc, "//script").each do |script|
192
- script_file = "#{OUTPUT_DIR}/script_#{script.hash}.js"
193
- File.open(script_file, "wb") do |fd|
194
- fd.write(script.text)
183
+ xfadoc = REXML::Document.new(xml)
184
+ REXML::XPath.match(xfadoc, "//script").each do |script|
185
+ script_file = File.join(js_dir, "script_#{script.hash}.js")
186
+ File.binwrite(script_file, script.text)
187
+ nscripts += 1
188
+ end
195
189
  end
196
- nscripts += 1
197
- end
190
+
191
+ puts "Extracted #{nscripts} scripts to '#{js_dir}'."
198
192
  end
199
193
 
200
- puts "Extracted #{nscripts} scripts to '#{OUTPUT_DIR}/scripts'."
201
- end
202
-
203
- if @options[:attachments]
204
- nattach = 0
205
- Dir::mkdir("#{OUTPUT_DIR}/attachments") unless File.directory?("#{OUTPUT_DIR}/attachments")
206
-
207
- pdf.ls_names(Names::Root::EMBEDDEDFILES).each do |name, attachment|
208
- attached_file = "#{OUTPUT_DIR}/attachments/attached_#{File.basename(name)}"
209
- spec = attachment.solve
210
- if spec and spec.EF and f = spec.EF.F and f.is_a?(Stream)
211
- File.open(attached_file, "wb") do |fd|
212
- fd.write(f.data)
194
+ if @options[:attachments]
195
+ nattach = 0
196
+ attachments_dir = File.join(OUTPUT_DIR, "attachments")
197
+ Dir::mkdir(attachments_dir) unless File.directory?(attachments_dir)
198
+
199
+ pdf.each_attachment do |name, attachment|
200
+ attached_file = File.join(attachments_dir, "attached_#{File.basename(name)}")
201
+ spec = attachment.solve
202
+ if spec and spec.EF and f = spec.EF.F and f.is_a?(Stream)
203
+ File.binwrite(attached_file, f.data)
204
+ nattach += 1
205
+ end
213
206
  end
214
- nattach += 1
215
- end
207
+
208
+ puts "Extracted #{nattach} attachments to '#{attachments_dir}'."
216
209
  end
217
-
218
- puts "Extracted #{nattach} attachments to '#{OUTPUT_DIR}/attachments'."
219
- end
220
-
221
- if @options[:fonts]
222
- nfonts = 0
223
- Dir::mkdir("#{OUTPUT_DIR}/fonts") unless File.directory?("#{OUTPUT_DIR}/fonts")
224
-
225
- pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
226
- font = stream.xrefs.find{|obj| obj.is_a?(FontDescriptor)}
227
- if font
228
- font_file = "#{OUTPUT_DIR}/fonts/font_#{File.basename(font.FontName.value.to_s)}"
229
- File.open(font_file, "wb") do |fd|
230
- fd.write(stream.data)
210
+
211
+ if @options[:fonts]
212
+ nfonts = 0
213
+ fonts_dir = File.join(OUTPUT_DIR, "fonts")
214
+ Dir::mkdir(fonts_dir) unless File.directory?(fonts_dir)
215
+
216
+ pdf.root_objects.find_all{|obj| obj.is_a?(Stream)}.each do |stream|
217
+ font = stream.xrefs.find{|obj| obj.is_a?(FontDescriptor)}
218
+ if font
219
+ font_file = File.join(fonts_dir, File.basename(font.FontName.value.to_s))
220
+ File.binwrite(font_file, stream.data)
221
+ nfonts += 1
222
+ end
231
223
  end
232
- nfonts += 1
233
- end
224
+
225
+ puts "Extracted #{nfonts} fonts to '#{fonts_dir}'."
234
226
  end
235
227
 
236
- puts "Extracted #{nfonts} fonts to '#{OUTPUT_DIR}/fonts'."
237
- end
228
+ if @options[:metadata]
229
+ nmeta = 0
230
+ metadata_dir = File.join(OUTPUT_DIR, "metadata")
231
+ Dir::mkdir(metadata_dir) unless File.directory?(metadata_dir)
238
232
 
239
- if @options[:metadata]
240
- nmeta = 0
241
- Dir::mkdir("#{OUTPUT_DIR}/metadata") unless File.directory?("#{OUTPUT_DIR}/metadata")
233
+ pdf.root_objects.find_all{|obj| obj.is_a?(MetadataStream)}.each do |stream|
234
+ metadata_file = File.join(metadata_dir, "metadata_#{stream.reference.refno}.xml")
235
+ File.binwrite(metadata_file, stream.data)
236
+ nmeta += 1
237
+ end
242
238
 
243
- pdf.root_objects.find_all{|obj| obj.is_a?(MetadataStream)}.each do |stream|
244
- metadata_file = "#{OUTPUT_DIR}/metadata/metadata_#{stream.reference.refno}.xml"
245
- File.open(metadata_file, "wb") do |fd|
246
- fd.write(stream.data)
247
- end
248
- nmeta += 1
239
+ puts "Extracted #{nmeta} metadata streams to '#{metadata_dir}'."
249
240
  end
250
241
 
251
- puts "Extracted #{nmeta} metadata streams to '#{OUTPUT_DIR}/metadata'."
252
- end
242
+ if @options[:images]
243
+ nimages = 0
244
+ image_dir = File.join(OUTPUT_DIR, "images")
245
+ Dir::mkdir(image_dir) unless File.directory?(image_dir)
253
246
 
254
- if @options[:images]
255
- nimages = 0
256
- Dir::mkdir("#{OUTPUT_DIR}/images") unless File.directory?("#{OUTPUT_DIR}/images")
257
-
258
- pdf.root_objects.find_all{|obj| obj.is_a?(Graphics::ImageXObject)}.each do |stream|
259
- begin
260
- ext, image_data = stream.to_image_file
261
- image_file = "#{OUTPUT_DIR}/images/image_#{stream.reference.refno}.#{ext}"
247
+ pdf.root_objects.find_all{|obj| obj.is_a?(Graphics::ImageXObject)}.each do |stream|
248
+ begin
249
+ ext, image_data = stream.to_image_file
250
+ image_file = File.join(image_dir, "image_#{stream.reference.refno}.#{ext}")
262
251
 
263
- if ext != 'png' and stream.ColorSpace == Graphics::Color::Space::DEVICE_CMYK
264
- STDERR.puts "Warning: file '#{image_file}' is intended to be viewed in CMYK color space."
265
- end
252
+ if ext != 'png' and stream.ColorSpace == Graphics::Color::Space::DEVICE_CMYK
253
+ STDERR.puts "Warning: file '#{image_file}' is intended to be viewed in CMYK color space."
254
+ end
266
255
 
267
- File.open(image_file, "wb") do |fd|
268
- fd.write(image_data)
256
+ File.binwrite(image_file, image_data)
257
+ nimages += 1
258
+ rescue
259
+ STDERR.puts "Unable to decode image (stream #{stream.reference.refno}). #{$!.message}"
260
+ end
269
261
  end
270
- nimages += 1
271
262
 
272
- rescue Exception => e
273
- STDERR.puts "Unable to decode image (stream #{stream.reference.refno}). #{e.message}"
274
- end
263
+ puts "Extracted #{nimages} images to '#{image_dir}'."
275
264
  end
276
-
277
- puts "Extracted #{nimages} images to '#{OUTPUT_DIR}/images'."
278
- end
279
-
280
- rescue SystemExit
281
- rescue Exception => e
282
- STDERR.puts "#{e.class}: #{e.message}"
283
- exit 1
284
- end
285
265
 
266
+ rescue
267
+ abort "#{$!.class}: #{$!.message}"
268
+ end