origami 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -1
- data/bin/gui/walker.rb +4 -4
- data/bin/pdfwalker +1 -1
- data/bin/shell/.irbrc +3 -1
- data/origami/encryption.rb +1 -1
- data/origami/javascript.rb +255 -61
- data/origami/linearization.rb +25 -10
- data/origami/parser.rb +13 -0
- data/origami/parsers/pdf/linear.rb +1 -1
- data/origami/pdf.rb +39 -11
- data/origami/stream.rb +1 -1
- metadata +4 -4
data/README
CHANGED
data/bin/gui/walker.rb
CHANGED
@@ -61,13 +61,13 @@ module PDFWalker #:nodoc:all
|
|
61
61
|
attr_reader :config
|
62
62
|
attr_reader :filename
|
63
63
|
|
64
|
-
def self.start
|
64
|
+
def self.start(file = nil)
|
65
65
|
Gtk.init
|
66
|
-
Walker.new
|
66
|
+
Walker.new(file)
|
67
67
|
Gtk.main
|
68
68
|
end
|
69
69
|
|
70
|
-
def initialize
|
70
|
+
def initialize(target_file = nil)
|
71
71
|
super("PDF Walker")
|
72
72
|
|
73
73
|
@config = Walker::Config.new
|
@@ -115,7 +115,7 @@ module PDFWalker #:nodoc:all
|
|
115
115
|
#maximize
|
116
116
|
show_all
|
117
117
|
|
118
|
-
open
|
118
|
+
open(target_file)
|
119
119
|
end
|
120
120
|
|
121
121
|
def error(msg)
|
data/bin/pdfwalker
CHANGED
data/bin/shell/.irbrc
CHANGED
@@ -10,7 +10,9 @@ include Origami
|
|
10
10
|
require 'console.rb'
|
11
11
|
require 'readline'
|
12
12
|
|
13
|
-
|
13
|
+
OPENSSL_SUPPORT = (defined?(OpenSSL).nil?) ? 'no' : 'yes'
|
14
|
+
JAVASCRIPT_SUPPORT = (defined?(PDF::JavaScript::Engine).nil?) ? 'no' : 'yes'
|
15
|
+
DEFAULT_BANNER = "Welcome to the PDF shell (Origami release #{Origami::VERSION}) [OpenSSL: #{OPENSSL_SUPPORT}, JavaScript: #{JAVASCRIPT_SUPPORT}]\n\n"
|
14
16
|
|
15
17
|
def set_completion
|
16
18
|
|
data/origami/encryption.rb
CHANGED
@@ -58,7 +58,7 @@ module Origami
|
|
58
58
|
end
|
59
59
|
|
60
60
|
encrypt_dict = get_doc_attr(:Encrypt)
|
61
|
-
handler = Encryption::Standard::Dictionary.new(encrypt_dict.
|
61
|
+
handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)
|
62
62
|
|
63
63
|
unless handler.Filter == :Standard
|
64
64
|
raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
|
data/origami/javascript.rb
CHANGED
@@ -26,110 +26,304 @@
|
|
26
26
|
module Origami
|
27
27
|
|
28
28
|
class PDF
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
29
|
+
|
30
|
+
module JavaScript
|
31
|
+
|
32
|
+
module Platforms
|
33
|
+
WINDOWS = "WIN"
|
34
|
+
UNIX = "UNIX"
|
35
|
+
MAC = "MAC"
|
36
|
+
end
|
37
|
+
|
38
|
+
module Viewers
|
39
|
+
ADOBE_READER = "Reader"
|
40
|
+
end
|
41
|
+
|
42
|
+
class AcrobatObject
|
43
|
+
#def to_s
|
44
|
+
# "[object #{self.class.to_s.split('::').last}]"
|
45
|
+
#end
|
46
|
+
end
|
47
|
+
|
48
|
+
class Doc < AcrobatObject
|
49
|
+
attr_reader :info
|
50
|
+
attr_accessor :disclosed
|
51
|
+
attr_reader :hidden
|
46
52
|
|
47
|
-
|
48
|
-
|
53
|
+
class Info < AcrobatObject
|
54
|
+
def initialize(doc)
|
55
|
+
@doc = doc
|
56
|
+
end
|
57
|
+
|
58
|
+
def title; @doc.title.to_s end
|
59
|
+
def author; @doc.author.to_s end
|
60
|
+
def subject; @doc.subject.to_s end
|
61
|
+
def keywords; @doc.keywords.to_s end
|
62
|
+
def creator; @doc.creator.to_s end
|
63
|
+
def creationDate; @doc.creation_date.to_s end
|
64
|
+
def modDate; @doc.mod_date.to_s end
|
65
|
+
end
|
66
|
+
|
67
|
+
def initialize(pdf)
|
68
|
+
@pdf = pdf
|
69
|
+
@disclosed = false
|
70
|
+
@hidden = false
|
71
|
+
@info = Info.new(pdf)
|
72
|
+
end
|
73
|
+
|
74
|
+
def numFields
|
75
|
+
fields = @pdf.fields
|
76
|
+
if fields.nil?
|
77
|
+
0
|
78
|
+
else
|
79
|
+
fields.size
|
49
80
|
end
|
50
81
|
end
|
51
82
|
|
52
|
-
|
53
|
-
DEFAULT_VIEWER_VERSION = 9
|
83
|
+
def numPages; @pdf.pages.size end
|
54
84
|
|
55
|
-
|
56
|
-
|
85
|
+
def title; @info.title end
|
86
|
+
def author; @info.author end
|
87
|
+
def subject; @info.subject end
|
88
|
+
def keywords; @info.keywords end
|
89
|
+
def creator; @info.creator end
|
90
|
+
def creationDate; @info.creationDate end
|
91
|
+
def modDate; @info.modDate end
|
57
92
|
|
58
|
-
|
59
|
-
|
60
|
-
@viewerVariation = @viewerType = "Reader"
|
61
|
-
end
|
93
|
+
def metadata
|
94
|
+
meta = @pdf.Catalog.Metadata
|
62
95
|
|
63
|
-
|
64
|
-
"[object App]"
|
65
|
-
end
|
96
|
+
(meta.data if meta.is_a?(Stream)).to_s
|
66
97
|
end
|
67
98
|
|
68
|
-
|
69
|
-
|
70
|
-
|
99
|
+
def filesize; @pdf.original_filesize end
|
100
|
+
def path; @pdf.original_filename end
|
101
|
+
# Base name of the file the document was parsed from.
# Returns an empty string when the document has no originating path
# (path is nil), instead of raising a TypeError from File.basename.
def documentFileName; File.basename(self.path.to_s) end
|
102
|
+
def URL; "file://#{self.path}" end
|
103
|
+
def baseURL; '' end
|
104
|
+
|
105
|
+
def dataObjects
|
106
|
+
data_objs = []
|
107
|
+
@pdf.ls_names(Names::Root::EMBEDDEDFILES).each do |name, file_desc|
|
108
|
+
if file_desc
|
109
|
+
ef = file_desc[:EF].solve
|
110
|
+
if ef
|
111
|
+
f = ef[:F].solve
|
112
|
+
data_objs.push Data.new(name, f.data.size) if f.is_a?(Stream)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
71
116
|
end
|
117
|
+
|
118
|
+
data_objs
|
119
|
+
end
|
72
120
|
|
73
|
-
|
74
|
-
|
75
|
-
def hide; end
|
121
|
+
def getDataObject(cName)
|
122
|
+
file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
|
76
123
|
|
77
|
-
|
78
|
-
|
124
|
+
if file_desc
|
125
|
+
ef = file_desc[:EF].solve
|
126
|
+
if ef
|
127
|
+
f = ef[:F].solve
|
128
|
+
Data.new(cName, f.data.size) if f.is_a?(Stream)
|
129
|
+
end
|
79
130
|
end
|
80
131
|
end
|
81
132
|
|
82
|
-
|
83
|
-
|
84
|
-
|
133
|
+
def getDataObjectContents(cName, bAllowAuth = false)
|
134
|
+
file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
|
135
|
+
|
136
|
+
if file_desc
|
137
|
+
ef = file_desc[:EF].solve
|
138
|
+
if ef
|
139
|
+
f = ef[:F].solve
|
140
|
+
ReadStream.new(f.data) if f.is_a?(Stream)
|
141
|
+
end
|
85
142
|
end
|
86
143
|
end
|
87
144
|
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
145
|
+
def getField(cName)
|
146
|
+
field = @pdf.get_field(cName)
|
147
|
+
|
148
|
+
Field.new(field) if field
|
149
|
+
end
|
150
|
+
|
151
|
+
def getNthFieldName(nIndex)
|
152
|
+
fields = @pdf.fields
|
153
|
+
|
154
|
+
(Field.new(fields[nIndex]).name if fields and fields[nIndex]).to_s
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
class App < AcrobatObject
|
159
|
+
attr_reader :platform, :viewerVariation, :viewerVersion
|
160
|
+
|
161
|
+
def initialize(platform, viewerVariation, viewerVersion)
|
162
|
+
@platform = platform
|
163
|
+
@viewerVariation, @viewerVersion = viewerVariation, viewerVersion
|
164
|
+
end
|
165
|
+
|
166
|
+
def response(params)
|
167
|
+
puts params.class
|
168
|
+
gets
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
class Console < AcrobatObject
|
173
|
+
def initialize(output = STDOUT)
|
174
|
+
@output = output
|
175
|
+
end
|
176
|
+
|
177
|
+
def println(msg)
|
178
|
+
@output.puts msg
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
class Util < AcrobatObject
|
183
|
+
def streamFromString(cString, cCharset = 'utf-8')
|
184
|
+
ReadStream.new(cString.to_s)
|
185
|
+
end
|
186
|
+
|
187
|
+
def stringFromStream(oStream, cCharset = 'utf-8')
|
188
|
+
if oStream.is_a?(ReadStream)
|
189
|
+
oStream.instance_variable_get(:@data).dup
|
94
190
|
end
|
191
|
+
end
|
192
|
+
end
|
95
193
|
|
96
|
-
|
97
|
-
|
194
|
+
class Field < AcrobatObject
|
195
|
+
def initialize(field)
|
196
|
+
@field = field
|
197
|
+
end
|
98
198
|
|
99
|
-
|
100
|
-
|
101
|
-
|
199
|
+
def doc; Doc.new(@field.pdf) end
|
200
|
+
def name
|
201
|
+
(@field[:T].solve.value if @field.has_key?(:T)).to_s
|
202
|
+
end
|
203
|
+
|
204
|
+
def value
|
205
|
+
@field[:V].solve.value if @field.has_key?(:V)
|
206
|
+
end
|
102
207
|
|
103
|
-
|
104
|
-
|
105
|
-
evaluate('this.doc.eval = function(script) {eval(script)}')
|
106
|
-
evaluate('this.doc = undefined')
|
208
|
+
def valueAsString
|
209
|
+
self.value.to_s
|
107
210
|
end
|
108
211
|
|
109
|
-
def
|
110
|
-
@
|
212
|
+
#
# Returns the JavaScript-visible type name of the wrapped field as a String.
#
# The result is derived from the field's :FT entry and, for buttons and
# choices, from the bits of its :Ff flags entry:
# 'button', 'radiobox', 'checkbox', 'text', 'signature', 'listbox' or 'combobox'.
# An empty string is returned when :FT is missing or unrecognized, or when a
# button/choice field carries no :Ff entry.
#
def type
  (if @field.has_key?(:FT)
    case @field[:FT].solve.value
    when PDF::Field::Type::BUTTON
      # The wrapped dictionary lives in @field; the button branch must read
      # its flags from there as well.
      if @field.has_key?(:Ff)
        flags = @field[:Ff].solve.value

        if (flags & Origami::Annotation::Widget::Button::Flags::PUSHBUTTON) != 0
          'button'
        elsif (flags & Origami::Annotation::Widget::Button::Flags::RADIO) != 0
          'radiobox'
        else
          'checkbox'
        end
      end
    when PDF::Field::Type::TEXT then 'text'
    when PDF::Field::Type::SIGNATURE then 'signature'
    when PDF::Field::Type::CHOICE
      if @field.has_key?(:Ff)
        if (@field[:Ff].solve.value & Origami::Annotation::Widget::Choice::Flags::COMBO).zero?
          'listbox'
        else
          'combobox'
        end
      end
    end
  end).to_s # nil (no match / no flags) becomes ''
end
|
112
240
|
|
113
241
|
end
|
114
242
|
|
243
|
+
class Data < AcrobatObject
|
244
|
+
attr_reader :name, :path, :size
|
245
|
+
attr_reader :creationDate, :modDate
|
246
|
+
attr_reader :description, :MIMEType
|
247
|
+
|
248
|
+
def initialize(name, size, path = nil, creationDate = nil, modDate = nil, description = nil, mimeType = nil)
|
249
|
+
@name, @path, @size = name, path, size
|
250
|
+
@creationDate, @modDate = creationDate, modDate
|
251
|
+
@description, @MIMEType = description, mimeType
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
class ReadStream < AcrobatObject
|
256
|
+
def initialize(data)
|
257
|
+
@data = data
|
258
|
+
end
|
259
|
+
|
260
|
+
def read(n)
|
261
|
+
@data.slice!(0, n)
|
262
|
+
end
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
begin
|
267
|
+
require 'v8'
|
268
|
+
|
269
|
+
class JavaScript::Engine
|
270
|
+
|
271
|
+
attr_reader :context
|
272
|
+
|
273
|
+
#
# Builds a JavaScript evaluation context bound to a document.
# _pdf_:: The document exposed to scripts through a JavaScript::Doc wrapper.
# _params_:: Optional overrides for :platform, :viewerVariation and :viewerVersion.
#
def initialize(pdf, params = {})
  # Each default must sit under the key that App.new reads it from below:
  # the platform is a Platforms string, the viewer version a number.
  options =
  {
    :platform => JavaScript::Platforms::WINDOWS,
    :viewerVariation => JavaScript::Viewers::ADOBE_READER,
    :viewerVersion => 9
  }.update(params)

  doc = JavaScript::Doc.new(pdf)
  app = JavaScript::App.new(options[:platform], options[:viewerVariation], options[:viewerVersion])
  util = JavaScript::Util.new
  console = JavaScript::Console.new

  # The Doc wrapper is handed to the context as its :with object; the other
  # host objects are registered under their own names.
  @context = V8::Context.new(:with => doc)
  @context['app'] = app
  @context['console'] = console
  @context['util'] = util
end
|
291
|
+
|
292
|
+
def exec(script)
|
293
|
+
@context.eval(script)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
115
297
|
rescue LoadError
|
116
298
|
end
|
117
299
|
end
|
118
300
|
|
119
|
-
if defined?(PDF::
|
301
|
+
if defined?(PDF::JavaScript::Engine)
|
120
302
|
module String
|
121
303
|
def eval_as_js(options = {})
|
122
|
-
runtime = options[:runtime] || PDF::
|
304
|
+
runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
|
123
305
|
runtime.exec(self.value)
|
124
306
|
end
|
125
307
|
end
|
126
308
|
|
127
309
|
class Stream
|
128
310
|
def eval_as_js(options = {})
|
129
|
-
runtime = options[:runtime] || PDF::
|
311
|
+
runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
|
130
312
|
runtime.exec(self.data)
|
131
313
|
end
|
132
314
|
end
|
315
|
+
|
316
|
+
class PDF
|
317
|
+
def eval_js(code)
|
318
|
+
get_js_engine.exec(code)
|
319
|
+
end
|
320
|
+
|
321
|
+
private
|
322
|
+
|
323
|
+
def get_js_engine(options = {})
|
324
|
+
@js_engine ||= PDF::JavaScript::Engine.new(self, options)
|
325
|
+
end
|
326
|
+
end
|
133
327
|
end
|
134
328
|
|
135
329
|
end
|
data/origami/linearization.rb
CHANGED
@@ -27,6 +27,9 @@ module Origami
|
|
27
27
|
|
28
28
|
class PDF
|
29
29
|
|
30
|
+
# Raised when a linearization-related operation cannot be carried out.
# Derives from StandardError so that a bare +rescue+ can handle it.
class LinearizationError < StandardError #:nodoc:
end
|
32
|
+
|
30
33
|
#
|
31
34
|
# Returns whether the current document is linearized.
|
32
35
|
#
|
@@ -45,7 +48,7 @@ module Origami
|
|
45
48
|
# This operation is xrefs destructive, should be fixed in the future to merge tables.
|
46
49
|
#
|
47
50
|
def delinearize!
|
48
|
-
raise
|
51
|
+
raise LinearizationError, 'Not a linearized document' unless is_linearized?
|
49
52
|
|
50
53
|
#
|
51
54
|
# Saves the first trailer.
|
@@ -70,6 +73,27 @@ module Origami
|
|
70
73
|
end
|
71
74
|
end
|
72
75
|
|
76
|
+
#
|
77
|
+
# Update the trailer.
|
78
|
+
#
|
79
|
+
last_trailer = (@revisions.last.trailer ||= Trailer.new)
|
80
|
+
|
81
|
+
last_trailer.dictionary ||= Dictionary.new
|
82
|
+
|
83
|
+
if prev_trailer.has_dictionary?
|
84
|
+
last_trailer.dictionary =
|
85
|
+
last_trailer.dictionary.merge(prev_trailer.dictionary)
|
86
|
+
else
|
87
|
+
xrefstm = get_object_by_offset(last_trailer.startxref)
|
88
|
+
raise LinearizationError,
|
89
|
+
'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)
|
90
|
+
|
91
|
+
last_trailer.dictionary[:Root] = xrefstm[:Root]
|
92
|
+
last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
|
93
|
+
last_trailer.dictionary[:Info] = xrefstm[:Info]
|
94
|
+
last_trailer.dictionary[:ID] = xrefstm[:ID]
|
95
|
+
end
|
96
|
+
|
73
97
|
#
|
74
98
|
# Remove all xrefs.
|
75
99
|
# Fix: Should be merged instead.
|
@@ -81,15 +105,6 @@ module Origami
|
|
81
105
|
#
|
82
106
|
remove_revision(0)
|
83
107
|
|
84
|
-
#
|
85
|
-
# Update the trailer.
|
86
|
-
#
|
87
|
-
last_trailer = (@revisions.last.trailer ||= Trailer.new)
|
88
|
-
|
89
|
-
last_trailer.dictionary ||= Dictionary.new
|
90
|
-
last_trailer.dictionary =
|
91
|
-
last_trailer.dictionary.merge(prev_trailer.dictionary)
|
92
|
-
|
93
108
|
self
|
94
109
|
end
|
95
110
|
|
data/origami/parser.rb
CHANGED
@@ -155,6 +155,7 @@ module Origami
|
|
155
155
|
StringScanner.new(stream.read)
|
156
156
|
end
|
157
157
|
elsif stream.is_a? ::String
|
158
|
+
@filename = stream
|
158
159
|
if ''.respond_to? :force_encoding
|
159
160
|
StringScanner.new(File.open(stream, "r", :encoding => 'binary').binmode.read)
|
160
161
|
else
|
@@ -246,6 +247,18 @@ module Origami
|
|
246
247
|
end
|
247
248
|
end
|
248
249
|
|
250
|
+
def target_filename
|
251
|
+
@filename
|
252
|
+
end
|
253
|
+
|
254
|
+
def target_filesize
|
255
|
+
@data.string.size if @data
|
256
|
+
end
|
257
|
+
|
258
|
+
def target_data
|
259
|
+
@data.string.dup if @data
|
260
|
+
end
|
261
|
+
|
249
262
|
private
|
250
263
|
|
251
264
|
def error(str = "") #:nodoc:
|
data/origami/pdf.rb
CHANGED
@@ -57,8 +57,8 @@ require 'origami/xfa'
|
|
57
57
|
|
58
58
|
module Origami
|
59
59
|
|
60
|
-
VERSION = "1.1.
|
61
|
-
REVISION = "$Revision: rev
|
60
|
+
VERSION = "1.1.2"
|
61
|
+
REVISION = "$Revision: rev 122/, 2011/09/26 12:12:39 darko $" #:nodoc:
|
62
62
|
|
63
63
|
#
|
64
64
|
# Global options for Origami.
|
@@ -206,28 +206,56 @@ module Origami
|
|
206
206
|
|
207
207
|
#
|
208
208
|
# Creates a new PDF instance.
|
209
|
-
#
|
209
|
+
# _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
|
210
210
|
#
|
211
|
-
def initialize(
|
211
|
+
def initialize(parser = nil)
|
212
212
|
@header = PDF::Header.new
|
213
213
|
@revisions = []
|
214
214
|
|
215
215
|
add_new_revision
|
216
216
|
@revisions.first.trailer = Trailer.new
|
217
217
|
|
218
|
-
|
218
|
+
if parser
|
219
|
+
@parser = parser
|
220
|
+
else
|
221
|
+
init
|
222
|
+
end
|
219
223
|
end
|
220
224
|
|
225
|
+
#
|
226
|
+
# Original file name if parsed from disk, nil otherwise.
|
227
|
+
#
|
228
|
+
def original_filename
|
229
|
+
@parser.target_filename if @parser
|
230
|
+
end
|
231
|
+
|
232
|
+
#
|
233
|
+
# Original file size if parsed from a data stream, nil otherwise.
|
234
|
+
#
|
235
|
+
def original_filesize
|
236
|
+
@parser.target_filesize if @parser
|
237
|
+
end
|
238
|
+
|
239
|
+
#
|
240
|
+
# Original data parsed to create this document, nil if created from scratch.
|
241
|
+
#
|
242
|
+
def original_data
|
243
|
+
@parser.target_data if @parser
|
244
|
+
end
|
221
245
|
|
222
246
|
#
|
223
|
-
# Serializes the current PDF
|
247
|
+
# Serializes the current PDF.
|
224
248
|
#
|
225
249
|
#
# Writes a gzip-compressed Marshal dump of this object to a file.
# _filename_:: Path of the file to write.
#
# The @parser reference is left out of the dump and is always restored
# afterwards, even when dumping raises. Returns self.
#
def serialize(filename)
  parser = @parser
  @parser = nil # do not serialize the parser

  begin
    # Dump before opening the output so a failed dump does not leave a
    # truncated file behind.
    dump = Marshal.dump(self)
  ensure
    @parser = parser
  end

  Zlib::GzipWriter.open(filename) { |gz|
    gz.write dump
  }

  self
end
|
232
260
|
|
233
261
|
#
|
data/origami/stream.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: origami
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 1.1.
|
9
|
+
- 2
|
10
|
+
version: 1.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- "Guillaume Delugr\xC3\xA9"
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-09-
|
18
|
+
date: 2011-09-26 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|