origami 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -14,9 +14,10 @@ VERSION
14
14
 
15
15
  1.1
16
16
 
17
- DEPENDENCIES
17
+ OPTIONAL DEPENDENCIES
18
18
 
19
19
  - Ruby-GTK2 (only for GUI), http://ruby-gnome2.sourceforge.jp/
20
+ - Ruby with OpenSSL support
20
21
 
21
22
  INSTALL
22
23
 
@@ -61,13 +61,13 @@ module PDFWalker #:nodoc:all
61
61
  attr_reader :config
62
62
  attr_reader :filename
63
63
 
64
- def self.start
64
+ def self.start(file = nil)
65
65
  Gtk.init
66
- Walker.new
66
+ Walker.new(file)
67
67
  Gtk.main
68
68
  end
69
69
 
70
- def initialize
70
+ def initialize(target_file = nil)
71
71
  super("PDF Walker")
72
72
 
73
73
  @config = Walker::Config.new
@@ -115,7 +115,7 @@ module PDFWalker #:nodoc:all
115
115
  #maximize
116
116
  show_all
117
117
 
118
- open
118
+ open(target_file)
119
119
  end
120
120
 
121
121
  def error(msg)
@@ -3,5 +3,5 @@
3
3
  $:.unshift "#{File.dirname(__FILE__)}" if RUBY_VERSION >= '1.9'
4
4
  require 'gui/walker'
5
5
 
6
- PDFWalker::Walker.start
6
+ PDFWalker::Walker.start(ARGV[0])
7
7
 
@@ -10,7 +10,9 @@ include Origami
10
10
  require 'console.rb'
11
11
  require 'readline'
12
12
 
13
- DEFAULT_BANNER = "Welcome to the PDF shell (Origami release #{Origami::VERSION})\n\n"
13
+ OPENSSL_SUPPORT = (defined?(OpenSSL).nil?) ? 'no' : 'yes'
14
+ JAVASCRIPT_SUPPORT = (defined?(PDF::JavaScript::Engine).nil?) ? 'no' : 'yes'
15
+ DEFAULT_BANNER = "Welcome to the PDF shell (Origami release #{Origami::VERSION}) [OpenSSL: #{OPENSSL_SUPPORT}, JavaScript: #{JAVASCRIPT_SUPPORT}]\n\n"
14
16
 
15
17
  def set_completion
16
18
 
@@ -58,7 +58,7 @@ module Origami
58
58
  end
59
59
 
60
60
  encrypt_dict = get_doc_attr(:Encrypt)
61
- handler = Encryption::Standard::Dictionary.new(encrypt_dict.copy)
61
+ handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)
62
62
 
63
63
  unless handler.Filter == :Standard
64
64
  raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
@@ -26,110 +26,304 @@
26
26
  module Origami
27
27
 
28
28
  class PDF
29
- begin
30
- require 'johnson'
31
-
32
- class JavaScriptEngine < Johnson::SpiderMonkey::Runtime
33
-
34
- class DocumentObject
35
- attr_reader :author, :creator, :creationDate, :keywords, :modDate, :producer, :subject, :title
36
-
37
- def initialize(pdf)
38
- @pdf = pdf
39
-
40
- @author = @pdf.author || ''
41
- @creator = @pdf.creator || ''
42
- @keywords = @pdf.keywords || ''
43
- @producer = @pdf.producer || ''
44
- @subject = @pdf.subject || ''
45
- end
29
+
30
+ module JavaScript
31
+
32
+ module Platforms
33
+ WINDOWS = "WIN"
34
+ UNIX = "UNIX"
35
+ MAC = "MAC"
36
+ end
37
+
38
+ module Viewers
39
+ ADOBE_READER = "Reader"
40
+ end
41
+
42
+ class AcrobatObject
43
+ #def to_s
44
+ # "[object #{self.class.to_s.split('::').last}]"
45
+ #end
46
+ end
47
+
48
+ class Doc < AcrobatObject
49
+ attr_reader :info
50
+ attr_accessor :disclosed
51
+ attr_reader :hidden
46
52
 
47
- def to_s
48
- "[object Doc]"
53
+ class Info < AcrobatObject
54
+ def initialize(doc)
55
+ @doc = doc
56
+ end
57
+
58
+ def title; @doc.title.to_s end
59
+ def author; @doc.author.to_s end
60
+ def subject; @doc.subject.to_s end
61
+ def keywords; @doc.keywords.to_s end
62
+ def creator; @doc.creator.to_s end
63
+ def creationDate; @doc.creation_date.to_s end
64
+ def modDate; @doc.mod_date.to_s end
65
+ end
66
+
67
+ def initialize(pdf)
68
+ @pdf = pdf
69
+ @disclosed = false
70
+ @hidden = false
71
+ @info = Info.new(pdf)
72
+ end
73
+
74
+ def numFields
75
+ fields = @pdf.fields
76
+ if fields.nil?
77
+ 0
78
+ else
79
+ fields.size
49
80
  end
50
81
  end
51
82
 
52
- class ApplicationObject
53
- DEFAULT_VIEWER_VERSION = 9
83
+ def numPages; @pdf.pages.size end
54
84
 
55
- attr_reader :viewerVersion
56
- attr_reader :viewerVariation
85
+ def title; @info.title end
86
+ def author; @info.author end
87
+ def subject; @info.subject end
88
+ def keywords; @info.keywords end
89
+ def creator; @info.creator end
90
+ def creationDate; @info.creationDate end
91
+ def modDate; @info.modDate end
57
92
 
58
- def initialize
59
- @viewerVersion = DEFAULT_VIEWER_VERSION
60
- @viewerVariation = @viewerType = "Reader"
61
- end
93
+ def metadata
94
+ meta = @pdf.Catalog.Metadata
62
95
 
63
- def to_s
64
- "[object App]"
65
- end
96
+ (meta.data if meta.is_a?(Stream)).to_s
66
97
  end
67
98
 
68
- class ConsoleObject
69
- def println(msg)
70
- puts msg.to_s
99
+ def filesize; @pdf.original_filesize end
100
+ def path; @pdf.original_filename end
101
+ def documentFileName; File.basename(self.path) end
102
+ def URL; "file://#{self.path}" end
103
+ def baseURL; '' end
104
+
105
+ def dataObjects
106
+ data_objs = []
107
+ @pdf.ls_names(Names::Root::EMBEDDEDFILES).each do |name, file_desc|
108
+ if file_desc
109
+ ef = file_desc[:EF].solve
110
+ if ef
111
+ f = ef[:F].solve
112
+ data_objs.push Data.new(name, f.data.size) if f.is_a?(Stream)
113
+ end
114
+ end
115
+
71
116
  end
117
+
118
+ data_objs
119
+ end
72
120
 
73
- def show; end
74
- def clear; end
75
- def hide; end
121
+ def getDataObject(cName)
122
+ file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
76
123
 
77
- def to_s
78
- "[object Console]"
124
+ if file_desc
125
+ ef = file_desc[:EF].solve
126
+ if ef
127
+ f = ef[:F].solve
128
+ Data.new(cName, f.data.size) if f.is_a?(Stream)
129
+ end
79
130
  end
80
131
  end
81
132
 
82
- class UtilObject
83
- def to_s
84
- "[object Util]"
133
+ def getDataObjectContents(cName, bAllowAuth = false)
134
+ file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
135
+
136
+ if file_desc
137
+ ef = file_desc[:EF].solve
138
+ if ef
139
+ f = ef[:F].solve
140
+ ReadStream.new(f.data) if f.is_a?(Stream)
141
+ end
85
142
  end
86
143
  end
87
144
 
88
- def initialize(pdf, options = {})
89
- super()
90
-
91
- self['app'] = ApplicationObject.new
92
- if options.has_key?(:viewerVersion)
93
- self['app'].instance_variable_set :@viewerVersion, options[:viewerVersion]
145
+ def getField(cName)
146
+ field = @pdf.get_field(cName)
147
+
148
+ Field.new(field) if field
149
+ end
150
+
151
+ def getNthFieldName(nIndex)
152
+ fields = @pdf.fields
153
+
154
+ (Field.new(fields[nIndex]).name if fields and fields[nIndex]).to_s
155
+ end
156
+ end
157
+
158
+ class App < AcrobatObject
159
+ attr_reader :platform, :viewerVariation, :viewerVersion
160
+
161
+ def initialize(platform, viewerVariation, viewerVersion)
162
+ @platform = platform
163
+ @viewerVariation, @viewerVersion = viewerVariation, viewerVersion
164
+ end
165
+
166
+ def response(params)
167
+ puts params.class
168
+ gets
169
+ end
170
+ end
171
+
172
+ class Console < AcrobatObject
173
+ def initialize(output = STDOUT)
174
+ @output = output
175
+ end
176
+
177
+ def println(msg)
178
+ @output.puts msg
179
+ end
180
+ end
181
+
182
+ class Util < AcrobatObject
183
+ def streamFromString(cString, cCharset = 'utf-8')
184
+ ReadStream.new(cString.to_s)
185
+ end
186
+
187
+ def stringFromStream(oStream, cCharset = 'utf-8')
188
+ if oStream.is_a?(ReadStream)
189
+ oStream.instance_variable_get(:@data).dup
94
190
  end
191
+ end
192
+ end
95
193
 
96
- self['console'] = ConsoleObject.new
97
- self['util'] = UtilObject.new
194
+ class Field < AcrobatObject
195
+ def initialize(field)
196
+ @field = field
197
+ end
98
198
 
99
- # Johnson includes the Ruby namespace in the global scope.
100
- # Unsets that for obvious security reasons.
101
- self['Ruby'] = nil
199
+ def doc; Doc.new(@field.pdf) end
200
+ def name
201
+ (@field[:T].solve.value if @field.has_key?(:T)).to_s
202
+ end
203
+
204
+ def value
205
+ @field[:V].solve.value if @field.has_key?(:V)
206
+ end
102
207
 
103
- # Hack the 'this' object to point to the DocumentObject
104
- @doc_obj = self['doc'] = DocumentObject.new(pdf)
105
- evaluate('this.doc.eval = function(script) {eval(script)}')
106
- evaluate('this.doc = undefined')
208
+ def valueAsString
209
+ self.value.to_s
107
210
  end
108
211
 
109
- def exec(script)
110
- @doc_obj.eval(script)
212
+ def type
213
+ (if @field.has_key?(:FT)
214
+ case @field[:FT].solve.value
215
+ when PDF::Field::Type::BUTTON
216
+ if @fields.has_key?(:Ff)
217
+ flags = @fields[:Ff].solve.value
218
+
219
+ if (flags & Origami::Annotation::Widget::Button::Flags::PUSHBUTTON) != 0
220
+ 'button'
221
+ elsif (flags & Origami::Annotation::Widget::Button::Flags::RADIO) != 0
222
+ 'radiobox'
223
+ else
224
+ 'checkbox'
225
+ end
226
+ end
227
+ when PDF::Field::Type::TEXT then 'text'
228
+ when PDF::Field::Type::SIGNATURE then 'signature'
229
+ when PDF::Field::Type::CHOICE
230
+ if @field.has_key?(:Ff)
231
+ if (@field[:Ff].solve.value & Origami::Annotation::Widget::Choice::Flags::COMBO).zero?
232
+ 'listbox'
233
+ else
234
+ 'combobox'
235
+ end
236
+ end
237
+ end
238
+ end).to_s
111
239
  end
112
240
 
113
241
  end
114
242
 
243
+ class Data < AcrobatObject
244
+ attr_reader :name, :path, :size
245
+ attr_reader :creationDate, :modDate
246
+ attr_reader :description, :MIMEType
247
+
248
+ def initialize(name, size, path = nil, creationDate = nil, modDate = nil, description = nil, mimeType = nil)
249
+ @name, @path, @size = name, path, size
250
+ @creationDate, @modDate = creationDate, modDate
251
+ @description, @MIMEType = description, mimeType
252
+ end
253
+ end
254
+
255
+ class ReadStream < AcrobatObject
256
+ def initialize(data)
257
+ @data = data
258
+ end
259
+
260
+ def read(n)
261
+ @data.slice!(0, n)
262
+ end
263
+ end
264
+ end
265
+
266
+ begin
267
+ require 'v8'
268
+
269
+ class JavaScript::Engine
270
+
271
+ attr_reader :context
272
+
273
+ def initialize(pdf, params = {})
274
+ options =
275
+ {
276
+ :viewerVersion => JavaScript::Platforms::WINDOWS,
277
+ :viewerVariation => JavaScript::Viewers::ADOBE_READER,
278
+ :platform => 9
279
+ }.update(params)
280
+
281
+ doc = JavaScript::Doc.new(pdf)
282
+ app = JavaScript::App.new(options[:platform], options[:viewerVariation], options[:viewerVersion])
283
+ util = JavaScript::Util.new
284
+ console = JavaScript::Console.new
285
+
286
+ @context = V8::Context.new(:with => doc)
287
+ @context['app'] = app
288
+ @context['console'] = console
289
+ @context['util'] = util
290
+ end
291
+
292
+ def exec(script)
293
+ @context.eval(script)
294
+ end
295
+ end
296
+
115
297
  rescue LoadError
116
298
  end
117
299
  end
118
300
 
119
- if defined?(PDF::JavaScriptEngine)
301
+ if defined?(PDF::JavaScript::Engine)
120
302
  module String
121
303
  def eval_as_js(options = {})
122
- runtime = options[:runtime] || PDF::JavaScriptEngine.new(self.pdf, options)
304
+ runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
123
305
  runtime.exec(self.value)
124
306
  end
125
307
  end
126
308
 
127
309
  class Stream
128
310
  def eval_as_js(options = {})
129
- runtime = options[:runtime] || PDF::JavaScriptEngine.new(self.pdf, options)
311
+ runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
130
312
  runtime.exec(self.data)
131
313
  end
132
314
  end
315
+
316
+ class PDF
317
+ def eval_js(code)
318
+ get_js_engine.exec(code)
319
+ end
320
+
321
+ private
322
+
323
+ def get_js_engine(options = {})
324
+ @js_engine ||= PDF::JavaScript::Engine.new(self, options)
325
+ end
326
+ end
133
327
  end
134
328
 
135
329
  end
@@ -27,6 +27,9 @@ module Origami
27
27
 
28
28
  class PDF
29
29
 
30
+ class LinearizationError < Exception #:nodoc:
31
+ end
32
+
30
33
  #
31
34
  # Returns whether the current document is linearized.
32
35
  #
@@ -45,7 +48,7 @@ module Origami
45
48
  # This operation is xrefs destructive, should be fixed in the future to merge tables.
46
49
  #
47
50
  def delinearize!
48
- raise RuntimeError, 'Not a linearized document' unless is_linearized?
51
+ raise LinearizationError, 'Not a linearized document' unless is_linearized?
49
52
 
50
53
  #
51
54
  # Saves the first trailer.
@@ -70,6 +73,27 @@ module Origami
70
73
  end
71
74
  end
72
75
 
76
+ #
77
+ # Update the trailer.
78
+ #
79
+ last_trailer = (@revisions.last.trailer ||= Trailer.new)
80
+
81
+ last_trailer.dictionary ||= Dictionary.new
82
+
83
+ if prev_trailer.has_dictionary?
84
+ last_trailer.dictionary =
85
+ last_trailer.dictionary.merge(prev_trailer.dictionary)
86
+ else
87
+ xrefstm = get_object_by_offset(last_trailer.startxref)
88
+ raise LinearizationError,
89
+ 'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)
90
+
91
+ last_trailer.dictionary[:Root] = xrefstm[:Root]
92
+ last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
93
+ last_trailer.dictionary[:Info] = xrefstm[:Info]
94
+ last_trailer.dictionary[:ID] = xrefstm[:ID]
95
+ end
96
+
73
97
  #
74
98
  # Remove all xrefs.
75
99
  # Fix: Should be merged instead.
@@ -81,15 +105,6 @@ module Origami
81
105
  #
82
106
  remove_revision(0)
83
107
 
84
- #
85
- # Update the trailer.
86
- #
87
- last_trailer = (@revisions.last.trailer ||= Trailer.new)
88
-
89
- last_trailer.dictionary ||= Dictionary.new
90
- last_trailer.dictionary =
91
- last_trailer.dictionary.merge(prev_trailer.dictionary)
92
-
93
108
  self
94
109
  end
95
110
 
@@ -155,6 +155,7 @@ module Origami
155
155
  StringScanner.new(stream.read)
156
156
  end
157
157
  elsif stream.is_a? ::String
158
+ @filename = stream
158
159
  if ''.respond_to? :force_encoding
159
160
  StringScanner.new(File.open(stream, "r", :encoding => 'binary').binmode.read)
160
161
  else
@@ -246,6 +247,18 @@ module Origami
246
247
  end
247
248
  end
248
249
 
250
+ def target_filename
251
+ @filename
252
+ end
253
+
254
+ def target_filesize
255
+ @data.string.size if @data
256
+ end
257
+
258
+ def target_data
259
+ @data.string.dup if @data
260
+ end
261
+
249
262
  private
250
263
 
251
264
  def error(str = "") #:nodoc:
@@ -39,7 +39,7 @@ module Origami
39
39
  @data.pos = @data.pos - 5
40
40
  end
41
41
 
42
- pdf = PDF.new(false)
42
+ pdf = PDF.new(self)
43
43
 
44
44
  info "...Reading header..."
45
45
  begin
@@ -57,8 +57,8 @@ require 'origami/xfa'
57
57
 
58
58
  module Origami
59
59
 
60
- VERSION = "1.1.1"
61
- REVISION = "$Revision: rev 117/, 2011/09/14 15:24:40 darko $" #:nodoc:
60
+ VERSION = "1.1.2"
61
+ REVISION = "$Revision: rev 122/, 2011/09/26 12:12:39 darko $" #:nodoc:
62
62
 
63
63
  #
64
64
  # Global options for Origami.
@@ -206,28 +206,56 @@ module Origami
206
206
 
207
207
  #
208
208
  # Creates a new PDF instance.
209
- # _init_structure_:: If this flag is set, then some structures will be automatically generated while manipulating this PDF. Set it if you are creating a new PDF file, this _must_ _not_ be used when parsing an existing file.
209
+ # _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
210
210
  #
211
- def initialize(init_structure = true)
211
+ def initialize(parser = nil)
212
212
  @header = PDF::Header.new
213
213
  @revisions = []
214
214
 
215
215
  add_new_revision
216
216
  @revisions.first.trailer = Trailer.new
217
217
 
218
- init if init_structure
218
+ if parser
219
+ @parser = parser
220
+ else
221
+ init
222
+ end
219
223
  end
220
224
 
225
+ #
226
+ # Original file name if parsed from disk, nil otherwise.
227
+ #
228
+ def original_filename
229
+ @parser.target_filename if @parser
230
+ end
231
+
232
+ #
233
+ # Original file size if parsed from a data stream, nil otherwise.
234
+ #
235
+ def original_filesize
236
+ @parser.target_filesize if @parser
237
+ end
238
+
239
+ #
240
+ # Original data parsed to create this document, nil if created from scratch.
241
+ #
242
+ def original_data
243
+ @parser.target_data if @parser
244
+ end
221
245
 
222
246
  #
223
- # Serializes the current PDF
247
+ # Serializes the current PDF.
224
248
  #
225
249
  def serialize(filename)
226
- Zlib::GzipWriter.open(filename) { |gz|
227
- gz.write Marshal.dump(self)
228
- }
229
-
230
- self
250
+ parser = @parser
251
+ @parser = nil # do not serialize the parser
252
+
253
+ Zlib::GzipWriter.open(filename) { |gz|
254
+ gz.write Marshal.dump(self)
255
+ }
256
+
257
+ @parser = parser
258
+ self
231
259
  end
232
260
 
233
261
  #
@@ -436,7 +436,7 @@ module Origami
436
436
  obj.set_indirect(true)
437
437
  end
438
438
 
439
- @data = prolog + data
439
+ self.data = prolog + data
440
440
 
441
441
  @dictionary[:N] = @objects.size
442
442
  @dictionary[:First] = prolog.size
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: origami
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 1
10
- version: 1.1.1
9
+ - 2
10
+ version: 1.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - "Guillaume Delugr\xC3\xA9"
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-22 00:00:00 +02:00
18
+ date: 2011-09-26 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21