origami 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -14,9 +14,10 @@ VERSION
14
14
 
15
15
  1.1
16
16
 
17
- DEPENDENCIES
17
+ OPTIONAL DEPENDENCIES
18
18
 
19
19
  - Ruby-GTK2 (only for GUI), http://ruby-gnome2.sourceforge.jp/
20
+ - Ruby with OpenSSL support
20
21
 
21
22
  INSTALL
22
23
 
@@ -61,13 +61,13 @@ module PDFWalker #:nodoc:all
61
61
  attr_reader :config
62
62
  attr_reader :filename
63
63
 
64
- def self.start
64
+ def self.start(file = nil)
65
65
  Gtk.init
66
- Walker.new
66
+ Walker.new(file)
67
67
  Gtk.main
68
68
  end
69
69
 
70
- def initialize
70
+ def initialize(target_file = nil)
71
71
  super("PDF Walker")
72
72
 
73
73
  @config = Walker::Config.new
@@ -115,7 +115,7 @@ module PDFWalker #:nodoc:all
115
115
  #maximize
116
116
  show_all
117
117
 
118
- open
118
+ open(target_file)
119
119
  end
120
120
 
121
121
  def error(msg)
@@ -3,5 +3,5 @@
3
3
  $:.unshift "#{File.dirname(__FILE__)}" if RUBY_VERSION >= '1.9'
4
4
  require 'gui/walker'
5
5
 
6
- PDFWalker::Walker.start
6
+ PDFWalker::Walker.start(ARGV[0])
7
7
 
@@ -10,7 +10,9 @@ include Origami
10
10
  require 'console.rb'
11
11
  require 'readline'
12
12
 
13
- DEFAULT_BANNER = "Welcome to the PDF shell (Origami release #{Origami::VERSION})\n\n"
13
+ OPENSSL_SUPPORT = (defined?(OpenSSL).nil?) ? 'no' : 'yes'
14
+ JAVASCRIPT_SUPPORT = (defined?(PDF::JavaScript::Engine).nil?) ? 'no' : 'yes'
15
+ DEFAULT_BANNER = "Welcome to the PDF shell (Origami release #{Origami::VERSION}) [OpenSSL: #{OPENSSL_SUPPORT}, JavaScript: #{JAVASCRIPT_SUPPORT}]\n\n"
14
16
 
15
17
  def set_completion
16
18
 
@@ -58,7 +58,7 @@ module Origami
58
58
  end
59
59
 
60
60
  encrypt_dict = get_doc_attr(:Encrypt)
61
- handler = Encryption::Standard::Dictionary.new(encrypt_dict.copy)
61
+ handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)
62
62
 
63
63
  unless handler.Filter == :Standard
64
64
  raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
@@ -26,110 +26,304 @@
26
26
  module Origami
27
27
 
28
28
  class PDF
29
- begin
30
- require 'johnson'
31
-
32
- class JavaScriptEngine < Johnson::SpiderMonkey::Runtime
33
-
34
- class DocumentObject
35
- attr_reader :author, :creator, :creationDate, :keywords, :modDate, :producer, :subject, :title
36
-
37
- def initialize(pdf)
38
- @pdf = pdf
39
-
40
- @author = @pdf.author || ''
41
- @creator = @pdf.creator || ''
42
- @keywords = @pdf.keywords || ''
43
- @producer = @pdf.producer || ''
44
- @subject = @pdf.subject || ''
45
- end
29
+
30
+ module JavaScript
31
+
32
+ module Platforms
33
+ WINDOWS = "WIN"
34
+ UNIX = "UNIX"
35
+ MAC = "MAC"
36
+ end
37
+
38
+ module Viewers
39
+ ADOBE_READER = "Reader"
40
+ end
41
+
42
+ class AcrobatObject
43
+ #def to_s
44
+ # "[object #{self.class.to_s.split('::').last}]"
45
+ #end
46
+ end
47
+
48
+ class Doc < AcrobatObject
49
+ attr_reader :info
50
+ attr_accessor :disclosed
51
+ attr_reader :hidden
46
52
 
47
- def to_s
48
- "[object Doc]"
53
+ class Info < AcrobatObject
54
+ def initialize(doc)
55
+ @doc = doc
56
+ end
57
+
58
+ def title; @doc.title.to_s end
59
+ def author; @doc.author.to_s end
60
+ def subject; @doc.subject.to_s end
61
+ def keywords; @doc.keywords.to_s end
62
+ def creator; @doc.creator.to_s end
63
+ def creationDate; @doc.creation_date.to_s end
64
+ def modDate; @doc.mod_date.to_s end
65
+ end
66
+
67
+ def initialize(pdf)
68
+ @pdf = pdf
69
+ @disclosed = false
70
+ @hidden = false
71
+ @info = Info.new(pdf)
72
+ end
73
+
74
+ def numFields
75
+ fields = @pdf.fields
76
+ if fields.nil?
77
+ 0
78
+ else
79
+ fields.size
49
80
  end
50
81
  end
51
82
 
52
- class ApplicationObject
53
- DEFAULT_VIEWER_VERSION = 9
83
+ def numPages; @pdf.pages.size end
54
84
 
55
- attr_reader :viewerVersion
56
- attr_reader :viewerVariation
85
+ def title; @info.title end
86
+ def author; @info.author end
87
+ def subject; @info.subject end
88
+ def keywords; @info.keywords end
89
+ def creator; @info.creator end
90
+ def creationDate; @info.creationDate end
91
+ def modDate; @info.modDate end
57
92
 
58
- def initialize
59
- @viewerVersion = DEFAULT_VIEWER_VERSION
60
- @viewerVariation = @viewerType = "Reader"
61
- end
93
+ def metadata
94
+ meta = @pdf.Catalog.Metadata
62
95
 
63
- def to_s
64
- "[object App]"
65
- end
96
+ (meta.data if meta.is_a?(Stream)).to_s
66
97
  end
67
98
 
68
- class ConsoleObject
69
- def println(msg)
70
- puts msg.to_s
99
+ def filesize; @pdf.original_filesize end
100
+ def path; @pdf.original_filename end
101
+ def documentFileName; File.basename(self.path) end
102
+ def URL; "file://#{self.path}" end
103
+ def baseURL; '' end
104
+
105
+ def dataObjects
106
+ data_objs = []
107
+ @pdf.ls_names(Names::Root::EMBEDDEDFILES).each do |name, file_desc|
108
+ if file_desc
109
+ ef = file_desc[:EF].solve
110
+ if ef
111
+ f = ef[:F].solve
112
+ data_objs.push Data.new(name, f.data.size) if f.is_a?(Stream)
113
+ end
114
+ end
115
+
71
116
  end
117
+
118
+ data_objs
119
+ end
72
120
 
73
- def show; end
74
- def clear; end
75
- def hide; end
121
+ def getDataObject(cName)
122
+ file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
76
123
 
77
- def to_s
78
- "[object Console]"
124
+ if file_desc
125
+ ef = file_desc[:EF].solve
126
+ if ef
127
+ f = ef[:F].solve
128
+ Data.new(cName, f.data.size) if f.is_a?(Stream)
129
+ end
79
130
  end
80
131
  end
81
132
 
82
- class UtilObject
83
- def to_s
84
- "[object Util]"
133
+ def getDataObjectContents(cName, bAllowAuth = false)
134
+ file_desc = @pdf.resolve_name(Names::Root::EMBEDDEDFILES, cName)
135
+
136
+ if file_desc
137
+ ef = file_desc[:EF].solve
138
+ if ef
139
+ f = ef[:F].solve
140
+ ReadStream.new(f.data) if f.is_a?(Stream)
141
+ end
85
142
  end
86
143
  end
87
144
 
88
- def initialize(pdf, options = {})
89
- super()
90
-
91
- self['app'] = ApplicationObject.new
92
- if options.has_key?(:viewerVersion)
93
- self['app'].instance_variable_set :@viewerVersion, options[:viewerVersion]
145
+ def getField(cName)
146
+ field = @pdf.get_field(cName)
147
+
148
+ Field.new(field) if field
149
+ end
150
+
151
+ def getNthFieldName(nIndex)
152
+ fields = @pdf.fields
153
+
154
+ (Field.new(fields[nIndex]).name if fields and fields[nIndex]).to_s
155
+ end
156
+ end
157
+
158
+ class App < AcrobatObject
159
+ attr_reader :platform, :viewerVariation, :viewerVersion
160
+
161
+ def initialize(platform, viewerVariation, viewerVersion)
162
+ @platform = platform
163
+ @viewerVariation, @viewerVersion = viewerVariation, viewerVersion
164
+ end
165
+
166
+ def response(params)
167
+ puts params.class
168
+ gets
169
+ end
170
+ end
171
+
172
+ class Console < AcrobatObject
173
+ def initialize(output = STDOUT)
174
+ @output = output
175
+ end
176
+
177
+ def println(msg)
178
+ @output.puts msg
179
+ end
180
+ end
181
+
182
+ class Util < AcrobatObject
183
+ def streamFromString(cString, cCharset = 'utf-8')
184
+ ReadStream.new(cString.to_s)
185
+ end
186
+
187
+ def stringFromStream(oStream, cCharset = 'utf-8')
188
+ if oStream.is_a?(ReadStream)
189
+ oStream.instance_variable_get(:@data).dup
94
190
  end
191
+ end
192
+ end
95
193
 
96
- self['console'] = ConsoleObject.new
97
- self['util'] = UtilObject.new
194
+ class Field < AcrobatObject
195
+ def initialize(field)
196
+ @field = field
197
+ end
98
198
 
99
- # Johnson includes the Ruby namespace in the global scope.
100
- # Unsets that for obvious security reasons.
101
- self['Ruby'] = nil
199
+ def doc; Doc.new(@field.pdf) end
200
+ def name
201
+ (@field[:T].solve.value if @field.has_key?(:T)).to_s
202
+ end
203
+
204
+ def value
205
+ @field[:V].solve.value if @field.has_key?(:V)
206
+ end
102
207
 
103
- # Hack the 'this' object to point to the DocumentObject
104
- @doc_obj = self['doc'] = DocumentObject.new(pdf)
105
- evaluate('this.doc.eval = function(script) {eval(script)}')
106
- evaluate('this.doc = undefined')
208
+ def valueAsString
209
+ self.value.to_s
107
210
  end
108
211
 
109
- def exec(script)
110
- @doc_obj.eval(script)
212
+ def type
213
+ (if @field.has_key?(:FT)
214
+ case @field[:FT].solve.value
215
+ when PDF::Field::Type::BUTTON
216
+ if @fields.has_key?(:Ff)
217
+ flags = @fields[:Ff].solve.value
218
+
219
+ if (flags & Origami::Annotation::Widget::Button::Flags::PUSHBUTTON) != 0
220
+ 'button'
221
+ elsif (flags & Origami::Annotation::Widget::Button::Flags::RADIO) != 0
222
+ 'radiobox'
223
+ else
224
+ 'checkbox'
225
+ end
226
+ end
227
+ when PDF::Field::Type::TEXT then 'text'
228
+ when PDF::Field::Type::SIGNATURE then 'signature'
229
+ when PDF::Field::Type::CHOICE
230
+ if @field.has_key?(:Ff)
231
+ if (@field[:Ff].solve.value & Origami::Annotation::Widget::Choice::Flags::COMBO).zero?
232
+ 'listbox'
233
+ else
234
+ 'combobox'
235
+ end
236
+ end
237
+ end
238
+ end).to_s
111
239
  end
112
240
 
113
241
  end
114
242
 
243
+ class Data < AcrobatObject
244
+ attr_reader :name, :path, :size
245
+ attr_reader :creationDate, :modDate
246
+ attr_reader :description, :MIMEType
247
+
248
+ def initialize(name, size, path = nil, creationDate = nil, modDate = nil, description = nil, mimeType = nil)
249
+ @name, @path, @size = name, path, size
250
+ @creationDate, @modDate = creationDate, modDate
251
+ @description, @MIMEType = description, mimeType
252
+ end
253
+ end
254
+
255
+ class ReadStream < AcrobatObject
256
+ def initialize(data)
257
+ @data = data
258
+ end
259
+
260
+ def read(n)
261
+ @data.slice!(0, n)
262
+ end
263
+ end
264
+ end
265
+
266
+ begin
267
+ require 'v8'
268
+
269
+ class JavaScript::Engine
270
+
271
+ attr_reader :context
272
+
273
+ def initialize(pdf, params = {})
274
+ options =
275
+ {
276
+ :viewerVersion => JavaScript::Platforms::WINDOWS,
277
+ :viewerVariation => JavaScript::Viewers::ADOBE_READER,
278
+ :platform => 9
279
+ }.update(params)
280
+
281
+ doc = JavaScript::Doc.new(pdf)
282
+ app = JavaScript::App.new(options[:platform], options[:viewerVariation], options[:viewerVersion])
283
+ util = JavaScript::Util.new
284
+ console = JavaScript::Console.new
285
+
286
+ @context = V8::Context.new(:with => doc)
287
+ @context['app'] = app
288
+ @context['console'] = console
289
+ @context['util'] = util
290
+ end
291
+
292
+ def exec(script)
293
+ @context.eval(script)
294
+ end
295
+ end
296
+
115
297
  rescue LoadError
116
298
  end
117
299
  end
118
300
 
119
- if defined?(PDF::JavaScriptEngine)
301
+ if defined?(PDF::JavaScript::Engine)
120
302
  module String
121
303
  def eval_as_js(options = {})
122
- runtime = options[:runtime] || PDF::JavaScriptEngine.new(self.pdf, options)
304
+ runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
123
305
  runtime.exec(self.value)
124
306
  end
125
307
  end
126
308
 
127
309
  class Stream
128
310
  def eval_as_js(options = {})
129
- runtime = options[:runtime] || PDF::JavaScriptEngine.new(self.pdf, options)
311
+ runtime = options[:runtime] || PDF::JavaScript::Engine.new(self.pdf, options)
130
312
  runtime.exec(self.data)
131
313
  end
132
314
  end
315
+
316
+ class PDF
317
+ def eval_js(code)
318
+ get_js_engine.exec(code)
319
+ end
320
+
321
+ private
322
+
323
+ def get_js_engine(options = {})
324
+ @js_engine ||= PDF::JavaScript::Engine.new(self, options)
325
+ end
326
+ end
133
327
  end
134
328
 
135
329
  end
@@ -27,6 +27,9 @@ module Origami
27
27
 
28
28
  class PDF
29
29
 
30
+ class LinearizationError < Exception #:nodoc:
31
+ end
32
+
30
33
  #
31
34
  # Returns whether the current document is linearized.
32
35
  #
@@ -45,7 +48,7 @@ module Origami
45
48
  # This operation is xrefs destructive, should be fixed in the future to merge tables.
46
49
  #
47
50
  def delinearize!
48
- raise RuntimeError, 'Not a linearized document' unless is_linearized?
51
+ raise LinearizationError, 'Not a linearized document' unless is_linearized?
49
52
 
50
53
  #
51
54
  # Saves the first trailer.
@@ -70,6 +73,27 @@ module Origami
70
73
  end
71
74
  end
72
75
 
76
+ #
77
+ # Update the trailer.
78
+ #
79
+ last_trailer = (@revisions.last.trailer ||= Trailer.new)
80
+
81
+ last_trailer.dictionary ||= Dictionary.new
82
+
83
+ if prev_trailer.has_dictionary?
84
+ last_trailer.dictionary =
85
+ last_trailer.dictionary.merge(prev_trailer.dictionary)
86
+ else
87
+ xrefstm = get_object_by_offset(last_trailer.startxref)
88
+ raise LinearizationError,
89
+ 'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)
90
+
91
+ last_trailer.dictionary[:Root] = xrefstm[:Root]
92
+ last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
93
+ last_trailer.dictionary[:Info] = xrefstm[:Info]
94
+ last_trailer.dictionary[:ID] = xrefstm[:ID]
95
+ end
96
+
73
97
  #
74
98
  # Remove all xrefs.
75
99
  # Fix: Should be merged instead.
@@ -81,15 +105,6 @@ module Origami
81
105
  #
82
106
  remove_revision(0)
83
107
 
84
- #
85
- # Update the trailer.
86
- #
87
- last_trailer = (@revisions.last.trailer ||= Trailer.new)
88
-
89
- last_trailer.dictionary ||= Dictionary.new
90
- last_trailer.dictionary =
91
- last_trailer.dictionary.merge(prev_trailer.dictionary)
92
-
93
108
  self
94
109
  end
95
110
 
@@ -155,6 +155,7 @@ module Origami
155
155
  StringScanner.new(stream.read)
156
156
  end
157
157
  elsif stream.is_a? ::String
158
+ @filename = stream
158
159
  if ''.respond_to? :force_encoding
159
160
  StringScanner.new(File.open(stream, "r", :encoding => 'binary').binmode.read)
160
161
  else
@@ -246,6 +247,18 @@ module Origami
246
247
  end
247
248
  end
248
249
 
250
+ def target_filename
251
+ @filename
252
+ end
253
+
254
+ def target_filesize
255
+ @data.string.size if @data
256
+ end
257
+
258
+ def target_data
259
+ @data.string.dup if @data
260
+ end
261
+
249
262
  private
250
263
 
251
264
  def error(str = "") #:nodoc:
@@ -39,7 +39,7 @@ module Origami
39
39
  @data.pos = @data.pos - 5
40
40
  end
41
41
 
42
- pdf = PDF.new(false)
42
+ pdf = PDF.new(self)
43
43
 
44
44
  info "...Reading header..."
45
45
  begin
@@ -57,8 +57,8 @@ require 'origami/xfa'
57
57
 
58
58
  module Origami
59
59
 
60
- VERSION = "1.1.1"
61
- REVISION = "$Revision: rev 117/, 2011/09/14 15:24:40 darko $" #:nodoc:
60
+ VERSION = "1.1.2"
61
+ REVISION = "$Revision: rev 122/, 2011/09/26 12:12:39 darko $" #:nodoc:
62
62
 
63
63
  #
64
64
  # Global options for Origami.
@@ -206,28 +206,56 @@ module Origami
206
206
 
207
207
  #
208
208
  # Creates a new PDF instance.
209
- # _init_structure_:: If this flag is set, then some structures will be automatically generated while manipulating this PDF. Set it if you are creating a new PDF file, this _must_ _not_ be used when parsing an existing file.
209
+ # _parser_:: The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
210
210
  #
211
- def initialize(init_structure = true)
211
+ def initialize(parser = nil)
212
212
  @header = PDF::Header.new
213
213
  @revisions = []
214
214
 
215
215
  add_new_revision
216
216
  @revisions.first.trailer = Trailer.new
217
217
 
218
- init if init_structure
218
+ if parser
219
+ @parser = parser
220
+ else
221
+ init
222
+ end
219
223
  end
220
224
 
225
+ #
226
+ # Original file name if parsed from disk, nil otherwise.
227
+ #
228
+ def original_filename
229
+ @parser.target_filename if @parser
230
+ end
231
+
232
+ #
233
+ # Original file size if parsed from a data stream, nil otherwise.
234
+ #
235
+ def original_filesize
236
+ @parser.target_filesize if @parser
237
+ end
238
+
239
+ #
240
+ # Original data parsed to create this document, nil if created from scratch.
241
+ #
242
+ def original_data
243
+ @parser.target_data if @parser
244
+ end
221
245
 
222
246
  #
223
- # Serializes the current PDF
247
+ # Serializes the current PDF.
224
248
  #
225
249
  def serialize(filename)
226
- Zlib::GzipWriter.open(filename) { |gz|
227
- gz.write Marshal.dump(self)
228
- }
229
-
230
- self
250
+ parser = @parser
251
+ @parser = nil # do not serialize the parser
252
+
253
+ Zlib::GzipWriter.open(filename) { |gz|
254
+ gz.write Marshal.dump(self)
255
+ }
256
+
257
+ @parser = parser
258
+ self
231
259
  end
232
260
 
233
261
  #
@@ -436,7 +436,7 @@ module Origami
436
436
  obj.set_indirect(true)
437
437
  end
438
438
 
439
- @data = prolog + data
439
+ self.data = prolog + data
440
440
 
441
441
  @dictionary[:N] = @objects.size
442
442
  @dictionary[:First] = prolog.size
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: origami
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 1
10
- version: 1.1.1
9
+ - 2
10
+ version: 1.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - "Guillaume Delugr\xC3\xA9"
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-22 00:00:00 +02:00
18
+ date: 2011-09-26 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21