pdf-toolkit 0.5.0 → 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
1
+ # pdf-toolkit - A ruby interface to pdftk
2
+
3
+ pdf-toolkit allows you to read and write PDF metadata in a very simple
4
+ way, through the [`pdftk` command-line tool](http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
5
+
6
+ A typical use case is as follows:
7
+
8
+ my_pdf = PDF::Toolkit.open("somefile.pdf")
9
+ my_pdf.updated_at = Time.now # ModDate
10
+ my_pdf["SomeAttribute"] = "Some value"
11
+ my_pdf.save!
12
+
13
+ class MyDocument < PDF::Toolkit
14
+ info_accessor :some_attribute
15
+ def before_save
16
+ self.updated_at = Time.now
17
+ end
18
+ end
19
+ my_pdf = MyDocument.open("somefile.pdf")
20
+ my_pdf.some_attribute = "Some value"
21
+ my_pdf.save!
22
+
23
+ ## Note about this version
24
+
25
+ This is a prerelease 1.0.0.rc1 version of an almost abandoned project. The main
26
+ difference (broken API) with the 0.5.0 branch is that support for ActiveRecord
27
+ has been entirely removed (mostly because the implementation was ugly so far).
28
+ If you use pdf-toolkit and would like ActiveRecord support to be included in 1.0.0,
29
+ please just tell us and we'll add it. If you upgrade from 0.5.0 to 1.0.0.rc1 and
30
+ something else goes wrong, please report the issue on GitHub.
31
+
32
+ ## Contributors
33
+
34
+ * Tim Pope is the original author of pdf-toolkit
35
+ * Preston Marshall ported the project to github
36
+ * Bernard Lambeau is the current maintainer
37
+
38
+ Please report issues on [github](https://github.com/blambeau/pdf-toolkit/issues)
39
+
40
+ ## Licence
41
+
42
+ pdf-toolkit is released under an MIT licence. See LICENCE.md
data/Rakefile CHANGED
@@ -1,41 +1,19 @@
1
- begin
2
- require 'rubygems'
3
- rescue LoadError
4
- end
5
1
  require 'rake'
6
- require 'rake/testtask'
7
- require 'rake/rdoctask'
8
- require 'rake/packagetask'
9
- require 'rake/gempackagetask'
10
- require 'rake/contrib/sshpublisher'
11
- require 'rake/contrib/rubyforgepublisher'
12
- require File.join(File.dirname(__FILE__), 'lib', 'pdf', 'toolkit')
13
-
14
- PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
15
- PKG_NAME = 'pdf-toolkit'
16
- PKG_VERSION = PDF::Toolkit::PDF_TOOLKIT_VERSION
17
- PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
18
- # PKG_DESTINATION = ENV["PKG_DESTINATION"] || "../#{PKG_NAME}"
19
-
20
- # RELEASE_NAME = "REL #{PKG_VERSION}"
21
-
22
- RUBY_FORGE_PROJECT = PKG_NAME
23
- RUBY_FORGE_USER = "tpope"
24
2
 
25
3
  desc "Default task: test"
26
4
  task :default => [ :test ]
27
5
 
28
-
29
6
  # Run the unit tests
7
+ require 'rake/testtask'
30
8
  Rake::TestTask.new { |t|
31
- t.libs << "test"
9
+ t.libs << "lib" << "test"
32
10
  t.test_files = Dir['test/*_test.rb'] + Dir['test/test_*.rb']
33
11
  t.verbose = true
34
12
  }
35
13
 
36
-
37
14
  # Generate the RDoc documentation
38
- Rake::RDocTask.new { |rdoc|
15
+ require 'rdoc/task'
16
+ RDoc::Task.new { |rdoc|
39
17
  rdoc.rdoc_dir = 'doc'
40
18
  rdoc.rdoc_files.add('lib')
41
19
  rdoc.main = "PDF::Toolkit"
@@ -43,63 +21,12 @@ Rake::RDocTask.new { |rdoc|
43
21
  rdoc.options << '--inline-source'
44
22
  }
45
23
 
46
-
47
24
  # Create compressed packages
48
- spec = Gem::Specification.new do |s|
49
- s.platform = Gem::Platform::RUBY
50
- s.name = PKG_NAME
51
- s.summary = 'A wrapper around pdftk to allow PDF metadata manipulation'
52
- s.description = 'PDF::Toolkit provides a simple interface for querying and unpdation PDF metadata like the document Author and Title.'
53
- s.version = PKG_VERSION
54
-
55
- s.author = 'Tim Pope'
56
- s.email = 'ruby@tp0pe.inf0'.gsub(/0/,'o')
57
- s.rubyforge_project = RUBY_FORGE_PROJECT
58
- s.homepage = "http://#{PKG_NAME}.rubyforge.org"
59
-
60
- s.has_rdoc = true
61
- # s.requirements << 'none'
62
- s.require_path = 'lib'
63
-
64
- s.add_dependency('activesupport')
65
-
66
- s.files = [ "Rakefile", "README", "setup.rb" ]
67
- s.files = s.files + Dir.glob( "lib/**/*.rb" )
68
- s.files = s.files + Dir.glob( "test/**/*" ).reject { |item| item.include?( "\.svn" ) }
69
- end
70
-
71
- Rake::GemPackageTask.new(spec) do |p|
25
+ require 'rubygems/package_task'
26
+ spec = eval(File.read("pdf-toolkit.gemspec"), binding, "pdf-toolkit.gemspec")
27
+ Gem::PackageTask.new(spec) do |p|
72
28
  p.gem_spec = spec
73
29
  p.need_tar = true
74
30
  p.need_zip = true
75
31
  end
76
32
 
77
- # Publish documentation
78
- desc "Publish the API documentation"
79
- task :pdoc => [:rerdoc] do
80
- # Rake::RubyForgePublisher.new(RUBY_FORGE_PROJECT,RUBY_FORGE_USER).upload
81
- Rake::SshDirPublisher.new("rubyforge.org", "/var/www/gforge-projects/#{PKG_NAME}", "doc").upload
82
- end
83
-
84
- desc "Publish the release files to RubyForge."
85
- task :release => [ :package ] do
86
- `rubyforge login`
87
-
88
- for ext in %w( gem tgz zip )
89
- release_command = "rubyforge add_release #{PKG_NAME} #{PKG_NAME} 'REL #{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.#{ext}"
90
- puts release_command
91
- system(release_command)
92
- end
93
- end
94
-
95
- begin
96
- require 'rcov/rcovtask'
97
- Rcov::RcovTask.new do |t|
98
- t.test_files = Dir['test/*_test.rb'] + Dir['test/test_*.rb']
99
- t.verbose = true
100
- t.rcov_opts << "--text-report"
101
- # t.rcov_opts << "--exclude \\\\A/var/lib/gems"
102
- t.rcov_opts << "--exclude '/(active_record|active_support)\\b'"
103
- end
104
- rescue LoadError
105
- end
@@ -1,28 +1,5 @@
1
- # Copyright (c) 2006 Tim Pope
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining
4
- # a copy of this software and associated documentation files (the
5
- # "Software"), to deal in the Software without restriction, including
6
- # without limitation the rights to use, copy, modify, merge, publish,
7
- # distribute, sublicense, and/or sell copies of the Software, and to
8
- # permit persons to whom the Software is furnished to do so, subject to
9
- # the following conditions:
10
- #
11
- # The above copyright notice and this permission notice shall be
12
- # included in all copies or substantial portions of the Software.
13
- #
14
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
-
22
- require 'rubygems' rescue nil
23
1
  require 'tempfile'
24
2
  require 'forwardable'
25
- require 'active_support'
26
3
 
27
4
  # Certain existing libraries have a PDF class; no sense in being unnecessarily
28
5
  # incompatible.
@@ -55,8 +32,7 @@ end unless defined? PDF
55
32
  #
56
33
  # PDF::Toolkit requires +pdftk+, which is available from
57
34
  # http://www.accesspdf.com/pdftk. For full functionality, also install
58
- # +xpdf+ from http://www.foolabs.com/xpdf. ActiveSupport (from Ruby on Rails)
59
- # is also required but this dependency may be removed in the future.
35
+ # +xpdf+ from http://www.foolabs.com/xpdf.
60
36
  #
61
37
  # == Limitations
62
38
  #
@@ -65,12 +41,15 @@ end unless defined? PDF
65
41
  #
66
42
  # +pdftk+ requires the owner password, even for simply querying the document.
67
43
  class PDF::Toolkit
68
-
69
- PDF_TOOLKIT_VERSION = "0.5.0"
70
44
  extend Forwardable
71
- class Error < ::StandardError #:nodoc:
72
- end
73
- class ExecutionError < Error #:nodoc:
45
+
46
+ VERSION = "1.0.0.rc1"
47
+
48
+ # Raised when something fails with the toolkit
49
+ class Error < ::StandardError; end
50
+
51
+ # Raised when an invocation of `pdftk` fails under the hood
52
+ class ExecutionError < Error
74
53
  attr_reader :command, :exit_status
75
54
  def initialize(msg = nil, cmd = nil, exit_status = nil)
76
55
  super(msg)
@@ -78,139 +57,15 @@ class PDF::Toolkit
78
57
  @exit_status = exit_status
79
58
  end
80
59
  end
81
- class FileNotSaved < Error #:nodoc:
82
- end
83
-
84
- class <<self
85
-
86
- # Add an accessor for a key. If the key is omitted, defaults to a
87
- # camelized version of the accessor (+foo_bar+ becomes +FooBar+). The
88
- # example below illustrates the defaults.
89
- #
90
- # class MyDocument < PDF::Toolkit
91
- # info_accessor :created_at, "CreationDate"
92
- # info_accessor :updated_at, "ModDate"
93
- # info_accessor :author
94
- # [:subject, :title, :keywords, :producer, :creator].each do |key|
95
- # info_accessor key
96
- # end
97
- # end
98
- #
99
- # MyDocument.open("document.pdf").created_at
100
- def info_accessor(accessor_name, info_key = nil)
101
- info_key ||= camelize_key(accessor_name)
102
- read_inheritable_attribute(:info_accessors)[accessor_name] = info_key
103
- define_method accessor_name do
104
- self[info_key]
105
- end
106
- define_method "#{accessor_name}=" do |value|
107
- self[info_key] = value
108
- end
109
- end
110
-
111
- # Invoke +pdftk+ with the given arguments, plus +dont_ask+. If :mode or
112
- # a block is given, IO::popen is called. Otherwise, Kernel#system is
113
- # used.
114
- #
115
- # result = PDF::Toolkit.pdftk(*%w(foo.pdf bar.pdf cat output baz.pdf))
116
- # io = PDF::Toolkit.pdftk("foo.pdf","dump_data","output","-",:mode => 'r')
117
- # PDF::Toolkit.pdftk("foo.pdf","dump_data","output","-") { |io| io.read }
118
- def pdftk(*args,&block)
119
- options = args.last.is_a?(Hash) ? args.pop : {}
120
- args << "dont_ask"
121
- args << options
122
- result = call_program(executables[:pdftk],*args,&block)
123
- return block_given? ? $?.success? : result
124
- end
125
-
126
- # Invoke +pdftotext+. If +outfile+ is omitted, returns an +IO+ object for
127
- # the output.
128
- def pdftotext(file,outfile = nil,&block)
129
- call_program(executables[:pdftotext],file,
130
- outfile||"-",:mode => (outfile ? nil : 'r'),&block)
131
- end
132
-
133
- # This method will +require+ and +include+ validations, callbacks, and
134
- # timestamping from +ActiveRecord+. Use at your own risk.
135
- def loot_active_record
136
- require 'active_support'
137
- require 'active_record'
138
- # require 'active_record/validations'
139
- # require 'active_record/callbacks'
140
- # require 'active_record/timestamp'
141
-
142
- unless defined? @@looted_active_record
143
- @@looted_active_record = true
144
- meta = (class <<self; self; end)
145
- alias_method :initialize_ar_hack, :initialize
146
- include ActiveRecord::Validations
147
- include ActiveRecord::Callbacks
148
- include ActiveRecord::Timestamp
149
- alias_method :initialize, :initialize_ar_hack
150
-
151
- cattr_accessor :record_timestamps # nil by default
152
-
153
- meta.send(:define_method,:default_timezone) do
154
- defined? ActiveRecord::Base ? ActiveRecord::Base.default_timezone : :local
155
- end
156
- end
157
- self
158
- end
159
60
 
160
- def human_attribute_name(arg) #:nodoc:
161
- defined? ActiveRecord::Base ? ActiveRecord::Base.human_attribute_name(arg) : arg.gsub(/_/,' ')
162
- end
61
+ # Raised when a .pdf file cannot be saved
62
+ class FileNotSaved < Error; end
163
63
 
164
- private
64
+ require 'pdf/toolkit/native'
65
+ extend Native
165
66
 
166
- def instantiate(*args) #:nodoc:
167
- raise NoMethodError, "stub method `instantiate' called for #{self}:#{self.class}"
168
- end
169
-
170
- def call_program(*args,&block)
171
- old_stream = nil
172
- options = args.last.is_a?(Hash) ? args.pop : {}
173
- options[:mode] ||= 'r' if block_given?
174
- unless options[:silence_stderr] == false
175
- old_stream = STDERR.dup
176
- STDERR.reopen(RUBY_PLATFORM =~ /mswin/ ? 'NUL:' : '/dev/null')
177
- STDERR.sync = true
178
- end
179
- if options[:mode]
180
- command = (args.map {|arg| %{"#{arg.gsub('"','\\"')}"}}).join(" ")
181
- retval = IO.popen(command,options[:mode],&block)
182
- retval
183
- else
184
- system(*args)
185
- end
186
- ensure
187
- STDERR.reopen(old_stream) if old_stream
188
- end
189
-
190
- def camelize_key(key)
191
- if key.to_s.respond_to?(:camelize)
192
- key.to_s.camelize
193
- else
194
- key.to_s.gsub(/_+([^_])/) {$1.upcase}.sub(/^./) {|l|l.upcase}
195
- end
196
- end
197
-
198
- end
199
-
200
- class_inheritable_accessor :executables, :default_permissions, :default_input_password
201
- class_inheritable_accessor :default_owner_password, :default_user_password
202
- protected :default_owner_password=, :default_user_password=
203
- # self.pdftk = "pdftk"
204
- self.executables = Hash.new {|h,k| k.to_s.dup}
205
- write_inheritable_attribute :info_accessors, Hash.new { |h,k|
206
- if h.has_key?(k.to_s.to_sym)
207
- h[k.to_s.to_sym]
208
- elsif k.kind_of?(Symbol)
209
- camelize_key(k)
210
- else
211
- k.dup
212
- end
213
- }
67
+ require 'pdf/toolkit/class_methods'
68
+ extend ClassMethods
214
69
 
215
70
  info_accessor :created_at, "CreationDate"
216
71
  info_accessor :updated_at, "ModDate"
@@ -225,30 +80,29 @@ class PDF::Toolkit
225
80
  def self.open(filename,input_password = nil)
226
81
  object = new(filename,input_password)
227
82
  object.reload
228
- object
83
+ block_given? ? yield(object) : object
229
84
  end
230
85
 
231
86
  # Like +open+, only the attributes are lazily loaded. Under most
232
87
  # circumstances, +open+ is preferred.
233
- def initialize(filename,input_password = nil)
234
- @filename = if filename.respond_to?(:to_str)
235
- filename.to_str
236
- elsif filename.kind_of?(self.class)
237
- filename.instance_variable_get("@filename")
238
- elsif filename.respond_to?(:path)
239
- filename.path
240
- else
241
- filename
242
- end
88
+ def initialize(filename, input_password = nil)
89
+ coercer = [:to_path, :to_str, :path].find{|meth| filename.respond_to? meth}
90
+ @filename = coercer ? filename.send(coercer) : filename
91
+
243
92
  @input_password = input_password || default_input_password
244
93
  @owner_password = default_owner_password
245
94
  @user_password = default_user_password
246
- @permissions = default_permissions || []
247
- @new_info = {}
248
- callback(:after_initialize) if respond_to?(:after_initialize) && respond_to?(:callback)
249
- # reload
95
+ @permissions = default_permissions || []
96
+ @new_info = {}
97
+
98
+ run_callbacks_for(:after_initialize)
250
99
  end
251
100
 
101
+ def_delegators :"self.class", :default_input_password,
102
+ :default_owner_password,
103
+ :default_user_password,
104
+ :default_permissions
105
+
252
106
  attr_reader :pdf_ids, :permissions
253
107
  attr_writer :owner_password, :user_password
254
108
 
@@ -256,13 +110,13 @@ class PDF::Toolkit
256
110
  read_data unless @pages
257
111
  @pages
258
112
  end
259
-
260
113
  alias pages page_count
261
114
 
262
115
  # Path to the file.
263
116
  def path
264
117
  @new_filename || @filename
265
118
  end
119
+ alias :to_path :path
266
120
 
267
121
  # Retrieve the file's version as a symbol.
268
122
  #
@@ -289,8 +143,6 @@ class PDF::Toolkit
289
143
  end
290
144
 
291
145
  # Like +save+, only raise an exception if the operation fails.
292
- #
293
- # TODO: ensure no ActiveRecord::RecordInvalid errors make it through.
294
146
  def save!
295
147
  if save
296
148
  self
@@ -313,8 +165,6 @@ class PDF::Toolkit
313
165
  @new_filename = filename
314
166
  save!
315
167
  self
316
- rescue ActiveRecord::RecordInvalid
317
- raise FileNotSaved
318
168
  end
319
169
 
320
170
  # Invoke +pdftotext+ on the file and return an +IO+ object for reading the
@@ -329,8 +179,6 @@ class PDF::Toolkit
329
179
  "#<#{self.class}:#{path}>"
330
180
  end
331
181
 
332
- # Enumerable/Hash methods {{{1
333
-
334
182
  # Create a hash from the file's metadata.
335
183
  def to_hash
336
184
  ensure_loaded
@@ -356,7 +204,6 @@ class PDF::Toolkit
356
204
  @info[key.to_s]
357
205
  end
358
206
 
359
-
360
207
  # Write a metadata attribute.
361
208
  #
362
209
  # my_pdf["Author"] = author
@@ -423,27 +270,8 @@ class PDF::Toolkit
423
270
  self
424
271
  end
425
272
 
426
- # }}}1
427
-
428
273
  protected
429
274
 
430
- =begin
431
- def method_missing(method,*args)
432
- args_needed = method.to_s.last == "=" ? 1 : 0
433
- if args.length != args_needed
434
- raise ArgumentError,
435
- "wrong number of arguments (#{args.length} for #{args_needed})"
436
- end
437
- ensure_loaded
438
- attribute = lookup_key(method.to_s.chomp("=").to_sym)
439
- if method.to_s.last == "="
440
- self[attribute] = args.first
441
- else
442
- self[attribute]
443
- end
444
- end
445
- =end
446
-
447
275
  def read_attribute(key)
448
276
  self[key]
449
277
  end
@@ -460,7 +288,7 @@ class PDF::Toolkit
460
288
  end
461
289
 
462
290
  def lookup_key(key)
463
- return self.class.read_inheritable_attribute(:info_accessors)[key]
291
+ return self.class.info_accessors[key]
464
292
  end
465
293
 
466
294
  def call_pdftk_on_file(*args,&block)
@@ -471,48 +299,8 @@ class PDF::Toolkit
471
299
  self.class.send(:pdftk,*args,&block)
472
300
  end
473
301
 
474
- def cast_field(field)
475
- case field
476
- when /^D:(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)([-+].*)?$/
477
- parse_time(field)
478
- when /^\d+$/
479
- field.to_i
480
- else
481
- field
482
- end
483
- end
484
-
485
- def parse_time(string)
486
- if string =~ /^D:(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)([-+].*)?$/
487
- date = $~[1..6].map {|n|n.to_i}
488
- tz = $7
489
- time = Time.utc(*date)
490
- tz_match = tz.match(/^([+-])(\d{1,2})(?:'(\d\d)')?$/) if tz
491
- if tz_match
492
- direction, hours, minutes = tz_match[1..3]
493
- offset = (hours.to_i*60+minutes.to_i)*60
494
- # Go the *opposite* direction
495
- time += (offset == "+" ? -offset : offset)
496
- end
497
- time.getlocal
498
- else
499
- string
500
- end
501
- end
502
-
503
- def format_field(field)
504
- format_time(field)
505
- end
506
-
507
- def format_time(time)
508
- if time.kind_of?(Time)
509
- string = ("D:%04d"+"%02d"*5) % time.to_a[0..5].reverse
510
- string += (time.utc_offset < 0 ? "-" : "+")
511
- string += "%02d'%02d'" % [time.utc_offset.abs/3600,(time.utc_offset.abs/60)%60]
512
- else
513
- time
514
- end
515
- end
302
+ require 'pdf/toolkit/coercions'
303
+ include Coercions
516
304
 
517
305
  def read_data
518
306
  last = nil
@@ -615,21 +403,10 @@ class PDF::Toolkit
615
403
  def create_or_update #:nodoc:
616
404
  run_callbacks_for(:before_save)
617
405
  result = new_record? ? create : update
618
- if result
619
- # run_callbacks_for(:after_save)
620
- end
406
+ # run_callbacks_for(:after_save) if result
621
407
  result
622
408
  end
623
409
 
624
- def respond_to_without_attributes?(method)
625
- respond_to?(method)
626
- end
627
-
628
- def destroy
629
- raise NoMethodError, "stub method `destroy' called for #{self}:#{self.class}"
630
- # File.unlink(@filename); self.freeze!
631
- end
632
-
633
410
  def cleanup
634
411
  if @info
635
412
  # Create a new hash on purpose