pdf_split_bookmark-pdftk 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.md +17 -0
  2. data/lib/pdf_split_bookmark-pdftk.rb +64 -0
  3. metadata +95 -0
@@ -0,0 +1,17 @@
1
+ pdf_split_bookmark-pdftk
2
+ ========================
3
+
4
+ This utility reads a PDF's bookmarks using pdftk, then splits the PDF into separate files at each bookmark of a specified level.
5
+
6
+ You will need the pdftk binaries from Sid Steward (http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
7
+
8
+ Caveat: Each bookmark should point to the start of a new page, because the PDF is split on page boundaries.
9
+
10
+ Usage:
11
+
12
+ pdf = PDF.new('foo.pdf')
13
+ pdf.split_by_bookmark_at_level(2)
14
+
15
+ Optionally specify an output directory:
16
+
17
+ pdf.split_by_bookmark_at_level(2, '/dev/null')
@@ -0,0 +1,64 @@
1
+ require 'crowd_support'
2
+
3
# Plain value holder describing a single bookmark (outline entry) of a PDF.
# Instances start out with every field unset (nil); the code that parses the
# pdftk dump is expected to fill them in.
class Bookmark
  # Title text of the bookmark.
  attr_accessor :title

  # Nesting depth of the bookmark within the outline.
  attr_accessor :level

  # Page number the bookmark points to.
  attr_accessor :page

  # The object representing the document this bookmark belongs to.
  attr_accessor :pdf
end
6
+
7
# Wraps a PDF file on disk and splits it into separate files along its
# bookmarks. All PDF inspection and page extraction is delegated to the
# external +pdftk+ command-line tool, which must be on the PATH.
class PDF
  attr_reader :filename

  # @param filename [String] path to the PDF file to inspect and split
  def initialize(filename)
    @filename = filename
    @bookmarks = []
  end

  # Number of pages in the document, read from the "NumberOfPages" line of
  # the pdftk dump. The value is computed once and cached.
  #
  # @return [Integer]
  # @raise [RuntimeError] if the pdftk dump contains no NumberOfPages line
  def length
    @length ||= begin
      line = pdftk_data.find { |d| d =~ /NumberOfPages: [0-9]+/ }
      raise "pdftk reported no NumberOfPages for #{filename}" if line.nil?
      line[/NumberOfPages: ([0-9]+)/, 1].to_i
    end
  end

  # Writes one PDF per bookmark of the given level. Each output file spans
  # from its bookmark's page up to the page before the next bookmark of that
  # level (or the last page of the document for the final bookmark).
  #
  # Output files are named after the bookmark title passed through
  # String#sanitize_filename (not defined in this class; presumably supplied
  # by the crowd_support gem required by this file), with ".pdf" appended.
  #
  # @param level [Integer] bookmark level to split on
  # @param result_directory [String] directory that receives the output files
  # @return [Array<String>] output paths WITHOUT the ".pdf" extension, listed
  #   from the last segment of the document to the first
  def split_by_bookmark_at_level(level, result_directory = Dir.pwd)
    endpage = length
    result_filenames = []
    # Walk backwards so each segment's end page is simply the page before the
    # start of the segment processed just before it.
    bookmarks_at_level(level).reverse_each do |splitmark|
      startpage = splitmark.page
      result_filename = File.join(result_directory, splitmark.title.sanitize_filename)
      run_pdftk("A=#{filename}", 'cat', "A#{startpage}-#{endpage}",
                'output', "#{result_filename}.pdf", 'dont_ask')
      endpage = startpage - 1
      result_filenames << result_filename
    end
    result_filenames
  end

  # @param level [Integer] bookmark level to select
  # @return [Array<Bookmark>] bookmarks whose level equals +level+, in page order
  def bookmarks_at_level(level)
    bookmarks.select { |bookmark| bookmark.level == level }
  end

  # All bookmarks of the document, ordered by page number. Bookmarks that
  # share a page keep the order in which pdftk listed them.
  #
  # Parsing is keyed on the field name of each dump line rather than on line
  # position, so it works both with dumps that emit three lines per bookmark
  # (Title/Level/PageNumber) and with dumps that add a "BookmarkBegin" line.
  #
  # @return [Array<Bookmark>]
  def bookmarks
    if @bookmarks.empty?
      current = nil
      pdftk_data.each do |line|
        case line
        when /^BookmarkTitle: ?(.*)$/
          # A title line opens a new bookmark record.
          current = Bookmark.new
          current.title = Regexp.last_match(1)
          # Default to 0, the same value to_i yields for a malformed field.
          current.level = 0
          current.page = 0
          current.pdf = self
          @bookmarks << current
        when /^BookmarkLevel: ?(.*)$/
          current.level = Regexp.last_match(1).to_i if current
        when /^BookmarkPageNumber: ?(.*)$/
          current.page = Regexp.last_match(1).to_i if current
        end
      end
      # sort_by is not stable, so the original index is used as a tie-breaker.
      @bookmarks = @bookmarks.each_with_index
                             .sort_by { |mark, index| [mark.page, index] }
                             .map(&:first)
    end
    @bookmarks
  end

  # Lines of the `pdftk <file> dump_data` report, fetched once and cached.
  #
  # @return [Array<String>]
  def pdftk_data
    @pdftk_data ||= run_pdftk(@filename, 'dump_data').split("\n")
  end

  private

  # Runs pdftk with the given arguments and returns its standard output.
  # Arguments are passed as an argv array, so no shell is involved and file
  # names or bookmark titles containing quotes, spaces or shell
  # metacharacters are handed to pdftk verbatim.
  def run_pdftk(*args)
    IO.popen(['pdftk', *args], &:read)
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf_split_bookmark-pdftk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron Breckenridge
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: crowd_support
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Use pdftk to split pdfs on bookmark level. Limited functionality, see
63
+ README.md for more info.
64
+ email: aaronbreckenridge@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - README.md
70
+ - lib/pdf_split_bookmark-pdftk.rb
71
+ homepage: https://github.com/breckenedge/pdf_split_bookmark-pdftk
72
+ licenses: []
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.9.2
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 1.3.6
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 1.8.24
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: split pdfs on bookmarks
95
+ test_files: []