pdf_split_bookmark-pdftk 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +17 -0
- data/lib/pdf_split_bookmark-pdftk.rb +64 -0
- metadata +95 -0
data/README.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
pdf_split_bookmark-pdftk
|
2
|
+
========================
|
3
|
+
|
4
|
+
This utility reads a PDF's bookmarks using pdftk, then splits the PDF into one file per bookmark at a specified bookmark level.
|
5
|
+
|
6
|
+
You will need the pdftk binaries from Sid Steward (http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
|
7
|
+
|
8
|
+
Caveat: Each bookmark should point to the start of a new page, because the PDF is split on page boundaries.
|
9
|
+
|
10
|
+
Usage:
|
11
|
+
|
12
|
+
pdf = PDF.new('foo.pdf')
|
13
|
+
pdf.split_by_bookmark_at_level(2)
|
14
|
+
|
15
|
+
Optionally specify an output directory:
|
16
|
+
|
17
|
+
pdf.split_by_bookmark_at_level(2, '/path/to/output_directory')
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'crowd_support'
|
2
|
+
|
3
|
+
# One outline entry of a PDF. PDF#bookmarks creates these and fills every
# field from the pdftk dump; a freshly constructed Bookmark has all fields nil.
class Bookmark
  # Integer parsed from the entry's "BookmarkLevel: " line.
  attr_accessor :level

  # Text following "BookmarkTitle: " in the pdftk dump.
  attr_accessor :title

  # Integer parsed from the entry's "BookmarkPageNumber: " line.
  attr_accessor :page

  # The PDF instance this entry was read from.
  attr_accessor :pdf
end
|
6
|
+
|
7
|
+
# A PDF file on disk, inspected and split by shelling out to the `pdftk`
# executable (which must be on the PATH).
#
# The output of `pdftk <file> dump_data` is fetched once and cached; the page
# count and the bookmark list are both derived from that cached dump.
class PDF
  attr_reader :filename

  # @param filename [String] path to the PDF to inspect/split
  def initialize(filename)
    @filename = filename
    @bookmarks = []
  end

  # Number of pages, taken from the "NumberOfPages: N" line of the dump.
  #
  # @return [Integer]
  # @raise [RuntimeError] when the dump has no NumberOfPages line (for
  #   example because pdftk could not read the file)
  def length
    @length ||= begin
      line = pdftk_data.find { |d| d =~ /NumberOfPages: [0-9]+/ }
      raise "pdftk reported no NumberOfPages for #{@filename}" if line.nil?

      line.gsub('NumberOfPages: ', '').to_i
    end
  end

  # Writes one PDF per bookmark found at +level+. Each output starts on the
  # bookmark's page and runs up to the page before the next bookmark of the
  # same level (the last one runs to the end of the document).
  #
  # Output files are named after the bookmark title (via
  # String#sanitize_filename from crowd_support) plus ".pdf".
  #
  # @param level [Integer] bookmark level to split on
  # @param result_directory [String] directory that receives the output files
  # @return [Array<String>] output paths WITHOUT the ".pdf" extension, ordered
  #   from the last bookmark to the first (the order in which they are written)
  def split_by_bookmark_at_level(level, result_directory = Dir.pwd)
    endpage = length
    result_filenames = []
    # Walk backwards so each section's end page is simply "one before the
    # start of the section handled just before it".
    bookmarks_at_level(level).reverse_each do |splitmark|
      startpage = splitmark.page
      result_filename = File.join(result_directory, splitmark.title.sanitize_filename)
      run_pdftk("A=#{filename}", 'cat', "A#{startpage}-#{endpage}",
                'output', "#{result_filename}.pdf", 'dont_ask')
      endpage = startpage - 1
      result_filenames << result_filename
    end
    result_filenames
  end

  # @param level [Integer]
  # @return [Array<Bookmark>] bookmarks whose level equals +level+, by page
  def bookmarks_at_level(level)
    bookmarks.select { |bookmark| bookmark.level == level }
  end

  # All bookmarks of the document, ordered by page number; bookmarks sharing
  # a page keep the order in which pdftk listed them.
  #
  # Fields are matched by their "BookmarkTitle/Level/PageNumber" prefix rather
  # than by position, so both the three-line-per-entry dump and the variant
  # with an extra "BookmarkBegin" line per entry are understood.
  #
  # @return [Array<Bookmark>]
  def bookmarks
    return @bookmarks unless @bookmarks.empty?

    current = nil
    pdftk_data.each do |line|
      case line
      when /\ABookmarkTitle: (.*)\z/
        # A title line opens a new entry in either dump format.
        current = Bookmark.new
        current.title = Regexp.last_match(1)
        current.pdf = self
        @bookmarks << current
      when /\ABookmarkLevel: (.*)\z/
        current.level = Regexp.last_match(1).to_i if current
      when /\ABookmarkPageNumber: (.*)\z/
        current.page = Regexp.last_match(1).to_i if current
      end
    end

    # Sort in place (a bare `sort` would discard its result); the index is a
    # tie-breaker that keeps same-page bookmarks in document order.
    ordered = @bookmarks.each_with_index.sort_by { |bookmark, index| [bookmark.page.to_i, index] }
    @bookmarks.replace(ordered.map(&:first))
  end

  # Lines of `pdftk <filename> dump_data`, fetched on first use and cached.
  #
  # @return [Array<String>]
  def pdftk_data
    @pdftk_data ||= run_pdftk(@filename, 'dump_data').split("\n")
  end

  private

  # Runs pdftk with +args+ passed as an argument vector (no shell involved),
  # so quotes, spaces or `$` in file names and titles are passed literally.
  #
  # @return [String] whatever pdftk wrote to standard output
  def run_pdftk(*args)
    IO.popen(['pdftk', *args], &:read)
  end
end
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdf_split_bookmark-pdftk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron Breckenridge
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: crowd_support
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Use pdftk to split pdfs on bookmark level. Limited functionality, see
|
63
|
+
README.md for more info.
|
64
|
+
email: aaronbreckenridge@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- README.md
|
70
|
+
- lib/pdf_split_bookmark-pdftk.rb
|
71
|
+
homepage: https://github.com/breckenedge/pdf_split_bookmark-pdftk
|
72
|
+
licenses: []
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.9.2
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ! '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 1.3.6
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 1.8.24
|
92
|
+
signing_key:
|
93
|
+
specification_version: 3
|
94
|
+
summary: split pdfs on bookmarks
|
95
|
+
test_files: []
|