pdf_split_bookmark-pdftk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +17 -0
- data/lib/pdf_split_bookmark-pdftk.rb +64 -0
- metadata +95 -0
data/README.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
pdf_split_bookmark-pdftk
|
2
|
+
========================
|
3
|
+
|
4
|
+
This utility reads a PDF's bookmarks using pdftk, then splits the PDF into one file per bookmark at a specified bookmark level.
|
5
|
+
|
6
|
+
You will need the pdftk binaries from Sid Steward (http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
|
7
|
+
|
8
|
+
Caveat: Each bookmark should start on a new page, because the PDF is split at page boundaries.
|
9
|
+
|
10
|
+
Usage:
|
11
|
+
|
12
|
+
pdf = PDF.new('foo.pdf')
|
13
|
+
pdf.split_by_bookmark_at_level(2)
|
14
|
+
|
15
|
+
Optionally specify an output directory:
|
16
|
+
|
17
|
+
pdf.split_by_bookmark_at_level(2, '/dev/null')
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'crowd_support'
|
2
|
+
|
3
|
+
# Plain value holder describing one bookmark (outline entry) of a PDF.
class Bookmark
  # Bookmark text.
  attr_accessor :title
  # Nesting depth of the bookmark.
  attr_accessor :level
  # Page number the bookmark points at.
  attr_accessor :page
  # The PDF object this bookmark was read from.
  attr_accessor :pdf
end
|
6
|
+
|
7
|
+
# Wraps a PDF file and splits it into one file per bookmark, delegating all
# PDF work to the external +pdftk+ command-line tool.
class PDF
  attr_reader :filename

  # filename - path of the PDF to inspect and split.
  def initialize(filename)
    @filename = filename
    @bookmarks = []
    @bookmarks_loaded = false
  end

  # Number of pages in the PDF, read from the "NumberOfPages:" line of the
  # pdftk dump and cached after the first call.
  #
  # Raises RuntimeError when the dump contains no page count (for example
  # because pdftk could not read the file).
  def length
    @length ||= begin
      # Anchored so a bookmark title containing the text cannot match.
      line = pdftk_data.find { |d| d =~ /\ANumberOfPages: [0-9]+/ }
      raise "pdftk reported no NumberOfPages for #{filename}" if line.nil?
      line[/[0-9]+/].to_i
    end
  end

  # Writes one PDF per bookmark found at +level+. Each output file starts on
  # the bookmark's page and ends on the page before the next bookmark of that
  # level (or on the last page of the document).
  #
  # level            - bookmark level to split on.
  # result_directory - directory receiving the files (default: Dir.pwd).
  #
  # Returns the output paths WITHOUT the ".pdf" extension, ordered from the
  # last bookmark to the first (the order in which the files are written).
  def split_by_bookmark_at_level(level, result_directory = Dir.pwd)
    endpage = length
    result_filenames = []
    # A page number below 1 cannot be addressed in a pdftk page range.
    splitmarks = bookmarks_at_level(level).select { |bookmark| bookmark.page.to_i > 0 }

    # Walk backwards so each section's last page is known from the section
    # that follows it.
    splitmarks.reverse_each do |splitmark|
      startpage = splitmark.page
      result_filename = File.join(result_directory, splitmark.title.sanitize_filename)
      # Two bookmarks on the same page would otherwise produce a reversed
      # range such as "A5-4"; clamp to a single-page range instead.
      lastpage = [endpage, startpage].max
      run_pdftk("A=#{filename}", 'cat', "A#{startpage}-#{lastpage}",
                'output', "#{result_filename}.pdf", 'dont_ask')
      endpage = startpage - 1
      result_filenames << result_filename
    end

    result_filenames
  end

  # Bookmarks whose level equals +level+, in page order.
  def bookmarks_at_level(level)
    bookmarks.select { |bookmark| bookmark.level == level }
  end

  # All bookmarks of the PDF as Bookmark objects sorted by page number.
  # Parsed from the pdftk dump on first use, then cached.
  def bookmarks
    unless @bookmarks_loaded
      @bookmarks = parse_bookmarks
      @bookmarks_loaded = true
    end
    @bookmarks
  end

  # Lines of `pdftk <file> dump_data` output, fetched once and cached.
  def pdftk_data
    @pdftk_data ||= run_pdftk(@filename, 'dump_data').split("\n")
  end

  private

  # Builds Bookmark objects from the dump. Fields are matched by key rather
  # than by position, so both the three-line record layout and the layout
  # that prefixes every record with a "BookmarkBegin" line are understood.
  def parse_bookmarks
    parsed = []
    current = nil

    pdftk_data.each do |line|
      case line
      when /\ABookmarkBegin/
        current = start_bookmark(parsed)
      when /\ABookmarkTitle: ?/
        # Without a BookmarkBegin marker, a title line opens a new record.
        current = start_bookmark(parsed) if current.nil? || current.title
        current.title = Regexp.last_match.post_match
      when /\ABookmarkLevel: /
        current.level = Regexp.last_match.post_match.to_i if current
      when /\ABookmarkPageNumber: /
        current.page = Regexp.last_match.post_match.to_i if current
      end
    end

    # The index tie-breaker keeps dump order for bookmarks on the same page.
    parsed.each_with_index.sort_by { |bookmark, index| [bookmark.page.to_i, index] }.map(&:first)
  end

  # Appends a fresh Bookmark owned by this PDF to +list+ and returns it.
  def start_bookmark(list)
    bookmark = Bookmark.new
    bookmark.pdf = self
    list << bookmark
    bookmark
  end

  # Runs pdftk with +args+ and returns its standard output. The arguments are
  # passed as an array, so no shell is involved and paths containing quotes,
  # spaces or "$" reach pdftk unchanged.
  def run_pdftk(*args)
    IO.popen(['pdftk', *args]) { |io| io.read }
  end
end
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdf_split_bookmark-pdftk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron Breckenridge
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: crowd_support
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Use pdftk to split pdfs on bookmark level. Limited functionality, see
|
63
|
+
README.md for more info.
|
64
|
+
email: aaronbreckenridge@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- README.md
|
70
|
+
- lib/pdf_split_bookmark-pdftk.rb
|
71
|
+
homepage: https://github.com/breckenedge/pdf_split_bookmark-pdftk
|
72
|
+
licenses: []
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.9.2
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ! '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 1.3.6
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 1.8.24
|
92
|
+
signing_key:
|
93
|
+
specification_version: 3
|
94
|
+
summary: split pdfs on bookmarks
|
95
|
+
test_files: []
|