pdf_split_bookmark-pdftk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.md +17 -0
  2. data/lib/pdf_split_bookmark-pdftk.rb +64 -0
  3. metadata +95 -0
@@ -0,0 +1,17 @@
1
+ pdf_split_bookmark-pdftk
2
+ ========================
3
+
4
+ This utility reads a PDF's bookmarks using pdftk, then splits the PDF into one file per bookmark at a specified bookmark level.
5
+
6
+ You will need the pdftk binaries from Sid Steward (http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
7
+
8
+ Caveat: each bookmark used as a split point should start on a new page, because the PDF is split at page boundaries.
9
+
10
+ Usage:
11
+
12
+ pdf = PDF.new('foo.pdf')
13
+ pdf.split_by_bookmark_at_level(2)
14
+
15
+ Optionally specify an output directory:
16
+
17
+ pdf.split_by_bookmark_at_level(2, '/path/to/output')
@@ -0,0 +1,64 @@
1
+ require 'crowd_support'
2
+
3
# A single outline entry (bookmark) read from a PDF.
#
# level - nesting depth of the bookmark as reported by pdftk (Integer)
# title - bookmark text (String)
# page  - page number the bookmark points to (Integer)
# pdf   - the PDF instance the bookmark was read from
class Bookmark
  attr_accessor :level, :title, :page, :pdf
end

# Wraps a PDF file on disk and shells out to the external `pdftk` binary to
# read its metadata (page count, bookmarks) and to split it into one file per
# bookmark of a given level.
class PDF
  attr_reader :filename

  # filename - path of the PDF to inspect and split.
  def initialize(filename)
    @filename = filename
    @bookmarks = []
  end

  # Returns the page count found in the `NumberOfPages:` line of the pdftk
  # dump (memoized).
  #
  # Raises RuntimeError when the dump has no such line, e.g. because pdftk
  # could not read the file.
  def length
    @length ||= begin
      line = pdftk_data.find { |d| d =~ /NumberOfPages: [0-9]+/ }
      raise "pdftk reported no NumberOfPages for #{filename}" if line.nil?
      line[/NumberOfPages: ([0-9]+)/, 1].to_i
    end
  end

  # Writes one PDF per bookmark of the given level into result_directory.
  # Each output runs from its bookmark's page up to the page before the next
  # bookmark of that level (or to the last page of the document).
  #
  # level            - bookmark level to split on (Integer)
  # result_directory - directory receiving the output files (default: cwd)
  #
  # Returns the output paths WITHOUT the ".pdf" extension, ordered from the
  # last bookmark to the first (kept as-is for backward compatibility).
  def split_by_bookmark_at_level(level, result_directory = Dir.pwd)
    endpage = length
    # Walk backwards so each bookmark's end page is simply "one before the
    # start of the section handled just before it".
    bookmarks_at_level(level).reverse.map do |splitmark|
      startpage = splitmark.page
      result_filename = File.join(result_directory, splitmark.title.sanitize_filename)
      # Two bookmarks on the same page would give end < start, which pdftk
      # reads as a reversed range; clamp to a single-page range instead.
      lastpage = [endpage, startpage].max
      run_pdftk("A=#{filename}", 'cat', "A#{startpage}-#{lastpage}",
                'output', "#{result_filename}.pdf", 'dont_ask')
      endpage = startpage - 1
      result_filename
    end
  end

  # Returns the bookmarks whose level equals the given level, in page order.
  def bookmarks_at_level(level)
    bookmarks.select { |bookmark| bookmark.level == level }
  end

  # Returns every bookmark in the document, sorted by page number. Parsed
  # lazily from the pdftk dump on first use.
  def bookmarks
    @bookmarks = parse_bookmarks if @bookmarks.empty?
    @bookmarks
  end

  # Returns the lines of `pdftk <file> dump_data` (memoized).
  def pdftk_data
    @pdftk_data ||= run_pdftk(@filename, 'dump_data').split("\n")
  end

  private

  # Builds Bookmark objects from the dump by field name rather than by fixed
  # position, so dumps with or without a `BookmarkBegin` separator line both
  # parse correctly. A `BookmarkTitle:` line starts a new bookmark.
  def parse_bookmarks
    parsed = []
    pdftk_data.each do |line|
      key, value = line.split(': ', 2)
      case key
      when 'BookmarkTitle'
        mark = Bookmark.new
        mark.title = value.to_s
        mark.pdf = self
        parsed << mark
      when 'BookmarkLevel'
        parsed.last.level = value.to_i unless parsed.empty?
      when 'BookmarkPageNumber'
        parsed.last.page = value.to_i unless parsed.empty?
      end
    end
    # Sort by page; the original index is the tie-breaker so bookmarks that
    # share a page keep their document order.
    parsed.each_with_index.sort_by { |mark, index| [mark.page.to_i, index] }.map(&:first)
  end

  # Runs pdftk with the given arguments and returns its standard output.
  # The argv (array) form of IO.popen is used so no shell is involved and
  # file names or titles containing quotes or `$(...)` are passed verbatim.
  def run_pdftk(*args)
    IO.popen(['pdftk', *args.map(&:to_s)]) { |io| io.read }
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf_split_bookmark-pdftk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Aaron Breckenridge
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: crowd_support
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Use pdftk to split pdfs on bookmark level. Limited functionality, see
63
+ README.md for more info.
64
+ email: aaronbreckenridge@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - README.md
70
+ - lib/pdf_split_bookmark-pdftk.rb
71
+ homepage: https://github.com/breckenedge/pdf_split_bookmark-pdftk
72
+ licenses: []
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.9.2
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 1.3.6
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 1.8.24
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: split pdfs on bookmarks
95
+ test_files: []