pdf-reader-markup 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pdf-reader-markup.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Liz Conlan
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,54 @@
1
+ # PDF::Reader::Markup
2
+
3
+ A markup extension for the PDF::Reader library.
4
+
5
+ As well as continuing to support fetching a collection of lines for an
6
+ individual page in a PDF file, this adds the method formatted_lines
7
+ which uses HTML-style tags to mark up bold and italic text.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'pdf-reader-markup'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install pdf-reader-markup
22
+
23
+ ## Usage
24
+
25
+ Require the gem in the source file that contains the PDF-handling code:
26
+
27
+ require 'pdf/reader/markup'
28
+
29
+ You should now be able to use the custom MarkupPage handler to get back
30
+ matching plaintext and formatted lines for each page:
31
+
32
+ pdf = PDF::Reader.new("./spec/sample docs/Dorian_Gray_excerpt.pdf")
33
+ page = PDF::Reader::MarkupPage.new(pdf.pages[1])
34
+
35
+ # slightly modified version of the lines() method
36
+ lines_of_plaintext = page.lines()
37
+
38
+     # the new formatted_lines() method
39
+ lines_with_markup = page.formatted_lines()
40
+
41
+     # and not forgetting content() which will return all the lines as
42
+ # a solid block of text
43
+ entire_page_text = page.content()
44
+
45
+ # and its formatted equivalent markup
46
+     entire_page_markup = page.markup()
47
+
48
+ Note that you can still access the original PDF::Reader methods within the
49
+ same project by using `PDF::Reader::PageTextReceiver` and walking the page,
50
+ giving access to the standard content and lines functionality.
51
+
52
+ You can also, if you prefer, use the
53
+ `PDF::Reader::MarkupPage::PageBoldItalicReceiver` receiver directly rather than
54
+ using the PDF::Reader::MarkupPage wrapper.
@@ -0,0 +1,30 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+ require 'rdoc/task'
5
+
6
+ desc "Run tests with SimpleCov"
7
+ task :spec do |t|
8
+ RSpec::Core::RakeTask.new(:cov) do |t|
9
+ ENV["COVERAGE"] = "1"
10
+ end
11
+ end
12
+
13
+ RSpec::Core::RakeTask.new(:spec)
14
+ task :default => :spec
15
+
16
+ desc "Alias for 'rake spec'"
17
+ task :test => [:spec]
18
+
19
+ # Generate the RDoc documentation
20
+ desc "Create documentation"
21
+ Rake::RDocTask.new("doc") do |rdoc|
22
+ rdoc.title = "pdf-reader-markup"
23
+ rdoc.rdoc_dir = "doc"
24
+ rdoc.rdoc_files.include('README.md')
25
+ rdoc.main = 'README.md'
26
+ rdoc.rdoc_files.include('LICENSE.txt')
27
+ rdoc.rdoc_files.include('lib/pdf/reader/*.rb')
28
+ rdoc.rdoc_files.include('lib/pdf/reader/markup/*.rb')
29
+ rdoc.options << "--main"
30
+ end
@@ -0,0 +1,39 @@
1
+ #encoding: utf-8
2
+ require "pdf/reader/markup/version"
3
+ require "pdf/reader/markup/page_bold_italic_receiver.rb"
4
+
5
+ module PDF #:nodoc:all:
6
+ class Reader #:nodoc:
7
+ class MarkupPage #:doc:
8
+ ##
9
+ # Returns the plaintext content of the page
10
+ attr_reader :content
11
+
12
+ ##
13
+ # Returns the formatted lines for the page
14
+ # as an array
15
+ attr_reader :formatted_lines
16
+
17
+ ##
18
+ # Returns the plaintext lines for the page
19
+ # as an array
20
+ attr_reader :lines
21
+
22
+ ##
23
+ # Returns the formatted content of the page
24
+ attr_reader :markup
25
+
26
+ ##
27
+ # Wrapper function for walking the page with the
28
+ # Reader::MarkupPage::PageBoldItalicReceiver receiver
29
+ def initialize(page)
30
+ receiver = PageBoldItalicReceiver.new()
31
+ page.walk(receiver)
32
+ @content = receiver.content
33
+ @markup = receiver.markup
34
+ @lines = @content.lines.to_a
35
+ @formatted_lines = @markup.lines.to_a
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,167 @@
1
+ #encoding: utf-8
2
+
3
+ require "pdf/reader"
4
+ require "nokogiri"
5
+
6
+ module PDF #:nodoc:all:
7
+ class Reader #:nodoc:
8
+ class MarkupPage #:doc:
9
+ ##
10
+ # Builds a UTF-8 plaintext string and a UTF-8 string that includes
11
+ # simple Bold and Italic markup of all the text on a single page by
12
+ # processing all the operators in a content stream.
13
+ class PageBoldItalicReceiver < PDF::Reader::PageTextReceiver
14
+ ##
15
+ # starting a new page
16
+ def page=(page)
17
+ super(page)
18
+ @last_tag_end = ""
19
+ @open_tag = ""
20
+ @lasty = 0.0
21
+ @footer = []
22
+ @text = []
23
+ @lines = []
24
+ end
25
+
26
+ ##
27
+ # Returns the value of the markup attribute - equivalent to the
28
+ # content attribute but with bold and italic markup
29
+ def markup
30
+ unless @text.empty?
31
+ line = fix_markup("#{@text.join("").strip}#{@last_tag_end}")
32
+ @lines << line
33
+ @text = []
34
+ end
35
+ if @footer.join("").strip.empty?
36
+ if @lines.last.empty?
37
+ output = @lines[0..-2].join("\n")
38
+ else
39
+ output = @lines.join("\n")
40
+ end
41
+ else
42
+ output = %Q|#{@lines.join("\n")}\n#{@footer.join("")}|
43
+ end
44
+ output
45
+ end
46
+
47
+ ##
48
+ # Returns the value of the content attribute
49
+ def content
50
+ lines = super.lines.to_a
51
+ fixed = []
52
+ current_line = 0
53
+ offset = 0
54
+ formatted_lines = markup.lines.to_a
55
+ lines.each_with_index do |line, index|
56
+ formatted_line = formatted_lines[index + offset]
57
+ if line.strip == "" and (formatted_line and formatted_lines[index + offset].strip != "")
58
+ offset -= 1
59
+ else
60
+ fixed << line
61
+ end
62
+ end
63
+ lines = fixed.join("")
64
+ lines
65
+ end
66
+
67
+
68
+ private
69
+
70
+ def fix_markup(string)
71
+ #get Nokogiri to close any open tags
72
+ string = Nokogiri::HTML::fragment(string).to_html
73
+
74
+ #strip empty markup tags
75
+ while string =~ /<(?:b|i)>\s*<\/(?:b|i)>/
76
+ string = string.gsub(/<(?:b|i)>\s*<\/(?:b|i)>/, "").strip
77
+ end
78
+ string
79
+ end
80
+
81
+ def font_type(font, type)
82
+ if font.basefont.to_s.include?(type)
83
+ return true
84
+ end
85
+ false
86
+ end
87
+
88
+ def markup_tags(font)
89
+ open = ""
90
+ close = ""
91
+ if font_type(@state.current_font, "Bold")
92
+ open = "<b>"
93
+ close = "</b>"
94
+ end
95
+ if font_type(@state.current_font, "Italic")
96
+ open = "#{open}<i>"
97
+ close = "</i>#{close}"
98
+ end
99
+ {:open => open, :close => close}
100
+ end
101
+
102
+ def append_line(tags, run)
103
+ line = fix_markup("#{@text.join("").strip}#{@last_tag_end}")
104
+ unless @lines.empty? and line.strip.empty?
105
+ @lines << line
106
+ end
107
+ @last_tag_end = ""
108
+ @text = ["#{tags[:open]}#{run.to_s}"]
109
+ end
110
+
111
+ def internal_show_text(string)
112
+ if @state.current_font.nil?
113
+ raise PDF::Reader::MalformedPDFError, "current font is invalid"
114
+ end
115
+ glyphs = @state.current_font.unpack(string)
116
+ text = ""
117
+ glyphs.each_with_index do |glyph_code, index|
118
+ # paint the current glyph
119
+ newx, newy = @state.trm_transform(0,0)
120
+ utf8_chars = @state.current_font.to_utf8(glyph_code)
121
+
122
+ # apply to glyph displacment for the current glyph so the next
123
+ # glyph will appear in the correct position
124
+ glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
125
+ th = 1
126
+ scaled_glyph_width = glyph_width * @state.font_size * th
127
+ run = TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
128
+ @characters << run
129
+ @state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
130
+
131
+ build_markup(newy, run)
132
+ end
133
+ end
134
+
135
+ def build_markup(newy, run)
136
+ tags = markup_tags(@state.current_font)
137
+ if tags[:open] == @open_tag
138
+ if newy < 50
139
+ @footer << run.to_s
140
+ newy = @lasty
141
+ else
142
+ if newy < @lasty
143
+ append_line(tags, run)
144
+ else
145
+ @text << "#{run.to_s}"
146
+ end
147
+ end
148
+ else
149
+ if newy < 50
150
+ @footer << "#{@last_tag_end}#{run.to_s}"
151
+ newy = @lasty
152
+ else
153
+ if newy < @lasty
154
+ append_line(tags, run)
155
+ else
156
+ @text << "#{@last_tag_end}#{tags[:open]}#{run.to_s}"
157
+ end
158
+ end
159
+ @last_tag_end = tags[:close]
160
+ end
161
+ @open_tag = tags[:open]
162
+ @lasty = newy
163
+ end
164
+ end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,8 @@
1
+ #:stopdoc:
2
+ module PDF
3
+ class Reader
4
+ class MarkupPage
5
+ VERSION = "0.0.1"
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pdf/reader/markup/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "pdf-reader-markup"
8
+ spec.version = PDF::Reader::MarkupPage::VERSION
9
+ spec.authors = ["Liz Conlan"]
10
+ spec.email = ["lizconlan@gmail.com"]
11
+ spec.description = %q{A markup extension for the PDF::Reader library}
12
+ spec.summary = %q{Adds the option to retrieve text lines marked up with bold and italic tags when parsing PDF pages with PDF::Reader}
13
+ spec.homepage = "https://github.com/lizconlan/pdf-reader-markup"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "pdf-reader", "~> 1.3"
22
+ spec.add_dependency "nokogiri"
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.3"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "simplecov"
28
+ end
@@ -0,0 +1,101 @@
1
+ #encoding: utf-8
2
+ require_relative "../rspec_helper.rb"
3
+
4
+ require './lib/pdf/reader/markup.rb'
5
+
6
+ describe "MarkupPage" do
7
+ context "when given an excerpt from The Canterville Ghost" do
8
+ before(:all) do
9
+ @pdf = PDF::Reader.new("./spec/sample docs/canterville-ghost-excerpt.pdf")
10
+ end
11
+
12
+ context "reading the first page" do
13
+ before(:all) do
14
+ @pdf_page = PDF::Reader::MarkupPage.new(@pdf.pages[0])
15
+ end
16
+
17
+ it "should find 8 lines of text" do
18
+ @pdf_page.lines.count.should eq 8
19
+ @pdf_page.formatted_lines.count.should eq 8
20
+ end
21
+
22
+ it "should find a mixture of plain and italic text" do
23
+ @pdf_page.formatted_lines[0].should eq "The Canterville Ghost\n"
24
+ @pdf_page.formatted_lines[1].should eq "<i>An amusing chronicle of the tribulations of the Ghost of Canterville Chase when his ancestral halls became the</i>\n"
25
+ @pdf_page.formatted_lines[2].should eq "<i>home of the American Minister to the Court of St. James</i>\n"
26
+ end
27
+ end
28
+
29
+ context "reading the second page" do
30
+ before(:all) do
31
+ @pdf_page = PDF::Reader::MarkupPage.new(@pdf.pages[1])
32
+ end
33
+
34
+ it "should find 16 lines of text" do
35
+ @pdf_page.lines.count.should eq 16
36
+ @pdf_page.formatted_lines.count.should eq 16
37
+ end
38
+
39
+ it "should correctly relocate the footer to the end of the text block" do
40
+ @pdf_page.lines.last.should eq "The Pennsylvania State University is an equal opportunity university."
41
+ end
42
+ end
43
+ end
44
+
45
+ context "when given an excerpt from The Picture of Dorian Gray" do
46
+ before(:all) do
47
+ @pdf = PDF::Reader.new("./spec/sample docs/Dorian_Gray_excerpt.pdf")
48
+ end
49
+
50
+ context "reading the second page" do
51
+ before(:all) do
52
+ @pdf_page = PDF::Reader::MarkupPage.new(@pdf.pages[1])
53
+ end
54
+
55
+ it "should find 24 lines of text" do
56
+ @pdf_page.lines.count.should eq 24
57
+ @pdf_page.formatted_lines.count.should eq 24
58
+ end
59
+
60
+ it "should find a mixture of plain, bold and italic text" do
61
+ @pdf_page.formatted_lines[0].should eq "<i>The Picture of Dorian Gray</i>\n"
62
+ @pdf_page.formatted_lines[1].should eq "<b>Chapter I</b>\n"
63
+ @pdf_page.formatted_lines[2].should eq "The studio was filled with the rich odor of roses, and\n"
64
+ @pdf_page.formatted_lines[3].should eq "when the light summer wind stirred amidst the trees of the\n"
65
+ @pdf_page.formatted_lines[4].should eq "garden there came through the open door the heavy scent\n"
66
+ @pdf_page.formatted_lines[5].should eq "of the lilac, or the more delicate perfume of the pink-\n"
67
+ @pdf_page.formatted_lines[6].should eq "flowering thorn.\n"
68
+ end
69
+ end
70
+ end
71
+
72
+ context "when given a House of Lords Forthcoming Business document" do
73
+ before(:all) do
74
+ @pdf = PDF::Reader.new("./spec/sample docs/Lords-Forthcoming-Business.pdf")
75
+ end
76
+
77
+ context "reading the first page" do
78
+ before(:all) do
79
+ @pdf_page = PDF::Reader::MarkupPage.new(@pdf.pages[0])
80
+ end
81
+
82
+ it "should find 32 lines of text" do
83
+ @pdf_page.lines.count.should eq 32
84
+ @pdf_page.formatted_lines.count.should eq 32
85
+ end
86
+
87
+ it "should find a mixture of plain, bold and italic text" do
88
+ @pdf_page.lines[0].should eq " GOVERNMENT WHIPS’ OFFICE\n"
89
+ @pdf_page.formatted_lines[0].should eq "GOVERNMENT WHIPS’ OFFICE\n"
90
+
91
+ @pdf_page.lines[3].should eq " FORTHCOMING BUSINESS\n"
92
+ @pdf_page.formatted_lines[3].should eq "<b>FORTHCOMING BUSINESS</b>\n"
93
+
94
+ @pdf_page.lines[6].should eq " [Notes about this document are set out at the end]\n"
95
+ @pdf_page.formatted_lines[6].should eq "[<i>Notes about this document are set out at the end</i>]\n"
96
+
97
+ @pdf_page.formatted_lines[29].should eq "<b><i>Easter adjournment:</i></b>\n"
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,8 @@
1
+ if ENV['COVERAGE']
2
+ require 'simplecov'
3
+ SimpleCov.start do
4
+ add_filter 'spec'
5
+ end
6
+ end
7
+
8
+ require 'rspec/autorun'
metadata ADDED
@@ -0,0 +1,162 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf-reader-markup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Liz Conlan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-11-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pdf-reader
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: simplecov
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ description: A markup extension for the PDF::Reader library
111
+ email:
112
+ - lizconlan@gmail.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - .gitignore
118
+ - Gemfile
119
+ - LICENSE.txt
120
+ - README.md
121
+ - Rakefile
122
+ - lib/pdf/reader/markup.rb
123
+ - lib/pdf/reader/markup/page_bold_italic_receiver.rb
124
+ - lib/pdf/reader/markup/version.rb
125
+ - pdf-reader-markup.gemspec
126
+ - spec/lib/markup_spec.rb
127
+ - spec/rspec_helper.rb
128
+ - spec/sample docs/Dorian_Gray_excerpt.pdf
129
+ - spec/sample docs/Lords-Forthcoming-Business.pdf
130
+ - spec/sample docs/canterville-ghost-excerpt.pdf
131
+ homepage: https://github.com/lizconlan/pdf-reader-markup
132
+ licenses:
133
+ - MIT
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ required_rubygems_version: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ requirements: []
151
+ rubyforge_project:
152
+ rubygems_version: 1.8.24
153
+ signing_key:
154
+ specification_version: 3
155
+ summary: Adds the option to retrieve text lines marked up with bold and italic tags
156
+ when parsing PDF pages with PDF::Reader
157
+ test_files:
158
+ - spec/lib/markup_spec.rb
159
+ - spec/rspec_helper.rb
160
+ - spec/sample docs/Dorian_Gray_excerpt.pdf
161
+ - spec/sample docs/Lords-Forthcoming-Business.pdf
162
+ - spec/sample docs/canterville-ghost-excerpt.pdf