jekyll_pages_api 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/bin/jekyll_pages_api +49 -0
- data/lib/jekyll_pages_api.rb +5 -0
- data/lib/jekyll_pages_api/generated_page.rb +34 -0
- data/lib/jekyll_pages_api/generated_page_parser.rb +121 -0
- data/lib/jekyll_pages_api/generated_site.rb +46 -0
- data/lib/jekyll_pages_api/version.rb +1 -1
- data/spec/generated_page_parser_spec.rb +181 -0
- data/spec/generated_page_spec.rb +86 -0
- data/spec/generated_site_spec.rb +58 -0
- metadata +14 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63259d15c6f30bdddfb3a67d17462c200f006a35
|
4
|
+
data.tar.gz: ce08cd1149ab684423f129329412ed8a7593dec8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6aa6643cbd4053ba3344fd26d0201c8a0887a74c0a754affb65287361e6248415aeac19b0b292a3d91a7f6560830744c7e79669dab37ad91d963342ca1073643
|
7
|
+
data.tar.gz: 150a483db15cf7692f3d8356ff7075cd67ddd4f8efe03f9baa61fb8c8477cd96179a703172c0517a3b19dd3a1ad275afeed59d8a2ddb7aa5d44856fedbe28624
|
data/README.md
CHANGED
@@ -44,6 +44,10 @@ This endpoint will be re-generated any time your site is rebuilt.
|
|
44
44
|
|
45
45
|
The [Jekyll Pages API Search plugin](https://github.com/18F/jekyll_pages_api_search) uses this plugin to build a search index. Add `skip_index: true` to the front matter of any documents you wish to exclude from this index.
|
46
46
|
|
47
|
+
### Running standalone
|
48
|
+
|
49
|
+
If you wish to generate a `pages.json` file when using a site generation tool other than Jekyll, you can run the `jekyll_pages_api` executable as a post-generation step. Run `jekyll_pages_api -h` for instructions.
|
50
|
+
|
47
51
|
## Developing
|
48
52
|
|
49
53
|
* Run `bundle` to install any necessary gems.
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#! /usr/bin/env ruby
# Author: Mike Bland <michael.bland@gsa.gov>
# Date: 2015-06-21
#
# Command-line wrapper: builds a GeneratedSite from the four positional
# arguments, hands it to the Generator, and prints the generated page's
# output on standard output.

require_relative '../lib/jekyll_pages_api'

# Help text; printed to stdout for -h and to stderr on a usage error.
USAGE=<<END_USAGE
#{$0}: generate Jekyll Pages API output from a pregenerated site

Usage:
#{$0} baseurl basedir title_prefix body_element_tag > pages.json
#{$0} -h

Arguments:
-h
Print this help message
baseurl
URL prefix of every page of the generated site
basedir
Path to the generated site's root directory
title_prefix
Prefix to strip from page titles
body_element_tag
Tag (or tag prefix) identifying the main content element within the <body>
element of each document. Can be a complete tag (ending in '>'), or the
prefix of a longer tag. Used to strip boilerplate out of the content
exported via the API.
END_USAGE

# A lone -h is an explicit request for help, so it exits successfully.
if ARGV == ['-h']
  puts USAGE
  exit
end

# Anything other than exactly four positional arguments is a usage error.
unless ARGV.length == 4
  $stderr.puts USAGE
  exit 1
end

baseurl, basedir, title_prefix, body_element_tag = ARGV

# Fail early, before any page is read, when the site directory is missing.
if !Dir.exist?(basedir)
  $stderr.puts "#{basedir} does not exist"
  exit 1
end

site = ::JekyllPagesApi::GeneratedSite.new(
  baseurl, basedir, title_prefix, body_element_tag)
generator = ::JekyllPagesApi::Generator.new(site)
puts generator.page.output
|
data/lib/jekyll_pages_api.rb
CHANGED
@@ -1,2 +1,7 @@
|
|
1
|
+
require 'jekyll_pages_api/filters'
|
2
|
+
require 'jekyll_pages_api/generated_site'
|
3
|
+
require 'jekyll_pages_api/generator'
|
4
|
+
require 'jekyll_pages_api/page'
|
5
|
+
require 'jekyll_pages_api/page_without_a_file'
|
1
6
|
require 'jekyll_pages_api/version'
|
2
7
|
require_relative 'jekyll/site'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
require_relative 'generated_page_parser'
|
4
|
+
|
5
|
+
module JekyllPagesApi
  # Used by GeneratedSite to mimic a Jekyll page object when processing an
  # already-generated site using the Generator.
  class GeneratedPage
    attr_reader :path, :relative_path, :data, :content

    # Derives the site-relative path from +path+ and parses the page's
    # metadata and main content out of +content+.
    #
    # The relative path always keeps its leading separator, and a trailing
    # "index.html" is dropped so that directory indexes end in a separator
    # (e.g. "foo/bar/index.html" under "foo" becomes "/bar/").
    #
    # @param path [String] full path to the generated page's file
    # @param basedir see {GeneratedSite#initialize}
    # @param title_prefix see {GeneratedSite#initialize}
    # @param body_element_tag see {GeneratedSite#initialize}
    # @param content [String] HTML content of the generated page's file
    # @raise [RuntimeError] if path does not begin with basedir, or only
    #   shares a partial directory name with it (e.g. "foobar/x.html" for a
    #   basedir of "foo")
    def initialize(path, basedir, title_prefix, body_element_tag, content)
      # Exclude a trailing separator from the prefix length so the relative
      # path starts with that separator.
      basedir_len = basedir.size
      basedir_len -= File::SEPARATOR.size if basedir.end_with? File::SEPARATOR

      unless path.start_with?(basedir) &&
          on_directory_boundary?(path, basedir_len)
        raise "#{path} does not start with #{basedir}"
      end

      @path = path

      # The inclusive range below stops on the separator that precedes
      # "index.html", which is what leaves the trailing slash in place.
      end_path = path.size
      index_suffix = File.join "", "index.html"
      end_path -= index_suffix.size if path.end_with? index_suffix
      @relative_path = (path[basedir_len..end_path] || "")
      @data, @content = GeneratedPageParser.parse_generated_page(
        content, title_prefix, body_element_tag)
    end

    private

    # True when the part of +path+ after the basedir prefix is empty or begins
    # with a separator, i.e. the prefix match ended on a whole path component.
    # A zero-length prefix (empty or root basedir) has no component to split.
    def on_directory_boundary?(path, basedir_len)
      remainder = path[basedir_len..-1]
      basedir_len.zero? || remainder.empty? ||
        remainder.start_with?(File::SEPARATOR)
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
module JekyllPagesApi
  # Contains helper methods for parsing values from HTML.
  #
  # The helpers locate tags with plain substring searches rather than a real
  # HTML parser, so they only understand reasonably regular markup.
  class GeneratedPageParser
    # Parses elements from a generated page's content needed by GeneratedPage.
    # @param content see {GeneratedPage#initialize}
    # @param title_prefix see {GeneratedSite#initialize}
    # @param body_element_tag see {GeneratedSite#initialize}
    # @return [Hash<String, String>, String] the metadata hash containing the
    #   `title` (nil when the page has no <title>) plus every named <meta>
    #   value; and the body content stripped of boilerplate. Returns an empty
    #   hash and an empty string when the document has no <head> element.
    # @raise [RuntimeError] see {parse_content_from_body}
    def self.parse_generated_page(content, title_prefix, body_element_tag)
      data = {}
      head_element = parse_basic_tag 'head', content
      return data, "" if head_element.nil?

      title = parse_basic_tag 'title', head_element
      if !title.nil? && title.start_with?(title_prefix)
        title = title[title_prefix.size..-1]
      end
      data['title'] = title
      data.merge!(parse_meta_tags(head_element))
      return data, parse_content_from_body(content, body_element_tag)
    end

    # Parses a value from content from between tags that cannot be nested,
    # e.g. <head>, <body>, <title>. The opening tag may carry attributes.
    # @param tag_name [String] name of the tag to parse
    # @param content [String] HTML content from which to parse a value
    # @return [String] if a value is successfully parsed
    # @return [nil] if the tag isn't present in content, or is not well-formed
    #   (opening tag never terminated by '>', or no closing tag)
    def self.parse_basic_tag(tag_name, content)
      open_tag = "<#{tag_name}"
      close_tag = "</#{tag_name}>"
      open_i = content.index open_tag
      return nil if open_i.nil?

      # The opening tag may be truncated (e.g. "<title" at end of input);
      # report that as "not well-formed" instead of failing on nil + 1.
      bracket_i = content.index '>', open_i + open_tag.size
      return nil if bracket_i.nil?

      value_i = bracket_i + 1
      close_i = content.index close_tag, value_i
      return nil if close_i.nil?
      content[value_i...close_i]
    end

    # Parses the (name, content) pairs from <meta> tags in the <head> element.
    # Note that it parses _only_ the `name` and `content` fields, and only
    # when their values are quoted. Tags without a quoted `name` are skipped;
    # a named tag without a quoted `content` maps to nil.
    # @param head_element [String] <head> element from an HTML document
    # @return [Hash<String, String>] a collection of (name, content) values;
    #   parsing stops at the first <meta> tag that is never closed by '>'
    def self.parse_meta_tags(head_element)
      open_tag = "<meta "
      meta_tags = {}
      open_i = head_element.index open_tag

      until open_i.nil?
        # Keep the space that ends open_tag: the attribute lookups search for
        # " name=" / " content=", so the first attribute needs it too.
        open_i += open_tag.size - 1
        close_i = head_element.index '>', open_i
        return meta_tags if close_i.nil?

        current = head_element[open_i..close_i]
        meta_name = parse_quoted_attribute current, 'name'
        unless meta_name.nil?
          meta_tags[meta_name] = parse_quoted_attribute current, 'content'
        end
        open_i = head_element.index open_tag, close_i + 1
      end
      meta_tags
    end

    # Parse actual content from an HTML page, leaving out boilerplate.
    # @param content [String] content of an HTML document
    # @param body_element_tag see {GeneratedSite#initialize}
    # @return [String] the inner HTML of the first element in <body> whose
    #   opening tag begins with body_element_tag; the whole <body> content if
    #   body_element_tag is empty or not found; or content unchanged if there
    #   is no well-formed <body> element
    # @raise [RuntimeError] if the matched element's opening tag is never
    #   terminated by '>', or its closing tag is missing
    def self.parse_content_from_body(content, body_element_tag)
      body = parse_basic_tag 'body', content
      return content if body.nil?
      tag_i = body.index body_element_tag unless body_element_tag.empty?
      return body if tag_i.nil?

      # Skip the '<' to reach the first character of the tag name.
      name_i = tag_i + 1
      bracket_i = body.index '>', name_i
      raise "Start tag not closed: #{body_element_tag}" if bracket_i.nil?

      # The name ends at the first whitespace or at the closing bracket,
      # whichever comes first; bracket_i guarantees that a match exists.
      end_name_i = body.index(/[\s>]/, name_i)
      tag_name = body[name_i...end_name_i]
      open_tag = "<#{tag_name}"
      end_tag = "</#{tag_name}>"

      # Walk forward counting nested elements of the same name until the end
      # tag that balances the element found above.
      start_body = bracket_i + 1
      open_tag_i = body.index open_tag, start_body
      end_tag_i = body.index end_tag, start_body
      depth = 1
      until depth == 0
        raise "End tag missing: #{end_tag}" if end_tag_i.nil?

        if !open_tag_i.nil? && open_tag_i < end_tag_i
          depth += 1
          open_tag_i = body.index open_tag, open_tag_i + open_tag.size
        else
          depth -= 1
          # Leave end_tag_i on the balancing end tag once depth reaches zero.
          unless depth == 0
            end_tag_i = body.index end_tag, end_tag_i + end_tag.size
          end
        end
      end
      body[start_body...end_tag_i]
    end

    # Extracts the value of a single- or double-quoted attribute from the
    # text of one tag.
    # @param tag [String] tag text starting at the space before its first
    #   attribute
    # @param attr [String] attribute name
    # @return [String] the text between the quotes
    # @return [nil] if the attribute is absent, unquoted, or its closing quote
    #   is missing
    def self.parse_quoted_attribute(tag, attr)
      marker = " #{attr}="
      marker_i = tag.index marker
      return nil if marker_i.nil?

      quote_i = marker_i + marker.size
      quote = tag[quote_i]
      # An unquoted value has no delimiter to search for; treating its first
      # character as one would return a meaningless slice.
      return nil unless quote == '"' || quote == "'"

      value_end_i = tag.index quote, quote_i + 1
      return nil if value_end_i.nil?
      tag[(quote_i + 1)...value_end_i]
    end
    private_class_method :parse_quoted_attribute
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
require_relative 'generated_page'
|
4
|
+
|
5
|
+
module JekyllPagesApi
  # Used by the standalone executable to mimic a Jekyll::Site when processing
  # an already-generated site using the Generator.
  class GeneratedSite
    # @see #initialize
    attr_reader :baseurl, :basedir, :title_prefix, :body_element_tag

    # @return [Array<>] a dummy empty Array
    attr_accessor :pages

    # @param baseurl [String] URL prefix of every page of the generated site
    # @param basedir [String] Path to the generated site's root directory
    # @param title_prefix [String] Prefix to strip from page titles
    # @param body_element_tag [String] Tag (or tag prefix) identifying the
    #   main content element within the <body> element of each document. Can
    #   be a complete tag (ending in '>'), or the prefix of a longer tag. Used
    #   to strip boilerplate out of the content exported via the API.
    def initialize(baseurl, basedir, title_prefix, body_element_tag)
      @baseurl = baseurl
      @basedir = basedir
      @title_prefix = title_prefix
      @body_element_tag = body_element_tag
      @pages = []
    end

    # Generator yielding each HTML page (as a {GeneratedPage}) that should be
    # exported via the API: every regular `.html` file under basedir whose
    # parsed data has a title. Pages are visited in sorted path order so the
    # result does not depend on the order the file system lists entries.
    #
    # @yieldparam page [GeneratedPage]
    # @return [Enumerator] if no block is given
    # @raise [StandardError] re-raises any error hit while reading, parsing or
    #   yielding a page, after naming the offending file on standard error
    def each_site_file
      return enum_for(:each_site_file) unless block_given?

      Dir.glob(File.join(basedir, '**', '*.html')).sort.each do |f|
        # A directory may also be named "something.html"; it cannot be read.
        next unless File.file? f
        begin
          page = GeneratedPage.new(f, basedir, title_prefix,
            body_element_tag, File.read(f))
          yield page unless page.data['title'].nil?
        rescue
          $stderr.puts "Error while processing #{f}:"
          raise
        end
      end
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module JekyllPagesApi
  # Specs for the string-scanning helpers of GeneratedPageParser. Every helper
  # is a class method, so the example groups use the ".method" naming form.
  describe GeneratedPageParser do
    describe '.parse_basic_tag' do
      it "returns nil if content is empty" do
        expect(GeneratedPageParser.parse_basic_tag('', '')).to eq(nil)
      end

      it "returns nil if the tag is not present" do
        expect(GeneratedPageParser.parse_basic_tag(
          'title', 'foobar')).to eq(nil)
      end

      it "returns nil if the tag is not closed" do
        expect(GeneratedPageParser.parse_basic_tag(
          'title', '<title>foobar')).to eq(nil)
      end

      it "returns the content of the tag" do
        expect(GeneratedPageParser.parse_basic_tag(
          'title', '<title>foobar</title>')).to eq('foobar')
      end
    end

    describe '.parse_meta_tags' do
      it "returns an empty hash if the head_element is empty" do
        expect(GeneratedPageParser.parse_meta_tags('')).to eq({})
      end

      it "returns an empty hash if a meta tag isn't closed properly" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta name="foo" content="bar"')).to eq({})
      end

      it "returns an empty hash if the only meta tag lacks a name" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta not_a_name="foo" content="bar">')).to eq({})
      end

      it "returns a valid hash for a well-formed meta tag" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta name="foo" content="bar">')).to eq("foo" => "bar")
      end

      it "returns a valid hash for a self-closing meta tag" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta name="foo" content="bar"/>')).to eq("foo" => "bar")
      end

      it "returns a valid hash for a meta tag with single-quote delimiters" do
        expect(GeneratedPageParser.parse_meta_tags(
          "<meta name='foo' content='bar'/>")).to eq("foo" => "bar")
      end

      it "returns a valid hash for a meta tag with mixed-quote delimiters" do
        expect(GeneratedPageParser.parse_meta_tags(
          "<meta name='foo' content=\"bar\"/>")).to eq("foo" => "bar")
      end

      it "returns a valid hash regardless of attribute order" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta content="bar" name="foo" />')).to eq("foo" => "bar")
      end

      it "returns a valid hash for multiple meta tags" do
        expect(GeneratedPageParser.parse_meta_tags(
          '<meta name="foo" content="bar"/>'+
          "<meta name='baz' content='quux' other=\"don't care\" >" +
          '<meta content="plugh" name="xyzzy" />')
        ).to eq("foo" => "bar", "baz" => "quux", "xyzzy" => "plugh")
      end
    end

    describe '.parse_content_from_body' do
      it "returns the empty string if passed all empty strings" do
        expect(GeneratedPageParser.parse_content_from_body("", "")).to eq("")
      end

      it "returns the original content if there are no body tags" do
        expect(GeneratedPageParser.parse_content_from_body(
          "foobar", "")).to eq("foobar")
      end

      it "returns the original content if the body tag isn't closed" do
        expect(GeneratedPageParser.parse_content_from_body(
          "<body>foobar", "")).to eq("<body>foobar")
      end

      it "returns the full body content if the body_element_tag is empty" do
        expect(GeneratedPageParser.parse_content_from_body(
          "<body>header<div class='content'>foobar</div>footer</body>", "")
        ).to eq("header<div class='content'>foobar</div>footer")
      end

      it "returns only the body content within the body_element_tag" do
        expect(GeneratedPageParser.parse_content_from_body(
          "<body><div>header</div>"+
          "<div class='content'>foobar</div>"+
          "<div>footer</div></body>",
          "<div class='content'>")).to eq("foobar")
      end

      it "returns only the body content when body_element_tag is a prefix" do
        expect(GeneratedPageParser.parse_content_from_body(
          "<body><div>header</div>"+
          "<div class='content' plus='blah blah woof woof'>foobar</div>"+
          "<div>footer</div></body>",
          "<div class='content'")).to eq("foobar")
      end

      it "handles nested divs within the body content" do
        expect(GeneratedPageParser.parse_content_from_body(
          "<body><div>header</div>"+
          "<div class='content' plus='blah blah woof woof'>"+
          "blah blah"+
          "<div>plus<div>some</div><div>nested</div>divs</div>"+
          "woof woof"+
          "</div>"+
          "<div>footer</div></body>",
          "<div class='content'")
        ).to eq(
          "blah blah"+
          "<div>plus<div>some</div><div>nested</div>divs</div>"+
          "woof woof"
        )
      end
    end

    describe '.parse_generated_page' do
      it "returns empty values when passed all empty strings" do
        data, content = GeneratedPageParser.parse_generated_page "", "", ""
        expect(data).to eq({})
        expect(content).to eq("")
      end

      it "returns empty values when the head element isn't present" do
        data, content = GeneratedPageParser.parse_generated_page(
          "<body>foobar</body>", "", "")
        expect(data).to eq({})
        expect(content).to eq("")
      end

      it "returns a nil title and all body content" do
        data, content = GeneratedPageParser.parse_generated_page(
          "<head></head>"+
          "<body><div>header</div>"+
          "<div class='content'>foobar</div>"+
          "<div>footer</div></body>", "", "")
        expect(data).to eq({"title" => nil})
        expect(content).to eq(
          "<div>header</div><div class='content'>foobar</div><div>footer</div>")
      end

      it "returns the title and only body content within body_element_tag" do
        data, content = GeneratedPageParser.parse_generated_page(
          "<head><title>Blah Blah Woof Woof</title></head>"+
          "<body><div>header</div>"+
          "<div class='content'>foobar</div>"+
          "<div>footer</div></body>", "", "<div class='content'")
        expect(data).to eq({"title" => "Blah Blah Woof Woof"})
        expect(content).to eq("foobar")
      end

      it "returns the stripped title body content and metadata" do
        data, content = GeneratedPageParser.parse_generated_page(
          "<head><title>18F — Blah Blah Woof Woof</title>"+
          "<meta name='skip-index' content='true'>"+
          "<meta content='baz,quux,xyzzy,plugh' name=\"tags\" />"+
          "</head>"+
          "<body><div>header</div>"+
          "<div class='content'>foobar</div>"+
          "<div>footer</div></body>", "18F — ", "<div class='content'")
        expect(data).to eq(
          "title" => "Blah Blah Woof Woof",
          "skip-index" => "true",
          "tags" => "baz,quux,xyzzy,plugh",
        )
        expect(content).to eq("foobar")
      end
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module JekyllPagesApi
  # Specs for GeneratedPage#initialize: relative-path derivation for the
  # different basedir/path shapes, the prefix-mismatch error, and delegation
  # of content parsing.
  describe GeneratedPage do
    describe '#initialize' do
      it "handles all empty strings correctly" do
        page = GeneratedPage.new("", "", "", "", "")

        expect(page.path).to eq("")
        expect(page.relative_path).to eq("")
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "parses the relative path when basedir ends in SEPARATOR" do
        basedir = File.join("foo", "")
        path = File.join("foo", "bar", "baz.html")
        page = GeneratedPage.new(path, basedir, "", "", "")

        expect(page.path).to eq("foo/bar/baz.html")
        expect(page.relative_path).to eq("/bar/baz.html")
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "parses the relative path when basedir doesn't end in SEPARATOR" do
        basedir = "foo"
        path = File.join("foo", "bar", "baz.html")
        page = GeneratedPage.new(path, basedir, "", "", "")

        expect(page.path).to eq("foo/bar/baz.html")
        expect(page.relative_path).to eq("/bar/baz.html")
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "raises RuntimeError when path does not begin with basedir" do
        basedir = File.join("quux", "")
        path = File.join("foo", "bar", "baz.html")
        expected_message = "#{path} does not start with #{basedir}"

        expect {
          GeneratedPage.new(path, basedir, "", "", "")
        }.to raise_error(RuntimeError, expected_message)
      end

      it "parses out index.html suffix and leaves trailing slash" do
        basedir = File.join("foo", "")
        path = File.join("foo", "bar", "index.html")
        page = GeneratedPage.new(path, basedir, "", "", "")

        expect(page.path).to eq("foo/bar/index.html")
        expect(page.relative_path).to eq("/bar/")
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "parses out index.html suffix and leaves trailing slash for root" do
        basedir = File.join("foo", "")
        path = File.join("foo", "index.html")
        page = GeneratedPage.new(path, basedir, "", "", "")

        expect(page.path).to eq("foo/index.html")
        expect(page.relative_path).to eq("/")
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "parses content correctly" do
        basedir = File.join("foo", "")
        path = File.join("foo", "bar", "index.html")
        html = "<head><title>18F — Blah Blah Woof Woof</title>"+
          "<meta name='skip-index' content='true'>"+
          "<meta content='baz,quux,xyzzy,plugh' name=\"tags\" />"+
          "</head>"+
          "<body><div>header</div>"+
          "<div class='content'>foobar</div>"+
          "<div>footer</div></body>"

        page = GeneratedPage.new(
          path, basedir, "18F — ", "<div class='content'", html)

        expect(page.path).to eq("foo/bar/index.html")
        expect(page.relative_path).to eq("/bar/")
        expect(page.data).to eq(
          "title" => "Blah Blah Woof Woof",
          "skip-index" => "true",
          "tags" => "baz,quux,xyzzy,plugh",
        )
        expect(page.content).to eq("foobar")
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
module JekyllPagesApi
  # Specs for GeneratedSite#each_site_file, run against a throwaway site
  # directory that is created before and removed after each example.
  describe GeneratedSite do
    describe '#each_site_file' do
      before :each do
        @test_tmpdir = Dir.mktmpdir
      end

      after :each do
        FileUtils.remove_entry @test_tmpdir
      end

      it "should only select .html files containing a title" do
        basedir = File.join @test_tmpdir, "foo"
        FileUtils.mkdir_p basedir

        content_dir = File.join(basedir, "bar")
        FileUtils.mkdir_p content_dir

        File.open(File.join(content_dir, "baz.txt"), "w") do |f|
          f << "Text file that should be excluded."
        end

        File.open(File.join(content_dir, "quux.html"), "w") do |f|
          f << "<html><head><title>18F — Include me!</title></head>"
          f << "<body><div>header</div>"
          f << "<div class='content'>This page should be included.</div>"
          f << "<div>footer</div></body></html>"
        end

        File.open(File.join(content_dir, "xyzzy.html"), "w") do |f|
          f << "<html><head></head><body>"
          f << "This page shouldn't be included because it lacks a title."
          f << "</body></html>"
        end

        # Sanity-check the fixture. Sort the glob result because the order in
        # which Dir.glob lists entries is not guaranteed on every Ruby version
        # and file system.
        paths = ['baz.txt', 'quux.html', 'xyzzy.html'].sort.map do |f|
          File.join content_dir, f
        end
        expect(Dir.glob(File.join(content_dir, '**', '*')).sort).to eq(paths)

        site = GeneratedSite.new("https://unused/", basedir,
          "18F — ", "<div class='content'")
        pages = []
        site.each_site_file {|f| pages << f}
        expect(pages.size).to eq(1)

        page = pages.first
        expect(page.path).to eq(File.join content_dir, 'quux.html')
        expect(page.relative_path).to eq("/bar/quux.html")
        expect(page.data).to eq("title" => "Include me!")
        expect(page.content).to eq("This page should be included.")
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll_pages_api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aidan Feldman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -89,7 +89,8 @@ dependencies:
|
|
89
89
|
description:
|
90
90
|
email:
|
91
91
|
- aidan.feldman@gsa.gov
|
92
|
-
executables:
|
92
|
+
executables:
|
93
|
+
- jekyll_pages_api
|
93
94
|
extensions: []
|
94
95
|
extra_rdoc_files: []
|
95
96
|
files:
|
@@ -101,17 +102,24 @@ files:
|
|
101
102
|
- LICENSE.md
|
102
103
|
- README.md
|
103
104
|
- Rakefile
|
105
|
+
- bin/jekyll_pages_api
|
104
106
|
- gemfiles/jekyll_2.gemfile
|
105
107
|
- gemfiles/jekyll_3.gemfile
|
106
108
|
- jekyll_pages_api.gemspec
|
107
109
|
- lib/jekyll/site.rb
|
108
110
|
- lib/jekyll_pages_api.rb
|
109
111
|
- lib/jekyll_pages_api/filters.rb
|
112
|
+
- lib/jekyll_pages_api/generated_page.rb
|
113
|
+
- lib/jekyll_pages_api/generated_page_parser.rb
|
114
|
+
- lib/jekyll_pages_api/generated_site.rb
|
110
115
|
- lib/jekyll_pages_api/generator.rb
|
111
116
|
- lib/jekyll_pages_api/page.rb
|
112
117
|
- lib/jekyll_pages_api/page_without_a_file.rb
|
113
118
|
- lib/jekyll_pages_api/version.rb
|
114
119
|
- spec/filters_spec.rb
|
120
|
+
- spec/generated_page_parser_spec.rb
|
121
|
+
- spec/generated_page_spec.rb
|
122
|
+
- spec/generated_site_spec.rb
|
115
123
|
- spec/integration_spec.rb
|
116
124
|
- spec/page_spec.rb
|
117
125
|
- spec/site/.gitignore
|
@@ -163,6 +171,9 @@ summary: A Jekyll Plugin that generates a JSON file with data for all the Pages
|
|
163
171
|
your Site.
|
164
172
|
test_files:
|
165
173
|
- spec/filters_spec.rb
|
174
|
+
- spec/generated_page_parser_spec.rb
|
175
|
+
- spec/generated_page_spec.rb
|
176
|
+
- spec/generated_site_spec.rb
|
166
177
|
- spec/integration_spec.rb
|
167
178
|
- spec/page_spec.rb
|
168
179
|
- spec/site/.gitignore
|