jekyll_pages_api 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/bin/jekyll_pages_api +49 -0
- data/lib/jekyll_pages_api.rb +5 -0
- data/lib/jekyll_pages_api/generated_page.rb +34 -0
- data/lib/jekyll_pages_api/generated_page_parser.rb +121 -0
- data/lib/jekyll_pages_api/generated_site.rb +46 -0
- data/lib/jekyll_pages_api/version.rb +1 -1
- data/spec/generated_page_parser_spec.rb +181 -0
- data/spec/generated_page_spec.rb +86 -0
- data/spec/generated_site_spec.rb +58 -0
- metadata +14 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63259d15c6f30bdddfb3a67d17462c200f006a35
|
4
|
+
data.tar.gz: ce08cd1149ab684423f129329412ed8a7593dec8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6aa6643cbd4053ba3344fd26d0201c8a0887a74c0a754affb65287361e6248415aeac19b0b292a3d91a7f6560830744c7e79669dab37ad91d963342ca1073643
|
7
|
+
data.tar.gz: 150a483db15cf7692f3d8356ff7075cd67ddd4f8efe03f9baa61fb8c8477cd96179a703172c0517a3b19dd3a1ad275afeed59d8a2ddb7aa5d44856fedbe28624
|
data/README.md
CHANGED
@@ -44,6 +44,10 @@ This endpoint will be re-generated any time your site is rebuilt.
|
|
44
44
|
|
45
45
|
The [Jekyll Pages API Search plugin](https://github.com/18F/jekyll_pages_api_search) uses this plugin to build a search index. Add `skip_index: true` to the front matter of any documents you wish to exclude from this index.
|
46
46
|
|
47
|
+
### Running standalone
|
48
|
+
|
49
|
+
If you wish to generate a `pages.json` file when using a site generation tool other than Jekyll, you can run the `jekyll_pages_api` executable as a post-generation step. Run `jekyll_pages_api -h` for instructions.
|
50
|
+
|
47
51
|
## Developing
|
48
52
|
|
49
53
|
* Run `bundle` to install any necessary gems.
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
# Author: Mike Bland <michael.bland@gsa.gov>
|
3
|
+
# Date: 2015-06-21
|
4
|
+
|
5
|
+
require_relative '../lib/jekyll_pages_api'
|
6
|
+
|
7
|
+
# Help text shown for -h (on stdout) and for a wrong argument count (on
# stderr). $0 is interpolated so the message names the script as invoked.
USAGE = <<END_USAGE
#{$0}: generate Jekyll Pages API output from a pregenerated site

Usage:
#{$0} baseurl basedir title_prefix body_element_tag > pages.json
#{$0} -h

Arguments:
-h
Print this help message
baseurl
URL prefix of every page of the generated site
basedir
Path to the generated site's root directory
title_prefix
Prefix to strip from page titles
body_element_tag
Tag (or tag prefix) identifying the main content element within the <body>
element of each document. Can be a complete tag (ending in '>'), or the
prefix of a longer tag. Used to strip boilerplate out of the content
exported via the API.
END_USAGE

# A lone -h is a successful request for help.
if ARGV == ['-h']
  puts USAGE
  exit
end

# Anything other than exactly four positional arguments is a usage error.
unless ARGV.length == 4
  $stderr.puts USAGE
  exit 1
end

baseurl, basedir, title_prefix, body_element_tag = ARGV

unless Dir.exist?(basedir)
  $stderr.puts "#{basedir} does not exist"
  exit 1
end

# Wrap the pregenerated site so the Generator can consume it, then print the
# output of the page it builds to stdout.
site = ::JekyllPagesApi::GeneratedSite.new(
  baseurl, basedir, title_prefix, body_element_tag)
puts ::JekyllPagesApi::Generator.new(site).page.output
|
data/lib/jekyll_pages_api.rb
CHANGED
@@ -1,2 +1,7 @@
|
|
1
|
+
require 'jekyll_pages_api/filters'
|
2
|
+
require 'jekyll_pages_api/generated_site'
|
3
|
+
require 'jekyll_pages_api/generator'
|
4
|
+
require 'jekyll_pages_api/page'
|
5
|
+
require 'jekyll_pages_api/page_without_a_file'
|
1
6
|
require 'jekyll_pages_api/version'
|
2
7
|
require_relative 'jekyll/site'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
require_relative 'generated_page_parser'
|
4
|
+
|
5
|
+
module JekyllPagesApi
  # Used by GeneratedSite to mimic a Jekyll page object when processing an
  # already-generated site using the Generator.
  #
  # Exposes the file's full path, its path relative to the site root (with a
  # trailing "index.html" removed), and the metadata hash and body content
  # returned by GeneratedPageParser.parse_generated_page.
  class GeneratedPage
    attr_reader :path, :relative_path, :data, :content

    # @param path [String] full path to the generated page's file
    # @param basedir see {GeneratedSite#initialize}
    # @param title_prefix see {GeneratedSite#initialize}
    # @param body_element_tag see {GeneratedSite#initialize}
    # @param content [String] HTML content of the generated page's file
    # @raise [RuntimeError] if path does not begin with basedir, or only
    #   shares a name prefix with it (e.g. basedir "foo", path "foobar/x.html")
    def initialize(path, basedir, title_prefix, body_element_tag, content)
      # Drop a single trailing separator from basedir so that relative_path
      # always keeps its leading separator.
      root = basedir.chomp File::SEPARATOR

      # A plain prefix match is not enough: the character right after the
      # root must be a separator (or the path must end there), otherwise a
      # sibling directory sharing the prefix would be accepted.
      boundary = path[root.size]
      inside_basedir = path.start_with?(basedir) &&
        (root.empty? || boundary.nil? || boundary == File::SEPARATOR)
      raise "#{path} does not start with #{basedir}" unless inside_basedir

      @path = path

      relative = path[root.size..-1]
      index_suffix = File.join "", "index.html"
      if relative.end_with? index_suffix
        # Strip "index.html" but keep the separator in front of it, so
        # directory indexes end in a trailing slash.
        relative = relative[0..-index_suffix.size]
      end
      @relative_path = relative

      @data, @content = GeneratedPageParser.parse_generated_page(
        content, title_prefix, body_element_tag)
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
module JekyllPagesApi
  # Contains helper methods for parsing values from HTML.
  #
  # All parsing is done by plain string scanning rather than with an HTML
  # parser, so the helpers only understand the constructs described on each
  # method.
  class GeneratedPageParser
    # Parses elements from a generated page's content needed by GeneratedPage.
    # @param content see {GeneratedPage#initialize}
    # @param title_prefix see {GeneratedSite#initialize}
    # @param body_element_tag see {GeneratedSite#initialize}
    # @return [Hash<String, String>, String] the metadata hash containing the
    #   `title` plus every named <meta> value (e.g. `tags`, `skip-index`); and
    #   the body content stripped of boilerplate. Returns an empty hash and an
    #   empty string when the document has no <head> element.
    # @raise [RuntimeError] see {parse_content_from_body}
    def self.parse_generated_page(content, title_prefix, body_element_tag)
      data = {}
      head_element = parse_basic_tag 'head', content
      return data, "" if head_element.nil?

      title = parse_basic_tag 'title', head_element
      if !title.nil? && title.start_with?(title_prefix)
        title = title[title_prefix.size..-1]
      end
      data['title'] = title
      data.merge!(parse_meta_tags(head_element))
      [data, parse_content_from_body(content, body_element_tag)]
    end

    # Parses a value from content from between tags that cannot be nested,
    # e.g. <head>, <body>, <title>.
    # @param tag_name [String] name of the tag to parse
    # @param content [String] HTML content from which to parse a value
    # @return [String] if a value is successfully parsed
    # @return [nil] if the tag isn't present in content, or is not well-formed
    def self.parse_basic_tag(tag_name, content)
      open_tag = "<#{tag_name}"
      close_tag = "</#{tag_name}>"

      # Find the first occurrence that really is this tag, skipping longer
      # tag names that merely share the prefix (e.g. <header> for "head").
      open_i = content.index open_tag
      until open_i.nil?
        following = content[open_i + open_tag.size]
        break if following.nil? || following =~ /[\s\/>]/
        open_i = content.index open_tag, open_i + open_tag.size
      end
      return nil if open_i.nil?

      # An opening tag that is never terminated is not well-formed.
      open_end_i = content.index '>', open_i + open_tag.size
      return nil if open_end_i.nil?

      value_i = open_end_i + 1
      close_i = content.index close_tag, value_i
      return nil if close_i.nil?
      content[value_i...close_i]
    end

    # Parses the (name, content) pairs from <meta> tags in the <head> element.
    # Note that it parses _only_ the `name` and `content` fields. A tag with
    # a `name` but no `content` maps the name to nil; tags without a `name`
    # are ignored. Scanning stops at the first <meta> tag that is not closed.
    # @param head_element [String] <head> element from an HTML document
    # @return [Hash<String, String>] a collection of (name, content) values
    def self.parse_meta_tags(head_element)
      open_tag = "<meta "
      meta_tags = {}
      open_i = head_element.index open_tag

      until open_i.nil?
        # Start on the space that ends "<meta " so that every attribute,
        # including the first, is preceded by a space.
        attrs_i = open_i + open_tag.size - 1
        close_i = head_element.index '>', attrs_i
        return meta_tags if close_i.nil?

        tag = head_element[attrs_i..close_i]
        meta_name = parse_attribute tag, 'name'
        unless meta_name.nil?
          meta_tags[meta_name] = parse_attribute tag, 'content'
        end
        open_i = head_element.index open_tag, close_i + 1
      end
      meta_tags
    end

    # Parse actual content from an HTML page, leaving out boilerplate.
    # @param content [String] content of an HTML document
    # @param body_element_tag see {GeneratedSite#initialize}
    # @return [String] the original content if there is no well-formed <body>
    #   element; the whole body if body_element_tag is empty or not found in
    #   it; otherwise the content of the element opened by body_element_tag
    # @raise [RuntimeError] if the element opened by body_element_tag is
    #   missing the end of its opening tag or its matching end tag
    def self.parse_content_from_body(content, body_element_tag)
      body = parse_basic_tag 'body', content
      return content if body.nil?
      return body if body_element_tag.empty?

      element_i = body.index body_element_tag
      return body if element_i.nil?

      # The tag name runs from just past '<' to the first whitespace, '/' or
      # '>', whichever comes first.
      name_i = element_i + 1
      bracket_i = body.index '>', name_i
      raise "Tag not closed: #{body_element_tag}" if bracket_i.nil?
      name_end_i = body.index(/[\s\/>]/, name_i)
      tag_name = body[name_i...name_end_i]
      open_tag = "<#{tag_name}"
      end_tag = "</#{tag_name}>"

      # Walk forward over nested elements of the same name, tracking depth,
      # until the end tag matching the content element is reached.
      content_i = bracket_i + 1
      open_tag_i = body.index open_tag, content_i
      end_tag_i = body.index end_tag, content_i
      depth = 1
      loop do
        raise "End tag missing: #{end_tag}" if end_tag_i.nil?

        if !open_tag_i.nil? && open_tag_i < end_tag_i
          depth += 1
          open_tag_i = body.index open_tag, open_tag_i + open_tag.size
        else
          depth -= 1
          break if depth == 0
          end_tag_i = body.index end_tag, end_tag_i + end_tag.size
        end
      end
      body[content_i...end_tag_i]
    end

    # Extracts one attribute value from the attribute section of a tag.
    # Values may be delimited by single or double quotes; an unquoted value
    # runs up to the next whitespace or the end of the tag.
    # @param tag [String] attributes of a tag, starting with a space and
    #   ending with '>'
    # @param attr [String] name of the attribute to extract
    # @return [String] the attribute's value
    # @return [nil] if the attribute is absent, its quote is never closed, or
    #   its unquoted value is empty
    def self.parse_attribute(tag, attr)
      marker = " #{attr}="
      marker_i = tag.index marker
      return nil if marker_i.nil?

      value_i = marker_i + marker.size
      delim = tag[value_i]
      if delim == '"' || delim == "'"
        end_i = tag.index delim, value_i + 1
        return nil if end_i.nil?
        tag[(value_i + 1)...end_i]
      else
        end_i = tag.index(/\s|\/?>/, value_i)
        value = tag[value_i...end_i]
        value.empty? ? nil : value
      end
    end
    private_class_method :parse_attribute
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# @author Mike Bland (michael.bland@gsa.gov)
|
2
|
+
|
3
|
+
require_relative 'generated_page'
|
4
|
+
|
5
|
+
module JekyllPagesApi
  # Used by the standalone executable to mimic a Jekyll::Site when processing
  # an already-generated site using the Generator.
  class GeneratedSite
    # @see #initialize
    attr_reader :baseurl, :basedir, :title_prefix, :body_element_tag

    # @return [Array<>] a dummy empty Array
    attr_accessor :pages

    # @param baseurl [String] URL prefix of every page of the generated site
    # @param basedir [String] Path to the generated site's root directory
    # @param title_prefix [String] Prefix to strip from page titles
    # @param body_element_tag [String] Tag (or tag prefix) identifying the
    #   main content element within the <body> element of each document. Can
    #   be a complete tag (ending in '>'), or the prefix of a longer tag. Used
    #   to strip boilerplate out of the content exported via the API.
    def initialize(baseurl, basedir, title_prefix, body_element_tag)
      @baseurl = baseurl
      @basedir = basedir
      @title_prefix = title_prefix
      @body_element_tag = body_element_tag
      @pages = []
    end

    # Generator yielding each HTML page (as a {GeneratedPage}) that should be
    # exported via the API: every regular file under basedir whose name ends
    # in `.html` and whose parsed data contains a title. Files are visited in
    # sorted path order so the result does not depend on directory order.
    # @yieldparam page [GeneratedPage] the parsed page
    # @return [Enumerator] if no block is given
    # @raise [StandardError] re-raises any error hit while reading, parsing
    #   or yielding a file, after naming the file on stderr
    def each_site_file
      return enum_for(:each_site_file) unless block_given?

      Dir.glob(File.join(basedir, '**', '*')).sort.each do |f|
        # Directories can carry an .html name too; only read regular files.
        next unless f.end_with?('.html') && File.file?(f)
        begin
          page = GeneratedPage.new(f, basedir, title_prefix,
            body_element_tag, File.read(f))
          yield page unless page.data['title'].nil?
        rescue
          # Say which file failed; the error itself still propagates.
          $stderr.puts "Error while processing #{f}:"
          raise
        end
      end
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
module JekyllPagesApi
  # Specs for the string-scanning helpers of GeneratedPageParser. Each group
  # defines a small wrapper method so the examples read as input => output.
  describe GeneratedPageParser do
    describe '#parse_basic_tag' do
      def basic_tag(tag_name, html)
        described_class.parse_basic_tag tag_name, html
      end

      it "returns nil if content is empty" do
        expect(basic_tag('', '')).to be_nil
      end

      it "returns nil if the tag is not present" do
        expect(basic_tag('title', 'foobar')).to be_nil
      end

      it "returns nil if the tag is not closed" do
        expect(basic_tag('title', '<title>foobar')).to be_nil
      end

      it "returns the content of the tag" do
        expect(basic_tag('title', '<title>foobar</title>')).to eq('foobar')
      end
    end

    describe '#parse_meta_tags' do
      let(:foo_bar) { {"foo" => "bar"} }

      def meta_tags(head_html)
        described_class.parse_meta_tags head_html
      end

      it "returns an empty hash if the head_element is empty" do
        expect(meta_tags('')).to eq({})
      end

      it "returns an empty hash if a meta tag isn't closed properly" do
        expect(meta_tags('<meta name="foo" content="bar"')).to eq({})
      end

      it "returns an empty hash if the only meta tag lacks a name" do
        expect(meta_tags('<meta not_a_name="foo" content="bar">')).to eq({})
      end

      it "returns a valid hash for a well-formed meta tag" do
        expect(meta_tags('<meta name="foo" content="bar">')).to eq(foo_bar)
      end

      it "returns a valid hash for a self-closing meta tag" do
        expect(meta_tags('<meta name="foo" content="bar"/>')).to eq(foo_bar)
      end

      it "returns a valid hash for a meta tag with single-quote delimiters" do
        expect(meta_tags("<meta name='foo' content='bar'/>")).to eq(foo_bar)
      end

      it "returns a valid hash for a meta tag with mixed-quote delimiters" do
        expect(meta_tags("<meta name='foo' content=\"bar\"/>")).to eq(foo_bar)
      end

      it "returns a valid hash regardless of attribute order" do
        expect(meta_tags('<meta content="bar" name="foo" />')).to eq(foo_bar)
      end

      it "returns a valid hash for multiple meta tags" do
        head_html = [
          '<meta name="foo" content="bar"/>',
          "<meta name='baz' content='quux' other=\"don't care\" >",
          '<meta content="plugh" name="xyzzy" />',
        ].join
        expect(meta_tags(head_html)).to eq(
          "foo" => "bar", "baz" => "quux", "xyzzy" => "plugh")
      end
    end

    describe '#parse_content_from_body' do
      def body_content(html, body_element_tag)
        described_class.parse_content_from_body html, body_element_tag
      end

      it "returns the empty string if passed all empty strings" do
        expect(body_content("", "")).to eq("")
      end

      it "returns the original content if there are no body tags" do
        expect(body_content("foobar", "")).to eq("foobar")
      end

      it "returns the original content if the body tag isn't closed" do
        expect(body_content("<body>foobar", "")).to eq("<body>foobar")
      end

      it "returns the full body content if the body_element_tag is empty" do
        html = "<body>header<div class='content'>foobar</div>footer</body>"
        expect(body_content(html, "")).to eq(
          "header<div class='content'>foobar</div>footer")
      end

      it "returns only the body content within the body_element_tag" do
        html = [
          "<body><div>header</div>",
          "<div class='content'>foobar</div>",
          "<div>footer</div></body>",
        ].join
        expect(body_content(html, "<div class='content'>")).to eq("foobar")
      end

      it "returns only the body content when body_element_tag is a prefix" do
        html = [
          "<body><div>header</div>",
          "<div class='content' plus='blah blah woof woof'>foobar</div>",
          "<div>footer</div></body>",
        ].join
        expect(body_content(html, "<div class='content'")).to eq("foobar")
      end

      it "handles nested divs within the body content" do
        inner = [
          "blah blah",
          "<div>plus<div>some</div><div>nested</div>divs</div>",
          "woof woof",
        ].join
        html = [
          "<body><div>header</div>",
          "<div class='content' plus='blah blah woof woof'>",
          inner,
          "</div>",
          "<div>footer</div></body>",
        ].join
        expect(body_content(html, "<div class='content'")).to eq(inner)
      end
    end

    describe '#parse_generated_page' do
      let(:body_html) do
        "<body><div>header</div>" +
          "<div class='content'>foobar</div>" +
          "<div>footer</div></body>"
      end

      def generated_page(html, title_prefix, body_element_tag)
        described_class.parse_generated_page(
          html, title_prefix, body_element_tag)
      end

      it "returns empty values when passed all empty strings" do
        data, content = generated_page "", "", ""
        expect(data).to eq({})
        expect(content).to eq("")
      end

      it "returns empty values when the head element isn't present" do
        data, content = generated_page "<body>foobar</body>", "", ""
        expect(data).to eq({})
        expect(content).to eq("")
      end

      it "returns a nil title and all body content" do
        data, content = generated_page "<head></head>" + body_html, "", ""
        expect(data).to eq({"title" => nil})
        expect(content).to eq(
          "<div>header</div><div class='content'>foobar</div><div>footer</div>")
      end

      it "returns the title and only body content within body_element_tag" do
        head_html = "<head><title>Blah Blah Woof Woof</title></head>"
        data, content = generated_page(
          head_html + body_html, "", "<div class='content'")
        expect(data).to eq({"title" => "Blah Blah Woof Woof"})
        expect(content).to eq("foobar")
      end

      it "returns the stripped title body content and metadata" do
        head_html = "<head><title>18F — Blah Blah Woof Woof</title>" +
          "<meta name='skip-index' content='true'>" +
          "<meta content='baz,quux,xyzzy,plugh' name=\"tags\" />" +
          "</head>"
        data, content = generated_page(
          head_html + body_html, "18F — ", "<div class='content'")
        expect(data).to eq(
          "title" => "Blah Blah Woof Woof",
          "skip-index" => "true",
          "tags" => "baz,quux,xyzzy,plugh",
        )
        expect(content).to eq("foobar")
      end
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module JekyllPagesApi
  # Specs for GeneratedPage#initialize: how path and relative_path are
  # derived from path/basedir, and that parsed data and content are exposed.
  describe GeneratedPage do
    describe '#initialize' do
      # Builds a page with empty title prefix, body element tag and content.
      def blank_page(path, basedir)
        described_class.new path, basedir, "", "", ""
      end

      # Pages built from empty content carry no data and no content.
      def expect_no_parsed_values(page)
        expect(page.data).to eq({})
        expect(page.content).to eq("")
      end

      it "handles all empty strings correctly" do
        page = blank_page "", ""
        expect(page.path).to eq("")
        expect(page.relative_path).to eq("")
        expect_no_parsed_values page
      end

      it "parses the relative path when basedir ends in SEPARATOR" do
        page = blank_page(
          File.join("foo", "bar", "baz.html"), File.join("foo", ""))
        expect(page.path).to eq("foo/bar/baz.html")
        expect(page.relative_path).to eq("/bar/baz.html")
        expect_no_parsed_values page
      end

      it "parses the relative path when basedir doesn't end in SEPARATOR" do
        page = blank_page File.join("foo", "bar", "baz.html"), "foo"
        expect(page.path).to eq("foo/bar/baz.html")
        expect(page.relative_path).to eq("/bar/baz.html")
        expect_no_parsed_values page
      end

      it "raises RuntimeError when path does not begin with basedir" do
        path = File.join "foo", "bar", "baz.html"
        basedir = File.join "quux", ""
        expect { blank_page path, basedir }.to raise_error(
          RuntimeError, "#{path} does not start with #{basedir}")
      end

      it "parses out index.html suffix and leaves trailing slash" do
        page = blank_page(
          File.join("foo", "bar", "index.html"), File.join("foo", ""))
        expect(page.path).to eq("foo/bar/index.html")
        expect(page.relative_path).to eq("/bar/")
        expect_no_parsed_values page
      end

      it "parses out index.html suffix and leaves trailing slash for root" do
        page = blank_page File.join("foo", "index.html"), File.join("foo", "")
        expect(page.path).to eq("foo/index.html")
        expect(page.relative_path).to eq("/")
        expect_no_parsed_values page
      end

      it "parses content correctly" do
        html = [
          "<head><title>18F — Blah Blah Woof Woof</title>",
          "<meta name='skip-index' content='true'>",
          "<meta content='baz,quux,xyzzy,plugh' name=\"tags\" />",
          "</head>",
          "<body><div>header</div>",
          "<div class='content'>foobar</div>",
          "<div>footer</div></body>",
        ].join

        page = described_class.new(
          File.join("foo", "bar", "index.html"), File.join("foo", ""),
          "18F — ", "<div class='content'", html)

        expect(page.path).to eq("foo/bar/index.html")
        expect(page.relative_path).to eq("/bar/")
        expect(page.data).to eq(
          "title" => "Blah Blah Woof Woof",
          "skip-index" => "true",
          "tags" => "baz,quux,xyzzy,plugh",
        )
        expect(page.content).to eq("foobar")
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
module JekyllPagesApi
  # Specs for GeneratedSite#each_site_file, run against a throwaway site
  # written to a temporary directory.
  describe GeneratedSite do
    describe '#each_site_file' do
      before :each do
        @test_tmpdir = Dir.mktmpdir
      end

      after :each do
        FileUtils.remove_entry @test_tmpdir
      end

      it "should only select .html files containing a title" do
        basedir = File.join @test_tmpdir, "foo"
        FileUtils.mkdir_p basedir

        content_dir = File.join(basedir, "bar")
        FileUtils.mkdir_p content_dir

        File.open(File.join(content_dir, "baz.txt"), "w") do |f|
          f << "Text file that should be excluded."
        end

        File.open(File.join(content_dir, "quux.html"), "w") do |f|
          f << "<html><head><title>18F — Include me!</title></head>"
          f << "<body><div>header</div>"
          f << "<div class='content'>This page should be included.</div>"
          f << "<div>footer</div></body></html>"
        end

        File.open(File.join(content_dir, "xyzzy.html"), "w") do |f|
          f << "<html><head></head><body>"
          f << "This page shouldn't be included because it lacks a title."
          f << "</body></html>"
        end

        # Sanity-check the fixture. Compare without regard to order, since
        # Dir.glob does not promise sorted results on every Ruby version.
        paths = ['baz.txt', 'quux.html', 'xyzzy.html'].map do |f|
          File.join content_dir, f
        end
        expect(Dir.glob(File.join(content_dir, '**', '*'))).to match_array(paths)

        site = GeneratedSite.new("https://unused/", basedir,
          "18F — ", "<div class='content'")
        pages = []
        site.each_site_file {|f| pages << f}
        expect(pages.size).to eq(1)

        page = pages.first
        expect(page.path).to eq(File.join content_dir, 'quux.html')
        expect(page.relative_path).to eq("/bar/quux.html")
        expect(page.data).to eq("title" => "Include me!")
        expect(page.content).to eq("This page should be included.")
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll_pages_api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aidan Feldman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -89,7 +89,8 @@ dependencies:
|
|
89
89
|
description:
|
90
90
|
email:
|
91
91
|
- aidan.feldman@gsa.gov
|
92
|
-
executables:
|
92
|
+
executables:
|
93
|
+
- jekyll_pages_api
|
93
94
|
extensions: []
|
94
95
|
extra_rdoc_files: []
|
95
96
|
files:
|
@@ -101,17 +102,24 @@ files:
|
|
101
102
|
- LICENSE.md
|
102
103
|
- README.md
|
103
104
|
- Rakefile
|
105
|
+
- bin/jekyll_pages_api
|
104
106
|
- gemfiles/jekyll_2.gemfile
|
105
107
|
- gemfiles/jekyll_3.gemfile
|
106
108
|
- jekyll_pages_api.gemspec
|
107
109
|
- lib/jekyll/site.rb
|
108
110
|
- lib/jekyll_pages_api.rb
|
109
111
|
- lib/jekyll_pages_api/filters.rb
|
112
|
+
- lib/jekyll_pages_api/generated_page.rb
|
113
|
+
- lib/jekyll_pages_api/generated_page_parser.rb
|
114
|
+
- lib/jekyll_pages_api/generated_site.rb
|
110
115
|
- lib/jekyll_pages_api/generator.rb
|
111
116
|
- lib/jekyll_pages_api/page.rb
|
112
117
|
- lib/jekyll_pages_api/page_without_a_file.rb
|
113
118
|
- lib/jekyll_pages_api/version.rb
|
114
119
|
- spec/filters_spec.rb
|
120
|
+
- spec/generated_page_parser_spec.rb
|
121
|
+
- spec/generated_page_spec.rb
|
122
|
+
- spec/generated_site_spec.rb
|
115
123
|
- spec/integration_spec.rb
|
116
124
|
- spec/page_spec.rb
|
117
125
|
- spec/site/.gitignore
|
@@ -163,6 +171,9 @@ summary: A Jekyll Plugin that generates a JSON file with data for all the Pages
|
|
163
171
|
your Site.
|
164
172
|
test_files:
|
165
173
|
- spec/filters_spec.rb
|
174
|
+
- spec/generated_page_parser_spec.rb
|
175
|
+
- spec/generated_page_spec.rb
|
176
|
+
- spec/generated_site_spec.rb
|
166
177
|
- spec/integration_spec.rb
|
167
178
|
- spec/page_spec.rb
|
168
179
|
- spec/site/.gitignore
|