jekyll_search 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll_search/html_processor.rb +42 -0
- data/lib/jekyll_search/version.rb +1 -1
- data/lib/jekyll_search.rb +12 -16
- data/spec/lib/jekyll_search/html_processor_spec.rb +23 -0
- metadata +4 -3
- data/spec/lib/jekyll_search_spec.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e903bb117234165daca55baee25cc4a352e5cd91
|
4
|
+
data.tar.gz: db9eb1cf7534ec9929057194af1e0248b52dc243
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e6f6263cb8391612c44c4f81b1cb24285f9bdf68b910ae3f2bbe6f2d71eccede16a95cf61648b3e576fc3368766caaf7fe70706a75277e2d2ca2b72486a200d
|
7
|
+
data.tar.gz: 84529eaef4b0b389bb13bfb1d11ed6ad3bd22e997bef30c59eef4ff18c4706d593ae04f28934939cc568d5c22e6716b6d9abe6244db9b2049f795560187b0d31
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'loofah'
|
2
|
+
require 'loofah/helpers'
|
3
|
+
|
4
|
+
module JekyllSearch
  # Stateless helpers that turn rendered page HTML into indexable text.
  #
  # Both entry points parse their input with Loofah.fragment, so they accept
  # HTML fragments rather than requiring a full document.
  class HtmlProcessor
    # Tag names treated as section boundaries. Anchored with \A/\z so the
    # whole name must match, and limited to the six heading levels HTML defines.
    HEADING_TAG = /\Ah[1-6]\z/

    # Reduces an HTML fragment to a single line of plain text.
    #
    # The fragment is first run through Loofah's built-in :prune scrubber,
    # then every <pre> element is removed together with its contents, and the
    # remaining markup is converted to text. Runs of whitespace are collapsed
    # to one space and the result is stripped.
    #
    # @param input [String, nil] HTML fragment; nil is treated as empty input
    # @return [String] whitespace-normalized text
    def self.strip_html(input)
      drop_pre = Loofah::Scrubber.new do |node|
        next unless node.name == 'pre'

        node.remove
        # The node is gone, so tell Loofah not to descend into its children.
        Loofah::Scrubber::STOP
      end

      Loofah.fragment(input.to_s)
            .scrub!(:prune)
            .scrub!(drop_pre)
            .to_text
            .gsub(/\s+/, ' ')
            .strip
    end

    # Splits an HTML fragment into sections delimited by heading elements.
    #
    # Only the fragment's top-level nodes are inspected: a heading nested in
    # another element does not start a section and is kept as part of the
    # surrounding section's content. Content that precedes the first heading
    # becomes a section with a nil title and id. Sections that have neither a
    # title nor any content are never emitted, so input that starts with a
    # heading does not produce an empty leading entry.
    #
    # @param input [String, nil] HTML fragment; nil is treated as empty input
    # @return [Array<Hash>] hashes with :id (the heading's id attribute or
    #   nil), :title (the heading's text or nil) and :content (HTML of the
    #   nodes following the heading)
    def self.detect_sections(input)
      sections = []
      current = blank_section

      Loofah.fragment(input.to_s).children.each do |node|
        if node.name =~ HEADING_TAG
          sections << current unless empty_section?(current)
          current = blank_section
          # Node#[] yields nil when the attribute is absent.
          current[:id] = node['id']
          current[:title] = node.text
        else
          current[:content] << node.to_html
        end
      end

      sections << current unless empty_section?(current)
      sections
    end

    # Builds a fresh section hash with its own mutable content buffer.
    def self.blank_section
      { :id => nil, :title => nil, :content => ''.dup }
    end

    # True when a section carries neither a title nor any content.
    def self.empty_section?(section)
      section[:title].nil? && section[:content].empty?
    end

    private_class_method :blank_section, :empty_section?
  end
end
|
data/lib/jekyll_search.rb
CHANGED
@@ -2,6 +2,7 @@ require 'jekyll'
|
|
2
2
|
require 'elasticsearch'
|
3
3
|
require 'loofah'
|
4
4
|
require 'loofah/helpers'
|
5
|
+
require 'jekyll_search/html_processor'
|
5
6
|
|
6
7
|
module Jekyll
|
7
8
|
module Commands
|
@@ -35,29 +36,24 @@ module Jekyll
|
|
35
36
|
select { |p| p.data['searchable'].nil? or p.data['searchable'] != false }
|
36
37
|
|
37
38
|
for page in pages
|
38
|
-
|
39
|
+
page_body = {
|
39
40
|
url: site.baseurl + page.url,
|
40
41
|
title: page.data['title'],
|
41
|
-
content:
|
42
|
+
content: JekyllSearch::HtmlProcessor.strip_html(page.content)
|
42
43
|
}
|
43
44
|
|
44
|
-
client.index index: settings['index']['name'], type: 'page', body:
|
45
|
-
end
|
46
|
-
end
|
45
|
+
client.index index: settings['index']['name'], type: 'page', body: page_body
|
47
46
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
47
|
+
for section in JekyllSearch::HtmlProcessor.detect_sections(page.content)
|
48
|
+
section_body = {
|
49
|
+
url: if section[:id] != nil then site.baseurl + page.url + '#' + section[:id] else site.baseurl + page.url end,
|
50
|
+
title: if section[:title] != nil then section[:title] else page.data['title'] end,
|
51
|
+
content: JekyllSearch::HtmlProcessor.strip_html(section[:content])
|
52
|
+
}
|
53
|
+
|
54
|
+
client.index index: settings['index']['name'], type: 'section', body: section_body
|
53
55
|
end
|
54
56
|
end
|
55
|
-
|
56
|
-
Loofah.fragment(dirty).
|
57
|
-
scrub!(:prune).
|
58
|
-
scrub!(strip_pre).
|
59
|
-
to_text.
|
60
|
-
gsub(/([\r\n\t\s]+)/, ' ').strip
|
61
57
|
end
|
62
58
|
|
63
59
|
def create_index(client, settings)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'jekyll_search/html_processor'
|
2
|
+
|
3
|
+
RSpec.describe JekyllSearch::HtmlProcessor do
  # Plain text passes through unchanged; tags are dropped and the newline
  # between block elements collapses to a single space.
  it 'strips html' do
    expect(described_class.strip_html('test')).to eq 'test'
    expect(described_class.strip_html("<h1>title</h1>\n<p>text</p>")).to eq 'title text'
  end

  # Without any heading, the whole input is one untitled section.
  it 'returns a single untitled section when there are no headings' do
    expect(described_class.detect_sections('test')).to eq [
      { :id => nil, :title => nil, :content => 'test' }
    ]
  end

  # Covers leading content before the first heading, an upper-case tag with
  # an id attribute and nested markup in its title, and a trailing heading
  # with no content after it.
  it 'splits content into sections at each heading' do
    html = "foo<h1>first</h1>bar <p>\npara\n</p>\n\n<H4 ID=\"sec\">sec<span>ond</span></H4>apple<h2>third</h2>"

    expect(described_class.detect_sections(html)).to eq [
      { :id => nil, :title => nil, :content => "foo" },
      { :id => nil, :title => 'first', :content => "bar <p>\npara\n</p>\n\n" },
      { :id => 'sec', :title => 'second', :content => "apple" },
      { :id => nil, :title => 'third', :content => '' }
    ]
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christian Hoffmeister
|
@@ -110,8 +110,9 @@ files:
|
|
110
110
|
- Rakefile
|
111
111
|
- jekyll_search.gemspec
|
112
112
|
- lib/jekyll_search.rb
|
113
|
+
- lib/jekyll_search/html_processor.rb
|
113
114
|
- lib/jekyll_search/version.rb
|
114
|
-
- spec/lib/jekyll_search_spec.rb
|
115
|
+
- spec/lib/jekyll_search/html_processor_spec.rb
|
115
116
|
- spec/spec_helper.rb
|
116
117
|
homepage: https://github.com/choffmeister/jekyll_search
|
117
118
|
licenses:
|
@@ -138,5 +139,5 @@ signing_key:
|
|
138
139
|
specification_version: 4
|
139
140
|
summary: An Elasticsearch full text search index generator for Jekyll.
|
140
141
|
test_files:
|
141
|
-
- spec/lib/jekyll_search_spec.rb
|
142
|
+
- spec/lib/jekyll_search/html_processor_spec.rb
|
142
143
|
- spec/spec_helper.rb
|