jekyll_search 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 266d467d948e0f06e9b67d80b4460e39127e57e1
4
- data.tar.gz: 42098ce4e4628cf9a2ac55ea3dad5fe5f0871f11
3
+ metadata.gz: e903bb117234165daca55baee25cc4a352e5cd91
4
+ data.tar.gz: db9eb1cf7534ec9929057194af1e0248b52dc243
5
5
  SHA512:
6
- metadata.gz: 7a74dead448ac960a3c32e6546b181fe7561d9c1a57ca6ddcdbca4104378f7dae2d2ef0fe7ff2482adb11d77b0b72709d57bf8a0947c2cce69777931780f8f55
7
- data.tar.gz: 4a08fdc4d9474469f2e0a8f1c4cb1c30146ae8bf83dc499498b9bfd4b7e944e4607d1672c08a640880903fc7a1712a8b13517b87e958b25801f1876026cc8369
6
+ metadata.gz: 7e6f6263cb8391612c44c4f81b1cb24285f9bdf68b910ae3f2bbe6f2d71eccede16a95cf61648b3e576fc3368766caaf7fe70706a75277e2d2ca2b72486a200d
7
+ data.tar.gz: 84529eaef4b0b389bb13bfb1d11ed6ad3bd22e997bef30c59eef4ff18c4706d593ae04f28934939cc568d5c22e6716b6d9abe6244db9b2049f795560187b0d31
@@ -0,0 +1,42 @@
1
+ require 'loofah'
2
+ require 'loofah/helpers'
3
+
4
+ module JekyllSearch
5
+ class HtmlProcessor
6
+ def self.strip_html(input)
7
+ strip_pre = Loofah::Scrubber.new do |node|
8
+ if node.name == 'pre'
9
+ node.remove
10
+ Loofah::Scrubber::STOP
11
+ end
12
+ end
13
+
14
+ Loofah.fragment(input).
15
+ scrub!(:prune).
16
+ scrub!(strip_pre).
17
+ to_text.
18
+ gsub(/([\r\n\t\s]+)/, ' ').strip
19
+ end
20
+
21
+ def self.detect_sections(input)
22
+ result = []
23
+
24
+ current = { :id => nil, :title => nil, :content => '' }
25
+ Loofah.fragment(input).children.each { |node|
26
+ if node.name =~ /^h\d$/
27
+ result << current
28
+ current = { :id => nil, :title => nil, :content => '' }
29
+ current[:id] = if node.has_attribute?('id') then node.attribute('id').value else nil end
30
+ current[:title] = node.text
31
+ else
32
+ current[:content] += node.to_html
33
+ end
34
+ }
35
+ if current[:title] != nil or current[:content] != ''
36
+ result << current
37
+ end
38
+
39
+ result
40
+ end
41
+ end
42
+ end
@@ -1,3 +1,3 @@
1
1
  module JekyllSearch
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/jekyll_search.rb CHANGED
@@ -2,6 +2,7 @@ require 'jekyll'
2
2
  require 'elasticsearch'
3
3
  require 'loofah'
4
4
  require 'loofah/helpers'
5
+ require 'jekyll_search/html_processor'
5
6
 
6
7
  module Jekyll
7
8
  module Commands
@@ -35,29 +36,24 @@ module Jekyll
35
36
  select { |p| p.data['searchable'].nil? or p.data['searchable'] != false }
36
37
 
37
38
  for page in pages
38
- body = {
39
+ page_body = {
39
40
  url: site.baseurl + page.url,
40
41
  title: page.data['title'],
41
- content: clean_content(page.content)
42
+ content: JekyllSearch::HtmlProcessor.strip_html(page.content)
42
43
  }
43
44
 
44
- client.index index: settings['index']['name'], type: 'page', body: body
45
- end
46
- end
45
+ client.index index: settings['index']['name'], type: 'page', body: page_body
47
46
 
48
- def clean_content(dirty)
49
- strip_pre = Loofah::Scrubber.new do |node|
50
- if node.name == 'pre'
51
- node.remove
52
- Loofah::Scrubber::STOP
47
+ for section in JekyllSearch::HtmlProcessor.detect_sections(page.content)
48
+ section_body = {
49
+ url: if section[:id] != nil then site.baseurl + page.url + '#' + section[:id] else site.baseurl + page.url end,
50
+ title: if section[:title] != nil then section[:title] else page.data['title'] end,
51
+ content: JekyllSearch::HtmlProcessor.strip_html(section[:content])
52
+ }
53
+
54
+ client.index index: settings['index']['name'], type: 'section', body: section_body
53
55
  end
54
56
  end
55
-
56
- Loofah.fragment(dirty).
57
- scrub!(:prune).
58
- scrub!(strip_pre).
59
- to_text.
60
- gsub(/([\r\n\t\s]+)/, ' ').strip
61
57
  end
62
58
 
63
59
  def create_index(client, settings)
@@ -0,0 +1,23 @@
1
+ require 'jekyll_search/html_processor'
2
+
3
+ RSpec.describe JekyllSearch::HtmlProcessor do
4
+ it 'stripes html' do
5
+ expect(JekyllSearch::HtmlProcessor.strip_html('test')).to eq 'test'
6
+ expect(JekyllSearch::HtmlProcessor.strip_html("<h1>title</h1>\n<p>text</p>")).to eq 'title text'
7
+ end
8
+
9
+ it 'detects sections' do
10
+ expect(JekyllSearch::HtmlProcessor.detect_sections('test')).to eq [
11
+ { :id => nil, :title => nil, :content => 'test'}
12
+ ]
13
+ end
14
+
15
+ it 'detects sections' do
16
+ expect(JekyllSearch::HtmlProcessor.detect_sections("foo<h1>first</h1>bar <p>\npara\n</p>\n\n<H4 ID=\"sec\">sec<span>ond</span></H4>apple<h2>third</h2>")).to eq [
17
+ { :id => nil, :title => nil, :content => "foo"},
18
+ { :id => nil, :title => 'first', :content => "bar <p>\npara\n</p>\n\n"},
19
+ { :id => 'sec', :title => 'second', :content => "apple" },
20
+ { :id => nil, :title => 'third', :content => '' }
21
+ ]
22
+ end
23
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Hoffmeister
@@ -110,8 +110,9 @@ files:
110
110
  - Rakefile
111
111
  - jekyll_search.gemspec
112
112
  - lib/jekyll_search.rb
113
+ - lib/jekyll_search/html_processor.rb
113
114
  - lib/jekyll_search/version.rb
114
- - spec/lib/jekyll_search_spec.rb
115
+ - spec/lib/jekyll_search/html_processor_spec.rb
115
116
  - spec/spec_helper.rb
116
117
  homepage: https://github.com/choffmeister/jekyll_search
117
118
  licenses:
@@ -138,5 +139,5 @@ signing_key:
138
139
  specification_version: 4
139
140
  summary: An Elasticsearch full text search index generator for Jekyll.
140
141
  test_files:
141
- - spec/lib/jekyll_search_spec.rb
142
+ - spec/lib/jekyll_search/html_processor_spec.rb
142
143
  - spec/spec_helper.rb
@@ -1,5 +0,0 @@
1
- require 'jekyll_search'
2
-
3
- RSpec.describe Jekyll::Commands::Index do
4
- it 'works'
5
- end