jekyll_search 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 266d467d948e0f06e9b67d80b4460e39127e57e1
4
- data.tar.gz: 42098ce4e4628cf9a2ac55ea3dad5fe5f0871f11
3
+ metadata.gz: e903bb117234165daca55baee25cc4a352e5cd91
4
+ data.tar.gz: db9eb1cf7534ec9929057194af1e0248b52dc243
5
5
  SHA512:
6
- metadata.gz: 7a74dead448ac960a3c32e6546b181fe7561d9c1a57ca6ddcdbca4104378f7dae2d2ef0fe7ff2482adb11d77b0b72709d57bf8a0947c2cce69777931780f8f55
7
- data.tar.gz: 4a08fdc4d9474469f2e0a8f1c4cb1c30146ae8bf83dc499498b9bfd4b7e944e4607d1672c08a640880903fc7a1712a8b13517b87e958b25801f1876026cc8369
6
+ metadata.gz: 7e6f6263cb8391612c44c4f81b1cb24285f9bdf68b910ae3f2bbe6f2d71eccede16a95cf61648b3e576fc3368766caaf7fe70706a75277e2d2ca2b72486a200d
7
+ data.tar.gz: 84529eaef4b0b389bb13bfb1d11ed6ad3bd22e997bef30c59eef4ff18c4706d593ae04f28934939cc568d5c22e6716b6d9abe6244db9b2049f795560187b0d31
@@ -0,0 +1,42 @@
1
+ require 'loofah'
2
+ require 'loofah/helpers'
3
+
4
module JekyllSearch
  # Stateless helpers that turn an HTML fragment into plain text and into
  # heading-delimited sections.
  class HtmlProcessor
    # Tag names that start a new section: exactly h1 through h6.
    HEADING_TAG = /\Ah[1-6]\z/

    # Converts an HTML fragment into whitespace-normalized plain text.
    #
    # The fragment is first run through Loofah's built-in :prune scrubber,
    # then every <pre> element is removed together with its content, and
    # finally all runs of whitespace are collapsed to a single space.
    #
    # @param input [String] HTML fragment
    # @return [String] plain text without leading/trailing whitespace
    def self.strip_html(input)
      strip_pre = Loofah::Scrubber.new do |node|
        if node.name == 'pre'
          node.remove
          # The subtree is gone, so tell Loofah not to descend into it.
          Loofah::Scrubber::STOP
        end
      end

      Loofah.fragment(input)
            .scrub!(:prune)
            .scrub!(strip_pre)
            .to_text
            .gsub(/\s+/, ' ')
            .strip
    end

    # Splits an HTML fragment into sections at its top-level headings.
    #
    # Only direct children of the fragment are inspected. Each heading opens
    # a new section whose :id is the heading's id attribute (nil when absent)
    # and whose :title is the heading's text; the HTML of all following
    # non-heading nodes is appended to that section's :content. Nodes that
    # precede the first heading form a section with nil :id and nil :title.
    # A section with neither a title nor content is never emitted, so a
    # fragment that starts with a heading has no empty leading section.
    #
    # @param input [String] HTML fragment
    # @return [Array<Hash>] hashes with the keys :id, :title and :content
    def self.detect_sections(input)
      sections = []
      current = { id: nil, title: nil, content: ''.dup }

      Loofah.fragment(input).children.each do |node|
        if node.name =~ HEADING_TAG
          sections << current unless empty_section?(current)
          current = { id: node['id'], title: node.text, content: ''.dup }
        else
          current[:content] << node.to_html
        end
      end

      sections << current unless empty_section?(current)
      sections
    end

    # True when a section carries neither a title nor any content.
    def self.empty_section?(section)
      section[:title].nil? && section[:content].empty?
    end
    private_class_method :empty_section?
  end
end
@@ -1,3 +1,3 @@
1
1
  module JekyllSearch
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/jekyll_search.rb CHANGED
@@ -2,6 +2,7 @@ require 'jekyll'
2
2
  require 'elasticsearch'
3
3
  require 'loofah'
4
4
  require 'loofah/helpers'
5
+ require 'jekyll_search/html_processor'
5
6
 
6
7
  module Jekyll
7
8
  module Commands
@@ -35,29 +36,24 @@ module Jekyll
35
36
  select { |p| p.data['searchable'].nil? or p.data['searchable'] != false }
36
37
 
37
38
  for page in pages
38
- body = {
39
+ page_body = {
39
40
  url: site.baseurl + page.url,
40
41
  title: page.data['title'],
41
- content: clean_content(page.content)
42
+ content: JekyllSearch::HtmlProcessor.strip_html(page.content)
42
43
  }
43
44
 
44
- client.index index: settings['index']['name'], type: 'page', body: body
45
- end
46
- end
45
+ client.index index: settings['index']['name'], type: 'page', body: page_body
47
46
 
48
- def clean_content(dirty)
49
- strip_pre = Loofah::Scrubber.new do |node|
50
- if node.name == 'pre'
51
- node.remove
52
- Loofah::Scrubber::STOP
47
+ for section in JekyllSearch::HtmlProcessor.detect_sections(page.content)
48
+ section_body = {
49
+ url: if section[:id] != nil then site.baseurl + page.url + '#' + section[:id] else site.baseurl + page.url end,
50
+ title: if section[:title] != nil then section[:title] else page.data['title'] end,
51
+ content: JekyllSearch::HtmlProcessor.strip_html(section[:content])
52
+ }
53
+
54
+ client.index index: settings['index']['name'], type: 'section', body: section_body
53
55
  end
54
56
  end
55
-
56
- Loofah.fragment(dirty).
57
- scrub!(:prune).
58
- scrub!(strip_pre).
59
- to_text.
60
- gsub(/([\r\n\t\s]+)/, ' ').strip
61
57
  end
62
58
 
63
59
  def create_index(client, settings)
@@ -0,0 +1,23 @@
1
+ require 'jekyll_search/html_processor'
2
+
3
# Specs for the HTML helpers used when building the search index.
RSpec.describe JekyllSearch::HtmlProcessor do
  # Plain text passes through unchanged; tags are removed and the whitespace
  # between block elements collapses to a single space.
  it 'strips html' do
    expect(JekyllSearch::HtmlProcessor.strip_html('test')).to eq 'test'
    expect(JekyllSearch::HtmlProcessor.strip_html("<h1>title</h1>\n<p>text</p>")).to eq 'title text'
  end

  # Input without any heading yields one untitled section holding everything.
  it 'detects a single untitled section when there are no headings' do
    expect(JekyllSearch::HtmlProcessor.detect_sections('test')).to eq [
      { :id => nil, :title => nil, :content => 'test'}
    ]
  end

  # Covers: content before the first heading, upper-case tag and attribute
  # names, nested markup inside a heading, and a trailing heading without
  # any content after it.
  it 'detects sections delimited by headings' do
    expect(JekyllSearch::HtmlProcessor.detect_sections("foo<h1>first</h1>bar <p>\npara\n</p>\n\n<H4 ID=\"sec\">sec<span>ond</span></H4>apple<h2>third</h2>")).to eq [
      { :id => nil, :title => nil, :content => "foo"},
      { :id => nil, :title => 'first', :content => "bar <p>\npara\n</p>\n\n"},
      { :id => 'sec', :title => 'second', :content => "apple" },
      { :id => nil, :title => 'third', :content => '' }
    ]
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Hoffmeister
@@ -110,8 +110,9 @@ files:
110
110
  - Rakefile
111
111
  - jekyll_search.gemspec
112
112
  - lib/jekyll_search.rb
113
+ - lib/jekyll_search/html_processor.rb
113
114
  - lib/jekyll_search/version.rb
114
- - spec/lib/jekyll_search_spec.rb
115
+ - spec/lib/jekyll_search/html_processor_spec.rb
115
116
  - spec/spec_helper.rb
116
117
  homepage: https://github.com/choffmeister/jekyll_search
117
118
  licenses:
@@ -138,5 +139,5 @@ signing_key:
138
139
  specification_version: 4
139
140
  summary: An Elasticsearch full text search index generator for Jekyll.
140
141
  test_files:
141
- - spec/lib/jekyll_search_spec.rb
142
+ - spec/lib/jekyll_search/html_processor_spec.rb
142
143
  - spec/spec_helper.rb
@@ -1,5 +0,0 @@
1
- require 'jekyll_search'
2
-
3
- RSpec.describe Jekyll::Commands::Index do
4
- it 'works'
5
- end