jekyll-lunr-js-search 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/jekyll-lunr-js-search.rb +5 -0
- data/lib/jekyll_lunr_js_search/indexer.rb +91 -0
- data/lib/jekyll_lunr_js_search/page_renderer.rb +19 -0
- data/lib/jekyll_lunr_js_search/search_entry.rb +54 -0
- data/lib/jekyll_lunr_js_search/search_index_file.rb +10 -0
- data/lib/jekyll_lunr_js_search/version.rb +5 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTU1YjcxZmY2NjBlYzY2M2RlM2RiYjQ4ZGYwNjI1ZmZmYzM4ODRjMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
M2Y3ZWI1NTQzMWRjZThhMjI2MjI4YjA0MWE0MzgzNDY4ZWU0NjFmZA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YWQ2NGExZTAyOWIxZmQ4MzU3NTNhZTc2ZDI2NGFhNzNiMzExZTkzMDg5MDY0
|
10
|
+
MDdhNGFhYmE1ZjlmMDdjMWI0ZDk2ZGI1NjAxMmQ2MzAzNTYzMTFlNjRjZTBk
|
11
|
+
MDEyOGVmMTRlMDliZGRmMjBmNWYwMDJjZGI4ZWE1NDAzNmE1YWY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjcxNjEwY2MzODhiN2I4ODdjNmQyODg3ZDdlMDllNTRmNTU0ODAzMjJhMTc3
|
14
|
+
YjkwNjYyY2Q4NmRjOWFlNzliNjhkYjk3NzM0MTdhNjA4NzUxNTA1ZTU4Yzk2
|
15
|
+
ZDQ2OWM2NTgzODRhMzk1MjgxNWNmMTM0MWIwMWFkY2IxMGFhMDc=
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'fileutils'
require 'json'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Jekyll generator that renders every indexable page and post, collects the
    # extracted text into a list of entries and writes it to search.json in the
    # site destination directory.
    #
    # Settings are read from the 'lunr_search' key of the site configuration:
    #   excludes         - list of regular expression sources; items whose url matches any are skipped
    #   strip_index_html - when true, a trailing "index.html" is removed from indexed urls
    #   min_length       - words shorter than this are dropped when stop word filtering is active
    #   stopwords        - path of the stop words file (one word per line)
    class Indexer < Jekyll::Generator
      # config - the site configuration hash; values under 'lunr_search' override the defaults below.
      def initialize(config = {})
        super(config)

        lunr_config = {
          'excludes' => [],
          'strip_index_html' => false,
          'min_length' => 3,
          'stopwords' => 'stopwords.txt'
        }.merge(config['lunr_search'] || {})

        @excludes = lunr_config['excludes']

        # if web host supports index.html as default doc, then optionally exclude it from the url
        @strip_index_html = lunr_config['strip_index_html']

        # stop word exclusion configuration
        @min_length = lunr_config['min_length']
        @stopwords_file = lunr_config['stopwords']
      end

      # Index all pages and posts except those whose url matches any pattern in
      # config['lunr_search']['excludes'] or whose data['exclude_from_search'] is set.
      # The text content of each item is extracted and saved to disk as json.
      #
      # site - the Jekyll site being generated.
      def generate(site)
        puts 'Running the search indexer...'

        content_renderer = PageRenderer.new(site)

        # Stop word filtering is only applied when the stop words file is present.
        # File.exist? is used because File.exists? was removed in Ruby 3.2;
        # the check is done once instead of once per item.
        use_stopwords = File.exist?(@stopwords_file)

        index = pages_to_index(site).map do |item|
          entry = SearchEntry.create(item, content_renderer)

          entry.strip_index_suffix_from_url! if @strip_index_html
          entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

          puts "Indexed #{entry.title} (#{entry.url})"

          {
            :title => entry.title,
            :url => entry.url,
            :date => entry.date,
            :categories => entry.categories,
            :body => entry.body
          }
        end

        json = JSON.generate({:entries => index})

        # Create destination directory (including missing parents) if it doesn't exist yet.
        # Otherwise, we cannot write our file there.
        FileUtils.mkdir_p(site.dest)

        # File I/O: create search.json file and write out the (compact) JSON
        filename = 'search.json'

        File.open(File.join(site.dest, filename), "w") do |file|
          file.write(json)
        end

        # Keep the search.json file from being cleaned by Jekyll
        site.static_files << SearchIndexFile.new(site, site.dest, "/", filename)
      end

      private

      # Load the stop words file once and cache its lines with surrounding whitespace removed.
      def stopwords
        @stopwords ||= IO.readlines(@stopwords_file).map { |l| l.strip }
      end

      # Collect the pages and posts that should appear in the search index.
      # Returns an array of copies of the selected items.
      def pages_to_index(site)
        # compile each exclude pattern once rather than once per item
        exclude_patterns = @excludes.map { |s| Regexp.new(s) }

        # work on shallow copies (dup) of the site's pages and posts
        items = []
        site.pages.each { |page| items << page.dup }
        site.posts.each { |post| items << post.dup }

        # only process files that will be converted to .html, are not matched by an
        # exclude pattern and are not flagged with exclude_from_search
        items.select do |item|
          next false unless item.output_ext == '.html'

          url = item.url
          next false if exclude_patterns.any? { |pattern| url =~ pattern }

          !item.data['exclude_from_search']
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Renders a page or post and reduces the generated HTML to a single line of plain text.
    class PageRenderer
      # site - the Jekyll site; its site_payload is passed to each item when rendering.
      def initialize(site)
        @site = site
      end

      # Render the item, parse its output as HTML and return the content of every
      # text node joined with spaces, with each run of whitespace (including
      # carriage returns, newlines and tabs) collapsed to a single space.
      def render(item)
        item.render({}, @site.site_payload)

        text_nodes = Nokogiri::HTML(item.output).search('//text()')
        joined = text_nodes.map { |node| node.content }.join(" ")

        # \s already covers \r, \n and \t, so one pass normalises all whitespace
        joined.gsub(/\s+/, " ")
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # A single document in the search index: title, url, date, categories and body text.
    class SearchEntry
      attr_reader :title, :url, :date, :categories, :body

      # Build an entry from a Jekyll post or page.
      #
      # page_or_post - a Jekyll::Post or Jekyll::Page
      # renderer     - object whose render(item) returns the text to index
      #
      # Raises RuntimeError ('Not supported') for any other kind of object.
      def self.create(page_or_post, renderer)
        return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
        return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)

        raise 'Not supported'
      end

      # Pages carry no date or categories, so those are stored as nil and [].
      def self.create_from_page(page, renderer)
        title, url = extract_title_and_url(page)
        body = renderer.render(page)

        SearchEntry.new(title, url, nil, [], body)
      end

      # Posts additionally contribute their date and categories.
      def self.create_from_post(post, renderer)
        title, url = extract_title_and_url(post)
        body = renderer.render(post)

        SearchEntry.new(title, url, post.date, post.categories, body)
      end

      # Returns [title, url] as exposed by the item's liquid representation.
      def self.extract_title_and_url(item)
        data = item.to_liquid
        [ data['title'], data['url'] ]
      end

      def initialize(title, url, date, categories, body)
        @title, @url, @date, @categories, @body = title, url, date, categories, body
      end

      # Remove a trailing "index.html" from the url.
      #
      # A new string is assigned instead of mutating in place: the url string is
      # supplied by the caller (it originates from item.to_liquid) and may be
      # shared with other objects. \z anchors at the very end of the string,
      # whereas $ would also match before an embedded newline.
      def strip_index_suffix_from_url!
        @url = @url.sub(/index\.html\z/, '')
      end

      # Remove anything that is in the stop words list from the text to be indexed.
      #
      # stopwords  - collection of (lowercase) words to drop
      # min_length - words whose letters-only form is shorter than this are dropped too
      #
      # Each word is compared in lowercase with every non-letter removed.
      # [[:alpha:]] is used so that letters outside a-z (accented or non-Latin)
      # count as letters; with /[^a-z]/ such words were emptied and discarded as
      # too short. Words made only of digits/punctuation are still dropped.
      def strip_stopwords!(stopwords, min_length)
        kept = @body.split.reject do |word|
          normalized = word.downcase.gsub(/[^[:alpha:]]/, '')
          normalized.length < min_length || stopwords.include?(normalized)
        end

        @body = kept.join(' ')
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-lunr-js-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Smith
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-08-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: uglifier
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.5'
|
69
|
+
description: Use lunr.js to provide simple full-text search, using JavaScript in your
|
70
|
+
browser, for your Jekyll static website.
|
71
|
+
email: ben@10consulting.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- lib/jekyll-lunr-js-search.rb
|
77
|
+
- lib/jekyll_lunr_js_search/indexer.rb
|
78
|
+
- lib/jekyll_lunr_js_search/page_renderer.rb
|
79
|
+
- lib/jekyll_lunr_js_search/search_entry.rb
|
80
|
+
- lib/jekyll_lunr_js_search/search_index_file.rb
|
81
|
+
- lib/jekyll_lunr_js_search/version.rb
|
82
|
+
homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
|
83
|
+
licenses:
|
84
|
+
- MIT
|
85
|
+
metadata: {}
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ! '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 2.4.1
|
103
|
+
signing_key:
|
104
|
+
specification_version: 4
|
105
|
+
summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
|
106
|
+
test_files: []
|