jekyll-lunr-js-search 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/jekyll-lunr-js-search.rb +5 -0
- data/lib/jekyll_lunr_js_search/indexer.rb +91 -0
- data/lib/jekyll_lunr_js_search/page_renderer.rb +19 -0
- data/lib/jekyll_lunr_js_search/search_entry.rb +54 -0
- data/lib/jekyll_lunr_js_search/search_index_file.rb +10 -0
- data/lib/jekyll_lunr_js_search/version.rb +5 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTU1YjcxZmY2NjBlYzY2M2RlM2RiYjQ4ZGYwNjI1ZmZmYzM4ODRjMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
M2Y3ZWI1NTQzMWRjZThhMjI2MjI4YjA0MWE0MzgzNDY4ZWU0NjFmZA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YWQ2NGExZTAyOWIxZmQ4MzU3NTNhZTc2ZDI2NGFhNzNiMzExZTkzMDg5MDY0
|
10
|
+
MDdhNGFhYmE1ZjlmMDdjMWI0ZDk2ZGI1NjAxMmQ2MzAzNTYzMTFlNjRjZTBk
|
11
|
+
MDEyOGVmMTRlMDliZGRmMjBmNWYwMDJjZGI4ZWE1NDAzNmE1YWY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjcxNjEwY2MzODhiN2I4ODdjNmQyODg3ZDdlMDllNTRmNTU0ODAzMjJhMTc3
|
14
|
+
YjkwNjYyY2Q4NmRjOWFlNzliNjhkYjk3NzM0MTdhNjA4NzUxNTA1ZTU4Yzk2
|
15
|
+
ZDQ2OWM2NTgzODRhMzk1MjgxNWNmMTM0MWIwMWFkY2IxMGFhMDc=
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'fileutils'
require 'json'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Jekyll generator that renders every indexable page and post, collects
    # title/url/date/categories/body for each one and writes the result to
    # `search.json` in the site destination directory.
    class Indexer < Jekyll::Generator
      # Reads the optional `lunr_search` section of the site configuration.
      #
      # Recognised keys (with their defaults):
      #   'excludes'         => []              patterns matched against item urls
      #   'strip_index_html' => false           drop a trailing index.html from urls
      #   'min_length'       => 3               minimum normalized word length to keep
      #   'stopwords'        => 'stopwords.txt' path of the stop word list
      def initialize(config = {})
        super(config)

        lunr_config = {
          'excludes' => [],
          'strip_index_html' => false,
          'min_length' => 3,
          'stopwords' => 'stopwords.txt'
        }.merge(config['lunr_search'] || {})

        @excludes = lunr_config['excludes']

        # if web host supports index.html as default doc, then optionally exclude it from the url
        @strip_index_html = lunr_config['strip_index_html']

        # stop word exclusion configuration
        @min_length = lunr_config['min_length']
        @stopwords_file = lunr_config['stopwords']
      end

      # Index all pages and posts except those whose url matches any pattern in
      # config['lunr_search']['excludes'] or whose front matter sets
      # data['exclude_from_search'].
      # The text content of each item is extracted and saved to disk as JSON.
      def generate(site)
        puts 'Running the search indexer...'

        content_renderer = PageRenderer.new(site)

        # Look for the stopwords file once rather than once per indexed item;
        # a nil path simply disables stop word filtering.
        filter_stopwords = !@stopwords_file.nil? && File.exist?(@stopwords_file)

        index = pages_to_index(site).map do |item|
          entry = SearchEntry.create(item, content_renderer)

          entry.strip_index_suffix_from_url! if @strip_index_html
          entry.strip_stopwords!(stopwords, @min_length) if filter_stopwords

          puts "Indexed #{entry.title} (#{entry.url})"

          {
            :title => entry.title,
            :url => entry.url,
            :date => entry.date,
            :categories => entry.categories,
            :body => entry.body
          }
        end

        json = JSON.generate({ :entries => index })

        # Create the destination directory (including missing parents) if it
        # doesn't exist yet. Otherwise, we cannot write our file there.
        FileUtils.mkdir_p(site.dest)

        # File I/O: create search.json file and write out the (compact) JSON
        filename = 'search.json'

        File.open(File.join(site.dest, filename), 'w') do |file|
          file.write(json)
        end

        # Keep the search.json file from being cleaned by Jekyll
        site.static_files << SearchIndexFile.new(site, site.dest, "/", filename)
      end

      private

      # Load the stopwords file (one word per line); the list is memoized.
      def stopwords
        @stopwords ||= File.readlines(@stopwords_file).map { |line| line.strip }
      end

      # Returns copies of the pages and posts that should be indexed.
      def pages_to_index(site)
        # Compile each exclude pattern once instead of once per item.
        exclude_patterns = Array(@excludes).map { |pattern| Regexp.new(pattern) }

        # Work on copies of the site's pages and posts (Object#dup is a
        # shallow copy); these copies are what the indexer later renders.
        items = site.pages.map { |page| page.dup } + site.posts.map { |post| post.dup }

        # only process files that will be converted to .html and only non excluded files
        items.select do |item|
          item.output_ext == '.html' &&
            exclude_patterns.none? { |pattern| item.url =~ pattern } &&
            !item.data['exclude_from_search']
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Renders a page or post and reduces its HTML output to plain text.
    class PageRenderer
      # site - the site whose payload is used when rendering items.
      def initialize(site)
        @site = site
      end

      # Render the item, parse the output and collect the content of every
      # text node in the document (not only text inside <p> elements).
      #
      # Returns a single String in which each run of whitespace has been
      # collapsed to one space.
      def render(item)
        item.render({}, @site.site_payload)
        doc = Nokogiri::HTML(item.output)
        text_nodes = doc.search('//text()').map { |node| node.content }

        # \s already covers \r, \n and \t, so one pass normalizes them all.
        text_nodes.join(' ').gsub(/\s+/, ' ')
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # One record of the search index: title, url, date, categories and the
    # plain-text body of a page or post.
    class SearchEntry
      # Builds an entry from a Jekyll::Post or Jekyll::Page.
      #
      # renderer must respond to render(item) and return the item's text.
      # Raises RuntimeError ('Not supported') for any other kind of object.
      def self.create(page_or_post, renderer)
        case page_or_post
        when Jekyll::Post then create_from_post(page_or_post, renderer)
        when Jekyll::Page then create_from_page(page_or_post, renderer)
        else raise 'Not supported'
        end
      end

      # Pages get no date and an empty category list.
      def self.create_from_page(page, renderer)
        title, url = extract_title_and_url(page)
        body = renderer.render(page)

        SearchEntry.new(title, url, nil, [], body)
      end

      # Posts additionally contribute their date and categories.
      def self.create_from_post(post, renderer)
        title, url = extract_title_and_url(post)
        body = renderer.render(post)

        SearchEntry.new(title, url, post.date, post.categories, body)
      end

      # Returns [title, url] taken from the item's liquid representation.
      def self.extract_title_and_url(item)
        data = item.to_liquid
        [data['title'], data['url']]
      end

      attr_reader :title, :url, :date, :categories, :body

      def initialize(title, url, date, categories, body)
        @title = title
        @url = url
        @date = date
        @categories = categories
        @body = body
      end

      # Drops a trailing "index.html" from the url and returns the url.
      #
      # A new string is assigned instead of mutating in place: the url string
      # was handed in from outside and may be shared with (or frozen by) its
      # owner, so an in-place edit could leak out or raise.
      def strip_index_suffix_from_url!
        @url = @url.sub(/index\.html\z/, '') if @url
        @url
      end

      # Remove anything that is in the stop words list from the text to be
      # indexed, together with every word whose normalized form (lowercased,
      # only a-z kept) is shorter than min_length.
      #
      # Returns the new body.
      def strip_stopwords!(stopwords, min_length)
        # Hash lookup keeps the per-word check constant-time.
        stopword_lookup = stopwords.each_with_object({}) { |word, seen| seen[word] = true }

        kept_words = @body.split.reject do |word|
          normalized = word.downcase.gsub(/[^a-z]/, '')
          normalized.length < min_length || stopword_lookup.key?(normalized)
        end

        @body = kept_words.join(' ')
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-lunr-js-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Smith
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-08-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: uglifier
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.5'
|
69
|
+
description: Use lunr.js to provide simple full-text search, using JavaScript in your
|
70
|
+
browser, for your Jekyll static website.
|
71
|
+
email: ben@10consulting.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- lib/jekyll-lunr-js-search.rb
|
77
|
+
- lib/jekyll_lunr_js_search/indexer.rb
|
78
|
+
- lib/jekyll_lunr_js_search/page_renderer.rb
|
79
|
+
- lib/jekyll_lunr_js_search/search_entry.rb
|
80
|
+
- lib/jekyll_lunr_js_search/search_index_file.rb
|
81
|
+
- lib/jekyll_lunr_js_search/version.rb
|
82
|
+
homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
|
83
|
+
licenses:
|
84
|
+
- MIT
|
85
|
+
metadata: {}
|
86
|
+
post_install_message:
|
87
|
+
rdoc_options: []
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ! '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 2.4.1
|
103
|
+
signing_key:
|
104
|
+
specification_version: 4
|
105
|
+
summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
|
106
|
+
test_files: []
|