jekyll-lunr-js-search-plusplus 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/jekyll-lunr-js-search.rb +5 -0
- data/lib/jekyll_lunr_js_search/indexer.rb +121 -0
- data/lib/jekyll_lunr_js_search/page_renderer.rb +26 -0
- data/lib/jekyll_lunr_js_search/search_entry.rb +62 -0
- data/lib/jekyll_lunr_js_search/search_index_file.rb +10 -0
- data/lib/jekyll_lunr_js_search/version.rb +5 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 94cdd3d615f27ee7c6efd7e3d05fdd58b5ca5e95
|
4
|
+
data.tar.gz: 3ec0febef9b0a1eb932224fe5d6ebbc2592ccc08
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b91f860335f4311ce61351e875386afbb40a58f7862c95d6e01f7c82ed00939a64bb34afc3463aac1db0f4a9069fafe6cfa4025edc5648f8b33bbe3069cbbe2b
|
7
|
+
data.tar.gz: 5e3ce2c4be224fb9ff8032753e15f79762b8c03bf7c02f39f3ba82bd8701df768832fdfcb0984d9e1544f0d3fab7eeb8a3e9e5f25a452fbba199be3135855f44
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'json'
|
5
|
+
require 'date'
|
6
|
+
|
7
|
+
module Jekyll
|
8
|
+
module LunrJsSearch
|
9
|
+
class Indexer < Jekyll::Generator
|
10
|
+
# Sets up the indexer from the site configuration.
#
# Settings are read from the optional 'lunr_search' section of +config+ and
# layered over the built-in defaults below.
#
# config - configuration Hash (defaults to an empty Hash)
def initialize(config = {})
  super(config)

  defaults = {
    'excludes' => [],
    'strip_index_html' => false,
    'min_length' => 3,
    'stopwords' => 'stopwords.txt'
  }
  overrides = config['lunr_search'] || {}
  settings = defaults.merge(overrides)

  # patterns for URLs that must not be indexed
  @excludes = settings['excludes']

  # when the web host serves index.html as the default document, the
  # suffix can optionally be dropped from indexed URLs
  @strip_index_html = settings['strip_index_html']

  # stop word exclusion: minimum word length and the word list file
  @min_length = settings['min_length']
  @stopwords_file = settings['stopwords']

  # has no default, so it is nil unless 'dev_mode' is configured
  @dev_mode = settings['dev_mode']

  # name of the JSON index file that gets written out
  @filename = 'search.json'

  # format of the generation stamp stored inside the index
  @generation_strftime = "%m-%d-%y"
end
|
36
|
+
|
37
|
+
# Builds the search index and writes it to disk as pretty-printed JSON.
#
# Indexes everything returned by pages_to_index plus the documents of every
# site collection. Items for which SearchEntry.create returns nil are skipped.
#
# In dev mode the run is skipped when the existing index file carries
# today's generation stamp. The whole stamp is compared, not just the day of
# the month, so an index from a previous month or year is never treated as
# fresh.
#
# site - the site being generated; this method uses its `collections`,
#        `dest` and `static_files`
#
# Returns nil when the run is skipped, otherwise the result of appending the
# index file to site.static_files.
def generate(site)
  today_stamp = Time.now.strftime(@generation_strftime)

  if @dev_mode && File.exist?(search_json_location)
    # File.read closes the handle; UTF-8 is forced because the index always
    # contains a non-ASCII ellipsis in its excerpts.
    search_json = JSON.parse(File.read(search_json_location, :encoding => 'UTF-8'))
    if search_json.is_a?(Hash) && search_json["generation_time"] == today_stamp
      puts "\nNot running indexer in dev mode since search.json exists within the last day...\n"
      return
    end
  end

  puts "\nRunning the search indexer...\n"

  # gather pages and posts, then every collection document
  items = pages_to_index(site)
  site.collections.each do |_name, collection|
    collection.docs.each { |document| items << document }
  end

  content_renderer = PageRenderer.new(site)

  # Checked once up front: the answer does not change between items.
  use_stopwords = !@stopwords_file.nil? && File.exist?(@stopwords_file)

  index = []
  items.each do |item|
    entry = SearchEntry.create(item, content_renderer)
    next if entry.nil?

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

    index << {
      :title => entry.title,
      :url => entry.url,
      :date => entry.date,
      :categories => entry.categories,
      :collection => entry.collection,
      # NOTE(review): this is the Ruby class of the entry object, not a
      # page attribute -- confirm that is what the front end expects.
      :class => entry.class,
      :body => entry.body,
      :excerpt => entry.body[0..140] + "…"
    }
  end

  json = {:generation_time => today_stamp, :entries => index}

  # Create the destination directory if it doesn't exist yet.
  Dir.mkdir(site.dest) unless File.directory?(site.dest)

  # The index is written to search_json_location, so its directory has to
  # exist as well; otherwise File.open raises Errno::ENOENT.
  index_dir = File.dirname(search_json_location)
  Dir.mkdir(index_dir) unless File.directory?(index_dir)

  File.open(search_json_location, "w:UTF-8") do |file|
    file.write(JSON.pretty_generate(json))
  end

  # Keep the search.json file from being cleaned by Jekyll
  site.static_files << SearchIndexFile.new(site, site.dest, "/", @filename)
end
|
94
|
+
|
95
|
+
private
|
96
|
+
|
97
|
+
# Relative path of the index file: @filename inside the "search" directory.
def search_json_location
  path_parts = ["search", @filename]
  File.join(path_parts)
end
|
100
|
+
|
101
|
+
# Loads the stop word list from @stopwords_file (one word per line) and
# memoizes it in @stopwords.
#
# File.readlines is used instead of IO.readlines: the IO class methods treat
# a path starting with "|" as a command to execute, whereas the File variants
# always open a file.
#
# Returns an Array of Strings with surrounding whitespace removed and blank
# lines dropped.
def stopwords
  @stopwords ||= File.readlines(@stopwords_file).map(&:strip).reject(&:empty?)
end
|
105
|
+
|
106
|
+
# Collects the pages and posts that should be indexed.
#
# An item is kept when its output extension is '.html', its URL matches none
# of the @excludes patterns, and its data does not set 'exclude_from_search'.
#
# site - object exposing `pages` and `posts`
#
# Returns a new Array holding copies (made with `dup`, i.e. shallow copies)
# of the selected items.
def pages_to_index(site)
  # Compile every exclude pattern once instead of once per item. Array()
  # also tolerates a nil or single-String setting.
  exclude_patterns = Array(@excludes).map { |pattern| Regexp.new(pattern) }

  candidates = []
  site.pages.each { |page| candidates << page.dup }
  site.posts.each { |post| candidates << post.dup }

  candidates.select do |item|
    item.output_ext == '.html' &&
      exclude_patterns.none? { |pattern| item.url =~ pattern } &&
      !item.data['exclude_from_search']
  end
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
|
4
|
+
module LunrJsSearch
|
5
|
+
# Renders a site item and extracts the plain text of its article body.
class PageRenderer
  # site - the site whose payload is used when rendering items
  def initialize(site)
    @site = site
  end

  # Renders +item+, parses the resulting HTML and collects the text of every
  # <div> whose class list contains "article-body".
  #
  # Returns the text as a single String with each run of whitespace collapsed
  # to one space and no leading or trailing whitespace.
  def render(item)
    case item
    when Jekyll::Document
      item.output = Jekyll::Renderer.new(@site, item).run
    else
      item.render({}, @site.site_payload)
    end

    html = Nokogiri::HTML(item.output)
    article_nodes = html.search('//div[contains(concat(" ", normalize-space(@class), " "), " article-body ")]')
    text = article_nodes.map(&:content).join(" ")

    # \s already covers \r, \n and \t, so one pass collapses everything
    text.gsub(/\s+/, " ").strip
  end
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
|
4
|
+
module LunrJsSearch
|
5
|
+
# One searchable item (page, post or collection document) with the fields
# that end up in the search index.
class SearchEntry
  attr_reader :title, :url, :date, :categories, :body, :collection

  # Builds an entry for +page_or_post+, dispatching on its Jekyll type.
  #
  # page_or_post - a Jekyll::Post, Jekyll::Page or Jekyll::Document
  # renderer     - object responding to `render(item)` and returning the
  #                item's text
  #
  # Returns a SearchEntry, or nil when a document opts out of indexing.
  # Raises RuntimeError for any other item type.
  def self.create(page_or_post, renderer)
    # Jekyll::Post is only checked when it is defined, so that a Jekyll
    # version without that class does not fail here with a NameError.
    if defined?(Jekyll::Post) && page_or_post.is_a?(Jekyll::Post)
      return create_from_post(page_or_post, renderer)
    end
    return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
    return create_from_document(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Document)

    raise 'Item type not supported'
  end

  # Builds an entry from a collection document.
  #
  # Returns nil when the document's data sets 'exclude_from_search' or
  # 'redirect_to'. The entry's date is the time of indexing and its
  # categories are taken from the document's 'category' value.
  def self.create_from_document(document, renderer)
    return if document.data["exclude_from_search"] || document.data["redirect_to"]

    body = renderer.render(document)
    data = document.to_liquid
    # NOTE(review): no collection name is passed, so `collection` is nil for
    # documents as well -- confirm whether it should be filled in here.
    SearchEntry.new(data['title'], data['url'], Time.now, data['category'], body)
  end

  # Builds an entry from a page; pages carry no date and no categories.
  def self.create_from_page(page, renderer)
    title, url = extract_title_and_url(page)
    body = renderer.render(page)

    SearchEntry.new(title, url, nil, [], body, nil)
  end

  # Builds an entry from a post, keeping the post's date and categories.
  def self.create_from_post(post, renderer)
    title, url = extract_title_and_url(post)
    body = renderer.render(post)

    SearchEntry.new(title, url, post.date, post.categories, body, nil)
  end

  # Returns [title, url] taken from the item's liquid representation.
  def self.extract_title_and_url(item)
    data = item.to_liquid
    [data['title'], data['url']]
  end

  # collection is optional and defaults to nil, so both the five-argument
  # and the six-argument calls made by the factory methods are accepted.
  def initialize(title, url, date, categories, body, collection = nil)
    @title = title
    @url = url
    @date = date
    @categories = categories
    @body = body
    @collection = collection
  end

  # Drops a trailing "index.html" from the URL.
  #
  # Assigns a new String instead of mutating in place, so a frozen or shared
  # URL string handed in by the caller is left untouched.
  def strip_index_suffix_from_url!
    @url = @url.sub(/index\.html$/, '') if @url
  end

  # Removes stop words from the text to be indexed.
  #
  # Each whitespace-separated word is lower-cased and reduced to its a-z
  # characters for the comparison; the word is dropped when that reduced
  # form is shorter than +min_length+ or is listed in +stopwords+.
  #
  # stopwords  - collection responding to `include?`
  # min_length - minimum length of the reduced word
  def strip_stopwords!(stopwords, min_length)
    kept = @body.split.reject do |word|
      reduced = word.downcase.gsub(/[^a-z]/, '')
      reduced.length < min_length || stopwords.include?(reduced)
    end
    @body = kept.join(' ')
  end
end
|
61
|
+
end
|
62
|
+
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-lunr-js-search-plusplus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Garen J. Torikian
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.3'
|
55
|
+
description: Use lunr.js to provide simple full-text search, using JavaScript in your
|
56
|
+
browser, for your Jekyll static website.
|
57
|
+
email: gjtorikian@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- lib/jekyll-lunr-js-search.rb
|
63
|
+
- lib/jekyll_lunr_js_search/indexer.rb
|
64
|
+
- lib/jekyll_lunr_js_search/page_renderer.rb
|
65
|
+
- lib/jekyll_lunr_js_search/search_entry.rb
|
66
|
+
- lib/jekyll_lunr_js_search/search_index_file.rb
|
67
|
+
- lib/jekyll_lunr_js_search/version.rb
|
68
|
+
homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
|
69
|
+
licenses:
|
70
|
+
- MIT
|
71
|
+
metadata: {}
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options: []
|
74
|
+
require_paths:
|
75
|
+
- lib
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 2.2.2
|
89
|
+
signing_key:
|
90
|
+
specification_version: 4
|
91
|
+
summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
|
92
|
+
test_files: []
|