jekyll-lunr-js-search-plusplus 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/jekyll-lunr-js-search.rb +5 -0
- data/lib/jekyll_lunr_js_search/indexer.rb +121 -0
- data/lib/jekyll_lunr_js_search/page_renderer.rb +26 -0
- data/lib/jekyll_lunr_js_search/search_entry.rb +62 -0
- data/lib/jekyll_lunr_js_search/search_index_file.rb +10 -0
- data/lib/jekyll_lunr_js_search/version.rb +5 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 94cdd3d615f27ee7c6efd7e3d05fdd58b5ca5e95
|
4
|
+
data.tar.gz: 3ec0febef9b0a1eb932224fe5d6ebbc2592ccc08
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b91f860335f4311ce61351e875386afbb40a58f7862c95d6e01f7c82ed00939a64bb34afc3463aac1db0f4a9069fafe6cfa4025edc5648f8b33bbe3069cbbe2b
|
7
|
+
data.tar.gz: 5e3ce2c4be224fb9ff8032753e15f79762b8c03bf7c02f39f3ba82bd8701df768832fdfcb0984d9e1544f0d3fab7eeb8a3e9e5f25a452fbba199be3135855f44
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'json'
|
5
|
+
require 'date'
|
6
|
+
|
7
|
+
module Jekyll
  module LunrJsSearch
    # Jekyll generator that builds the lunr.js search index.
    #
    # Every indexable page, post and collection document is rendered, reduced
    # to plain text by PageRenderer/SearchEntry, and written out as
    # pretty-printed JSON.
    class Indexer < Jekyll::Generator
      # @param config [Hash] the Jekyll site configuration; plugin options are
      #   read from its 'lunr_search' key and override the defaults below.
      def initialize(config = {})
        super(config)

        lunr_config = {
          'excludes' => [],
          'strip_index_html' => false,
          'min_length' => 3,
          'stopwords' => 'stopwords.txt'
        }.merge!(config['lunr_search'] || {})

        @excludes = lunr_config['excludes']

        # if web host supports index.html as default doc, then optionally exclude it from the url
        @strip_index_html = lunr_config['strip_index_html']

        # stop word exclusion configuration
        @min_length = lunr_config['min_length']
        @stopwords_file = lunr_config['stopwords']

        # when truthy, #generate skips re-indexing if an index stamped with today's date exists
        @dev_mode = lunr_config['dev_mode']

        # File I/O: name of the JSON index file
        @filename = 'search.json'

        # format of the "generation_time" stamp stored inside the index
        @generation_strftime = "%m-%d-%y"
      end

      # Index all pages except those whose URL matches any pattern in
      # config['lunr_search']['excludes'] or whose front matter sets
      # 'exclude_from_search'. The main content of each page is extracted and
      # saved to disk as JSON.
      #
      # @param site [Jekyll::Site] the site being generated
      # @return [void]
      def generate(site)
        if @dev_mode && index_generated_today?
          puts "\nNot running indexer in dev mode since search.json exists within the last day...\n"
          return
        end

        puts "\nRunning the search indexer...\n"

        # gather pages and posts
        items = pages_to_index(site)

        site.collections.each do |_name, collection|
          collection.docs.each { |document| items << document }
        end

        content_renderer = PageRenderer.new(site)
        # The stopwords file does not change while indexing, so check for it once.
        use_stopwords = File.exist?(@stopwords_file)
        index = []

        items.each do |item|
          entry = SearchEntry.create(item, content_renderer)

          next if entry.nil?

          entry.strip_index_suffix_from_url! if @strip_index_html
          entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

          index << {
            :title => entry.title,
            :url => entry.url,
            :date => entry.date,
            :categories => entry.categories,
            :collection => entry.collection,
            # NOTE(review): SearchEntry defines no `class` reader in the visible
            # source, so this serializes the Ruby class of the entry - confirm intent.
            :class => entry.class,
            :body => entry.body,
            :excerpt => entry.body[0..140] + "…"
          }

          # puts 'Indexed ' << "#{entry.title} (#{entry.collection} - #{entry.url})"
        end

        json = {:generation_time => Time.now.strftime(@generation_strftime), :entries => index}

        # Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
        Dir.mkdir(site.dest) unless File.directory?(site.dest)

        # The index itself is written below search_json_location's directory,
        # which must exist as well or File.open raises Errno::ENOENT.
        index_dir = File.dirname(search_json_location)
        Dir.mkdir(index_dir) unless File.directory?(index_dir)

        File.open(search_json_location, "w") do |file|
          file.write(JSON.pretty_generate(json))
        end

        # Keep the search.json file from being cleaned by Jekyll
        site.static_files << SearchIndexFile.new(site, site.dest, "/", @filename)
      end

      private

      # Path the JSON index is written to and read back from.
      def search_json_location
        File.join("search", @filename)
      end

      # True when an index file exists and its "generation_time" stamp is
      # today's date. The whole date is compared (not just the day of month),
      # and an unreadable or malformed index counts as stale so it is rebuilt.
      def index_generated_today?
        return false unless File.exist?(search_json_location)

        generated_on = JSON.parse(File.read(search_json_location))["generation_time"]
        return false unless generated_on

        Date.strptime(generated_on, @generation_strftime) == Date.today
      rescue JSON::ParserError, ArgumentError, TypeError
        false
      end

      # load the stopwords file (memoized); one stop word per line
      def stopwords
        # File.readlines never treats a leading "|" in the path as a command, unlike IO.readlines.
        @stopwords ||= File.readlines(@stopwords_file).map { |line| line.strip }
      end

      # Collect the pages and posts that should be indexed.
      #
      # @param site [Jekyll::Site]
      # @return [Array] shallow copies of the pages and posts to index
      def pages_to_index(site)
        items = []

        # shallow copies (dup), so the site's own page/post lists are not altered
        site.pages.each { |page| items << page.dup }
        site.posts.each { |post| items << post.dup }

        # compile each exclude pattern once instead of once per item
        exclude_patterns = @excludes.map { |pattern| Regexp.new(pattern) }

        # only process files that will be converted to .html and only non excluded files
        items.select! do |item|
          item.output_ext == '.html' && exclude_patterns.none? { |pattern| item.url =~ pattern }
        end
        items.reject! { |item| item.data['exclude_from_search'] }

        items
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Turns a Jekyll page, post or document into the plain text that gets indexed.
    class PageRenderer
      # @param site [Jekyll::Site] site used to render items
      def initialize(site)
        @site = site
      end

      # Render the item, parse the resulting HTML and return the text found
      # inside every <div> whose class list contains "article-body".
      #
      # @param item [#output] a Jekyll::Document, or an object responding to render(layouts, payload)
      # @return [String] the extracted text with whitespace runs collapsed to single spaces and ends trimmed
      def render(item)
        if item.is_a?(Jekyll::Document)
          item.output = Jekyll::Renderer.new(@site, item).run
        else
          item.render({}, @site.site_payload)
        end

        article_xpath = '//div[contains(concat(" ", normalize-space(@class), " "), " article-body ")]'
        article_nodes = Nokogiri::HTML(item.output).search(article_xpath)
        joined_text = article_nodes.map { |node| node.content }.join(" ")

        # CR, LF and TAB become spaces first, then every whitespace run collapses to one space
        joined_text.tr("\r\n\t", " ").gsub(/\s+/, " ").strip
      end
    end
  end
end
|
26
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Jekyll
  module LunrJsSearch
    # Value object holding the searchable fields of one page, post or document.
    class SearchEntry
      attr_reader :title, :url, :date, :categories, :body, :collection

      # Build an entry for any supported Jekyll item.
      #
      # @param page_or_post [Jekyll::Post, Jekyll::Page, Jekyll::Document]
      # @param renderer [#render] object whose render(item) returns the item's text
      # @return [SearchEntry, nil] nil when a document opts out of the index
      # @raise [RuntimeError] when the item is none of the supported types
      def self.create(page_or_post, renderer)
        return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
        return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
        return create_from_document(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Document)
        raise 'Item type not supported'
      end

      # Collection documents: skipped when their data sets "exclude_from_search"
      # or "redirect_to"; the entry date is the time of indexing.
      def self.create_from_document(document, renderer)
        return if document.data["exclude_from_search"] || document.data["redirect_to"]

        body = renderer.render(document)
        data = document.to_liquid
        SearchEntry.new(data['title'], data['url'], Time.now, data['category'], body)
      end

      # Pages carry no date and no categories.
      def self.create_from_page(page, renderer)
        title, url = extract_title_and_url(page)
        body = renderer.render(page)

        SearchEntry.new(title, url, nil, [], body, nil)
      end

      # Posts contribute their own date and categories.
      def self.create_from_post(post, renderer)
        title, url = extract_title_and_url(post)
        body = renderer.render(post)

        SearchEntry.new(title, url, post.date, post.categories, body, nil)
      end

      # @return [Array] the 'title' and 'url' values of the item's liquid data
      def self.extract_title_and_url(item)
        data = item.to_liquid
        [ data['title'], data['url'] ]
      end

      # @param collection [Object, nil] optional sixth argument. The page and
      #   post factories pass six arguments, so without this parameter they
      #   raised ArgumentError; five-argument callers keep working via the default.
      def initialize(title, url, date, categories, body, collection = nil)
        @title, @url, @date, @categories, @body, @collection = title, url, date, categories, body, collection
      end

      # Drop a trailing "index.html" from the URL.
      #
      # A new string is assigned instead of mutating in place: the URL string
      # comes from the item's liquid data and may be shared or frozen.
      def strip_index_suffix_from_url!
        return if @url.nil?

        @url = @url.sub(/index\.html\z/, '')
      end

      # remove anything that is in the stop words list from the text to be indexed
      #
      # @param stopwords [#include?] lower-case words to drop
      # @param min_length [Integer] words with fewer letters than this are dropped too
      def strip_stopwords!(stopwords, min_length)
        kept_words = @body.split.reject do |word|
          # Compare on letters only. [[:alpha:]] keeps non-ASCII letters, so
          # words such as "über" are not shortened or discarded.
          letters = word.downcase.gsub(/[^[:alpha:]]/, '')
          letters.length < min_length || stopwords.include?(letters)
        end

        @body = kept_words.join(' ')
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-lunr-js-search-plusplus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Garen J. Torikian
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.3'
|
55
|
+
description: Use lunr.js to provide simple full-text search, using JavaScript in your
|
56
|
+
browser, for your Jekyll static website.
|
57
|
+
email: gjtorikian@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- lib/jekyll-lunr-js-search.rb
|
63
|
+
- lib/jekyll_lunr_js_search/indexer.rb
|
64
|
+
- lib/jekyll_lunr_js_search/page_renderer.rb
|
65
|
+
- lib/jekyll_lunr_js_search/search_entry.rb
|
66
|
+
- lib/jekyll_lunr_js_search/search_index_file.rb
|
67
|
+
- lib/jekyll_lunr_js_search/version.rb
|
68
|
+
homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
|
69
|
+
licenses:
|
70
|
+
- MIT
|
71
|
+
metadata: {}
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options: []
|
74
|
+
require_paths:
|
75
|
+
- lib
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 2.2.2
|
89
|
+
signing_key:
|
90
|
+
specification_version: 4
|
91
|
+
summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
|
92
|
+
test_files: []
|