jekyll-lunr-js-search 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MTU1YjcxZmY2NjBlYzY2M2RlM2RiYjQ4ZGYwNjI1ZmZmYzM4ODRjMA==
5
+ data.tar.gz: !binary |-
6
+ M2Y3ZWI1NTQzMWRjZThhMjI2MjI4YjA0MWE0MzgzNDY4ZWU0NjFmZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YWQ2NGExZTAyOWIxZmQ4MzU3NTNhZTc2ZDI2NGFhNzNiMzExZTkzMDg5MDY0
10
+ MDdhNGFhYmE1ZjlmMDdjMWI0ZDk2ZGI1NjAxMmQ2MzAzNTYzMTFlNjRjZTBk
11
+ MDEyOGVmMTRlMDliZGRmMjBmNWYwMDJjZGI4ZWE1NDAzNmE1YWY=
12
+ data.tar.gz: !binary |-
13
+ ZjcxNjEwY2MzODhiN2I4ODdjNmQyODg3ZDdlMDllNTRmNTU0ODAzMjJhMTc3
14
+ YjkwNjYyY2Q4NmRjOWFlNzliNjhkYjk3NzM0MTdhNjA4NzUxNTA1ZTU4Yzk2
15
+ ZDQ2OWM2NTgzODRhMzk1MjgxNWNmMTM0MWIwMWFkY2IxMGFhMDc=
@@ -0,0 +1,5 @@
1
+ require 'jekyll_lunr_js_search/version'
2
+ require 'jekyll_lunr_js_search/indexer'
3
+ require 'jekyll_lunr_js_search/page_renderer'
4
+ require 'jekyll_lunr_js_search/search_entry'
5
+ require 'jekyll_lunr_js_search/search_index_file'
@@ -0,0 +1,91 @@
1
+ require 'json'
2
+
3
module Jekyll
  module LunrJsSearch
    # Jekyll generator that builds the full-text search index.
    #
    # Every page and post that renders to HTML and is not excluded is turned
    # into a SearchEntry, and the collected entries are written as JSON to
    # `search.json` inside the site's destination directory.
    class Indexer < Jekyll::Generator
      # Name of the generated index file inside the destination directory.
      INDEX_FILENAME = 'search.json'.freeze

      # Reads the optional `lunr_search` section of the site configuration.
      #
      # Recognised keys (with their defaults):
      #   excludes         ([])              patterns matched against item URLs
      #   strip_index_html (false)           drop a trailing index.html from URLs
      #   min_length       (3)               minimum word length kept in the body
      #   stopwords        ('stopwords.txt') path of the stop word list
      def initialize(config = {})
        super(config)

        lunr_config = {
          'excludes' => [],
          'strip_index_html' => false,
          'min_length' => 3,
          'stopwords' => 'stopwords.txt'
        }.merge(config['lunr_search'] || {})

        @excludes = lunr_config['excludes']

        # if web host supports index.html as default doc, then optionally exclude it from the url
        @strip_index_html = lunr_config['strip_index_html']

        # stop word exclusion configuration
        @min_length = lunr_config['min_length']
        @stopwords_file = lunr_config['stopwords']
      end

      # Index all pages and posts except those whose URL matches any pattern in
      # the `excludes` setting or whose data['exclude_from_search'] is set.
      # The text content of each item is extracted and saved to disk as JSON.
      def generate(site)
        puts 'Running the search indexer...'

        content_renderer = PageRenderer.new(site)

        # The stop word list is optional. Decide once, up front, whether it can
        # be used instead of hitting the file system for every item.
        filter_stopwords = !@stopwords_file.nil? && File.exist?(@stopwords_file)

        index = pages_to_index(site).map do |item|
          entry = SearchEntry.create(item, content_renderer)

          entry.strip_index_suffix_from_url! if @strip_index_html
          entry.strip_stopwords!(stopwords, @min_length) if filter_stopwords

          puts "Indexed #{entry.title} (#{entry.url})"

          {
            :title => entry.title,
            :url => entry.url,
            :date => entry.date,
            :categories => entry.categories,
            :body => entry.body
          }
        end

        json = JSON.generate({ :entries => index })

        # Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
        Dir.mkdir(site.dest) unless File.directory?(site.dest)

        # File I/O: create the search.json file and write out the (compact) JSON
        File.open(File.join(site.dest, INDEX_FILENAME), 'w') do |file|
          file.write(json)
        end

        # Keep the search.json file from being cleaned by Jekyll
        site.static_files << SearchIndexFile.new(site, site.dest, '/', INDEX_FILENAME)
      end

      private

      # Load the stop words file (one word per line); the result is memoized.
      def stopwords
        @stopwords ||= File.readlines(@stopwords_file).map { |line| line.strip }
      end

      # Collect copies of the pages and posts that should be indexed.
      def pages_to_index(site)
        # Compile each exclude pattern once instead of once per item. Array()
        # also tolerates a nil or single-string `excludes` setting.
        exclude_patterns = Array(@excludes).map { |pattern| Regexp.new(pattern) }

        # `dup` makes shallow copies, so work done on the items for indexing is
        # performed on the copies rather than on the site's own objects.
        items = site.pages.map { |page| page.dup } + site.posts.map { |post| post.dup }

        # only process files that will be converted to .html and only non excluded files
        items.select do |item|
          next false unless item.output_ext == '.html'
          next false if item.data['exclude_from_search']

          exclude_patterns.none? { |pattern| item.url =~ pattern }
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'nokogiri'
2
+
3
module Jekyll
  module LunrJsSearch
    # Renders a page or post and reduces the resulting HTML to plain text.
    class PageRenderer
      # site - object whose `site_payload` is handed to each item when it is
      #        rendered.
      def initialize(site)
        @site = site
      end

      # Render the item, parse its output as HTML and return the content of
      # every text node (not just <p> elements) joined into a single string.
      # Each run of whitespace, including \r, \n and \t, is collapsed into one
      # space.
      #
      # item - object responding to `render(layouts, payload)` and `output`.
      #
      # Returns the extracted text as a String.
      def render(item)
        # An empty layouts hash is passed; presumably this renders the item
        # without wrapping it in a layout -- confirm against the Jekyll version in use.
        item.render({}, @site.site_payload)

        doc = Nokogiri::HTML(item.output)
        text_nodes = doc.search('//text()').map { |node| node.content }

        # /\s+/ already matches \r, \n and \t, so one pass is enough.
        text_nodes.join(' ').gsub(/\s+/, ' ')
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'nokogiri'
2
+
3
module Jekyll
  module LunrJsSearch
    # One record of the search index: the title, URL, date, categories and
    # plain-text body of a single page or post.
    class SearchEntry
      attr_reader :title, :url, :date, :categories, :body

      # Build an entry from either a Jekyll::Post or a Jekyll::Page.
      #
      # page_or_post - the item to index.
      # renderer     - object whose `render(item)` returns the item's text.
      #
      # Raises RuntimeError ('Not supported') for any other kind of object.
      def self.create(page_or_post, renderer)
        return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
        return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
        raise 'Not supported'
      end

      # Pages carry no date and no categories.
      def self.create_from_page(page, renderer)
        title, url = extract_title_and_url(page)
        body = renderer.render(page)

        SearchEntry.new(title, url, nil, [], body)
      end

      # Posts additionally contribute their date and categories.
      def self.create_from_post(post, renderer)
        title, url = extract_title_and_url(post)
        body = renderer.render(post)

        SearchEntry.new(title, url, post.date, post.categories, body)
      end

      # Read 'title' and 'url' from the item's liquid representation.
      def self.extract_title_and_url(item)
        data = item.to_liquid
        [data['title'], data['url']]
      end

      def initialize(title, url, date, categories, body)
        @title, @url, @date, @categories, @body = title, url, date, categories, body
      end

      # Remove a trailing "index.html" from the URL, if present.
      #
      # A new string is assigned instead of mutating the existing one in
      # place: the URL string was handed in from outside and may be frozen or
      # shared with its source object. \z anchors at the very end of the
      # string (unlike $, which also matches before a newline).
      def strip_index_suffix_from_url!
        @url = @url.sub(/index\.html\z/, '') if @url
      end

      # Remove anything that is in the stop words list from the text to be
      # indexed.
      #
      # Each whitespace-separated token is normalised (downcased, everything
      # but letters removed) and dropped when the normalised form is shorter
      # than min_length or appears in stopwords. Letters are matched with
      # [[:alpha:]] so that non-ASCII words are not reduced to nothing; tokens
      # without any letters (e.g. pure numbers) are still dropped. Kept tokens
      # retain their original spelling and punctuation.
      #
      # stopwords  - collection of lower-case words to remove (nil is treated
      #              as empty).
      # min_length - minimum number of letters a token needs to be kept.
      def strip_stopwords!(stopwords, min_length)
        # Hash lookup instead of a linear scan of the list for every token.
        stop_lookup = {}
        Array(stopwords).each { |word| stop_lookup[word] = true }

        kept = @body.to_s.split.reject do |token|
          normalized = token.downcase.gsub(/[^[:alpha:]]/, '')
          normalized.length < min_length || stop_lookup.key?(normalized)
        end

        @body = kept.join(' ')
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Jekyll
  module LunrJsSearch
    # Static file entry that stands for the generated search.json index.
    class SearchIndexFile < Jekyll::StaticFile
      # Overrides write as a no-op: the search.json index file has already
      # been created, so nothing is written here and success is reported.
      #
      # Always returns true.
      def write(_dest)
        true
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Jekyll
  module LunrJsSearch
    # Gem version string; frozen so the constant cannot be mutated in place.
    VERSION = '0.1.1'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-lunr-js-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Ben Smith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: uglifier
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.5'
69
+ description: Use lunr.js to provide simple full-text search, using JavaScript in your
70
+ browser, for your Jekyll static website.
71
+ email: ben@10consulting.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/jekyll-lunr-js-search.rb
77
+ - lib/jekyll_lunr_js_search/indexer.rb
78
+ - lib/jekyll_lunr_js_search/page_renderer.rb
79
+ - lib/jekyll_lunr_js_search/search_entry.rb
80
+ - lib/jekyll_lunr_js_search/search_index_file.rb
81
+ - lib/jekyll_lunr_js_search/version.rb
82
+ homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
83
+ licenses:
84
+ - MIT
85
+ metadata: {}
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 2.4.1
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
106
+ test_files: []