jekyll-lunr-js-search 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MTU1YjcxZmY2NjBlYzY2M2RlM2RiYjQ4ZGYwNjI1ZmZmYzM4ODRjMA==
5
+ data.tar.gz: !binary |-
6
+ M2Y3ZWI1NTQzMWRjZThhMjI2MjI4YjA0MWE0MzgzNDY4ZWU0NjFmZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YWQ2NGExZTAyOWIxZmQ4MzU3NTNhZTc2ZDI2NGFhNzNiMzExZTkzMDg5MDY0
10
+ MDdhNGFhYmE1ZjlmMDdjMWI0ZDk2ZGI1NjAxMmQ2MzAzNTYzMTFlNjRjZTBk
11
+ MDEyOGVmMTRlMDliZGRmMjBmNWYwMDJjZGI4ZWE1NDAzNmE1YWY=
12
+ data.tar.gz: !binary |-
13
+ ZjcxNjEwY2MzODhiN2I4ODdjNmQyODg3ZDdlMDllNTRmNTU0ODAzMjJhMTc3
14
+ YjkwNjYyY2Q4NmRjOWFlNzliNjhkYjk3NzM0MTdhNjA4NzUxNTA1ZTU4Yzk2
15
+ ZDQ2OWM2NTgzODRhMzk1MjgxNWNmMTM0MWIwMWFkY2IxMGFhMDc=
@@ -0,0 +1,5 @@
1
+ require 'jekyll_lunr_js_search/version'
2
+ require 'jekyll_lunr_js_search/indexer'
3
+ require 'jekyll_lunr_js_search/page_renderer'
4
+ require 'jekyll_lunr_js_search/search_entry'
5
+ require 'jekyll_lunr_js_search/search_index_file'
@@ -0,0 +1,91 @@
1
+ require 'json'
2
+
3
+ module Jekyll
4
+ module LunrJsSearch
5
# Jekyll generator that builds the lunr.js search index.
#
# It renders every indexable page and post, collects title, url, date,
# categories and body text for each, and writes the result as JSON to
# search.json in the site's destination directory.
class Indexer < Jekyll::Generator
  # Reads the indexer settings from the 'lunr_search' key of the site
  # configuration, falling back to the defaults below for missing keys.
  #
  # @param config [Hash] the site configuration
  def initialize(config = {})
    super(config)

    lunr_config = {
      'excludes' => [],
      'strip_index_html' => false,
      'min_length' => 3,
      'stopwords' => 'stopwords.txt'
    }.merge(config['lunr_search'] || {})

    # patterns (regular expression sources) matched against item urls
    @excludes = lunr_config['excludes']

    # if web host supports index.html as default doc, then optionally exclude it from the url
    @strip_index_html = lunr_config['strip_index_html']

    # stop word exclusion configuration
    @min_length = lunr_config['min_length']
    @stopwords_file = lunr_config['stopwords']
  end

  # Index all pages and posts except those whose url matches any pattern in
  # the 'excludes' setting or whose data has 'exclude_from_search' set.
  # The text content of each item is extracted and saved to disk as JSON.
  #
  # @param site the Jekyll site being generated
  def generate(site)
    puts 'Running the search indexer...'

    content_renderer = PageRenderer.new(site)

    # Checked once rather than per item. File.exist? replaces File.exists?,
    # which was removed in Ruby 3.2. A nil/false setting disables stopwords.
    use_stopwords = @stopwords_file && File.exist?(@stopwords_file)

    index = pages_to_index(site).map do |item|
      entry = SearchEntry.create(item, content_renderer)

      entry.strip_index_suffix_from_url! if @strip_index_html
      entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

      puts "Indexed #{entry.title} (#{entry.url})"

      {
        title: entry.title,
        url: entry.url,
        date: entry.date,
        categories: entry.categories,
        body: entry.body
      }
    end

    json = JSON.generate(entries: index)

    # Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
    Dir.mkdir(site.dest) unless File.directory?(site.dest)

    # File I/O: create the search.json file and write out the (compact) JSON
    filename = 'search.json'
    File.write(File.join(site.dest, filename), json)

    # Keep the search.json file from being cleaned by Jekyll
    site.static_files << SearchIndexFile.new(site, site.dest, "/", filename)
  end

  private

  # Load the stopwords file (one word per line), memoized after first read.
  # File.readlines is used instead of IO.readlines so a configured name is
  # always treated as a file path.
  def stopwords
    @stopwords ||= File.readlines(@stopwords_file).map(&:strip)
  end

  # Collect the pages and posts that should appear in the search index.
  #
  # @param site the Jekyll site being generated
  # @return [Array] shallow copies (via dup) of the matching pages and posts
  def pages_to_index(site)
    # compile each exclude pattern once instead of once per item
    patterns = Array(@excludes).map { |source| Regexp.new(source) }

    items = []
    site.pages.each { |page| items << page.dup }
    site.posts.each { |post| items << post.dup }

    # only process files that will be converted to .html, that are not
    # flagged with exclude_from_search, and whose url matches no exclude pattern
    items.select do |item|
      next false unless item.output_ext == '.html'
      next false if item.data['exclude_from_search']

      patterns.none? { |pattern| item.url =~ pattern }
    end
  end
end
90
+ end
91
+ end
@@ -0,0 +1,19 @@
1
+ require 'nokogiri'
2
+
3
+ module Jekyll
4
+ module LunrJsSearch
5
# Renders a page or post and reduces the resulting HTML to plain text.
class PageRenderer
  # @param site the Jekyll site; its site_payload is passed to each render
  def initialize(site)
    @site = site
  end

  # Render the item, parse the output and collect the content of every text
  # node in the document (not just <p> elements).
  #
  # @param item an object responding to render(layouts, payload) and output
  # @return [String] the text nodes joined by single spaces, with every run
  #   of whitespace (including CR, LF and tabs) collapsed to one space
  def render(item)
    item.render({}, @site.site_payload)
    text_nodes = Nokogiri::HTML(item.output).search('//text()')

    # \s+ already covers \r, \n and \t, so a single collapse is sufficient
    text_nodes.map { |node| node.content }.join(" ").gsub(/\s+/, " ")
  end
end
18
+ end
19
+ end
@@ -0,0 +1,54 @@
1
+ require 'nokogiri'
2
+
3
+ module Jekyll
4
+ module LunrJsSearch
5
# The searchable fields (title, url, date, categories, body) of one page or post.
class SearchEntry
  attr_reader :title, :url, :date, :categories, :body

  # Build an entry from a Jekyll post or page.
  #
  # @param page_or_post a Jekyll::Post or Jekyll::Page
  # @param renderer an object whose render(item) returns the body text
  # @return [SearchEntry]
  # @raise [RuntimeError] when the item is neither a post nor a page
  def self.create(page_or_post, renderer)
    case page_or_post
    when Jekyll::Post then create_from_post(page_or_post, renderer)
    when Jekyll::Page then create_from_page(page_or_post, renderer)
    else raise 'Not supported'
    end
  end

  # Pages carry no date or categories, so those are nil and empty.
  def self.create_from_page(page, renderer)
    title, url = extract_title_and_url(page)
    body = renderer.render(page)

    SearchEntry.new(title, url, nil, [], body)
  end

  # Posts contribute their own date and categories.
  def self.create_from_post(post, renderer)
    title, url = extract_title_and_url(post)
    body = renderer.render(post)

    SearchEntry.new(title, url, post.date, post.categories, body)
  end

  # Read 'title' and 'url' from the item's liquid representation.
  #
  # @return [Array] a two-element array: [title, url]
  def self.extract_title_and_url(item)
    data = item.to_liquid
    [data['title'], data['url']]
  end

  def initialize(title, url, date, categories, body)
    @title = title
    @url = url
    @date = date
    @categories = categories
    @body = body
  end

  # Drop a trailing "index.html" from the url.
  #
  # A new string is assigned instead of mutating in place, so the string
  # object handed to the constructor is left untouched and a frozen url
  # does not raise.
  def strip_index_suffix_from_url!
    @url = @url.sub(/index\.html\z/, '')
  end

  # Remove anything that is in the stop words list, or shorter than
  # min_length, from the text to be indexed.
  #
  # Each word is compared after downcasing and stripping non-letters.
  # [:alpha:] is used rather than a-z so that words written with non-ASCII
  # letters are not reduced to nothing and dropped from the index.
  #
  # @param stopwords [#include?] words to remove (lower case)
  # @param min_length [Integer] minimum number of letters a word must have
  def strip_stopwords!(stopwords, min_length)
    kept = @body.split.reject do |word|
      letters = word.downcase.gsub(/[^[:alpha:]]/, '')
      letters.length < min_length || stopwords.include?(letters)
    end

    @body = kept.join(' ')
  end
end
53
+ end
54
+ end
@@ -0,0 +1,10 @@
1
+ module Jekyll
2
+ module LunrJsSearch
3
# Static-file entry registered for the generated search.json index.
class SearchIndexFile < Jekyll::StaticFile
  # No-op override: the search.json index file has already been written to
  # disk by the indexer, so there is nothing to copy here.
  #
  # @param dest the destination path (ignored)
  # @return [true] always
  def write(dest)
    true
  end
end
9
+ end
10
+ end
@@ -0,0 +1,5 @@
1
module Jekyll
  module LunrJsSearch
    # Release version of the gem. Frozen so the constant's string cannot be
    # mutated in place at runtime.
    VERSION = "0.1.1".freeze
  end
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-lunr-js-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Ben Smith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: uglifier
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2.5'
69
+ description: Use lunr.js to provide simple full-text search, using JavaScript in your
70
+ browser, for your Jekyll static website.
71
+ email: ben@10consulting.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/jekyll-lunr-js-search.rb
77
+ - lib/jekyll_lunr_js_search/indexer.rb
78
+ - lib/jekyll_lunr_js_search/page_renderer.rb
79
+ - lib/jekyll_lunr_js_search/search_entry.rb
80
+ - lib/jekyll_lunr_js_search/search_index_file.rb
81
+ - lib/jekyll_lunr_js_search/version.rb
82
+ homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
83
+ licenses:
84
+ - MIT
85
+ metadata: {}
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 2.4.1
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
106
+ test_files: []