jekyll-lunr-js-search-plusplus 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 94cdd3d615f27ee7c6efd7e3d05fdd58b5ca5e95
4
+ data.tar.gz: 3ec0febef9b0a1eb932224fe5d6ebbc2592ccc08
5
+ SHA512:
6
+ metadata.gz: b91f860335f4311ce61351e875386afbb40a58f7862c95d6e01f7c82ed00939a64bb34afc3463aac1db0f4a9069fafe6cfa4025edc5648f8b33bbe3069cbbe2b
7
+ data.tar.gz: 5e3ce2c4be224fb9ff8032753e15f79762b8c03bf7c02f39f3ba82bd8701df768832fdfcb0984d9e1544f0d3fab7eeb8a3e9e5f25a452fbba199be3135855f44
@@ -0,0 +1,5 @@
1
+ require 'jekyll_lunr_js_search/version'
2
+ require 'jekyll_lunr_js_search/indexer'
3
+ require 'jekyll_lunr_js_search/page_renderer'
4
+ require 'jekyll_lunr_js_search/search_entry'
5
+ require 'jekyll_lunr_js_search/search_index_file'
@@ -0,0 +1,121 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'json'
5
+ require 'date'
6
+
7
module Jekyll
  module LunrJsSearch
    # Jekyll generator that builds the lunr.js search index: it renders every
    # indexable page, post and collection document and writes the extracted
    # text to a pretty-printed JSON file.
    class Indexer < Jekyll::Generator
      # @param config [Hash] site configuration. The optional 'lunr_search'
      #   hash overrides the defaults below ('excludes', 'strip_index_html',
      #   'min_length', 'stopwords') and may additionally set 'dev_mode'.
      def initialize(config = {})
        super(config)

        lunr_config = {
          'excludes' => [],
          'strip_index_html' => false,
          'min_length' => 3,
          'stopwords' => 'stopwords.txt'
        }.merge(config['lunr_search'] || {})

        # regular-expression sources; items whose url matches any are skipped
        @excludes = lunr_config['excludes']

        # if web host supports index.html as default doc, then optionally exclude it from the url
        @strip_index_html = lunr_config['strip_index_html']

        # stop word exclusion configuration
        @min_length = lunr_config['min_length']
        @stopwords_file = lunr_config['stopwords']

        # when truthy, an index that was already generated today is reused
        @dev_mode = lunr_config['dev_mode']

        # File I/O: name of the JSON index file
        @filename = 'search.json'

        # format of the "generation_time" stamp stored inside the index
        @generation_strftime = "%m-%d-%y"
      end

      # Index all pages, posts and collection documents except those whose url
      # matches a pattern in config['lunr_search']['excludes'] or whose front
      # matter sets data['exclude_from_search'].
      # The main content from each item is extracted and saved to disk as JSON.
      #
      # @param site [Jekyll::Site] the site being built
      def generate(site)
        if @dev_mode && index_generated_today?
          puts "\nNot running indexer in dev mode since search.json exists within the last day...\n"
          return
        end

        puts "\nRunning the search indexer...\n"

        # gather pages and posts, then every collection document
        items = pages_to_index(site)
        site.collections.each { |_name, collection| items.concat(collection.docs) }

        entries = build_index(items, PageRenderer.new(site))
        json = { :generation_time => Time.now.strftime(@generation_strftime), :entries => entries }

        # Create destination directory if it doesn't exist yet.
        Dir.mkdir(site.dest) unless File.directory?(site.dest)

        # NOTE(review): the index is written to search_json_location (relative
        # to the working directory), while the static file registered below
        # points at site.dest -- confirm this mismatch is intended.
        File.write(search_json_location, JSON.pretty_generate(json))

        # Keep the search.json file from being cleaned by Jekyll
        site.static_files << SearchIndexFile.new(site, site.dest, "/", @filename)
      end

      private

      # Path the JSON index is read from and written to.
      def search_json_location
        File.join("search", @filename)
      end

      # True when an index file exists and its "generation_time" stamp is
      # today's date. The whole date is compared (not just the day of month),
      # and an unreadable or malformed index counts as stale so it is rebuilt.
      def index_generated_today?
        return false unless File.exist?(search_json_location)

        generated_on = JSON.parse(File.read(search_json_location))["generation_time"]
        return false unless generated_on

        Date.strptime(generated_on, @generation_strftime) == Date.today
      rescue JSON::ParserError, ArgumentError, TypeError
        false
      end

      # Turn the items into the array of hashes stored under :entries.
      # Items for which SearchEntry.create returns nil are skipped.
      def build_index(items, content_renderer)
        # checked once: the stop-word file does not change during a build
        use_stopwords = File.exist?(@stopwords_file)

        items.each_with_object([]) do |item, index|
          entry = SearchEntry.create(item, content_renderer)
          next if entry.nil?

          entry.strip_index_suffix_from_url! if @strip_index_html
          entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

          index << {
            :title => entry.title,
            :url => entry.url,
            :date => entry.date,
            :categories => entry.categories,
            :collection => entry.collection,
            # NOTE(review): this is the entry's Ruby class, not page data --
            # confirm that is what the front end expects.
            :class => entry.class,
            :body => entry.body,
            :excerpt => entry.body[0..140] + "…"
          }
        end
      end

      # load the stopwords file (one word per line), memoized
      def stopwords
        @stopwords ||= File.readlines(@stopwords_file).map(&:strip)
      end

      # Collect shallow copies of the site's pages and posts, keeping only
      # those that will be converted to .html, are not excluded by url
      # pattern, and do not set data['exclude_from_search'].
      def pages_to_index(site)
        exclude_patterns = @excludes.map { |source| Regexp.new(source) }
        items = []

        # dup is a shallow copy: nested objects are still shared
        site.pages.each { |page| items << page.dup }
        site.posts.each { |post| items << post.dup }

        items.select! do |item|
          item.output_ext == '.html' && exclude_patterns.none? { |pattern| pattern =~ item.url }
        end
        items.reject! { |item| item.data['exclude_from_search'] }

        items
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'nokogiri'
2
+
3
module Jekyll
  module LunrJsSearch
    # Renders a site item and extracts the plain text of its article body.
    class PageRenderer
      def initialize(site)
        @site = site
      end

      # Render the item, parse the resulting HTML and return the text found
      # inside every <div> carrying the "article-body" class, with all runs
      # of whitespace collapsed to single spaces and the ends trimmed.
      def render(item)
        case item
        when Jekyll::Document
          item.output = Jekyll::Renderer.new(@site, item).run
        else
          item.render({}, @site.site_payload)
        end

        html = Nokogiri::HTML(item.output)
        article_nodes = html.search('//div[contains(concat(" ", normalize-space(@class), " "), " article-body ")]')

        # \s already covers \r, \n and \t, so one substitution is enough
        article_nodes.map(&:content).join(" ").gsub(/\s+/, " ").strip
      end
    end
  end
end
26
+
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+
3
module Jekyll
  module LunrJsSearch
    # One record of the search index: title, url, date, categories, body
    # text and (optionally) collection of a rendered site item.
    class SearchEntry
      # Build an entry for a post, page or collection document.
      #
      # @param page_or_post [Jekyll::Post, Jekyll::Page, Jekyll::Document]
      # @param renderer [#render] object returning the item's body text
      # @return [SearchEntry, nil] nil when a document opts out of the index
      # @raise [RuntimeError] when the item is of none of the supported types
      def self.create(page_or_post, renderer)
        return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
        return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
        return create_from_document(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Document)
        raise 'Item type not supported'
      end

      # Documents flagged with "exclude_from_search" or "redirect_to" are
      # skipped (nil is returned). The entry's date is the time of indexing.
      def self.create_from_document(document, renderer)
        return if document.data["exclude_from_search"] || document.data["redirect_to"]

        body = renderer.render(document)
        data = document.to_liquid
        new(data['title'], data['url'], Time.now, data['category'], body)
      end

      # Pages carry neither a date nor categories.
      def self.create_from_page(page, renderer)
        title, url = extract_title_and_url(page)
        body = renderer.render(page)

        new(title, url, nil, [], body, nil)
      end

      def self.create_from_post(post, renderer)
        title, url = extract_title_and_url(post)
        body = renderer.render(post)

        new(title, url, post.date, post.categories, body, nil)
      end

      # @return [Array] the item's liquid 'title' and 'url', in that order
      def self.extract_title_and_url(item)
        data = item.to_liquid
        [data['title'], data['url']]
      end

      attr_reader :title, :url, :date, :categories, :body, :collection

      # `collection` is optional so that both the five-argument call made for
      # documents and the six-argument calls made for pages and posts work;
      # previously the six-argument calls raised ArgumentError.
      def initialize(title, url, date, categories, body, collection = nil)
        @title = title
        @url = url
        @date = date
        @categories = categories
        @body = body
        @collection = collection
      end

      # Drop a trailing "index.html" from the url. A new string is assigned
      # rather than mutating in place, so the string handed in by the caller
      # (which may be shared or frozen) is left untouched.
      def strip_index_suffix_from_url!
        @url &&= @url.sub(/index\.html\z/, '')
      end

      # remove anything that is in the stop words list from the text to be indexed
      #
      # Each whitespace-separated word is normalised (downcased, everything
      # but letters removed) and dropped when the result is shorter than
      # min_length or is listed in stopwords. Letters are matched with
      # [[:alpha:]] so non-ASCII words are not reduced to nothing.
      def strip_stopwords!(stopwords, min_length)
        kept = @body.split.reject do |word|
          normalized = word.downcase.gsub(/[^[:alpha:]]/, '')
          normalized.length < min_length || stopwords.include?(normalized)
        end
        @body = kept.join(' ')
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Jekyll
  module LunrJsSearch
    # Static-file entry standing in for the generated search.json index.
    class SearchIndexFile < Jekyll::StaticFile
      # The index file has already been created by the time this is called,
      # so there is nothing to copy: the override does no work and always
      # reports success.
      def write(dest)
        true
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Jekyll
  module LunrJsSearch
    # Gem version. Frozen so the shared constant cannot be mutated in place.
    VERSION = "0.2.0".freeze
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-lunr-js-search-plusplus
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Garen J. Torikian
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ description: Use lunr.js to provide simple full-text search, using JavaScript in your
56
+ browser, for your Jekyll static website.
57
+ email: gjtorikian@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/jekyll-lunr-js-search.rb
63
+ - lib/jekyll_lunr_js_search/indexer.rb
64
+ - lib/jekyll_lunr_js_search/page_renderer.rb
65
+ - lib/jekyll_lunr_js_search/search_entry.rb
66
+ - lib/jekyll_lunr_js_search/search_index_file.rb
67
+ - lib/jekyll_lunr_js_search/version.rb
68
+ homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
69
+ licenses:
70
+ - MIT
71
+ metadata: {}
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 2.2.2
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
92
+ test_files: []