jekyll-lunr-js-search-plusplus 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 94cdd3d615f27ee7c6efd7e3d05fdd58b5ca5e95
4
+ data.tar.gz: 3ec0febef9b0a1eb932224fe5d6ebbc2592ccc08
5
+ SHA512:
6
+ metadata.gz: b91f860335f4311ce61351e875386afbb40a58f7862c95d6e01f7c82ed00939a64bb34afc3463aac1db0f4a9069fafe6cfa4025edc5648f8b33bbe3069cbbe2b
7
+ data.tar.gz: 5e3ce2c4be224fb9ff8032753e15f79762b8c03bf7c02f39f3ba82bd8701df768832fdfcb0984d9e1544f0d3fab7eeb8a3e9e5f25a452fbba199be3135855f44
@@ -0,0 +1,5 @@
1
+ require 'jekyll_lunr_js_search/version'
2
+ require 'jekyll_lunr_js_search/indexer'
3
+ require 'jekyll_lunr_js_search/page_renderer'
4
+ require 'jekyll_lunr_js_search/search_entry'
5
+ require 'jekyll_lunr_js_search/search_index_file'
@@ -0,0 +1,121 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'json'
5
+ require 'date'
6
+
7
+ module Jekyll
8
+ module LunrJsSearch
9
# Jekyll generator that builds the lunr.js search index file (search.json).
#
# Settings are read from the 'lunr_search' key of the site configuration:
#   excludes         - list of regex sources; pages/posts whose url matches any are skipped
#   strip_index_html - when true, a trailing "index.html" is removed from entry urls
#   min_length       - words shorter than this are dropped when stopword stripping runs
#   stopwords        - path of the stopwords file (one word per line)
#   dev_mode         - when truthy, an index already generated today is reused
class Indexer < Jekyll::Generator
  def initialize(config = {})
    super(config)

    # Non-mutating merge: user settings override the defaults below.
    lunr_config = {
      'excludes' => [],
      'strip_index_html' => false,
      'min_length' => 3,
      'stopwords' => 'stopwords.txt'
    }.merge(config['lunr_search'] || {})

    @excludes = lunr_config['excludes']

    # if web host supports index.html as default doc, then optionally exclude it from the url
    @strip_index_html = lunr_config['strip_index_html']

    # stop word exclusion configuration
    @min_length = lunr_config['min_length']
    @stopwords_file = lunr_config['stopwords']

    @dev_mode = lunr_config['dev_mode']

    # File I/O: create search.json file and write out pretty-printed JSON
    @filename = 'search.json'

    # Format of the "generation_time" stamp stored in the index file.
    @generation_strftime = "%m-%d-%y"
  end

  # Index all pages and posts except those whose url matches any value in
  # config['lunr_search']['excludes'] or that set data['exclude_from_search'],
  # plus every document of every collection.
  # The main content from each item is extracted and saved to disk as json.
  def generate(site)
    if @dev_mode && dev_index_fresh?
      puts "\nNot running indexer in dev mode since search.json exists within the last day...\n"
      return
    end

    puts "\nRunning the search indexer...\n"

    # gather pages and posts, then all collection documents
    items = pages_to_index(site)

    site.collections.each do |_name, collection|
      collection.docs.each { |document| items << document }
    end

    content_renderer = PageRenderer.new(site)
    # Checked once instead of once per item; a nil path simply disables stripping.
    use_stopwords = @stopwords_file && File.exist?(@stopwords_file)
    index = []

    items.each do |item|
      entry = SearchEntry.create(item, content_renderer)

      next if entry.nil?

      entry.strip_index_suffix_from_url! if @strip_index_html
      entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

      index << {
        :title => entry.title,
        :url => entry.url,
        :date => entry.date,
        :categories => entry.categories,
        :collection => entry.collection,
        # NOTE(review): this is Ruby's Object#class (the entry's Ruby class),
        # not page data -- confirm that is what the front end expects.
        :class => entry.class,
        :body => entry.body,
        :excerpt => entry.body[0..140] + "…"
      }
    end

    json = {:generation_time => Time.now.strftime(@generation_strftime), :entries => index}

    # Create destination directory if it doesn't exist yet.
    Dir.mkdir(site.dest) unless File.directory?(site.dest)

    # NOTE(review): the index is written to search_json_location (relative to
    # the current working directory), not into site.dest -- confirm intended.
    File.open(search_json_location, "w") do |file|
      file.write(JSON.pretty_generate(json))
    end

    # Keep the search.json file from being cleaned by Jekyll
    site.static_files << SearchIndexFile.new(site, site.dest, "/", @filename)
  end

  private

  # Path of the generated index file.
  def search_json_location
    File.join("search", @filename)
  end

  # True when an index file exists and its "generation_time" stamp is today's
  # date. The full date is compared (not just the day of the month), and an
  # unreadable or malformed index counts as stale so it gets regenerated.
  def dev_index_fresh?
    return false unless File.exist?(search_json_location)

    search_json = JSON.parse(File.read(search_json_location))
    stamp = search_json.is_a?(Hash) && search_json["generation_time"]
    return false unless stamp

    Date.strptime(stamp, @generation_strftime) == Date.today
  rescue JSON::ParserError, ArgumentError, TypeError
    false
  end

  # load the stopwords file (read once, then memoized)
  def stopwords
    @stopwords ||= IO.readlines(@stopwords_file).map { |l| l.strip }
  end

  # Returns shallow copies (dup) of the site's pages and posts that will be
  # converted to .html, are not matched by an exclude pattern and do not set
  # data['exclude_from_search'].
  def pages_to_index(site)
    # Compile each exclude pattern once rather than once per item.
    exclude_patterns = @excludes.map { |source| Regexp.new(source) }

    candidates = site.pages.map { |page| page.dup } + site.posts.map { |post| post.dup }

    candidates.select do |item|
      item.output_ext == '.html' &&
        exclude_patterns.none? { |pattern| item.url =~ pattern } &&
        !item.data['exclude_from_search']
    end
  end
end
120
+ end
121
+ end
@@ -0,0 +1,26 @@
1
+ require 'nokogiri'
2
+
3
+ module Jekyll
4
+ module LunrJsSearch
5
# Renders a site item and extracts the plain text of its article body.
class PageRenderer
  def initialize(site)
    @site = site
  end

  # Render the item, parse the resulting HTML and return the text found inside
  # every <div> whose class list contains "article-body", joined into a single
  # string with all whitespace runs collapsed to one space and the ends trimmed.
  def render(item)
    case item
    when Jekyll::Document
      item.output = Jekyll::Renderer.new(@site, item).run
    else
      item.render({}, @site.site_payload)
    end

    article_xpath = '//div[contains(concat(" ", normalize-space(@class), " "), " article-body ")]'
    text = Nokogiri::HTML(item.output).search(article_xpath).map(&:content).join(" ")

    # \s already covers \r, \n and \t, so one pass collapses every whitespace run.
    text.gsub(/\s+/, " ").strip
  end
end
24
+ end
25
+ end
26
+
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+
3
+ module Jekyll
4
+ module LunrJsSearch
5
# One record of the search index: the searchable data extracted from a post,
# page or collection document.
class SearchEntry
  attr_reader :title, :url, :date, :categories, :body, :collection

  # Build an entry for the given item using the builder that matches its type.
  # Returns nil when a document opts out of the index; raises a RuntimeError
  # for any other item type.
  def self.create(page_or_post, renderer)
    case page_or_post
    when Jekyll::Post     then create_from_post(page_or_post, renderer)
    when Jekyll::Page     then create_from_page(page_or_post, renderer)
    when Jekyll::Document then create_from_document(page_or_post, renderer)
    else raise 'Item type not supported'
    end
  end

  # Documents flagged with "exclude_from_search" or "redirect_to" yield nil.
  # The entry date is the time of indexing, and no collection name is passed,
  # so `collection` is nil for these entries.
  def self.create_from_document(document, renderer)
    return if document.data["exclude_from_search"] || document.data["redirect_to"]

    body = renderer.render(document)
    data = document.to_liquid
    SearchEntry.new(data['title'], data['url'], Time.now, data['category'], body)
  end

  # Pages carry no date and no categories.
  def self.create_from_page(page, renderer)
    title, url = extract_title_and_url(page)
    body = renderer.render(page)

    SearchEntry.new(title, url, nil, [], body, nil)
  end

  # Posts contribute their own date and categories.
  def self.create_from_post(post, renderer)
    title, url = extract_title_and_url(post)
    body = renderer.render(post)

    SearchEntry.new(title, url, post.date, post.categories, body, nil)
  end

  # Returns [title, url] taken from the item's liquid data.
  def self.extract_title_and_url(item)
    data = item.to_liquid
    [data['title'], data['url']]
  end

  # `collection` is optional so both the five-argument call (documents) and
  # the six-argument calls (pages, posts) are accepted.
  def initialize(title, url, date, categories, body, collection = nil)
    @title = title
    @url = url
    @date = date
    @categories = categories
    @body = body
    @collection = collection
  end

  # Drop a trailing "index.html" from the url. A new string is assigned rather
  # than editing in place, so a url string shared with the source item is left
  # untouched; a nil url stays nil.
  def strip_index_suffix_from_url!
    @url &&= @url.sub(/index\.html\z/, '')
  end

  # remove anything that is in the stop words list from the text to be indexed,
  # along with words whose a-z letters (after downcasing) number fewer than
  # min_length
  def strip_stopwords!(stopwords, min_length)
    kept = @body.split.reject do |word|
      normalized = word.downcase.gsub(/[^a-z]/, '')
      normalized.length < min_length || stopwords.include?(normalized)
    end
    @body = kept.join(' ')
  end
end
61
+ end
62
+ end
@@ -0,0 +1,10 @@
1
+ module Jekyll
2
+ module LunrJsSearch
3
# Static-file entry for the generated search index.
class SearchIndexFile < Jekyll::StaticFile
  # Deliberately does nothing: the search.json index file has already been
  # created by the time this is called, so nothing is copied to +dest+.
  # Always returns true.
  def write(dest)
    true
  end
end
9
+ end
10
+ end
@@ -0,0 +1,5 @@
1
module Jekyll
  module LunrJsSearch
    # Gem version string; frozen so the constant cannot be mutated in place.
    VERSION = "0.2.0".freeze
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-lunr-js-search-plusplus
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Garen J. Torikian
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.3'
55
+ description: Use lunr.js to provide simple full-text search, using JavaScript in your
56
+ browser, for your Jekyll static website.
57
+ email: gjtorikian@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/jekyll-lunr-js-search.rb
63
+ - lib/jekyll_lunr_js_search/indexer.rb
64
+ - lib/jekyll_lunr_js_search/page_renderer.rb
65
+ - lib/jekyll_lunr_js_search/search_entry.rb
66
+ - lib/jekyll_lunr_js_search/search_index_file.rb
67
+ - lib/jekyll_lunr_js_search/version.rb
68
+ homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
69
+ licenses:
70
+ - MIT
71
+ metadata: {}
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 2.2.2
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
92
+ test_files: []