RubyGems - jekyll-lunr-js-search-plusplus - Versions diffs - 0.2.0 - Mend

jekyll-lunr-js-search-plusplus 0.2.0

Files changed (8) hide show

checksums.yaml +7 -0
data/lib/jekyll-lunr-js-search.rb +5 -0
data/lib/jekyll_lunr_js_search/indexer.rb +121 -0
data/lib/jekyll_lunr_js_search/page_renderer.rb +26 -0
data/lib/jekyll_lunr_js_search/search_entry.rb +62 -0
data/lib/jekyll_lunr_js_search/search_index_file.rb +10 -0
data/lib/jekyll_lunr_js_search/version.rb +5 -0
metadata +92 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 94cdd3d615f27ee7c6efd7e3d05fdd58b5ca5e95
+  data.tar.gz: 3ec0febef9b0a1eb932224fe5d6ebbc2592ccc08
+SHA512:
+  metadata.gz: b91f860335f4311ce61351e875386afbb40a58f7862c95d6e01f7c82ed00939a64bb34afc3463aac1db0f4a9069fafe6cfa4025edc5648f8b33bbe3069cbbe2b
+  data.tar.gz: 5e3ce2c4be224fb9ff8032753e15f79762b8c03bf7c02f39f3ba82bd8701df768832fdfcb0984d9e1544f0d3fab7eeb8a3e9e5f25a452fbba199be3135855f44

data/lib/jekyll-lunr-js-search.rb ADDED Viewed

@@ -0,0 +1,5 @@
+require 'jekyll_lunr_js_search/version'
+require 'jekyll_lunr_js_search/indexer'
+require 'jekyll_lunr_js_search/page_renderer'
+require 'jekyll_lunr_js_search/search_entry'
+require 'jekyll_lunr_js_search/search_index_file'

data/lib/jekyll_lunr_js_search/indexer.rb ADDED Viewed

@@ -0,0 +1,121 @@
+#!/bin/env ruby
+# encoding: utf-8
+require 'json'
+require 'date'
+module Jekyll
+  module LunrJsSearch
+    class Indexer < Jekyll::Generator
+      def initialize(config = {})
+        super(config)
+        lunr_config = {
+          'excludes' => [],
+          'strip_index_html' => false,
+          'min_length' => 3,
+          'stopwords' => 'stopwords.txt'
+        }.merge!(config['lunr_search'] || {})
+        @excludes = lunr_config['excludes']
+        # if web host supports index.html as default doc, then optionally exclude it from the url
+        @strip_index_html = lunr_config['strip_index_html']
+        # stop word exclusion configuration
+        @min_length = lunr_config['min_length']
+        @stopwords_file = lunr_config['stopwords']
+        @dev_mode = lunr_config['dev_mode']
+        # File I/O: create search.json file and write out pretty-printed JSON
+        @filename = 'search.json'
+        @generation_strftime = "%m-%d-%y"
+      end
+      # Index all pages except pages matching any value in config['lunr_excludes'] or with date['exclude_from_search']
+      # The main content from each page is extracted and saved to disk as json
+      def generate(site)
+        if @dev_mode && File.exist?(search_json_location)
+          search_json = JSON.parse(File.open(search_json_location).read)
+          if search_json["generation_time"] && Date.strptime(search_json["generation_time"], @generation_strftime).day == Time.now.day
+            puts "\nNot running indexer in dev mode since search.json exists within the last day...\n"
+            return
+          end
+        end
+        puts "\nRunning the search indexer...\n"
+        # gather pages and posts
+        items = pages_to_index(site)
+        site.collections.each do |name, collection|
+          collection.docs.each{ |document| items << document }
+        end
+        content_renderer = PageRenderer.new(site)
+        index = []
+        items.each do |item|
+          entry = SearchEntry.create(item, content_renderer)
+          next if entry.nil?
+          entry.strip_index_suffix_from_url! if @strip_index_html
+          entry.strip_stopwords!(stopwords, @min_length) if File.exists?(@stopwords_file)
+          index << {
+            :title => entry.title,
+            :url => entry.url,
+            :date => entry.date,
+            :categories => entry.categories,
+            :collection => entry.collection,
+            :class => entry.class,
+            :body => entry.body,
+            :excerpt => entry.body[0..140] + "…"
+          }
+          # puts 'Indexed ' << "#{entry.title} (#{entry.collection} - #{entry.url})"
+        end
+        json = {:generation_time => Time.now.strftime(@generation_strftime), :entries => index}
+        # Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
+        Dir::mkdir(site.dest) unless File.directory?(site.dest)
+        File.open(search_json_location, "w") do |file|
+          file.write(JSON.pretty_generate(json))
+        end
+        # Keep the search.json file from being cleaned by Jekyll
+        site.static_files << SearchIndexFile.new(site, site.dest, "/", @filename)
+      end
+    private
+      def search_json_location
+        File.join("search", @filename)
+      end
+      # load the stopwords file
+      def stopwords
+        @stopwords ||= IO.readlines(@stopwords_file).map { |l| l.strip }
+      end
+      def pages_to_index(site)
+        items = []
+        # deep copy pages
+        site.pages.each {|page| items << page.dup }
+        site.posts.each {|post| items << post.dup }
+        # only process files that will be converted to .html and only non excluded files
+        items.select! {|i| i.output_ext == '.html' && ! @excludes.any? {|s| (i.url =~ Regexp.new(s)) != nil } }
+        items.reject! {|i| i.data['exclude_from_search'] }
+        items
+      end
+    end
+  end
+end

data/lib/jekyll_lunr_js_search/page_renderer.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'nokogiri'
+module Jekyll
+  module LunrJsSearch
+    class PageRenderer
+      def initialize(site)
+        @site = site
+      end
+      # render the item, parse the output and get all text inside of it
+      def render(item)
+        if item.is_a?(Jekyll::Document)
+          item.output = Jekyll::Renderer.new(@site, item).run
+        else
+          item.render({}, @site.site_payload)
+        end
+        doc = Nokogiri::HTML(item.output)
+        paragraphs = doc.search('//div[contains(concat(" ", normalize-space(@class), " "), " article-body ")]').map {|t| t.content }
+        paragraphs = paragraphs.join(" ").gsub("\r", " ").gsub("\n", " ").gsub("\t", " ").gsub(/\s+/, " ")
+        paragraphs.strip
+      end
+    end
+  end
+end

data/lib/jekyll_lunr_js_search/search_entry.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require 'nokogiri'
+module Jekyll
+  module LunrJsSearch
+    class SearchEntry
+      def self.create(page_or_post, renderer)
+        return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
+        return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
+        return create_from_document(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Document)
+        raise 'Item type not supported'
+      end
+      def self.create_from_document(document, renderer)
+        return if document.data["exclude_from_search"] || document.data["redirect_to"]
+        body = renderer.render(document)
+        data = document.to_liquid
+        SearchEntry.new(data['title'], data['url'], Time.now, data['category'], body)
+      end
+      def self.create_from_page(page, renderer)
+        title, url = extract_title_and_url(page)
+        body = renderer.render(page)
+        date = nil
+        categories = []
+        SearchEntry.new(title, url, date, categories, body, nil)
+      end
+      def self.create_from_post(post, renderer)
+        title, url = extract_title_and_url(post)
+        body = renderer.render(post)
+        date = post.date
+        categories = post.categories
+        SearchEntry.new(title, url, date, categories, body, nil)
+      end
+      def self.extract_title_and_url(item)
+        data = item.to_liquid
+        [ data['title'], data['url'] ]
+      end
+      attr_reader :title, :url, :date, :categories, :body, :collection
+      def initialize(title, url, date, categories, body)
+        @title, @url, @date, @categories, @body, @collection = title, url, date, categories, body, collection
+      end
+      def strip_index_suffix_from_url!
+        @url.gsub!(/index\.html$/, '')
+      end
+      # remove anything that is in the stop words list from the text to be indexed
+      def strip_stopwords!(stopwords, min_length)
+        @body = @body.split.delete_if() do |x|
+          t = x.downcase.gsub(/[^a-z]/, '')
+          t.length < min_length || stopwords.include?(t)
+        end.join(' ')
+      end
+    end
+  end
+end

data/lib/jekyll_lunr_js_search/search_index_file.rb ADDED Viewed

@@ -0,0 +1,10 @@
+module Jekyll
+  module LunrJsSearch
+    class SearchIndexFile < Jekyll::StaticFile
+      # Override write as the search.json index file has already been created
+      def write(dest)
+        true
+      end
+    end
+  end
+end

data/lib/jekyll_lunr_js_search/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+module Jekyll
+  module LunrJsSearch
+      VERSION = "0.2.0"
+    end
+end

metadata ADDED Viewed

@@ -0,0 +1,92 @@
+--- !ruby/object:Gem::Specification
+name: jekyll-lunr-js-search-plusplus
+version: !ruby/object:Gem::Version
+  version: 0.2.0
+platform: ruby
+authors:
+- Garen J. Torikian
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-09-29 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.6'
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.8'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.8'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.3'
+description: Use lunr.js to provide simple full-text search, using JavaScript in your
+  browser, for your Jekyll static website.
+email: gjtorikian@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/jekyll-lunr-js-search.rb
+- lib/jekyll_lunr_js_search/indexer.rb
+- lib/jekyll_lunr_js_search/page_renderer.rb
+- lib/jekyll_lunr_js_search/search_entry.rb
+- lib/jekyll_lunr_js_search/search_index_file.rb
+- lib/jekyll_lunr_js_search/version.rb
+homepage: https://github.com/slashdotdash/jekyll-lunr-js-search
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: Jekyll + lunr.js = static websites with powerful full-text search using JavaScript
+test_files: []