nanoc-search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.md +44 -0
  2. data/lib/nanoc-search.rb +73 -0
  3. metadata +69 -0
@@ -0,0 +1,44 @@
1
+ # Search
2
+
3
+ Index an item in an IndexTank-compatible API.
4
+
5
+ It uses [nokogiri](http://nokogiri.org/) and indextank.
6
+
7
+ ## Install
8
+
9
+ gem install nanoc-search
10
+
11
+ If you use bundler, add it to your Gemfile:
12
+
13
+ gem "nanoc-search", "~> 0.0.1"
14
+
15
+ ## Usage
16
+
17
+ Add to *lib/default.rb*:
18
+
19
+ ```ruby
20
+ require 'nanoc-search'
21
+ ```
22
+
23
+ Add to *config.yaml*:
24
+
25
+ indextank:
26
+ index: index_name
27
+ api_url: http://:password@api
28
+
29
+ Add the filter to a compile rule to use it:
30
+
31
+ ```ruby
32
+ compile '*' do
33
+ filter :search
34
+ filter :erb
35
+ layout 'default'
36
+ end
37
+ ```
38
+
39
+ ## License
40
+
41
+ (c) 2011 Pascal Widdershoven (https://github.com/PascalW/jekyll_indextank)
42
+ (c) 2012 Stormz
43
+
44
+ This code is free to use under the terms of the MIT license.
@@ -0,0 +1,73 @@
1
+ # Derived from https://github.com/PascalW/jekyll_indextank
2
+ require 'indextank'
3
+ require 'nokogiri'
4
+
5
# Nanoc text filter that sends the content of the item being compiled to an
# IndexTank-compatible search index and returns the content unchanged.
#
# The site configuration must provide:
#
#   indextank:
#     index: index_name
#     api_url: http://:password@api
class SearchFilter < Nanoc::Filter
  identifier :search
  type :text

  # File, relative to the working directory, that stores the Marshal-dumped
  # time of the last indexing.
  LAST_INDEXED_FILE = '.nanoc_indextank'

  # Seconds to sleep between two checks of the index status.
  POLL_INTERVAL = 0.5

  # Validates the indextank configuration, loads the time of the last
  # indexing and prepares the handle on the configured index.
  #
  # @param hash [Hash] assigns, forwarded untouched to Nanoc::Filter
  # @raise [ArgumentError] if indextank:api_url or indextank:index is missing
  def initialize(hash = {})
    super

    # @config is read right after `super`, which is expected to populate it.
    # A missing `indextank` section must produce the same explicit error as a
    # missing key instead of a NoMethodError on nil.
    settings = (@config && @config[:indextank]) || {}
    raise ArgumentError, 'Missing indextank:api_url.' unless settings[:api_url]
    raise ArgumentError, 'Missing indextank:index.' unless settings[:index]

    @last_indexed_file = LAST_INDEXED_FILE
    load_last_timestamp

    api = IndexTank::Client.new(settings[:api_url])
    @index = api.indexes(settings[:index])
  end

  # Indexes the item being filtered unless it is older than the last recorded
  # indexing. The text extracted from the content and the item title (or its
  # identifier when no title is set) are stored in a document whose id is the
  # item identifier.
  #
  # @param content [String] content of the item to index
  # @param params [Hash] filter parameters (unused)
  # @return [String] +content+, unmodified
  def run(content, params = {})
    # Only process items changed since the last indexing. An item without a
    # modification time cannot be compared, so it is always indexed.
    # NOTE(review): the timestamp is one value for the whole site and is
    # rewritten after every indexed item; if a new filter instance is created
    # for each item, items compiled later in the same run may be skipped --
    # confirm against the nanoc version in use.
    mtime = @item.mtime
    return content if @last_indexed && mtime && @last_indexed > mtime

    puts "Indexing page #{@item.identifier}"

    # Wait for the indextank index to get ready.
    # NOTE(review): this wait has no upper bound.
    sleep POLL_INTERVAL until @index.running?

    page_text = extract_text(content)

    @index.document(@item.identifier).add({
      :text => page_text,
      :title => @item[:title] || @item.identifier
    })
    # Interpolation avoids mutating a string literal and also works when the
    # identifier is not a String.
    puts "Indexed #{@item.identifier}"

    @last_indexed = Time.now
    write_last_indexed

    content
  end

  # Collects the text nodes of every element of an HTML document.
  #
  # @param content [String] HTML source
  # @return [String] the text nodes joined with single spaces, with carriage
  #   returns and line feeds replaced by spaces
  def extract_text(content)
    doc = Nokogiri::HTML(content)
    doc.xpath('//*/text()').to_a.join(' ').tr("\r\n", ' ')
  end

  # Persists the time of the last indexing. Failures are reported on standard
  # output and otherwise ignored: losing the timestamp only causes items to be
  # indexed again.
  def write_last_indexed
    # Binary mode: Marshal output is binary data and is read back with 'rb'.
    File.open(@last_indexed_file, 'wb') { |f| Marshal.dump(@last_indexed, f) }
  rescue StandardError
    puts 'WARNING: cannot write indexed timestamps file.'
  end

  # Restores the time of the last indexing into @last_indexed, or sets it to
  # nil when the file is missing or unreadable.
  def load_last_timestamp
    # SECURITY: Marshal.load must only ever read the file produced by
    # write_last_indexed; never point @last_indexed_file at untrusted data.
    @last_indexed = File.open(@last_indexed_file, 'rb') { |f| Marshal.load(f) }
  rescue StandardError
    @last_indexed = nil
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nanoc-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - François de Metz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-31 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &9072360 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *9072360
25
+ - !ruby/object:Gem::Dependency
26
+ name: indextank
27
+ requirement: &9071560 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *9071560
36
+ description:
37
+ email: francois@2metz.fr
38
+ executables: []
39
+ extensions: []
40
+ extra_rdoc_files:
41
+ - README.md
42
+ files:
43
+ - README.md
44
+ - lib/nanoc-search.rb
45
+ homepage: https://github.com/stormz/nanoc-plugins
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.10
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Index items from nanoc site
69
+ test_files: []