nanoc-search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.md +44 -0
  2. data/lib/nanoc-search.rb +73 -0
  3. metadata +69 -0
@@ -0,0 +1,44 @@
1
+ # Search
2
+
3
+ Index items in an indextank-compatible API.
4
+
5
+ It uses [nokogiri](http://nokogiri.org/) and indextank.
6
+
7
+ ## Install
8
+
9
+ gem install nanoc-search
10
+
11
+ If you use bundler, add it to your Gemfile:
12
+
13
+ gem "nanoc-search", "~> 0.0.1"
14
+
15
+ ## Usage
16
+
17
+ Add to *lib/default.rb*:
18
+
19
+ ```ruby
20
+ require 'nanoc-search'
21
+ ```
22
+
23
+ Add to *config.yaml*:
24
+
25
+ indextank:
26
+ index: index_name
27
+ api_url: http://:password@api
28
+
29
+ Add the filter to a compile rule to use it:
30
+
31
+ ```ruby
32
+ compile '*' do
33
+ filter :search
34
+ filter :erb
35
+ layout 'default'
36
+ end
37
+ ```
38
+
39
+ ## License
40
+
41
+ (c) 2011 Pascal Widdershoven (https://github.com/PascalW/jekyll_indextank)
42
+ (c) 2012 Stormz
43
+
44
+ This code is free to use under the terms of the MIT license.
@@ -0,0 +1,73 @@
1
+ # Derived from https://github.com/PascalW/jekyll_indextank
2
+ require 'indextank'
3
+ require 'nokogiri'
4
+
5
# nanoc text filter that sends the text of an item to an indextank-compatible
# index and returns the item content unchanged.
#
# Settings are read from the +indextank+ section of the site configuration:
# +api_url+ and +index+ are both required.
#
# To avoid re-sending unchanged items, the time of the last successful
# indexing is stored in the file +.nanoc_indextank+; items whose +mtime+ is
# older than the timestamp left by the previous run are skipped.
class SearchFilter < Nanoc::Filter
  identifier :search
  type :text

  class << self
    # Returns the timestamp stored at +path+ by an earlier run, or +nil+ when
    # there is none. The file is read at most once per process and the result
    # is memoized, so timestamps written while the current run is in progress
    # never change the threshold other items are compared against.
    #
    # NOTE(review): in a long-lived process that compiles repeatedly the
    # memoized value is not refreshed, so items may be re-sent to the index;
    # that only costs extra requests, it never skips a changed item.
    #
    # @param path [String] location of the timestamp file
    # @return [Time, nil]
    def previous_run_timestamp(path)
      @previous_run_timestamps ||= {}
      unless @previous_run_timestamps.key?(path)
        @previous_run_timestamps[path] = read_timestamp(path)
      end
      @previous_run_timestamps[path]
    end

    private

    # Reads and validates the marshalled timestamp; any failure (missing,
    # unreadable or corrupt file, unexpected payload) yields +nil+.
    #
    # NOTE: Marshal.load must only be used on trusted data. The file read here
    # is the one produced by #write_last_indexed.
    def read_timestamp(path)
      value = File.open(path, 'rb') { |f| Marshal.load(f) }
      value.is_a?(Time) ? value : nil
    rescue StandardError
      nil
    end
  end

  # Validates the indextank settings, loads the previous run's timestamp and
  # opens the configured index.
  #
  # @param hash [Hash] assigns forwarded unchanged to Nanoc::Filter
  # @raise [ArgumentError] when +indextank:api_url+ or +indextank:index+ is
  #   missing, including when the whole +indextank+ section is absent
  def initialize(hash = {})
    super

    # Fall back to an empty hash so a missing section produces the intended
    # ArgumentError below instead of a NoMethodError on nil.
    settings = (@config && @config[:indextank]) || {}
    raise ArgumentError, 'Missing indextank:api_url.' unless settings[:api_url]
    raise ArgumentError, 'Missing indextank:index.' unless settings[:index]

    @last_indexed_file = '.nanoc_indextank'
    load_last_timestamp

    api = IndexTank::Client.new(settings[:api_url])
    @index = api.indexes(settings[:index])
  end

  # Sends the item's text and title to the index unless the item is older
  # than the previous run's timestamp. The document id is the item identifier.
  #
  # @param content [String] compiled content of the item
  # @param params [Hash] filter arguments (unused)
  # @return [String] +content+, unchanged
  def run(content, params = {})
    return content if unchanged_since_previous_run?

    puts "Indexing page #{@item.identifier}"

    # Wait for the index to report that it is ready.
    # NOTE(review): there is no upper bound on this wait.
    sleep 0.5 until @index.running?

    @index.document(@item.identifier).add(
      :text => extract_text(content),
      :title => @item[:title] || @item.identifier
    )
    puts "Indexed #{@item.identifier}"

    # Persist the time for the next run, but leave @last_indexed alone:
    # overwriting the threshold with "now" would make every item processed
    # after this one look unchanged and be skipped.
    write_last_indexed(Time.now)

    content
  end

  # Extracts every text node from the HTML and joins them with spaces,
  # replacing carriage returns and newlines with spaces.
  #
  # @param content [String] HTML source
  # @return [String]
  def extract_text(content)
    doc = Nokogiri::HTML(content)
    doc.xpath('//*/text()').to_a.join(' ').tr("\r\n", ' ')
  end

  # Writes +timestamp+ to the timestamp file. Failures are reported as a
  # warning and otherwise ignored, so a read-only directory does not abort
  # the compilation.
  #
  # @param timestamp [Time, nil] value to store; defaults to @last_indexed
  def write_last_indexed(timestamp = @last_indexed)
    # Binary mode matches the 'rb' mode used when the file is read back.
    File.open(@last_indexed_file, 'wb') { |f| Marshal.dump(timestamp, f) }
  rescue StandardError
    puts 'WARNING: cannot write indexed timestamps file.'
  end

  # Sets @last_indexed to the timestamp left by the previous run, or +nil+
  # when none is available.
  def load_last_timestamp
    @last_indexed = self.class.previous_run_timestamp(@last_indexed_file)
  end

  private

  # True when a previous timestamp exists and the item was last modified
  # before it. Items without an mtime are always treated as changed.
  def unchanged_since_previous_run?
    modified_at = @item.mtime
    !@last_indexed.nil? && !modified_at.nil? && @last_indexed > modified_at
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nanoc-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - François de Metz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-31 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &9072360 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *9072360
25
+ - !ruby/object:Gem::Dependency
26
+ name: indextank
27
+ requirement: &9071560 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *9071560
36
+ description:
37
+ email: francois@2metz.fr
38
+ executables: []
39
+ extensions: []
40
+ extra_rdoc_files:
41
+ - README.md
42
+ files:
43
+ - README.md
44
+ - lib/nanoc-search.rb
45
+ homepage: https://github.com/stormz/nanoc-plugins
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.10
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Index items from nanoc site
69
+ test_files: []