nanoc-search 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +44 -0
- data/lib/nanoc-search.rb +73 -0
- metadata +69 -0
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Search
|
2
|
+
|
3
|
+
Index nanoc items through an IndexTank-compatible API.
|
4
|
+
|
5
|
+
It uses [nokogiri](http://nokogiri.org/) and indextank.
|
6
|
+
|
7
|
+
## Install
|
8
|
+
|
9
|
+
gem install nanoc-search
|
10
|
+
|
11
|
+
If you use bundler, add it to your Gemfile:
|
12
|
+
|
13
|
+
gem "nanoc-search", "~> 0.0.1"
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
Add to *lib/default.rb*:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
require 'nanoc-search'
|
21
|
+
```
|
22
|
+
|
23
|
+
Add to *config.yaml*:
|
24
|
+
|
25
|
+
indextank:
|
26
|
+
index: index_name
|
27
|
+
api_url: http://:password@api
|
28
|
+
|
29
|
+
Add the filter to a compile rule to use it:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
compile '*' do
|
33
|
+
filter :search
|
34
|
+
filter :erb
|
35
|
+
layout 'default'
|
36
|
+
end
|
37
|
+
```
|
38
|
+
|
39
|
+
## License
|
40
|
+
|
41
|
+
(c) 2011 Pascal Widdershoven (https://github.com/PascalW/jekyll_indextank)
|
42
|
+
(c) 2012 Stormz
|
43
|
+
|
44
|
+
This code is free to use under the terms of the MIT license.
|
data/lib/nanoc-search.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# Derived from https://github.com/PascalW/jekyll_indextank
|
2
|
+
require 'indextank'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
# nanoc text filter (identifier :search) that sends the content of each item
# to an IndexTank-compatible index and returns the content unchanged, so it
# can be chained with other filters.
#
# Required site configuration:
#
#   indextank:
#     index: index_name
#     api_url: http://:password@api
#
# The time of the most recent indexing is persisted with Marshal in the file
# '.nanoc_indextank' (relative to the current working directory). Items whose
# mtime is older than the timestamp left by the previous run are skipped.
class SearchFilter < Nanoc::Filter
  identifier :search
  type :text

  class << self
    # Returns the timestamp stored at +path+ by a previous run, or nil when
    # the file is missing or cannot be read.
    #
    # The file is read only once per process and the result is memoized.
    # #run rewrites the file after every indexed item, so re-reading it for
    # each filter instance would make every later item look older than the
    # "last indexed" time and be skipped.
    def previous_run_timestamp(path)
      @previous_run_timestamps ||= {}
      unless @previous_run_timestamps.key?(path)
        @previous_run_timestamps[path] = read_timestamp(path)
      end
      @previous_run_timestamps[path]
    end

    private

    # Reads the marshalled timestamp from +path+; nil on any error.
    def read_timestamp(path)
      # NOTE(review): Marshal.load is only safe on trusted data; this file is
      # expected to be written exclusively by #write_last_indexed.
      File.open(path, 'rb') { |f| Marshal.load(f) }
    rescue StandardError
      nil
    end
  end

  # Validates the indextank configuration, loads the timestamp of the
  # previous run and opens the configured index.
  #
  # hash - the assigns passed on to Nanoc::Filter; @config and @item are
  #        expected to be available after +super+.
  #
  # Raises ArgumentError when indextank:api_url or indextank:index is
  # missing, including when the whole indextank section is absent.
  def initialize(hash = {})
    super

    # Fall back to an empty hash so that a missing section yields the
    # ArgumentError below instead of a NoMethodError on nil.
    settings = (@config && @config[:indextank]) || {}
    raise ArgumentError, 'Missing indextank:api_url.' unless settings[:api_url]
    raise ArgumentError, 'Missing indextank:index.' unless settings[:index]

    @last_indexed_file = '.nanoc_indextank'

    load_last_timestamp
    # Threshold used by #run. Kept apart from @last_indexed, which #run
    # overwrites with the current time after each indexed item.
    @indexed_before = @last_indexed

    api = IndexTank::Client.new(settings[:api_url])
    @index = api.indexes(settings[:index])
  end

  # Indexes the current item unless it is unchanged since the previous run.
  # The text extracted from +content+ is stored under :text and the item's
  # :title attribute (or its identifier when there is no title) under :title.
  # The document id is the item identifier.
  #
  # NOTE: config[:indextank][:excludes] is not consulted; every item that is
  # routed through this filter is a candidate for indexing.
  #
  # content - the item content (HTML) to index.
  # params  - filter parameters; currently unused.
  #
  # Returns +content+ unchanged.
  def run(content, params = {})
    # only process items that changed since the previous run
    return content unless changed_since_last_run?

    puts "Indexing page #{@item.identifier}"

    # wait for the indextank index to get ready
    sleep 0.5 until @index.running?

    @index.document(@item.identifier).add({
      :text => extract_text(content),
      :title => @item[:title] || @item.identifier
    })
    puts "Indexed #{@item.identifier}"

    @last_indexed = Time.now
    write_last_indexed

    content
  end

  # Parses +content+ as HTML and returns all of its text nodes joined with
  # single spaces, with carriage returns and newlines replaced by spaces.
  def extract_text(content)
    doc = Nokogiri::HTML(content)
    doc.xpath('//*/text()').to_a.join(' ').tr("\r\n", ' ')
  end

  # Persists @last_indexed to @last_indexed_file. Best effort: a failure is
  # reported with a warning on stdout and otherwise ignored.
  def write_last_indexed
    # Binary mode, matching the 'rb' used when reading the Marshal data back.
    File.open(@last_indexed_file, 'wb') { |f| Marshal.dump(@last_indexed, f) }
  rescue StandardError
    puts 'WARNING: cannot write indexed timestamps file.'
  end

  # Sets @last_indexed to the timestamp left by the previous run, or nil
  # when there is none (missing or unreadable file).
  def load_last_timestamp
    @last_indexed = self.class.previous_run_timestamp(@last_indexed_file)
  end

  private

  # True when the item must be (re)indexed: there is no timestamp from a
  # previous run, the item has no mtime to compare against, or the item was
  # modified at or after that timestamp.
  def changed_since_last_run?
    modified_at = @item.mtime
    @indexed_before.nil? || modified_at.nil? || @indexed_before <= modified_at
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nanoc-search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- François de Metz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-31 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &9072360 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *9072360
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: indextank
|
27
|
+
requirement: &9071560 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *9071560
|
36
|
+
description:
|
37
|
+
email: francois@2metz.fr
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
extra_rdoc_files:
|
41
|
+
- README.md
|
42
|
+
files:
|
43
|
+
- README.md
|
44
|
+
- lib/nanoc-search.rb
|
45
|
+
homepage: https://github.com/stormz/nanoc-plugins
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.10
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Index items from nanoc site
|
69
|
+
test_files: []
|