middleman-search-gds 0.11.0a
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +191 -0
- data/lib/middleman-search.rb +7 -0
- data/lib/middleman-search/extension.rb +35 -0
- data/lib/middleman-search/search-index-resource.rb +157 -0
- data/lib/middleman-search/version.rb +3 -0
- data/lib/middleman_extension.rb +1 -0
- data/middleman-search.gemspec +27 -0
- data/vendor/assets/javascripts/lunr.da.js +273 -0
- data/vendor/assets/javascripts/lunr.de.js +373 -0
- data/vendor/assets/javascripts/lunr.du.js +437 -0
- data/vendor/assets/javascripts/lunr.es.js +588 -0
- data/vendor/assets/javascripts/lunr.fi.js +529 -0
- data/vendor/assets/javascripts/lunr.fr.js +691 -0
- data/vendor/assets/javascripts/lunr.hu.js +554 -0
- data/vendor/assets/javascripts/lunr.it.js +605 -0
- data/vendor/assets/javascripts/lunr.jp.js +118 -0
- data/vendor/assets/javascripts/lunr.min.js +7 -0
- data/vendor/assets/javascripts/lunr.no.js +246 -0
- data/vendor/assets/javascripts/lunr.pt.js +559 -0
- data/vendor/assets/javascripts/lunr.ro.js +547 -0
- data/vendor/assets/javascripts/lunr.ru.js +380 -0
- data/vendor/assets/javascripts/lunr.stemmer.support.js +282 -0
- data/vendor/assets/javascripts/lunr.sv.js +245 -0
- data/vendor/assets/javascripts/lunr.tr.js +1063 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f7a50a523cabca4a87a73bb6a1a87b765f1e0e3b
|
4
|
+
data.tar.gz: e62e0a792758b31fd98a00acf6bbe2091c6593be
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a51080682bef38d99ea3a27958c34e30768454fc18e04432ca3ae26e6bf79325a7bcdc1b8b5ad6cd27787c183ef4499e3ad672ce2dc63ce2dae283cce18644f8
|
7
|
+
data.tar.gz: 6fa42ea890e06662dd5b876437a2b9b61cb8f6e57e0623fcedbd7870968130d8cc277baf4b815a751409b4dd9f0040cb1543c983fbd8a33aacad583107cc3aa5
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Matías García Isaía
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
# Middleman::Search
|
2
|
+
|
3
|
+
LunrJS-based search for Middleman.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'middleman-search'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install middleman-search
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
You need to activate the module in your `config.rb`, telling the extension how to index your resources:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
activate :search do |search|
|
25
|
+
|
26
|
+
search.resources = ['blog/', 'index.html', 'contactus/index.html']
|
27
|
+
|
28
|
+
search.index_path = 'search/lunr-index.json' # defaults to `search.json`
|
29
|
+
|
30
|
+
search.lunr_dirs = ['source/vendor/lunr-custom/'] # optional alternate paths where to look for lunr js files
|
31
|
+
|
32
|
+
search.language = 'es' # defaults to 'en'
|
33
|
+
|
34
|
+
search.fields = {
|
35
|
+
title: {boost: 100, store: true, required: true},
|
36
|
+
content: {boost: 50},
|
37
|
+
url: {index: false, store: true},
|
38
|
+
author: {boost: 30}
|
39
|
+
}
|
40
|
+
end
|
41
|
+
```
|
42
|
+
|
43
|
+
Where `resources` is a list of the beginning of the URL of the resources to index (tested with `String#start_with?`), `index_path` is the relative path of the generated index file in your site, and `fields` is a hash with one entry for each field to be indexed, with a hash of options associated:
|
44
|
+
|
45
|
+
- `boost` Specifies lunr relevance boost when searching this field
|
46
|
+
- `store` Whether to store this field in the document map (see below), defaults to false
|
47
|
+
- `index` Whether to index this field, defaults to true
|
48
|
+
- `required` The resource will not be indexed if a field marked as required has an empty or null value
|
49
|
+
|
50
|
+
Note that a special field `id` is included automatically, with an autogenerated identifier to be used as the `ref` for the document.
|
51
|
+
|
52
|
+
All field values are retrieved from the resource `data` (i.e. its frontmatter), or from the `options` in the `resource.metadata` (i.e. any options specified in a `proxy` page), except for:
|
53
|
+
- `url` which is the actual resource url
|
54
|
+
- `content` the text extracted from the rendered resource, without including its layout
|
55
|
+
|
56
|
+
You can then query the index from Javascript via the `lunrIndex` object (see [Index file](#index-file) for more info):
|
57
|
+
|
58
|
+
```javascript
|
59
|
+
var max_search_entries = 50;
|
60
|
+
|
61
|
+
var result = []; //initialize empty array
|
62
|
+
|
63
|
+
lunrIndex.search(request.term).forEach( function (item, index) {
|
64
|
+
if ( index < max_search_entries ) {
|
65
|
+
result.push(lunrData.docs[item.ref]);
|
66
|
+
}
|
67
|
+
});
|
68
|
+
```
|
69
|
+
|
70
|
+
(Thanks [@Jeepler](https://github.com/Jeepler) [for adapting](https://github.com/manastech/middleman-search/issues/11#issuecomment-220262546) the lodash v3 code [we used to use at Manas](https://manas.tech/blog/2015/10/22/middleman-search-client-side-search-in-your-middleman-site.html))
|
71
|
+
|
72
|
+
### i18n
|
73
|
+
|
74
|
+
This gem includes assets for alternate languages as provided by [MihaiValentin/lunr-languages](https://github.com/MihaiValentin/lunr-languages). Please refer to that repository for a list of the languages available.
|
75
|
+
|
76
|
+
If you want to work with a language that is not included, set up a `lunr.yourlang.js` file in a folder in your project, and add that folder to `lunr_dirs` so the gem knows where to look for it.
|
77
|
+
|
78
|
+
### Manual index manipulation
|
79
|
+
|
80
|
+
You can fully customise the content to be indexed and stored per resource by defining a `before_index` callback:
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
activate :search do |search|
|
84
|
+
search.before_index = Proc.new do |to_index, to_store, resource|
|
85
|
+
if author = resource.data.author
|
86
|
+
to_index[:author] = data.authors[author].name
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
```
|
91
|
+
|
92
|
+
This option accepts a callback that will be executed for each resource, and will be executed with the document to be indexed and the map to be stored, in the `index` and `docs` objects of the output respectively (see below), as well as the resource being processed. You can use this callback to modify either of those, or `throw(:skip)` to skip the resource in question.
|
93
|
+
|
94
|
+
### Lunr pipeline configuration
|
95
|
+
|
96
|
+
In some cases, you may want to add a new function to the lunr pipeline, both for building the index and then for searching. You can do this by providing a `pipeline` hash with function names and bodies, for example:
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
activate :search do |search|
|
100
|
+
search.pipeline = {
|
101
|
+
tildes: <<-JS
|
102
|
+
function(token, tokenIndex, tokens) {
|
103
|
+
return token
|
104
|
+
.replace('á', 'a')
|
105
|
+
.replace('é', 'e')
|
106
|
+
.replace('í', 'i')
|
107
|
+
.replace('ó', 'o')
|
108
|
+
.replace('ú', 'u');
|
109
|
+
}
|
110
|
+
JS
|
111
|
+
}
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
This will register the `tildes` function in the lunr pipeline and add it when building the index. From the Lunr documentation:
|
116
|
+
|
117
|
+
> Functions in the pipeline are called with three arguments: the current token being processed; the index of that token in the array of tokens, and the whole list of tokens part of the document being processed. This enables simple unigram processing of tokens as well as more sophisticated n-gram processing.
|
118
|
+
>
|
119
|
+
> The function should return the processed version of the text, which will in turn be passed to the next function in the pipeline. Returning undefined will prevent any further processing of the token, and that token will not make it to the index.
|
120
|
+
|
121
|
+
Note that if you add a function to the pipeline, it will also be loaded when de-serialising the index, and lunr will fail with a `Cannot load un-registered function: tildes` error if it has not been re-registered. You can either register them manually, or simply include the following in a `.js.erb` file to be executed __before__ loading the index:
|
122
|
+
```erb
|
123
|
+
<%= search_lunr_js_pipeline %>
|
124
|
+
```
|
125
|
+
|
126
|
+
You can also remove pipeline functions that Lunr.js enables by default: trimmer, stemmer, and stopWordFilter.
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
search.pipeline_remove = [
|
130
|
+
'stopWordFilter'
|
131
|
+
]
|
132
|
+
```
|
133
|
+
|
134
|
+
## Index file
|
135
|
+
|
136
|
+
The generated index file contains a JSON object with two properties:
|
137
|
+
- `index` contains the serialised lunr.js index, which you can load via `lunr.Index.load(lunrData.index)`
|
138
|
+
- `docs` is a map from the autogenerated document ids to an object that contains the attributes configured for storage
|
139
|
+
|
140
|
+
You will typically load the `index` into a lunr index instance, and then use the `docs` map to look up the returned value and present it to the user.
|
141
|
+
|
142
|
+
You should also `require` the `lunr.min.js` file in your main sprockets javascript file (if using the asset pipeline) to be able to actually load the index:
|
143
|
+
|
144
|
+
```javascript
|
145
|
+
//= require lunr.min
|
146
|
+
```
|
147
|
+
|
148
|
+
If you're using lunr's i18n capabilities, you should also load the Stemmer support and language files (in that order) here:
|
149
|
+
|
150
|
+
```javascript
|
151
|
+
//= require lunr.min
|
152
|
+
//= require lunr.stemmer.support
|
153
|
+
//= require lunr.es
|
154
|
+
```
|
155
|
+
|
156
|
+
### Asset pipeline
|
157
|
+
|
158
|
+
The Middleman pipeline (if enabled) does not include `json` files by default, but you can easily modify this by adding `.json` to the `exts` option of the corresponding extensions, such as `gzip` and `asset_hash`:
|
159
|
+
|
160
|
+
```ruby
|
161
|
+
activate :asset_hash do |asset_hash|
|
162
|
+
asset_hash.exts << '.json'
|
163
|
+
end
|
164
|
+
```
|
165
|
+
|
166
|
+
Note that if you run the index json file through the asset hash extension, you will need to retrieve the actual destination URL when loading the file in the browser for searching, using the `search_index_path` view helper:
|
167
|
+
|
168
|
+
```javascript
|
169
|
+
var lunrIndex = null;
|
170
|
+
var lunrData = null;
|
171
|
+
|
172
|
+
// Download index data
|
173
|
+
$.ajax({
|
174
|
+
url: "<%= search_index_path %>",
|
175
|
+
cache: true,
|
176
|
+
method: 'GET',
|
177
|
+
success: function(data) {
|
178
|
+
lunrData = data;
|
179
|
+
lunrIndex = lunr.Index.load(lunrData.index);
|
180
|
+
}
|
181
|
+
});
|
182
|
+
```
|
183
|
+
|
184
|
+
## Acknowledgments
|
185
|
+
|
186
|
+
A big thank you to:
|
187
|
+
- [Octo-Labs](https://github.com/Octo-Labs)'s [jagthedrummer](https://github.com/jagthedrummer) for his [`middleman-alias`](https://github.com/Octo-Labs/middleman-alias) extension, on which we based this one.
|
188
|
+
- [jnovos](https://github.com/jnovos) and [256dpi](https://github.com/256dpi), for their [`middleman-lunrjs`](https://github.com/jnovos/middleman-lunrjs) and [`middleman-lunr`](https://github.com/256dpi/middleman-lunr) extensions, which served as inspirations for making this one.
|
189
|
+
- [olivernn](https://github.com/olivernn) and all [`lunr.js`](http://lunrjs.com/) [contributors](https://github.com/olivernn/lunr.js/graphs/contributors)
|
190
|
+
- [MihaiValentin](https://github.com/MihaiValentin) for the support for 10+ languages in [lunr-languages](https://github.com/MihaiValentin/lunr-languages).
|
191
|
+
- [The Middleman](https://middlemanapp.com/) [team](https://github.com/orgs/middleman/people) and [contributors](https://github.com/middleman/middleman/graphs/contributors)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'middleman-core'
|
2
|
+
require 'middleman-search/search-index-resource'
|
3
|
+
|
4
|
+
module Middleman
  # Middleman extension that appends a search index resource to the sitemap
  # and exposes view helpers for loading that index from the browser.
  class SearchExtension < Middleman::Extension
    option :resources, [], 'Paths of resources to index'
    option :fields, {}, 'Fields to index, with their options'
    option :before_index, nil, 'Callback to execute before indexing a document'
    option :index_path, 'search.json', 'Index file path'
    option :pipeline, {}, 'Javascript pipeline functions to use in lunr index'
    # This option is iterated as a plain list of function names, so its
    # default is an empty array rather than an empty hash.
    option :pipeline_remove, [], 'Default pipeline functions to remove'
    option :tokenizer_separator, nil, 'Replace the default tokenizer separator'
    option :cache, false, 'Avoid the cache to be rebuilt on every request in development mode'
    option :language, 'en', 'Language code ("es", "fr") to use when indexing site\'s content'
    option :lunr_dirs, [], 'Directories in which to look for custom lunr.js files'

    # Adds the generated search index to the list of sitemap resources.
    #
    # @param resources [Array] the resources collected so far
    # @return [Array] the same array, with the search index resource appended
    def manipulate_resource_list(resources)
      resources.push Middleman::Sitemap::SearchIndexResource.new(@app.sitemap, @options[:index_path], @options)
      resources
    end

    helpers do
      # Builds the javascript statements that register every configured
      # pipeline function with lunr, one statement per line.
      #
      # @return [String] javascript source (empty when no pipeline is configured)
      def search_lunr_js_pipeline
        # Thanks http://stackoverflow.com/a/20187415/12791
        extensions[:search].options[:pipeline].map do |name, function|
          "lunr.Pipeline.registerFunction(#{function}, '#{name}');"
        end.join("\n")
      end

      # Returns the configured `http_prefix` followed by the destination path
      # of the index resource looked up in the sitemap by `index_path`.
      #
      # @return [String] URL path of the generated index file
      def search_index_path
        (config || app.config)[:http_prefix] + sitemap.find_resource_by_path(extensions[:search].options[:index_path]).destination_path
      end
    end
  end
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Middleman
  module Sitemap
    # Sitemap resource whose rendered body is a JSON document with two keys:
    # `index` (the serialised lunr.js index) and `docs` (a map from the
    # autogenerated document ids to the fields configured for storage).
    class SearchIndexResource < ::Middleman::Sitemap::Resource
      # @param store the sitemap store, forwarded to the superclass
      # @param path [String] path of the generated index file
      # @param options [#[]] extension options; reads :resources, :fields,
      #   :before_index, :pipeline, :pipeline_remove, :tokenizer_separator,
      #   :cache, :language and :lunr_dirs
      def initialize(store, path, options)
        @resources_to_index = options[:resources]
        @fields = options[:fields]
        @callback = options[:before_index]
        @pipeline = options[:pipeline]
        @pipeline_remove = options[:pipeline_remove]
        @tokenizer_separator = options[:tokenizer_separator]
        @cache_index = options[:cache]
        @language = options[:language]
        # User-supplied directories come first, so they take precedence over
        # the lunr assets bundled with the gem.
        @lunr_dirs = options[:lunr_dirs] + [File.expand_path("../../../vendor/assets/javascripts/", __FILE__)]
        super(store, path)
      end

      # @return [false] the index is generated by #render, not from a template
      def template?
        false
      end

      # @return [String] the resource path itself
      def get_source_file
        path
      end

      # Returns the index JSON. When the :cache option is set the index is
      # built once and memoised; otherwise it is rebuilt on every call.
      # Both parameters are accepted but not used.
      #
      # @return [String] JSON document
      def render(opts={}, locs={})
        if @cache_index
          @index ||= build_index
        else
          build_index
        end
      end

      # Assembles a javascript program (lunr, optional language support, the
      # index definition and one `add` call per indexed resource), runs it
      # through ExecJS and returns the resulting JSON document.
      #
      # @return [String] `{"index": ..., "docs": ...}`
      # @raise [RuntimeError] when a required lunr file cannot be found
      def build_index
        # Build js context
        libs = [lunr_resource('lunr.js')]
        if @language != 'en' # English is the default
          libs << lunr_resource("lunr.stemmer.support.js")
          libs << lunr_resource("lunr.#{@language}.js")
        end

        source = libs.map { |lib| File.read(lib, mode: "rb:UTF-8") }
        source << "lunr.Index.prototype.indexJson = function () {return JSON.stringify(this.toJSON());};"

        @pipeline.each do |name, function|
          source << "lunr.Pipeline.registerFunction((#{function}), '#{name}');"
        end

        # Build lunr based on config
        source << "lunr.middlemanSearchIndex = lunr(function () {"

        # Use autogenerated id field as reference
        source << "this.ref('id');"

        # Remove default pipeline filters
        @pipeline_remove.each do |name|
          source << "this.pipeline.remove(lunr.#{name});"
        end

        # Add functions to pipeline (just registering them isn't enough)
        @pipeline.each do |name, _function|
          source << "this.pipeline.add(lunr.Pipeline.registeredFunctions.#{name});"
        end

        if @tokenizer_separator.present?
          # NOTE(review): the property spelling `seperator` is kept as-is;
          # confirm it matches the bundled lunr version before renaming it.
          source << "this.tokenizerFn.seperator = #{@tokenizer_separator};"
        end

        # Use language if set
        source << "this.use(lunr.#{@language});" if @language != 'en'

        # Define fields with boost
        @fields.each do |field, opts|
          next if opts[:index] == false
          source << if opts[:boost].nil?
                      # Without a configured boost, emit no options object:
                      # interpolating nil would yield `{ boost: }`, which is
                      # not valid javascript.
                      "this.field('#{field}');"
                    else
                      "this.field('#{field}', { boost: #{opts[:boost]}});"
                    end
        end

        source << "});"

        # Ref to resource map
        store = {}

        # Iterate over all resources and build index
        @app.sitemap.resources.each_with_index do |resource, id|
          begin
            # `throw(:skip)` (from here or from the before_index callback)
            # abandons the current resource without indexing it.
            catch(:skip) do
              next if resource.data['index'] == false
              next unless @resources_to_index.any? { |whitelisted| resource.path.start_with? whitelisted }

              to_index = {}
              to_store = {}

              @fields.each do |field, opts|
                value = value_for(resource, field, opts)
                throw(:skip) if value.blank? && opts[:required]
                to_index[field] = value unless opts[:index] == false
                to_store[field] = value if opts[:store]
              end

              @callback.call(to_index, to_store, resource) if @callback

              source << "lunr.middlemanSearchIndex.add(#{to_index.merge(id: id).to_json});"

              store[id] = to_store
            end
          rescue => ex
            # A failing resource is logged and skipped; the build continues.
            @app.logger.warn "Error processing resource for index: #{resource.path}\n#{ex}\n #{ex.backtrace.join("\n ")}"
          end
        end

        # Generate JSON output
        context = ExecJS.compile(source.join("\n"))
        json = context.eval('lunr.middlemanSearchIndex.indexJson()')
        "{\"index\": #{json}, \"docs\": #{store.to_json}}"
      end

      # @return [false]
      def binary?
        false
      end

      # @return [false]
      def ignored?
        false
      end

      # Resolves the value of one configured field for a resource.
      #
      # - `content`: text nodes of the resource rendered without its layout
      # - `url`: the resource url
      # - anything else: looked up on `resource.data`, falling back to the
      #   `:options` entry of `resource.metadata`; arrays are joined with spaces
      #
      # @param resource the sitemap resource being indexed
      # @param field [Symbol, String] field name
      # @param opts [Hash] field options (not used here)
      # @return [String, nil]
      def value_for(resource, field, opts={})
        case field.to_s
        when 'content'
          html = resource.render( { :layout => false }, { :current_path => resource.path } )
          Nokogiri::HTML(html).xpath("//text()").text
        when 'url'
          resource.url
        else
          # public_send, not send: a field named like a private Kernel method
          # (e.g. `format`, `system`) must not invoke that method.
          value = resource.data.public_send(field) || resource.metadata.fetch(:options, {}).fetch(field, nil)
          value ? Array(value).compact.join(" ") : nil
        end
      end

      private

      # Maps `name.js` to `name.min.js`; names that already end in `.min.js`
      # or do not end in `.js` are returned unchanged.
      #
      # @param resource_name [String]
      # @return [String]
      def minified_path(resource_name)
        return resource_name if resource_name.end_with? '.min.js'
        return resource_name unless resource_name.end_with? '.js'
        resource_name.sub(/(.*)\.js$/,'\1.min.js')
      end

      # Finds a lunr asset in the configured directories, preferring the
      # minified variant within each directory.
      #
      # @param resource_name [String] file name such as `lunr.js`
      # @return [String] path of the first existing candidate
      # @raise [RuntimeError] when no candidate exists
      def lunr_resource(resource_name)
        candidates = @lunr_dirs.flat_map do |dir|
          [File.join(dir, minified_path(resource_name)), File.join(dir, resource_name)]
        end
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        found = candidates.find { |file| File.exist?(file) }
        return found if found

        searched = @lunr_dirs.map { |dir| File.absolute_path(dir) }.join(File::PATH_SEPARATOR)
        raise "Couldn't find #{resource_name} nor #{minified_path(resource_name)} in #{searched}"
      end
    end
  end
end
|