middleman-search-gds 0.11.0a

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f7a50a523cabca4a87a73bb6a1a87b765f1e0e3b
4
+ data.tar.gz: e62e0a792758b31fd98a00acf6bbe2091c6593be
5
+ SHA512:
6
+ metadata.gz: a51080682bef38d99ea3a27958c34e30768454fc18e04432ca3ae26e6bf79325a7bcdc1b8b5ad6cd27787c183ef4499e3ad672ce2dc63ce2dae283cce18644f8
7
+ data.tar.gz: 6fa42ea890e06662dd5b876437a2b9b61cb8f6e57e0623fcedbd7870968130d8cc277baf4b815a751409b4dd9f0040cb1543c983fbd8a33aacad583107cc3aa5
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
4
+ - 2.0.0
5
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in middleman-search.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Matías García Isaía
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,191 @@
1
+ # Middleman::Search
2
+
3
+ LunrJS-based search for Middleman.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'middleman-search'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install middleman-search
18
+
19
+ ## Usage
20
+
21
+ You need to activate the module in your `config.rb`, telling the extension how to index your resources:
22
+
23
+ ```ruby
24
+ activate :search do |search|
25
+
26
+ search.resources = ['blog/', 'index.html', 'contactus/index.html']
27
+
28
+ search.index_path = 'search/lunr-index.json' # defaults to `search.json`
29
+
30
+ search.lunr_dirs = ['source/vendor/lunr-custom/'] # optional alternate paths where to look for lunr js files
31
+
32
+ search.language = 'es' # defaults to 'en'
33
+
34
+ search.fields = {
35
+ title: {boost: 100, store: true, required: true},
36
+ content: {boost: 50},
37
+ url: {index: false, store: true},
38
+ author: {boost: 30}
39
+ }
40
+ end
41
+ ```
42
+
43
+ Where `resources` is a list of URL prefixes of the resources to index (tested with `String#start_with?`), `index_path` is the relative path of the generated index file in your site, and `fields` is a hash with one entry for each field to be indexed, each with an associated hash of options:
44
+
45
+ - `boost` Specifies lunr relevance boost when searching this field
46
+ - `store` Whether to store this field in the document map (see below), defaults to false
47
+ - `index` Whether to index this field, defaults to true
48
+ - `required` The resource will not be indexed if a field marked as required has an empty or null value
49
+
50
+ Note that a special field `id` is included automatically, with an autogenerated identifier to be used as the `ref` for the document.
51
+
52
+ All field values are retrieved from the resource `data` (i.e. its frontmatter), or from the `options` in the `resource.metadata` (i.e. any options specified in a `proxy` page), except for:
53
+ - `url` which is the actual resource url
54
+ - `content` the text extracted from the rendered resource, without including its layout
55
+
56
+ You can then query the index from Javascript via the `lunrIndex` object (see [Index file](#index-file) for more info):
57
+
58
+ ```javascript
59
+ var max_search_entries = 50;
60
+
61
+ var result = []; //initialize empty array
62
+
63
+ lunrIndex.search(request.term).forEach( function (item, index) {
64
+ if ( index < max_search_entries ) {
65
+ result.push(lunrData.docs[item.ref]);
66
+ }
67
+ });
68
+ ```
69
+
70
+ (Thanks [@Jeepler](https://github.com/Jeepler) [for adapting](https://github.com/manastech/middleman-search/issues/11#issuecomment-220262546) the lodash v3 code [we used to use at Manas](https://manas.tech/blog/2015/10/22/middleman-search-client-side-search-in-your-middleman-site.html))
71
+
72
+ ### i18n
73
+
74
+ This gem includes assets for alternate languages as provided by [MihaiValentin/lunr-languages](https://github.com/MihaiValentin/lunr-languages). Please refer to that repository for a list of the languages available.
75
+
76
+ If you want to work with a language that is not included, set up a `lunr.yourlang.js` file in a folder in your project, and add that folder to `lunr_dirs` so the gem knows where to look for it.
77
+
78
+ ### Manual index manipulation
79
+
80
+ You can fully customise the content to be indexed and stored per resource by defining a `before_index` callback:
81
+
82
+ ```ruby
83
+ activate :search do |search|
84
+ search.before_index = Proc.new do |to_index, to_store, resource|
85
+ if author = resource.data.author
86
+ to_index[:author] = data.authors[author].name
87
+ end
88
+ end
89
+ end
90
+ ```
91
+
92
+ This option accepts a callback that will be executed for each resource, and will be executed with the document to be indexed and the map to be stored, in the `index` and `docs` objects of the output respectively (see below), as well as the resource being processed. You can use this callback to modify either of those, or `throw(:skip)` to skip the resource in question.
93
+
94
+ ### Lunr pipeline configuration
95
+
96
+ In some cases, you may want to add a new function to the lunr pipeline, both for building the index and then for searching. You can do this by providing a `pipeline` hash with function names and bodies, for example:
97
+
98
+ ```ruby
99
+ activate :search do |search|
100
+ search.pipeline = {
101
+ tildes: <<-JS
102
+ function(token, tokenIndex, tokens) {
103
+ return token
104
+ .replace('á', 'a')
105
+ .replace('é', 'e')
106
+ .replace('í', 'i')
107
+ .replace('ó', 'o')
108
+ .replace('ú', 'u');
109
+ }
110
+ JS
111
+ }
112
+ end
113
+ ```
114
+
115
+ This will register the `tildes` function in the lunr pipeline and add it when building the index. From the Lunr documentation:
116
+
117
+ > Functions in the pipeline are called with three arguments: the current token being processed; the index of that token in the array of tokens, and the whole list of tokens part of the document being processed. This enables simple unigram processing of tokens as well as more sophisticated n-gram processing.
118
+ >
119
+ > The function should return the processed version of the text, which will in turn be passed to the next function in the pipeline. Returning undefined will prevent any further processing of the token, and that token will not make it to the index.
120
+
121
+ Note that if you add a function to the pipeline, it will also be loaded when de-serialising the index, and lunr will fail with a `Cannot load un-registered function: tildes` error if it has not been re-registered. You can either register them manually, or simply include the following in a `.js.erb` file to be executed __before__ loading the index:
122
+ ```erb
123
+ <%= search_lunr_js_pipeline %>
124
+ ```
125
+
126
+ You can also remove pipeline functions that Lunr.js enables by default: trimmer, stemmer, and stopWordFilter.
127
+
128
+ ```ruby
129
+ search.pipeline_remove = [
130
+ 'stopWordFilter'
131
+ ]
132
+ ```
133
+
134
+ ## Index file
135
+
136
+ The generated index file contains a JSON object with two properties:
137
+ - `index` contains the serialised lunr.js index, which you can load via `lunr.Index.load(lunrData.index)`
138
+ - `docs` is a map from the autogenerated document ids to an object that contains the attributes configured for storage
139
+
140
+ You will typically load the `index` into a lunr index instance, and then use the `docs` map to look up the returned value and present it to the user.
141
+
142
+ You should also `require` the `lunr.min.js` file in your main sprockets javascript file (if using the asset pipeline) to be able to actually load the index:
143
+
144
+ ```javascript
145
+ //= require lunr.min
146
+ ```
147
+
148
+ If you're using lunr's i18n capabilities, you should also load the Stemmer support and language files (in that order) here:
149
+
150
+ ```javascript
151
+ //= require lunr.min
152
+ //= require lunr.stemmer.support
153
+ //= require lunr.es
154
+ ```
155
+
156
+ ### Asset pipeline
157
+
158
+ The Middleman pipeline (if enabled) does not include `json` files by default, but you can easily modify this by adding `.json` to the `exts` option of the corresponding extensions, such as `gzip` and `asset_hash`:
159
+
160
+ ```ruby
161
+ activate :asset_hash do |asset_hash|
162
+ asset_hash.exts << '.json'
163
+ end
164
+ ```
165
+
166
+ Note that if you run the index json file through the asset hash extension, you will need to retrieve the actual destination URL when loading the file in the browser for searching, using the `search_index_path` view helper:
167
+
168
+ ```javascript
169
+ var lunrIndex = null;
170
+ var lunrData = null;
171
+
172
+ // Download index data
173
+ $.ajax({
174
+ url: "<%= search_index_path %>",
175
+ cache: true,
176
+ method: 'GET',
177
+ success: function(data) {
178
+ lunrData = data;
179
+ lunrIndex = lunr.Index.load(lunrData.index);
180
+ }
181
+ });
182
+ ```
183
+
184
+ ## Acknowledgments
185
+
186
+ A big thank you to:
187
+ - [Octo-Labs](https://github.com/Octo-Labs)'s [jagthedrummer](https://github.com/jagthedrummer) for his [`middleman-alias`](https://github.com/Octo-Labs/middleman-alias) extension, on which we based this one.
188
+ - [jnovos](https://github.com/jnovos) and [256dpi](https://github.com/256dpi), for their [`middleman-lunrjs`](https://github.com/jnovos/middleman-lunrjs) and [`middleman-lunr`](https://github.com/256dpi/middleman-lunr) extensions, which served as inspirations for making this one.
189
+ - [olivernn](https://github.com/olivernn) and all [`lunr.js`](http://lunrjs.com/) [contributors](https://github.com/olivernn/lunr.js/graphs/contributors)
190
+ - [MihaiValentin](https://github.com/MihaiValentin) for the support for 10+ languages in [lunr-languages](https://github.com/MihaiValentin/lunr-languages).
191
+ - [The Middleman](https://middlemanapp.com/) [team](https://github.com/orgs/middleman/people) and [contributors](https://github.com/middleman/middleman/graphs/contributors)
@@ -0,0 +1,7 @@
1
+ require 'middleman-core'
2
+ require "middleman-search/version"
3
+
4
# Register the extension under the :search key. The extension file is
# required inside the block, so it is only loaded when the block is invoked;
# the block's value is the extension class itself.
::Middleman::Extensions.register :search do
  require 'middleman-search/extension'

  ::Middleman::SearchExtension
end
@@ -0,0 +1,35 @@
1
+ require 'middleman-core'
2
+ require 'middleman-search/search-index-resource'
3
+
4
+ module Middleman
5
+ class SearchExtension < Middleman::Extension
6
+ option :resources, [], 'Paths of resources to index'
7
+ option :fields, {}, 'Fields to index, with their options'
8
+ option :before_index, nil, 'Callback to execute before indexing a document'
9
+ option :index_path, 'search.json', 'Index file path'
10
+ option :pipeline, {}, 'Javascript pipeline functions to use in lunr index'
11
+ option :pipeline_remove, {}, 'Default pipeline functions to remove'
12
+ option :tokenizer_separator, nil, 'Replace the default tokizer separator'
13
+ option :cache, false, 'Avoid the cache to be rebuilt on every request in development mode'
14
+ option :language, 'en', 'Language code ("es", "fr") to use when indexing site\'s content'
15
+ option :lunr_dirs, [], 'Directories in which to look for custom lunr.js files'
16
+
17
+ def manipulate_resource_list(resources)
18
+ resources.push Middleman::Sitemap::SearchIndexResource.new(@app.sitemap, @options[:index_path], @options)
19
+ resources
20
+ end
21
+
22
+ helpers do
23
+ def search_lunr_js_pipeline
24
+ # Thanks http://stackoverflow.com/a/20187415/12791
25
+ extensions[:search].options[:pipeline].map do |name, function|
26
+ "lunr.Pipeline.registerFunction(#{function}, '#{name}');"
27
+ end.join("\n")
28
+ end
29
+
30
+ def search_index_path
31
+ (config || app.config)[:http_prefix] + sitemap.find_resource_by_path(extensions[:search].options[:index_path]).destination_path
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,157 @@
1
+ # encoding: UTF-8
2
+
3
module Middleman
  module Sitemap
    # Sitemap resource whose rendered body is the search index: a JSON object
    # holding the serialised lunr.js index ("index") and a map from document
    # id to the fields configured for storage ("docs").
    class SearchIndexResource < ::Middleman::Sitemap::Resource
      # @param store the sitemap this resource belongs to (forwarded to super)
      # @param path [String] path of the generated index file (forwarded to super)
      # @param options extension options; the keys read here are :resources,
      #   :fields, :before_index, :pipeline, :pipeline_remove,
      #   :tokenizer_separator, :cache, :language and :lunr_dirs
      def initialize(store, path, options)
        @resources_to_index = options[:resources]
        @fields = options[:fields]
        @callback = options[:before_index]
        @pipeline = options[:pipeline]
        @pipeline_remove = options[:pipeline_remove]
        @tokenizer_separator = options[:tokenizer_separator]
        @cache_index = options[:cache]
        @language = options[:language]
        # The bundled vendor directory goes last, so files found in the
        # user-supplied directories take precedence during lookup.
        @lunr_dirs = options[:lunr_dirs] + [File.expand_path("../../../vendor/assets/javascripts/", __FILE__)]
        super(store, path)
      end

      # @return [Boolean] always false
      def template?
        false
      end

      # @return the resource path
      def get_source_file
        path
      end

      # Returns the index JSON. When the :cache option is set the index is
      # built once and memoised; otherwise it is rebuilt on every call.
      #
      # @param opts ignored
      # @param locs ignored
      # @return [String] the index JSON document
      def render(opts={}, locs={})
        if @cache_index
          @index ||= build_index
        else
          build_index
        end
      end

      # Assembles a Javascript program (lunr sources, pipeline registration,
      # index definition and one `add` call per indexed resource), runs it
      # through ExecJS and returns the resulting JSON document.
      #
      # @return [String] JSON with the keys "index" and "docs"
      def build_index
        # Build js context
        libs = []
        libs << lunr_resource('lunr.js')
        if @language != 'en' # English is the default
          libs << lunr_resource("lunr.stemmer.support.js")
          libs << lunr_resource("lunr.#{@language}.js")
        end

        source = libs.map { |lib| File.read(lib, mode: "rb:UTF-8") }
        source << "lunr.Index.prototype.indexJson = function () {return JSON.stringify(this.toJSON());};"

        @pipeline.each do |name, function|
          source << "lunr.Pipeline.registerFunction((#{function}), '#{name}');"
        end

        # Build lunr based on config
        source << "lunr.middlemanSearchIndex = lunr(function () {"

        # Use autogenerated id field as reference
        source << "this.ref('id');"

        # Remove default pipeline filters
        @pipeline_remove.each do |name|
          source << "this.pipeline.remove(lunr.#{name});"
        end

        # Add functions to pipeline (just registering them isn't enough)
        @pipeline.each do |name, _function|
          source << "this.pipeline.add(lunr.Pipeline.registeredFunctions.#{name});"
        end

        # NOTE(review): the "seperator" spelling is kept exactly as it was;
        # presumably it matches the property name used by the bundled lunr.js
        # version -- confirm before changing it.
        if @tokenizer_separator.present?
          source << "this.tokenizerFn.seperator = #{@tokenizer_separator}"
        end

        # Use language if set
        source << "this.use(lunr.#{@language});" if @language != 'en'

        # Define fields with boost
        @fields.each do |field, opts|
          next if opts[:index] == false
          # Interpolating a nil boost would emit `{ boost: }`, which is a
          # Javascript syntax error that breaks the whole index build, so
          # fields without a boost are declared without an options object.
          source << if opts[:boost].nil?
                      "this.field('#{field}');"
                    else
                      "this.field('#{field}', { boost: #{opts[:boost]}});"
                    end
        end

        source << "});"

        # Ref to resource map
        store = Hash.new

        # Iterate over all resources and build index
        @app.sitemap.resources.each_with_index do |resource, id|
          begin
            # Both `next` and `throw(:skip)` leave this block, so the resource
            # is neither indexed nor stored.
            catch(:skip) do
              next if resource.data['index'] == false
              next unless @resources_to_index.any? {|whitelisted| resource.path.start_with? whitelisted }

              to_index = Hash.new
              to_store = Hash.new

              @fields.each do |field, opts|
                value = value_for(resource, field, opts)
                throw(:skip) if value.blank? && opts[:required]
                to_index[field] = value unless opts[:index] == false
                to_store[field] = value if opts[:store]
              end

              # The callback may modify both hashes in place or throw(:skip)
              @callback.call(to_index, to_store, resource) if @callback

              source << "lunr.middlemanSearchIndex.add(#{to_index.merge(id: id).to_json});"

              store[id] = to_store
            end
          rescue => ex
            # A failing resource is logged and left out; the build continues
            @app.logger.warn "Error processing resource for index: #{resource.path}\n#{ex}\n #{ex.backtrace.join("\n ")}"
          end
        end

        # Generate JSON output
        context = ExecJS.compile(source.join("\n"))
        json = context.eval('lunr.middlemanSearchIndex.indexJson()')
        "{\"index\": #{json}, \"docs\": #{store.to_json}}"
      end

      # @return [Boolean] always false
      def binary?
        false
      end

      # @return [Boolean] always false
      def ignored?
        false
      end

      # Extracts the value of a field for the given resource.
      #
      # - 'content' is the text of the resource rendered without its layout
      # - 'url' is the resource url
      # - any other field is read from the resource data, falling back to the
      #   :options entry of the resource metadata; arrays are joined with spaces
      #
      # @param resource the sitemap resource being indexed
      # @param field [Symbol, String] the field name
      # @param opts the field options (not used here)
      # @return [String, nil] the field value, or nil when none is found
      def value_for(resource, field, opts={})
        case field.to_s
        when 'content'
          html = resource.render( { :layout => false }, { :current_path => resource.path } )
          Nokogiri::HTML(html).xpath("//text()").text
        when 'url'
          resource.url
        else
          value = resource.data.send(field) || resource.metadata.fetch(:options, {}).fetch(field, nil)
          value ? Array(value).compact.join(" ") : nil
        end
      end

      private

      # Maps `name.js` to `name.min.js`; names that already end in `.min.js`
      # or do not end in `.js` are returned unchanged.
      #
      # @param resource_name [String] file name of a lunr asset
      # @return [String] the minified file name
      def minified_path(resource_name)
        return resource_name if resource_name.end_with? '.min.js'
        return resource_name unless resource_name.end_with? '.js'
        resource_name.sub(/(.*)\.js$/,'\1.min.js')
      end

      # Finds a lunr asset in the configured directories, in order, trying
      # the minified file name before the plain one within each directory.
      #
      # @param resource_name [String] file name of the asset, e.g. 'lunr.js'
      # @return [String] path of the first existing candidate
      # @raise [RuntimeError] when no candidate file exists
      def lunr_resource(resource_name)
        candidates = @lunr_dirs.flat_map do |dir|
          [File.join(dir, minified_path(resource_name)), File.join(dir, resource_name)]
        end

        # File.exist? replaces File.exists?, which was removed in Ruby 3.2
        found = candidates.detect { |file| File.exist?(file) }
        return found if found

        searched = @lunr_dirs.map { |dir| File.absolute_path dir }.join File::PATH_SEPARATOR
        raise "Couldn't find #{resource_name} nor #{minified_path(resource_name)} in #{searched}"
      end
    end
  end
end