jekyll-lunr 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/lib/jekyll/lunr.rb +40 -7
- data/lib/jekyll-lunr.rb +2 -0
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d9231b2c5cdb6d759407c816f1b4bd7261d645d618fdc6e4a8e4820c30a7f31
|
4
|
+
data.tar.gz: 7bfcba35fffba4f8320d1f71d30fd3a3b049bac0f676051949c9a7092a983a6b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 02d33e0d128d47d83ee4222262a52918052a193ebcba6abe0b14bd3a0a82e019686ff8c4fd227f41881ff7ef70cc72e816e439f7c587ce33e401861d03b2e7af
|
7
|
+
data.tar.gz: daf4707ffa247fd056c3c50eb187f8bc5606702b50c03376aff61f03b643f5683e8dbf681c84d0804e3db7b243aa6510de24835844312b48f32218b1a84b79f1
|
data/README.md
CHANGED
@@ -55,6 +55,11 @@ jekyll-lunr:
|
|
55
55
|
Two files will be generated, `data.json` and `idx.json`. You can
|
56
56
|
download them from your `search.js`.
|
57
57
|
|
58
|
+
## Skip indexing
|
59
|
+
|
60
|
+
Following jekyll-sitemap convention, posts with `sitemap: false` in
|
61
|
+
their front matter are skipped from indexing.
|
62
|
+
|
58
63
|
## Development
|
59
64
|
|
60
65
|
After checking out the repo, run `bin/setup` to install dependencies.
|
data/lib/jekyll/lunr.rb
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
require 'json'
|
4
4
|
require 'open3'
|
5
5
|
require 'loofah'
|
6
|
+
require 'stopwords'
|
6
7
|
|
7
8
|
module Jekyll
|
8
9
|
module Lunr
|
9
10
|
# Error
|
10
11
|
class Error < StandardError; end
|
11
12
|
|
13
|
+
# Add the indexer to the site
|
12
14
|
module IndexableSite
|
13
15
|
def self.included(base)
|
14
16
|
base.class_eval do
|
@@ -19,6 +21,7 @@ module Jekyll
|
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
24
|
+
# Index a Jekyll::Document
|
22
25
|
module IndexableDocument
|
23
26
|
def self.included(base)
|
24
27
|
base.class_eval do
|
@@ -26,8 +29,7 @@ module Jekyll
|
|
26
29
|
data
|
27
30
|
.slice(*site.indexer.fields)
|
28
31
|
.transform_values { |value| extract_data_recursively(value) }
|
29
|
-
.merge('content' => Loofah.fragment(content).to_text,
|
30
|
-
'url' => url,
|
32
|
+
.merge('content' => Loofah.fragment(content).to_text.strip,
|
31
33
|
'id' => url,
|
32
34
|
'year' => date&.year)
|
33
35
|
end
|
@@ -38,6 +40,12 @@ module Jekyll
|
|
38
40
|
when Hash then value.transform_values { |v| extract_data(v) }
|
39
41
|
when Jekyll::Document then extract_data(value)
|
40
42
|
when Jekyll::Page then extract_data(value)
|
43
|
+
when String
|
44
|
+
if value.include? '<'
|
45
|
+
Loofah.fragment(value).to_text.strip
|
46
|
+
else
|
47
|
+
value.strip
|
48
|
+
end
|
41
49
|
else value
|
42
50
|
end
|
43
51
|
end
|
@@ -60,7 +68,11 @@ module Jekyll
|
|
60
68
|
# The data is the register where Lunr looks for results.
|
61
69
|
# TODO: Write a single data file per doc?
|
62
70
|
def data
|
63
|
-
@data ||= site.documents.map(&:to_data)
|
71
|
+
@data ||= site.documents.select do |doc|
|
72
|
+
doc.respond_to? :to_data
|
73
|
+
end.reject do |doc|
|
74
|
+
doc.data['sitemap'] == false
|
75
|
+
end.map(&:to_data)
|
64
76
|
end
|
65
77
|
|
66
78
|
def data_file
|
@@ -72,14 +84,30 @@ module Jekyll
|
|
72
84
|
@indexable_data ||= data.map do |d|
|
73
85
|
d.transform_values do |v|
|
74
86
|
case v
|
75
|
-
when Array
|
76
|
-
|
87
|
+
when Array
|
88
|
+
v.map do |vv|
|
89
|
+
vv.is_a?(Hash) ? vv['title'] : vv
|
90
|
+
end.compact.join(', ')
|
91
|
+
when Hash then v['title'] || v.values.map(&:to_s).join(', ')
|
77
92
|
else v.to_s
|
78
93
|
end
|
79
94
|
end
|
80
95
|
end
|
81
96
|
end
|
82
97
|
|
98
|
+
def cleanup(data)
|
99
|
+
cleaned_data = data.dup
|
100
|
+
|
101
|
+
if (lang = lang&.to_sym)
|
102
|
+
sieve = Stopwords::Snowball::WordSieve.new
|
103
|
+
words = cleaned_data.split(' ').map(&:strip)
|
104
|
+
|
105
|
+
cleaned_data = site.filter(lang: lang, words: words).join(' ')
|
106
|
+
end
|
107
|
+
|
108
|
+
cleaned_data
|
109
|
+
end
|
110
|
+
|
83
111
|
def write
|
84
112
|
File.open(data_file.path, 'w') do |df|
|
85
113
|
df.write data.to_json
|
@@ -107,7 +135,12 @@ module Jekyll
|
|
107
135
|
def index
|
108
136
|
Open3.popen2(env, *indexer) do |stdin, stdout, wait|
|
109
137
|
indexable_data.each do |data|
|
110
|
-
stdin.puts data.to_json
|
138
|
+
stdin.puts(data.transform_values do |val|
|
139
|
+
case val
|
140
|
+
when String then cleanup val
|
141
|
+
else val
|
142
|
+
end
|
143
|
+
end.to_json)
|
111
144
|
end
|
112
145
|
stdin.close
|
113
146
|
|
@@ -123,7 +156,7 @@ module Jekyll
|
|
123
156
|
|
124
157
|
# Site lang
|
125
158
|
def lang
|
126
|
-
@lang ||= site.config
|
159
|
+
@lang ||= site.config['lang'].freeze
|
127
160
|
end
|
128
161
|
|
129
162
|
# Indexable fields
|
data/lib/jekyll-lunr.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-lunr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- f
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: jekyll
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.2'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '4.2'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: loofah
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -24,6 +38,20 @@ dependencies:
|
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
40
|
version: '2.4'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: stopwords-filter
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.6'
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: bundler
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -97,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
125
|
- !ruby/object:Gem::Version
|
98
126
|
version: '0'
|
99
127
|
requirements: []
|
100
|
-
rubygems_version: 3.
|
128
|
+
rubygems_version: 3.3.26
|
101
129
|
signing_key:
|
102
130
|
specification_version: 4
|
103
131
|
summary: Lunr indexer for Jekyll
|