jekyll-lunr 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e18aceb9b02aa376f8c9006f399c0135362db2cdf2c429876e7e4a5856b560d8
4
- data.tar.gz: e3fed8cb38dd1610db828a54ae1b3a9361b795e9b816f2df4c8826f72d566bd6
3
+ metadata.gz: 6d9231b2c5cdb6d759407c816f1b4bd7261d645d618fdc6e4a8e4820c30a7f31
4
+ data.tar.gz: 7bfcba35fffba4f8320d1f71d30fd3a3b049bac0f676051949c9a7092a983a6b
5
5
  SHA512:
6
- metadata.gz: 530ec545d0ce6d20ed562534d2e77f0d6abdac0510e816f525fd12196d05d675fd0286190944aaebed329399af744451115324a43aa285062b8c92e04f4409d3
7
- data.tar.gz: 88a4ef07c75174d03aaa89ebf0834b1665b53c3219c0c89a3d72e36915149270142b76d7ee9aa630c947dcb2b68a0cc4ad65195fee35761345c707513f11540f
6
+ metadata.gz: 02d33e0d128d47d83ee4222262a52918052a193ebcba6abe0b14bd3a0a82e019686ff8c4fd227f41881ff7ef70cc72e816e439f7c587ce33e401861d03b2e7af
7
+ data.tar.gz: daf4707ffa247fd056c3c50eb187f8bc5606702b50c03376aff61f03b643f5683e8dbf681c84d0804e3db7b243aa6510de24835844312b48f32218b1a84b79f1
data/README.md CHANGED
@@ -55,6 +55,11 @@ jekyll-lunr:
55
55
  Two files will be generated, `data.json` and `idx.json`. You can
56
56
  download them from your `search.js`.
57
57
 
58
+ ## Skip indexing
59
+
60
+ Following jekyll-sitemap convention, posts with `sitemap: false` in
61
+ their front matter are skipped from indexing.
62
+
58
63
  ## Development
59
64
 
60
65
  After checking out the repo, run `bin/setup` to install dependencies.
data/lib/jekyll/lunr.rb CHANGED
@@ -3,12 +3,14 @@
3
3
  require 'json'
4
4
  require 'open3'
5
5
  require 'loofah'
6
+ require 'stopwords'
6
7
 
7
8
  module Jekyll
8
9
  module Lunr
9
10
  # Error
10
11
  class Error < StandardError; end
11
12
 
13
+ # Add the indexer to the site
12
14
  module IndexableSite
13
15
  def self.included(base)
14
16
  base.class_eval do
@@ -19,6 +21,7 @@ module Jekyll
19
21
  end
20
22
  end
21
23
 
24
+ # Index a Jekyll::Document
22
25
  module IndexableDocument
23
26
  def self.included(base)
24
27
  base.class_eval do
@@ -26,8 +29,7 @@ module Jekyll
26
29
  data
27
30
  .slice(*site.indexer.fields)
28
31
  .transform_values { |value| extract_data_recursively(value) }
29
- .merge('content' => Loofah.fragment(content).to_text,
30
- 'url' => url,
32
+ .merge('content' => Loofah.fragment(content).to_text.strip,
31
33
  'id' => url,
32
34
  'year' => date&.year)
33
35
  end
@@ -38,6 +40,12 @@ module Jekyll
38
40
  when Hash then value.transform_values { |v| extract_data(v) }
39
41
  when Jekyll::Document then extract_data(value)
40
42
  when Jekyll::Page then extract_data(value)
43
+ when String
44
+ if value.include? '<'
45
+ Loofah.fragment(value).to_text.strip
46
+ else
47
+ value.strip
48
+ end
41
49
  else value
42
50
  end
43
51
  end
@@ -60,7 +68,11 @@ module Jekyll
60
68
  # The data is the register where Lunr looks for results.
61
69
  # TODO: Write a single data file per doc?
62
70
  def data
63
- @data ||= site.documents.map(&:to_data)
71
+ @data ||= site.documents.select do |doc|
72
+ doc.respond_to? :to_data
73
+ end.reject do |doc|
74
+ doc.data['sitemap'] == false
75
+ end.map(&:to_data)
64
76
  end
65
77
 
66
78
  def data_file
@@ -72,14 +84,30 @@ module Jekyll
72
84
  @indexable_data ||= data.map do |d|
73
85
  d.transform_values do |v|
74
86
  case v
75
- when Array then v.join(', ')
76
- when Hash then v.values.join(', ')
87
+ when Array
88
+ v.map do |vv|
89
+ vv.is_a?(Hash) ? vv['title'] : vv
90
+ end.compact.join(', ')
91
+ when Hash then v['title'] || v.values.map(&:to_s).join(', ')
77
92
  else v.to_s
78
93
  end
79
94
  end
80
95
  end
81
96
  end
82
97
 
98
+ def cleanup(data)
99
+ cleaned_data = data.dup
100
+
101
+ if (lang = lang&.to_sym)
102
+ sieve = Stopwords::Snowball::WordSieve.new
103
+ words = cleaned_data.split(' ').map(&:strip)
104
+
105
+ cleaned_data = site.filter(lang: lang, words: words).join(' ')
106
+ end
107
+
108
+ cleaned_data
109
+ end
110
+
83
111
  def write
84
112
  File.open(data_file.path, 'w') do |df|
85
113
  df.write data.to_json
@@ -107,7 +135,12 @@ module Jekyll
107
135
  def index
108
136
  Open3.popen2(env, *indexer) do |stdin, stdout, wait|
109
137
  indexable_data.each do |data|
110
- stdin.puts data.to_json
138
+ stdin.puts(data.transform_values do |val|
139
+ case val
140
+ when String then cleanup val
141
+ else val
142
+ end
143
+ end.to_json)
111
144
  end
112
145
  stdin.close
113
146
 
@@ -123,7 +156,7 @@ module Jekyll
123
156
 
124
157
  # Site lang
125
158
  def lang
126
- @lang ||= site.config.dig('lang').freeze
159
+ @lang ||= site.config['lang'].freeze
127
160
  end
128
161
 
129
162
  # Indexable fields
data/lib/jekyll-lunr.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'jekyll/lunr'
2
4
 
3
5
  Jekyll::Hooks.register :site, :pre_render do |site|
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-lunr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-15 00:00:00.000000000 Z
11
+ date: 2023-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.2'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: loofah
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -24,6 +38,20 @@ dependencies:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: stopwords-filter
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0.6'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -97,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
125
  - !ruby/object:Gem::Version
98
126
  version: '0'
99
127
  requirements: []
100
- rubygems_version: 3.1.2
128
+ rubygems_version: 3.3.26
101
129
  signing_key:
102
130
  specification_version: 4
103
131
  summary: Lunr indexer for Jekyll