jekyll-lunr 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e18aceb9b02aa376f8c9006f399c0135362db2cdf2c429876e7e4a5856b560d8
4
- data.tar.gz: e3fed8cb38dd1610db828a54ae1b3a9361b795e9b816f2df4c8826f72d566bd6
3
+ metadata.gz: 6d9231b2c5cdb6d759407c816f1b4bd7261d645d618fdc6e4a8e4820c30a7f31
4
+ data.tar.gz: 7bfcba35fffba4f8320d1f71d30fd3a3b049bac0f676051949c9a7092a983a6b
5
5
  SHA512:
6
- metadata.gz: 530ec545d0ce6d20ed562534d2e77f0d6abdac0510e816f525fd12196d05d675fd0286190944aaebed329399af744451115324a43aa285062b8c92e04f4409d3
7
- data.tar.gz: 88a4ef07c75174d03aaa89ebf0834b1665b53c3219c0c89a3d72e36915149270142b76d7ee9aa630c947dcb2b68a0cc4ad65195fee35761345c707513f11540f
6
+ metadata.gz: 02d33e0d128d47d83ee4222262a52918052a193ebcba6abe0b14bd3a0a82e019686ff8c4fd227f41881ff7ef70cc72e816e439f7c587ce33e401861d03b2e7af
7
+ data.tar.gz: daf4707ffa247fd056c3c50eb187f8bc5606702b50c03376aff61f03b643f5683e8dbf681c84d0804e3db7b243aa6510de24835844312b48f32218b1a84b79f1
data/README.md CHANGED
@@ -55,6 +55,11 @@ jekyll-lunr:
55
55
  Two files will be generated, `data.json` and `idx.json`. You can
56
56
  download them from your `search.js`.
57
57
 
58
+ ## Skip indexing
59
+
60
+ Following jekyll-sitemap convention, posts with `sitemap: false` in
61
+ their front matter are skipped from indexing.
62
+
58
63
  ## Development
59
64
 
60
65
  After checking out the repo, run `bin/setup` to install dependencies.
data/lib/jekyll/lunr.rb CHANGED
@@ -3,12 +3,14 @@
3
3
  require 'json'
4
4
  require 'open3'
5
5
  require 'loofah'
6
+ require 'stopwords'
6
7
 
7
8
  module Jekyll
8
9
  module Lunr
9
10
  # Error
10
11
  class Error < StandardError; end
11
12
 
13
+ # Add the indexer to the site
12
14
  module IndexableSite
13
15
  def self.included(base)
14
16
  base.class_eval do
@@ -19,6 +21,7 @@ module Jekyll
19
21
  end
20
22
  end
21
23
 
24
+ # Index a Jekyll::Document
22
25
  module IndexableDocument
23
26
  def self.included(base)
24
27
  base.class_eval do
@@ -26,8 +29,7 @@ module Jekyll
26
29
  data
27
30
  .slice(*site.indexer.fields)
28
31
  .transform_values { |value| extract_data_recursively(value) }
29
- .merge('content' => Loofah.fragment(content).to_text,
30
- 'url' => url,
32
+ .merge('content' => Loofah.fragment(content).to_text.strip,
31
33
  'id' => url,
32
34
  'year' => date&.year)
33
35
  end
@@ -38,6 +40,12 @@ module Jekyll
38
40
  when Hash then value.transform_values { |v| extract_data(v) }
39
41
  when Jekyll::Document then extract_data(value)
40
42
  when Jekyll::Page then extract_data(value)
43
+ when String
44
+ if value.include? '<'
45
+ Loofah.fragment(value).to_text.strip
46
+ else
47
+ value.strip
48
+ end
41
49
  else value
42
50
  end
43
51
  end
@@ -60,7 +68,11 @@ module Jekyll
60
68
  # The data is the register where Lunr looks for results.
61
69
  # TODO: Write a single data file per doc?
62
70
  def data
63
- @data ||= site.documents.map(&:to_data)
71
+ @data ||= site.documents.select do |doc|
72
+ doc.respond_to? :to_data
73
+ end.reject do |doc|
74
+ doc.data['sitemap'] == false
75
+ end.map(&:to_data)
64
76
  end
65
77
 
66
78
  def data_file
@@ -72,14 +84,30 @@ module Jekyll
72
84
  @indexable_data ||= data.map do |d|
73
85
  d.transform_values do |v|
74
86
  case v
75
- when Array then v.join(', ')
76
- when Hash then v.values.join(', ')
87
+ when Array
88
+ v.map do |vv|
89
+ vv.is_a?(Hash) ? vv['title'] : vv
90
+ end.compact.join(', ')
91
+ when Hash then v['title'] || v.values.map(&:to_s).join(', ')
77
92
  else v.to_s
78
93
  end
79
94
  end
80
95
  end
81
96
  end
82
97
 
98
+ def cleanup(data)
99
+ cleaned_data = data.dup
100
+
101
+ if (lang = lang&.to_sym)
102
+ sieve = Stopwords::Snowball::WordSieve.new
103
+ words = cleaned_data.split(' ').map(&:strip)
104
+
105
+ cleaned_data = site.filter(lang: lang, words: words).join(' ')
106
+ end
107
+
108
+ cleaned_data
109
+ end
110
+
83
111
  def write
84
112
  File.open(data_file.path, 'w') do |df|
85
113
  df.write data.to_json
@@ -107,7 +135,12 @@ module Jekyll
107
135
  def index
108
136
  Open3.popen2(env, *indexer) do |stdin, stdout, wait|
109
137
  indexable_data.each do |data|
110
- stdin.puts data.to_json
138
+ stdin.puts(data.transform_values do |val|
139
+ case val
140
+ when String then cleanup val
141
+ else val
142
+ end
143
+ end.to_json)
111
144
  end
112
145
  stdin.close
113
146
 
@@ -123,7 +156,7 @@ module Jekyll
123
156
 
124
157
  # Site lang
125
158
  def lang
126
- @lang ||= site.config.dig('lang').freeze
159
+ @lang ||= site.config['lang'].freeze
127
160
  end
128
161
 
129
162
  # Indexable fields
data/lib/jekyll-lunr.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'jekyll/lunr'
2
4
 
3
5
  Jekyll::Hooks.register :site, :pre_render do |site|
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-lunr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-15 00:00:00.000000000 Z
11
+ date: 2023-05-04 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.2'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: loofah
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -24,6 +38,20 @@ dependencies:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: stopwords-filter
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0.6'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -97,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
125
  - !ruby/object:Gem::Version
98
126
  version: '0'
99
127
  requirements: []
100
- rubygems_version: 3.1.2
128
+ rubygems_version: 3.3.26
101
129
  signing_key:
102
130
  specification_version: 4
103
131
  summary: Lunr indexer for Jekyll