related-blog-posts 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f50d20442166cd9b85bc435f34420984ef196a0fdc82903cf013ce53ef0adfad
4
+ data.tar.gz: 59e826f0cd5c83e40ee8d2575623d5b4fa0005607d7319bd1db14ca1c0dcb531
5
+ SHA512:
6
+ metadata.gz: a4431cdb81e6d5ff30b84ba0c04eb527f55d82147b5729731b6e24cd9ca5d762e1eb7960fd1f4ff0c7861db4f49e12e9ca7c0d5edc902bf26a8060bbc7fe0cc9
7
+ data.tar.gz: 6d8b15dc6e248f4efcd35ef4663835a7d1733a4b13da632a045d31d73d83a8de9037e460ae43f10ef6328b648e2f746ff00db435ea5d0e1053c4306ba887651c
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ *.gem
data/CONTRIBUTE.md ADDED
@@ -0,0 +1,11 @@
1
+ ### How to build
2
+
3
+ ```
4
+ gem build jekyll-tfidf-related-posts.gemspec
5
+ ```
6
+
7
+ ### How to release
8
+
9
+ ```
10
+ gem push jekyll-tfidf-related-posts-0.x.x.gem
11
+ ```
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jekyll-tfidf-related-posts.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Sangsoo Nam
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # jekyll-tfidf-related-posts
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/jekyll-tfidf-related-posts.svg)](https://rubygems.org/gems/jekyll-tfidf-related-posts)
4
+ [![DUB](https://img.shields.io/dub/l/vibe-d.svg)](LICENSE.txt)
5
+
6
+ [Jekyll](http://jekyllrb.com) plugin to show related posts based on the content, tags, and categories. The similarity is calculated using TF-IDF (term frequency–inverse document frequency). Since tags and categories are user-defined values, they are given higher weights than the content during the calculation.
7
+
8
+ ### How to install
9
+
10
+ 1. Install the gem `jekyll-tfidf-related-posts`.
11
+ ```
12
+ $ gem install jekyll-tfidf-related-posts
13
+ ```
14
+ 2. Add the `jekyll-tfidf-related-posts` plugin to `_config.yml`.
15
+ ```yaml
16
+ plugins:
17
+ - jekyll-tfidf-related-posts
18
+ ```
19
+ 3. Run `jekyll build` or `jekyll serve`
20
+
21
+
22
+ ### How to use
23
+ This plugin calculates related posts and replaces the default `site.related_posts`, which otherwise contains the 10 most recent posts. You can therefore render related posts by iterating over `site.related_posts`.
24
+
25
+ ```liquid
26
+ {% for post in site.related_posts %}
27
+ {% include related-post.html %}
28
+ {% endfor %}
29
+ ```
30
+
31
+ > GitHub Pages supports only [these plugins](https://pages.github.com/versions/). For GitHub Pages, you need to generate your site locally and then push static files to GitHub Pages site.
32
+
33
+ ### Configuration
34
+
35
+ By default, 4 related posts are generated for each post. You can change this number in `_config.yml`:
36
+
37
+ ```
38
+ related_posts_count: 8
39
+ ```
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for the related-blog-posts Jekyll plugin.

# Make the gem's own lib/ directory loadable while the spec is evaluated.
lib = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name     = "related-blog-posts"
  spec.version  = "0.1.1"
  spec.authors  = ["Manpreet singh"]
  spec.email    = ["ms4110415@gmail.com"]
  spec.homepage = "https://github.com/ManpreetChoudhary/Jekyll-tfidf-related-posts"
  spec.license  = "MIT"

  spec.summary     = "Jekyll plugin to show related posts based on the content, tags, and categories."
  spec.description = "Jekyll plugin to show related posts based on the content, tags, and categories. The similarity is calculated using TF-IDF(term frequency-inverted document frequency). Since tags and categories are use-defined values, those are considered with higher weights than a content while calculating."

  # Package every file tracked by git (NUL-separated so odd file names survive).
  tracked_files = `git ls-files -z`
  spec.files         = tracked_files.split("\x0")
  spec.require_paths = ["lib"]

  # Runtime dependencies, registered in declaration order.
  {
    "jekyll"           => ">= 3.0",
    "stopwords-filter" => "~> 0.4",
    "fast-stemmer"     => "~> 1.0",
    "pqueue"           => "~> 2.1",
    "nmatrix"          => "~> 0.2"
  }.each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end
end
@@ -0,0 +1,167 @@
1
+ require 'rubygems'
2
+ require 'jekyll'
3
+ require 'fast_stemmer'
4
+ require 'stopwords'
5
+ require 'pqueue'
6
+ require 'nmatrix'
7
+
8
module SangsooNam
  module Jekyll
    # Ranks posts by TF-IDF similarity and stores, on every post, the list of
    # posts most similar to it.
    #
    # Usage: call #add_post once per post, then call #build once with the site.
    # Tag and category terms are weighted more heavily than body/title terms
    # (see #custom_weights).
    class TFIDFRelatedPosts
      def initialize
        @docs = []                # one { post:, content: } hash per added post
        @keywords = []            # every term seen so far (deduplicated in #build)
        @tags_and_categories = [] # the subset of terms that came from tags/categories
        @stopwords_filter = Stopwords::Snowball::Filter.new('en')
      end

      # Registers a post in the index.
      #
      # The post's terms are the stemmed body, the stemmed title and one
      # synthetic term per tag ("@tag:<name>") and category ("@category:<name>").
      #
      # @param post an object responding to #content and #data (a Hash-like
      #   with 'title', 'tags' and 'categories' keys)
      def add_post(post)
        tags = prefixed_terms('@tag:', post.data['tags'])
        categories = prefixed_terms('@category:', post.data['categories'])
        doc = {
          post: post,
          content: stem(post.content) + stem(post.data['title']) + tags + categories
        }

        @docs << doc
        @keywords += doc[:content]
        @tags_and_categories += tags + categories
      end

      # Computes the similarity ranking and writes the result into the
      # @related_posts instance variable of every added post.
      #
      # The number of related posts is read from the site configuration key
      # 'related_posts_count' and defaults to 4.
      #
      # @param site an object responding to #config (Hash-like)
      # @return [Array] the indexed documents
      def build(site)
        # With no posts (or no terms at all) the matrices below would be
        # zero-sized, so leave the posts untouched instead.
        return @docs if @docs.empty? || @keywords.empty?

        @keywords.uniq!
        @tags_and_categories.uniq!
        @weights = custom_weights(@tags_and_categories)
        related = build_related_docs_with_score(site.config['related_posts_count'] || 4)

        @docs.each do |doc|
          doc[:post].instance_variable_set(:@related_posts, related[doc].map { |x| x[:post] })
        end
      end

      private

      # Turns a list of tag/category names into prefixed symbol terms.
      # Accepts nil, a single value or an array; values need not be Strings.
      def prefixed_terms(prefix, values)
        Array(values).map { |value| "#{prefix}#{value}".to_sym }
      end

      # For every document, picks up to +count+ other documents with the
      # highest similarity score.
      #
      # @return [Hash] document => Array of { score:, post: }, best match first
      def build_related_docs_with_score(count = 8)
        dc = document_correleation
        result = {}

        @docs.each_with_index do |doc, index|
          # Entries are [score, document index]; the block makes the highest
          # score pop first.
          queue = PQueue.new(dc.row(index).each_with_index) do |a, b|
            a[0] > b[0]
          end

          related = []
          while related.size < count && !queue.empty?
            score, id = queue.pop
            next if id == index # a post is never related to itself

            related << { score: score, post: @docs[id][:post] }
          end
          result[doc] = related
        end

        result
      end

      # Builds the document-by-document similarity matrix:
      # sim(u, v) = u.v / (|u|^2 + |v|^2 - u.v), with a zero diagonal.
      def document_correleation
        scores = tfidf
        result = scores.dot(scores.transpose)

        # The diagonal holds each document's squared norm. Capture it before
        # the loop below overwrites the diagonal with zeros.
        norms = Array.new(result.rows) { |i| result[i, i] }

        result.each_with_indices do |product, u, v|
          if u == v
            result[u, v] = 0.0
          else
            denominator = norms[u] + norms[v] - product
            # The denominator is zero only when both documents have no terms.
            result[u, v] = denominator.zero? ? 0.0 : product / denominator
          end
        end

        result
      end

      # Builds the documents x keywords matrix of raw term counts.
      # Also records each document's highest count in @max and keeps a copy
      # of the raw counts in @bag_of_words.
      def bag_of_words
        result = NMatrix.new([@docs.size, @keywords.size], 0.0)
        @max = NMatrix.new([@docs.size], 0.0)

        # Count each document's terms once instead of rescanning the document
        # for every keyword.
        frequencies = @docs.map do |doc|
          doc[:content].each_with_object(Hash.new(0)) { |term, seen| seen[term] += 1 }
        end

        result.each_with_indices do |_, pi, ki|
          occurrences = frequencies[pi][@keywords[ki]]
          result[pi, ki] = occurrences
          @max[pi] = occurrences if occurrences > @max[pi]
        end

        @bag_of_words = result.dup
        result
      end

      # Converts raw counts into weighted term frequencies, normalised by the
      # document's highest term count.
      def term_frequency
        result = bag_of_words

        result.rows.times do |r|
          # A document without terms has an all-zero row; skipping it avoids 0/0.
          next if @max[r].zero?

          result[r, 0..-1] *= @weights
          result[r, 0..-1] /= @max[r]
        end

        result
      end

      # Builds the 1 x keywords weight row: +weight+ for the given terms,
      # 1.0 for every other keyword.
      def custom_weights(terms, weight = 8.0)
        result = NMatrix.new([1, @keywords.size], 1.0)

        terms.each do |term|
          result[0, @keywords.index(term)] = weight
        end

        result
      end

      # Builds the 1 x keywords row of log(documents / documents containing
      # the keyword). Must run after #bag_of_words has filled @bag_of_words.
      def inverse_document_frequency
        result = NMatrix.new([1, @keywords.size], 0.0)

        @bag_of_words.each_column do |column|
          # Seed with 0.0 so the first document is counted as present/absent
          # like every other one, not by its raw term count.
          occurences = column.reduce(0.0) do |m, c|
            m + (c > 0 ? 1.0 : 0.0)
          end

          result[0, column.offset[1]] = Math.log(column.size / occurences) if occurences > 0
        end

        result
      end

      # Multiplies every document's term frequencies by the IDF row.
      def tfidf
        result = term_frequency
        idf = inverse_document_frequency

        result.rows.times do |r|
          result[r, 0..-1] *= idf
        end

        result
      end

      # Tokenises text into stemmed, stopword-free symbol terms.
      # nil is treated as empty text.
      def stem(data)
        # Non-greedy, so text between two Liquid tags on one line is kept.
        text = data.to_s.gsub(/\{%.+?%\}/, ' ')
        tokenized = text.scan(/\w+/).map(&:downcase)
        filtered = @stopwords_filter.filter(tokenized)
        filtered.map(&:stem).select { |s| s.length > 1 }.map(&:to_sym)
      end
    end
  end
end
157
+
158
# Before the site is rendered, index every post and replace each post's
# related-posts list with the TF-IDF ranking.
Jekyll::Hooks.register :site, :pre_render do |site|
  Jekyll.logger.info("Building TFIDF index...")
  tfidf = SangsooNam::Jekyll::TFIDFRelatedPosts.new
  site.posts.docs.each { |post| tfidf.add_post(post) }

  Jekyll.logger.info("Replaceing Related Posts...")
  tfidf.build(site)
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: related-blog-posts
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Manpreet singh
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: stopwords-filter
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: fast-stemmer
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pqueue
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.1'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.1'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nmatrix
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.2'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.2'
83
+ description: Jekyll plugin to show related posts based on the content, tags, and categories. The
84
+ similarity is calculated using TF-IDF(term frequency-inverted document frequency).
85
+ Since tags and categories are use-defined values, those are considered with higher
86
+ weights than a content while calculating.
87
+ email:
88
+ - ms4110415@gmail.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - CONTRIBUTE.md
95
+ - Gemfile
96
+ - LICENSE.txt
97
+ - README.md
98
+ - jekyll-tfidf-related-posts.gemspec
99
+ - lib/jekyll-tfidf-related-posts.rb
100
+ homepage: https://github.com/ManpreetChoudhary/Jekyll-tfidf-related-posts
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubygems_version: 3.1.2
120
+ signing_key:
121
+ specification_version: 4
122
+ summary: Jekyll plugin to show related posts based on the content, tags, and categories.
123
+ test_files: []