related-blog-posts 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f50d20442166cd9b85bc435f34420984ef196a0fdc82903cf013ce53ef0adfad
4
+ data.tar.gz: 59e826f0cd5c83e40ee8d2575623d5b4fa0005607d7319bd1db14ca1c0dcb531
5
+ SHA512:
6
+ metadata.gz: a4431cdb81e6d5ff30b84ba0c04eb527f55d82147b5729731b6e24cd9ca5d762e1eb7960fd1f4ff0c7861db4f49e12e9ca7c0d5edc902bf26a8060bbc7fe0cc9
7
+ data.tar.gz: 6d8b15dc6e248f4efcd35ef4663835a7d1733a4b13da632a045d31d73d83a8de9037e460ae43f10ef6328b648e2f746ff00db435ea5d0e1053c4306ba887651c
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ *.gem
data/CONTRIBUTE.md ADDED
@@ -0,0 +1,11 @@
1
+ ### How to build
2
+
3
+ ```
4
+ gem build jekyll-tfidf-related-posts.gemspec
5
+ ```
6
+
7
+ ### How to release
8
+
9
+ ```
10
+ gem push jekyll-tfidf-related-posts-0.x.x.gem
11
+ ```
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jekyll-tfidf-related-posts.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Sangsoo Nam
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # jekyll-tfidf-related-posts
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/jekyll-tfidf-related-posts.svg)](https://rubygems.org/gems/jekyll-tfidf-related-posts)
4
+ [![DUB](https://img.shields.io/dub/l/vibe-d.svg)](LICENSE.txt)
5
+
6
+ [Jekyll](http://jekyllrb.com) plugin to show related posts based on the content, tags, and categories. The similarity is calculated using TF-IDF (term frequency-inverse document frequency). Since tags and categories are user-defined values, they are given higher weights than the content when calculating.
7
+
8
+ ### How to install
9
+
10
+ 1. Install the gem `jekyll-tfidf-related-posts`.
11
+ ```
12
+ $ gem install jekyll-tfidf-related-posts
13
+ ```
14
+ 2. Add the `jekyll-tfidf-related-posts` plugin in `_config.yml`.
15
+ ```yaml
16
+ plugins:
17
+ - jekyll-tfidf-related-posts
18
+ ```
19
+ 3. Run `jekyll build` or `jekyll serve`
20
+
21
+
22
+ ### How to use
23
+ This plugin calculates related posts and replaces the contents of `site.related_posts`, which by default holds the 10 most recent posts. You can therefore render related posts by iterating over `site.related_posts`.
24
+
25
+ ```liquid
26
+ {% for post in site.related_posts %}
27
+ {% include related-post.html %}
28
+ {% endfor %}
29
+ ```
30
+
31
+ > GitHub Pages supports only [these plugins](https://pages.github.com/versions/). For GitHub Pages, you need to generate your site locally and then push static files to GitHub Pages site.
32
+
33
+ ### Configuration
34
+
35
+ By default, there are 4 related posts. You can configure it in the `_config.yml`
36
+
37
+ ```
38
+ related_posts_count: 8
39
+ ```
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for the related-blog-posts Jekyll plugin.

# Put the gem's own lib/ directory on the load path (once).
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "related-blog-posts"
  spec.version       = "0.1.1"
  spec.authors       = ["Manpreet singh"]
  spec.email         = ["ms4110415@gmail.com"]
  spec.summary       = %q{Jekyll plugin to show related posts based on the content, tags, and categories.}
  spec.description   = %q{Jekyll plugin to show related posts based on the content, tags, and categories. The similarity is calculated using TF-IDF (term frequency-inverse document frequency). Since tags and categories are user-defined values, they are given higher weights than the content when calculating.}
  spec.homepage      = "https://github.com/ManpreetChoudhary/Jekyll-tfidf-related-posts"
  spec.license       = "MIT"

  # Package exactly what git tracks. When git is not installed, or the
  # sources are not inside a repository (git then prints nothing on stdout),
  # fall back to a conservative glob so the gem is never built empty.
  tracked_files = begin
    `git ls-files -z`.split("\x0")
  rescue Errno::ENOENT
    []
  end
  if tracked_files.empty?
    tracked_files = Dir.glob('{lib/**/*,*.md,*.txt,Gemfile,*.gemspec}').select { |path| File.file?(path) }
  end
  spec.files         = tracked_files
  spec.require_paths = ["lib"]

  # Runtime dependencies required by the library file.
  spec.add_dependency "jekyll", ">= 3.0"
  spec.add_dependency "stopwords-filter", "~> 0.4"
  spec.add_dependency "fast-stemmer", "~> 1.0"
  spec.add_dependency "pqueue", "~> 2.1"
  spec.add_dependency "nmatrix", "~> 0.2"
end
@@ -0,0 +1,167 @@
1
+ require 'rubygems'
2
+ require 'jekyll'
3
+ require 'fast_stemmer'
4
+ require 'stopwords'
5
+ require 'pqueue'
6
+ require 'nmatrix'
7
+
8
module SangsooNam
  module Jekyll
    # Collects posts, builds a weighted TF-IDF matrix over their stemmed
    # words, tags and categories, and stores the most similar posts on each
    # post as its +@related_posts+ instance variable.
    #
    # Usage: call #add_post once per post, then #build once.
    class TFIDFRelatedPosts
      def initialize
        @docs = []
        @keywords = []
        @tags_and_categories = []
        @stopwords_filter = Stopwords::Snowball::Filter.new('en')
      end

      # Registers a post with the index.
      #
      # post - object responding to +content+ and to +data+, where +data+ is
      #        indexable by 'title', 'tags' and 'categories'. Missing (nil)
      #        title, tags or categories are treated as empty.
      def add_post(post)
        tags = labelled_terms('@tag:', post.data['tags'])
        categories = labelled_terms('@category:', post.data['categories'])
        doc = {
          post: post,
          content: stem(post.content) + stem(post.data['title']) + tags + categories
        }
        @docs << doc
        @keywords += doc[:content]
        @tags_and_categories += tags + categories
      end

      # Computes similarities between all added posts and assigns each post's
      # related posts.
      #
      # site - object whose +config+ hash may carry 'related_posts_count'
      #        (4 is used when it is absent).
      def build(site)
        @keywords.uniq!
        @tags_and_categories.uniq!
        # Nothing to index: leave the posts untouched rather than attempting
        # to build matrices with a zero dimension.
        return if @docs.empty? || @keywords.empty?

        @weights = custom_weights(@tags_and_categories)
        related = build_related_docs_with_score(site.config['related_posts_count'] || 4)

        @docs.each do |doc|
          doc[:post].instance_variable_set(:@related_posts, related[doc].map { |x| x[:post] })
        end
      end

      private

      # Turns raw tag/category values into prefixed symbols so they can never
      # collide with ordinary stemmed words. Accepts nil and non-string values.
      def labelled_terms(prefix, values)
        Array(values).map { |value| "#{prefix}#{value}".to_sym }
      end

      # Returns a hash mapping each doc to an array of
      # { score:, post: } hashes, best match first, at most +count+ long.
      def build_related_docs_with_score(count = 8)
        dc = document_correleation
        result = {}
        # A post can be related to at most every *other* post.
        count = [count, @docs.size - 1].min

        @docs.each_with_index do |doc, index|
          # Drop the post's own column so it is never listed as related to
          # itself (its diagonal score of 0.0 ties with unrelated posts).
          candidates = dc.row(index).each_with_index.reject { |_score, id| id == index }
          queue = PQueue.new(candidates) do |a, b|
            a[0] > b[0]
          end

          result[doc] = []
          count.times do
            score, id = queue.pop
            break if id.nil? # queue exhausted

            result[doc] << { score: score, post: @docs[id][:post] }
          end
        end

        result
      end

      # Pairwise similarity matrix of the TF-IDF rows using the Tanimoto
      # coefficient a.b / (a.a + b.b - a.b). The diagonal is set to 0.0.
      def document_correleation
        scores = tfidf
        result = scores.dot(scores.transpose)

        # Snapshot the self-products first: the diagonal is overwritten with
        # 0.0 below, and every off-diagonal cell needs the original values.
        norms = Array.new(result.rows) { |i| result[i, i] }

        result.each_with_indices do |_, u, v|
          if u != v
            denominator = norms[u] + norms[v] - result[u, v]
            # Both rows are all-zero (empty posts): define similarity as 0.
            result[u, v] = denominator.zero? ? 0.0 : result[u, v] / denominator
          else
            result[u, v] = 0.0
          end
        end

        result
      end

      # Builds the docs x keywords raw count matrix. Also records each doc's
      # highest raw count in @max and keeps a copy in @bag_of_words for the
      # IDF computation.
      def bag_of_words
        result = NMatrix.new([@docs.size, @keywords.size], 0.0)
        @max = NMatrix.new([@docs.size], 0.0)

        # Count every term once per document instead of rescanning the
        # document for each keyword.
        term_counts = @docs.map do |doc|
          doc[:content].each_with_object(Hash.new(0)) { |term, counts| counts[term] += 1 }
        end

        result.each_with_indices do |_, pi, ki|
          result[pi, ki] = term_counts[pi][@keywords[ki]]

          if result[pi, ki] > @max[pi]
            @max[pi] = result[pi, ki]
          end
        end

        @bag_of_words = result.dup
        result
      end

      # Raw counts scaled by the per-keyword weights and normalised by the
      # document's highest raw count.
      def term_frequency
        result = bag_of_words

        result.rows.times do |r|
          # An empty post has an all-zero row; dividing by its zero maximum
          # would fill the row with NaN.
          next if @max[r].zero?

          result[r, 0..-1] *= @weights
          result[r, 0..-1] /= @max[r]
        end

        result
      end

      # Returns a 1 x keywords row of weights: +weight+ for every keyword in
      # +terms+, 1.0 for all others.
      def custom_weights(terms, weight = 8.0)
        result = NMatrix.new([1, @keywords.size], 1.0)

        terms.each do |term|
          result[0, @keywords.index(term)] = weight
        end

        result
      end

      # Returns a 1 x keywords row holding log(N / df) per keyword, where df
      # is the number of documents containing the keyword.
      def inverse_document_frequency
        result = NMatrix.new([1, @keywords.size], 0.0)

        @bag_of_words.each_column do |column|
          # Explicit 0.0 seed: without it the first document's raw count
          # would be used as the starting total instead of a 0/1 indicator.
          occurences = column.reduce(0.0) do |m, c|
            m + (c > 0 ? 1.0 : 0.0)
          end

          result[0, column.offset[1]] = Math.log(column.size / occurences) if occurences > 0
        end

        result
      end

      # Term-frequency matrix with every row multiplied by the IDF row.
      def tfidf
        result = term_frequency
        idf = inverse_document_frequency

        result.rows.times do |r|
          result[r, 0..-1] *= idf
        end

        result
      end

      # Converts text into an array of stemmed symbols: strips Liquid tags,
      # lowercases, removes stopwords, stems, and drops one-character stems.
      # nil is treated as empty text.
      def stem(data)
        # Non-greedy so each {% ... %} tag is removed on its own; a greedy
        # match would also swallow the prose between two tags.
        text = data.to_s.gsub(/\{%.+?%\}/m, ' ')
        tokenized = text.scan(/\w+/).map(&:downcase)
        filtered = @stopwords_filter.filter(tokenized)
        filtered.map(&:stem).select { |s| s.length > 1 }.map(&:to_sym)
      end
    end
  end
end
157
+
158
# Before the site is rendered, index every post and attach its related posts.
Jekyll::Hooks.register :site, :pre_render do |site|
  Jekyll.logger.info("Building TFIDF index...")
  tfidf = SangsooNam::Jekyll::TFIDFRelatedPosts.new
  site.posts.docs.each do |x|
    tfidf.add_post(x)
  end

  Jekyll.logger.info("Replacing Related Posts...")
  tfidf.build(site)
end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: related-blog-posts
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Manpreet singh
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: stopwords-filter
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: fast-stemmer
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pqueue
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.1'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.1'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nmatrix
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.2'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.2'
83
+ description: Jekyll plugin to show related posts based on the content, tags, and categories. The
84
+ similarity is calculated using TF-IDF(term frequency-inverted document frequency).
85
+ Since tags and categories are use-defined values, those are considered with higher
86
+ weights than a content while calculating.
87
+ email:
88
+ - ms4110415@gmail.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - CONTRIBUTE.md
95
+ - Gemfile
96
+ - LICENSE.txt
97
+ - README.md
98
+ - jekyll-tfidf-related-posts.gemspec
99
+ - lib/jekyll-tfidf-related-posts.rb
100
+ homepage: https://github.com/ManpreetChoudhary/Jekyll-tfidf-related-posts
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubygems_version: 3.1.2
120
+ signing_key:
121
+ specification_version: 4
122
+ summary: Jekyll plugin to show related posts based on the content, tags, and categories.
123
+ test_files: []