bridgetown-related-posts 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/bridgetown-related-posts/builder.rb +137 -0
- data/lib/bridgetown-related-posts/config.rb +12 -0
- data/lib/bridgetown-related-posts.rb +16 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4e102bbfd393fdfb004ea44b684e56a9ae67528047c3a6eaf28fc2c22d08a512
|
4
|
+
data.tar.gz: 10ba94a0dc8e0c8815590e55cf574dff048d78f16ee1892022ab161e277bfc59
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 58f762fa5b087618e12ab8682f79b916913a4b974ad7c7ee1b220f76d210c869aad5eb6e63b604cf591ea9f037d151344a31b2cd15abde7132836270284243a5
|
7
|
+
data.tar.gz: fd98995adb563f00740ae5c50441993c02a840ddc675992f7ed051266267197e8ad17bfc1038e824f84d49b6c326e36ae55e56b2e59ca327aa838a53310336c6
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# lib/bridgetown-related-posts/builder.rb
|
2
|
+
module BridgetownRelatedPosts
|
3
|
+
class Builder < Bridgetown::Builder
|
4
|
+
|
5
|
+
# Seeds the plugin's default related-posts limit.
#
# Keeps an existing truthy `related_posts_limit` and otherwise stores 5.
# NOTE(review): `config` is not defined in this file; presumably it is a
# class-level configuration accessor supplied by Bridgetown::Builder — confirm.
#
# @return [Object] the limit that is in effect after the call
def self.init
  settings = config
  settings.related_posts_limit || (settings.related_posts_limit = 5) # Default value
end
|
8
|
+
|
9
|
+
# Registers a :site / :post_read hook that computes related posts.
#
# When the hook fires it publishes the current config through
# BridgetownRelatedPosts.set_config, gathers the post data, and either logs
# a warning (no posts) or computes the similarity matrix and attaches the
# related posts to each post.
def build
  hook :site, :post_read do
    BridgetownRelatedPosts.set_config(config)
    posts_data = prepare_posts_data

    # Nothing to compare: warn and leave the hook early.
    next Bridgetown.logger.warn("No posts found for RelatedPostsBuilder") if posts_data.empty?

    add_related_posts(posts_data, calculate_similarity(posts_data))
  end
end
|
21
|
+
|
22
|
+
# Collects the fields needed for similarity scoring from every resource in
# the "posts" collection.
#
# A post that raises while being read is logged (message plus its data) and
# left out of the result instead of aborting the build.
#
# @return [Array<Hash>] one hash per readable post with :relative_path,
#   :title (falls back to "Untitled") and :content (falls back to "")
def prepare_posts_data
  posts = site.collections["posts"].resources
  Bridgetown.logger.info "Number of posts found: #{posts.size}"

  posts.filter_map do |post|
    body = post.content

    {
      relative_path: post.relative_path,
      title: post.data["title"] || "Untitled",
      content: body || ""
    }
  rescue => e
    Bridgetown.logger.error "Error processing post #{post.relative_path}: #{e.message}"
    Bridgetown.logger.error "Post data: #{post.data.inspect}"
    nil # dropped by filter_map
  end
end
|
42
|
+
|
43
|
+
# Builds the pairwise similarity table for the given posts.
#
# Each post's content is turned into a TF-IDF vector (using one IDF table
# computed over all posts) and every ordered pair of distinct posts is scored
# with cosine_similarity.
#
# @param posts_data [Array<Hash>] hashes with :relative_path and :content
# @return [Hash{Object => Hash}] outer key is a post's relative path, inner
#   hash maps every other post's relative path to its similarity score
def calculate_similarity(posts_data)
  idf = compute_idf(posts_data)
  vectors = posts_data.map { |entry| compute_tfidf(entry[:content], idf) }
  paths = posts_data.map { |entry| entry[:relative_path] }

  paths.each_with_index.each_with_object({}) do |(path, row), matrix|
    matrix[path] = paths.each_with_index.each_with_object({}) do |(other_path, col), scores|
      # A post is never compared with itself.
      scores[other_path] = cosine_similarity(vectors[row], vectors[col]) unless row == col
    end
  end
end
|
58
|
+
|
59
|
+
# Computes an inverse-document-frequency weight for every word that occurs in
# the given posts.
#
# Words are the whitespace-separated, downcased tokens of each post's
# :content; a word counts once per post no matter how often it repeats.
#
# The weight is the smoothed form log((1 + N) / (1 + df)) + 1, which is
# always positive. The previous log(N / (df + 1)) gave zero for words found
# in N - 1 posts and negative values for words found in every post, so on
# small sites distinctive words could end up with no weight at all.
#
# @param posts_data [Array<Hash>] hashes carrying a String under :content
# @return [Hash{String => Float}] word => IDF weight (empty for no posts)
def compute_idf(posts_data)
  total_docs = posts_data.size

  # Number of posts each word appears in.
  document_frequency = posts_data
    .flat_map { |post| post[:content].downcase.split.uniq }
    .tally

  document_frequency.transform_values do |freq|
    Math.log((1 + total_docs).to_f / (1 + freq)) + 1
  end
end
|
73
|
+
|
74
|
+
# Turns a text into a sparse TF-IDF vector.
#
# Term frequency is the word's count divided by the total number of
# whitespace-separated, downcased words in the text; it is multiplied by the
# word's entry in +idf+, or by 0 when the word has no entry.
#
# @param text [String] the document text
# @param idf [Hash{String => Numeric}] word => IDF weight
# @return [Hash{String => Float}] word => TF-IDF weight, in order of first
#   appearance; empty when the text has no words
def compute_tfidf(text, idf)
  tokens = text.downcase.split
  total = tokens.size

  tokens.tally.to_h do |token, count|
    [token, (count.to_f / total) * (idf[token] || 0)]
  end
end
|
86
|
+
|
87
|
+
# Cosine similarity between two sparse vectors stored as hashes.
#
# Keys missing from either vector count as zero. If either vector has zero
# magnitude (for example an empty document) the result is 0.0; dividing
# through would give NaN, which cannot be ordered when scores are sorted.
#
# @param vec1 [Hash{Object => Numeric}] term => weight
# @param vec2 [Hash{Object => Numeric}] term => weight
# @return [Float] similarity score, 0.0 when it is undefined
def cosine_similarity(vec1, vec2)
  dot_product = vec1.sum(0.0) { |key, weight| weight * vec2.fetch(key, 0.0) }

  mag1 = Math.sqrt(vec1.each_value.sum(0.0) { |weight| weight**2 })
  mag2 = Math.sqrt(vec2.each_value.sum(0.0) { |weight| weight**2 })
  denominator = mag1 * mag2

  return 0.0 if denominator.zero?

  dot_product / denominator
end
|
98
|
+
|
99
|
+
# Stores each post's most similar posts under data["related_posts"].
#
# For every entry in +posts_data+ the scores in +similarity_matrix+ are
# ranked from highest to lowest, the top +limit+ paths are resolved to the
# actual resources of the "posts" collection, and paths that no longer
# resolve are dropped.
#
# The limit comes from `config.related_posts.limit`, then from
# `config.related_posts_limit` (the key `init` assigns), then defaults to 5.
#
# @param posts_data [Array<Hash>] hashes carrying :relative_path
# @param similarity_matrix [Hash{Object => Hash}] path => { other path => score }
# @return [Array<Hash>] +posts_data+, unchanged
def add_related_posts(posts_data, similarity_matrix)
  limit = config.related_posts&.limit || config.related_posts_limit || 5

  # One pass to index resources by path instead of a linear search per lookup.
  # `||=` keeps the first resource for a path, matching what `find` returned.
  resources_by_path = site.collections["posts"].resources.each_with_object({}) do |resource, index|
    index[resource.relative_path] ||= resource
  end

  posts_data.each do |post|
    path = post[:relative_path]
    scores = similarity_matrix[path]

    if scores.nil?
      Bridgetown.logger.warn "No similar posts found for: #{path}"
      next
    end

    actual_post = resources_by_path[path]
    unless actual_post
      Bridgetown.logger.error "Post not found: #{path}"
      next
    end

    related = scores
      # NaN cannot be compared; rank such scores as "no similarity".
      .sort_by { |_, score| score.to_f.nan? ? 0.0 : -score }
      .first(limit)
      .filter_map { |related_path, _| resources_by_path[related_path] }

    actual_post.data["related_posts"] = related

    Bridgetown.logger.info "Related posts for #{actual_post.data["title"]}:"
    related.each do |related_post|
      Bridgetown.logger.info " - #{related_post.data["title"]}"
    end
  end
end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require "bridgetown"
|
2
|
+
require "bridgetown-related-posts/builder"
|
3
|
+
|
4
|
+
# Registers a source manifest for this plugin, listing the RelatedPosts
# builder under the "builders" component key.
# NOTE(review): Bridgetown's source-manifest documentation describes
# `components:` as a directory path; confirm that a hash of builder
# descriptors is accepted here.
Bridgetown::PluginManager.new_source_manifest(
  origin: BridgetownRelatedPosts,
  components: { "builders" => [{ name: "RelatedPosts", class: BridgetownRelatedPosts::Builder }] }
)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bridgetown"
|
2
|
+
require_relative "bridgetown-related-posts/builder"
|
3
|
+
|
4
|
+
# Declares the plugin's initializer: when it runs, the related-posts builder
# is registered on the configuration object it is given.
Bridgetown.initializer(:"bridgetown-related-posts") do |site_config|
  site_config.builder(BridgetownRelatedPosts::Builder)
end
|
7
|
+
|
8
|
+
# Module-level holder for the configuration the plugin is working with.
module BridgetownRelatedPosts
  class << self
    # Returns the stored configuration, creating a fresh
    # Bridgetown::Configuration the first time if none has been set.
    def config
      @config ||= Bridgetown::Configuration.new
    end

    # Replaces the stored configuration.
    #
    # @param config [Object] the configuration to remember
    # @return [Object] the configuration that was passed in
    def set_config(config)
      @config = config
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bridgetown-related-posts
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Clarkson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-07-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bridgetown
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.3.0
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '2.0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.3.0
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2.0'
|
33
|
+
description: This plugin calculates and adds related posts to your Bridgetown site
|
34
|
+
using TF-IDF and cosine similarity.
|
35
|
+
email: mpclarkson@gmail.com
|
36
|
+
executables: []
|
37
|
+
extensions: []
|
38
|
+
extra_rdoc_files: []
|
39
|
+
files:
|
40
|
+
- lib/bridgetown-related-posts.rb
|
41
|
+
- lib/bridgetown-related-posts/builder.rb
|
42
|
+
- lib/bridgetown-related-posts/config.rb
|
43
|
+
homepage: https://github.com/mpclarkson/bridgetown-related-posts/
|
44
|
+
licenses:
|
45
|
+
- MIT
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 3.3.0
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubygems_version: 3.5.10
|
63
|
+
signing_key:
|
64
|
+
specification_version: 4
|
65
|
+
summary: A Bridgetown plugin for automatically generating related posts based on cosine
|
66
|
+
similarity.
|
67
|
+
test_files: []
|