bridgetown-related-posts 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4e102bbfd393fdfb004ea44b684e56a9ae67528047c3a6eaf28fc2c22d08a512
4
+ data.tar.gz: 10ba94a0dc8e0c8815590e55cf574dff048d78f16ee1892022ab161e277bfc59
5
+ SHA512:
6
+ metadata.gz: 58f762fa5b087618e12ab8682f79b916913a4b974ad7c7ee1b220f76d210c869aad5eb6e63b604cf591ea9f037d151344a31b2cd15abde7132836270284243a5
7
+ data.tar.gz: fd98995adb563f00740ae5c50441993c02a840ddc675992f7ed051266267197e8ad17bfc1038e824f84d49b6c326e36ae55e56b2e59ca327aa838a53310336c6
@@ -0,0 +1,137 @@
1
+ # lib/bridgetown-related-posts/builder.rb
2
+ module BridgetownRelatedPosts
3
+ class Builder < Bridgetown::Builder
4
+
5
# Seeds a default of 5 for the `related_posts_limit` setting when it is unset.
#
# NOTE(review): this writes `related_posts_limit`, whereas `add_related_posts`
# reads `config.related_posts&.limit` — confirm which key is intended.
# NOTE(review): relies on `config` being resolvable at class level — confirm.
def self.init
  config.related_posts_limit ||= 5
end
8
+
9
# Registers a `:site, :post_read` hook that computes related posts.
#
# When the hook fires it stores the builder's config on the
# BridgetownRelatedPosts module, gathers the post data, and either attaches
# related posts to every post or logs a warning when no posts were gathered.
def build
  hook :site, :post_read do
    BridgetownRelatedPosts.set_config(config)
    posts_data = prepare_posts_data

    if posts_data.any?
      add_related_posts(posts_data, calculate_similarity(posts_data))
    else
      Bridgetown.logger.warn "No posts found for RelatedPostsBuilder"
    end
  end
end
21
+
22
# Collects the fields needed for similarity scoring from the "posts" collection.
#
# A post whose fields raise while being read is logged and left out of the
# result instead of aborting the whole run.
#
# @return [Array<Hash>] one hash per readable post with the keys
#   :relative_path, :title (defaults to "Untitled") and :content (defaults to "")
def prepare_posts_data
  resources = site.collections["posts"].resources
  Bridgetown.logger.info "Number of posts found: #{resources.size}"

  resources.filter_map do |resource|
    body = resource.content

    {
      relative_path: resource.relative_path,
      title: resource.data["title"] || "Untitled",
      content: body || ""
    }
  rescue => e
    Bridgetown.logger.error "Error processing post #{resource.relative_path}: #{e.message}"
    Bridgetown.logger.error "Post data: #{resource.data.inspect}"
    nil
  end
end
42
+
43
# Builds a pairwise similarity matrix for the given posts.
#
# Every unordered pair of posts is scored once with `cosine_similarity` over
# their TF-IDF vectors and the score is stored in both directions, so the
# matrix is exactly symmetric and no pair is computed twice.
#
# @param posts_data [Array<Hash>] hashes with :relative_path and :content keys
# @return [Hash{Object => Hash{Object => Numeric}}] for each post's
#   relative path, a hash mapping every *other* post's relative path to its
#   similarity score (a post never appears in its own row)
def calculate_similarity(posts_data)
  idf = compute_idf(posts_data)
  tfidf_vectors = posts_data.map { |post| compute_tfidf(post[:content], idf) }
  paths = posts_data.map { |post| post[:relative_path] }

  # Pre-create every row so posts without any counterpart still get an entry.
  similarity_matrix = paths.to_h { |path| [path, {}] }

  paths.each_with_index do |path, i|
    # Only look "forward" (j > i); the mirrored cell is filled in the same step.
    ((i + 1)...paths.size).each do |j|
      similarity = cosine_similarity(tfidf_vectors[i], tfidf_vectors[j])
      similarity_matrix[path][paths[j]] = similarity
      similarity_matrix[paths[j]][path] = similarity
    end
  end

  similarity_matrix
end
58
+
59
# Computes an inverse-document-frequency weight for every word in the corpus.
#
# Words are the whitespace-separated, lower-cased tokens of each post's
# :content. A word counts once per post no matter how often it repeats there.
# The weight is `log(total_docs / (document_frequency + 1))`; note that this
# is 0 for a word found in all but one post and negative for a word found in
# every post.
#
# @param posts_data [Array<Hash>] hashes with a String under :content
# @return [Hash{String => Float}] word => IDF weight, in first-seen order
def compute_idf(posts_data)
  total_docs = posts_data.size.to_f

  posts_data
    .flat_map { |post| post[:content].downcase.split.uniq }
    .tally
    .transform_values { |doc_freq| Math.log(total_docs / (doc_freq + 1)) }
end
73
+
74
# Computes the TF-IDF vector of a single text.
#
# Term frequency is the word's count divided by the total number of
# whitespace-separated, lower-cased tokens in the text. Words missing from
# `idf` get a weight of 0.
#
# @param text [String] the document body
# @param idf [Hash{String => Numeric}] word => IDF weight
# @return [Hash{String => Float}] word => TF-IDF weight, in first-seen order;
#   empty when the text contains no tokens
def compute_tfidf(text, idf)
  words = text.downcase.split
  total_words = words.size.to_f

  words.tally.to_h do |word, count|
    [word, (count / total_words) * (idf[word] || 0)]
  end
end
86
+
87
# Computes the cosine similarity of two sparse term-weight vectors.
#
# @param vec1 [Hash{String => Numeric}] term => weight
# @param vec2 [Hash{String => Numeric}] term => weight
# @return [Float] dot(vec1, vec2) / (|vec1| * |vec2|), or 0.0 when either
#   vector has zero magnitude (empty, or all weights zero)
def cosine_similarity(vec1, vec2)
  magnitude1 = Math.sqrt(vec1.each_value.sum { |weight| weight**2 })
  magnitude2 = Math.sqrt(vec2.each_value.sum { |weight| weight**2 })
  denominator = magnitude1 * magnitude2

  # Without this guard the division yields NaN, and a NaN score makes any
  # later sort over the scores raise ArgumentError.
  return 0.0 if denominator.zero?

  # Terms absent from vec2 contribute nothing to the dot product.
  dot_product = vec1.sum { |term, weight| weight * vec2.fetch(term, 0) }
  dot_product / denominator
end
98
+
99
# Attaches the most similar posts to each post under `data["related_posts"]`.
#
# For every entry in `posts_data` the highest-scoring paths from its row of
# the similarity matrix are resolved to resources of the "posts" collection
# (paths that cannot be resolved are dropped) and stored on the post. The
# number of candidates considered is `config.related_posts.limit`, or 5 when
# that setting is absent.
#
# @param posts_data [Array<Hash>] hashes with a :relative_path key
# @param similarity_matrix [Hash{Object => Hash{Object => Numeric}}]
#   relative path => { other relative path => similarity score }
# @return [Array<Hash>] the `posts_data` argument
def add_related_posts(posts_data, similarity_matrix)
  limit = config.related_posts&.limit || 5

  # Index the collection once; `||=` keeps the first resource for a path,
  # matching what a front-to-back `find` would return.
  resources_by_path = site.collections["posts"].resources.each_with_object({}) do |resource, index|
    index[resource.relative_path] ||= resource
  end

  posts_data.each do |post|
    path = post[:relative_path]
    scores = similarity_matrix[path]
    if scores.nil?
      Bridgetown.logger.warn "No similar posts found for: #{path}"
      next
    end

    related = scores
      .sort_by { |_, score| -score }
      .first(limit)
      .filter_map { |related_path, _| resources_by_path[related_path] }

    actual_post = resources_by_path[path]
    unless actual_post
      Bridgetown.logger.error "Post not found: #{path}"
      next
    end

    actual_post.data["related_posts"] = related

    Bridgetown.logger.info "Related posts for #{actual_post.data["title"]}:"
    related.each do |related_post|
      Bridgetown.logger.info " - #{related_post.data["title"]}"
    end
  end
end
136
+ end
137
+ end
@@ -0,0 +1,12 @@
1
+ require "bridgetown"
2
+ require "bridgetown-related-posts/builder"
3
+
4
# Registers a source manifest for this plugin whose "builders" component
# lists the RelatedPosts builder class.
# NOTE(review): confirm the targeted Bridgetown versions accept this manifest
# call and a "builders" component key.
builder_components = {
  "builders" => [
    { name: "RelatedPosts", class: BridgetownRelatedPosts::Builder }
  ]
}

Bridgetown::PluginManager.new_source_manifest(
  origin: BridgetownRelatedPosts,
  components: builder_components
)
@@ -0,0 +1,16 @@
1
+ require "bridgetown"
2
+ require_relative "bridgetown-related-posts/builder"
3
+
4
# Registers the plugin's initializer; when invoked it adds the related-posts
# builder to the configuration object it receives.
Bridgetown.initializer(:"bridgetown-related-posts") { |config| config.builder BridgetownRelatedPosts::Builder }
7
+
8
# Module-level holder for the configuration used by the plugin.
module BridgetownRelatedPosts
  class << self
    # Returns the stored configuration, creating a fresh
    # Bridgetown::Configuration on first access if none was stored.
    def config
      @config ||= Bridgetown::Configuration.new
    end

    # Replaces the stored configuration.
    #
    # @param config [Object] the configuration to store
    # @return [Object] the configuration that was stored
    def set_config(config)
      @config = config
    end
  end
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bridgetown-related-posts
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Clarkson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-07-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bridgetown
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 1.3.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
33
+ description: This plugin calculates and adds related posts to your Bridgetown site
34
+ using TF-IDF and cosine similarity.
35
+ email: mpclarkson@gmail.com
36
+ executables: []
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - lib/bridgetown-related-posts.rb
41
+ - lib/bridgetown-related-posts/builder.rb
42
+ - lib/bridgetown-related-posts/config.rb
43
+ homepage: https://github.com/mpclarkson/bridgetown-related-posts/
44
+ licenses:
45
+ - MIT
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: 3.3.0
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubygems_version: 3.5.10
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: A Bridgetown plugin for automatically generating related posts based on cosine
66
+ similarity.
67
+ test_files: []