jekyll_ai_related_posts 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -0
- data/lib/jekyll_ai_related_posts/generator.rb +55 -20
- data/lib/jekyll_ai_related_posts/open_ai_embeddings.rb +2 -2
- data/lib/jekyll_ai_related_posts/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bde6fb501500ac67e8b2fbcc05707dad8acc44d3e06ea9e461a98a065e9f84e2
|
4
|
+
data.tar.gz: e0a588ca8656583ad3dcbb36bb76d8c3992107ef807e5425c3f0f0f0db021da7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a5e674a295aeb7c2e0bfd1bcec208d129dad7833a3690c4beb467e130220806e06867f49c4703a677cd000944ae0fdc66305bbcdc9679e7ccb51691f1ed9ec4b
|
7
|
+
data.tar.gz: f5c95c5eab1f65ec12b56b5ed949125eb24ec41a0e0097609bff313d654ff0888152e78070648fb744b4bf04c90a2f50a5698161d77df99545037d8c26f55d26
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -16,6 +16,13 @@ embeddings API that allows us to easily get the embedding vector (in one of
|
|
16
16
|
OpenAI's models) of some text. We can use these vectors to compute related
|
17
17
|
posts with the accuracy of OpenAI's models (or any other LLM, for that matter).
|
18
18
|
|
19
|
+
### Used in Production at
|
20
|
+
|
21
|
+
- [MikeKasberg.com](https://www.mikekasberg.com)
|
22
|
+
|
23
|
+
(Feel free to open a PR to add your website if you're using this gem in
|
24
|
+
production!)
|
25
|
+
|
19
26
|
## Installation
|
20
27
|
|
21
28
|
Jekyll AI Related Posts is a [Jekyll
|
data/lib/jekyll_ai_related_posts/generator.rb
CHANGED
@@ -9,7 +9,13 @@ require "json"
|
|
9
9
|
module JekyllAiRelatedPosts
|
10
10
|
class Generator < Jekyll::Generator
|
11
11
|
def generate(site)
|
12
|
+
Jekyll.logger.debug "AI Related Posts:", "Generating related posts..."
|
13
|
+
|
12
14
|
@site = site
|
15
|
+
@stats = {
|
16
|
+
cache_hits: 0,
|
17
|
+
cache_misses: 0
|
18
|
+
}
|
13
19
|
setup_database
|
14
20
|
|
15
21
|
@indexed_posts = {}
|
@@ -18,7 +24,6 @@ module JekyllAiRelatedPosts
|
|
18
24
|
end
|
19
25
|
|
20
26
|
if fetch_enabled?
|
21
|
-
Jekyll.logger.info "[ai_related_posts] Generating related posts..."
|
22
27
|
@embeddings_fetcher = new_fetcher
|
23
28
|
|
24
29
|
@site.posts.docs.each do |p|
|
@@ -28,13 +33,25 @@ module JekyllAiRelatedPosts
|
|
28
33
|
@site.posts.docs.each do |p|
|
29
34
|
find_related(p)
|
30
35
|
end
|
36
|
+
Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; fetched #{@stats[:cache_misses]}"
|
31
37
|
else
|
32
|
-
Jekyll.logger.info "
|
38
|
+
Jekyll.logger.info "AI Related Posts:", "Fetch disabled. Using cached related posts data."
|
33
39
|
|
34
40
|
@site.posts.docs.each do |p|
|
35
41
|
fallback_generate_related(p)
|
36
42
|
end
|
43
|
+
|
44
|
+
case @stats[:cache_misses]
|
45
|
+
when 0
|
46
|
+
Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; all embeddings cached"
|
47
|
+
when 1
|
48
|
+
Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; skipped 1 fetch"
|
49
|
+
else
|
50
|
+
Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; skipped #{@stats[:cache_misses]} fetches"
|
51
|
+
end
|
37
52
|
end
|
53
|
+
|
54
|
+
Jekyll.logger.debug "AI Related Posts:", "Done generating related posts"
|
38
55
|
end
|
39
56
|
|
40
57
|
private
|
@@ -53,8 +70,14 @@ module JekyllAiRelatedPosts
|
|
53
70
|
def fallback_generate_related(post)
|
54
71
|
existing = Models::Post.find_by(relative_path: post.relative_path)
|
55
72
|
if existing.nil?
|
73
|
+
@stats[:cache_misses] += 1
|
56
74
|
post.data["ai_related_posts"] = post.related_posts
|
57
75
|
else
|
76
|
+
if existing.embedding_text == embedding_text(post)
|
77
|
+
@stats[:cache_hits] += 1
|
78
|
+
else
|
79
|
+
@stats[:cache_misses] += 1
|
80
|
+
end
|
58
81
|
find_related(post)
|
59
82
|
end
|
60
83
|
end
|
@@ -80,20 +103,25 @@ module JekyllAiRelatedPosts
|
|
80
103
|
existing = nil
|
81
104
|
end
|
82
105
|
|
83
|
-
|
106
|
+
if existing.nil?
|
107
|
+
@stats[:cache_misses] += 1
|
84
108
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
109
|
+
Models::Post.create!(
|
110
|
+
relative_path: post.relative_path,
|
111
|
+
embedding_text: embedding_text(post),
|
112
|
+
embedding: embedding_for(post).to_json
|
113
|
+
)
|
90
114
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
115
|
+
sql = <<-SQL
|
116
|
+
INSERT INTO vss_posts (rowid, post_embedding)
|
117
|
+
SELECT rowid, embedding FROM posts WHERE relative_path = :relative_path;
|
118
|
+
SQL
|
119
|
+
ActiveRecord::Base.connection.execute(
|
120
|
+
ActiveRecord::Base.sanitize_sql([ sql, { relative_path: post.relative_path } ])
|
121
|
+
)
|
122
|
+
else
|
123
|
+
@stats[:cache_hits] += 1
|
124
|
+
end
|
97
125
|
end
|
98
126
|
|
99
127
|
def find_related(post)
|
@@ -107,9 +135,9 @@ module JekyllAiRelatedPosts
|
|
107
135
|
LIMIT 10000;
|
108
136
|
SQL
|
109
137
|
|
110
|
-
results = ActiveRecord::Base.connection.execute(
|
111
|
-
|
112
|
-
|
138
|
+
results = ActiveRecord::Base.connection.execute(
|
139
|
+
ActiveRecord::Base.sanitize_sql([ sql, { relative_path: post.relative_path } ])
|
140
|
+
)
|
113
141
|
# The first result is the post itself, with a distance of 0.
|
114
142
|
rowids = results.sort_by { |r| r["distance"] }.drop(1).first(10).map { |r| r["rowid"] }
|
115
143
|
|
@@ -139,16 +167,23 @@ module JekyllAiRelatedPosts
|
|
139
167
|
end
|
140
168
|
|
141
169
|
def embedding_for(post)
|
142
|
-
Jekyll.logger.info "
|
170
|
+
Jekyll.logger.info "AI Related Posts:", "Fetching embedding for #{post.relative_path}"
|
143
171
|
input = embedding_text(post)
|
144
172
|
|
145
173
|
@embeddings_fetcher.embedding_for(input)
|
146
174
|
end
|
147
175
|
|
148
176
|
def setup_database
|
177
|
+
db_path = @site.in_source_dir(".ai_related_posts_cache.sqlite3")
|
178
|
+
if File.exist?(db_path)
|
179
|
+
Jekyll.logger.debug "AI Related Posts:", "Found cache [.ai_related_posts_cache.sqlite3]"
|
180
|
+
else
|
181
|
+
Jekyll.logger.info "AI Related Posts:", "Creating cache [.ai_related_posts_cache.sqlite3]"
|
182
|
+
end
|
183
|
+
|
149
184
|
ActiveRecord::Base.establish_connection(
|
150
185
|
adapter: "sqlite3",
|
151
|
-
database:
|
186
|
+
database: db_path
|
152
187
|
)
|
153
188
|
# We don't need WAL mode for this.
|
154
189
|
ActiveRecord::Base.connection.execute("PRAGMA journal_mode=DELETE;")
|
@@ -175,7 +210,7 @@ module JekyllAiRelatedPosts
|
|
175
210
|
SQL
|
176
211
|
ActiveRecord::Base.connection.execute(create_vss_posts)
|
177
212
|
|
178
|
-
Jekyll.logger.debug
|
213
|
+
Jekyll.logger.debug "AI Related Posts:", "DB setup complete"
|
179
214
|
end
|
180
215
|
end
|
181
216
|
end
|
data/lib/jekyll_ai_related_posts/open_ai_embeddings.rb
CHANGED
@@ -29,8 +29,8 @@ module JekyllAiRelatedPosts
|
|
29
29
|
|
30
30
|
res.body["data"].first["embedding"]
|
31
31
|
rescue Faraday::Error => e
|
32
|
-
Jekyll.logger.error "Error response from
|
33
|
-
Jekyll.logger.error e.inspect
|
32
|
+
Jekyll.logger.error "AI Related Posts:", "Error response from OpenAI API!"
|
33
|
+
Jekyll.logger.error "AI Related Posts:", e.inspect
|
34
34
|
|
35
35
|
raise
|
36
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll_ai_related_posts
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Kasberg
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|