jekyll_ai_related_posts 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dde47b3847f5f2578a80f8f208049f912d409e7812a2927dda4ea333b0b8cd82
4
- data.tar.gz: 9d94d4dd9695b96467349818d20f961ac9a55f9bd5c4968c3703636854d71edb
3
+ metadata.gz: bde6fb501500ac67e8b2fbcc05707dad8acc44d3e06ea9e461a98a065e9f84e2
4
+ data.tar.gz: e0a588ca8656583ad3dcbb36bb76d8c3992107ef807e5425c3f0f0f0db021da7
5
5
  SHA512:
6
- metadata.gz: 857d8f3e381b1f4f3d52c411cc5ab630f4539ab735df57b33276f5b9df0647bebe44319b3a8a2a647f75cc3910409863aeae56d412dd48827fa5116870ea7975
7
- data.tar.gz: eab136bfd7e226dcc0dfd71c14a0a9924b5e3aa59f8c376200129b3237e39f7e47ce540d55d03a080d834ebe56306b5827de44d8e55b9f04e71aedfbc3c9b60a
6
+ metadata.gz: a5e674a295aeb7c2e0bfd1bcec208d129dad7833a3690c4beb467e130220806e06867f49c4703a677cd000944ae0fdc66305bbcdc9679e7ccb51691f1ed9ec4b
7
+ data.tar.gz: f5c95c5eab1f65ec12b56b5ed949125eb24ec41a0e0097609bff313d654ff0888152e78070648fb744b4bf04c90a2f50a5698161d77df99545037d8c26f55d26
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.3] - 2024-05-15
4
+
5
+ - Better (nicer to read) log messages.
6
+
7
+ ## [0.1.2] - 2024-04-24
8
+
3
9
  - [Bugfix] Include up to 10 related posts (like native Jekyll does) instead of just 3.
4
10
 
5
11
  ## [0.1.1] - 2024-04-23
data/README.md CHANGED
@@ -16,6 +16,13 @@ embeddings API that allows us to easily get the embedding vector (in one of
16
16
  OpenAI's models) of some text. We can use these vectors to compute related
17
17
  posts with the accuracy of OpenAI's models (or any other LLM, for that matter).
18
18
 
19
+ ### Used in Production at
20
+
21
+ - [MikeKasberg.com](https://www.mikekasberg.com)
22
+
23
+ (Feel free to open a PR to add your website if you're using this gem in
24
+ production!)
25
+
19
26
  ## Installation
20
27
 
21
28
  Jekyll AI Related Posts is a [Jekyll
@@ -9,7 +9,13 @@ require "json"
9
9
  module JekyllAiRelatedPosts
10
10
  class Generator < Jekyll::Generator
11
11
  def generate(site)
12
+ Jekyll.logger.debug "AI Related Posts:", "Generating related posts..."
13
+
12
14
  @site = site
15
+ @stats = {
16
+ cache_hits: 0,
17
+ cache_misses: 0
18
+ }
13
19
  setup_database
14
20
 
15
21
  @indexed_posts = {}
@@ -18,7 +24,6 @@ module JekyllAiRelatedPosts
18
24
  end
19
25
 
20
26
  if fetch_enabled?
21
- Jekyll.logger.info "[ai_related_posts] Generating related posts..."
22
27
  @embeddings_fetcher = new_fetcher
23
28
 
24
29
  @site.posts.docs.each do |p|
@@ -28,13 +33,25 @@ module JekyllAiRelatedPosts
28
33
  @site.posts.docs.each do |p|
29
34
  find_related(p)
30
35
  end
36
+ Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; fetched #{@stats[:cache_misses]}"
31
37
  else
32
- Jekyll.logger.info "[ai_related_posts] Using cached related posts data..."
38
+ Jekyll.logger.info "AI Related Posts:", "Fetch disabled. Using cached related posts data."
33
39
 
34
40
  @site.posts.docs.each do |p|
35
41
  fallback_generate_related(p)
36
42
  end
43
+
44
+ case @stats[:cache_misses]
45
+ when 0
46
+ Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; all embeddings cached"
47
+ when 1
48
+ Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; skipped 1 fetch"
49
+ else
50
+ Jekyll.logger.info "AI Related Posts:", "Found #{@stats[:cache_hits]} cached embeddings; skipped #{@stats[:cache_misses]} fetches"
51
+ end
37
52
  end
53
+
54
+ Jekyll.logger.debug "AI Related Posts:", "Done generating related posts"
38
55
  end
39
56
 
40
57
  private
@@ -53,8 +70,14 @@ module JekyllAiRelatedPosts
53
70
  def fallback_generate_related(post)
54
71
  existing = Models::Post.find_by(relative_path: post.relative_path)
55
72
  if existing.nil?
73
+ @stats[:cache_misses] += 1
56
74
  post.data["ai_related_posts"] = post.related_posts
57
75
  else
76
+ if existing.embedding_text == embedding_text(post)
77
+ @stats[:cache_hits] += 1
78
+ else
79
+ @stats[:cache_misses] += 1
80
+ end
58
81
  find_related(post)
59
82
  end
60
83
  end
@@ -80,20 +103,25 @@ module JekyllAiRelatedPosts
80
103
  existing = nil
81
104
  end
82
105
 
83
- return unless existing.nil?
106
+ if existing.nil?
107
+ @stats[:cache_misses] += 1
84
108
 
85
- Models::Post.create!(
86
- relative_path: post.relative_path,
87
- embedding_text: embedding_text(post),
88
- embedding: embedding_for(post).to_json
89
- )
109
+ Models::Post.create!(
110
+ relative_path: post.relative_path,
111
+ embedding_text: embedding_text(post),
112
+ embedding: embedding_for(post).to_json
113
+ )
90
114
 
91
- sql = <<-SQL
92
- INSERT INTO vss_posts (rowid, post_embedding)
93
- SELECT rowid, embedding FROM posts WHERE relative_path = :relative_path;
94
- SQL
95
- ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([ sql,
96
- { relative_path: post.relative_path } ]))
115
+ sql = <<-SQL
116
+ INSERT INTO vss_posts (rowid, post_embedding)
117
+ SELECT rowid, embedding FROM posts WHERE relative_path = :relative_path;
118
+ SQL
119
+ ActiveRecord::Base.connection.execute(
120
+ ActiveRecord::Base.sanitize_sql([ sql, { relative_path: post.relative_path } ])
121
+ )
122
+ else
123
+ @stats[:cache_hits] += 1
124
+ end
97
125
  end
98
126
 
99
127
  def find_related(post)
@@ -107,9 +135,9 @@ module JekyllAiRelatedPosts
107
135
  LIMIT 10000;
108
136
  SQL
109
137
 
110
- results = ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([ sql, {
111
- relative_path: post.relative_path
112
- } ]))
138
+ results = ActiveRecord::Base.connection.execute(
139
+ ActiveRecord::Base.sanitize_sql([ sql, { relative_path: post.relative_path } ])
140
+ )
113
141
  # The first result is the post itself, with a distance of 0.
114
142
  rowids = results.sort_by { |r| r["distance"] }.drop(1).first(10).map { |r| r["rowid"] }
115
143
 
@@ -139,16 +167,23 @@ module JekyllAiRelatedPosts
139
167
  end
140
168
 
141
169
  def embedding_for(post)
142
- Jekyll.logger.info "[ai_related_posts] Fetching embedding for #{post.relative_path}"
170
+ Jekyll.logger.info "AI Related Posts:", "Fetching embedding for #{post.relative_path}"
143
171
  input = embedding_text(post)
144
172
 
145
173
  @embeddings_fetcher.embedding_for(input)
146
174
  end
147
175
 
148
176
  def setup_database
177
+ db_path = @site.in_source_dir(".ai_related_posts_cache.sqlite3")
178
+ if File.exist?(db_path)
179
+ Jekyll.logger.debug "AI Related Posts:", "Found cache [.ai_related_posts_cache.sqlite3]"
180
+ else
181
+ Jekyll.logger.info "AI Related Posts:", "Creating cache [.ai_related_posts_cache.sqlite3]"
182
+ end
183
+
149
184
  ActiveRecord::Base.establish_connection(
150
185
  adapter: "sqlite3",
151
- database: @site.in_source_dir(".ai_related_posts_cache.sqlite3")
186
+ database: db_path
152
187
  )
153
188
  # We don't need WAL mode for this.
154
189
  ActiveRecord::Base.connection.execute("PRAGMA journal_mode=DELETE;")
@@ -175,7 +210,7 @@ module JekyllAiRelatedPosts
175
210
  SQL
176
211
  ActiveRecord::Base.connection.execute(create_vss_posts)
177
212
 
178
- Jekyll.logger.debug("ai_related_posts db setup complete")
213
+ Jekyll.logger.debug "AI Related Posts:", "DB setup complete"
179
214
  end
180
215
  end
181
216
  end
@@ -29,8 +29,8 @@ module JekyllAiRelatedPosts
29
29
 
30
30
  res.body["data"].first["embedding"]
31
31
  rescue Faraday::Error => e
32
- Jekyll.logger.error "Error response from OpanAI API!"
33
- Jekyll.logger.error e.inspect
32
+ Jekyll.logger.error "AI Related Posts:", "Error response from OpenAI API!"
33
+ Jekyll.logger.error "AI Related Posts:", e.inspect
34
34
 
35
35
  raise
36
36
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JekyllAiRelatedPosts
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll_ai_related_posts
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kasberg
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-04-24 00:00:00.000000000 Z
11
+ date: 2024-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord