jekyll-ai-related 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +24 -0
- data/.github/workflows/release.yml +31 -0
- data/.gitignore +29 -0
- data/.rubocop.yml +25 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +18 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +135 -0
- data/LICENSE.txt +21 -0
- data/README.md +133 -0
- data/Rakefile +31 -0
- data/cliff.toml +84 -0
- data/jekyll-ai-related.gemspec +35 -0
- data/lib/jekyll/commands/generator.rb +35 -0
- data/lib/jekyll/embeddings-generator/embeddings/generate.rb +34 -0
- data/lib/jekyll/embeddings-generator/embeddings/store.rb +151 -0
- data/lib/jekyll/embeddings-generator/init.rb +51 -0
- data/lib/jekyll/embeddings-generator/models/data.rb +32 -0
- data/lib/jekyll/embeddings-generator/models/metadata.rb +40 -0
- data/lib/jekyll/embeddings-generator/version.rb +7 -0
- data/lib/jekyll/processor.rb +78 -0
- data/lib/jekyll-ai-related.rb +3 -0
- data/sig/jekyll/embeddings/generator.rbs +6 -0
- data/sql/supabase/create.sql +38 -0
- data/sql/supabase/drop.sql +5 -0
- metadata +164 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/jekyll/embeddings-generator/version"
|
4
|
+
|
5
|
+
# Gem specification for the jekyll-ai-related plugin.
Gem::Specification.new do |spec|
  # --- Identity ---------------------------------------------------------
  spec.name    = "jekyll-ai-related"
  spec.version = Jekyll::EmbeddingsGenerator::VERSION
  spec.authors = ["Francesco Pira"]
  spec.email   = ["dev@fpira.com"]

  spec.summary     = "Jekyll plugin to generate embeddings for posts and find related content"
  spec.description = "A Jekyll plugin that uses OpenAI embeddings to analyze posts and find related content"
  spec.homepage    = "https://github.com/pirafrank/jekyll-ai-related"

  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2.0"

  # --- Packaged files ---------------------------------------------------
  # Ship every file tracked by git; anything under bin/ becomes an executable.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r!^bin/!).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # --- Registry metadata ------------------------------------------------
  spec.metadata.update(
    "allowed_push_host" => "https://rubygems.org",
    "homepage_uri"      => spec.homepage,
    "changelog_uri"     => "https://github.com/pirafrank/jekyll-ai-related/blob/main/CHANGELOG.md",
    "bug_tracker_uri"   => "https://github.com/pirafrank/jekyll-ai-related/issues"
  )

  # --- Runtime dependencies ---------------------------------------------
  {
    "httparty" => ["~> 0.22.0"],
    "jekyll"   => [">= 3.7", "< 5.0"],
    "json"     => ["~> 2.7"],
  }.each { |gem_name, constraints| spec.add_dependency gem_name, *constraints }

  # --- Development dependencies -----------------------------------------
  {
    "bundler"        => "~> 2.6",
    "rake"           => "~> 13.0",
    "rubocop-jekyll" => "~> 0.14",
  }.each { |gem_name, constraint| spec.add_development_dependency gem_name, constraint }
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Commands
    # Registers the `related` subcommand with Jekyll's command-line program.
    # When invoked, the command hands the resolved options to
    # Jekyll::EmbeddingsGenerator.run.
    class EmbeddingsGenerator < Command
      class << self
        # Declares the subcommand, its flags and its action on +prog+.
        #
        # @param prog the command-line program object the subcommand is added to
        def init_with_program(prog)
          prog.command(:related) do |c|
            c.description "Generate embeddings for each post and find related posts."
            # The usage line names the command exactly as it is registered above.
            c.syntax "related [options]"

            c.option "debug",
                     "--debug",
                     "Most verbose. Set log level to Debug."
            c.option "quiet",
                     "--quiet",
                     "Do not print Info logs. Set log level to Error."
            c.option "future",
                     "--future",
                     "Get embeds and find related posts also for those with a future date."
            c.option "drafts",
                     "--drafts",
                     "Get embeds and find related posts also for drafts."

            c.action do |_, opts|
              Jekyll.logger.info "AI Related plugin starting..."
              # configuration_from_options is not defined in this class;
              # presumably it is inherited from Command.
              options = configuration_from_options(opts)
              Jekyll::EmbeddingsGenerator.run(options)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "httparty"
|
4
|
+
require "json"
|
5
|
+
|
6
|
+
module Jekyll
  module EmbeddingsGenerator
    # Thin client for the OpenAI embeddings endpoint.
    module Embeddings
      class << self
        include Jekyll::EmbeddingsGenerator

        # Requests an embedding for +text+ using the "text-embedding-3-small" model.
        #
        # The API key is read from Jekyll::EmbeddingsGenerator.config["openai_api_key"].
        #
        # @param text the content sent as the request's "input"
        # @return the "embedding" value of the first entry in the response's "data"
        # @raise [RuntimeError] when the request is not successful, or when a
        #   successful response does not contain an embedding
        def generate_embeddings(text)
          api_key = Jekyll::EmbeddingsGenerator.config["openai_api_key"]
          response = HTTParty.post(
            "https://api.openai.com/v1/embeddings",
            :headers => {
              "Authorization" => "Bearer #{api_key}",
              "Content-Type"  => "application/json",
            },
            :body    => {
              :model => "text-embedding-3-small",
              :input => text,
            }.to_json
          )
          payload = response.parsed_response

          raise "OpenAI API error: #{error_detail(response, payload)}" unless response.success?

          embedding = extract_embedding(payload)
          raise "OpenAI API error: response did not contain an embedding" if embedding.nil?

          embedding
        end

        private

        # Picks the API's own error message when the body has the expected
        # {"error" => {"message" => ...}} shape; otherwise falls back to the
        # status code and raw body so the real failure is never masked.
        def error_detail(response, payload)
          error = payload["error"] if payload.is_a?(Hash)
          message = error["message"] if error.is_a?(Hash)
          message || "#{response.code} - #{response.body}"
        end

        # Returns payload["data"][0]["embedding"], or nil when any level is missing
        # or has an unexpected type.
        def extract_embedding(payload)
          return unless payload.is_a?(Hash)

          first_item = Array(payload["data"]).first
          first_item["embedding"] if first_item.is_a?(Hash)
        end
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "httparty"
|
4
|
+
require "json"
|
5
|
+
|
6
|
+
module Jekyll
  module EmbeddingsGenerator
    # Reads and writes post embeddings in the Supabase `page_embeddings` table
    # through its REST API, and calls the `related_posts` RPC to find similar posts.
    #
    # Connection settings ("supabase_url", "supabase_key") and query settings
    # ("uid", "score_threshold", "limit") are read from
    # Jekyll::EmbeddingsGenerator.config.
    module Store
      class << self
        include Jekyll::EmbeddingsGenerator

        # Upserts the embedding for one post, unless the stored record is
        # already at least as recent as the post.
        #
        # @param data an object responding to uid, most_recent_edit, embedding,
        #   metadata and content
        # @return [Boolean] true when the record was written, false when the
        #   stored record was already up to date and nothing was sent
        # @raise [RuntimeError] when Supabase answers with a non-success status
        def store_embedding(data)
          # First check if record exists and its edit date
          existing = HTTParty.get(
            table_url,
            :headers => supabase_headers("Accept-Encoding" => "identity"), # avoid gzipped content
            :query   => {
              "uid"    => "eq.#{data.uid}",
              "select" => "uid, most_recent_edit",
            }
          )

          Jekyll.logger.debug "response headers: #{existing.headers}"
          Jekyll.logger.debug "response body: #{existing.body}"

          raise "Supabase API error: #{existing.code} - #{existing.body}" unless existing.success?

          existing_record = existing.parsed_response&.first
          # Skip the write when nothing changed since the stored version.
          return false unless stale?(existing_record, data.most_recent_edit)

          update_embedding(data)
          true
        end

        # Looks up the stored embedding of +post+ and returns the posts most
        # similar to it.
        #
        # @param post an object whose +data+ hash holds the configured uid field
        # @return the parsed response of the `related_posts` RPC
        # @raise [RuntimeError] when the post has no stored embedding or
        #   Supabase answers with a non-success status
        def find_related(post)
          post_uid = post.data[Jekyll::EmbeddingsGenerator.config["uid"]]
          embedding = query_embeddings(post_uid)
          # Without an embedding the similarity query cannot be built; fail with
          # a message that names the post instead of a database syntax error.
          raise "No stored embedding found for '#{post_uid}'" if embedding.nil?

          find_related_posts(embedding, post_uid)
        end

        private

        # URL of the `page_embeddings` REST resource.
        def table_url
          "#{Jekyll::EmbeddingsGenerator.config["supabase_url"]}/rest/v1/page_embeddings"
        end

        # Headers sent with every Supabase request, plus any request-specific +extra+.
        def supabase_headers(extra = {})
          supabase_key = Jekyll::EmbeddingsGenerator.config["supabase_key"]
          {
            "apikey"        => supabase_key,
            "Authorization" => "Bearer #{supabase_key}",
            "Content-Type"  => "application/json",
          }.merge(extra)
        end

        # True when there is no stored record, or the stored edit time is older
        # than the post's.
        def stale?(existing_record, most_recent_edit)
          return true if existing_record.nil?

          coerce_time(existing_record["most_recent_edit"]) < coerce_time(most_recent_edit)
        end

        # Normalises a timestamp to Time so that values arriving as Time, String
        # or Date-like objects can be compared with each other.
        def coerce_time(value)
          case value
          when Time   then value
          when String then Time.parse(value)
          else value.respond_to?(:to_time) ? value.to_time : Time.parse(value.to_s)
          end
        end

        # Renders +value+ as a single-quoted SQL string literal, doubling any
        # embedded single quote so the value cannot terminate the literal.
        def sql_literal(value)
          "'#{value.to_s.gsub("'", "''")}'"
        end

        # Inserts the row, or merges it into the existing row with the same uid.
        def update_embedding(data)
          response = HTTParty.post(
            table_url,
            :headers => supabase_headers("Prefer" => "resolution=merge-duplicates"), # upsert behavior
            :query   => {
              "on_conflict" => "uid", # important: this MUST be declared as unique on database
            },
            :body    => {
              :uid              => data.uid,
              :most_recent_edit => data.most_recent_edit,
              :embedding        => data.embedding,
              :metadata         => data.metadata,
              :content          => data.content,
            }.to_json
          )

          return if response.success?

          raise "Supabase API error: #{response.code} - #{response.body}"
        end

        # Fetches the stored embedding for +post_uid+; nil when no row matches.
        def query_embeddings(post_uid)
          response = HTTParty.get(
            table_url,
            :headers => supabase_headers("Accept-Encoding" => "identity"), # avoid gzipped content
            :query   => {
              "uid" => "eq.#{post_uid}",
            }
          )
          Jekyll.logger.debug "response.parsed_response: #{response.parsed_response}"
          raise "Supabase API error: #{response.code} - #{response.body}" unless response.success?

          response.parsed_response.first&.dig("embedding")
        end

        # Sends the similarity query to the `related_posts` RPC and returns its
        # parsed response.
        def find_related_posts(embedding, post_uid)
          config = Jekyll::EmbeddingsGenerator.config
          vector    = sql_literal(embedding)
          threshold = sql_literal(config["score_threshold"])
          limit     = sql_literal(config["limit"] || 3)
          # Query using cosine similarity
          # Note: this MUST be a stored procedure on Supabase, and order of
          # columns in 'select' statement must match the order of the
          # columns defined in the stored procedure.
          query = <<~SQL
            select
              metadata->>'title' as title,
              uid as uid,
              most_recent_edit,
              metadata->>'url' as url,
              metadata->>'date' as date,
              1 - (embedding <=> #{vector}) as similarity
            from page_embeddings
            where uid != #{sql_literal(post_uid)}
              and 1 - (embedding <=> #{vector}) > #{threshold}
            order by embedding <=> #{vector}
            limit #{limit};
          SQL
          response = HTTParty.post(
            "#{config["supabase_url"]}/rest/v1/rpc/related_posts",
            :headers => supabase_headers(
              "Accept-Encoding" => "identity", # avoid gzipped content
              "Prefer"          => "return=minimal"
            ),
            :body    => {
              :query => query,
            }.to_json
          )
          raise "Supabase API error: #{response.code} - #{response.body}" unless response.success?

          response.parsed_response
        end
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module EmbeddingsGenerator
    # Builds the plugin's settings hash and the Jekyll site the plugin works on.
    class Configuration
      include Jekyll::EmbeddingsGenerator

      @config = {}

      # Assembles the plugin settings from the "jekyll-ai-related" section of
      # the Jekyll configuration, the given options and the environment.
      #
      # Keys of the returned hash: "uid", "mre", "path", "drafts", "future",
      # "limit", "score_threshold", "openai_api_key", "supabase_url",
      # "supabase_key".
      #
      # @param opts [Hash] options; only "drafts" and "future" are read, as a
      #   fallback when the configuration section does not enable them
      # @return [Hash] the settings hash (also kept in the class-level @config)
      # @raise [RuntimeError] when OPENAI_API_KEY, SUPABASE_URL or SUPABASE_KEY
      #   is unset or blank
      def self.init(opts) # rubocop:disable Metrics/AbcSize,Metrics/PerceivedComplexity
        jk_config = Jekyll.configuration({})["jekyll-ai-related"] || {}
        config = {}
        config["uid"] = jk_config["post_unique_field"] || "slug"
        config["mre"] = jk_config["post_updated_field"] || "date"
        config["path"] = jk_config["output_path"] || "related_posts"
        config["drafts"] = jk_config["include_drafts"] || opts["drafts"] || false
        config["future"] = jk_config["include_future"] || opts["future"] || false
        config["limit"] = jk_config["related_posts_limit"] || 3
        config["score_threshold"] = jk_config["related_posts_score_threshold"] || 0.5
        config["openai_api_key"] = ENV["OPENAI_API_KEY"]
        config["supabase_url"] = ENV["SUPABASE_URL"]
        config["supabase_key"] = ENV["SUPABASE_KEY"]
        @config = config
        validate
        config
      end

      # Creates the Jekyll site for +options+, reads its content and runs its
      # generators. Must be called after .init, whose "drafts" and "future"
      # settings are copied into +options+ (the hash is modified in place).
      #
      # @param options [Hash] site options passed to Jekyll::Site.new
      # @return [Jekyll::Site] the site after reset, read and generate
      def self.build(options)
        options["show_drafts"] = @config["drafts"]
        options["future"] = @config["future"]
        Jekyll.logger.debug "Show drafts? #{options["show_drafts"]}"
        Jekyll.logger.debug "Include future posts? #{options["future"]}"
        site = Jekyll::Site.new(options)
        site.reset
        site.read
        # call the 'generate' method on all plugins inheriting from Jekyll::Generator.
        # This allows to generate the site's content, including any additional data
        # you may have added to the post objects via a custom plugin (which by default
        # lives in the _plugins dir of your Jekyll installation).
        site.generate
        site
      end

      # Ensures the credentials read from the environment are usable. An empty
      # or whitespace-only value is treated like a missing one, so the problem
      # is reported here rather than by the first HTTP request.
      #
      # @raise [RuntimeError] naming the first missing setting
      def self.validate
        raise "Missing OpenAI API key" if blank?(@config["openai_api_key"])
        raise "Missing Supabase URL" if blank?(@config["supabase_url"])
        raise "Missing Supabase key" if blank?(@config["supabase_key"])
      end

      # True for nil and for values whose string form is empty or only whitespace.
      def self.blank?(value)
        value.nil? || value.to_s.strip.empty?
      end
      private_class_method :blank?
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "metadata"
|
4
|
+
|
5
|
+
module Jekyll
  module EmbeddingsGenerator
    # One record to be stored for a post: its identifier, last edit marker,
    # embedding, metadata hash and raw content.
    class Data
      include Jekyll::EmbeddingsGenerator

      attr_reader :uid, :most_recent_edit, :embedding, :metadata, :content

      # @param post an object exposing +data+ (front matter hash) and +content+
      # @param embedding the embedding to store for the post
      # @param metadata an object converted with +to_h+ before being kept
      #
      # The front matter keys used for +uid+ and +most_recent_edit+ come from
      # the "uid" and "mre" entries of Jekyll::EmbeddingsGenerator.config.
      def initialize(post, embedding, metadata)
        uid_field, edit_field = Jekyll::EmbeddingsGenerator.config.values_at("uid", "mre")

        @uid              = post.data[uid_field]
        @most_recent_edit = post.data[edit_field]
        @embedding        = embedding
        @metadata         = metadata.to_h
        @content          = post.content
      end

      # @return [Hash] the attributes keyed by symbol, without nil entries
      def to_h
        %i[uid most_recent_edit embedding metadata content]
          .to_h { |field| [field, public_send(field)] }
          .compact
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module EmbeddingsGenerator
    # Snapshot of selected front matter fields of a post, plus its URL.
    class Metadata
      attr_reader :title, :subtitle, :description, :date, :slug, :uid,
                  :url, :categories, :tags, :updates, :last_edit

      # @param post an object exposing +data+ (front matter hash) and +url+
      def initialize(post) # rubocop:disable Metrics/AbcSize
        front_matter = post.data

        @title       = front_matter["title"]
        @subtitle    = front_matter["subtitle"]
        @description = front_matter["description"]
        @date        = front_matter["date"]
        @slug        = front_matter["slug"]
        @uid         = front_matter["uid"]
        @url         = post.url
        @categories  = front_matter["categories"]
        @tags        = front_matter["tags"]
        @updates     = front_matter["updates"]
        # Read from the "most_recent_edit" front matter key, exposed as last_edit.
        @last_edit   = front_matter["most_recent_edit"]
      end

      # @return [Hash] the fields keyed by symbol; fields that are nil are left out
      def to_h
        fields = %i[title subtitle description date slug uid url categories tags updates last_edit]
        fields.each_with_object({}) do |field, result|
          value = public_send(field)
          result[field] = value unless value.nil?
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "commands/generator"
|
4
|
+
require_relative "embeddings-generator/init"
|
5
|
+
require_relative "embeddings-generator/version"
|
6
|
+
require_relative "embeddings-generator/embeddings/generate"
|
7
|
+
require_relative "embeddings-generator/embeddings/store"
|
8
|
+
require_relative "embeddings-generator/models/data"
|
9
|
+
require_relative "embeddings-generator/models/metadata"
|
10
|
+
|
11
|
+
module Jekyll
  # Entry point of the plugin: embeds every post, stores the result, then
  # writes the related posts of each post to a data file.
  module EmbeddingsGenerator
    class Error < StandardError; end

    class << self
      attr_reader :config, :site

      # Runs the whole pipeline for the given options.
      #
      # @param options [Hash] passed to Configuration.init and Configuration.build
      def run(options)
        @config = Configuration.init(options)
        @site   = Configuration.build(options)

        extract_content
        write_related_posts
      end

      private

      # Generates and stores an embedding for each post of the site.
      def extract_content
        Jekyll.logger.info "Embeddings Generator:", "Starting to process markdown files..."
        @site.posts.docs.each { |post| embed_and_store(post) }
        Jekyll.logger.info "Embeddings Generator:", "Finished processing markdown files."
      end

      # Embeds the content of a single post and hands the record to the store.
      def embed_and_store(post)
        Jekyll.logger.info "Embeddings Generator:", "Processing post: #{post.data["title"]}"

        text     = post.content
        metadata = Jekyll::EmbeddingsGenerator::Metadata.new(post)
        vector   = Jekyll::EmbeddingsGenerator::Embeddings.generate_embeddings(text)
        record   = Jekyll::EmbeddingsGenerator::Data.new(post, vector, metadata)

        Jekyll::EmbeddingsGenerator::Store.store_embedding(record)
      end

      # Queries the store for the related posts of each post and writes them
      # out. A failure on one post is logged and does not stop the others.
      def write_related_posts
        @site.posts.docs.each do |post|
          begin
            matches = Jekyll::EmbeddingsGenerator::Store.find_related(post)
            write_to_file(matches, post)

            Jekyll.logger.info "Related posts:", "Found #{matches.length} related posts for #{post.data[@config["uid"]]}"
          rescue StandardError => e
            Jekyll.logger.error "Related posts:", "Error processing #{post.data["title"]}: #{e.message}"
          end
        end
        Jekyll.logger.info "Related posts:", "Finished writing markdown files."
      end

      # Serialises +data+ as YAML into
      # <site source>/_data/<configured path>/<sanitised uid>.yml,
      # creating the directory when needed and overwriting an existing file.
      # Nothing is written when +data+ is empty.
      def write_to_file(data, post)
        return if data.empty?

        output_dir = File.join(@site.source, "_data", @config["path"])
        FileUtils.mkdir_p(output_dir)

        basename = safe_filename(post.data[@config["uid"]].to_s)
        File.write(File.join(output_dir, "#{basename}.yml"), data.to_yaml, :mode => "w")
      end

      # Lower-cases +filename+ and replaces every character other than
      # a-z, 0-9, "-" and "_" with "-".
      def safe_filename(filename)
        lowered = filename.downcase
        lowered.gsub(%r![^a-z0-9\-_]!, "-")
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
-- The `vector` extension provides the column type and index method used below.
CREATE EXTENSION IF NOT EXISTS vector;

-- Table holding one embedding per page.
--
-- Note: the time zone used in the column defaults must match the one set in
-- the _config.yml of your Jekyll website.
CREATE TABLE page_embeddings (
    id               BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
    -- Unique per page; the client relies on this uniqueness to upsert rows.
    uid              VARCHAR(255) NOT NULL UNIQUE,
    most_recent_edit TIMESTAMP WITH TIME ZONE DEFAULT timezone('Europe/Rome'::text, now()) NOT NULL,
    content          TEXT,
    embedding        vector(1536),
    metadata         JSONB,
    created_at       TIMESTAMP WITH TIME ZONE DEFAULT timezone('Europe/Rome'::text, now())
);

-- Index supporting cosine-based similarity search on the embeddings.
CREATE INDEX page_embeddings_embedding_idx
    ON page_embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

-- Function called by the client to run the similarity search.
-- The SQL text passed in `query` must select columns in the order declared
-- in RETURNS TABLE.
--
-- NOTE(review): the function executes whatever SQL text it receives, and
-- SECURITY DEFINER makes it run with the privileges of the function owner.
-- Confirm that only trusted roles are allowed to call it.
CREATE OR REPLACE FUNCTION related_posts(query TEXT)
RETURNS TABLE (
    title            TEXT,
    uid              VARCHAR(255),
    most_recent_edit TIMESTAMP WITH TIME ZONE,
    url              TEXT,
    date             TEXT,
    similarity       FLOAT
)
LANGUAGE plpgsql
SECURITY DEFINER
AS $$
BEGIN
    RETURN QUERY EXECUTE query;
END;
$$;
|