fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Extractors
|
|
5
|
+
class ManualExtractor < Base
  # Wraps caller-supplied text in a single fact without parsing it.
  # Meant for API-driven fact creation, where the caller provides the
  # fact text and its metadata directly.
  #
  # @param text [String, nil] fact text; nil or whitespace-only yields no facts
  # @param context [Hash] optional :valid_at, :captured_at, :invalid_at,
  #   :mentions, :confidence and :metadata entries
  # @return [Array] empty, or a one-element array holding the build_fact result
  def extract(text, context = {})
    return [] if text.nil? || text.strip.empty?

    # Validity start: explicit :valid_at, else :captured_at, else the current time.
    effective_from = context[:valid_at] || context[:captured_at] || Time.current

    fact = build_fact(
      text: text,
      valid_at: effective_from,
      invalid_at: context[:invalid_at],
      mentions: context[:mentions] || [],
      confidence: context[:confidence] || 1.0,
      metadata: context[:metadata] || {}
    )

    [fact]
  end

  # Manual extraction never discovers entities on its own; callers are
  # expected to supply them explicitly.
  #
  # @return [Array] always empty
  def extract_entities(_text)
    []
  end

  # Keyword-argument convenience wrapper around #extract for one fact.
  #
  # @return [Object, nil] the built fact, or nil when +text+ is blank
  def create_fact(text:, valid_at:, invalid_at: nil, mentions: [], confidence: 1.0, metadata: {})
    overrides = {
      valid_at: valid_at,
      invalid_at: invalid_at,
      mentions: mentions,
      confidence: confidence,
      metadata: metadata
    }

    extract(text, overrides).first
  end

  # Keyword-argument convenience wrapper that forwards to build_entity.
  def create_entity(name:, type:, aliases: [], attributes: {})
    build_entity(name: name, type: type, aliases: aliases, attributes: attributes)
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Extractors
|
|
5
|
+
class RuleBasedExtractor < Base
|
|
6
|
+
# Date patterns for temporal extraction (start of validity).
# Each pattern captures the date text in group 1 and is tried in order.
# The leading \b keeps the keyword from matching inside a longer word
# (without it, "Austin 2023" would be read as "in 2023").
DATE_PATTERNS = [
  # "on January 10, 2024"
  /\b(?:on|since|from|as of|starting)\s+(\w+\s+\d{1,2},?\s+\d{4})/i,
  # "on 2024-01-10"
  /\b(?:on|since|from|as of|starting)\s+(\d{4}-\d{2}-\d{2})/i,
  # "in January 2024"
  /\b(?:in|during)\s+(\w+\s+\d{4})/i,
  # "in 2024"
  /\b(?:in|during)\s+(\d{4})\b/i
].freeze

# End-of-validity date patterns; group 1 is the date text.
# The leading \b stops "to" from matching the tail of another word.
END_DATE_PATTERNS = [
  # "until January 10, 2024"
  /\b(?:until|through|to|ended|left)\s+(\w+\s+\d{1,2},?\s+\d{4})/i,
  # "until 2024-01-10"
  /\b(?:until|through|to|ended|left)\s+(\d{4}-\d{2}-\d{2})/i
].freeze
|
|
23
|
+
|
|
24
|
+
# Employment patterns.
# All patterns are case-sensitive (no /i flag): names must start with a
# capital letter. Capture groups are, in order:
#   patterns 1-3: (person, organization)
#   pattern 4:    (person, title, organization)
# Callers rely on the organization always being the LAST capture.
EMPLOYMENT_PATTERNS = [
  # "Paula works at Microsoft"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:works?|worked|is working)\s+(?:at|for)\s+(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)/,
  # "Paula joined Microsoft"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:joined|started at|was hired by)\s+(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)/,
  # "Paula left Microsoft"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:left|departed|resigned from|was fired from)\s+(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)/,
  # "Paula is a Principal Engineer at Microsoft"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|was|became)\s+(?:a\s+)?([A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)\s+at\s+(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)/
].freeze

# Relationship patterns.
# Capture groups are, in order:
#   pattern 1: (person, person)
#   pattern 2: (person, role words, capitalized name after "of")
RELATIONSHIP_PATTERNS = [
  # "Paula is married to John"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|was)\s+(?:married to|engaged to|dating)\s+(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/,
  # "Paula is the CEO of Microsoft"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|was)\s+(?:the\s+)?(\w+(?:\s+\w+)*)\s+of\s+(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)/
].freeze

# Location patterns.
# Both patterns capture (subject name, location); the location may carry
# an optional two-capital-letter suffix such as "Seattle, WA".
# NOTE(review): "X is based in Y" is matched by both patterns.
LOCATION_PATTERNS = [
  # "Paula lives in Seattle"
  /(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:lives?|lived|is based|was based|relocated)\s+(?:in|to)\s+(\b[A-Z][A-Za-z]+(?:,?\s+[A-Z]{2})?)/,
  # "Microsoft is headquartered in Redmond"
  /(\b[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)*)\s+(?:is|was)\s+(?:headquartered|located|based)\s+in\s+(\b[A-Z][A-Za-z]+(?:,?\s+[A-Z]{2})?)/
].freeze
|
|
51
|
+
|
|
52
|
+
# Runs every rule family over +text+ and returns the combined facts.
#
# @param text [String, nil] source text; nil or whitespace-only yields []
# @param context [Hash] passed through unchanged to each rule family
# @return [Array] facts in employment, relationship, location order,
#   keeping only the first fact for any given :text value
def extract(text, context = {})
  return [] if text.nil? || text.strip.empty?

  employment    = extract_employment_facts(text, context)
  relationships = extract_relationship_facts(text, context)
  locations     = extract_location_facts(text, context)

  (employment + relationships + locations).uniq { |fact| fact[:text] }
end
|
|
68
|
+
|
|
69
|
+
# Collects candidate entities from +text+.
#
# People are sequences of two or more capitalized words; organizations are
# the last capture of each EMPLOYMENT_PATTERNS match; places are the last
# capture of each LOCATION_PATTERNS match. Names rejected by common_word?
# are skipped.
#
# @param text [String, nil] source text; nil or whitespace-only yields []
# @return [Array] entities built by build_entity, unique by downcased :name
#   (the first occurrence wins)
def extract_entities(text)
  return [] if text.nil? || text.strip.empty?

  # Builds one entity per acceptable name, preserving input order.
  entities_for = lambda do |names, kind|
    names.flat_map { |n| common_word?(n) ? [] : [build_entity(name: n, type: kind)] }
  end

  person_names = text.scan(/\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/).flatten.uniq
  found = entities_for.call(person_names, "person")

  org_names = EMPLOYMENT_PATTERNS.flat_map { |rule| text.scan(rule).map(&:last) }
  found += entities_for.call(org_names, "organization")

  place_names = LOCATION_PATTERNS.flat_map { |rule| text.scan(rule).map(&:last) }
  found += entities_for.call(place_names, "place")

  found.uniq { |entity| entity[:name].downcase }
end
|
|
99
|
+
|
|
100
|
+
private
|
|
101
|
+
|
|
102
|
+
# Builds one employment fact per EMPLOYMENT_PATTERNS match in +text+.
#
# The fact text is the matched phrase itself (whitespace collapsed), so
# the verb survives: "Paula joined Microsoft" and "Paula left Microsoft"
# stay distinct. A match counts as a termination only when its own verb
# is a leaving verb; other sentences about the same person in the text
# have no influence.
#
# @param text [String] source text to scan
# @param context [Hash] :captured_at is the fallback date when the text
#   carries none
# @return [Array] facts built by build_fact with confidence 0.8; each has
#   a person "subject" mention, an organization "object" mention and,
#   for title patterns, a concept "instrument" mention
def extract_employment_facts(text, context)
  facts = []
  default_date = context[:captured_at] || Time.current

  EMPLOYMENT_PATTERNS.each do |pattern|
    text.scan(pattern) do
      # Grab the MatchData first; later regexp calls overwrite it.
      hit = Regexp.last_match
      person, *rest = hit.captures
      org = rest.last

      # The words between the person and the next capture hold this
      # match's verb ("left", "joined", "is a", ...).
      connective = text[hit.end(1)...hit.begin(2)]
      is_termination = connective.match?(/\A\s+(?:left|departed|resigned|was fired)\b/)

      fact_text = hit[0].gsub(/\s+/, " ")
      # Dates are looked up in the whole text, not just in this match.
      valid_at = extract_start_date(text) || default_date
      invalid_at = is_termination ? (extract_end_date(text) || default_date) : nil

      mentions = [
        build_mention(name: person, type: "person", role: "subject"),
        build_mention(name: org, type: "organization", role: "object")
      ]

      # Title patterns capture (person, title, org): record the title too.
      if rest.length > 1
        mentions << build_mention(name: rest[0], type: "concept", role: "instrument")
      end

      facts << build_fact(
        text: fact_text,
        valid_at: valid_at,
        invalid_at: invalid_at,
        mentions: mentions,
        confidence: 0.8
      )
    end
  end

  facts
end
|
|
140
|
+
|
|
141
|
+
# Builds one relationship fact per RELATIONSHIP_PATTERNS match in +text+.
#
# The fact text is the matched phrase itself (whitespace collapsed), so the
# relation word ("married to", "CEO of") is kept. Mentions are typed by the
# shape of the match:
#   two captures   -> person (subject), person (object)
#   three captures -> person (subject), concept (instrument) for the role
#                     words, organization (object) for the name after "of"
# NOTE(review): the name after "of" is typed as an organization because the
# pattern's documented example is "CEO of Microsoft"; phrases such as
# "mother of John" would need a different type.
#
# @param text [String] source text to scan
# @param context [Hash] :captured_at is the fallback start date
# @return [Array] facts built by build_fact with confidence 0.75
def extract_relationship_facts(text, context)
  facts = []
  default_date = context[:captured_at] || Time.current

  RELATIONSHIP_PATTERNS.each do |pattern|
    text.scan(pattern) do
      # Grab the MatchData first; later regexp calls overwrite it.
      hit = Regexp.last_match
      subject, *others = hit.captures
      fact_text = hit[0].gsub(/\s+/, " ")

      mentions = [build_mention(name: subject, type: "person", role: "subject")]
      if others.length > 1
        mentions << build_mention(name: others.first, type: "concept", role: "instrument")
        mentions << build_mention(name: others.last, type: "organization", role: "object")
      else
        mentions << build_mention(name: others.last, type: "person", role: "object")
      end

      facts << build_fact(
        text: fact_text,
        # Dates are looked up in the whole text, not just in this match.
        valid_at: extract_start_date(text) || default_date,
        invalid_at: extract_end_date(text),
        mentions: mentions,
        confidence: 0.75
      )
    end
  end

  facts
end
|
|
166
|
+
|
|
167
|
+
# Builds one location fact per LOCATION_PATTERNS match in +text+.
#
# The subject is typed "person" when the verb of THIS match is
# live/lives/lived, otherwise "organization". The verb is read from the
# match itself, so an unrelated "<name> lives ..." elsewhere in the text
# cannot change the result.
#
# @param text [String] source text to scan
# @param context [Hash] :captured_at is the fallback start date
# @return [Array] facts built by build_fact with confidence 0.7 and text
#   "<subject> is located in <location>"; invalid_at is always nil
def extract_location_facts(text, context)
  facts = []
  default_date = context[:captured_at] || Time.current

  LOCATION_PATTERNS.each do |pattern|
    text.scan(pattern) do
      # Grab the MatchData first; later regexp calls overwrite it.
      hit = Regexp.last_match
      entity_name, location = hit.captures
      fact_text = "#{entity_name} is located in #{location}"

      # Words between the subject and the location, e.g. " lives in ".
      connective = text[hit.end(1)...hit.begin(2)]
      entity_type = connective.match?(/\A\s+(?:lives?|lived)/i) ? "person" : "organization"

      mentions = [
        build_mention(name: entity_name, type: entity_type, role: "subject"),
        build_mention(name: location, type: "place", role: "location")
      ]

      facts << build_fact(
        text: fact_text,
        # Dates are looked up in the whole text, not just in this match.
        valid_at: extract_start_date(text) || default_date,
        invalid_at: nil,
        mentions: mentions,
        confidence: 0.7
      )
    end
  end

  facts
end
|
|
196
|
+
|
|
197
|
+
# Finds the first DATE_PATTERNS entry that matches +text+ and parses its
# first capture group.
#
# @param text [String] text to search
# @return [Object, nil] whatever parse_date returns for the captured date
#   text, or nil when no pattern matches
def extract_start_date(text)
  hit = nil
  DATE_PATTERNS.find { |rule| hit = text.match(rule) }
  hit ? parse_date(hit[1]) : nil
end
|
|
205
|
+
|
|
206
|
+
# Finds the first END_DATE_PATTERNS entry that matches +text+ and parses
# its first capture group.
#
# @param text [String] text to search
# @return [Object, nil] whatever parse_date returns for the captured date
#   text, or nil when no pattern matches
def extract_end_date(text)
  hit = nil
  END_DATE_PATTERNS.find { |rule| hit = text.match(rule) }
  hit ? parse_date(hit[1]) : nil
end
|
|
214
|
+
|
|
215
|
+
# Words that look like names (capitalized) but are not entities.
# Built once and frozen instead of being re-allocated on every call.
COMMON_WORDS = %w[
  The A An And Or But Is Was Were Are Been
  Has Have Had Will Would Could Should
  This That These Those
  January February March April May June July August September October November December
  Monday Tuesday Wednesday Thursday Friday Saturday Sunday
  Inc Corp Ltd LLC Company Corporation
].freeze

# Tells whether +word+ is one of COMMON_WORDS, ignoring case.
# Only a whole-string match counts: "The Company" is not a common word.
#
# @param word [String, nil] candidate name
# @return [Boolean] false for nil and other non-string values
def common_word?(word)
  return false unless word.respond_to?(:to_str)

  COMMON_WORDS.any? { |common| common.casecmp?(word) }
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module LLM
|
|
5
|
+
# Adapter for ruby_llm gem
|
|
6
|
+
# Provides a unified interface for the LLM extractor
|
|
7
|
+
#
|
|
8
|
+
# @example Configure with OpenAI
|
|
9
|
+
# FactDb.configure do |config|
|
|
10
|
+
# config.llm_client = FactDb::LLM::Adapter.new(
|
|
11
|
+
# provider: :openai,
|
|
12
|
+
# api_key: ENV["OPENAI_API_KEY"],
|
|
13
|
+
# model: "gpt-4o-mini"
|
|
14
|
+
# )
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# @example Configure with Anthropic
|
|
18
|
+
# FactDb.configure do |config|
|
|
19
|
+
# config.llm_client = FactDb::LLM::Adapter.new(
|
|
20
|
+
# provider: :anthropic,
|
|
21
|
+
# api_key: ENV["ANTHROPIC_API_KEY"],
|
|
22
|
+
# model: "claude-sonnet-4-20250514"
|
|
23
|
+
# )
|
|
24
|
+
# end
|
|
25
|
+
#
|
|
26
|
+
# @example Configure via YAML (config/fact_db.yml)
|
|
27
|
+
# # llm_provider: anthropic
|
|
28
|
+
# # llm_model: claude-sonnet-4-20250514
|
|
29
|
+
# # llm_api_key: <%= ENV["ANTHROPIC_API_KEY"] %>
|
|
30
|
+
#
|
|
31
|
+
# @example Configure via environment variables
|
|
32
|
+
# # EVENT_CLOCK_LLM_PROVIDER=anthropic
|
|
33
|
+
# # EVENT_CLOCK_LLM_MODEL=claude-sonnet-4-20250514
|
|
34
|
+
# # EVENT_CLOCK_LLM_API_KEY=sk-...
|
|
35
|
+
#
|
|
36
|
+
class Adapter
  attr_reader :model, :provider

  # Model used when the caller does not name one, keyed by provider.
  PROVIDER_DEFAULTS = {
    openai: "gpt-4o-mini",
    anthropic: "claude-sonnet-4-20250514",
    gemini: "gemini-2.0-flash",
    ollama: "llama3.2",
    bedrock: "claude-sonnet-4",
    openrouter: "anthropic/claude-sonnet-4"
  }.freeze

  # Create an adapter for a specific LLM provider and configure RubyLLM
  # for it.
  #
  # @param provider [Symbol, String] :openai, :anthropic, :gemini, :ollama,
  #   :bedrock or :openrouter
  # @param model [String, nil] model name; defaults to PROVIDER_DEFAULTS
  # @param api_key [String, nil] API key; falls back to the provider's
  #   environment variable
  # @param options [Hash] extra settings; :api_base (ollama), :region and
  #   :secret_key (bedrock) are read
  # @raise [ConfigurationError] for an unknown provider or when the
  #   ruby_llm gem cannot be loaded
  def initialize(provider:, model: nil, api_key: nil, **options)
    @provider = provider.to_sym
    @model = model || PROVIDER_DEFAULTS[@provider]
    @options = options

    configure_ruby_llm(api_key)
  end

  # Send a prompt to the LLM and return the response text.
  #
  # The provider is passed along with the model so RubyLLM does not have
  # to guess it from the model name; several defaults (for example the
  # Bedrock and Ollama ones) are not unique to, or not registered for,
  # a single provider.
  #
  # @param prompt [String] The prompt to send
  # @return [String] The response text
  def chat(prompt)
    RubyLLM.chat(model: model, provider: provider).ask(prompt).content
  end

  # Aliases for compatibility with different client interfaces
  alias call chat
  alias complete chat

  private

  # Loads ruby_llm on demand and sets the credentials/endpoints for the
  # selected provider.
  # NOTE(review): RubyLLM.configure is called at module level, so these
  # settings presumably apply process-wide rather than per adapter — confirm.
  #
  # @param api_key [String, nil] explicit key; nil falls back to ENV
  # @raise [ConfigurationError] unknown provider, or ruby_llm not installed
  def configure_ruby_llm(api_key)
    require "ruby_llm"

    RubyLLM.configure do |config|
      case provider
      when :openai
        config.openai_api_key = api_key || ENV.fetch("OPENAI_API_KEY", nil)
      when :anthropic
        config.anthropic_api_key = api_key || ENV.fetch("ANTHROPIC_API_KEY", nil)
      when :gemini
        config.gemini_api_key = api_key || ENV.fetch("GEMINI_API_KEY", nil)
      when :ollama
        # Ollama takes an endpoint, not a key.
        config.ollama_api_base = @options[:api_base] || "http://localhost:11434"
      when :bedrock
        config.bedrock_region = @options[:region] || ENV.fetch("AWS_REGION", "us-east-1")
        config.bedrock_api_key = api_key || ENV.fetch("AWS_ACCESS_KEY_ID", nil)
        config.bedrock_secret_key = @options[:secret_key] || ENV.fetch("AWS_SECRET_ACCESS_KEY", nil)
      when :openrouter
        config.openrouter_api_key = api_key || ENV.fetch("OPENROUTER_API_KEY", nil)
      else
        raise ConfigurationError, "Unknown LLM provider: #{provider}. " \
                                  "Supported: openai, anthropic, gemini, ollama, bedrock, openrouter"
      end
    end
  rescue LoadError
    raise ConfigurationError, "LLM adapter requires the 'ruby_llm' gem. Add it to your Gemfile:\n" \
                              "  gem 'ruby_llm'"
  end
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
# A captured piece of source text (email, transcript, document, ...)
# that facts can be traced back to through fact_sources.
class Content < ActiveRecord::Base
  self.table_name = "fact_db_contents"

  has_many :fact_sources, class_name: "FactDb::Models::FactSource",
                          foreign_key: :content_id, dependent: :destroy
  has_many :facts, through: :fact_sources

  validates :content_hash, presence: true, uniqueness: true
  validates :content_type, presence: true
  validates :raw_text, presence: true
  validates :captured_at, presence: true

  # The hash is derived from raw_text before validation, on create only.
  before_validation :generate_content_hash, on: :create

  # Content types
  TYPES = %w[email transcript document slack meeting_notes contract report].freeze

  validates :content_type, inclusion: { in: TYPES }, allow_nil: false

  scope :by_type, ->(type) { where(content_type: type) }
  scope :captured_between, ->(from, to) { where(captured_at: from..to) }
  scope :captured_after, ->(date) { where("captured_at >= ?", date) }
  scope :captured_before, ->(date) { where("captured_at <= ?", date) }

  # Full-text search over raw_text using the 'english' text search config.
  scope :search_text, lambda { |query|
    where("to_tsvector('english', raw_text) @@ plainto_tsquery('english', ?)", query)
  }

  # Orders records by the `<=>` distance between the embedding column and
  # +embedding+ (presumably pgvector's distance operator — confirm against
  # the migrations).
  #
  # The embedding's string form is bound through sanitize_sql_array rather
  # than interpolated, so a value containing quotes cannot break out of the
  # SQL literal. For ordinary arrays the generated SQL is unchanged.
  #
  # @param embedding [Array, String, nil] query vector; nil returns an
  #   empty relation
  # @param limit [Integer] maximum number of rows
  # @return [ActiveRecord::Relation]
  def self.nearest_neighbors(embedding, limit: 10)
    return none unless embedding

    distance = sanitize_sql_array(["embedding <=> ?", embedding.to_s])
    order(Arel.sql(distance)).limit(limit)
  end

  # @return [true] always; this model reports itself as immutable
  def immutable?
    true
  end

  # @return [Integer] number of whitespace-separated tokens in raw_text
  def word_count
    raw_text.split.size
  end

  # Returns raw_text unchanged when it fits, otherwise its first +length+
  # characters followed by "...".
  #
  # @param length [Integer] maximum number of characters before truncation
  # @return [String]
  def preview(length: 200)
    return raw_text if raw_text.length <= length

    "#{raw_text[0, length]}..."
  end

  private

  # Sets content_hash to the SHA-256 hex digest of raw_text; leaves it
  # untouched when raw_text is blank.
  def generate_content_hash
    self.content_hash = Digest::SHA256.hexdigest(raw_text) if raw_text.present?
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
# A person, organization, place or other named thing that facts mention.
# Entities can carry aliases and can be merged into another entity.
class Entity < ActiveRecord::Base
  self.table_name = "fact_db_entities"

  has_many :aliases, class_name: "FactDb::Models::EntityAlias",
                     foreign_key: :entity_id, dependent: :destroy
  has_many :entity_mentions, class_name: "FactDb::Models::EntityMention",
                             foreign_key: :entity_id, dependent: :destroy
  has_many :facts, through: :entity_mentions

  # Self-referential merge link: merged_into is the surviving entity.
  belongs_to :merged_into, class_name: "FactDb::Models::Entity",
                           foreign_key: :merged_into_id, optional: true
  has_many :merged_entities, class_name: "FactDb::Models::Entity",
                             foreign_key: :merged_into_id

  validates :canonical_name, presence: true
  validates :entity_type, presence: true
  validates :resolution_status, presence: true

  # Entity types
  TYPES = %w[person organization place product event concept].freeze
  STATUSES = %w[unresolved resolved merged split].freeze

  validates :entity_type, inclusion: { in: TYPES }
  validates :resolution_status, inclusion: { in: STATUSES }

  scope :by_type, ->(type) { where(entity_type: type) }
  scope :resolved, -> { where(resolution_status: "resolved") }
  scope :unresolved, -> { where(resolution_status: "unresolved") }
  scope :not_merged, -> { where.not(resolution_status: "merged") }
  scope :people, -> { by_type("person") }
  scope :organizations, -> { by_type("organization") }
  scope :places, -> { by_type("place") }

  # @return [Boolean] true when resolution_status is "resolved"
  def resolved?
    resolution_status == "resolved"
  end

  # @return [Boolean] true when resolution_status is "merged"
  def merged?
    resolution_status == "merged"
  end

  # Follows the merged_into chain to the entity that finally survives.
  #
  # @return [Entity, nil] self when not merged; otherwise the end of the
  #   chain, falling back to the direct merge target. nil when this entity
  #   is marked merged but has no merge target.
  def canonical_entity
    return self unless merged?

    merged_into&.canonical_entity || merged_into
  end

  # @return [Array<String>] the alias_text of every alias
  def all_aliases
    aliases.pluck(:alias_text)
  end

  # Finds the alias with this exact text or creates it; type and
  # confidence are applied only when a new alias is created.
  #
  # @param text [String] alias text
  # @param type [String, nil] alias type
  # @param confidence [Float] confidence stored on a new alias
  # @return [EntityAlias]
  def add_alias(text, type: nil, confidence: 1.0)
    aliases.find_or_create_by!(alias_text: text) do |a|
      a.alias_type = type
      a.confidence = confidence
    end
  end

  # Case-insensitive check of +name+ against the canonical name first and
  # the stored aliases second.
  #
  # @param name [String]
  # @return [Boolean]
  def matches_name?(name)
    return true if canonical_name.downcase == name.downcase

    aliases.exists?(["LOWER(alias_text) = ?", name.downcase])
  end

  # Facts mentioning this entity, narrowed by Fact's currently_valid and
  # canonical scopes.
  def current_facts
    facts.currently_valid.canonical
  end

  # Facts mentioning this entity, narrowed by Fact's valid_at(date) and
  # canonical scopes.
  def facts_at(date)
    facts.valid_at(date).canonical
  end

  # Vector similarity search for entity matching: orders records by the
  # `<=>` distance between the embedding column and +embedding+
  # (presumably pgvector's distance operator — confirm against the
  # migrations).
  #
  # The embedding's string form is bound through sanitize_sql_array rather
  # than interpolated, so a value containing quotes cannot break out of the
  # SQL literal. For ordinary arrays the generated SQL is unchanged.
  #
  # @param embedding [Array, String, nil] query vector; nil returns an
  #   empty relation
  # @param limit [Integer] maximum number of rows
  # @return [ActiveRecord::Relation]
  def self.nearest_neighbors(embedding, limit: 10)
    return none unless embedding

    distance = sanitize_sql_array(["embedding <=> ?", embedding.to_s])
    order(Arel.sql(distance)).limit(limit)
  end
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
# An alternative name (nickname, email, handle, ...) for an Entity.
class EntityAlias < ActiveRecord::Base
  # Allowed values for alias_type (nil is also accepted).
  TYPES = %w[name nickname email handle abbreviation title].freeze

  self.table_name = "fact_db_entity_aliases"

  belongs_to :entity, class_name: "FactDb::Models::Entity"

  validates :alias_text, presence: true
  # The same text may be an alias of different entities, but only once each.
  validates :alias_text, uniqueness: { scope: :entity_id }
  validates :alias_type, inclusion: { in: TYPES }, allow_nil: true

  scope :by_type, lambda { |type| where(alias_type: type) }
  scope :high_confidence, lambda { where("confidence >= ?", 0.9) }

  class << self
    # Case-insensitive lookup of the entity that owns an alias.
    #
    # @param text [String] alias text to look up
    # @return [Entity, nil] the owning entity of the first matching alias,
    #   or nil when no alias matches
    def find_entity_by_alias(text)
      found = find_by(["LOWER(alias_text) = ?", text.downcase])
      found&.entity
    end
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
# Join record linking a Fact to an Entity it mentions, together with the
# text of the mention and the role the entity plays in the fact.
class EntityMention < ActiveRecord::Base
  # Allowed values for mention_role (nil is also accepted).
  ROLES = %w[subject object location temporal instrument beneficiary].freeze

  self.table_name = "fact_db_entity_mentions"

  belongs_to :fact, class_name: "FactDb::Models::Fact"
  belongs_to :entity, class_name: "FactDb::Models::Entity"

  validates :mention_text, presence: true
  # One row per (fact, entity, mention text) combination.
  validates :fact_id, uniqueness: { scope: %i[entity_id mention_text] }
  validates :mention_role, inclusion: { in: ROLES }, allow_nil: true

  scope :by_role, lambda { |role| where(mention_role: role) }
  scope :subjects, lambda { by_role("subject") }
  scope :objects, lambda { by_role("object") }
  scope :high_confidence, lambda { where("confidence >= ?", 0.9) }

  # @return [Boolean] true when this mention's role is "subject"
  def subject?
    mention_role == "subject"
  end

  # @return [Boolean] true when this mention's role is "object"
  def object?
    mention_role == "object"
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|