fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -2,44 +2,89 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Resolution
|
|
5
|
+
# Resolves entity names to canonical entities in the database
|
|
6
|
+
#
|
|
7
|
+
# Provides entity resolution through exact alias matching, canonical name matching,
|
|
8
|
+
# and fuzzy matching using Levenshtein distance. Also handles entity merging,
|
|
9
|
+
# splitting, and duplicate detection.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic usage
|
|
12
|
+
# resolver = EntityResolver.new
|
|
13
|
+
# resolved = resolver.resolve("John Smith", kind: :person)
|
|
14
|
+
# if resolved
|
|
15
|
+
# puts "Found: #{resolved.entity.name} (confidence: #{resolved.confidence})"
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
5
18
|
class EntityResolver
|
|
19
|
+
# @return [FactDb::Config] the configuration object
|
|
6
20
|
attr_reader :config
|
|
7
21
|
|
|
22
|
+
# Initializes a new EntityResolver instance
|
|
23
|
+
#
|
|
24
|
+
# @param config [FactDb::Config] configuration object (defaults to FactDb.config)
|
|
8
25
|
def initialize(config = FactDb.config)
|
|
9
26
|
@config = config
|
|
10
27
|
@threshold = config.fuzzy_match_threshold
|
|
11
28
|
@auto_merge_threshold = config.auto_merge_threshold
|
|
12
29
|
end
|
|
13
30
|
|
|
14
|
-
#
|
|
15
|
-
|
|
31
|
+
# Resolves a name to an existing entity
|
|
32
|
+
#
|
|
33
|
+
# Tries resolution in order: exact alias match, canonical name match, fuzzy match.
|
|
34
|
+
#
|
|
35
|
+
# @param name [String] the name to resolve
|
|
36
|
+
# @param kind [Symbol, nil] optional entity kind filter (:person, :organization, etc.)
|
|
37
|
+
# @return [ResolvedEntity, nil] resolved entity with confidence score, or nil if not found
|
|
38
|
+
#
|
|
39
|
+
# @example Resolve with kind filter
|
|
40
|
+
# resolver.resolve("Acme", kind: :organization)
|
|
41
|
+
def resolve(name, kind: nil)
|
|
16
42
|
return nil if name.nil? || name.empty?
|
|
17
43
|
|
|
18
44
|
# 1. Exact alias match
|
|
19
|
-
exact = find_by_exact_alias(name,
|
|
45
|
+
exact = find_by_exact_alias(name, kind: kind)
|
|
20
46
|
return ResolvedEntity.new(exact, confidence: 1.0, match_type: :exact_alias) if exact
|
|
21
47
|
|
|
22
48
|
# 2. Canonical name match
|
|
23
|
-
canonical =
|
|
24
|
-
return ResolvedEntity.new(canonical, confidence: 1.0, match_type: :
|
|
49
|
+
canonical = find_by_name(name, kind: kind)
|
|
50
|
+
return ResolvedEntity.new(canonical, confidence: 1.0, match_type: :name) if canonical
|
|
25
51
|
|
|
26
52
|
# 3. Fuzzy matching
|
|
27
|
-
fuzzy = find_by_fuzzy_match(name,
|
|
53
|
+
fuzzy = find_by_fuzzy_match(name, kind: kind)
|
|
28
54
|
return fuzzy if fuzzy && fuzzy.confidence >= @threshold
|
|
29
55
|
|
|
30
56
|
# 4. No match found
|
|
31
57
|
nil
|
|
32
58
|
end
|
|
33
59
|
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
|
|
60
|
+
# Resolves a name to an entity, creating one if not found
|
|
61
|
+
#
|
|
62
|
+
# @param name [String] the name to resolve or create
|
|
63
|
+
# @param kind [Symbol] the entity kind (required for creation)
|
|
64
|
+
# @param aliases [Array<String>] additional aliases to add
|
|
65
|
+
# @param attributes [Hash] additional attributes for new entity
|
|
66
|
+
# @return [FactDb::Models::Entity] the resolved or created entity
|
|
67
|
+
#
|
|
68
|
+
# @example Create with aliases
|
|
69
|
+
# resolver.resolve_or_create("John Smith", kind: :person, aliases: ["J. Smith", "Johnny"])
|
|
70
|
+
def resolve_or_create(name, kind:, aliases: [], attributes: {})
|
|
71
|
+
resolved = resolve(name, kind: kind)
|
|
37
72
|
return resolved.entity if resolved
|
|
38
73
|
|
|
39
|
-
create_entity(name,
|
|
74
|
+
create_entity(name, kind: kind, aliases: aliases, attributes: attributes)
|
|
40
75
|
end
|
|
41
76
|
|
|
42
|
-
#
|
|
77
|
+
# Merges two entities, keeping one as canonical
|
|
78
|
+
#
|
|
79
|
+
# Transfers all aliases and mentions from the merged entity to the kept entity.
|
|
80
|
+
#
|
|
81
|
+
# @param keep_id [Integer] ID of the entity to keep
|
|
82
|
+
# @param merge_id [Integer] ID of the entity to merge (will be marked as merged)
|
|
83
|
+
# @return [FactDb::Models::Entity] the kept entity with updated aliases
|
|
84
|
+
# @raise [ResolutionError] if attempting to merge into itself or merge already merged entity
|
|
85
|
+
#
|
|
86
|
+
# @example Merge duplicate entities
|
|
87
|
+
# resolver.merge(primary_entity.id, duplicate_entity.id)
|
|
43
88
|
def merge(keep_id, merge_id)
|
|
44
89
|
keep = Models::Entity.find(keep_id)
|
|
45
90
|
merge_entity = Models::Entity.find(merge_id)
|
|
@@ -50,15 +95,15 @@ module FactDb
|
|
|
50
95
|
Models::Entity.transaction do
|
|
51
96
|
# Move all aliases to kept entity
|
|
52
97
|
merge_entity.aliases.each do |alias_record|
|
|
53
|
-
keep.aliases.find_or_create_by!(
|
|
54
|
-
a.
|
|
98
|
+
keep.aliases.find_or_create_by!(name: alias_record.name) do |a|
|
|
99
|
+
a.kind = alias_record.kind
|
|
55
100
|
a.confidence = alias_record.confidence
|
|
56
101
|
end
|
|
57
102
|
end
|
|
58
103
|
|
|
59
104
|
# Add the merged entity's canonical name as an alias
|
|
60
|
-
keep.aliases.find_or_create_by!(
|
|
61
|
-
a.
|
|
105
|
+
keep.aliases.find_or_create_by!(name: merge_entity.name) do |a|
|
|
106
|
+
a.kind = "name"
|
|
62
107
|
a.confidence = 1.0
|
|
63
108
|
end
|
|
64
109
|
|
|
@@ -68,14 +113,26 @@ module FactDb
|
|
|
68
113
|
# Mark merged entity
|
|
69
114
|
merge_entity.update!(
|
|
70
115
|
resolution_status: "merged",
|
|
71
|
-
|
|
116
|
+
canonical_id: keep_id
|
|
72
117
|
)
|
|
73
118
|
end
|
|
74
119
|
|
|
75
120
|
keep.reload
|
|
76
121
|
end
|
|
77
122
|
|
|
78
|
-
#
|
|
123
|
+
# Splits an entity into multiple new entities
|
|
124
|
+
#
|
|
125
|
+
# Creates new entities based on the split configuration and marks the original as split.
|
|
126
|
+
#
|
|
127
|
+
# @param entity_id [Integer] ID of the entity to split
|
|
128
|
+
# @param split_configs [Array<Hash>] array of hashes with :name, :kind, :aliases, :attributes
|
|
129
|
+
# @return [Array<FactDb::Models::Entity>] array of newly created entities
|
|
130
|
+
#
|
|
131
|
+
# @example Split an ambiguous entity
|
|
132
|
+
# resolver.split(entity.id, [
|
|
133
|
+
# { name: "John Smith (Sales)", kind: :person },
|
|
134
|
+
# { name: "John Smith (Engineering)", kind: :person }
|
|
135
|
+
# ])
|
|
79
136
|
def split(entity_id, split_configs)
|
|
80
137
|
original = Models::Entity.find(entity_id)
|
|
81
138
|
|
|
@@ -83,7 +140,7 @@ module FactDb
|
|
|
83
140
|
new_entities = split_configs.map do |config|
|
|
84
141
|
create_entity(
|
|
85
142
|
config[:name],
|
|
86
|
-
|
|
143
|
+
kind: config[:kind] || original.kind,
|
|
87
144
|
aliases: config[:aliases] || [],
|
|
88
145
|
attributes: config[:attributes] || {}
|
|
89
146
|
)
|
|
@@ -95,7 +152,14 @@ module FactDb
|
|
|
95
152
|
end
|
|
96
153
|
end
|
|
97
154
|
|
|
98
|
-
#
|
|
155
|
+
# Finds potential duplicate entities based on name similarity
|
|
156
|
+
#
|
|
157
|
+
# @param threshold [Float, nil] minimum similarity score (defaults to config threshold)
|
|
158
|
+
# @return [Array<Hash>] array of hashes with :entity1, :entity2, :similarity keys
|
|
159
|
+
#
|
|
160
|
+
# @example Find duplicates with custom threshold
|
|
161
|
+
# duplicates = resolver.find_duplicates(threshold: 0.85)
|
|
162
|
+
# duplicates.each { |d| puts "#{d[:entity1].name} ~ #{d[:entity2].name} (#{d[:similarity]})" }
|
|
99
163
|
def find_duplicates(threshold: nil)
|
|
100
164
|
threshold ||= @threshold
|
|
101
165
|
duplicates = []
|
|
@@ -104,7 +168,7 @@ module FactDb
|
|
|
104
168
|
|
|
105
169
|
entities.each_with_index do |entity, i|
|
|
106
170
|
entities[(i + 1)..].each do |other|
|
|
107
|
-
similarity = calculate_similarity(entity.
|
|
171
|
+
similarity = calculate_similarity(entity.name, other.name)
|
|
108
172
|
if similarity >= threshold
|
|
109
173
|
duplicates << {
|
|
110
174
|
entity1: entity,
|
|
@@ -118,7 +182,11 @@ module FactDb
|
|
|
118
182
|
duplicates.sort_by { |d| -d[:similarity] }
|
|
119
183
|
end
|
|
120
184
|
|
|
121
|
-
#
|
|
185
|
+
# Automatically merges high-confidence duplicates
|
|
186
|
+
#
|
|
187
|
+
# Uses the auto_merge_threshold from config and keeps the entity with more mentions.
|
|
188
|
+
#
|
|
189
|
+
# @return [void]
|
|
122
190
|
def auto_merge_duplicates!
|
|
123
191
|
duplicates = find_duplicates(threshold: @auto_merge_threshold)
|
|
124
192
|
|
|
@@ -138,29 +206,29 @@ module FactDb
|
|
|
138
206
|
|
|
139
207
|
private
|
|
140
208
|
|
|
141
|
-
def find_by_exact_alias(name,
|
|
142
|
-
scope = Models::EntityAlias.where(["LOWER(
|
|
143
|
-
scope = scope.joins(:entity).where(fact_db_entities: {
|
|
209
|
+
def find_by_exact_alias(name, kind:)
|
|
210
|
+
scope = Models::EntityAlias.where(["LOWER(fact_db_entity_aliases.name) = ?", name.downcase])
|
|
211
|
+
scope = scope.joins(:entity).where(fact_db_entities: { kind: kind }) if kind
|
|
144
212
|
scope = scope.joins(:entity).where.not(fact_db_entities: { resolution_status: "merged" })
|
|
145
213
|
scope.first&.entity
|
|
146
214
|
end
|
|
147
215
|
|
|
148
|
-
def
|
|
149
|
-
scope = Models::Entity.where(["LOWER(
|
|
150
|
-
scope = scope.where(
|
|
216
|
+
def find_by_name(name, kind:)
|
|
217
|
+
scope = Models::Entity.where(["LOWER(name) = ?", name.downcase])
|
|
218
|
+
scope = scope.where(kind: kind) if kind
|
|
151
219
|
scope.not_merged.first
|
|
152
220
|
end
|
|
153
221
|
|
|
154
|
-
def find_by_fuzzy_match(name,
|
|
222
|
+
def find_by_fuzzy_match(name, kind:)
|
|
155
223
|
candidates = Models::Entity.not_merged
|
|
156
|
-
candidates = candidates.where(
|
|
224
|
+
candidates = candidates.where(kind: kind) if kind
|
|
157
225
|
|
|
158
226
|
best_match = nil
|
|
159
227
|
best_similarity = 0
|
|
160
228
|
|
|
161
229
|
candidates.find_each do |entity|
|
|
162
230
|
# Check canonical name
|
|
163
|
-
similarity = calculate_similarity(name, entity.
|
|
231
|
+
similarity = calculate_similarity(name, entity.name)
|
|
164
232
|
if similarity > best_similarity
|
|
165
233
|
best_similarity = similarity
|
|
166
234
|
best_match = entity
|
|
@@ -168,7 +236,7 @@ module FactDb
|
|
|
168
236
|
|
|
169
237
|
# Check aliases
|
|
170
238
|
entity.aliases.each do |alias_record|
|
|
171
|
-
alias_similarity = calculate_similarity(name, alias_record.
|
|
239
|
+
alias_similarity = calculate_similarity(name, alias_record.name)
|
|
172
240
|
if alias_similarity > best_similarity
|
|
173
241
|
best_similarity = alias_similarity
|
|
174
242
|
best_match = entity
|
|
@@ -181,10 +249,10 @@ module FactDb
|
|
|
181
249
|
ResolvedEntity.new(best_match, confidence: best_similarity, match_type: :fuzzy)
|
|
182
250
|
end
|
|
183
251
|
|
|
184
|
-
def create_entity(name,
|
|
252
|
+
def create_entity(name, kind:, aliases: [], attributes: {})
|
|
185
253
|
entity = Models::Entity.create!(
|
|
186
|
-
|
|
187
|
-
|
|
254
|
+
name: name,
|
|
255
|
+
kind: kind,
|
|
188
256
|
attributes: attributes,
|
|
189
257
|
resolution_status: "resolved"
|
|
190
258
|
)
|
|
@@ -228,33 +296,65 @@ module FactDb
|
|
|
228
296
|
end
|
|
229
297
|
end
|
|
230
298
|
|
|
299
|
+
# Represents a resolved entity with confidence metadata
|
|
300
|
+
#
|
|
301
|
+
# Wraps an entity with information about how it was resolved
|
|
302
|
+
# and the confidence level of the match.
|
|
303
|
+
#
|
|
231
304
|
class ResolvedEntity
|
|
232
|
-
|
|
305
|
+
# @return [FactDb::Models::Entity] the resolved entity
|
|
306
|
+
attr_reader :entity
|
|
233
307
|
|
|
308
|
+
# @return [Float] confidence score from 0.0 to 1.0
|
|
309
|
+
attr_reader :confidence
|
|
310
|
+
|
|
311
|
+
# @return [Symbol] how the entity was matched (:exact_alias, :name, :fuzzy)
|
|
312
|
+
attr_reader :match_type
|
|
313
|
+
|
|
314
|
+
# Initializes a new ResolvedEntity
|
|
315
|
+
#
|
|
316
|
+
# @param entity [FactDb::Models::Entity] the resolved entity
|
|
317
|
+
# @param confidence [Float] confidence score (0.0 to 1.0)
|
|
318
|
+
# @param match_type [Symbol] match type (:exact_alias, :name, :fuzzy)
|
|
234
319
|
def initialize(entity, confidence:, match_type:)
|
|
235
320
|
@entity = entity
|
|
236
321
|
@confidence = confidence
|
|
237
322
|
@match_type = match_type
|
|
238
323
|
end
|
|
239
324
|
|
|
325
|
+
# Checks if this was an exact match (confidence == 1.0)
|
|
326
|
+
#
|
|
327
|
+
# @return [Boolean] true if confidence is 1.0
|
|
240
328
|
def exact_match?
|
|
241
329
|
confidence == 1.0
|
|
242
330
|
end
|
|
243
331
|
|
|
332
|
+
# Checks if this was a fuzzy match
|
|
333
|
+
#
|
|
334
|
+
# @return [Boolean] true if match_type is :fuzzy
|
|
244
335
|
def fuzzy_match?
|
|
245
336
|
match_type == :fuzzy
|
|
246
337
|
end
|
|
247
338
|
|
|
339
|
+
# Returns the entity ID
|
|
340
|
+
#
|
|
341
|
+
# @return [Integer] the entity's database ID
|
|
248
342
|
def id
|
|
249
343
|
entity.id
|
|
250
344
|
end
|
|
251
345
|
|
|
252
|
-
|
|
253
|
-
|
|
346
|
+
# Returns the entity name
|
|
347
|
+
#
|
|
348
|
+
# @return [String] the entity's canonical name
|
|
349
|
+
def name
|
|
350
|
+
entity.name
|
|
254
351
|
end
|
|
255
352
|
|
|
256
|
-
|
|
257
|
-
|
|
353
|
+
# Returns the entity kind
|
|
354
|
+
#
|
|
355
|
+
# @return [String] the entity's kind
|
|
356
|
+
def kind
|
|
357
|
+
entity.kind
|
|
258
358
|
end
|
|
259
359
|
end
|
|
260
360
|
end
|
|
@@ -2,22 +2,51 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Resolution
|
|
5
|
+
# Handles fact lifecycle operations including supersession, synthesis, and conflict resolution
|
|
6
|
+
#
|
|
7
|
+
# Provides methods for managing fact relationships: superseding outdated facts,
|
|
8
|
+
# synthesizing new facts from multiple sources, handling corroboration,
|
|
9
|
+
# and detecting/resolving conflicts.
|
|
10
|
+
#
|
|
11
|
+
# @example Supersede an outdated fact
|
|
12
|
+
# resolver = FactResolver.new
|
|
13
|
+
# new_fact = resolver.supersede(old_fact.id, "Updated information", valid_at: Date.today)
|
|
14
|
+
#
|
|
5
15
|
class FactResolver
|
|
16
|
+
# @return [FactDb::Config] the configuration object
|
|
6
17
|
attr_reader :config
|
|
7
18
|
|
|
19
|
+
# Initializes a new FactResolver instance
|
|
20
|
+
#
|
|
21
|
+
# @param config [FactDb::Config] configuration object (defaults to FactDb.config)
|
|
8
22
|
def initialize(config = FactDb.config)
|
|
9
23
|
@config = config
|
|
10
24
|
end
|
|
11
25
|
|
|
12
|
-
#
|
|
13
|
-
|
|
26
|
+
# Supersedes an existing fact with a new one
|
|
27
|
+
#
|
|
28
|
+
# Creates a new canonical fact and marks the old one as superseded.
|
|
29
|
+
# Copies mentions and sources from the old fact unless new mentions are provided.
|
|
30
|
+
#
|
|
31
|
+
# @param old_fact_id [Integer] ID of the fact to supersede
|
|
32
|
+
# @param new_text [String] the updated fact text
|
|
33
|
+
# @param valid_at [Date, Time] when the new fact became valid
|
|
34
|
+
# @param mentions [Array<Hash>] optional entity mentions for the new fact
|
|
35
|
+
# @return [FactDb::Models::Fact] the new canonical fact
|
|
36
|
+
# @raise [ResolutionError] if the fact is already superseded
|
|
37
|
+
#
|
|
38
|
+
# @example Supersede with new mentions
|
|
39
|
+
# resolver.supersede(fact.id, "John now works at NewCo",
|
|
40
|
+
# valid_at: Date.today,
|
|
41
|
+
# mentions: [{ entity_id: john.id, text: "John", role: :subject }])
|
|
42
|
+
def supersede(old_fact_id, new_text, valid_at:, mentions: [])
|
|
14
43
|
old_fact = Models::Fact.find(old_fact_id)
|
|
15
44
|
|
|
16
45
|
raise ResolutionError, "Cannot supersede already superseded fact" if old_fact.superseded?
|
|
17
46
|
|
|
18
47
|
Models::Fact.transaction do
|
|
19
48
|
new_fact = Models::Fact.create!(
|
|
20
|
-
|
|
49
|
+
text: new_text,
|
|
21
50
|
valid_at: valid_at,
|
|
22
51
|
status: "canonical",
|
|
23
52
|
extraction_method: old_fact.extraction_method,
|
|
@@ -49,8 +78,8 @@ module FactDb
|
|
|
49
78
|
# Copy sources from old fact
|
|
50
79
|
old_fact.fact_sources.each do |source|
|
|
51
80
|
new_fact.add_source(
|
|
52
|
-
|
|
53
|
-
|
|
81
|
+
source: source.source,
|
|
82
|
+
kind: source.kind,
|
|
54
83
|
excerpt: source.excerpt,
|
|
55
84
|
confidence: source.confidence
|
|
56
85
|
)
|
|
@@ -67,7 +96,21 @@ module FactDb
|
|
|
67
96
|
end
|
|
68
97
|
end
|
|
69
98
|
|
|
70
|
-
#
|
|
99
|
+
# Synthesizes a new fact from multiple source facts
|
|
100
|
+
#
|
|
101
|
+
# Creates a single synthesized fact that aggregates information from multiple facts.
|
|
102
|
+
# Automatically aggregates entity mentions and links to all source content.
|
|
103
|
+
#
|
|
104
|
+
# @param source_fact_ids [Array<Integer>] IDs of the source facts
|
|
105
|
+
# @param synthesized_text [String] the synthesized summary text
|
|
106
|
+
# @param valid_at [Date, Time] when the synthesis is valid from
|
|
107
|
+
# @param invalid_at [Date, Time, nil] when the synthesis becomes invalid
|
|
108
|
+
# @param mentions [Array<Hash>] optional entity mentions (aggregated from sources if empty)
|
|
109
|
+
# @return [FactDb::Models::Fact] the synthesized fact
|
|
110
|
+
# @raise [ResolutionError] if no source facts are found
|
|
111
|
+
#
|
|
112
|
+
# @example Synthesize multiple facts
|
|
113
|
+
# resolver.synthesize([fact1.id, fact2.id], "Summary of events", valid_at: Date.today)
|
|
71
114
|
def synthesize(source_fact_ids, synthesized_text, valid_at:, invalid_at: nil, mentions: [])
|
|
72
115
|
source_facts = Models::Fact.where(id: source_fact_ids)
|
|
73
116
|
|
|
@@ -75,7 +118,7 @@ module FactDb
|
|
|
75
118
|
|
|
76
119
|
Models::Fact.transaction do
|
|
77
120
|
synthesized = Models::Fact.create!(
|
|
78
|
-
|
|
121
|
+
text: synthesized_text,
|
|
79
122
|
valid_at: valid_at,
|
|
80
123
|
invalid_at: invalid_at,
|
|
81
124
|
status: "synthesized",
|
|
@@ -117,7 +160,15 @@ module FactDb
|
|
|
117
160
|
end
|
|
118
161
|
end
|
|
119
162
|
|
|
120
|
-
#
|
|
163
|
+
# Marks a fact as corroborated by another fact
|
|
164
|
+
#
|
|
165
|
+
# Adds the corroborating fact ID to the corroborated_by_ids array.
|
|
166
|
+
# If 2+ facts corroborate, status changes to "corroborated".
|
|
167
|
+
#
|
|
168
|
+
# @param fact_id [Integer] ID of the fact being corroborated
|
|
169
|
+
# @param corroborating_fact_id [Integer] ID of the supporting fact
|
|
170
|
+
# @return [FactDb::Models::Fact] the updated fact
|
|
171
|
+
# @raise [ResolutionError] if attempting to corroborate with the same fact
|
|
121
172
|
def corroborate(fact_id, corroborating_fact_id)
|
|
122
173
|
fact = Models::Fact.find(fact_id)
|
|
123
174
|
_corroborating = Models::Fact.find(corroborating_fact_id)
|
|
@@ -134,14 +185,24 @@ module FactDb
|
|
|
134
185
|
fact
|
|
135
186
|
end
|
|
136
187
|
|
|
137
|
-
#
|
|
188
|
+
# Invalidates a fact without replacement
|
|
189
|
+
#
|
|
190
|
+
# @param fact_id [Integer] ID of the fact to invalidate
|
|
191
|
+
# @param at [Time] when the fact became invalid (defaults to now)
|
|
192
|
+
# @return [FactDb::Models::Fact] the invalidated fact
|
|
138
193
|
def invalidate(fact_id, at: Time.current)
|
|
139
194
|
fact = Models::Fact.find(fact_id)
|
|
140
195
|
fact.update!(invalid_at: at)
|
|
141
196
|
fact
|
|
142
197
|
end
|
|
143
198
|
|
|
144
|
-
#
|
|
199
|
+
# Finds potentially conflicting facts
|
|
200
|
+
#
|
|
201
|
+
# Identifies facts with similar text (50-95% similarity) that might be contradictory.
|
|
202
|
+
#
|
|
203
|
+
# @param entity_id [Integer, nil] entity ID to filter by
|
|
204
|
+
# @param topic [String, nil] topic to search for
|
|
205
|
+
# @return [Array<Hash>] array of hashes with :fact1, :fact2, :similarity keys
|
|
145
206
|
def find_conflicts(entity_id: nil, topic: nil)
|
|
146
207
|
scope = Models::Fact.canonical.currently_valid
|
|
147
208
|
|
|
@@ -159,7 +220,7 @@ module FactDb
|
|
|
159
220
|
|
|
160
221
|
facts.each_with_index do |fact, i|
|
|
161
222
|
facts[(i + 1)..].each do |other|
|
|
162
|
-
similarity = text_similarity(fact.
|
|
223
|
+
similarity = text_similarity(fact.text, other.text)
|
|
163
224
|
if similarity > 0.5 && similarity < 0.95
|
|
164
225
|
conflicts << {
|
|
165
226
|
fact1: fact,
|
|
@@ -173,7 +234,12 @@ module FactDb
|
|
|
173
234
|
conflicts.sort_by { |c| -c[:similarity] }
|
|
174
235
|
end
|
|
175
236
|
|
|
176
|
-
#
|
|
237
|
+
# Resolves conflicts by keeping one fact and superseding others
|
|
238
|
+
#
|
|
239
|
+
# @param keep_fact_id [Integer] ID of the fact to keep as canonical
|
|
240
|
+
# @param supersede_fact_ids [Array<Integer>] IDs of facts to mark as superseded
|
|
241
|
+
# @param reason [String, nil] reason for the resolution (stored in metadata)
|
|
242
|
+
# @return [FactDb::Models::Fact] the kept fact
|
|
177
243
|
def resolve_conflict(keep_fact_id, supersede_fact_ids, reason: nil)
|
|
178
244
|
Models::Fact.transaction do
|
|
179
245
|
supersede_fact_ids.each do |fact_id|
|
|
@@ -190,7 +256,13 @@ module FactDb
|
|
|
190
256
|
Models::Fact.find(keep_fact_id)
|
|
191
257
|
end
|
|
192
258
|
|
|
193
|
-
#
|
|
259
|
+
# Builds a timeline fact from point-in-time facts for an entity
|
|
260
|
+
#
|
|
261
|
+
# Creates a synthesized fact summarizing the entity's history on a topic.
|
|
262
|
+
#
|
|
263
|
+
# @param entity_id [Integer] the entity ID
|
|
264
|
+
# @param topic [String, nil] optional topic filter
|
|
265
|
+
# @return [FactDb::Models::Fact, nil] synthesized timeline fact or nil if no facts found
|
|
194
266
|
def build_timeline_fact(entity_id:, topic: nil)
|
|
195
267
|
facts = Models::Fact.mentioning_entity(entity_id)
|
|
196
268
|
facts = facts.search_text(topic) if topic
|
|
@@ -203,7 +275,7 @@ module FactDb
|
|
|
203
275
|
end_date = facts.select { |f| f.invalid_at }.map(&:invalid_at).max
|
|
204
276
|
|
|
205
277
|
entity = Models::Entity.find(entity_id)
|
|
206
|
-
synthesized_text = "#{entity.
|
|
278
|
+
synthesized_text = "#{entity.name}: #{topic || 'timeline'} from #{start_date.to_date}"
|
|
207
279
|
synthesized_text += " to #{end_date.to_date}" if end_date
|
|
208
280
|
|
|
209
281
|
synthesize(
|