fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -2,44 +2,89 @@
2
2
 
3
3
  module FactDb
4
4
  module Resolution
5
+ # Resolves entity names to canonical entities in the database
6
+ #
7
+ # Provides entity resolution through exact alias matching, canonical name matching,
8
+ # and fuzzy matching using Levenshtein distance. Also handles entity merging,
9
+ # splitting, and duplicate detection.
10
+ #
11
+ # @example Basic usage
12
+ # resolver = EntityResolver.new
13
+ # resolved = resolver.resolve("John Smith", kind: :person)
14
+ # if resolved
15
+ # puts "Found: #{resolved.entity.name} (confidence: #{resolved.confidence})"
16
+ # end
17
+ #
5
18
  class EntityResolver
19
+ # @return [FactDb::Config] the configuration object
6
20
  attr_reader :config
7
21
 
22
+ # Initializes a new EntityResolver instance
23
+ #
24
+ # @param config [FactDb::Config] configuration object (defaults to FactDb.config)
8
25
  def initialize(config = FactDb.config)
9
26
  @config = config
10
27
  @threshold = config.fuzzy_match_threshold
11
28
  @auto_merge_threshold = config.auto_merge_threshold
12
29
  end
13
30
 
14
- # Resolve a name to an entity
15
- def resolve(name, type: nil)
31
+ # Resolves a name to an existing entity
32
+ #
33
+ # Tries resolution in order: exact alias match, canonical name match, fuzzy match.
34
+ #
35
+ # @param name [String] the name to resolve
36
+ # @param kind [Symbol, nil] optional entity kind filter (:person, :organization, etc.)
37
+ # @return [ResolvedEntity, nil] resolved entity with confidence score, or nil if not found
38
+ #
39
+ # @example Resolve with kind filter
40
+ # resolver.resolve("Acme", kind: :organization)
41
+ def resolve(name, kind: nil)
16
42
  return nil if name.nil? || name.empty?
17
43
 
18
44
  # 1. Exact alias match
19
- exact = find_by_exact_alias(name, type: type)
45
+ exact = find_by_exact_alias(name, kind: kind)
20
46
  return ResolvedEntity.new(exact, confidence: 1.0, match_type: :exact_alias) if exact
21
47
 
22
48
  # 2. Canonical name match
23
- canonical = find_by_canonical_name(name, type: type)
24
- return ResolvedEntity.new(canonical, confidence: 1.0, match_type: :canonical_name) if canonical
49
+ canonical = find_by_name(name, kind: kind)
50
+ return ResolvedEntity.new(canonical, confidence: 1.0, match_type: :name) if canonical
25
51
 
26
52
  # 3. Fuzzy matching
27
- fuzzy = find_by_fuzzy_match(name, type: type)
53
+ fuzzy = find_by_fuzzy_match(name, kind: kind)
28
54
  return fuzzy if fuzzy && fuzzy.confidence >= @threshold
29
55
 
30
56
  # 4. No match found
31
57
  nil
32
58
  end
33
59
 
34
- # Resolve or create an entity
35
- def resolve_or_create(name, type:, aliases: [], attributes: {})
36
- resolved = resolve(name, type: type)
60
+ # Resolves a name to an entity, creating one if not found
61
+ #
62
+ # @param name [String] the name to resolve or create
63
+ # @param kind [Symbol] the entity kind (required for creation)
64
+ # @param aliases [Array<String>] additional aliases to add
65
+ # @param attributes [Hash] additional attributes for new entity
66
+ # @return [FactDb::Models::Entity] the resolved or created entity
67
+ #
68
+ # @example Create with aliases
69
+ # resolver.resolve_or_create("John Smith", kind: :person, aliases: ["J. Smith", "Johnny"])
70
+ def resolve_or_create(name, kind:, aliases: [], attributes: {})
71
+ resolved = resolve(name, kind: kind)
37
72
  return resolved.entity if resolved
38
73
 
39
- create_entity(name, type: type, aliases: aliases, attributes: attributes)
74
+ create_entity(name, kind: kind, aliases: aliases, attributes: attributes)
40
75
  end
41
76
 
42
- # Merge two entities, keeping one as canonical
77
+ # Merges two entities, keeping one as canonical
78
+ #
79
+ # Transfers all aliases and mentions from the merged entity to the kept entity.
80
+ #
81
+ # @param keep_id [Integer] ID of the entity to keep
82
+ # @param merge_id [Integer] ID of the entity to merge (will be marked as merged)
83
+ # @return [FactDb::Models::Entity] the kept entity with updated aliases
84
+ # @raise [ResolutionError] if attempting to merge into itself or merge already merged entity
85
+ #
86
+ # @example Merge duplicate entities
87
+ # resolver.merge(primary_entity.id, duplicate_entity.id)
43
88
  def merge(keep_id, merge_id)
44
89
  keep = Models::Entity.find(keep_id)
45
90
  merge_entity = Models::Entity.find(merge_id)
@@ -50,15 +95,15 @@ module FactDb
50
95
  Models::Entity.transaction do
51
96
  # Move all aliases to kept entity
52
97
  merge_entity.aliases.each do |alias_record|
53
- keep.aliases.find_or_create_by!(alias_text: alias_record.alias_text) do |a|
54
- a.alias_type = alias_record.alias_type
98
+ keep.aliases.find_or_create_by!(name: alias_record.name) do |a|
99
+ a.kind = alias_record.kind
55
100
  a.confidence = alias_record.confidence
56
101
  end
57
102
  end
58
103
 
59
104
  # Add the merged entity's canonical name as an alias
60
- keep.aliases.find_or_create_by!(alias_text: merge_entity.canonical_name) do |a|
61
- a.alias_type = "name"
105
+ keep.aliases.find_or_create_by!(name: merge_entity.name) do |a|
106
+ a.kind = "name"
62
107
  a.confidence = 1.0
63
108
  end
64
109
 
@@ -68,14 +113,26 @@ module FactDb
68
113
  # Mark merged entity
69
114
  merge_entity.update!(
70
115
  resolution_status: "merged",
71
- merged_into_id: keep_id
116
+ canonical_id: keep_id
72
117
  )
73
118
  end
74
119
 
75
120
  keep.reload
76
121
  end
77
122
 
78
- # Split an entity into multiple entities
123
+ # Splits an entity into multiple new entities
124
+ #
125
+ # Creates new entities based on the split configuration and marks the original as split.
126
+ #
127
+ # @param entity_id [Integer] ID of the entity to split
128
+ # @param split_configs [Array<Hash>] array of hashes with :name, :kind, :aliases, :attributes
129
+ # @return [Array<FactDb::Models::Entity>] array of newly created entities
130
+ #
131
+ # @example Split an ambiguous entity
132
+ # resolver.split(entity.id, [
133
+ # { name: "John Smith (Sales)", kind: :person },
134
+ # { name: "John Smith (Engineering)", kind: :person }
135
+ # ])
79
136
  def split(entity_id, split_configs)
80
137
  original = Models::Entity.find(entity_id)
81
138
 
@@ -83,7 +140,7 @@ module FactDb
83
140
  new_entities = split_configs.map do |config|
84
141
  create_entity(
85
142
  config[:name],
86
- type: config[:type] || original.entity_type,
143
+ kind: config[:kind] || original.kind,
87
144
  aliases: config[:aliases] || [],
88
145
  attributes: config[:attributes] || {}
89
146
  )
@@ -95,7 +152,14 @@ module FactDb
95
152
  end
96
153
  end
97
154
 
98
- # Find potential duplicate entities
155
+ # Finds potential duplicate entities based on name similarity
156
+ #
157
+ # @param threshold [Float, nil] minimum similarity score (defaults to config threshold)
158
+ # @return [Array<Hash>] array of hashes with :entity1, :entity2, :similarity keys
159
+ #
160
+ # @example Find duplicates with custom threshold
161
+ # duplicates = resolver.find_duplicates(threshold: 0.85)
162
+ # duplicates.each { |d| puts "#{d[:entity1].name} ~ #{d[:entity2].name} (#{d[:similarity]})" }
99
163
  def find_duplicates(threshold: nil)
100
164
  threshold ||= @threshold
101
165
  duplicates = []
@@ -104,7 +168,7 @@ module FactDb
104
168
 
105
169
  entities.each_with_index do |entity, i|
106
170
  entities[(i + 1)..].each do |other|
107
- similarity = calculate_similarity(entity.canonical_name, other.canonical_name)
171
+ similarity = calculate_similarity(entity.name, other.name)
108
172
  if similarity >= threshold
109
173
  duplicates << {
110
174
  entity1: entity,
@@ -118,7 +182,11 @@ module FactDb
118
182
  duplicates.sort_by { |d| -d[:similarity] }
119
183
  end
120
184
 
121
- # Auto-merge high-confidence duplicates
185
+ # Automatically merges high-confidence duplicates
186
+ #
187
+ # Uses the auto_merge_threshold from config and keeps the entity with more mentions.
188
+ #
189
+ # @return [void]
122
190
  def auto_merge_duplicates!
123
191
  duplicates = find_duplicates(threshold: @auto_merge_threshold)
124
192
 
@@ -138,29 +206,29 @@ module FactDb
138
206
 
139
207
  private
140
208
 
141
- def find_by_exact_alias(name, type:)
142
- scope = Models::EntityAlias.where(["LOWER(alias_text) = ?", name.downcase])
143
- scope = scope.joins(:entity).where(fact_db_entities: { entity_type: type }) if type
209
+ def find_by_exact_alias(name, kind:)
210
+ scope = Models::EntityAlias.where(["LOWER(fact_db_entity_aliases.name) = ?", name.downcase])
211
+ scope = scope.joins(:entity).where(fact_db_entities: { kind: kind }) if kind
144
212
  scope = scope.joins(:entity).where.not(fact_db_entities: { resolution_status: "merged" })
145
213
  scope.first&.entity
146
214
  end
147
215
 
148
- def find_by_canonical_name(name, type:)
149
- scope = Models::Entity.where(["LOWER(canonical_name) = ?", name.downcase])
150
- scope = scope.where(entity_type: type) if type
216
+ def find_by_name(name, kind:)
217
+ scope = Models::Entity.where(["LOWER(name) = ?", name.downcase])
218
+ scope = scope.where(kind: kind) if kind
151
219
  scope.not_merged.first
152
220
  end
153
221
 
154
- def find_by_fuzzy_match(name, type:)
222
+ def find_by_fuzzy_match(name, kind:)
155
223
  candidates = Models::Entity.not_merged
156
- candidates = candidates.where(entity_type: type) if type
224
+ candidates = candidates.where(kind: kind) if kind
157
225
 
158
226
  best_match = nil
159
227
  best_similarity = 0
160
228
 
161
229
  candidates.find_each do |entity|
162
230
  # Check canonical name
163
- similarity = calculate_similarity(name, entity.canonical_name)
231
+ similarity = calculate_similarity(name, entity.name)
164
232
  if similarity > best_similarity
165
233
  best_similarity = similarity
166
234
  best_match = entity
@@ -168,7 +236,7 @@ module FactDb
168
236
 
169
237
  # Check aliases
170
238
  entity.aliases.each do |alias_record|
171
- alias_similarity = calculate_similarity(name, alias_record.alias_text)
239
+ alias_similarity = calculate_similarity(name, alias_record.name)
172
240
  if alias_similarity > best_similarity
173
241
  best_similarity = alias_similarity
174
242
  best_match = entity
@@ -181,10 +249,10 @@ module FactDb
181
249
  ResolvedEntity.new(best_match, confidence: best_similarity, match_type: :fuzzy)
182
250
  end
183
251
 
184
- def create_entity(name, type:, aliases: [], attributes: {})
252
+ def create_entity(name, kind:, aliases: [], attributes: {})
185
253
  entity = Models::Entity.create!(
186
- canonical_name: name,
187
- entity_type: type,
254
+ name: name,
255
+ kind: kind,
188
256
  attributes: attributes,
189
257
  resolution_status: "resolved"
190
258
  )
@@ -228,33 +296,65 @@ module FactDb
228
296
  end
229
297
  end
230
298
 
299
+ # Represents a resolved entity with confidence metadata
300
+ #
301
+ # Wraps an entity with information about how it was resolved
302
+ # and the confidence level of the match.
303
+ #
231
304
  class ResolvedEntity
232
- attr_reader :entity, :confidence, :match_type
305
+ # @return [FactDb::Models::Entity] the resolved entity
306
+ attr_reader :entity
233
307
 
308
+ # @return [Float] confidence score from 0.0 to 1.0
309
+ attr_reader :confidence
310
+
311
+ # @return [Symbol] how the entity was matched (:exact_alias, :name, :fuzzy)
312
+ attr_reader :match_type
313
+
314
+ # Initializes a new ResolvedEntity
315
+ #
316
+ # @param entity [FactDb::Models::Entity] the resolved entity
317
+ # @param confidence [Float] confidence score (0.0 to 1.0)
318
+ # @param match_type [Symbol] match type (:exact_alias, :name, :fuzzy)
234
319
  def initialize(entity, confidence:, match_type:)
235
320
  @entity = entity
236
321
  @confidence = confidence
237
322
  @match_type = match_type
238
323
  end
239
324
 
325
+ # Checks if this was an exact match (confidence == 1.0)
326
+ #
327
+ # @return [Boolean] true if confidence is 1.0
240
328
  def exact_match?
241
329
  confidence == 1.0
242
330
  end
243
331
 
332
+ # Checks if this was a fuzzy match
333
+ #
334
+ # @return [Boolean] true if match_type is :fuzzy
244
335
  def fuzzy_match?
245
336
  match_type == :fuzzy
246
337
  end
247
338
 
339
+ # Returns the entity ID
340
+ #
341
+ # @return [Integer] the entity's database ID
248
342
  def id
249
343
  entity.id
250
344
  end
251
345
 
252
- def canonical_name
253
- entity.canonical_name
346
+ # Returns the entity name
347
+ #
348
+ # @return [String] the entity's canonical name
349
+ def name
350
+ entity.name
254
351
  end
255
352
 
256
- def entity_type
257
- entity.entity_type
353
+ # Returns the entity kind
354
+ #
355
+ # @return [String] the entity's kind
356
+ def kind
357
+ entity.kind
258
358
  end
259
359
  end
260
360
  end
@@ -2,22 +2,51 @@
2
2
 
3
3
  module FactDb
4
4
  module Resolution
5
+ # Handles fact lifecycle operations including supersession, synthesis, and conflict resolution
6
+ #
7
+ # Provides methods for managing fact relationships: superseding outdated facts,
8
+ # synthesizing new facts from multiple sources, handling corroboration,
9
+ # and detecting/resolving conflicts.
10
+ #
11
+ # @example Supersede an outdated fact
12
+ # resolver = FactResolver.new
13
+ # new_fact = resolver.supersede(old_fact.id, "Updated information", valid_at: Date.today)
14
+ #
5
15
  class FactResolver
16
+ # @return [FactDb::Config] the configuration object
6
17
  attr_reader :config
7
18
 
19
+ # Initializes a new FactResolver instance
20
+ #
21
+ # @param config [FactDb::Config] configuration object (defaults to FactDb.config)
8
22
  def initialize(config = FactDb.config)
9
23
  @config = config
10
24
  end
11
25
 
12
- # Supersede an existing fact with a new one
13
- def supersede(old_fact_id, new_fact_text, valid_at:, mentions: [])
26
+ # Supersedes an existing fact with a new one
27
+ #
28
+ # Creates a new canonical fact and marks the old one as superseded.
29
+ # Copies mentions and sources from the old fact unless new mentions are provided.
30
+ #
31
+ # @param old_fact_id [Integer] ID of the fact to supersede
32
+ # @param new_text [String] the updated fact text
33
+ # @param valid_at [Date, Time] when the new fact became valid
34
+ # @param mentions [Array<Hash>] optional entity mentions for the new fact
35
+ # @return [FactDb::Models::Fact] the new canonical fact
36
+ # @raise [ResolutionError] if the fact is already superseded
37
+ #
38
+ # @example Supersede with new mentions
39
+ # resolver.supersede(fact.id, "John now works at NewCo",
40
+ # valid_at: Date.today,
41
+ # mentions: [{ entity_id: john.id, text: "John", role: :subject }])
42
+ def supersede(old_fact_id, new_text, valid_at:, mentions: [])
14
43
  old_fact = Models::Fact.find(old_fact_id)
15
44
 
16
45
  raise ResolutionError, "Cannot supersede already superseded fact" if old_fact.superseded?
17
46
 
18
47
  Models::Fact.transaction do
19
48
  new_fact = Models::Fact.create!(
20
- fact_text: new_fact_text,
49
+ text: new_text,
21
50
  valid_at: valid_at,
22
51
  status: "canonical",
23
52
  extraction_method: old_fact.extraction_method,
@@ -49,8 +78,8 @@ module FactDb
49
78
  # Copy sources from old fact
50
79
  old_fact.fact_sources.each do |source|
51
80
  new_fact.add_source(
52
- content: source.content,
53
- type: source.source_type,
81
+ source: source.source,
82
+ kind: source.kind,
54
83
  excerpt: source.excerpt,
55
84
  confidence: source.confidence
56
85
  )
@@ -67,7 +96,21 @@ module FactDb
67
96
  end
68
97
  end
69
98
 
70
- # Synthesize a new fact from multiple source facts
99
+ # Synthesizes a new fact from multiple source facts
100
+ #
101
+ # Creates a single synthesized fact that aggregates information from multiple facts.
102
+ # Automatically aggregates entity mentions and links to all source content.
103
+ #
104
+ # @param source_fact_ids [Array<Integer>] IDs of the source facts
105
+ # @param synthesized_text [String] the synthesized summary text
106
+ # @param valid_at [Date, Time] when the synthesis is valid from
107
+ # @param invalid_at [Date, Time, nil] when the synthesis becomes invalid
108
+ # @param mentions [Array<Hash>] optional entity mentions (aggregated from sources if empty)
109
+ # @return [FactDb::Models::Fact] the synthesized fact
110
+ # @raise [ResolutionError] if no source facts are found
111
+ #
112
+ # @example Synthesize multiple facts
113
+ # resolver.synthesize([fact1.id, fact2.id], "Summary of events", valid_at: Date.today)
71
114
  def synthesize(source_fact_ids, synthesized_text, valid_at:, invalid_at: nil, mentions: [])
72
115
  source_facts = Models::Fact.where(id: source_fact_ids)
73
116
 
@@ -75,7 +118,7 @@ module FactDb
75
118
 
76
119
  Models::Fact.transaction do
77
120
  synthesized = Models::Fact.create!(
78
- fact_text: synthesized_text,
121
+ text: synthesized_text,
79
122
  valid_at: valid_at,
80
123
  invalid_at: invalid_at,
81
124
  status: "synthesized",
@@ -117,7 +160,15 @@ module FactDb
117
160
  end
118
161
  end
119
162
 
120
- # Mark a fact as corroborated by another fact
163
+ # Marks a fact as corroborated by another fact
164
+ #
165
+ # Adds the corroborating fact ID to the corroborated_by_ids array.
166
+ # If 2+ facts corroborate, status changes to "corroborated".
167
+ #
168
+ # @param fact_id [Integer] ID of the fact being corroborated
169
+ # @param corroborating_fact_id [Integer] ID of the supporting fact
170
+ # @return [FactDb::Models::Fact] the updated fact
171
+ # @raise [ResolutionError] if attempting to corroborate with the same fact
121
172
  def corroborate(fact_id, corroborating_fact_id)
122
173
  fact = Models::Fact.find(fact_id)
123
174
  _corroborating = Models::Fact.find(corroborating_fact_id)
@@ -134,14 +185,24 @@ module FactDb
134
185
  fact
135
186
  end
136
187
 
137
- # Invalidate a fact without replacement
188
+ # Invalidates a fact without replacement
189
+ #
190
+ # @param fact_id [Integer] ID of the fact to invalidate
191
+ # @param at [Time] when the fact became invalid (defaults to now)
192
+ # @return [FactDb::Models::Fact] the invalidated fact
138
193
  def invalidate(fact_id, at: Time.current)
139
194
  fact = Models::Fact.find(fact_id)
140
195
  fact.update!(invalid_at: at)
141
196
  fact
142
197
  end
143
198
 
144
- # Find potentially conflicting facts
199
+ # Finds potentially conflicting facts
200
+ #
201
+ # Identifies facts with similar text (50-95% similarity) that might be contradictory.
202
+ #
203
+ # @param entity_id [Integer, nil] entity ID to filter by
204
+ # @param topic [String, nil] topic to search for
205
+ # @return [Array<Hash>] array of hashes with :fact1, :fact2, :similarity keys
145
206
  def find_conflicts(entity_id: nil, topic: nil)
146
207
  scope = Models::Fact.canonical.currently_valid
147
208
 
@@ -159,7 +220,7 @@ module FactDb
159
220
 
160
221
  facts.each_with_index do |fact, i|
161
222
  facts[(i + 1)..].each do |other|
162
- similarity = text_similarity(fact.fact_text, other.fact_text)
223
+ similarity = text_similarity(fact.text, other.text)
163
224
  if similarity > 0.5 && similarity < 0.95
164
225
  conflicts << {
165
226
  fact1: fact,
@@ -173,7 +234,12 @@ module FactDb
173
234
  conflicts.sort_by { |c| -c[:similarity] }
174
235
  end
175
236
 
176
- # Resolve conflicts by keeping one fact and superseding others
237
+ # Resolves conflicts by keeping one fact and superseding others
238
+ #
239
+ # @param keep_fact_id [Integer] ID of the fact to keep as canonical
240
+ # @param supersede_fact_ids [Array<Integer>] IDs of facts to mark as superseded
241
+ # @param reason [String, nil] reason for the resolution (stored in metadata)
242
+ # @return [FactDb::Models::Fact] the kept fact
177
243
  def resolve_conflict(keep_fact_id, supersede_fact_ids, reason: nil)
178
244
  Models::Fact.transaction do
179
245
  supersede_fact_ids.each do |fact_id|
@@ -190,7 +256,13 @@ module FactDb
190
256
  Models::Fact.find(keep_fact_id)
191
257
  end
192
258
 
193
- # Build a timeline fact from point-in-time facts
259
+ # Builds a timeline fact from point-in-time facts for an entity
260
+ #
261
+ # Creates a synthesized fact summarizing the entity's history on a topic.
262
+ #
263
+ # @param entity_id [Integer] the entity ID
264
+ # @param topic [String, nil] optional topic filter
265
+ # @return [FactDb::Models::Fact, nil] synthesized timeline fact or nil if no facts found
194
266
  def build_timeline_fact(entity_id:, topic: nil)
195
267
  facts = Models::Fact.mentioning_entity(entity_id)
196
268
  facts = facts.search_text(topic) if topic
@@ -203,7 +275,7 @@ module FactDb
203
275
  end_date = facts.select { |f| f.invalid_at }.map(&:invalid_at).max
204
276
 
205
277
  entity = Models::Entity.find(entity_id)
206
- synthesized_text = "#{entity.canonical_name}: #{topic || 'timeline'} from #{start_date.to_date}"
278
+ synthesized_text = "#{entity.name}: #{topic || 'timeline'} from #{start_date.to_date}"
207
279
  synthesized_text += " to #{end_date.to_date}" if end_date
208
280
 
209
281
  synthesize(