fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Services
|
|
5
|
+
# Application-facing facade for entity records.
#
# Reads and writes go through Models::Entity; name resolution, merging and
# duplicate detection are forwarded to Resolution::EntityResolver.
class EntityService
  attr_reader :config, :resolver

  # @param config [Object] settings object (defaults to FactDb.config). This
  #   class reads +embedding_generator+ and +logger+ from it and passes the
  #   whole object to the resolver.
  def initialize(config = FactDb.config)
    @config = config
    @resolver = Resolution::EntityResolver.new(config)
  end

  # Creates an entity marked "resolved" together with its aliases.
  #
  # The entity row and all alias rows are written inside one transaction so
  # a failing alias cannot leave a partially created entity behind.
  #
  # @param name [String] stored as +canonical_name+ and used as embedding input
  # @param type [#to_s] stored as +entity_type+
  # @param aliases [Array<String>, String, nil] alias texts to attach
  # @param attributes [Hash] stored in the +metadata+ column
  # @param description [String, nil]
  # @return [Models::Entity] the created entity
  def create(name, type:, aliases: [], attributes: {}, description: nil)
    # The generator may be slow or remote, so run it before opening the
    # transaction.
    embedding = generate_embedding(name)

    Models::Entity.transaction do
      entity = Models::Entity.create!(
        canonical_name: name,
        entity_type: type.to_s,
        description: description,
        metadata: attributes,
        resolution_status: "resolved",
        embedding: embedding
      )

      Array(aliases).each { |alias_text| entity.add_alias(alias_text) }

      entity
    end
  end

  # Looks an entity up by primary key via Models::Entity.find.
  def find(id)
    Models::Entity.find(id)
  end

  # Case-insensitive exact match on +canonical_name+, optionally narrowed by
  # type, restricted to the +not_merged+ scope.
  #
  # @return [Models::Entity, nil] first match, if any
  def find_by_name(name, type: nil)
    scope = Models::Entity.where(["LOWER(canonical_name) = ?", name.downcase])
    scope = scope.where(entity_type: type) if type
    scope.not_merged.first
  end

  # Forwards to the resolver; the return value is whatever it produces.
  def resolve(name, type: nil)
    @resolver.resolve(name, type: type)
  end

  # Returns the entity of an existing resolution, or creates a new entity
  # when the resolver returns nothing.
  #
  # @return [Models::Entity]
  def resolve_or_create(name, type:, aliases: [], attributes: {}, description: nil)
    resolved = @resolver.resolve(name, type: type)
    return resolved.entity if resolved

    create(name, type: type, aliases: aliases, attributes: attributes, description: description)
  end

  # Forwards a merge of +merge_id+ into +keep_id+ to the resolver.
  def merge(keep_id, merge_id)
    @resolver.merge(keep_id, merge_id)
  end

  # Adds an alias to the entity with the given id.
  def add_alias(entity_id, alias_text, alias_type: nil, confidence: 1.0)
    entity = Models::Entity.find(entity_id)
    entity.add_alias(alias_text, type: alias_type, confidence: confidence)
  end

  # Case-insensitive substring search over canonical names and aliases.
  #
  # LIKE metacharacters in +query+ ("%", "_" and the escape character) are
  # escaped so they match literally instead of acting as wildcards.
  #
  # @param query [String]
  # @param type [String, Symbol, nil] optional +entity_type+ filter
  # @param limit [Integer]
  # @return [ActiveRecord::Relation] distinct, non-merged entities
  def search(query, type: nil, limit: 20)
    pattern = "%#{Models::Entity.sanitize_sql_like(query.downcase)}%"

    scope = Models::Entity.not_merged.left_joins(:aliases).where(
      "LOWER(fact_db_entities.canonical_name) LIKE ? OR LOWER(fact_db_entity_aliases.alias_text) LIKE ?",
      pattern,
      pattern
    ).distinct

    scope = scope.where(entity_type: type) if type
    scope.limit(limit)
  end

  # Embedding-based lookup. Returns an empty relation when no embedding can
  # be produced (no generator configured, or the generator raised).
  def semantic_search(query, type: nil, limit: 20)
    embedding = generate_embedding(query)
    return Models::Entity.none unless embedding

    scope = Models::Entity.not_merged.nearest_neighbors(embedding, limit: limit)
    scope = scope.where(entity_type: type) if type
    scope
  end

  # Non-merged entities of one type, ordered by canonical name.
  def by_type(type)
    Models::Entity.by_type(type).not_merged.order(:canonical_name)
  end

  # Non-merged entities from the +people+ scope, ordered by canonical name.
  def people(limit: nil)
    scope = Models::Entity.people.not_merged.order(:canonical_name)
    scope = scope.limit(limit) if limit
    scope
  end

  # Non-merged entities from the +organizations+ scope, ordered by name.
  def organizations(limit: nil)
    scope = Models::Entity.organizations.not_merged.order(:canonical_name)
    scope = scope.limit(limit) if limit
    scope
  end

  # Non-merged entities from the +places+ scope, ordered by canonical name.
  def places(limit: nil)
    scope = Models::Entity.places.not_merged.order(:canonical_name)
    scope = scope.limit(limit) if limit
    scope
  end

  # Runs a Temporal::Query for facts mentioning the entity.
  #
  # @param at [Object, nil] point in time passed through to the query
  # @param status [Symbol] fact status filter passed through to the query
  def facts_about(entity_id, at: nil, status: :canonical)
    Temporal::Query.new.execute(
      entity_id: entity_id,
      at: at,
      status: status
    )
  end

  # Builds a Temporal::Timeline for the entity.
  def timeline_for(entity_id, from: nil, to: nil)
    Temporal::Timeline.new.build(entity_id: entity_id, from: from, to: to)
  end

  # Forwards duplicate detection to the resolver.
  def find_duplicates(threshold: nil)
    @resolver.find_duplicates(threshold: threshold)
  end

  # Forwards automatic duplicate merging to the resolver.
  def auto_merge_duplicates!
    @resolver.auto_merge_duplicates!
  end

  # Aggregate counts. +total+ and +total_count+ carry the same value (both
  # keys are kept for callers of either name) and come from a single COUNT.
  #
  # @return [Hash{Symbol => Object}]
  def stats
    active = Models::Entity.not_merged
    total = active.count

    {
      total: total,
      total_count: total,
      by_type: active.group(:entity_type).count,
      by_status: Models::Entity.group(:resolution_status).count,
      merged_count: Models::Entity.where(resolution_status: "merged").count,
      # NOTE(review): unlike the totals above this is not restricted to
      # not_merged entities -- confirm that is intended.
      with_facts: Models::Entity.joins(:entity_mentions).distinct.count
    }
  end

  private

  # Best-effort embedding: returns nil when no generator is configured or
  # when the generator raises (the failure is logged as a warning).
  def generate_embedding(text)
    return nil unless config.embedding_generator

    config.embedding_generator.call(text)
  rescue StandardError => e
    config.logger&.warn("Failed to generate embedding: #{e.message}")
    nil
  end
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Services
|
|
5
|
+
# Application-facing facade for fact records.
#
# Persistence goes through Models::Fact, temporal reads through
# Temporal::Query / Temporal::Timeline, and supersede / synthesize /
# conflict handling is forwarded to Resolution::FactResolver.
class FactService
  attr_reader :config, :resolver, :entity_service

  # @param config [Object] settings object (defaults to FactDb.config). This
  #   class reads +embedding_generator+, +logger+ and +default_extractor+.
  def initialize(config = FactDb.config)
    @config = config
    @resolver = Resolution::FactResolver.new(config)
    @entity_service = EntityService.new(config)
  end

  # Creates a fact and links its source content and entity mentions.
  #
  # All rows are written inside one transaction, so a missing source content
  # or a failing mention does not leave an orphan fact behind.
  #
  # @param text [String] stored as +fact_text+ and used as embedding input
  # @param valid_at [Object] start of validity
  # @param invalid_at [Object, nil] end of validity
  # @param status [#to_s]
  # @param source_content_id [Object, nil] id of a Models::Content to link
  #   as the "primary" source
  # @param mentions [Array<Hash>, nil] each hash may carry :entity_id, :name,
  #   :text, :type, :role and :confidence; nil is treated as no mentions
  # @param extraction_method [#to_s]
  # @param confidence [Numeric]
  # @param metadata [Hash]
  # @return [Models::Fact] the created fact
  def create(text, valid_at:, invalid_at: nil, status: :canonical, source_content_id: nil, mentions: [], extraction_method: :manual, confidence: 1.0, metadata: {})
    # The generator may be slow or remote, so run it before opening the
    # transaction.
    embedding = generate_embedding(text)

    Models::Fact.transaction do
      fact = Models::Fact.create!(
        fact_text: text,
        valid_at: valid_at,
        invalid_at: invalid_at,
        status: status.to_s,
        extraction_method: extraction_method.to_s,
        confidence: confidence,
        metadata: metadata,
        embedding: embedding
      )

      if source_content_id
        content = Models::Content.find(source_content_id)
        fact.add_source(content: content, type: "primary")
      end

      # Extractors may omit :mentions entirely, which arrives here as nil.
      (mentions || []).each do |mention|
        fact.add_mention(
          entity: resolve_or_create_entity(mention),
          text: mention[:text] || mention[:name],
          role: mention[:role],
          confidence: mention[:confidence] || 1.0
        )
      end

      fact
    end
  end

  # Looks a fact up by primary key via Models::Fact.find.
  def find(id)
    Models::Fact.find(id)
  end

  # Runs an extractor over a stored content record and creates one fact per
  # extracted item, each linked back to that content.
  #
  # @param content_id [Object] id of the Models::Content to process
  # @param extractor [Object] extractor key handed to Extractors::Base.for
  # @return [Array<Models::Fact>]
  def extract_from_content(content_id, extractor: config.default_extractor)
    content = Models::Content.find(content_id)
    extractor_instance = Extractors::Base.for(extractor, config)

    extracted = extractor_instance.extract(
      content.raw_text,
      { captured_at: content.captured_at }
    )

    extracted.map do |fact_data|
      create(
        fact_data[:text],
        valid_at: fact_data[:valid_at],
        invalid_at: fact_data[:invalid_at],
        source_content_id: content_id,
        mentions: fact_data[:mentions] || [],
        extraction_method: fact_data[:extraction_method] || extractor,
        confidence: fact_data[:confidence] || 1.0,
        metadata: fact_data[:metadata] || {}
      )
    end
  end

  # Runs a Temporal::Query. +entity+ is passed on as the query's +entity_id+.
  def query(topic: nil, at: nil, entity: nil, status: :canonical, limit: nil)
    Temporal::Query.new.execute(
      topic: topic,
      at: at,
      entity_id: entity,
      status: status,
      limit: limit
    )
  end

  # Canonical facts with no point in time given (+at: nil+).
  def current_facts(entity: nil, topic: nil, limit: nil)
    query(topic: topic, entity: entity, at: nil, status: :canonical, limit: limit)
  end

  # Canonical facts queried at +date+.
  def facts_at(date, entity: nil, topic: nil)
    query(topic: topic, entity: entity, at: date, status: :canonical)
  end

  # Builds a Temporal::Timeline for the entity.
  def timeline(entity_id:, from: nil, to: nil)
    Temporal::Timeline.new.build(entity_id: entity_id, from: from, to: to)
  end

  # Forwards to FactResolver#supersede.
  def supersede(old_fact_id, new_fact_text, valid_at:, mentions: [])
    @resolver.supersede(old_fact_id, new_fact_text, valid_at: valid_at, mentions: mentions)
  end

  # Forwards to FactResolver#synthesize.
  def synthesize(source_fact_ids, synthesized_text, valid_at:, invalid_at: nil, mentions: [])
    @resolver.synthesize(source_fact_ids, synthesized_text, valid_at: valid_at, invalid_at: invalid_at, mentions: mentions)
  end

  # Forwards to FactResolver#invalidate; +at+ defaults to the current time.
  def invalidate(fact_id, at: Time.current)
    @resolver.invalidate(fact_id, at: at)
  end

  # Forwards to FactResolver#corroborate.
  def corroborate(fact_id, corroborating_fact_id)
    @resolver.corroborate(fact_id, corroborating_fact_id)
  end

  # Text search via Models::Fact.search_text, newest +valid_at+ first.
  #
  # @param status [Symbol, nil] nil or :all disables the status filter
  def search(query, entity: nil, status: :canonical, limit: 20)
    scope = Models::Fact.search_text(query)
    scope = apply_filters(scope, entity: entity, status: status)
    scope.order(valid_at: :desc).limit(limit)
  end

  # Embedding-based search over canonical facts. Returns an empty relation
  # when no embedding can be produced.
  def semantic_search(query, entity: nil, at: nil, limit: 20)
    embedding = generate_embedding(query)
    return Models::Fact.none unless embedding

    # Twice the requested number of neighbours is asked for before the
    # temporal and entity filters are applied.
    scope = Models::Fact.canonical.nearest_neighbors(embedding, limit: limit * 2)
    scope = scope.currently_valid if at.nil?
    scope = scope.valid_at(at) if at
    scope = scope.mentioning_entity(entity) if entity
    scope.limit(limit)
  end

  # Forwards to FactResolver#find_conflicts.
  def find_conflicts(entity_id: nil, topic: nil)
    @resolver.find_conflicts(entity_id: entity_id, topic: topic)
  end

  # Forwards to FactResolver#resolve_conflict.
  def resolve_conflict(keep_fact_id, supersede_fact_ids, reason: nil)
    @resolver.resolve_conflict(keep_fact_id, supersede_fact_ids, reason: reason)
  end

  # Forwards to FactResolver#build_timeline_fact.
  def build_timeline_fact(entity_id:, topic: nil)
    @resolver.build_timeline_fact(entity_id: entity_id, topic: topic)
  end

  # Most recently created facts with the given status.
  def recent(limit: 10, status: :canonical)
    Models::Fact.where(status: status.to_s).order(created_at: :desc).limit(limit)
  end

  # Facts from the +extracted_by+ scope for one method, newest first.
  def by_extraction_method(method, limit: nil)
    scope = Models::Fact.extracted_by(method.to_s).order(created_at: :desc)
    scope = scope.limit(limit) if limit
    scope
  end

  # Aggregate counts. +total+ and +total_count+ carry the same value (both
  # keys are kept for callers of either name) and come from a single COUNT.
  #
  # @return [Hash{Symbol => Object}]
  def stats
    total = Models::Fact.count

    {
      total: total,
      total_count: total,
      canonical_count: Models::Fact.canonical.count,
      currently_valid_count: Models::Fact.canonical.currently_valid.count,
      by_status: Models::Fact.group(:status).count,
      by_extraction_method: Models::Fact.group(:extraction_method).count,
      average_confidence: Models::Fact.average(:confidence)&.to_f&.round(3)
    }
  end

  private

  # Maps a mention hash to an entity: an explicit :entity_id wins; otherwise
  # the entity is resolved or created by name, with type defaulting to
  # :concept.
  def resolve_or_create_entity(mention)
    return Models::Entity.find(mention[:entity_id]) if mention[:entity_id]

    name = mention[:name] || mention[:text]
    type = mention[:type]&.to_sym || :concept

    @entity_service.resolve_or_create(name, type: type)
  end

  # Applies the optional entity and status filters; a nil or :all status
  # leaves the scope unfiltered by status.
  def apply_filters(scope, entity: nil, status: nil)
    scope = scope.mentioning_entity(entity) if entity
    scope = scope.where(status: status.to_s) if status && status != :all
    scope
  end

  # Best-effort embedding: returns nil when no generator is configured or
  # when the generator raises (the failure is logged as a warning).
  def generate_embedding(text)
    return nil unless config.embedding_generator

    config.embedding_generator.call(text)
  rescue StandardError => e
    config.logger&.warn("Failed to generate embedding: #{e.message}")
    nil
  end
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Temporal
|
|
5
|
+
# Composable temporal query over a fact scope.
#
# Every public method starts from the scope given to the constructor and
# returns a new relation; the instance itself is not mutated.
class Query
  attr_reader :scope

  # @param scope [Object] base relation to query (defaults to all facts)
  def initialize(scope = Models::Fact.all)
    @scope = scope
  end

  # Applies, in order: status filter, temporal filter, entity filter, topic
  # text search, ordering by +valid_at+ descending, and an optional limit.
  #
  # @param topic [String, nil] skipped when nil or empty
  # @param at [Object, nil] nil selects the +currently_valid+ scope
  # @param entity_id [Object, nil] skipped when nil
  # @param status [Symbol, String, nil] nil or :all disables status filtering
  # @param limit [Integer, nil]
  # @return [Object] the filtered relation
  def execute(topic: nil, at: nil, entity_id: nil, status: :canonical, limit: nil)
    result = apply_status_filter(@scope, status)
    result = apply_temporal_filter(result, at)
    result = apply_entity_filter(result, entity_id)
    result = apply_topic_search(result, topic)

    # Most recently valid first.
    result = result.order(valid_at: :desc)
    result = result.limit(limit) if limit
    result
  end

  # Canonical, currently valid facts mentioning the entity.
  def current_facts(entity_id:)
    execute(entity_id: entity_id, at: nil, status: :canonical)
  end

  # Canonical facts valid at +date+, optionally for one entity.
  def facts_at(date, entity_id: nil)
    execute(at: date, entity_id: entity_id, status: :canonical)
  end

  # Canonical facts from the +became_valid_between+ scope, oldest first.
  def facts_created_between(from:, to:, entity_id: nil)
    result = @scope.canonical.became_valid_between(from, to)
    result = result.mentioning_entity(entity_id) if entity_id
    result.order(valid_at: :asc)
  end

  # Facts (any status) from the +became_invalid_between+ scope, ordered by
  # +invalid_at+ ascending.
  def facts_invalidated_between(from:, to:, entity_id: nil)
    result = @scope.became_invalid_between(from, to)
    result = result.mentioning_entity(entity_id) if entity_id
    result.order(invalid_at: :asc)
  end

  # Text search with temporal filtering.
  #
  # NOTE(review): despite the name, this method calls +search_text+ and does
  # not compute an embedding itself; no explicit ordering is applied.
  def semantic_search(query:, at: nil, entity_id: nil, limit: 20)
    result = @scope.canonical.search_text(query)
    result = apply_temporal_filter(result, at)
    result = result.mentioning_entity(entity_id) if entity_id
    result.limit(limit)
  end

  # Canonical facts from the +with_role+ scope for the entity and role,
  # newest +valid_at+ first.
  def facts_with_entity_role(entity_id:, role:, at: nil)
    result = @scope.canonical.with_role(entity_id, role)
    result = apply_temporal_filter(result, at)
    result.order(valid_at: :desc)
  end

  # Compares the canonical facts for an entity at two points in time.
  #
  # Both result sets are loaded into arrays and compared with Array set
  # operations, so equality follows the fact objects' own ==/hash.
  #
  # @return [Hash{Symbol => Array}] :added, :removed and :unchanged
  def diff(entity_id:, from_date:, to_date:)
    before = facts_at(from_date, entity_id: entity_id).to_a
    after = facts_at(to_date, entity_id: entity_id).to_a

    {
      added: after - before,
      removed: before - after,
      unchanged: before & after
    }
  end

  private

  # Maps a status to the matching model scope. nil and :all leave the scope
  # untouched (nil matches FactService#apply_filters); unknown statuses
  # fall back to a plain equality filter on the status column.
  def apply_status_filter(scope, status)
    return scope if status.nil?

    case status.to_sym
    when :canonical then scope.canonical
    when :superseded then scope.superseded
    when :synthesized then scope.synthesized
    when :all then scope
    else scope.where(status: status.to_s)
    end
  end

  # nil means "now": use the +currently_valid+ scope, otherwise +valid_at+.
  def apply_temporal_filter(scope, at)
    at.nil? ? scope.currently_valid : scope.valid_at(at)
  end

  # Restricts to facts mentioning the entity when an id is given.
  def apply_entity_filter(scope, entity_id)
    return scope if entity_id.nil?

    scope.mentioning_entity(entity_id)
  end

  # Applies the text search unless the topic is nil or empty.
  def apply_topic_search(scope, topic)
    return scope if topic.nil? || topic.empty?

    scope.search_text(topic)
  end
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Temporal
|
|
5
|
+
# Ordered collection of TimelineEvent objects for one entity.
#
# +to_a+ returns the events sorted by +valid_at+, while +each+ (and therefore
# the Enumerable methods) iterates over the events' hash form.
class Timeline
  include Enumerable

  attr_reader :events

  def initialize
    @events = []
  end

  # Yields each event's hash representation in +valid_at+ order.
  def each(&block)
    to_hash.each(&block)
  end

  # Loads the facts mentioning the entity (optionally bounded by +from+ and
  # +to+ on +valid_at+) and wraps each in a TimelineEvent.
  #
  # @return [Timeline] self, to allow chaining
  def build(entity_id:, from: nil, to: nil)
    @events = fetch_facts(entity_id, from, to).map { |fact| TimelineEvent.new(fact) }
    self
  end

  # @return [Array] events sorted by +valid_at+ ascending
  def to_a
    @events.sort_by(&:valid_at)
  end

  # @return [Array<Hash>] hash form of every event, in +valid_at+ order
  #
  # NOTE(review): +to_hash+ is also Ruby's implicit Hash-conversion hook, and
  # this returns an Array; the name is kept for backward compatibility.
  def to_hash
    to_a.map(&:to_hash)
  end

  # Events grouped by the year of +valid_at+.
  def by_year
    to_a.group_by { |event| event.valid_at.year }
  end

  # Events grouped by "YYYY-MM" of +valid_at+.
  def by_month
    to_a.group_by { |event| event.valid_at.strftime("%Y-%m") }
  end

  # Events whose +valid_at+ lies within [from, to] (inclusive).
  #
  # A nil bound is treated as open-ended, matching how +build+ handles its
  # +from+/+to+ arguments.
  def between(from, to)
    to_a.select do |event|
      (from.nil? || event.valid_at >= from) && (to.nil? || event.valid_at <= to)
    end
  end

  # Events that report themselves as currently valid.
  def active
    to_a.select(&:currently_valid?)
  end

  # Events that are no longer valid.
  def historical
    to_a.reject(&:currently_valid?)
  end

  # All pairs of events whose validity intervals overlap.
  #
  # @return [Array<Array>] [earlier, later] pairs in +valid_at+ order
  def overlapping
    # combination(2) walks pairs in the same index order as a nested loop.
    to_a.combination(2).select { |earlier, later| events_overlap?(earlier, later) }
  end

  # Events valid at the given date, per each event's +valid_at?+.
  def state_at(date)
    to_a.select { |event| event.valid_at?(date) }
  end

  # One entry per pair of consecutive events.
  #
  # +gap_days+ counts days from the previous event's end (+invalid_at+, or
  # its +valid_at+ when still open) to the next event's start; it is
  # negative when the two overlap.
  #
  # @return [Array<Hash>] hashes with :from, :to and :gap_days
  def changes_summary
    to_a.each_cons(2).map do |prev_event, next_event|
      prev_end = (prev_event.invalid_at || prev_event.valid_at).to_date

      {
        from: prev_event,
        to: next_event,
        gap_days: (next_event.valid_at.to_date - prev_end).to_i
      }
    end
  end

  private

  # Facts mentioning the entity, oldest +valid_at+ first, optionally bounded.
  def fetch_facts(entity_id, from, to)
    scope = Models::Fact.mentioning_entity(entity_id).order(valid_at: :asc)
    scope = scope.where("valid_at >= ?", from) if from
    scope = scope.where("valid_at <= ?", to) if to
    scope
  end

  # Two events overlap unless one ends at or before the other starts; a nil
  # +invalid_at+ means the event is still open.
  def events_overlap?(event1, event2)
    return false if event1.invalid_at && event1.invalid_at <= event2.valid_at
    return false if event2.invalid_at && event2.invalid_at <= event1.valid_at

    true
  end
end
|
|
105
|
+
|
|
106
|
+
# Read-only wrapper around a single fact for use in a Timeline.
#
# Most readers are delegated straight to the wrapped fact.
class TimelineEvent
  attr_reader :fact

  delegate :id, :fact_text, :valid_at, :invalid_at, :status,
           :currently_valid?, :valid_at?, :duration, :duration_days,
           :entities, :source_contents, to: :fact

  # @param fact [Object] the fact record to wrap
  def initialize(fact)
    @fact = fact
  end

  # @return [Hash{Symbol => Object}] plain-data summary of the event;
  #   :entities holds the canonical names of the fact's entities
  def to_hash
    {
      id: id,
      fact_text: fact_text,
      valid_at: valid_at,
      invalid_at: invalid_at,
      status: status,
      duration_days: duration_days,
      entities: entities.map(&:canonical_name)
    }
  end

  # Orders events by +valid_at+.
  #
  # @return [Integer, nil] nil when +other+ has no +valid_at+ to compare
  #   against, following the usual <=> convention for incomparable operands
  def <=>(other)
    return nil unless other.respond_to?(:valid_at)

    valid_at <=> other.valid_at
  end
end
|
|
133
|
+
end
|
|
134
|
+
end
|