fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -2,20 +2,44 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Services
|
|
5
|
+
# Service class for managing entities in the database
|
|
6
|
+
#
|
|
7
|
+
# Provides methods for creating, searching, and managing entities including
|
|
8
|
+
# name resolution, alias management, and duplicate detection.
|
|
9
|
+
#
|
|
10
|
+
# @example Basic usage
|
|
11
|
+
# service = EntityService.new
|
|
12
|
+
# entity = service.create("John Smith", kind: :person)
|
|
13
|
+
#
|
|
5
14
|
class EntityService
|
|
6
|
-
|
|
15
|
+
# @return [FactDb::Config] the configuration object
|
|
16
|
+
attr_reader :config
|
|
7
17
|
|
|
18
|
+
# @return [FactDb::Resolution::EntityResolver] the entity resolver instance
|
|
19
|
+
attr_reader :resolver
|
|
20
|
+
|
|
21
|
+
# Initializes a new EntityService instance
|
|
22
|
+
#
|
|
23
|
+
# @param config [FactDb::Config] configuration object (defaults to FactDb.config)
|
|
8
24
|
def initialize(config = FactDb.config)
|
|
9
25
|
@config = config
|
|
10
26
|
@resolver = Resolution::EntityResolver.new(config)
|
|
11
27
|
end
|
|
12
28
|
|
|
13
|
-
|
|
29
|
+
# Creates a new entity in the database
|
|
30
|
+
#
|
|
31
|
+
# @param name [String] the canonical name
|
|
32
|
+
# @param kind [Symbol, String] entity kind (:person, :organization, etc.)
|
|
33
|
+
# @param aliases [Array<String>] alternative names
|
|
34
|
+
# @param attributes [Hash] additional metadata attributes
|
|
35
|
+
# @param description [String, nil] entity description
|
|
36
|
+
# @return [FactDb::Models::Entity] the created entity
|
|
37
|
+
def create(name, kind:, aliases: [], attributes: {}, description: nil)
|
|
14
38
|
embedding = generate_embedding(name)
|
|
15
39
|
|
|
16
40
|
entity = Models::Entity.create!(
|
|
17
|
-
|
|
18
|
-
|
|
41
|
+
name: name,
|
|
42
|
+
kind: kind.to_s,
|
|
19
43
|
description: description,
|
|
20
44
|
metadata: attributes,
|
|
21
45
|
resolution_status: "resolved",
|
|
@@ -29,81 +53,202 @@ module FactDb
|
|
|
29
53
|
entity
|
|
30
54
|
end
|
|
31
55
|
|
|
56
|
+
# Finds an entity by ID
|
|
57
|
+
#
|
|
58
|
+
# @param id [Integer] the entity ID
|
|
59
|
+
# @return [FactDb::Models::Entity] the found entity
|
|
60
|
+
# @raise [ActiveRecord::RecordNotFound] if entity not found
|
|
32
61
|
def find(id)
|
|
33
62
|
Models::Entity.find(id)
|
|
34
63
|
end
|
|
35
64
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
65
|
+
# Finds an entity by exact name match
|
|
66
|
+
#
|
|
67
|
+
# @param name [String] the entity name (case-insensitive)
|
|
68
|
+
# @param kind [Symbol, String, nil] optional kind filter
|
|
69
|
+
# @return [FactDb::Models::Entity, nil] the found entity or nil
|
|
70
|
+
def find_by_name(name, kind: nil)
|
|
71
|
+
scope = Models::Entity.where(["LOWER(name) = ?", name.downcase])
|
|
72
|
+
scope = scope.where(kind: kind) if kind
|
|
39
73
|
scope.not_merged.first
|
|
40
74
|
end
|
|
41
75
|
|
|
42
|
-
|
|
43
|
-
|
|
76
|
+
# Resolves a name to an existing entity
|
|
77
|
+
#
|
|
78
|
+
# Uses exact alias matching, canonical name matching, and fuzzy matching.
|
|
79
|
+
#
|
|
80
|
+
# @param name [String] the name to resolve
|
|
81
|
+
# @param kind [Symbol, nil] optional kind filter
|
|
82
|
+
# @return [FactDb::Resolution::ResolvedEntity, nil] resolved entity or nil
|
|
83
|
+
def resolve(name, kind: nil)
|
|
84
|
+
@resolver.resolve(name, kind: kind)
|
|
44
85
|
end
|
|
45
86
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
87
|
+
# Resolves a name to an entity, creating one if not found
|
|
88
|
+
#
|
|
89
|
+
# Also checks if any provided aliases match existing entities.
|
|
90
|
+
#
|
|
91
|
+
# @param name [String] the name to resolve or create
|
|
92
|
+
# @param kind [Symbol, String] entity kind (required for creation)
|
|
93
|
+
# @param aliases [Array<String>] additional aliases
|
|
94
|
+
# @param attributes [Hash] additional attributes for new entity
|
|
95
|
+
# @param description [String, nil] entity description
|
|
96
|
+
# @return [FactDb::Models::Entity] the resolved or created entity
|
|
97
|
+
def resolve_or_create(name, kind:, aliases: [], attributes: {}, description: nil)
|
|
98
|
+
# First, try to resolve the canonical name
|
|
99
|
+
resolved = @resolver.resolve(name, kind: kind)
|
|
100
|
+
if resolved
|
|
101
|
+
# Add any new aliases to the resolved entity
|
|
102
|
+
add_new_aliases(resolved.entity, aliases)
|
|
103
|
+
return resolved.entity
|
|
104
|
+
end
|
|
49
105
|
|
|
50
|
-
|
|
106
|
+
# Check if any of the provided aliases match an existing entity
|
|
107
|
+
# This handles cases like: name="Lord", aliases=["Jesus"] where "Jesus" already exists
|
|
108
|
+
aliases.each do |alias_text|
|
|
109
|
+
next if alias_text.to_s.strip.empty?
|
|
110
|
+
|
|
111
|
+
resolved_by_alias = @resolver.resolve(alias_text.to_s.strip, kind: kind)
|
|
112
|
+
if resolved_by_alias
|
|
113
|
+
entity = resolved_by_alias.entity
|
|
114
|
+
# Add the new canonical name as an alias to the existing entity
|
|
115
|
+
entity.add_alias(name) unless entity.name.downcase == name.downcase
|
|
116
|
+
# Add all the other aliases too
|
|
117
|
+
add_new_aliases(entity, aliases)
|
|
118
|
+
return entity
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
create(name, kind: kind, aliases: aliases, attributes: attributes, description: description)
|
|
51
123
|
end
|
|
52
124
|
|
|
125
|
+
# Merges two entities, keeping one as canonical
|
|
126
|
+
#
|
|
127
|
+
# @param keep_id [Integer] ID of the entity to keep
|
|
128
|
+
# @param merge_id [Integer] ID of the entity to merge
|
|
129
|
+
# @return [FactDb::Models::Entity] the kept entity
|
|
53
130
|
def merge(keep_id, merge_id)
|
|
54
131
|
@resolver.merge(keep_id, merge_id)
|
|
55
132
|
end
|
|
56
133
|
|
|
57
|
-
|
|
134
|
+
# Adds an alias to an entity
|
|
135
|
+
#
|
|
136
|
+
# @param entity_id [Integer] the entity ID
|
|
137
|
+
# @param alias_name [String] the alias text
|
|
138
|
+
# @param kind [String, nil] alias kind
|
|
139
|
+
# @param confidence [Float] confidence score
|
|
140
|
+
# @return [FactDb::Models::EntityAlias] the created alias
|
|
141
|
+
def add_alias(entity_id, alias_name, kind: nil, confidence: 1.0)
|
|
58
142
|
entity = Models::Entity.find(entity_id)
|
|
59
|
-
entity.add_alias(
|
|
143
|
+
entity.add_alias(alias_name, kind: kind, confidence: confidence)
|
|
60
144
|
end
|
|
61
145
|
|
|
62
|
-
|
|
146
|
+
# Searches entities by name or alias using LIKE pattern matching
|
|
147
|
+
#
|
|
148
|
+
# @param query [String] the search query
|
|
149
|
+
# @param kind [Symbol, String, nil] optional kind filter
|
|
150
|
+
# @param limit [Integer] maximum number of results
|
|
151
|
+
# @return [ActiveRecord::Relation] matching entities
|
|
152
|
+
def search(query, kind: nil, limit: 20)
|
|
63
153
|
scope = Models::Entity.not_merged
|
|
64
154
|
|
|
65
155
|
# Search canonical names and aliases
|
|
66
156
|
scope = scope.left_joins(:aliases).where(
|
|
67
|
-
"LOWER(fact_db_entities.
|
|
157
|
+
"LOWER(fact_db_entities.name) LIKE ? OR LOWER(fact_db_entity_aliases.name) LIKE ?",
|
|
68
158
|
"%#{query.downcase}%",
|
|
69
159
|
"%#{query.downcase}%"
|
|
70
160
|
).distinct
|
|
71
161
|
|
|
72
|
-
scope = scope.where(
|
|
162
|
+
scope = scope.where(kind: kind) if kind
|
|
73
163
|
scope.limit(limit)
|
|
74
164
|
end
|
|
75
165
|
|
|
76
|
-
|
|
166
|
+
# Searches entities using semantic similarity (vector search)
|
|
167
|
+
#
|
|
168
|
+
# Requires an embedding generator to be configured.
|
|
169
|
+
#
|
|
170
|
+
# @param query [String] the search query
|
|
171
|
+
# @param kind [Symbol, String, nil] optional kind filter
|
|
172
|
+
# @param limit [Integer] maximum number of results
|
|
173
|
+
# @return [ActiveRecord::Relation] semantically similar entities
|
|
174
|
+
def semantic_search(query, kind: nil, limit: 20)
|
|
77
175
|
embedding = generate_embedding(query)
|
|
78
176
|
return Models::Entity.none unless embedding
|
|
79
177
|
|
|
80
178
|
scope = Models::Entity.not_merged.nearest_neighbors(embedding, limit: limit)
|
|
81
|
-
scope = scope.where(
|
|
179
|
+
scope = scope.where(kind: kind) if kind
|
|
82
180
|
scope
|
|
83
181
|
end
|
|
84
182
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
183
|
+
# Searches entities using PostgreSQL trigram similarity (handles typos)
|
|
184
|
+
#
|
|
185
|
+
# Requires pg_trgm extension. Falls back to LIKE search if unavailable.
|
|
186
|
+
#
|
|
187
|
+
# @param query [String] search term (minimum 3 characters)
|
|
188
|
+
# @param kind [Symbol, String, nil] optional kind filter
|
|
189
|
+
# @param threshold [Float] minimum similarity score (0.0-1.0)
|
|
190
|
+
# @param limit [Integer] maximum number of results
|
|
191
|
+
# @return [Array<FactDb::Models::Entity>] entities ordered by similarity
|
|
192
|
+
def fuzzy_search(query, kind: nil, threshold: 0.3, limit: 20)
|
|
193
|
+
return [] if query.to_s.strip.length < 3
|
|
88
194
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
195
|
+
sql = <<~SQL
|
|
196
|
+
SELECT DISTINCT e.id,
|
|
197
|
+
GREATEST(
|
|
198
|
+
similarity(LOWER(e.name), LOWER(?)),
|
|
199
|
+
COALESCE(MAX(similarity(LOWER(a.name), LOWER(?))), 0)
|
|
200
|
+
) as sim_score
|
|
201
|
+
FROM fact_db_entities e
|
|
202
|
+
LEFT JOIN fact_db_entity_aliases a ON a.entity_id = e.id
|
|
203
|
+
WHERE e.resolution_status != 'merged'
|
|
204
|
+
AND (
|
|
205
|
+
similarity(LOWER(e.name), LOWER(?)) > ?
|
|
206
|
+
OR similarity(LOWER(a.name), LOWER(?)) > ?
|
|
207
|
+
)
|
|
208
|
+
GROUP BY e.id
|
|
209
|
+
ORDER BY sim_score DESC
|
|
210
|
+
LIMIT ?
|
|
211
|
+
SQL
|
|
94
212
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
213
|
+
sanitized = ActiveRecord::Base.sanitize_sql(
|
|
214
|
+
[sql, query, query, query, threshold, query, threshold, limit]
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
results = ActiveRecord::Base.connection.execute(sanitized)
|
|
218
|
+
entity_ids = results.map { |r| r["id"] }
|
|
219
|
+
|
|
220
|
+
return [] if entity_ids.empty?
|
|
221
|
+
|
|
222
|
+
# Preserve ordering by fetching in order
|
|
223
|
+
entities_by_id = Models::Entity.where(id: entity_ids).index_by(&:id)
|
|
224
|
+
ordered_entities = entity_ids.map { |id| entities_by_id[id] }.compact
|
|
225
|
+
|
|
226
|
+
# Apply kind filter if specified
|
|
227
|
+
if kind
|
|
228
|
+
ordered_entities = ordered_entities.select { |e| e.kind == kind.to_s }
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
ordered_entities
|
|
232
|
+
rescue ActiveRecord::StatementInvalid => e
|
|
233
|
+
# pg_trgm extension not available, fall back to LIKE search
|
|
234
|
+
config.logger&.warn("Fuzzy search unavailable (pg_trgm not installed): #{e.message}")
|
|
235
|
+
search(query, kind: kind, limit: limit).to_a
|
|
99
236
|
end
|
|
100
237
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
238
|
+
# Returns entities of a specific kind
|
|
239
|
+
#
|
|
240
|
+
# @param kind [Symbol, String] the entity kind
|
|
241
|
+
# @return [ActiveRecord::Relation] entities of that kind
|
|
242
|
+
def by_kind(kind)
|
|
243
|
+
Models::Entity.by_kind(kind).not_merged.order(:name)
|
|
105
244
|
end
|
|
106
245
|
|
|
246
|
+
# Returns facts about an entity
|
|
247
|
+
#
|
|
248
|
+
# @param entity_id [Integer] the entity ID
|
|
249
|
+
# @param at [Date, Time, nil] optional point in time
|
|
250
|
+
# @param status [Symbol] fact status filter
|
|
251
|
+
# @return [ActiveRecord::Relation] facts mentioning the entity
|
|
107
252
|
def facts_about(entity_id, at: nil, status: :canonical)
|
|
108
253
|
Temporal::Query.new.execute(
|
|
109
254
|
entity_id: entity_id,
|
|
@@ -112,31 +257,95 @@ module FactDb
|
|
|
112
257
|
)
|
|
113
258
|
end
|
|
114
259
|
|
|
260
|
+
# Builds a timeline of facts for an entity
|
|
261
|
+
#
|
|
262
|
+
# @param entity_id [Integer] the entity ID
|
|
263
|
+
# @param from [Date, Time, nil] start of timeline range
|
|
264
|
+
# @param to [Date, Time, nil] end of timeline range
|
|
265
|
+
# @return [FactDb::Temporal::Timeline] timeline of facts
|
|
115
266
|
def timeline_for(entity_id, from: nil, to: nil)
|
|
116
267
|
Temporal::Timeline.new.build(entity_id: entity_id, from: from, to: to)
|
|
117
268
|
end
|
|
118
269
|
|
|
270
|
+
# Finds potential duplicate entities
|
|
271
|
+
#
|
|
272
|
+
# @param threshold [Float, nil] minimum similarity score
|
|
273
|
+
# @return [Array<Hash>] array of potential duplicates
|
|
119
274
|
def find_duplicates(threshold: nil)
|
|
120
275
|
@resolver.find_duplicates(threshold: threshold)
|
|
121
276
|
end
|
|
122
277
|
|
|
278
|
+
# Automatically merges high-confidence duplicates
|
|
279
|
+
#
|
|
280
|
+
# @return [void]
|
|
123
281
|
def auto_merge_duplicates!
|
|
124
282
|
@resolver.auto_merge_duplicates!
|
|
125
283
|
end
|
|
126
284
|
|
|
285
|
+
# Returns aggregate statistics about entities
|
|
286
|
+
#
|
|
287
|
+
# @return [Hash] statistics including counts by kind and status
|
|
127
288
|
def stats
|
|
128
289
|
{
|
|
129
290
|
total: Models::Entity.not_merged.count,
|
|
130
291
|
total_count: Models::Entity.not_merged.count,
|
|
131
|
-
|
|
292
|
+
by_kind: Models::Entity.not_merged.group(:kind).count,
|
|
132
293
|
by_status: Models::Entity.group(:resolution_status).count,
|
|
133
294
|
merged_count: Models::Entity.where(resolution_status: "merged").count,
|
|
134
295
|
with_facts: Models::Entity.joins(:entity_mentions).distinct.count
|
|
135
296
|
}
|
|
136
297
|
end
|
|
137
298
|
|
|
299
|
+
# Returns all relationship types used in the database
|
|
300
|
+
#
|
|
301
|
+
# @return [Array<Symbol>] relationship types (mention roles)
|
|
302
|
+
def relationship_types
|
|
303
|
+
Models::EntityMention.distinct.pluck(:mention_role).compact.map(&:to_sym)
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
# Returns relationship types for a specific entity
|
|
307
|
+
#
|
|
308
|
+
# @param entity_id [Integer] Entity ID
|
|
309
|
+
# @return [Array<Symbol>] Relationship types for this entity
|
|
310
|
+
def relationship_types_for(entity_id)
|
|
311
|
+
Models::EntityMention
|
|
312
|
+
.where(entity_id: entity_id)
|
|
313
|
+
.distinct
|
|
314
|
+
.pluck(:mention_role)
|
|
315
|
+
.compact
|
|
316
|
+
.map(&:to_sym)
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
# Returns the timespan of facts for an entity
|
|
320
|
+
#
|
|
321
|
+
# @param entity_id [Integer] Entity ID
|
|
322
|
+
# @return [Hash] Hash with :from and :to dates
|
|
323
|
+
def timespan_for(entity_id)
|
|
324
|
+
facts = Models::Fact
|
|
325
|
+
.joins(:entity_mentions)
|
|
326
|
+
.where(entity_mentions: { entity_id: entity_id })
|
|
327
|
+
|
|
328
|
+
{
|
|
329
|
+
from: facts.minimum(:valid_at),
|
|
330
|
+
to: facts.maximum(:valid_at) || Date.today
|
|
331
|
+
}
|
|
332
|
+
end
|
|
333
|
+
|
|
138
334
|
private
|
|
139
335
|
|
|
336
|
+
def add_new_aliases(entity, aliases)
|
|
337
|
+
return unless aliases&.any?
|
|
338
|
+
|
|
339
|
+
# Filter out pronouns and generic terms
|
|
340
|
+
valid_aliases = Validation::AliasFilter.filter(aliases, name: entity.name)
|
|
341
|
+
|
|
342
|
+
valid_aliases.each do |alias_text|
|
|
343
|
+
next if entity.all_aliases.map(&:downcase).include?(alias_text.downcase)
|
|
344
|
+
|
|
345
|
+
entity.add_alias(alias_text)
|
|
346
|
+
end
|
|
347
|
+
end
|
|
348
|
+
|
|
140
349
|
def generate_embedding(text)
|
|
141
350
|
return nil unless config.embedding_generator
|
|
142
351
|
|