fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -2,6 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Models
|
|
5
|
+
# Represents a named entity in the fact database
|
|
6
|
+
#
|
|
7
|
+
# Entities are real-world things like people, organizations, places, etc.
|
|
8
|
+
# that can be referenced in facts. Entities support aliases for name variations
|
|
9
|
+
# and can be merged to deduplicate records.
|
|
10
|
+
#
|
|
11
|
+
# @example Create an entity with aliases
|
|
12
|
+
# entity = Entity.create!(name: "John Smith", kind: "person", resolution_status: "resolved")
|
|
13
|
+
# entity.add_alias("J. Smith")
|
|
14
|
+
#
|
|
15
|
+
# @example Find entities by kind
|
|
16
|
+
# people = Entity.by_kind("person").not_merged
|
|
17
|
+
#
|
|
5
18
|
class Entity < ActiveRecord::Base
|
|
6
19
|
self.table_name = "fact_db_entities"
|
|
7
20
|
|
|
@@ -11,69 +24,128 @@ module FactDb
|
|
|
11
24
|
foreign_key: :entity_id, dependent: :destroy
|
|
12
25
|
has_many :facts, through: :entity_mentions
|
|
13
26
|
|
|
14
|
-
belongs_to :
|
|
15
|
-
foreign_key: :
|
|
27
|
+
belongs_to :canonical, class_name: "FactDb::Models::Entity",
|
|
28
|
+
foreign_key: :canonical_id, optional: true
|
|
16
29
|
has_many :merged_entities, class_name: "FactDb::Models::Entity",
|
|
17
|
-
foreign_key: :
|
|
30
|
+
foreign_key: :canonical_id
|
|
18
31
|
|
|
19
|
-
validates :
|
|
20
|
-
validates :
|
|
32
|
+
validates :name, presence: true
|
|
33
|
+
validates :kind, presence: true
|
|
21
34
|
validates :resolution_status, presence: true
|
|
22
35
|
|
|
23
|
-
#
|
|
24
|
-
TYPES = %w[person organization place product event concept].freeze
|
|
36
|
+
# @return [Array<String>] valid resolution statuses
|
|
25
37
|
STATUSES = %w[unresolved resolved merged split].freeze
|
|
26
38
|
|
|
27
|
-
|
|
39
|
+
# @return [Array<String>] valid entity kinds
|
|
40
|
+
ENTITY_KINDS = %w[person organization place product event concept other].freeze
|
|
41
|
+
|
|
28
42
|
validates :resolution_status, inclusion: { in: STATUSES }
|
|
43
|
+
validates :kind, inclusion: { in: ENTITY_KINDS }
|
|
44
|
+
|
|
45
|
+
# @!method by_kind(k)
|
|
46
|
+
# Returns entities of a specific kind
|
|
47
|
+
# @param k [String] the entity kind
|
|
48
|
+
# @return [ActiveRecord::Relation]
|
|
49
|
+
scope :by_kind, ->(k) { where(kind: k) }
|
|
29
50
|
|
|
30
|
-
|
|
51
|
+
# @!method resolved
|
|
52
|
+
# Returns entities with "resolved" status
|
|
53
|
+
# @return [ActiveRecord::Relation]
|
|
31
54
|
scope :resolved, -> { where(resolution_status: "resolved") }
|
|
55
|
+
|
|
56
|
+
# @!method unresolved
|
|
57
|
+
# Returns entities with "unresolved" status
|
|
58
|
+
# @return [ActiveRecord::Relation]
|
|
32
59
|
scope :unresolved, -> { where(resolution_status: "unresolved") }
|
|
60
|
+
|
|
61
|
+
# @!method not_merged
|
|
62
|
+
# Returns entities that have not been merged
|
|
63
|
+
# @return [ActiveRecord::Relation]
|
|
33
64
|
scope :not_merged, -> { where.not(resolution_status: "merged") }
|
|
34
|
-
scope :people, -> { by_type("person") }
|
|
35
|
-
scope :organizations, -> { by_type("organization") }
|
|
36
|
-
scope :places, -> { by_type("place") }
|
|
37
65
|
|
|
66
|
+
# Checks if the entity is resolved
|
|
67
|
+
#
|
|
68
|
+
# @return [Boolean] true if resolution_status is "resolved"
|
|
38
69
|
def resolved?
|
|
39
70
|
resolution_status == "resolved"
|
|
40
71
|
end
|
|
41
72
|
|
|
73
|
+
# Checks if the entity has been merged into another
|
|
74
|
+
#
|
|
75
|
+
# @return [Boolean] true if resolution_status is "merged"
|
|
42
76
|
def merged?
|
|
43
77
|
resolution_status == "merged"
|
|
44
78
|
end
|
|
45
79
|
|
|
80
|
+
# Returns the canonical entity (follows merge chain)
|
|
81
|
+
#
|
|
82
|
+
# If this entity has been merged, recursively follows the canonical_id
|
|
83
|
+
# chain to find the ultimate canonical entity.
|
|
84
|
+
#
|
|
85
|
+
# @return [Entity] the canonical entity or self if not merged
|
|
46
86
|
def canonical_entity
|
|
47
|
-
merged? ?
|
|
87
|
+
merged? ? canonical&.canonical_entity || canonical : self
|
|
48
88
|
end
|
|
49
89
|
|
|
90
|
+
# Returns all alias names as an array of strings
|
|
91
|
+
#
|
|
92
|
+
# @return [Array<String>] alias names
|
|
50
93
|
def all_aliases
|
|
51
|
-
aliases.pluck(:
|
|
94
|
+
aliases.pluck(:name)
|
|
52
95
|
end
|
|
53
96
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
97
|
+
# Adds an alias to this entity
|
|
98
|
+
#
|
|
99
|
+
# Validates the alias before creation using AliasFilter.
|
|
100
|
+
# Returns nil if validation fails.
|
|
101
|
+
#
|
|
102
|
+
# @param text [String] the alias text
|
|
103
|
+
# @param kind [String, nil] alias kind (name, nickname, email, handle, abbreviation, title)
|
|
104
|
+
# @param confidence [Float] confidence score (0.0 to 1.0)
|
|
105
|
+
# @return [EntityAlias, nil] the created alias or nil if validation failed
|
|
106
|
+
def add_alias(text, kind: nil, confidence: 1.0)
|
|
107
|
+
# Pre-validate before attempting to create
|
|
108
|
+
return nil unless Validation::AliasFilter.valid?(text, name: name)
|
|
109
|
+
|
|
110
|
+
aliases.find_or_create_by!(name: text) do |a|
|
|
111
|
+
a.kind = kind
|
|
57
112
|
a.confidence = confidence
|
|
58
113
|
end
|
|
114
|
+
rescue ActiveRecord::RecordInvalid
|
|
115
|
+
# Alias validation failed (pronoun, generic term, etc.)
|
|
116
|
+
nil
|
|
59
117
|
end
|
|
60
118
|
|
|
61
|
-
|
|
62
|
-
|
|
119
|
+
# Checks if the entity matches a query (by name or alias)
|
|
120
|
+
#
|
|
121
|
+
# @param query [String] the name to match (case-insensitive)
|
|
122
|
+
# @return [Boolean] true if name or any alias matches
|
|
123
|
+
def matches_name?(query)
|
|
124
|
+
return true if self.name.downcase == query.downcase
|
|
63
125
|
|
|
64
|
-
aliases.exists?(["LOWER(
|
|
126
|
+
aliases.exists?(["LOWER(name) = ?", query.downcase])
|
|
65
127
|
end
|
|
66
128
|
|
|
67
|
-
#
|
|
129
|
+
# Returns currently valid canonical facts mentioning this entity
|
|
130
|
+
#
|
|
131
|
+
# @return [ActiveRecord::Relation] currently valid facts
|
|
68
132
|
def current_facts
|
|
69
133
|
facts.currently_valid.canonical
|
|
70
134
|
end
|
|
71
135
|
|
|
136
|
+
# Returns facts valid at a specific date
|
|
137
|
+
#
|
|
138
|
+
# @param date [Date, Time] the point in time to query
|
|
139
|
+
# @return [ActiveRecord::Relation] facts valid at the given date
|
|
72
140
|
def facts_at(date)
|
|
73
141
|
facts.valid_at(date).canonical
|
|
74
142
|
end
|
|
75
143
|
|
|
76
|
-
#
|
|
144
|
+
# Finds entities by vector similarity using pgvector
|
|
145
|
+
#
|
|
146
|
+
# @param embedding [Array<Float>] the embedding vector to search with
|
|
147
|
+
# @param limit [Integer] maximum number of results
|
|
148
|
+
# @return [ActiveRecord::Relation] entities ordered by similarity
|
|
77
149
|
def self.nearest_neighbors(embedding, limit: 10)
|
|
78
150
|
return none unless embedding
|
|
79
151
|
|
|
@@ -2,24 +2,58 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Models
|
|
5
|
+
# Represents an alternative name for an entity
|
|
6
|
+
#
|
|
7
|
+
# Aliases allow entities to be found by various name forms (nicknames,
|
|
8
|
+
# abbreviations, email handles, etc.). Validation prevents invalid aliases
|
|
9
|
+
# like pronouns or generic terms.
|
|
10
|
+
#
|
|
11
|
+
# @example Create an alias
|
|
12
|
+
# alias = EntityAlias.create!(entity: person, name: "Johnny", kind: "nickname")
|
|
13
|
+
#
|
|
5
14
|
class EntityAlias < ActiveRecord::Base
|
|
6
15
|
self.table_name = "fact_db_entity_aliases"
|
|
7
16
|
|
|
8
17
|
belongs_to :entity, class_name: "FactDb::Models::Entity"
|
|
9
18
|
|
|
10
|
-
validates :
|
|
11
|
-
validates :
|
|
19
|
+
validates :name, presence: true
|
|
20
|
+
validates :name, uniqueness: { scope: :entity_id }
|
|
21
|
+
validate :name_is_valid
|
|
12
22
|
|
|
13
|
-
#
|
|
14
|
-
|
|
23
|
+
# @return [Array<String>] valid alias kinds
|
|
24
|
+
KINDS = %w[name nickname email handle abbreviation title].freeze
|
|
15
25
|
|
|
16
|
-
validates :
|
|
26
|
+
validates :kind, inclusion: { in: KINDS }, allow_nil: true
|
|
17
27
|
|
|
18
|
-
|
|
28
|
+
# @!method by_kind(k)
|
|
29
|
+
# Returns aliases of a specific kind
|
|
30
|
+
# @param k [String] the alias kind
|
|
31
|
+
# @return [ActiveRecord::Relation]
|
|
32
|
+
scope :by_kind, ->(k) { where(kind: k) }
|
|
33
|
+
|
|
34
|
+
# @!method high_confidence
|
|
35
|
+
# Returns aliases with confidence >= 0.9
|
|
36
|
+
# @return [ActiveRecord::Relation]
|
|
19
37
|
scope :high_confidence, -> { where("confidence >= ?", 0.9) }
|
|
20
38
|
|
|
39
|
+
# Finds an entity by alias text (case-insensitive)
|
|
40
|
+
#
|
|
41
|
+
# @param text [String] the alias text to search for
|
|
42
|
+
# @return [Entity, nil] the entity with this alias or nil
|
|
21
43
|
def self.find_entity_by_alias(text)
|
|
22
|
-
find_by(["LOWER(
|
|
44
|
+
find_by(["LOWER(name) = ?", text.downcase])&.entity
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
def name_is_valid
|
|
50
|
+
return if name.blank?
|
|
51
|
+
|
|
52
|
+
entity_name = entity&.name
|
|
53
|
+
unless Validation::AliasFilter.valid?(name, name: entity_name)
|
|
54
|
+
reason = Validation::AliasFilter.rejection_reason(name, name: entity_name)
|
|
55
|
+
errors.add(:name, "is not a valid alias: #{reason}")
|
|
56
|
+
end
|
|
23
57
|
end
|
|
24
58
|
end
|
|
25
59
|
end
|
|
@@ -2,6 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Models
|
|
5
|
+
# Join model linking entities to facts with role information
|
|
6
|
+
#
|
|
7
|
+
# Represents how an entity is mentioned in a specific fact, including
|
|
8
|
+
# the exact text used and the semantic role (subject, object, etc.).
|
|
9
|
+
#
|
|
10
|
+
# @example Create a mention
|
|
11
|
+
# mention = EntityMention.create!(
|
|
12
|
+
# fact: fact, entity: person,
|
|
13
|
+
# mention_text: "John", mention_role: "subject"
|
|
14
|
+
# )
|
|
15
|
+
#
|
|
5
16
|
class EntityMention < ActiveRecord::Base
|
|
6
17
|
self.table_name = "fact_db_entity_mentions"
|
|
7
18
|
|
|
@@ -11,20 +22,42 @@ module FactDb
|
|
|
11
22
|
validates :mention_text, presence: true
|
|
12
23
|
validates :fact_id, uniqueness: { scope: [:entity_id, :mention_text] }
|
|
13
24
|
|
|
14
|
-
#
|
|
25
|
+
# @return [Array<String>] valid mention roles
|
|
15
26
|
ROLES = %w[subject object location temporal instrument beneficiary].freeze
|
|
16
27
|
|
|
17
28
|
validates :mention_role, inclusion: { in: ROLES }, allow_nil: true
|
|
18
29
|
|
|
30
|
+
# @!method by_role(role)
|
|
31
|
+
# Returns mentions with a specific role
|
|
32
|
+
# @param role [String] the mention role
|
|
33
|
+
# @return [ActiveRecord::Relation]
|
|
19
34
|
scope :by_role, ->(role) { where(mention_role: role) }
|
|
35
|
+
|
|
36
|
+
# @!method subjects
|
|
37
|
+
# Returns mentions with subject role
|
|
38
|
+
# @return [ActiveRecord::Relation]
|
|
20
39
|
scope :subjects, -> { by_role("subject") }
|
|
40
|
+
|
|
41
|
+
# @!method objects
|
|
42
|
+
# Returns mentions with object role
|
|
43
|
+
# @return [ActiveRecord::Relation]
|
|
21
44
|
scope :objects, -> { by_role("object") }
|
|
45
|
+
|
|
46
|
+
# @!method high_confidence
|
|
47
|
+
# Returns mentions with confidence >= 0.9
|
|
48
|
+
# @return [ActiveRecord::Relation]
|
|
22
49
|
scope :high_confidence, -> { where("confidence >= ?", 0.9) }
|
|
23
50
|
|
|
51
|
+
# Checks if this mention has the subject role
|
|
52
|
+
#
|
|
53
|
+
# @return [Boolean] true if mention_role is "subject"
|
|
24
54
|
def subject?
|
|
25
55
|
mention_role == "subject"
|
|
26
56
|
end
|
|
27
57
|
|
|
58
|
+
# Checks if this mention has the object role
|
|
59
|
+
#
|
|
60
|
+
# @return [Boolean] true if mention_role is "object"
|
|
28
61
|
def object?
|
|
29
62
|
mention_role == "object"
|
|
30
63
|
end
|