fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -2,6 +2,19 @@
2
2
 
3
3
  module FactDb
4
4
  module Models
5
+ # Represents a named entity in the fact database
6
+ #
7
+ # Entities are real-world things like people, organizations, places, etc.
8
+ # that can be referenced in facts. Entities support aliases for name variations
9
+ # and can be merged to deduplicate records.
10
+ #
11
+ # @example Create an entity with aliases
12
+ # entity = Entity.create!(name: "John Smith", kind: "person", resolution_status: "resolved")
13
+ # entity.add_alias("J. Smith")
14
+ #
15
+ # @example Find entities by kind
16
+ # people = Entity.by_kind("person").not_merged
17
+ #
5
18
  class Entity < ActiveRecord::Base
6
19
  self.table_name = "fact_db_entities"
7
20
 
@@ -11,69 +24,128 @@ module FactDb
11
24
  foreign_key: :entity_id, dependent: :destroy
12
25
  has_many :facts, through: :entity_mentions
13
26
 
14
- belongs_to :merged_into, class_name: "FactDb::Models::Entity",
15
- foreign_key: :merged_into_id, optional: true
27
+ belongs_to :canonical, class_name: "FactDb::Models::Entity",
28
+ foreign_key: :canonical_id, optional: true
16
29
  has_many :merged_entities, class_name: "FactDb::Models::Entity",
17
- foreign_key: :merged_into_id
30
+ foreign_key: :canonical_id
18
31
 
19
- validates :canonical_name, presence: true
20
- validates :entity_type, presence: true
32
+ validates :name, presence: true
33
+ validates :kind, presence: true
21
34
  validates :resolution_status, presence: true
22
35
 
23
- # Entity types
24
- TYPES = %w[person organization place product event concept].freeze
36
+ # @return [Array<String>] valid resolution statuses
25
37
  STATUSES = %w[unresolved resolved merged split].freeze
26
38
 
27
- validates :entity_type, inclusion: { in: TYPES }
39
+ # @return [Array<String>] valid entity kinds
40
+ ENTITY_KINDS = %w[person organization place product event concept other].freeze
41
+
28
42
  validates :resolution_status, inclusion: { in: STATUSES }
43
+ validates :kind, inclusion: { in: ENTITY_KINDS }
44
+
45
+ # @!method by_kind(k)
46
+ # Returns entities of a specific kind
47
+ # @param k [String] the entity kind
48
+ # @return [ActiveRecord::Relation]
49
+ scope :by_kind, ->(k) { where(kind: k) }
29
50
 
30
- scope :by_type, ->(type) { where(entity_type: type) }
51
+ # @!method resolved
52
+ # Returns entities with "resolved" status
53
+ # @return [ActiveRecord::Relation]
31
54
  scope :resolved, -> { where(resolution_status: "resolved") }
55
+
56
+ # @!method unresolved
57
+ # Returns entities with "unresolved" status
58
+ # @return [ActiveRecord::Relation]
32
59
  scope :unresolved, -> { where(resolution_status: "unresolved") }
60
+
61
+ # @!method not_merged
62
+ # Returns entities that have not been merged
63
+ # @return [ActiveRecord::Relation]
33
64
  scope :not_merged, -> { where.not(resolution_status: "merged") }
34
- scope :people, -> { by_type("person") }
35
- scope :organizations, -> { by_type("organization") }
36
- scope :places, -> { by_type("place") }
37
65
 
66
+ # Checks if the entity is resolved
67
+ #
68
+ # @return [Boolean] true if resolution_status is "resolved"
38
69
  def resolved?
39
70
  resolution_status == "resolved"
40
71
  end
41
72
 
73
+ # Checks if the entity has been merged into another
74
+ #
75
+ # @return [Boolean] true if resolution_status is "merged"
42
76
  def merged?
43
77
  resolution_status == "merged"
44
78
  end
45
79
 
80
+ # Returns the canonical entity (follows merge chain)
81
+ #
82
+ # If this entity has been merged, recursively follows the canonical_id
83
+ # chain to find the ultimate canonical entity.
84
+ #
85
+ # @return [Entity] the canonical entity or self if not merged
46
86
  def canonical_entity
47
- merged? ? merged_into&.canonical_entity || merged_into : self
87
+ merged? ? canonical&.canonical_entity || canonical : self
48
88
  end
49
89
 
90
+ # Returns all alias names as an array of strings
91
+ #
92
+ # @return [Array<String>] alias names
50
93
  def all_aliases
51
- aliases.pluck(:alias_text)
94
+ aliases.pluck(:name)
52
95
  end
53
96
 
54
- def add_alias(text, type: nil, confidence: 1.0)
55
- aliases.find_or_create_by!(alias_text: text) do |a|
56
- a.alias_type = type
97
+ # Adds an alias to this entity
98
+ #
99
+ # Validates the alias before creation using AliasFilter.
100
+ # Returns nil if validation fails.
101
+ #
102
+ # @param text [String] the alias text
103
+ # @param kind [String, nil] alias kind (name, nickname, email, handle, abbreviation, title)
104
+ # @param confidence [Float] confidence score (0.0 to 1.0)
105
+ # @return [EntityAlias, nil] the created alias or nil if validation failed
106
+ def add_alias(text, kind: nil, confidence: 1.0)
107
+ # Pre-validate before attempting to create
108
+ return nil unless Validation::AliasFilter.valid?(text, name: name)
109
+
110
+ aliases.find_or_create_by!(name: text) do |a|
111
+ a.kind = kind
57
112
  a.confidence = confidence
58
113
  end
114
+ rescue ActiveRecord::RecordInvalid
115
+ # Alias validation failed (pronoun, generic term, etc.)
116
+ nil
59
117
  end
60
118
 
61
- def matches_name?(name)
62
- return true if canonical_name.downcase == name.downcase
119
+ # Checks if the entity matches a query (by name or alias)
120
+ #
121
+ # @param query [String] the name to match (case-insensitive)
122
+ # @return [Boolean] true if name or any alias matches
123
+ def matches_name?(query)
124
+ return true if self.name.downcase == query.downcase
63
125
 
64
- aliases.exists?(["LOWER(alias_text) = ?", name.downcase])
126
+ aliases.exists?(["LOWER(name) = ?", query.downcase])
65
127
  end
66
128
 
67
- # Get all facts mentioning this entity
129
+ # Returns currently valid canonical facts mentioning this entity
130
+ #
131
+ # @return [ActiveRecord::Relation] currently valid facts
68
132
  def current_facts
69
133
  facts.currently_valid.canonical
70
134
  end
71
135
 
136
+ # Returns facts valid at a specific date
137
+ #
138
+ # @param date [Date, Time] the point in time to query
139
+ # @return [ActiveRecord::Relation] facts valid at the given date
72
140
  def facts_at(date)
73
141
  facts.valid_at(date).canonical
74
142
  end
75
143
 
76
- # Vector similarity search for entity matching
144
+ # Finds entities by vector similarity using pgvector
145
+ #
146
+ # @param embedding [Array<Float>] the embedding vector to search with
147
+ # @param limit [Integer] maximum number of results
148
+ # @return [ActiveRecord::Relation] entities ordered by similarity
77
149
  def self.nearest_neighbors(embedding, limit: 10)
78
150
  return none unless embedding
79
151
 
@@ -2,24 +2,58 @@
2
2
 
3
3
  module FactDb
4
4
  module Models
5
+ # Represents an alternative name for an entity
6
+ #
7
+ # Aliases allow entities to be found by various name forms (nicknames,
8
+ # abbreviations, email handles, etc.). Validation prevents invalid aliases
9
+ # like pronouns or generic terms.
10
+ #
11
+ # @example Create an alias
12
+ # alias = EntityAlias.create!(entity: person, name: "Johnny", kind: "nickname")
13
+ #
5
14
  class EntityAlias < ActiveRecord::Base
6
15
  self.table_name = "fact_db_entity_aliases"
7
16
 
8
17
  belongs_to :entity, class_name: "FactDb::Models::Entity"
9
18
 
10
- validates :alias_text, presence: true
11
- validates :alias_text, uniqueness: { scope: :entity_id }
19
+ validates :name, presence: true
20
+ validates :name, uniqueness: { scope: :entity_id }
21
+ validate :name_is_valid
12
22
 
13
- # Alias types
14
- TYPES = %w[name nickname email handle abbreviation title].freeze
23
+ # @return [Array<String>] valid alias kinds
24
+ KINDS = %w[name nickname email handle abbreviation title].freeze
15
25
 
16
- validates :alias_type, inclusion: { in: TYPES }, allow_nil: true
26
+ validates :kind, inclusion: { in: KINDS }, allow_nil: true
17
27
 
18
- scope :by_type, ->(type) { where(alias_type: type) }
28
+ # @!method by_kind(k)
29
+ # Returns aliases of a specific kind
30
+ # @param k [String] the alias kind
31
+ # @return [ActiveRecord::Relation]
32
+ scope :by_kind, ->(k) { where(kind: k) }
33
+
34
+ # @!method high_confidence
35
+ # Returns aliases with confidence >= 0.9
36
+ # @return [ActiveRecord::Relation]
19
37
  scope :high_confidence, -> { where("confidence >= ?", 0.9) }
20
38
 
39
+ # Finds an entity by alias text (case-insensitive)
40
+ #
41
+ # @param text [String] the alias text to search for
42
+ # @return [Entity, nil] the entity with this alias or nil
21
43
  def self.find_entity_by_alias(text)
22
- find_by(["LOWER(alias_text) = ?", text.downcase])&.entity
44
+ find_by(["LOWER(name) = ?", text.downcase])&.entity
45
+ end
46
+
47
+ private
48
+
49
+ def name_is_valid
50
+ return if name.blank?
51
+
52
+ entity_name = entity&.name
53
+ unless Validation::AliasFilter.valid?(name, name: entity_name)
54
+ reason = Validation::AliasFilter.rejection_reason(name, name: entity_name)
55
+ errors.add(:name, "is not a valid alias: #{reason}")
56
+ end
23
57
  end
24
58
  end
25
59
  end
@@ -2,6 +2,17 @@
2
2
 
3
3
  module FactDb
4
4
  module Models
5
+ # Join model linking entities to facts with role information
6
+ #
7
+ # Represents how an entity is mentioned in a specific fact, including
8
+ # the exact text used and the semantic role (subject, object, etc.).
9
+ #
10
+ # @example Create a mention
11
+ # mention = EntityMention.create!(
12
+ # fact: fact, entity: person,
13
+ # mention_text: "John", mention_role: "subject"
14
+ # )
15
+ #
5
16
  class EntityMention < ActiveRecord::Base
6
17
  self.table_name = "fact_db_entity_mentions"
7
18
 
@@ -11,20 +22,42 @@ module FactDb
11
22
  validates :mention_text, presence: true
12
23
  validates :fact_id, uniqueness: { scope: [:entity_id, :mention_text] }
13
24
 
14
- # Mention roles
25
+ # @return [Array<String>] valid mention roles
15
26
  ROLES = %w[subject object location temporal instrument beneficiary].freeze
16
27
 
17
28
  validates :mention_role, inclusion: { in: ROLES }, allow_nil: true
18
29
 
30
+ # @!method by_role(role)
31
+ # Returns mentions with a specific role
32
+ # @param role [String] the mention role
33
+ # @return [ActiveRecord::Relation]
19
34
  scope :by_role, ->(role) { where(mention_role: role) }
35
+
36
+ # @!method subjects
37
+ # Returns mentions with subject role
38
+ # @return [ActiveRecord::Relation]
20
39
  scope :subjects, -> { by_role("subject") }
40
+
41
+ # @!method objects
42
+ # Returns mentions with object role
43
+ # @return [ActiveRecord::Relation]
21
44
  scope :objects, -> { by_role("object") }
45
+
46
+ # @!method high_confidence
47
+ # Returns mentions with confidence >= 0.9
48
+ # @return [ActiveRecord::Relation]
22
49
  scope :high_confidence, -> { where("confidence >= ?", 0.9) }
23
50
 
51
+ # Checks if this mention has the subject role
52
+ #
53
+ # @return [Boolean] true if mention_role is "subject"
24
54
  def subject?
25
55
  mention_role == "subject"
26
56
  end
27
57
 
58
+ # Checks if this mention has the object role
59
+ #
60
+ # @return [Boolean] true if mention_role is "object"
28
61
  def object?
29
62
  mention_role == "object"
30
63
  end