fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
data/docs/api/index.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
Complete API documentation for FactDb.
|
|
4
|
+
|
|
5
|
+
## Main Classes
|
|
6
|
+
|
|
7
|
+
- [Facts](facts.md) - Main interface for FactDb operations
|
|
8
|
+
|
|
9
|
+
## Models
|
|
10
|
+
|
|
11
|
+
- [Content](models/content.md) - Immutable source documents
|
|
12
|
+
- [Entity](models/entity.md) - Resolved identities
|
|
13
|
+
- [Fact](models/fact.md) - Temporal assertions
|
|
14
|
+
|
|
15
|
+
## Services
|
|
16
|
+
|
|
17
|
+
- [ContentService](services/content-service.md) - Ingest and manage content
|
|
18
|
+
- [EntityService](services/entity-service.md) - Create and resolve entities
|
|
19
|
+
- [FactService](services/fact-service.md) - Extract and query facts
|
|
20
|
+
|
|
21
|
+
## Extractors
|
|
22
|
+
|
|
23
|
+
- [ManualExtractor](extractors/manual.md) - API-driven extraction
|
|
24
|
+
- [LLMExtractor](extractors/llm.md) - AI-powered extraction
|
|
25
|
+
- [RuleBasedExtractor](extractors/rule-based.md) - Pattern matching
|
|
26
|
+
|
|
27
|
+
## Pipeline
|
|
28
|
+
|
|
29
|
+
- [ExtractionPipeline](pipeline/extraction.md) - Concurrent extraction
|
|
30
|
+
- [ResolutionPipeline](pipeline/resolution.md) - Parallel resolution
|
|
31
|
+
|
|
32
|
+
## Module Structure
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
FactDb
|
|
36
|
+
├── Facts # Main class
|
|
37
|
+
├── Config # Configuration
|
|
38
|
+
├── Database # Database connection
|
|
39
|
+
├── Models
|
|
40
|
+
│ ├── Content
|
|
41
|
+
│ ├── Entity
|
|
42
|
+
│ ├── EntityAlias
|
|
43
|
+
│ ├── Fact
|
|
44
|
+
│ ├── EntityMention
|
|
45
|
+
│ └── FactSource
|
|
46
|
+
├── Services
|
|
47
|
+
│ ├── ContentService
|
|
48
|
+
│ ├── EntityService
|
|
49
|
+
│ └── FactService
|
|
50
|
+
├── Extractors
|
|
51
|
+
│ ├── Base
|
|
52
|
+
│ ├── ManualExtractor
|
|
53
|
+
│ ├── LLMExtractor
|
|
54
|
+
│ └── RuleBasedExtractor
|
|
55
|
+
├── Resolution
|
|
56
|
+
│ ├── EntityResolver
|
|
57
|
+
│ └── FactResolver
|
|
58
|
+
├── Pipeline
|
|
59
|
+
│ ├── ExtractionPipeline
|
|
60
|
+
│ └── ResolutionPipeline
|
|
61
|
+
├── Temporal
|
|
62
|
+
│ ├── Query
|
|
63
|
+
│ └── Timeline
|
|
64
|
+
└── LLM
|
|
65
|
+
└── Adapter
|
|
66
|
+
```
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Content Model
|
|
2
|
+
|
|
3
|
+
Stores immutable source documents.
|
|
4
|
+
|
|
5
|
+
## Class: `FactDb::Models::Content`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
content = FactDb::Models::Content.new(
|
|
9
|
+
raw_text: "Document content...",
|
|
10
|
+
content_type: "email",
|
|
11
|
+
captured_at: Time.current
|
|
12
|
+
)
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Attributes
|
|
16
|
+
|
|
17
|
+
| Attribute | Type | Description |
|
|
18
|
+
|-----------|------|-------------|
|
|
19
|
+
| `id` | Integer | Primary key |
|
|
20
|
+
| `content_hash` | String | SHA256 hash for deduplication |
|
|
21
|
+
| `content_type` | String | Type (email, document, etc.) |
|
|
22
|
+
| `raw_text` | Text | Original content |
|
|
23
|
+
| `title` | String | Optional title |
|
|
24
|
+
| `source_uri` | String | Original location |
|
|
25
|
+
| `source_metadata` | Hash | Additional metadata (JSONB) |
|
|
26
|
+
| `embedding` | Vector | Semantic search vector |
|
|
27
|
+
| `captured_at` | DateTime | When content was captured |
|
|
28
|
+
| `created_at` | DateTime | Record creation time |
|
|
29
|
+
|
|
30
|
+
## Associations
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
has_many :fact_sources
|
|
34
|
+
has_many :facts, through: :fact_sources
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Callbacks
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
before_create :compute_hash
|
|
41
|
+
before_create :generate_embedding
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Instance Methods
|
|
45
|
+
|
|
46
|
+
### compute_hash
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
def compute_hash
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Computes SHA256 hash of raw_text for deduplication.
|
|
53
|
+
|
|
54
|
+
### generate_embedding
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
def generate_embedding
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Generates embedding vector using configured generator.
|
|
61
|
+
|
|
62
|
+
## Class Methods
|
|
63
|
+
|
|
64
|
+
### find_or_create_by_text
|
|
65
|
+
|
|
66
|
+
```ruby
|
|
67
|
+
def self.find_or_create_by_text(text, **attributes)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Finds existing content by its content hash, or creates a new record if none exists.
|
|
71
|
+
|
|
72
|
+
**Example:**
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
content = Content.find_or_create_by_text(
|
|
76
|
+
"Document text",
|
|
77
|
+
content_type: "document",
|
|
78
|
+
captured_at: Time.current
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Scopes
|
|
83
|
+
|
|
84
|
+
### by_type
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
scope :by_type, ->(type) { where(content_type: type) }
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Filter by content type.
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
Content.by_type('email')
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### captured_between
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
scope :captured_between, ->(from, to) {
|
|
100
|
+
where(captured_at: from..to)
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Filter by capture date range.
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
Content.captured_between(1.week.ago, Time.current)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### search_text
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
scope :search_text, ->(query) {
|
|
114
|
+
where("raw_text @@ plainto_tsquery(?)", query)
|
|
115
|
+
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Full-text search.
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
Content.search_text("quarterly earnings")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Usage Examples
|
|
125
|
+
|
|
126
|
+
### Create Content
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
content = Content.create!(
|
|
130
|
+
raw_text: "Important document...",
|
|
131
|
+
content_type: "document",
|
|
132
|
+
title: "Q4 Report",
|
|
133
|
+
source_uri: "https://example.com/report.pdf",
|
|
134
|
+
captured_at: Time.current,
|
|
135
|
+
source_metadata: {
|
|
136
|
+
author: "Jane Smith",
|
|
137
|
+
department: "Finance"
|
|
138
|
+
}
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Find by Hash
|
|
143
|
+
|
|
144
|
+
```ruby
|
|
145
|
+
hash = Digest::SHA256.hexdigest("Document text")
|
|
146
|
+
content = Content.find_by(content_hash: hash)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Get Related Facts
|
|
150
|
+
|
|
151
|
+
```ruby
|
|
152
|
+
content.facts.each do |fact|
|
|
153
|
+
puts fact.fact_text
|
|
154
|
+
end
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Semantic Search
|
|
158
|
+
|
|
159
|
+
```ruby
|
|
160
|
+
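# Requires a trusted query_embedding vector (it is interpolated into the SQL below); records without an embedding are skipped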
|
|
161
|
+
similar = Content
|
|
162
|
+
.where.not(embedding: nil)
|
|
163
|
+
.order(Arel.sql("embedding <=> '#{query_embedding}'"))
|
|
164
|
+
.limit(10)
|
|
165
|
+
```
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Entity Model
|
|
2
|
+
|
|
3
|
+
Stores resolved identities (people, organizations, places, etc.).
|
|
4
|
+
|
|
5
|
+
## Class: `FactDb::Models::Entity`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
entity = FactDb::Models::Entity.new(
|
|
9
|
+
canonical_name: "Paula Chen",
|
|
10
|
+
entity_type: "person"
|
|
11
|
+
)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Attributes
|
|
15
|
+
|
|
16
|
+
| Attribute | Type | Description |
|
|
17
|
+
|-----------|------|-------------|
|
|
18
|
+
| `id` | Integer | Primary key |
|
|
19
|
+
| `canonical_name` | String | Authoritative name |
|
|
20
|
+
| `entity_type` | String | Type (person, organization, place, etc.) |
|
|
21
|
+
| `resolution_status` | String | Status (unresolved, resolved, merged) |
|
|
22
|
+
| `merged_into_id` | Integer | Points to canonical entity if merged |
|
|
23
|
+
| `metadata` | Hash | Additional attributes (JSONB) |
|
|
24
|
+
| `embedding` | Vector | Semantic search vector |
|
|
25
|
+
| `created_at` | DateTime | Record creation time |
|
|
26
|
+
|
|
27
|
+
## Entity Types
|
|
28
|
+
|
|
29
|
+
- `person` - Individual people
|
|
30
|
+
- `organization` - Companies, teams, groups
|
|
31
|
+
- `place` - Locations
|
|
32
|
+
- `product` - Products, services
|
|
33
|
+
- `event` - Named events
|
|
34
|
+
|
|
35
|
+
## Resolution Status
|
|
36
|
+
|
|
37
|
+
- `unresolved` - Entity created but not confirmed
|
|
38
|
+
- `resolved` - Entity identity confirmed
|
|
39
|
+
- `merged` - Entity merged into another
|
|
40
|
+
|
|
41
|
+
## Associations
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
has_many :entity_aliases, dependent: :destroy
|
|
45
|
+
has_many :entity_mentions
|
|
46
|
+
has_many :facts, through: :entity_mentions
|
|
47
|
+
belongs_to :merged_into, class_name: 'Entity', optional: true
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Instance Methods
|
|
51
|
+
|
|
52
|
+
### add_alias
|
|
53
|
+
|
|
54
|
+
```ruby
|
|
55
|
+
def add_alias(text, type: nil, confidence: 1.0)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Add an alias to the entity.
|
|
59
|
+
|
|
60
|
+
**Example:**
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
entity.add_alias("Paula", type: "nickname", confidence: 0.95)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### merged?
|
|
67
|
+
|
|
68
|
+
```ruby
|
|
69
|
+
def merged?
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Returns true if entity has been merged into another.
|
|
73
|
+
|
|
74
|
+
### canonical
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
def canonical
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Returns the canonical entity (follows merge chain).
|
|
81
|
+
|
|
82
|
+
**Example:**
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
# If entity was merged
|
|
86
|
+
canonical = entity.canonical # Returns the canonical entity
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Scopes
|
|
90
|
+
|
|
91
|
+
### by_type
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
scope :by_type, ->(type) { where(entity_type: type) }
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Filter by entity type.
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
Entity.by_type('person')
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### active
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
scope :active, -> { where.not(resolution_status: 'merged') }
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Exclude merged entities.
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
Entity.active
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### resolved
|
|
116
|
+
|
|
117
|
+
```ruby
|
|
118
|
+
scope :resolved, -> { where(resolution_status: 'resolved') }
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Only resolved entities.
|
|
122
|
+
|
|
123
|
+
### search_name
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
scope :search_name, ->(query) {
|
|
127
|
+
where("canonical_name ILIKE ?", "%#{query}%")
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Search by name.
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
Entity.search_name("paula")
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Usage Examples
|
|
138
|
+
|
|
139
|
+
### Create Entity
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
entity = Entity.create!(
|
|
143
|
+
canonical_name: "Paula Chen",
|
|
144
|
+
entity_type: "person",
|
|
145
|
+
metadata: {
|
|
146
|
+
department: "Engineering",
|
|
147
|
+
employee_id: "E12345"
|
|
148
|
+
}
|
|
149
|
+
)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Add Aliases
|
|
153
|
+
|
|
154
|
+
```ruby
|
|
155
|
+
entity.add_alias("Paula")
|
|
156
|
+
entity.add_alias("P. Chen", type: "abbreviation")
|
|
157
|
+
entity.add_alias("Chen, Paula", type: "formal")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Check Aliases
|
|
161
|
+
|
|
162
|
+
```ruby
|
|
163
|
+
entity.entity_aliases.each do |a|
|
|
164
|
+
puts "#{a.alias_text} (#{a.alias_type})"
|
|
165
|
+
end
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Get Related Facts
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
entity.facts.each do |fact|
|
|
172
|
+
puts "#{fact.valid_at}: #{fact.fact_text}"
|
|
173
|
+
end
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Find Similar Entities
|
|
177
|
+
|
|
178
|
+
```ruby
|
|
179
|
+
# By name
|
|
180
|
+
similar = Entity.search_name("Microsoft")
|
|
181
|
+
|
|
182
|
+
# By embedding
|
|
183
|
+
similar = Entity
|
|
184
|
+
.where.not(embedding: nil)
|
|
185
|
+
.order(Arel.sql("embedding <=> '#{query_embedding}'"))
|
|
186
|
+
.limit(10)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Merge Entities
|
|
190
|
+
|
|
191
|
+
```ruby
|
|
192
|
+
# entity2 will be merged into entity1
|
|
193
|
+
entity2.update!(
|
|
194
|
+
resolution_status: 'merged',
|
|
195
|
+
merged_into_id: entity1.id
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# Copy aliases
|
|
199
|
+
entity2.entity_aliases.each do |a|
|
|
200
|
+
entity1.add_alias(a.alias_text, type: a.alias_type)
|
|
201
|
+
end
|
|
202
|
+
```
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# Fact Model
|
|
2
|
+
|
|
3
|
+
Stores temporal assertions about entities.
|
|
4
|
+
|
|
5
|
+
## Class: `FactDb::Models::Fact`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
fact = FactDb::Models::Fact.new(
|
|
9
|
+
fact_text: "Paula Chen is Principal Engineer",
|
|
10
|
+
valid_at: Date.parse("2024-01-10"),
|
|
11
|
+
status: "canonical"
|
|
12
|
+
)
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Attributes
|
|
16
|
+
|
|
17
|
+
| Attribute | Type | Description |
|
|
18
|
+
|-----------|------|-------------|
|
|
19
|
+
| `id` | Integer | Primary key |
|
|
20
|
+
| `fact_text` | Text | The assertion |
|
|
21
|
+
| `fact_hash` | String | Hash for deduplication |
|
|
22
|
+
| `valid_at` | DateTime | When fact became true |
|
|
23
|
+
| `invalid_at` | DateTime | When fact stopped being true (nil if current) |
|
|
24
|
+
| `status` | String | Status (canonical, superseded, corroborated, synthesized) |
|
|
25
|
+
| `superseded_by_id` | Integer | ID of the fact that replaced this one |
|
|
26
|
+
| `derived_from_ids` | Array | IDs of the source facts a synthesized fact was derived from |
|
|
27
|
+
| `corroborated_by_ids` | Array | IDs of the facts that corroborate this one |
|
|
28
|
+
| `confidence` | Float | Extraction confidence (0.0-1.0) |
|
|
29
|
+
| `extraction_method` | String | How fact was extracted |
|
|
30
|
+
| `metadata` | Hash | Additional data (JSONB) |
|
|
31
|
+
| `embedding` | Vector | Semantic search vector |
|
|
32
|
+
| `created_at` | DateTime | Record creation time |
|
|
33
|
+
|
|
34
|
+
## Fact Status
|
|
35
|
+
|
|
36
|
+
- `canonical` - Current authoritative version
|
|
37
|
+
- `superseded` - Replaced by newer information
|
|
38
|
+
- `corroborated` - Confirmed by multiple sources
|
|
39
|
+
- `synthesized` - Derived from multiple facts
|
|
40
|
+
|
|
41
|
+
## Associations
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
has_many :entity_mentions, dependent: :destroy
|
|
45
|
+
has_many :entities, through: :entity_mentions
|
|
46
|
+
has_many :fact_sources, dependent: :destroy
|
|
47
|
+
has_many :contents, through: :fact_sources
|
|
48
|
+
belongs_to :superseded_by, class_name: 'Fact', optional: true
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Instance Methods
|
|
52
|
+
|
|
53
|
+
### add_mention
|
|
54
|
+
|
|
55
|
+
```ruby
|
|
56
|
+
def add_mention(entity:, text:, role:, confidence: 1.0)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Add an entity mention to the fact.
|
|
60
|
+
|
|
61
|
+
**Example:**
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
fact.add_mention(
|
|
65
|
+
entity: paula,
|
|
66
|
+
text: "Paula Chen",
|
|
67
|
+
role: "subject",
|
|
68
|
+
confidence: 0.95
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### add_source
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
def add_source(content:, type: "primary", excerpt: nil, confidence: 1.0)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Add a source content link.
|
|
79
|
+
|
|
80
|
+
**Example:**
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
fact.add_source(
|
|
84
|
+
content: email,
|
|
85
|
+
type: "primary",
|
|
86
|
+
excerpt: "...accepted the offer..."
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### currently_valid?
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
def currently_valid?
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Returns true if fact is currently valid (invalid_at is nil).
|
|
97
|
+
|
|
98
|
+
### valid_at?(date)
|
|
99
|
+
|
|
100
|
+
```ruby
|
|
101
|
+
def valid_at?(date)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Returns true if fact was valid at the given date.
|
|
105
|
+
|
|
106
|
+
### superseded?
|
|
107
|
+
|
|
108
|
+
```ruby
|
|
109
|
+
def superseded?
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Returns true if fact has been superseded.
|
|
113
|
+
|
|
114
|
+
### canonical?
|
|
115
|
+
|
|
116
|
+
```ruby
|
|
117
|
+
def canonical?
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Returns true if fact is canonical.
|
|
121
|
+
|
|
122
|
+
## Scopes
|
|
123
|
+
|
|
124
|
+
### canonical
|
|
125
|
+
|
|
126
|
+
```ruby
|
|
127
|
+
scope :canonical, -> { where(status: 'canonical') }
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Only canonical facts.
|
|
131
|
+
|
|
132
|
+
### currently_valid
|
|
133
|
+
|
|
134
|
+
```ruby
|
|
135
|
+
scope :currently_valid, -> { where(invalid_at: nil) }
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Facts that are currently valid.
|
|
139
|
+
|
|
140
|
+
### valid_at
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
scope :valid_at, ->(date) {
|
|
144
|
+
where("valid_at <= ? AND (invalid_at IS NULL OR invalid_at > ?)", date, date)
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Facts valid at a specific point in time.
|
|
149
|
+
|
|
150
|
+
```ruby
|
|
151
|
+
Fact.valid_at(Date.parse("2023-06-15"))
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### mentioning_entity
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
scope :mentioning_entity, ->(entity_id) {
|
|
158
|
+
joins(:entity_mentions).where(entity_mentions: { entity_id: entity_id })
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Facts mentioning a specific entity.
|
|
163
|
+
|
|
164
|
+
```ruby
|
|
165
|
+
Fact.mentioning_entity(paula.id)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### search_text
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
scope :search_text, ->(query) {
|
|
172
|
+
where("fact_text @@ plainto_tsquery(?)", query)
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Full-text search.
|
|
177
|
+
|
|
178
|
+
```ruby
|
|
179
|
+
Fact.search_text("engineer")
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### by_extraction_method
|
|
183
|
+
|
|
184
|
+
```ruby
|
|
185
|
+
scope :by_extraction_method, ->(method) {
|
|
186
|
+
where(extraction_method: method)
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Filter by extraction method.
|
|
191
|
+
|
|
192
|
+
```ruby
|
|
193
|
+
Fact.by_extraction_method('llm')
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### high_confidence
|
|
197
|
+
|
|
198
|
+
```ruby
|
|
199
|
+
scope :high_confidence, -> { where("confidence > 0.8") }
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Only facts with a confidence greater than 0.8.
|
|
203
|
+
|
|
204
|
+
## Usage Examples
|
|
205
|
+
|
|
206
|
+
### Create Fact
|
|
207
|
+
|
|
208
|
+
```ruby
|
|
209
|
+
fact = Fact.create!(
|
|
210
|
+
fact_text: "Paula Chen joined Microsoft as Principal Engineer",
|
|
211
|
+
valid_at: Date.parse("2024-01-10"),
|
|
212
|
+
status: "canonical",
|
|
213
|
+
extraction_method: "manual",
|
|
214
|
+
confidence: 1.0
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Add mentions
|
|
218
|
+
fact.add_mention(entity: paula, text: "Paula Chen", role: "subject")
|
|
219
|
+
fact.add_mention(entity: microsoft, text: "Microsoft", role: "organization")
|
|
220
|
+
|
|
221
|
+
# Add source
|
|
222
|
+
fact.add_source(content: announcement, type: "primary")
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Query Facts
|
|
226
|
+
|
|
227
|
+
```ruby
|
|
228
|
+
# Current facts about Paula
|
|
229
|
+
Fact.canonical.currently_valid.mentioning_entity(paula.id)
|
|
230
|
+
|
|
231
|
+
# Historical facts
|
|
232
|
+
Fact.valid_at(Date.parse("2023-06-15")).mentioning_entity(paula.id)
|
|
233
|
+
|
|
234
|
+
# Search
|
|
235
|
+
Fact.search_text("promoted")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Supersede Fact
|
|
239
|
+
|
|
240
|
+
```ruby
|
|
241
|
+
new_fact = Fact.create!(
|
|
242
|
+
fact_text: "Paula Chen is Senior Principal Engineer",
|
|
243
|
+
valid_at: Date.parse("2024-06-01"),
|
|
244
|
+
status: "canonical"
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
old_fact.update!(
|
|
248
|
+
status: "superseded",
|
|
249
|
+
superseded_by_id: new_fact.id,
|
|
250
|
+
invalid_at: Date.parse("2024-06-01")
|
|
251
|
+
)
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Get Sources
|
|
255
|
+
|
|
256
|
+
```ruby
|
|
257
|
+
fact.fact_sources.each do |source|
|
|
258
|
+
puts "Source: #{source.content.title}"
|
|
259
|
+
puts "Type: #{source.source_type}"
|
|
260
|
+
puts "Excerpt: #{source.excerpt}"
|
|
261
|
+
end
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Get Mentioned Entities
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
fact.entity_mentions.each do |mention|
|
|
268
|
+
puts "#{mention.entity.canonical_name} (#{mention.mention_role})"
|
|
269
|
+
end
|
|
270
|
+
```
|