fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# EntityService
|
|
2
|
+
|
|
3
|
+
Service for creating and resolving entities.
|
|
4
|
+
|
|
5
|
+
## Class: `FactDb::Services::EntityService`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
service = FactDb::Services::EntityService.new(config)
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Methods
|
|
12
|
+
|
|
13
|
+
### create
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
def create(canonical_name, type:, aliases: [], metadata: {})
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Create a new entity.
|
|
20
|
+
|
|
21
|
+
**Parameters:**
|
|
22
|
+
|
|
23
|
+
- `canonical_name` (String) - Authoritative name
|
|
24
|
+
- `type` (Symbol) - Entity type
|
|
25
|
+
- `aliases` (Array) - Alternative names
|
|
26
|
+
- `metadata` (Hash) - Additional attributes
|
|
27
|
+
|
|
28
|
+
**Returns:** `Models::Entity`
|
|
29
|
+
|
|
30
|
+
**Example:**
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
entity = service.create(
|
|
34
|
+
"Paula Chen",
|
|
35
|
+
type: :person,
|
|
36
|
+
aliases: ["Paula", "P. Chen"],
|
|
37
|
+
metadata: { department: "Engineering" }
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
### find
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
def find(id)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Find entity by ID.
|
|
50
|
+
|
|
51
|
+
**Returns:** `Models::Entity`
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
### resolve
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
def resolve(name, type: nil)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Resolve a name to an entity using multiple strategies.
|
|
62
|
+
|
|
63
|
+
**Parameters:**
|
|
64
|
+
|
|
65
|
+
- `name` (String) - Name to resolve
|
|
66
|
+
- `type` (Symbol) - Optional type filter
|
|
67
|
+
|
|
68
|
+
**Returns:** `Models::Entity` or `nil`
|
|
69
|
+
|
|
70
|
+
**Example:**
|
|
71
|
+
|
|
72
|
+
```ruby
|
|
73
|
+
entity = service.resolve("Paula Chen", type: :person)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
### add_alias
|
|
79
|
+
|
|
80
|
+
```ruby
|
|
81
|
+
def add_alias(entity_id, alias_text, type: nil, confidence: 1.0)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Add an alias to an entity.
|
|
85
|
+
|
|
86
|
+
**Example:**
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
service.add_alias(entity.id, "P. Chen", type: :abbreviation)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
### remove_alias
|
|
95
|
+
|
|
96
|
+
```ruby
|
|
97
|
+
def remove_alias(entity_id, alias_text)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Remove an alias from an entity.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
### merge
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
def merge(keep_id, merge_id)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Merge two entities (merge_id into keep_id).
|
|
111
|
+
|
|
112
|
+
**Example:**
|
|
113
|
+
|
|
114
|
+
```ruby
|
|
115
|
+
service.merge(canonical_entity.id, duplicate_entity.id)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
### update
|
|
121
|
+
|
|
122
|
+
```ruby
|
|
123
|
+
def update(id, **attributes)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Update entity attributes.
|
|
127
|
+
|
|
128
|
+
**Example:**
|
|
129
|
+
|
|
130
|
+
```ruby
|
|
131
|
+
service.update(
|
|
132
|
+
entity.id,
|
|
133
|
+
canonical_name: "Paula M. Chen",
|
|
134
|
+
metadata: { title: "Senior Engineer" }
|
|
135
|
+
)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
### search
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
def search(query, type: nil, limit: 20)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Search entities by name.
|
|
147
|
+
|
|
148
|
+
**Parameters:**
|
|
149
|
+
|
|
150
|
+
- `query` (String) - Search query
|
|
151
|
+
- `type` (Symbol) - Optional type filter
|
|
152
|
+
- `limit` (Integer) - Max results
|
|
153
|
+
|
|
154
|
+
**Returns:** `Array<Models::Entity>`
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
### by_type
|
|
159
|
+
|
|
160
|
+
```ruby
|
|
161
|
+
def by_type(type)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Filter entities by type.
|
|
165
|
+
|
|
166
|
+
**Returns:** `ActiveRecord::Relation`
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
### in_content
|
|
171
|
+
|
|
172
|
+
```ruby
|
|
173
|
+
def in_content(content_id)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Find entities mentioned in a piece of content.
|
|
177
|
+
|
|
178
|
+
**Returns:** `Array<Models::Entity>`
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
### related_to
|
|
183
|
+
|
|
184
|
+
```ruby
|
|
185
|
+
def related_to(entity_id)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Find entities that appear in facts with the given entity.
|
|
189
|
+
|
|
190
|
+
**Returns:** `Array<Models::Entity>`
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
### semantic_search
|
|
195
|
+
|
|
196
|
+
```ruby
|
|
197
|
+
def semantic_search(query, type: nil, limit: 10)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Semantic similarity search using embeddings.
|
|
201
|
+
|
|
202
|
+
**Returns:** `Array<Models::Entity>`
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# FactService
|
|
2
|
+
|
|
3
|
+
Service for extracting and querying facts.
|
|
4
|
+
|
|
5
|
+
## Class: `FactDb::Services::FactService`
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
service = FactDb::Services::FactService.new(config)
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Attributes
|
|
12
|
+
|
|
13
|
+
| Attribute | Type | Description |
|
|
14
|
+
|-----------|------|-------------|
|
|
15
|
+
| `resolver` | FactResolver | For fact resolution operations |
|
|
16
|
+
|
|
17
|
+
## Methods
|
|
18
|
+
|
|
19
|
+
### create
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
def create(fact_text, valid_at:, invalid_at: nil, mentions: [], sources: [], confidence: 1.0, metadata: {})
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Create a new fact.
|
|
26
|
+
|
|
27
|
+
**Parameters:**
|
|
28
|
+
|
|
29
|
+
- `fact_text` (String) - The assertion
|
|
30
|
+
- `valid_at` (Date/Time) - When the fact became true
|
|
31
|
+
- `invalid_at` (Date/Time) - When the fact stopped being true (optional)
|
|
32
|
+
- `mentions` (Array) - Entity mentions
|
|
33
|
+
- `sources` (Array) - Source content links
|
|
34
|
+
- `confidence` (Float) - Extraction confidence
|
|
35
|
+
- `metadata` (Hash) - Additional data
|
|
36
|
+
|
|
37
|
+
**Returns:** `Models::Fact`
|
|
38
|
+
|
|
39
|
+
**Example:**
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
fact = service.create(
|
|
43
|
+
"Paula Chen is Principal Engineer",
|
|
44
|
+
valid_at: Date.parse("2024-01-10"),
|
|
45
|
+
mentions: [
|
|
46
|
+
{ entity: paula, role: "subject", text: "Paula Chen" }
|
|
47
|
+
],
|
|
48
|
+
sources: [
|
|
49
|
+
{ content: email, type: "primary" }
|
|
50
|
+
]
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
### find
|
|
57
|
+
|
|
58
|
+
```ruby
|
|
59
|
+
def find(id)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Find fact by ID.
|
|
63
|
+
|
|
64
|
+
**Returns:** `Models::Fact`
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
### extract_from_content
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
def extract_from_content(content_id, extractor: config.default_extractor)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Extract facts from content using the specified extractor.
|
|
75
|
+
|
|
76
|
+
**Parameters:**
|
|
77
|
+
|
|
78
|
+
- `content_id` (Integer) - Content ID
|
|
79
|
+
- `extractor` (Symbol) - Extractor type (:manual, :llm, :rule_based)
|
|
80
|
+
|
|
81
|
+
**Returns:** `Array<Models::Fact>`
|
|
82
|
+
|
|
83
|
+
**Example:**
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
facts = service.extract_from_content(content.id, extractor: :llm)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
### query
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
def query(topic: nil, at: nil, entity: nil, status: :canonical, from: nil, to: nil, limit: nil)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Query facts with filters.
|
|
98
|
+
|
|
99
|
+
**Parameters:**
|
|
100
|
+
|
|
101
|
+
- `topic` (String) - Text search
|
|
102
|
+
- `at` (Date/Time) - Point in time
|
|
103
|
+
- `entity` (Integer) - Entity ID
|
|
104
|
+
- `status` (Symbol/Array) - Status filter
|
|
105
|
+
- `from` (Date/Time) - Range start
|
|
106
|
+
- `to` (Date/Time) - Range end
|
|
107
|
+
- `limit` (Integer) - Max results
|
|
108
|
+
|
|
109
|
+
**Returns:** `ActiveRecord::Relation`
|
|
110
|
+
|
|
111
|
+
**Example:**
|
|
112
|
+
|
|
113
|
+
```ruby
|
|
114
|
+
# Current facts about Paula
|
|
115
|
+
facts = service.query(entity: paula.id, status: :canonical)
|
|
116
|
+
|
|
117
|
+
# Historical facts
|
|
118
|
+
facts = service.query(entity: paula.id, at: Date.parse("2023-06-15"))
|
|
119
|
+
|
|
120
|
+
# Facts in a range
|
|
121
|
+
facts = service.query(
|
|
122
|
+
entity: paula.id,
|
|
123
|
+
from: Date.parse("2023-01-01"),
|
|
124
|
+
to: Date.parse("2023-12-31")
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
### timeline
|
|
131
|
+
|
|
132
|
+
```ruby
|
|
133
|
+
def timeline(entity_id:, from: nil, to: nil)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Build a timeline for an entity.
|
|
137
|
+
|
|
138
|
+
**Returns:** `Array<Models::Fact>`
|
|
139
|
+
|
|
140
|
+
**Example:**
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
timeline = service.timeline(entity_id: paula.id)
|
|
144
|
+
timeline.each do |fact|
|
|
145
|
+
puts "#{fact.valid_at}: #{fact.fact_text}"
|
|
146
|
+
end
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
### from_content
|
|
152
|
+
|
|
153
|
+
```ruby
|
|
154
|
+
def from_content(content_id)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Get facts sourced from specific content.
|
|
158
|
+
|
|
159
|
+
**Returns:** `Array<Models::Fact>`
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
### semantic_search
|
|
164
|
+
|
|
165
|
+
```ruby
|
|
166
|
+
def semantic_search(query, entity: nil, limit: 10)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Semantic similarity search.
|
|
170
|
+
|
|
171
|
+
**Returns:** `Array<Models::Fact>`
|
|
172
|
+
|
|
173
|
+
## Resolver Methods
|
|
174
|
+
|
|
175
|
+
Access via `service.resolver`:
|
|
176
|
+
|
|
177
|
+
### supersede
|
|
178
|
+
|
|
179
|
+
```ruby
|
|
180
|
+
service.resolver.supersede(old_fact_id, new_text, valid_at: date)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Supersede an existing fact.
|
|
184
|
+
|
|
185
|
+
### synthesize
|
|
186
|
+
|
|
187
|
+
```ruby
|
|
188
|
+
service.resolver.synthesize(source_ids, synthesized_text, valid_at: date)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Create synthesized fact from multiple sources.
|
|
192
|
+
|
|
193
|
+
### corroborate
|
|
194
|
+
|
|
195
|
+
```ruby
|
|
196
|
+
service.resolver.corroborate(fact_id, corroborating_fact_id)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Mark a fact as corroborated by another fact.
|
|
200
|
+
|
|
201
|
+
### invalidate
|
|
202
|
+
|
|
203
|
+
```ruby
|
|
204
|
+
service.resolver.invalidate(fact_id, at: Time.current)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Invalidate a fact.
|
|
208
|
+
|
|
209
|
+
### find_conflicts
|
|
210
|
+
|
|
211
|
+
```ruby
|
|
212
|
+
service.resolver.find_conflicts(entity_id: id, topic: text)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Find potentially conflicting facts.
|
|
216
|
+
|
|
217
|
+
### resolve_conflict
|
|
218
|
+
|
|
219
|
+
```ruby
|
|
220
|
+
service.resolver.resolve_conflict(keep_id, supersede_ids, reason: text)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Resolve a conflict by keeping one fact and superseding the others.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Services
|
|
2
|
+
|
|
3
|
+
Services provide the business logic layer for FactDb operations.
|
|
4
|
+
|
|
5
|
+
## Available Services
|
|
6
|
+
|
|
7
|
+
- [ContentService](content-service.md) - Ingest and manage source content
|
|
8
|
+
- [EntityService](entity-service.md) - Create and resolve entities
|
|
9
|
+
- [FactService](fact-service.md) - Extract and query facts
|
|
10
|
+
|
|
11
|
+
## Service Pattern
|
|
12
|
+
|
|
13
|
+
All services follow a common pattern:
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
class SomeService
|
|
17
|
+
attr_reader :config
|
|
18
|
+
|
|
19
|
+
def initialize(config = FactDb.config)
|
|
20
|
+
@config = config
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Business methods...
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Accessing Services
|
|
28
|
+
|
|
29
|
+
### Via Facts
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
facts = FactDb.new
|
|
33
|
+
|
|
34
|
+
facts.content_service.create(text, type: :document)
|
|
35
|
+
facts.entity_service.create("Paula", type: :person)
|
|
36
|
+
facts.fact_service.create("Fact text", valid_at: Date.today)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Directly
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
service = FactDb::Services::ContentService.new(config)
|
|
43
|
+
content = service.create(text, type: :document)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Common Methods
|
|
47
|
+
|
|
48
|
+
Services share these common methods where applicable (see each service's page for the methods it supports):
|
|
49
|
+
|
|
50
|
+
| Method | Description |
|
|
51
|
+
|--------|-------------|
|
|
52
|
+
| `find(id)` | Find record by ID |
|
|
53
|
+
| `create(...)` | Create new record |
|
|
54
|
+
| `update(id, ...)` | Update existing record |
|
|
55
|
+
| `search(query)` | Search records |
|