fact_db 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/CHANGELOG.md +48 -0
  4. data/COMMITS.md +196 -0
  5. data/README.md +102 -0
  6. data/Rakefile +41 -0
  7. data/db/migrate/001_enable_extensions.rb +7 -0
  8. data/db/migrate/002_create_contents.rb +44 -0
  9. data/db/migrate/003_create_entities.rb +36 -0
  10. data/db/migrate/004_create_entity_aliases.rb +18 -0
  11. data/db/migrate/005_create_facts.rb +65 -0
  12. data/db/migrate/006_create_entity_mentions.rb +18 -0
  13. data/db/migrate/007_create_fact_sources.rb +18 -0
  14. data/docs/api/extractors/index.md +71 -0
  15. data/docs/api/extractors/llm.md +162 -0
  16. data/docs/api/extractors/manual.md +92 -0
  17. data/docs/api/extractors/rule-based.md +165 -0
  18. data/docs/api/facts.md +300 -0
  19. data/docs/api/index.md +66 -0
  20. data/docs/api/models/content.md +165 -0
  21. data/docs/api/models/entity.md +202 -0
  22. data/docs/api/models/fact.md +270 -0
  23. data/docs/api/models/index.md +77 -0
  24. data/docs/api/pipeline/extraction.md +175 -0
  25. data/docs/api/pipeline/index.md +72 -0
  26. data/docs/api/pipeline/resolution.md +209 -0
  27. data/docs/api/services/content-service.md +166 -0
  28. data/docs/api/services/entity-service.md +202 -0
  29. data/docs/api/services/fact-service.md +223 -0
  30. data/docs/api/services/index.md +55 -0
  31. data/docs/architecture/database-schema.md +293 -0
  32. data/docs/architecture/entity-resolution.md +293 -0
  33. data/docs/architecture/index.md +149 -0
  34. data/docs/architecture/temporal-facts.md +268 -0
  35. data/docs/architecture/three-layer-model.md +242 -0
  36. data/docs/assets/css/custom.css +137 -0
  37. data/docs/assets/fact_db.jpg +0 -0
  38. data/docs/assets/images/fact_db.jpg +0 -0
  39. data/docs/concepts.md +183 -0
  40. data/docs/examples/basic-usage.md +235 -0
  41. data/docs/examples/hr-onboarding.md +312 -0
  42. data/docs/examples/index.md +64 -0
  43. data/docs/examples/news-analysis.md +288 -0
  44. data/docs/getting-started/database-setup.md +170 -0
  45. data/docs/getting-started/index.md +71 -0
  46. data/docs/getting-started/installation.md +98 -0
  47. data/docs/getting-started/quick-start.md +191 -0
  48. data/docs/guides/batch-processing.md +325 -0
  49. data/docs/guides/configuration.md +243 -0
  50. data/docs/guides/entity-management.md +364 -0
  51. data/docs/guides/extracting-facts.md +299 -0
  52. data/docs/guides/index.md +22 -0
  53. data/docs/guides/ingesting-content.md +252 -0
  54. data/docs/guides/llm-integration.md +299 -0
  55. data/docs/guides/temporal-queries.md +315 -0
  56. data/docs/index.md +121 -0
  57. data/examples/README.md +130 -0
  58. data/examples/basic_usage.rb +164 -0
  59. data/examples/entity_management.rb +216 -0
  60. data/examples/hr_system.rb +428 -0
  61. data/examples/rule_based_extraction.rb +258 -0
  62. data/examples/temporal_queries.rb +245 -0
  63. data/lib/fact_db/config.rb +71 -0
  64. data/lib/fact_db/database.rb +45 -0
  65. data/lib/fact_db/errors.rb +10 -0
  66. data/lib/fact_db/extractors/base.rb +117 -0
  67. data/lib/fact_db/extractors/llm_extractor.rb +179 -0
  68. data/lib/fact_db/extractors/manual_extractor.rb +53 -0
  69. data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
  70. data/lib/fact_db/llm/adapter.rb +109 -0
  71. data/lib/fact_db/models/content.rb +62 -0
  72. data/lib/fact_db/models/entity.rb +84 -0
  73. data/lib/fact_db/models/entity_alias.rb +26 -0
  74. data/lib/fact_db/models/entity_mention.rb +33 -0
  75. data/lib/fact_db/models/fact.rb +192 -0
  76. data/lib/fact_db/models/fact_source.rb +35 -0
  77. data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
  78. data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
  79. data/lib/fact_db/resolution/entity_resolver.rb +261 -0
  80. data/lib/fact_db/resolution/fact_resolver.rb +259 -0
  81. data/lib/fact_db/services/content_service.rb +93 -0
  82. data/lib/fact_db/services/entity_service.rb +150 -0
  83. data/lib/fact_db/services/fact_service.rb +193 -0
  84. data/lib/fact_db/temporal/query.rb +125 -0
  85. data/lib/fact_db/temporal/timeline.rb +134 -0
  86. data/lib/fact_db/version.rb +5 -0
  87. data/lib/fact_db.rb +141 -0
  88. data/mkdocs.yml +198 -0
  89. metadata +288 -0
@@ -0,0 +1,202 @@
1
+ # EntityService
2
+
3
+ Service for creating and resolving entities.
4
+
5
+ ## Class: `FactDb::Services::EntityService`
6
+
7
+ ```ruby
8
+ service = FactDb::Services::EntityService.new(config)
9
+ ```
10
+
11
+ ## Methods
12
+
13
+ ### create
14
+
15
+ ```ruby
16
+ def create(canonical_name, type:, aliases: [], metadata: {})
17
+ ```
18
+
19
+ Create a new entity.
20
+
21
+ **Parameters:**
22
+
23
+ - `canonical_name` (String) - Authoritative name
24
+ - `type` (Symbol) - Entity type
25
+ - `aliases` (Array) - Alternative names
26
+ - `metadata` (Hash) - Additional attributes
27
+
28
+ **Returns:** `Models::Entity`
29
+
30
+ **Example:**
31
+
32
+ ```ruby
33
+ entity = service.create(
34
+ "Paula Chen",
35
+ type: :person,
36
+ aliases: ["Paula", "P. Chen"],
37
+ metadata: { department: "Engineering" }
38
+ )
39
+ ```
40
+
41
+ ---
42
+
43
+ ### find
44
+
45
+ ```ruby
46
+ def find(id)
47
+ ```
48
+
49
+ Find entity by ID.
50
+
51
+ **Returns:** `Models::Entity`
52
+
53
+ ---
54
+
55
+ ### resolve
56
+
57
+ ```ruby
58
+ def resolve(name, type: nil)
59
+ ```
60
+
61
+ Resolve a name to an entity using multiple strategies.
62
+
63
+ **Parameters:**
64
+
65
+ - `name` (String) - Name to resolve
66
+ - `type` (Symbol) - Optional type filter
67
+
68
+ **Returns:** `Models::Entity` or `nil`
69
+
70
+ **Example:**
71
+
72
+ ```ruby
73
+ entity = service.resolve("Paula Chen", type: :person)
74
+ ```
75
+
76
+ ---
77
+
78
+ ### add_alias
79
+
80
+ ```ruby
81
+ def add_alias(entity_id, alias_text, type: nil, confidence: 1.0)
82
+ ```
83
+
84
+ Add an alias to an entity.
85
+
86
+ **Example:**
87
+
88
+ ```ruby
89
+ service.add_alias(entity.id, "P. Chen", type: :abbreviation)
90
+ ```
91
+
92
+ ---
93
+
94
+ ### remove_alias
95
+
96
+ ```ruby
97
+ def remove_alias(entity_id, alias_text)
98
+ ```
99
+
100
+ Remove an alias from an entity.
101
+
102
+ ---
103
+
104
+ ### merge
105
+
106
+ ```ruby
107
+ def merge(keep_id, merge_id)
108
+ ```
109
+
110
+ Merge two entities (merge_id into keep_id).
111
+
112
+ **Example:**
113
+
114
+ ```ruby
115
+ service.merge(canonical_entity.id, duplicate_entity.id)
116
+ ```
117
+
118
+ ---
119
+
120
+ ### update
121
+
122
+ ```ruby
123
+ def update(id, **attributes)
124
+ ```
125
+
126
+ Update entity attributes.
127
+
128
+ **Example:**
129
+
130
+ ```ruby
131
+ service.update(
132
+ entity.id,
133
+ canonical_name: "Paula M. Chen",
134
+ metadata: { title: "Senior Engineer" }
135
+ )
136
+ ```
137
+
138
+ ---
139
+
140
+ ### search
141
+
142
+ ```ruby
143
+ def search(query, type: nil, limit: 20)
144
+ ```
145
+
146
+ Search entities by name.
147
+
148
+ **Parameters:**
149
+
150
+ - `query` (String) - Search query
151
+ - `type` (Symbol) - Optional type filter
152
+ - `limit` (Integer) - Max results
153
+
154
+ **Returns:** `Array<Models::Entity>`
155
+
156
+ ---
157
+
158
+ ### by_type
159
+
160
+ ```ruby
161
+ def by_type(type)
162
+ ```
163
+
164
+ Filter entities by type.
165
+
166
+ **Returns:** `ActiveRecord::Relation`
167
+
168
+ ---
169
+
170
+ ### in_content
171
+
172
+ ```ruby
173
+ def in_content(content_id)
174
+ ```
175
+
176
+ Find entities mentioned in a piece of content.
177
+
178
+ **Returns:** `Array<Models::Entity>`
179
+
180
+ ---
181
+
182
+ ### related_to
183
+
184
+ ```ruby
185
+ def related_to(entity_id)
186
+ ```
187
+
188
+ Find entities that appear in the same facts as the given entity.
189
+
190
+ **Returns:** `Array<Models::Entity>`
191
+
192
+ ---
193
+
194
+ ### semantic_search
195
+
196
+ ```ruby
197
+ def semantic_search(query, type: nil, limit: 10)
198
+ ```
199
+
200
+ Semantic similarity search using embeddings.
201
+
202
+ **Returns:** `Array<Models::Entity>`
@@ -0,0 +1,223 @@
1
+ # FactService
2
+
3
+ Service for extracting and querying facts.
4
+
5
+ ## Class: `FactDb::Services::FactService`
6
+
7
+ ```ruby
8
+ service = FactDb::Services::FactService.new(config)
9
+ ```
10
+
11
+ ## Attributes
12
+
13
+ | Attribute | Type | Description |
14
+ |-----------|------|-------------|
15
+ | `resolver` | FactResolver | For fact resolution operations |
16
+
17
+ ## Methods
18
+
19
+ ### create
20
+
21
+ ```ruby
22
+ def create(fact_text, valid_at:, invalid_at: nil, mentions: [], sources: [], confidence: 1.0, metadata: {})
23
+ ```
24
+
25
+ Create a new fact.
26
+
27
+ **Parameters:**
28
+
29
+ - `fact_text` (String) - The assertion
30
+ - `valid_at` (Date/Time) - When fact became true
31
+ - `invalid_at` (Date/Time) - When the fact stopped being true (optional)
32
+ - `mentions` (Array) - Entity mentions
33
+ - `sources` (Array) - Source content links
34
+ - `confidence` (Float) - Extraction confidence
35
+ - `metadata` (Hash) - Additional data
36
+
37
+ **Returns:** `Models::Fact`
38
+
39
+ **Example:**
40
+
41
+ ```ruby
42
+ fact = service.create(
43
+ "Paula Chen is Principal Engineer",
44
+ valid_at: Date.parse("2024-01-10"),
45
+ mentions: [
46
+ { entity: paula, role: "subject", text: "Paula Chen" }
47
+ ],
48
+ sources: [
49
+ { content: email, type: "primary" }
50
+ ]
51
+ )
52
+ ```
53
+
54
+ ---
55
+
56
+ ### find
57
+
58
+ ```ruby
59
+ def find(id)
60
+ ```
61
+
62
+ Find fact by ID.
63
+
64
+ **Returns:** `Models::Fact`
65
+
66
+ ---
67
+
68
+ ### extract_from_content
69
+
70
+ ```ruby
71
+ def extract_from_content(content_id, extractor: config.default_extractor)
72
+ ```
73
+
74
+ Extract facts from content using specified extractor.
75
+
76
+ **Parameters:**
77
+
78
+ - `content_id` (Integer) - Content ID
79
+ - `extractor` (Symbol) - Extractor type (:manual, :llm, :rule_based)
80
+
81
+ **Returns:** `Array<Models::Fact>`
82
+
83
+ **Example:**
84
+
85
+ ```ruby
86
+ facts = service.extract_from_content(content.id, extractor: :llm)
87
+ ```
88
+
89
+ ---
90
+
91
+ ### query
92
+
93
+ ```ruby
94
+ def query(topic: nil, at: nil, entity: nil, status: :canonical, from: nil, to: nil, limit: nil)
95
+ ```
96
+
97
+ Query facts with filters.
98
+
99
+ **Parameters:**
100
+
101
+ - `topic` (String) - Text search
102
+ - `at` (Date/Time) - Point in time
103
+ - `entity` (Integer) - Entity ID
104
+ - `status` (Symbol/Array) - Status filter
105
+ - `from` (Date/Time) - Range start
106
+ - `to` (Date/Time) - Range end
107
+ - `limit` (Integer) - Max results
108
+
109
+ **Returns:** `ActiveRecord::Relation`
110
+
111
+ **Example:**
112
+
113
+ ```ruby
114
+ # Current facts about Paula
115
+ facts = service.query(entity: paula.id, status: :canonical)
116
+
117
+ # Historical facts
118
+ facts = service.query(entity: paula.id, at: Date.parse("2023-06-15"))
119
+
120
+ # Facts in a range
121
+ facts = service.query(
122
+ entity: paula.id,
123
+ from: Date.parse("2023-01-01"),
124
+ to: Date.parse("2023-12-31")
125
+ )
126
+ ```
127
+
128
+ ---
129
+
130
+ ### timeline
131
+
132
+ ```ruby
133
+ def timeline(entity_id:, from: nil, to: nil)
134
+ ```
135
+
136
+ Build a timeline for an entity.
137
+
138
+ **Returns:** `Array<Models::Fact>`
139
+
140
+ **Example:**
141
+
142
+ ```ruby
143
+ timeline = service.timeline(entity_id: paula.id)
144
+ timeline.each do |fact|
145
+ puts "#{fact.valid_at}: #{fact.fact_text}"
146
+ end
147
+ ```
148
+
149
+ ---
150
+
151
+ ### from_content
152
+
153
+ ```ruby
154
+ def from_content(content_id)
155
+ ```
156
+
157
+ Get facts sourced from specific content.
158
+
159
+ **Returns:** `Array<Models::Fact>`
160
+
161
+ ---
162
+
163
+ ### semantic_search
164
+
165
+ ```ruby
166
+ def semantic_search(query, entity: nil, limit: 10)
167
+ ```
168
+
169
+ Semantic similarity search.
170
+
171
+ **Returns:** `Array<Models::Fact>`
172
+
173
+ ## Resolver Methods
174
+
175
+ Access via `service.resolver`:
176
+
177
+ ### supersede
178
+
179
+ ```ruby
180
+ service.resolver.supersede(old_fact_id, new_text, valid_at: date)
181
+ ```
182
+
183
+ Supersede an existing fact.
184
+
185
+ ### synthesize
186
+
187
+ ```ruby
188
+ service.resolver.synthesize(source_ids, synthesized_text, valid_at: date)
189
+ ```
190
+
191
+ Create synthesized fact from multiple sources.
192
+
193
+ ### corroborate
194
+
195
+ ```ruby
196
+ service.resolver.corroborate(fact_id, corroborating_fact_id)
197
+ ```
198
+
199
+ Mark fact as corroborated.
200
+
201
+ ### invalidate
202
+
203
+ ```ruby
204
+ service.resolver.invalidate(fact_id, at: Time.current)
205
+ ```
206
+
207
+ Invalidate a fact.
208
+
209
+ ### find_conflicts
210
+
211
+ ```ruby
212
+ service.resolver.find_conflicts(entity_id: id, topic: text)
213
+ ```
214
+
215
+ Find potentially conflicting facts.
216
+
217
+ ### resolve_conflict
218
+
219
+ ```ruby
220
+ service.resolver.resolve_conflict(keep_id, supersede_ids, reason: text)
221
+ ```
222
+
223
+ Resolve conflicts by keeping one fact.
@@ -0,0 +1,55 @@
1
+ # Services
2
+
3
+ Services provide the business logic layer for FactDb operations.
4
+
5
+ ## Available Services
6
+
7
+ - [ContentService](content-service.md) - Ingest and manage source content
8
+ - [EntityService](entity-service.md) - Create and resolve entities
9
+ - [FactService](fact-service.md) - Extract and query facts
10
+
11
+ ## Service Pattern
12
+
13
+ All services follow a common pattern:
14
+
15
+ ```ruby
16
+ class SomeService
17
+ attr_reader :config
18
+
19
+ def initialize(config = FactDb.config)
20
+ @config = config
21
+ end
22
+
23
+ # Business methods...
24
+ end
25
+ ```
26
+
27
+ ## Accessing Services
28
+
29
+ ### Via Facts
30
+
31
+ ```ruby
32
+ facts = FactDb.new
33
+
34
+ facts.content_service.create(text, type: :document)
35
+ facts.entity_service.create("Paula", type: :person)
36
+ facts.fact_service.create("Fact text", valid_at: Date.today)
37
+ ```
38
+
39
+ ### Directly
40
+
41
+ ```ruby
42
+ service = FactDb::Services::ContentService.new(config)
43
+ content = service.create(text, type: :document)
44
+ ```
45
+
46
+ ## Common Methods
47
+
48
+ Services share these common methods where applicable (see each service's page for its exact API):
49
+
50
+ | Method | Description |
51
+ |--------|-------------|
52
+ | `find(id)` | Find record by ID |
53
+ | `create(...)` | Create new record |
54
+ | `update(id, ...)` | Update existing record |
55
+ | `search(query)` | Search records |