fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -13,15 +13,15 @@ service = FactDb::Services::EntityService.new(config)
13
13
  ### create
14
14
 
15
15
  ```ruby
16
- def create(canonical_name, type:, aliases: [], metadata: {})
16
+ def create(name, kind:, aliases: [], metadata: {})
17
17
  ```
18
18
 
19
19
  Create a new entity.
20
20
 
21
21
  **Parameters:**
22
22
 
23
- - `canonical_name` (String) - Authoritative name
24
- - `type` (Symbol) - Entity type
23
+ - `name` (String) - Authoritative name
24
+ - `kind` (Symbol) - Entity kind
25
25
  - `aliases` (Array) - Alternative names
26
26
  - `metadata` (Hash) - Additional attributes
27
27
 
@@ -32,7 +32,7 @@ Create a new entity.
32
32
  ```ruby
33
33
  entity = service.create(
34
34
  "Paula Chen",
35
- type: :person,
35
+ kind: :person,
36
36
  aliases: ["Paula", "P. Chen"],
37
37
  metadata: { department: "Engineering" }
38
38
  )
@@ -55,7 +55,7 @@ Find entity by ID.
55
55
  ### resolve
56
56
 
57
57
  ```ruby
58
- def resolve(name, type: nil)
58
+ def resolve(name, kind: nil)
59
59
  ```
60
60
 
61
61
  Resolve a name to an entity using multiple strategies.
@@ -63,14 +63,14 @@ Resolve a name to an entity using multiple strategies.
63
63
  **Parameters:**
64
64
 
65
65
  - `name` (String) - Name to resolve
66
- - `type` (Symbol) - Optional type filter
66
+ - `kind` (Symbol) - Optional kind filter
67
67
 
68
68
  **Returns:** `Models::Entity` or `nil`
69
69
 
70
70
  **Example:**
71
71
 
72
72
  ```ruby
73
- entity = service.resolve("Paula Chen", type: :person)
73
+ entity = service.resolve("Paula Chen", kind: :person)
74
74
  ```
75
75
 
76
76
  ---
@@ -78,7 +78,7 @@ entity = service.resolve("Paula Chen", type: :person)
78
78
  ### add_alias
79
79
 
80
80
  ```ruby
81
- def add_alias(entity_id, alias_text, type: nil, confidence: 1.0)
81
+ def add_alias(entity_id, alias_name, kind: nil, confidence: 1.0)
82
82
  ```
83
83
 
84
84
  Add an alias to an entity.
@@ -86,7 +86,7 @@ Add an alias to an entity.
86
86
  **Example:**
87
87
 
88
88
  ```ruby
89
- service.add_alias(entity.id, "P. Chen", type: :abbreviation)
89
+ service.add_alias(entity.id, "P. Chen", kind: :abbreviation)
90
90
  ```
91
91
 
92
92
  ---
@@ -94,7 +94,7 @@ service.add_alias(entity.id, "P. Chen", type: :abbreviation)
94
94
  ### remove_alias
95
95
 
96
96
  ```ruby
97
- def remove_alias(entity_id, alias_text)
97
+ def remove_alias(entity_id, alias_name)
98
98
  ```
99
99
 
100
100
  Remove an alias from an entity.
@@ -130,7 +130,7 @@ Update entity attributes.
130
130
  ```ruby
131
131
  service.update(
132
132
  entity.id,
133
- canonical_name: "Paula M. Chen",
133
+ name: "Paula M. Chen",
134
134
  metadata: { title: "Senior Engineer" }
135
135
  )
136
136
  ```
@@ -140,7 +140,7 @@ service.update(
140
140
  ### search
141
141
 
142
142
  ```ruby
143
- def search(query, type: nil, limit: 20)
143
+ def search(query, kind: nil, limit: 20)
144
144
  ```
145
145
 
146
146
  Search entities by name.
@@ -148,32 +148,32 @@ Search entities by name.
148
148
  **Parameters:**
149
149
 
150
150
  - `query` (String) - Search query
151
- - `type` (Symbol) - Optional type filter
151
+ - `kind` (Symbol) - Optional kind filter
152
152
  - `limit` (Integer) - Max results
153
153
 
154
154
  **Returns:** `Array<Models::Entity>`
155
155
 
156
156
  ---
157
157
 
158
- ### by_type
158
+ ### by_kind
159
159
 
160
160
  ```ruby
161
- def by_type(type)
161
+ def by_kind(kind)
162
162
  ```
163
163
 
164
- Filter entities by type.
164
+ Filter entities by kind.
165
165
 
166
166
  **Returns:** `ActiveRecord::Relation`
167
167
 
168
168
  ---
169
169
 
170
- ### in_content
170
+ ### in_source
171
171
 
172
172
  ```ruby
173
- def in_content(content_id)
173
+ def in_source(source_id)
174
174
  ```
175
175
 
176
- Find entities mentioned in a content.
176
+ Find entities mentioned in a source.
177
177
 
178
178
  **Returns:** `Array<Models::Entity>`
179
179
 
@@ -194,7 +194,7 @@ Find entities that appear in facts with the given entity.
194
194
  ### semantic_search
195
195
 
196
196
  ```ruby
197
- def semantic_search(query, type: nil, limit: 10)
197
+ def semantic_search(query, kind: nil, limit: 10)
198
198
  ```
199
199
 
200
200
  Semantic similarity search using embeddings.
@@ -19,14 +19,14 @@ service = FactDb::Services::FactService.new(config)
19
19
  ### create
20
20
 
21
21
  ```ruby
22
- def create(fact_text, valid_at:, invalid_at: nil, mentions: [], sources: [], confidence: 1.0, metadata: {})
22
+ def create(text, valid_at:, invalid_at: nil, mentions: [], sources: [], confidence: 1.0, metadata: {})
23
23
  ```
24
24
 
25
25
  Create a new fact.
26
26
 
27
27
  **Parameters:**
28
28
 
29
- - `fact_text` (String) - The assertion
29
+ - `text` (String) - The assertion
30
30
  - `valid_at` (Date/Time) - When fact became true
31
31
  - `invalid_at` (Date/Time) - When fact stopped (optional)
32
32
  - `mentions` (Array) - Entity mentions
@@ -46,7 +46,7 @@ fact = service.create(
46
46
  { entity: paula, role: "subject", text: "Paula Chen" }
47
47
  ],
48
48
  sources: [
49
- { content: email, type: "primary" }
49
+ { source: email, type: "primary" }
50
50
  ]
51
51
  )
52
52
  ```
@@ -65,17 +65,17 @@ Find fact by ID.
65
65
 
66
66
  ---
67
67
 
68
- ### extract_from_content
68
+ ### extract_from_source
69
69
 
70
70
  ```ruby
71
- def extract_from_content(content_id, extractor: config.default_extractor)
71
+ def extract_from_source(source_id, extractor: config.default_extractor)
72
72
  ```
73
73
 
74
- Extract facts from content using specified extractor.
74
+ Extract facts from a source using the specified extractor.
75
75
 
76
76
  **Parameters:**
77
77
 
78
- - `content_id` (Integer) - Content ID
78
+ - `source_id` (Integer) - Source ID
79
79
  - `extractor` (Symbol) - Extractor type (:manual, :llm, :rule_based)
80
80
 
81
81
  **Returns:** `Array<Models::Fact>`
@@ -83,7 +83,7 @@ Extract facts from content using specified extractor.
83
83
  **Example:**
84
84
 
85
85
  ```ruby
86
- facts = service.extract_from_content(content.id, extractor: :llm)
86
+ facts = service.extract_from_source(source.id, extractor: :llm)
87
87
  ```
88
88
 
89
89
  ---
@@ -142,19 +142,19 @@ Build a timeline for an entity.
142
142
  ```ruby
143
143
  timeline = service.timeline(entity_id: paula.id)
144
144
  timeline.each do |fact|
145
- puts "#{fact.valid_at}: #{fact.fact_text}"
145
+ puts "#{fact.valid_at}: #{fact.text}"
146
146
  end
147
147
  ```
148
148
 
149
149
  ---
150
150
 
151
- ### from_content
151
+ ### from_source
152
152
 
153
153
  ```ruby
154
- def from_content(content_id)
154
+ def from_source(source_id)
155
155
  ```
156
156
 
157
- Get facts sourced from specific content.
157
+ Get facts sourced from a specific source.
158
158
 
159
159
  **Returns:** `Array<Models::Fact>`
160
160
 
@@ -4,7 +4,7 @@ Services provide the business logic layer for FactDb operations.
4
4
 
5
5
  ## Available Services
6
6
 
7
- - [ContentService](content-service.md) - Ingest and manage source content
7
+ - [SourceService](source-service.md) - Ingest and manage source content
8
8
  - [EntityService](entity-service.md) - Create and resolve entities
9
9
  - [FactService](fact-service.md) - Extract and query facts
10
10
 
@@ -26,12 +26,12 @@ end
26
26
 
27
27
  ## Accessing Services
28
28
 
29
- ### Via Facts
29
+ ### Via FactDb
30
30
 
31
31
  ```ruby
32
32
  facts = FactDb.new
33
33
 
34
- facts.content_service.create(text, type: :document)
34
+ facts.source_service.create(text, kind: :document)
35
35
  facts.entity_service.create("Paula", kind: :person)
36
36
  facts.fact_service.create("Fact text", valid_at: Date.today)
37
37
  ```
@@ -39,8 +39,8 @@ facts.fact_service.create("Fact text", valid_at: Date.today)
39
39
  ### Directly
40
40
 
41
41
  ```ruby
42
- service = FactDb::Services::ContentService.new(config)
43
- content = service.create(text, type: :document)
42
+ service = FactDb::Services::SourceService.new(config)
43
+ source = service.create(text, kind: :document)
44
44
  ```
45
45
 
46
46
  ## Common Methods
@@ -1,11 +1,11 @@
1
- # ContentService
1
+ # SourceService
2
2
 
3
3
  Service for ingesting and managing source content.
4
4
 
5
- ## Class: `FactDb::Services::ContentService`
5
+ ## Class: `FactDb::Services::SourceService`
6
6
 
7
7
  ```ruby
8
- service = FactDb::Services::ContentService.new(config)
8
+ service = FactDb::Services::SourceService.new(config)
9
9
  ```
10
10
 
11
11
  ## Methods
@@ -13,28 +13,28 @@ service = FactDb::Services::ContentService.new(config)
13
13
  ### create
14
14
 
15
15
  ```ruby
16
- def create(raw_text, type:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
16
+ def create(content, kind:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
17
17
  ```
18
18
 
19
- Create new content with automatic deduplication.
19
+ Create a new source with automatic deduplication.
20
20
 
21
21
  **Parameters:**
22
22
 
23
- - `raw_text` (String) - Content text
24
- - `type` (Symbol) - Content type
23
+ - `content` (String) - Source text content
24
+ - `kind` (Symbol) - Source kind
25
25
  - `captured_at` (Time) - Capture timestamp
26
26
  - `metadata` (Hash) - Additional metadata
27
27
  - `title` (String) - Optional title
28
28
  - `source_uri` (String) - Original location
29
29
 
30
- **Returns:** `Models::Content`
30
+ **Returns:** `Models::Source`
31
31
 
32
32
  **Example:**
33
33
 
34
34
  ```ruby
35
- content = service.create(
35
+ source = service.create(
36
36
  "Email body text...",
37
- type: :email,
37
+ kind: :email,
38
38
  title: "RE: Important",
39
39
  metadata: { from: "sender@example.com" }
40
40
  )
@@ -48,9 +48,9 @@ content = service.create(
48
48
  def find(id)
49
49
  ```
50
50
 
51
- Find content by ID.
51
+ Find source by ID.
52
52
 
53
- **Returns:** `Models::Content`
53
+ **Returns:** `Models::Source`
54
54
 
55
55
  ---
56
56
 
@@ -60,15 +60,15 @@ Find content by ID.
60
60
  def find_by_hash(hash)
61
61
  ```
62
62
 
63
- Find content by SHA256 hash.
63
+ Find source by SHA256 hash.
64
64
 
65
- **Returns:** `Models::Content` or `nil`
65
+ **Returns:** `Models::Source` or `nil`
66
66
 
67
67
  **Example:**
68
68
 
69
69
  ```ruby
70
70
  hash = Digest::SHA256.hexdigest(text)
71
- content = service.find_by_hash(hash)
71
+ source = service.find_by_hash(hash)
72
72
  ```
73
73
 
74
74
  ---
@@ -79,14 +79,14 @@ content = service.find_by_hash(hash)
79
79
  def search(query, limit: 20)
80
80
  ```
81
81
 
82
- Full-text search content.
82
+ Full-text search sources.
83
83
 
84
84
  **Parameters:**
85
85
 
86
86
  - `query` (String) - Search query
87
87
  - `limit` (Integer) - Max results
88
88
 
89
- **Returns:** `Array<Models::Content>`
89
+ **Returns:** `Array<Models::Source>`
90
90
 
91
91
  **Example:**
92
92
 
@@ -109,7 +109,7 @@ Semantic similarity search using embeddings.
109
109
  - `query` (String) - Search query
110
110
  - `limit` (Integer) - Max results
111
111
 
112
- **Returns:** `Array<Models::Content>`
112
+ **Returns:** `Array<Models::Source>`
113
113
 
114
114
  **Example:**
115
115
 
@@ -119,20 +119,20 @@ results = service.semantic_search("financial performance")
119
119
 
120
120
  ---
121
121
 
122
- ### by_type
122
+ ### by_kind
123
123
 
124
124
  ```ruby
125
- def by_type(type)
125
+ def by_kind(kind)
126
126
  ```
127
127
 
128
- Filter content by type.
128
+ Filter sources by kind.
129
129
 
130
130
  **Returns:** `ActiveRecord::Relation`
131
131
 
132
132
  **Example:**
133
133
 
134
134
  ```ruby
135
- emails = service.by_type(:email)
135
+ emails = service.by_kind(:email)
136
136
  ```
137
137
 
138
138
  ---
@@ -143,9 +143,9 @@ emails = service.by_type(:email)
143
143
  def recent(limit: 20)
144
144
  ```
145
145
 
146
- Get recently captured content.
146
+ Get recently captured sources.
147
147
 
148
- **Returns:** `Array<Models::Content>`
148
+ **Returns:** `Array<Models::Source>`
149
149
 
150
150
  ---
151
151
 
@@ -155,12 +155,12 @@ Get recently captured content.
155
155
  def mentioning_entity(entity_id)
156
156
  ```
157
157
 
158
- Find content that mentions an entity (via facts).
158
+ Find sources that mention an entity (via facts).
159
159
 
160
- **Returns:** `Array<Models::Content>`
160
+ **Returns:** `Array<Models::Source>`
161
161
 
162
162
  **Example:**
163
163
 
164
164
  ```ruby
165
- paula_content = service.mentioning_entity(paula.id)
165
+ paula_sources = service.mentioning_entity(paula.id)
166
166
  ```
@@ -6,21 +6,21 @@ FactDb uses PostgreSQL with the pgvector extension for semantic search capabilit
6
6
 
7
7
  ```mermaid
8
8
  erDiagram
9
- contents ||--o{ fact_sources : "sourced by"
9
+ sources ||--o{ fact_sources : "sourced by"
10
10
  entities ||--o{ entity_aliases : "has"
11
11
  entities ||--o{ entity_mentions : "mentioned in"
12
12
  facts ||--o{ entity_mentions : "mentions"
13
13
  facts ||--o{ fact_sources : "sourced from"
14
14
  facts ||--o| facts : "superseded by"
15
15
 
16
- contents {
16
+ sources {
17
17
  bigint id PK
18
18
  string content_hash UK
19
- string content_type
20
- text raw_text
19
+ string type
20
+ text content
21
21
  string title
22
22
  string source_uri
23
- jsonb source_metadata
23
+ jsonb metadata
24
24
  vector embedding
25
25
  timestamptz captured_at
26
26
  timestamptz created_at
@@ -28,10 +28,10 @@ erDiagram
28
28
 
29
29
  entities {
30
30
  bigint id PK
31
- string canonical_name
32
- string entity_type
31
+ string name
32
+ string type
33
33
  string resolution_status
34
- bigint merged_into_id FK
34
+ bigint canonical_id FK
35
35
  jsonb metadata
36
36
  vector embedding
37
37
  timestamptz created_at
@@ -40,15 +40,15 @@ erDiagram
40
40
  entity_aliases {
41
41
  bigint id PK
42
42
  bigint entity_id FK
43
- string alias_text
44
- string alias_type
43
+ string name
44
+ string type
45
45
  float confidence
46
46
  }
47
47
 
48
48
  facts {
49
49
  bigint id PK
50
- text fact_text
51
- string fact_hash
50
+ text text
51
+ string digest
52
52
  timestamptz valid_at
53
53
  timestamptz invalid_at
54
54
  string status
@@ -74,8 +74,8 @@ erDiagram
74
74
  fact_sources {
75
75
  bigint id PK
76
76
  bigint fact_id FK
77
- bigint content_id FK
78
- string source_type
77
+ bigint source_id FK
78
+ string kind
79
79
  text excerpt
80
80
  float confidence
81
81
  }
@@ -83,28 +83,28 @@ erDiagram
83
83
 
84
84
  ## Tables
85
85
 
86
- ### contents
86
+ ### sources
87
87
 
88
- Stores immutable source documents.
88
+ Stores immutable source content.
89
89
 
90
90
  ```sql
91
- CREATE TABLE contents (
91
+ CREATE TABLE sources (
92
92
  id BIGSERIAL PRIMARY KEY,
93
93
  content_hash VARCHAR(64) NOT NULL UNIQUE,
94
- content_type VARCHAR(50) NOT NULL,
95
- raw_text TEXT NOT NULL,
94
+ type VARCHAR(50) NOT NULL,
95
+ content TEXT NOT NULL,
96
96
  title VARCHAR(255),
97
97
  source_uri TEXT,
98
- source_metadata JSONB NOT NULL DEFAULT '{}',
98
+ metadata JSONB NOT NULL DEFAULT '{}',
99
99
  embedding VECTOR(1536),
100
100
  captured_at TIMESTAMPTZ NOT NULL,
101
101
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
102
102
  );
103
103
 
104
- CREATE INDEX idx_contents_type ON contents(content_type);
105
- CREATE INDEX idx_contents_captured ON contents(captured_at);
106
- CREATE INDEX idx_contents_text ON contents USING gin(to_tsvector('english', raw_text));
107
- CREATE INDEX idx_contents_embedding ON contents USING hnsw(embedding vector_cosine_ops);
104
+ CREATE INDEX idx_sources_type ON sources(type);
105
+ CREATE INDEX idx_sources_captured ON sources(captured_at);
106
+ CREATE INDEX idx_sources_text ON sources USING gin(to_tsvector('english', content));
107
+ CREATE INDEX idx_sources_embedding ON sources USING hnsw(embedding vector_cosine_ops);
108
108
  ```
109
109
 
110
110
  ### entities
@@ -114,17 +114,17 @@ Stores resolved identities.
114
114
  ```sql
115
115
  CREATE TABLE entities (
116
116
  id BIGSERIAL PRIMARY KEY,
117
- canonical_name VARCHAR(255) NOT NULL,
118
- entity_type VARCHAR(50) NOT NULL,
117
+ name VARCHAR(255) NOT NULL,
118
+ type VARCHAR(50) NOT NULL,
119
119
  resolution_status VARCHAR(20) NOT NULL DEFAULT 'unresolved',
120
- merged_into_id BIGINT REFERENCES entities(id),
120
+ canonical_id BIGINT REFERENCES entities(id),
121
121
  metadata JSONB NOT NULL DEFAULT '{}',
122
122
  embedding VECTOR(1536),
123
123
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
124
124
  );
125
125
 
126
- CREATE INDEX idx_entities_name ON entities(canonical_name);
127
- CREATE INDEX idx_entities_type ON entities(entity_type);
126
+ CREATE INDEX idx_entities_name ON entities(name);
127
+ CREATE INDEX idx_entities_type ON entities(type);
128
128
  CREATE INDEX idx_entities_status ON entities(resolution_status);
129
129
  CREATE INDEX idx_entities_embedding ON entities USING hnsw(embedding vector_cosine_ops);
130
130
  ```
@@ -137,14 +137,14 @@ Stores alternative names for entities.
137
137
  CREATE TABLE entity_aliases (
138
138
  id BIGSERIAL PRIMARY KEY,
139
139
  entity_id BIGINT NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
140
- alias_text VARCHAR(255) NOT NULL,
141
- alias_type VARCHAR(50),
140
+ name VARCHAR(255) NOT NULL,
141
+ type VARCHAR(50),
142
142
  confidence FLOAT DEFAULT 1.0
143
143
  );
144
144
 
145
145
  CREATE INDEX idx_aliases_entity ON entity_aliases(entity_id);
146
- CREATE INDEX idx_aliases_text ON entity_aliases(alias_text);
147
- CREATE UNIQUE INDEX idx_aliases_unique ON entity_aliases(entity_id, alias_text);
146
+ CREATE INDEX idx_aliases_text ON entity_aliases(name);
147
+ CREATE UNIQUE INDEX idx_aliases_unique ON entity_aliases(entity_id, name);
148
148
  ```
149
149
 
150
150
  ### facts
@@ -154,8 +154,8 @@ Stores temporal assertions.
154
154
  ```sql
155
155
  CREATE TABLE facts (
156
156
  id BIGSERIAL PRIMARY KEY,
157
- fact_text TEXT NOT NULL,
158
- fact_hash VARCHAR(64) NOT NULL,
157
+ text TEXT NOT NULL,
158
+ digest VARCHAR(64) NOT NULL,
159
159
  valid_at TIMESTAMPTZ NOT NULL,
160
160
  invalid_at TIMESTAMPTZ,
161
161
  status VARCHAR(20) NOT NULL DEFAULT 'canonical',
@@ -174,7 +174,7 @@ CREATE INDEX idx_facts_valid ON facts(valid_at);
174
174
  CREATE INDEX idx_facts_invalid ON facts(invalid_at);
175
175
  CREATE INDEX idx_facts_temporal ON facts(valid_at, invalid_at);
176
176
  CREATE INDEX idx_facts_method ON facts(extraction_method);
177
- CREATE INDEX idx_facts_text ON facts USING gin(to_tsvector('english', fact_text));
177
+ CREATE INDEX idx_facts_text ON facts USING gin(to_tsvector('english', text));
178
178
  CREATE INDEX idx_facts_embedding ON facts USING hnsw(embedding vector_cosine_ops);
179
179
  ```
180
180
 
@@ -205,15 +205,15 @@ Links facts to source content.
205
205
  CREATE TABLE fact_sources (
206
206
  id BIGSERIAL PRIMARY KEY,
207
207
  fact_id BIGINT NOT NULL REFERENCES facts(id) ON DELETE CASCADE,
208
- content_id BIGINT NOT NULL REFERENCES contents(id),
209
- source_type VARCHAR(50) NOT NULL DEFAULT 'primary',
208
+ source_id BIGINT NOT NULL REFERENCES sources(id),
209
+ kind VARCHAR(50) NOT NULL DEFAULT 'primary',
210
210
  excerpt TEXT,
211
211
  confidence FLOAT DEFAULT 1.0
212
212
  );
213
213
 
214
- CREATE INDEX idx_sources_fact ON fact_sources(fact_id);
215
- CREATE INDEX idx_sources_content ON fact_sources(content_id);
216
- CREATE INDEX idx_sources_type ON fact_sources(source_type);
214
+ CREATE INDEX idx_fact_sources_fact ON fact_sources(fact_id);
215
+ CREATE INDEX idx_fact_sources_source ON fact_sources(source_id);
216
+ CREATE INDEX idx_fact_sources_kind ON fact_sources(kind);
217
217
  ```
218
218
 
219
219
  ## Vector Indexes
@@ -221,8 +221,8 @@ CREATE INDEX idx_sources_type ON fact_sources(source_type);
221
221
  FactDb uses HNSW indexes for fast approximate nearest neighbor search:
222
222
 
223
223
  ```sql
224
- -- Contents semantic search
225
- CREATE INDEX idx_contents_embedding ON contents
224
+ -- Sources semantic search
225
+ CREATE INDEX idx_sources_embedding ON sources
226
226
  USING hnsw(embedding vector_cosine_ops)
227
227
  WITH (m = 16, ef_construction = 64);
228
228
 
@@ -269,7 +269,7 @@ ORDER BY f.valid_at ASC;
269
269
 
270
270
  ```sql
271
271
  SELECT *, embedding <=> '[...]' AS distance
272
- FROM contents
272
+ FROM sources
273
273
  ORDER BY embedding <=> '[...]'
274
274
  LIMIT 10;
275
275
  ```
@@ -279,7 +279,7 @@ LIMIT 10;
279
279
  ### Vacuum and Analyze
280
280
 
281
281
  ```sql
282
- VACUUM ANALYZE contents;
282
+ VACUUM ANALYZE sources;
283
283
  VACUUM ANALYZE entities;
284
284
  VACUUM ANALYZE facts;
285
285
  ```
@@ -287,7 +287,7 @@ VACUUM ANALYZE facts;
287
287
  ### Reindex Vectors
288
288
 
289
289
  ```sql
290
- REINDEX INDEX idx_contents_embedding;
290
+ REINDEX INDEX idx_sources_embedding;
291
291
  REINDEX INDEX idx_entities_embedding;
292
292
  REINDEX INDEX idx_facts_embedding;
293
293
  ```
@@ -33,7 +33,7 @@ Direct match against canonical names:
33
33
  ```ruby
34
34
  # Looking for "Microsoft"
35
35
  entity = facts.resolve_entity("Microsoft")
36
- # Matches: Entity(canonical_name: "Microsoft")
36
+ # Matches: Entity(name: "Microsoft")
37
37
  ```
38
38
 
39
39
  ### 2. Alias Match
@@ -130,7 +130,7 @@ facts.entity_service.add_alias(
130
130
 
131
131
  ```ruby
132
132
  entity.entity_aliases.each do |alias_record|
133
- puts "#{alias_record.alias_text} (#{alias_record.alias_type})"
133
+ puts "#{alias_record.name} (#{alias_record.type})"
134
134
  end
135
135
  ```
136
136
 
@@ -153,7 +153,7 @@ facts.entity_service.merge(
153
153
 
154
154
  # After merge:
155
155
  # - entity2.resolution_status => "merged"
156
- # - entity2.merged_into_id => entity1.id
156
+ # - entity2.canonical_id => entity1.id
157
157
  # - All facts mentioning entity2 now also reference entity1
158
158
  ```
159
159
 
@@ -193,11 +193,11 @@ fact = facts.fact_service.create(
193
193
  The LLM extractor resolves mentions automatically:
194
194
 
195
195
  ```ruby
196
- extracted = facts.extract_facts(content.id, extractor: :llm)
196
+ extracted = facts.extract_facts(source.id, extractor: :llm)
197
197
 
198
198
  extracted.each do |fact|
199
199
  fact.entity_mentions.each do |mention|
200
- puts "Resolved '#{mention.mention_text}' to #{mention.entity.canonical_name}"
200
+ puts "Resolved '#{mention.mention_text}' to #{mention.entity.name}"
201
201
  puts " Role: #{mention.mention_role}"
202
202
  puts " Confidence: #{mention.confidence}"
203
203
  end
@@ -240,7 +240,7 @@ results = facts.batch_resolve_entities(names)
240
240
 
241
241
  results.each do |result|
242
242
  puts "#{result[:name]}: #{result[:status]}"
243
- puts " Entity: #{result[:entity]&.canonical_name}"
243
+ puts " Entity: #{result[:entity]&.name}"
244
244
  end
245
245
  ```
246
246