fact_db 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/CHANGELOG.md +48 -0
  4. data/COMMITS.md +196 -0
  5. data/README.md +102 -0
  6. data/Rakefile +41 -0
  7. data/db/migrate/001_enable_extensions.rb +7 -0
  8. data/db/migrate/002_create_contents.rb +44 -0
  9. data/db/migrate/003_create_entities.rb +36 -0
  10. data/db/migrate/004_create_entity_aliases.rb +18 -0
  11. data/db/migrate/005_create_facts.rb +65 -0
  12. data/db/migrate/006_create_entity_mentions.rb +18 -0
  13. data/db/migrate/007_create_fact_sources.rb +18 -0
  14. data/docs/api/extractors/index.md +71 -0
  15. data/docs/api/extractors/llm.md +162 -0
  16. data/docs/api/extractors/manual.md +92 -0
  17. data/docs/api/extractors/rule-based.md +165 -0
  18. data/docs/api/facts.md +300 -0
  19. data/docs/api/index.md +66 -0
  20. data/docs/api/models/content.md +165 -0
  21. data/docs/api/models/entity.md +202 -0
  22. data/docs/api/models/fact.md +270 -0
  23. data/docs/api/models/index.md +77 -0
  24. data/docs/api/pipeline/extraction.md +175 -0
  25. data/docs/api/pipeline/index.md +72 -0
  26. data/docs/api/pipeline/resolution.md +209 -0
  27. data/docs/api/services/content-service.md +166 -0
  28. data/docs/api/services/entity-service.md +202 -0
  29. data/docs/api/services/fact-service.md +223 -0
  30. data/docs/api/services/index.md +55 -0
  31. data/docs/architecture/database-schema.md +293 -0
  32. data/docs/architecture/entity-resolution.md +293 -0
  33. data/docs/architecture/index.md +149 -0
  34. data/docs/architecture/temporal-facts.md +268 -0
  35. data/docs/architecture/three-layer-model.md +242 -0
  36. data/docs/assets/css/custom.css +137 -0
  37. data/docs/assets/fact_db.jpg +0 -0
  38. data/docs/assets/images/fact_db.jpg +0 -0
  39. data/docs/concepts.md +183 -0
  40. data/docs/examples/basic-usage.md +235 -0
  41. data/docs/examples/hr-onboarding.md +312 -0
  42. data/docs/examples/index.md +64 -0
  43. data/docs/examples/news-analysis.md +288 -0
  44. data/docs/getting-started/database-setup.md +170 -0
  45. data/docs/getting-started/index.md +71 -0
  46. data/docs/getting-started/installation.md +98 -0
  47. data/docs/getting-started/quick-start.md +191 -0
  48. data/docs/guides/batch-processing.md +325 -0
  49. data/docs/guides/configuration.md +243 -0
  50. data/docs/guides/entity-management.md +364 -0
  51. data/docs/guides/extracting-facts.md +299 -0
  52. data/docs/guides/index.md +22 -0
  53. data/docs/guides/ingesting-content.md +252 -0
  54. data/docs/guides/llm-integration.md +299 -0
  55. data/docs/guides/temporal-queries.md +315 -0
  56. data/docs/index.md +121 -0
  57. data/examples/README.md +130 -0
  58. data/examples/basic_usage.rb +164 -0
  59. data/examples/entity_management.rb +216 -0
  60. data/examples/hr_system.rb +428 -0
  61. data/examples/rule_based_extraction.rb +258 -0
  62. data/examples/temporal_queries.rb +245 -0
  63. data/lib/fact_db/config.rb +71 -0
  64. data/lib/fact_db/database.rb +45 -0
  65. data/lib/fact_db/errors.rb +10 -0
  66. data/lib/fact_db/extractors/base.rb +117 -0
  67. data/lib/fact_db/extractors/llm_extractor.rb +179 -0
  68. data/lib/fact_db/extractors/manual_extractor.rb +53 -0
  69. data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
  70. data/lib/fact_db/llm/adapter.rb +109 -0
  71. data/lib/fact_db/models/content.rb +62 -0
  72. data/lib/fact_db/models/entity.rb +84 -0
  73. data/lib/fact_db/models/entity_alias.rb +26 -0
  74. data/lib/fact_db/models/entity_mention.rb +33 -0
  75. data/lib/fact_db/models/fact.rb +192 -0
  76. data/lib/fact_db/models/fact_source.rb +35 -0
  77. data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
  78. data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
  79. data/lib/fact_db/resolution/entity_resolver.rb +261 -0
  80. data/lib/fact_db/resolution/fact_resolver.rb +259 -0
  81. data/lib/fact_db/services/content_service.rb +93 -0
  82. data/lib/fact_db/services/entity_service.rb +150 -0
  83. data/lib/fact_db/services/fact_service.rb +193 -0
  84. data/lib/fact_db/temporal/query.rb +125 -0
  85. data/lib/fact_db/temporal/timeline.rb +134 -0
  86. data/lib/fact_db/version.rb +5 -0
  87. data/lib/fact_db.rb +141 -0
  88. data/mkdocs.yml +198 -0
  89. metadata +288 -0
@@ -0,0 +1,77 @@
1
+ # Models
2
+
3
+ FactDb uses ActiveRecord models for data persistence.
4
+
5
+ ## Core Models
6
+
7
+ - [Content](content.md) - Immutable source documents
8
+ - [Entity](entity.md) - Resolved identities with aliases
9
+ - [Fact](fact.md) - Temporal assertions
10
+
11
+ ## Supporting Models
12
+
13
+ ### EntityAlias
14
+
15
+ Stores alternative names for entities.
16
+
17
+ ```ruby
18
+ class EntityAlias < ActiveRecord::Base
19
+ belongs_to :entity
20
+ end
21
+ ```
22
+
23
+ | Column | Type | Description |
24
+ |--------|------|-------------|
25
+ | entity_id | bigint | Parent entity |
26
+ | alias_text | string | Alternative name |
27
+ | alias_type | string | Type (nickname, abbreviation, etc.) |
28
+ | confidence | float | Match confidence |
29
+
30
+ ### EntityMention
31
+
32
+ Links facts to mentioned entities.
33
+
34
+ ```ruby
35
+ class EntityMention < ActiveRecord::Base
36
+ belongs_to :fact
37
+ belongs_to :entity
38
+ end
39
+ ```
40
+
41
+ | Column | Type | Description |
42
+ |--------|------|-------------|
43
+ | fact_id | bigint | Parent fact |
44
+ | entity_id | bigint | Referenced entity |
45
+ | mention_text | string | How entity was mentioned |
46
+ | mention_role | string | Role (subject, object, etc.) |
47
+ | confidence | float | Resolution confidence |
48
+
49
+ ### FactSource
50
+
51
+ Links facts to source content.
52
+
53
+ ```ruby
54
+ class FactSource < ActiveRecord::Base
55
+ belongs_to :fact
56
+ belongs_to :content
57
+ end
58
+ ```
59
+
60
+ | Column | Type | Description |
61
+ |--------|------|-------------|
62
+ | fact_id | bigint | Parent fact |
63
+ | content_id | bigint | Source content |
64
+ | source_type | string | Type (primary, supporting, contradicting) |
65
+ | excerpt | text | Relevant text excerpt |
66
+ | confidence | float | Source confidence |
67
+
68
+ ## Model Relationships
69
+
70
+ ```mermaid
71
+ erDiagram
72
+ Content ||--o{ FactSource : "sourced by"
73
+ Entity ||--o{ EntityAlias : "has"
74
+ Entity ||--o{ EntityMention : "mentioned in"
75
+ Fact ||--o{ EntityMention : "mentions"
76
+ Fact ||--o{ FactSource : "sourced from"
77
+ ```
@@ -0,0 +1,175 @@
1
+ # ExtractionPipeline
2
+
3
+ Concurrent fact extraction from multiple content items.
4
+
5
+ ## Class: `FactDb::Pipeline::ExtractionPipeline`
6
+
7
+ ```ruby
8
+ pipeline = FactDb::Pipeline::ExtractionPipeline.new(config)
9
+ ```
10
+
11
+ ## Methods
12
+
13
+ ### process
14
+
15
+ ```ruby
16
+ def process(contents, extractor: config.default_extractor)
17
+ ```
18
+
19
+ Process content items sequentially.
20
+
21
+ **Parameters:**
22
+
23
+ - `contents` (`Array<Content>`) - Content records
24
+ - `extractor` (Symbol) - Extraction method
25
+
26
+ **Returns:** `Array<Hash>`
27
+
28
+ **Example:**
29
+
30
+ ```ruby
31
+ contents = Models::Content.where(id: [1, 2, 3])
32
+ results = pipeline.process(contents, extractor: :llm)
33
+ ```
34
+
35
+ ---
36
+
37
+ ### process_parallel
38
+
39
+ ```ruby
40
+ def process_parallel(contents, extractor: config.default_extractor)
41
+ ```
42
+
43
+ Process content items concurrently.
44
+
45
+ **Parameters:**
46
+
47
+ - `contents` (`Array<Content>`) - Content records
48
+ - `extractor` (Symbol) - Extraction method
49
+
50
+ **Returns:** `Array<Hash>`
51
+
52
+ **Example:**
53
+
54
+ ```ruby
55
+ results = pipeline.process_parallel(contents, extractor: :llm)
56
+
57
+ results.each do |result|
58
+ puts "Content #{result[:content_id]}:"
59
+ puts " Facts: #{result[:facts].count}"
60
+ puts " Error: #{result[:error]}" if result[:error]
61
+ end
62
+ ```
63
+
64
+ ## Pipeline Steps
65
+
66
+ ### Sequential Pipeline
67
+
68
+ ```mermaid
69
+ graph LR
70
+ A[Content] --> B[Validate]
71
+ B --> C[Extract]
72
+ C --> D[Validate Facts]
73
+ D --> E[Results]
74
+
75
+ style A fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
76
+ style B fill:#B45309,stroke:#92400E,color:#FFFFFF
77
+ style C fill:#047857,stroke:#065F46,color:#FFFFFF
78
+ style D fill:#B45309,stroke:#92400E,color:#FFFFFF
79
+ style E fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
80
+ ```
81
+
82
+ 1. **Validate** - Check content is not empty
83
+ 2. **Extract** - Run extractor
84
+ 3. **Validate Facts** - Filter valid facts
85
+ 4. **Results** - Return extracted facts
86
+
87
+ ### Parallel Pipeline
88
+
89
+ ```mermaid
90
+ graph TB
91
+ subgraph Parallel
92
+ A1[Content 1] --> E1[Extract 1]
93
+ A2[Content 2] --> E2[Extract 2]
94
+ A3[Content 3] --> E3[Extract 3]
95
+ end
96
+ E1 --> Aggregate
97
+ E2 --> Aggregate
98
+ E3 --> Aggregate
99
+
100
+ style A1 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
101
+ style A2 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
102
+ style A3 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
103
+ style E1 fill:#047857,stroke:#065F46,color:#FFFFFF
104
+ style E2 fill:#047857,stroke:#065F46,color:#FFFFFF
105
+ style E3 fill:#047857,stroke:#065F46,color:#FFFFFF
106
+ style Aggregate fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
107
+ ```
108
+
109
+ ## Result Structure
110
+
111
+ ```ruby
112
+ {
113
+ content_id: 123,
114
+ facts: [<Fact>, <Fact>, ...], # Extracted facts
115
+ error: nil # Error message if failed
116
+ }
117
+ ```
118
+
119
+ ## Usage via Facts
120
+
121
+ ```ruby
122
+ facts = FactDb.new
123
+
124
+ # Sequential
125
+ results = facts.batch_extract(content_ids, parallel: false)
126
+
127
+ # Parallel (default)
128
+ results = facts.batch_extract(content_ids, parallel: true)
129
+ ```
130
+
131
+ ## Error Handling
132
+
133
+ The pipeline catches errors per-item:
134
+
135
+ ```ruby
136
+ results = pipeline.process_parallel(contents)
137
+
138
+ results.each do |result|
139
+ if result[:error]
140
+ logger.error "Content #{result[:content_id]}: #{result[:error]}"
141
+ else
142
+ logger.info "Content #{result[:content_id]}: #{result[:facts].count} facts"
143
+ end
144
+ end
145
+ ```
146
+
147
+ ## Performance
148
+
149
+ ### Batch Size
150
+
151
+ Optimal batch size depends on:
152
+
153
+ - Extractor type (LLM has rate limits)
154
+ - Content length
155
+ - System resources
156
+
157
+ ```ruby
158
+ # Process in optimal batches
159
+ contents.each_slice(25) do |batch|
160
+ results = pipeline.process_parallel(batch)
161
+ process_results(results)
162
+ end
163
+ ```
164
+
165
+ ### Memory
166
+
167
+ For large batches, process and discard:
168
+
169
+ ```ruby
170
+ contents.each_slice(50) do |batch|
171
+ results = pipeline.process_parallel(batch)
172
+ save_facts(results.flat_map { |r| r[:facts] })
173
+ # Results discarded after each batch
174
+ end
175
+ ```
@@ -0,0 +1,72 @@
1
+ # Pipeline
2
+
3
+ Pipelines provide concurrent processing for batch operations using SimpleFlow.
4
+
5
+ ## Available Pipelines
6
+
7
+ - [ExtractionPipeline](extraction.md) - Concurrent fact extraction
8
+ - [ResolutionPipeline](resolution.md) - Parallel entity resolution
9
+
10
+ ## SimpleFlow Integration
11
+
12
+ Pipelines are built on the `simple_flow` gem:
13
+
14
+ ```ruby
15
+ require 'simple_flow'
16
+
17
+ pipeline = SimpleFlow::Pipeline.new do
18
+ step ->(result) { result.continue(transformed_value) }
19
+ step ->(result) { result.continue(more_transformation) }
20
+ end
21
+
22
+ result = pipeline.call(SimpleFlow::Result.new(initial_value))
23
+ ```
24
+
25
+ ## Pipeline Pattern
26
+
27
+ All pipelines follow a common structure:
28
+
29
+ ```ruby
30
+ class SomePipeline
31
+ attr_reader :config
32
+
33
+ def initialize(config = FactDb.config)
34
+ @config = config
35
+ end
36
+
37
+ def process(items, **options)
38
+ # Sequential processing
39
+ end
40
+
41
+ def process_parallel(items, **options)
42
+ # Parallel processing
43
+ end
44
+ end
45
+ ```
46
+
47
+ ## Result Structure
48
+
49
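+ Extraction pipeline results use the format below; `ResolutionPipeline` methods return their own result structures (see [ResolutionPipeline](resolution.md)):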
50
+
51
+ ```ruby
52
+ {
53
+ content_id: 123, # Item identifier
54
+ facts: [<Fact>, ...], # Extracted/resolved items
55
+ error: nil # Error message if failed
56
+ }
57
+ ```
58
+
59
+ ## Error Handling
60
+
61
+ Pipelines handle errors gracefully:
62
+
63
+ ```ruby
64
+ results = pipeline.process_parallel(items)
65
+
66
+ successful = results.select { |r| r[:error].nil? }
67
+ failed = results.reject { |r| r[:error].nil? }
68
+
69
+ failed.each do |result|
70
+ logger.error "Failed: #{result[:error]}"
71
+ end
72
+ ```
@@ -0,0 +1,209 @@
1
+ # ResolutionPipeline
2
+
3
+ Parallel entity resolution and conflict detection.
4
+
5
+ ## Class: `FactDb::Pipeline::ResolutionPipeline`
6
+
7
+ ```ruby
8
+ pipeline = FactDb::Pipeline::ResolutionPipeline.new(config)
9
+ ```
10
+
11
+ ## Methods
12
+
13
+ ### resolve_entities
14
+
15
+ ```ruby
16
+ def resolve_entities(names, type: nil)
17
+ ```
18
+
19
+ Resolve multiple entity names in parallel.
20
+
21
+ **Parameters:**
22
+
23
+ - `names` (`Array<String>`) - Names to resolve
24
+ - `type` (Symbol) - Optional entity type filter
25
+
26
+ **Returns:** `Array<Hash>`
27
+
28
+ **Example:**
29
+
30
+ ```ruby
31
+ names = ["Paula Chen", "Microsoft", "Seattle"]
32
+ results = pipeline.resolve_entities(names)
33
+
34
+ results.each do |result|
35
+ puts "#{result[:name]}: #{result[:status]}"
36
+ puts " Entity: #{result[:entity]&.canonical_name}"
37
+ end
38
+ ```
39
+
40
+ ---
41
+
42
+ ### detect_conflicts
43
+
44
+ ```ruby
45
+ def detect_conflicts(entity_ids)
46
+ ```
47
+
48
+ Find fact conflicts for multiple entities in parallel.
49
+
50
+ **Parameters:**
51
+
52
+ - `entity_ids` (`Array<Integer>`) - Entity IDs to check
53
+
54
+ **Returns:** `Array<Hash>`
55
+
56
+ **Example:**
57
+
58
+ ```ruby
59
+ results = pipeline.detect_conflicts([paula.id, john.id])
60
+
61
+ results.each do |result|
62
+ puts "Entity #{result[:entity_id]}: #{result[:conflict_count]} conflicts"
63
+ result[:conflicts].each do |c|
64
+ puts " - #{c[:fact1].fact_text} vs #{c[:fact2].fact_text}"
65
+ end
66
+ end
67
+ ```
68
+
69
+ ## Result Structures
70
+
71
+ ### Resolution Result
72
+
73
+ ```ruby
74
+ {
75
+ name: "Paula Chen",
76
+ entity: <Entity>, # Resolved entity or nil
77
+ status: :resolved, # :resolved, :not_found, :error
78
+ error: nil # Error message if failed
79
+ }
80
+ ```
81
+
82
+ ### Conflict Detection Result
83
+
84
+ ```ruby
85
+ {
86
+ entity_id: 123,
87
+ conflicts: [
88
+ {
89
+ fact1: <Fact>,
90
+ fact2: <Fact>,
91
+ similarity: 0.75
92
+ }
93
+ ],
94
+ conflict_count: 1
95
+ }
96
+ ```
97
+
98
+ ## Pipeline Steps
99
+
100
+ ### Entity Resolution Pipeline
101
+
102
+ ```mermaid
103
+ graph TB
104
+ subgraph Parallel
105
+ N1[Name 1] --> R1[Resolve 1]
106
+ N2[Name 2] --> R2[Resolve 2]
107
+ N3[Name 3] --> R3[Resolve 3]
108
+ end
109
+ R1 --> Aggregate
110
+ R2 --> Aggregate
111
+ R3 --> Aggregate
112
+
113
+ style N1 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
114
+ style N2 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
115
+ style N3 fill:#1E40AF,stroke:#1E3A8A,color:#FFFFFF
116
+ style R1 fill:#047857,stroke:#065F46,color:#FFFFFF
117
+ style R2 fill:#047857,stroke:#065F46,color:#FFFFFF
118
+ style R3 fill:#047857,stroke:#065F46,color:#FFFFFF
119
+ style Aggregate fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
120
+ ```
121
+
122
+ ### Conflict Detection Pipeline
123
+
124
+ ```mermaid
125
+ graph TB
126
+ subgraph Parallel
127
+ E1[Entity 1] --> C1[Find Conflicts 1]
128
+ E2[Entity 2] --> C2[Find Conflicts 2]
129
+ end
130
+ C1 --> Aggregate
131
+ C2 --> Aggregate
132
+
133
+ style E1 fill:#047857,stroke:#065F46,color:#FFFFFF
134
+ style E2 fill:#047857,stroke:#065F46,color:#FFFFFF
135
+ style C1 fill:#B45309,stroke:#92400E,color:#FFFFFF
136
+ style C2 fill:#B45309,stroke:#92400E,color:#FFFFFF
137
+ style Aggregate fill:#B91C1C,stroke:#991B1B,color:#FFFFFF
138
+ ```
139
+
140
+ ## Usage via Facts
141
+
142
+ ```ruby
143
+ facts = FactDb.new
144
+
145
+ # Resolve entities
146
+ results = facts.batch_resolve_entities(["Paula", "Microsoft"])
147
+
148
+ # Detect conflicts
149
+ results = facts.detect_fact_conflicts([entity1.id, entity2.id])
150
+ ```
151
+
152
+ ## Resolution Strategies
153
+
154
+ The pipeline uses the `EntityResolver`, which tries the following strategies:
155
+
156
+ 1. **Exact match** on canonical name
157
+ 2. **Alias match** on registered aliases
158
+ 3. **Fuzzy match** using Levenshtein distance
159
+
160
+ ```ruby
161
+ FactDb.configure do |config|
162
+ config.fuzzy_match_threshold = 0.85
163
+ end
164
+ ```
165
+
166
+ ## Error Handling
167
+
168
+ ```ruby
169
+ results = pipeline.resolve_entities(names)
170
+
171
+ # Handle unresolved names
172
+ unresolved = results.select { |r| r[:status] == :not_found }
173
+ unresolved.each do |result|
174
+ # Optionally create new entities
175
+ entity = facts.entity_service.create(
176
+ result[:name],
177
+ type: :person,
178
+ metadata: { needs_review: true }
179
+ )
180
+ end
181
+
182
+ # Handle errors
183
+ errors = results.select { |r| r[:status] == :error }
184
+ errors.each do |result|
185
+ logger.error "Resolution failed for #{result[:name]}: #{result[:error]}"
186
+ end
187
+ ```
188
+
189
+ ## Performance Tips
190
+
191
+ ### Batch Size
192
+
193
+ ```ruby
194
+ # Process in batches for large name lists
195
+ names.each_slice(100) do |batch|
196
+ results = pipeline.resolve_entities(batch)
197
+ process_results(results)
198
+ end
199
+ ```
200
+
201
+ ### Pre-warm Cache
202
+
203
+ ```ruby
204
+ # Load entities into memory first
205
+ FactDb::Models::Entity.where(entity_type: 'person').to_a
206
+
207
+ # Then resolve
208
+ results = pipeline.resolve_entities(person_names, type: :person)
209
+ ```
@@ -0,0 +1,166 @@
1
+ # ContentService
2
+
3
+ Service for ingesting and managing source content.
4
+
5
+ ## Class: `FactDb::Services::ContentService`
6
+
7
+ ```ruby
8
+ service = FactDb::Services::ContentService.new(config)
9
+ ```
10
+
11
+ ## Methods
12
+
13
+ ### create
14
+
15
+ ```ruby
16
+ def create(raw_text, type:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
17
+ ```
18
+
19
+ Create new content with automatic deduplication.
20
+
21
+ **Parameters:**
22
+
23
+ - `raw_text` (String) - Content text
24
+ - `type` (Symbol) - Content type
25
+ - `captured_at` (Time) - Capture timestamp
26
+ - `metadata` (Hash) - Additional metadata
27
+ - `title` (String) - Optional title
28
+ - `source_uri` (String) - Original location
29
+
30
+ **Returns:** `Models::Content`
31
+
32
+ **Example:**
33
+
34
+ ```ruby
35
+ content = service.create(
36
+ "Email body text...",
37
+ type: :email,
38
+ title: "RE: Important",
39
+ metadata: { from: "sender@example.com" }
40
+ )
41
+ ```
42
+
43
+ ---
44
+
45
+ ### find
46
+
47
+ ```ruby
48
+ def find(id)
49
+ ```
50
+
51
+ Find content by ID.
52
+
53
+ **Returns:** `Models::Content`
54
+
55
+ ---
56
+
57
+ ### find_by_hash
58
+
59
+ ```ruby
60
+ def find_by_hash(hash)
61
+ ```
62
+
63
+ Find content by SHA256 hash.
64
+
65
+ **Returns:** `Models::Content` or `nil`
66
+
67
+ **Example:**
68
+
69
+ ```ruby
70
+ hash = Digest::SHA256.hexdigest(text)
71
+ content = service.find_by_hash(hash)
72
+ ```
73
+
74
+ ---
75
+
76
+ ### search
77
+
78
+ ```ruby
79
+ def search(query, limit: 20)
80
+ ```
81
+
82
+ Full-text search over stored content.
83
+
84
+ **Parameters:**
85
+
86
+ - `query` (String) - Search query
87
+ - `limit` (Integer) - Max results
88
+
89
+ **Returns:** `Array<Models::Content>`
90
+
91
+ **Example:**
92
+
93
+ ```ruby
94
+ results = service.search("quarterly report", limit: 10)
95
+ ```
96
+
97
+ ---
98
+
99
+ ### semantic_search
100
+
101
+ ```ruby
102
+ def semantic_search(query, limit: 10)
103
+ ```
104
+
105
+ Semantic similarity search using embeddings.
106
+
107
+ **Parameters:**
108
+
109
+ - `query` (String) - Search query
110
+ - `limit` (Integer) - Max results
111
+
112
+ **Returns:** `Array<Models::Content>`
113
+
114
+ **Example:**
115
+
116
+ ```ruby
117
+ results = service.semantic_search("financial performance")
118
+ ```
119
+
120
+ ---
121
+
122
+ ### by_type
123
+
124
+ ```ruby
125
+ def by_type(type)
126
+ ```
127
+
128
+ Filter content by type.
129
+
130
+ **Returns:** `ActiveRecord::Relation`
131
+
132
+ **Example:**
133
+
134
+ ```ruby
135
+ emails = service.by_type(:email)
136
+ ```
137
+
138
+ ---
139
+
140
+ ### recent
141
+
142
+ ```ruby
143
+ def recent(limit: 20)
144
+ ```
145
+
146
+ Get recently captured content.
147
+
148
+ **Returns:** `Array<Models::Content>`
149
+
150
+ ---
151
+
152
+ ### mentioning_entity
153
+
154
+ ```ruby
155
+ def mentioning_entity(entity_id)
156
+ ```
157
+
158
+ Find content that mentions an entity (via facts).
159
+
160
+ **Returns:** `Array<Models::Content>`
161
+
162
+ **Example:**
163
+
164
+ ```ruby
165
+ paula_content = service.mentioning_entity(paula.id)
166
+ ```