fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -2,17 +2,30 @@
2
2
 
3
3
  class CreateEntityAliases < ActiveRecord::Migration[7.0]
4
4
  def change
5
- create_table :fact_db_entity_aliases do |t|
6
- t.references :entity, null: false, foreign_key: { to_table: :fact_db_entities, on_delete: :cascade }
7
- t.string :alias_text, null: false, limit: 500
8
- t.string :alias_type, limit: 50 # name, nickname, email, handle, abbreviation
9
- t.float :confidence, default: 1.0
5
+ create_table :fact_db_entity_aliases, comment: "Alternative names and identifiers for entities enabling flexible matching" do |t|
6
+ t.references :entity, null: false, foreign_key: { to_table: :fact_db_entities, on_delete: :cascade },
7
+ comment: "The canonical entity this alias refers to"
8
+ t.string :name, null: false, limit: 500,
9
+ comment: "The alternative name, identifier, or reference text"
10
+ t.string :kind, limit: 50,
11
+ comment: "Classification of alias: name, nickname, email, handle, abbreviation, former_name"
12
+ t.float :confidence, default: 1.0,
13
+ comment: "Confidence score (0.0-1.0) that this alias correctly refers to the entity"
10
14
 
11
15
  t.timestamps
12
16
  end
13
17
 
14
- add_index :fact_db_entity_aliases, :alias_text
15
- add_index :fact_db_entity_aliases, [:entity_id, :alias_text], unique: true,
18
+ add_index :fact_db_entity_aliases, :name
19
+ add_index :fact_db_entity_aliases, [:entity_id, :name], unique: true,
16
20
  name: "idx_unique_entity_alias"
21
+
22
+ # GIN trigram index on name for fuzzy alias matching
23
+ execute <<-SQL
24
+ CREATE INDEX idx_entity_aliases_name_trgm ON fact_db_entity_aliases
25
+ USING gin (name gin_trgm_ops);
26
+ SQL
27
+
28
+ execute "COMMENT ON COLUMN fact_db_entity_aliases.created_at IS 'When this alias association was created';"
29
+ execute "COMMENT ON COLUMN fact_db_entity_aliases.updated_at IS 'When this alias record was last modified';"
17
30
  end
18
31
  end
@@ -2,35 +2,42 @@
2
2
 
3
3
  class CreateFacts < ActiveRecord::Migration[7.0]
4
4
  def change
5
- create_table :fact_db_facts do |t|
6
- # The assertion
7
- t.text :fact_text, null: false
8
- t.string :fact_hash, null: false, limit: 64
5
+ create_table :fact_db_facts, comment: "Extracted factual assertions with temporal validity tracking (Event Clock pattern)" do |t|
6
+ t.text :text, null: false,
7
+ comment: "The factual assertion in natural language form"
8
+ t.string :digest, null: false, limit: 64,
9
+ comment: "SHA-256 hash of normalized text for deduplication"
9
10
 
10
- # Temporal validity (the Event Clock core concept)
11
- t.timestamptz :valid_at, null: false
12
- t.timestamptz :invalid_at # NULL = still valid
11
+ t.timestamptz :valid_at, null: false,
12
+ comment: "When this fact became true (Event Clock valid_from)"
13
+ t.timestamptz :invalid_at,
14
+ comment: "When this fact ceased to be true; NULL means still valid (Event Clock valid_to)"
13
15
 
14
- # Fact status
15
- t.string :status, null: false, default: "canonical", limit: 20
16
+ t.string :status, null: false, default: "canonical", limit: 20,
17
+ comment: "Fact lifecycle state: canonical, superseded, retracted, or disputed"
16
18
 
17
- # Resolution relationships
18
- t.bigint :superseded_by_id
19
- t.bigint :derived_from_ids, array: true, default: []
20
- t.bigint :corroborated_by_ids, array: true, default: []
19
+ t.bigint :superseded_by_id,
20
+ comment: "Reference to newer fact that replaces this one"
21
+ t.bigint :derived_from_ids, array: true, default: [],
22
+ comment: "Array of fact IDs from which this fact was inferred or derived"
23
+ t.bigint :corroborated_by_ids, array: true, default: [],
24
+ comment: "Array of fact IDs that independently confirm this fact"
21
25
 
22
- # Confidence and metadata
23
- t.float :confidence, default: 1.0
24
- t.string :extraction_method, limit: 50
25
- t.jsonb :metadata, null: false, default: {}
26
+ t.float :confidence, default: 1.0,
27
+ comment: "Confidence score (0.0-1.0) in the accuracy of this fact"
28
+ t.string :extraction_method, limit: 50,
29
+ comment: "How fact was extracted: manual, llm_extraction, rule_based, etc."
30
+ t.jsonb :metadata, null: false, default: {},
31
+ comment: "Additional structured data: extraction context, source details, tags"
26
32
 
27
- # Vector embedding for semantic search
28
- t.vector :embedding, limit: 1536
33
+ t.vector :embedding, limit: 1536,
34
+ comment: "Vector embedding for semantic fact search and similarity matching"
29
35
 
30
36
  t.timestamps
31
37
  end
32
38
 
33
- add_index :fact_db_facts, :fact_hash
39
+ # Unique constraint on digest + valid_at allows same fact text at different times
40
+ add_index :fact_db_facts, [:digest, :valid_at], unique: true, name: "index_fact_db_facts_on_digest_valid_at"
34
41
  add_index :fact_db_facts, :valid_at
35
42
  add_index :fact_db_facts, :invalid_at
36
43
  add_index :fact_db_facts, :status
@@ -53,7 +60,7 @@ class CreateFacts < ActiveRecord::Migration[7.0]
53
60
  # Full-text search index
54
61
  execute <<-SQL
55
62
  CREATE INDEX idx_facts_fulltext ON fact_db_facts
56
- USING gin(to_tsvector('english', fact_text));
63
+ USING gin(to_tsvector('english', text));
57
64
  SQL
58
65
 
59
66
  # HNSW index for vector similarity search
@@ -61,5 +68,14 @@ class CreateFacts < ActiveRecord::Migration[7.0]
61
68
  CREATE INDEX idx_facts_embedding ON fact_db_facts
62
69
  USING hnsw (embedding vector_cosine_ops);
63
70
  SQL
71
+
72
+ # GIN trigram index on text for fuzzy fact search
73
+ execute <<-SQL
74
+ CREATE INDEX idx_facts_text_trgm ON fact_db_facts
75
+ USING gin (text gin_trgm_ops);
76
+ SQL
77
+
78
+ execute "COMMENT ON COLUMN fact_db_facts.created_at IS 'When this fact was recorded in the database';"
79
+ execute "COMMENT ON COLUMN fact_db_facts.updated_at IS 'When this fact record was last modified';"
64
80
  end
65
81
  end
@@ -2,17 +2,25 @@
2
2
 
3
3
  class CreateEntityMentions < ActiveRecord::Migration[7.0]
4
4
  def change
5
- create_table :fact_db_entity_mentions do |t|
6
- t.references :fact, null: false, foreign_key: { to_table: :fact_db_facts, on_delete: :cascade }
7
- t.references :entity, null: false, foreign_key: { to_table: :fact_db_entities, on_delete: :cascade }
8
- t.string :mention_text, null: false, limit: 500
9
- t.string :mention_role, limit: 50 # subject, object, location, etc.
10
- t.float :confidence, default: 1.0
5
+ create_table :fact_db_entity_mentions, comment: "Links entities to facts where they are mentioned, with role context" do |t|
6
+ t.references :fact, null: false, foreign_key: { to_table: :fact_db_facts, on_delete: :cascade },
7
+ comment: "The fact containing this entity mention"
8
+ t.references :entity, null: false, foreign_key: { to_table: :fact_db_entities, on_delete: :cascade },
9
+ comment: "The resolved entity being mentioned"
10
+ t.string :mention_text, null: false, limit: 500,
11
+ comment: "The exact text used to reference the entity in the fact"
12
+ t.string :mention_role, limit: 50,
13
+ comment: "Semantic role of entity in fact: subject, object, location, time, instrument, etc."
14
+ t.float :confidence, default: 1.0,
15
+ comment: "Confidence score (0.0-1.0) that mention correctly resolves to entity"
11
16
 
12
17
  t.timestamps
13
18
  end
14
19
 
15
20
  add_index :fact_db_entity_mentions, [:fact_id, :entity_id, :mention_text],
16
21
  unique: true, name: "idx_unique_fact_entity_mention"
22
+
23
+ execute "COMMENT ON COLUMN fact_db_entity_mentions.created_at IS 'When this mention link was created';"
24
+ execute "COMMENT ON COLUMN fact_db_entity_mentions.updated_at IS 'When this mention record was last modified';"
17
25
  end
18
26
  end
@@ -2,17 +2,25 @@
2
2
 
3
3
  class CreateFactSources < ActiveRecord::Migration[7.0]
4
4
  def change
5
- create_table :fact_db_fact_sources do |t|
6
- t.references :fact, null: false, foreign_key: { to_table: :fact_db_facts, on_delete: :cascade }
7
- t.references :content, null: false, foreign_key: { to_table: :fact_db_contents, on_delete: :cascade }
8
- t.string :source_type, default: "primary", limit: 50 # primary, supporting, corroborating
9
- t.text :excerpt # The specific portion that supports the fact
10
- t.float :confidence, default: 1.0
5
+ create_table :fact_db_fact_sources, comment: "Links facts to their source content for provenance tracking" do |t|
6
+ t.references :fact, null: false, foreign_key: { to_table: :fact_db_facts, on_delete: :cascade },
7
+ comment: "The fact derived from this source"
8
+ t.references :source, null: false, foreign_key: { to_table: :fact_db_sources, on_delete: :cascade },
9
+ comment: "The source content from which the fact was extracted"
10
+ t.string :kind, default: "primary", limit: 50,
11
+ comment: "Relationship type: primary (direct extraction), supporting, or corroborating"
12
+ t.text :excerpt,
13
+ comment: "The specific text passage within the content that supports this fact"
14
+ t.float :confidence, default: 1.0,
15
+ comment: "Confidence score (0.0-1.0) that this source supports the fact"
11
16
 
12
17
  t.timestamps
13
18
  end
14
19
 
15
- add_index :fact_db_fact_sources, [:fact_id, :content_id], unique: true,
16
- name: "idx_unique_fact_content"
20
+ add_index :fact_db_fact_sources, [:fact_id, :source_id], unique: true,
21
+ name: "idx_unique_fact_source"
22
+
23
+ execute "COMMENT ON COLUMN fact_db_fact_sources.created_at IS 'When this source link was established';"
24
+ execute "COMMENT ON COLUMN fact_db_fact_sources.updated_at IS 'When this source record was last modified';"
17
25
  end
18
26
  end
@@ -34,11 +34,11 @@ end
34
34
 
35
35
  ```ruby
36
36
  class MyExtractor < FactDb::Extractors::Base
37
- def extract(content)
37
+ def extract(source)
38
38
  facts = []
39
39
 
40
40
  # Your extraction logic
41
- # Parse content.raw_text
41
+ # Parse source.content
42
42
  # Create fact records
43
43
 
44
44
  facts
@@ -48,18 +48,18 @@ end
48
48
 
49
49
  ## Using Extractors
50
50
 
51
- ### Via Facts
51
+ ### Via FactDb
52
52
 
53
53
  ```ruby
54
54
  facts = FactDb.new
55
- extracted = facts.extract_facts(content.id, extractor: :llm)
55
+ extracted = facts.extract_facts(source.id, extractor: :llm)
56
56
  ```
57
57
 
58
58
  ### Directly
59
59
 
60
60
  ```ruby
61
61
  extractor = FactDb::Extractors::LLMExtractor.new(config)
62
- facts = extractor.extract(content)
62
+ facts = extractor.extract(source)
63
63
  ```
64
64
 
65
65
  ## Extractor Selection
@@ -17,9 +17,9 @@ extractor = FactDb::Extractors::LLMExtractor.new(config)
17
17
 
18
18
  ```ruby
19
19
  FactDb.configure do |config|
20
- config.llm_provider = :openai
21
- config.llm_model = "gpt-4o-mini"
22
- config.llm_api_key = ENV['OPENAI_API_KEY']
20
+ config.llm.provider = :openai
21
+ config.llm.model = "gpt-4o-mini"
22
+ config.llm.api_key = ENV['OPENAI_API_KEY']
23
23
  end
24
24
  ```
25
25
 
@@ -35,7 +35,7 @@ Extract facts from content using LLM.
35
35
 
36
36
  **Parameters:**
37
37
 
38
- - `content` (Models::Content) - Content to process
38
+ - `source` (Models::Source) - Source to process
39
39
 
40
40
  **Returns:** `Array<Models::Fact>`
41
41
 
@@ -43,10 +43,10 @@ Extract facts from content using LLM.
43
43
 
44
44
  ```ruby
45
45
  extractor = LLMExtractor.new(config)
46
- facts = extractor.extract(content)
46
+ facts = extractor.extract(source)
47
47
 
48
48
  facts.each do |fact|
49
- puts fact.fact_text
49
+ puts fact.text
50
50
  puts " Valid: #{fact.valid_at}"
51
51
  puts " Confidence: #{fact.confidence}"
52
52
  end
@@ -73,7 +73,7 @@ Extract temporal facts from this content. For each fact:
73
73
  4. Assess confidence level
74
74
 
75
75
  Content:
76
- {content.raw_text}
76
+ {source.content}
77
77
 
78
78
  Return JSON:
79
79
  {
@@ -94,12 +94,12 @@ Return JSON:
94
94
 
95
95
  | Provider | Models | Config |
96
96
  |----------|--------|--------|
97
- | OpenAI | gpt-4o, gpt-4o-mini | `llm_provider: :openai` |
98
- | Anthropic | claude-sonnet-4, claude-3-haiku | `llm_provider: :anthropic` |
99
- | Google | gemini-2.0-flash | `llm_provider: :gemini` |
100
- | Ollama | llama3.2, mistral | `llm_provider: :ollama` |
101
- | AWS Bedrock | claude-sonnet-4 | `llm_provider: :bedrock` |
102
- | OpenRouter | Various | `llm_provider: :openrouter` |
97
+ | OpenAI | gpt-4o, gpt-4o-mini | `llm.provider = :openai` |
98
+ | Anthropic | claude-sonnet-4, claude-3-haiku | `llm.provider = :anthropic` |
99
+ | Google | gemini-2.0-flash | `llm.provider = :gemini` |
100
+ | Ollama | llama3.2, mistral | `llm.provider = :ollama` |
101
+ | AWS Bedrock | claude-sonnet-4 | `llm.provider = :bedrock` |
102
+ | OpenRouter | Various | `llm.provider = :openrouter` |
103
103
 
104
104
  ## Error Handling
105
105
 
@@ -134,7 +134,7 @@ end
134
134
  ### 1. Validate Results
135
135
 
136
136
  ```ruby
137
- facts = extractor.extract(content)
137
+ facts = extractor.extract(source)
138
138
  facts.each do |fact|
139
139
  if fact.confidence < 0.7
140
140
  fact.update!(metadata: { needs_review: true })
@@ -145,9 +145,9 @@ end
145
145
  ### 2. Cache Responses
146
146
 
147
147
  ```ruby
148
- cache_key = "llm:#{content.content_hash}"
148
+ cache_key = "llm:#{source.content_hash}"
149
149
  facts = Rails.cache.fetch(cache_key) do
150
- extractor.extract(content)
150
+ extractor.extract(source)
151
151
  end
152
152
  ```
153
153
 
@@ -157,6 +157,6 @@ end
157
157
  require 'retryable'
158
158
 
159
159
  Retryable.retryable(tries: 3, sleep: lambda { |n| 2**n }) do
160
- extractor.extract(content)
160
+ extractor.extract(source)
161
161
  end
162
162
  ```
@@ -65,13 +65,13 @@ The extractor includes patterns for common fact types:
65
65
  ```ruby
66
66
  extractor = RuleBasedExtractor.new(config)
67
67
 
68
- content = Models::Content.create!(
69
- raw_text: "Paula Chen joined Microsoft on January 10, 2024 as Principal Engineer.",
70
- content_type: "announcement",
68
+ source = Models::Source.create!(
69
+ content: "Paula Chen joined Microsoft on January 10, 2024 as Principal Engineer.",
70
+ type: "announcement",
71
71
  captured_at: Time.current
72
72
  )
73
73
 
74
- facts = extractor.extract(content)
74
+ facts = extractor.extract(source)
75
75
  # Returns facts about:
76
76
  # - Paula joining Microsoft
77
77
  # - Paula's title as Principal Engineer
@@ -99,20 +99,20 @@ class CustomRuleExtractor < FactDb::Extractors::RuleBasedExtractor
99
99
 
100
100
  private
101
101
 
102
- def extract_custom_patterns(content)
102
+ def extract_custom_patterns(source)
103
103
  facts = []
104
104
  CUSTOM_PATTERNS.each do |rule|
105
- content.raw_text.scan(rule[:pattern]) do |match|
106
- facts << send(rule[:handler], match, content)
105
+ source.content.scan(rule[:pattern]) do |match|
106
+ facts << send(rule[:handler], match, source)
107
107
  end
108
108
  end
109
109
  facts
110
110
  end
111
111
 
112
- def extract_revenue(match, content)
112
+ def extract_revenue(match, source)
113
113
  Models::Fact.create!(
114
- fact_text: "Revenue of $#{match[:amount]}",
115
- valid_at: content.captured_at,
114
+ text: "Revenue of $#{match[:amount]}",
115
+ valid_at: source.captured_at,
116
116
  extraction_method: "rule_based",
117
117
  # ...
118
118
  )
@@ -141,7 +141,7 @@ end
141
141
 
142
142
  ```ruby
143
143
  # Use rule-based for structured content
144
- if content.content_type == "form"
144
+ if content.type == "form"
145
145
  facts = rule_extractor.extract(content)
146
146
  else
147
147
  facts = llm_extractor.extract(content)
@@ -155,11 +155,11 @@ facts = extractor.extract(content)
155
155
  facts.select { |f| f.confidence > 0.8 }
156
156
  ```
157
157
 
158
- ### 3. Log Unmatched Content
158
+ ### 3. Log Unmatched Sources
159
159
 
160
160
  ```ruby
161
- facts = extractor.extract(content)
161
+ facts = extractor.extract(source)
162
162
  if facts.empty?
163
- logger.info "No patterns matched for content #{content.id}"
163
+ logger.info "No patterns matched for source #{source.id}"
164
164
  end
165
165
  ```
data/docs/api/facts.md CHANGED
@@ -15,7 +15,7 @@ facts = FactDb::Facts.new(config: custom_config)
15
15
  | Attribute | Type | Description |
16
16
  |-----------|------|-------------|
17
17
  | `config` | Config | Configuration instance |
18
- | `content_service` | ContentService | Service for content operations |
18
+ | `source_service` | SourceService | Service for source operations |
19
19
  | `entity_service` | EntityService | Service for entity operations |
20
20
  | `fact_service` | FactService | Service for fact operations |
21
21
  | `extraction_pipeline` | ExtractionPipeline | Pipeline for batch extraction |
@@ -43,7 +43,7 @@ facts = FactDb.new
43
43
 
44
44
  # Use custom configuration
45
45
  config = FactDb::Config.new
46
- config.database_url = "postgresql://localhost/my_db"
46
+ config.database.url = "postgresql://localhost/my_db"
47
47
  facts = FactDb.new(config: config)
48
48
  ```
49
49
 
@@ -52,28 +52,28 @@ facts = FactDb.new(config: config)
52
52
  ### ingest
53
53
 
54
54
  ```ruby
55
- def ingest(raw_text, type:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
55
+ def ingest(content, kind:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
56
56
  ```
57
57
 
58
58
  Ingest raw content into the fact database.
59
59
 
60
60
  **Parameters:**
61
61
 
62
- - `raw_text` (String) - The content text
63
- - `type` (Symbol) - Content type (:email, :document, :article, etc.)
62
+ - `content` (String) - The source text content
63
+ - `kind` (Symbol) - Content kind (:email, :document, :article, etc.)
64
64
  - `captured_at` (Time, optional) - When content was captured
65
65
  - `metadata` (Hash, optional) - Additional metadata
66
66
  - `title` (String, optional) - Content title
67
67
  - `source_uri` (String, optional) - Original location
68
68
 
69
- **Returns:** `Models::Content`
69
+ **Returns:** `Models::Source`
70
70
 
71
71
  **Example:**
72
72
 
73
73
  ```ruby
74
- content = facts.ingest(
74
+ source = facts.ingest(
75
75
  "Paula joined Microsoft on Jan 10, 2024",
76
- type: :announcement,
76
+ kind: :announcement,
77
77
  title: "New Hire",
78
78
  captured_at: Time.current
79
79
  )
@@ -84,14 +84,14 @@ content = facts.ingest(
84
84
  ### extract_facts
85
85
 
86
86
  ```ruby
87
- def extract_facts(content_id, extractor: @config.default_extractor)
87
+ def extract_facts(source_id, extractor: @config.default_extractor)
88
88
  ```
89
89
 
90
90
  Extract facts from content.
91
91
 
92
92
  **Parameters:**
93
93
 
94
- - `content_id` (Integer) - Content ID
94
+ - `source_id` (Integer) - Source ID
95
95
  - `extractor` (Symbol, optional) - Extraction method (:manual, :llm, :rule_based)
96
96
 
97
97
  **Returns:** `Array<Models::Fact>`
@@ -99,7 +99,7 @@ Extract facts from content.
99
99
  **Example:**
100
100
 
101
101
  ```ruby
102
- extracted = facts.extract_facts(content.id, extractor: :llm)
102
+ extracted = facts.extract_facts(source.id, extractor: :llm)
103
103
  ```
104
104
 
105
105
  ---
@@ -139,7 +139,7 @@ results = facts.query_facts(at: Date.parse("2023-06-15"))
139
139
  ### resolve_entity
140
140
 
141
141
  ```ruby
142
- def resolve_entity(name, type: nil)
142
+ def resolve_entity(name, kind: nil)
143
143
  ```
144
144
 
145
145
  Resolve a name to an entity.
@@ -147,14 +147,14 @@ Resolve a name to an entity.
147
147
  **Parameters:**
148
148
 
149
149
  - `name` (String) - Name to resolve
150
- - `type` (Symbol, optional) - Entity type filter
150
+ - `kind` (Symbol, optional) - Entity kind filter
151
151
 
152
152
  **Returns:** `Models::Entity` or `nil`
153
153
 
154
154
  **Example:**
155
155
 
156
156
  ```ruby
157
- entity = facts.resolve_entity("Paula Chen", type: :person)
157
+ entity = facts.resolve_entity("Paula Chen", kind: :person)
158
158
  ```
159
159
 
160
160
  ---
@@ -232,14 +232,14 @@ historical = facts.facts_at(Date.parse("2023-06-15"), entity: paula.id)
232
232
  ### batch_extract
233
233
 
234
234
  ```ruby
235
- def batch_extract(content_ids, extractor: @config.default_extractor, parallel: true)
235
+ def batch_extract(source_ids, extractor: @config.default_extractor, parallel: true)
236
236
  ```
237
237
 
238
238
  Batch extract facts from multiple content items.
239
239
 
240
240
  **Parameters:**
241
241
 
242
- - `content_ids` (Array<Integer>) - Content IDs to process
242
+ - `source_ids` (Array<Integer>) - Source IDs to process
243
243
  - `extractor` (Symbol, optional) - Extraction method
244
244
  - `parallel` (Boolean, optional) - Use parallel processing (default: true)
245
245
 
@@ -248,9 +248,9 @@ Batch extract facts from multiple content items.
248
248
  **Example:**
249
249
 
250
250
  ```ruby
251
- results = facts.batch_extract([c1.id, c2.id, c3.id], parallel: true)
251
+ results = facts.batch_extract([s1.id, s2.id, s3.id], parallel: true)
252
252
  results.each do |r|
253
- puts "#{r[:content_id]}: #{r[:facts].count} facts"
253
+ puts "#{r[:source_id]}: #{r[:facts].count} facts"
254
254
  end
255
255
  ```
256
256
 
@@ -259,7 +259,7 @@ end
259
259
  ### batch_resolve_entities
260
260
 
261
261
  ```ruby
262
- def batch_resolve_entities(names, type: nil)
262
+ def batch_resolve_entities(names, kind: nil)
263
263
  ```
264
264
 
265
265
  Batch resolve entity names.
@@ -267,7 +267,7 @@ Batch resolve entity names.
267
267
  **Parameters:**
268
268
 
269
269
  - `names` (Array<String>) - Names to resolve
270
- - `type` (Symbol, optional) - Entity type filter
270
+ - `kind` (Symbol, optional) - Entity kind filter
271
271
 
272
272
  **Returns:** `Array<Hash>` - Resolution results
273
273
 
data/docs/api/index.md CHANGED
@@ -8,13 +8,13 @@ Complete API documentation for FactDb.
8
8
 
9
9
  ## Models
10
10
 
11
- - [Content](models/content.md) - Immutable source documents
11
+ - [Source](models/source.md) - Immutable source content
12
12
  - [Entity](models/entity.md) - Resolved identities
13
13
  - [Fact](models/fact.md) - Temporal assertions
14
14
 
15
15
  ## Services
16
16
 
17
- - [ContentService](services/content-service.md) - Ingest and manage content
17
+ - [SourceService](services/source-service.md) - Ingest and manage sources
18
18
  - [EntityService](services/entity-service.md) - Create and resolve entities
19
19
  - [FactService](services/fact-service.md) - Extract and query facts
20
20
 
@@ -37,14 +37,14 @@ FactDb
37
37
  ├── Config # Configuration
38
38
  ├── Database # Database connection
39
39
  ├── Models
40
- │ ├── Content
40
+ │ ├── Source
41
41
  │ ├── Entity
42
42
  │ ├── EntityAlias
43
43
  │ ├── Fact
44
44
  │ ├── EntityMention
45
45
  │ └── FactSource
46
46
  ├── Services
47
- │ ├── ContentService
47
+ │ ├── SourceService
48
48
  │ ├── EntityService
49
49
  │ └── FactService
50
50
  ├── Extractors