fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -24,7 +24,7 @@ FactDb provides migrations that create all necessary tables:
24
24
  require 'fact_db'
25
25
 
26
26
  FactDb.configure do |config|
27
- config.database_url = "postgresql://localhost/fact_db"
27
+ config.database.url = "postgresql://localhost/fact_db"
28
28
  end
29
29
 
30
30
  FactDb::Database.migrate!
@@ -34,19 +34,19 @@ FactDb::Database.migrate!
34
34
 
35
35
  The migrations create six tables:
36
36
 
37
- ### contents
37
+ ### sources
38
38
 
39
- Stores immutable source documents.
39
+ Stores immutable source content.
40
40
 
41
41
  | Column | Type | Description |
42
42
  |--------|------|-------------|
43
43
  | id | bigint | Primary key |
44
44
  | content_hash | string | SHA256 hash for deduplication |
45
- | content_type | string | Type (email, document, article) |
46
- | raw_text | text | Original content |
45
+ | type | string | Type (email, document, article) |
46
+ | content | text | Original source content |
47
47
  | title | string | Optional title |
48
48
  | source_uri | string | Original location |
49
- | source_metadata | jsonb | Additional metadata |
49
+ | metadata | jsonb | Additional metadata |
50
50
  | embedding | vector(1536) | Semantic search vector |
51
51
  | captured_at | timestamptz | When content was captured |
52
52
 
@@ -57,10 +57,10 @@ Stores resolved identities.
57
57
  | Column | Type | Description |
58
58
  |--------|------|-------------|
59
59
  | id | bigint | Primary key |
60
- | canonical_name | string | Authoritative name |
61
- | entity_type | string | person, organization, place, etc. |
60
+ | name | string | Authoritative name |
61
+ | type | string | person, organization, place, etc. |
62
62
  | resolution_status | string | unresolved, resolved, merged |
63
- | merged_into_id | bigint | Points to canonical entity if merged |
63
+ | canonical_id | bigint | Points to canonical entity if merged |
64
64
  | metadata | jsonb | Additional attributes |
65
65
  | embedding | vector(1536) | Semantic search vector |
66
66
 
@@ -72,8 +72,8 @@ Stores alternative names for entities.
72
72
  |--------|------|-------------|
73
73
  | id | bigint | Primary key |
74
74
  | entity_id | bigint | Foreign key to entities |
75
- | alias_text | string | Alternative name |
76
- | alias_type | string | nickname, abbreviation, etc. |
75
+ | name | string | Alternative name |
76
+ | type | string | nickname, abbreviation, etc. |
77
77
  | confidence | float | Match confidence (0-1) |
78
78
 
79
79
  ### facts
@@ -83,8 +83,8 @@ Stores temporal assertions.
83
83
  | Column | Type | Description |
84
84
  |--------|------|-------------|
85
85
  | id | bigint | Primary key |
86
- | fact_text | text | The assertion |
87
- | fact_hash | string | For deduplication |
86
+ | text | text | The assertion |
87
+ | digest | string | SHA256 digest for deduplication |
88
88
  | valid_at | timestamptz | When fact became true |
89
89
  | invalid_at | timestamptz | When fact stopped being true |
90
90
  | status | string | canonical, superseded, corroborated, synthesized |
@@ -117,8 +117,8 @@ Links facts to source content.
117
117
  |--------|------|-------------|
118
118
  | id | bigint | Primary key |
119
119
  | fact_id | bigint | Foreign key to facts |
120
- | content_id | bigint | Foreign key to contents |
121
- | source_type | string | primary, supporting, contradicting |
120
+ | source_id | bigint | Foreign key to sources |
121
+ | kind | string | primary, supporting, corroborating |
122
122
  | excerpt | text | Relevant text excerpt |
123
123
  | confidence | float | Source confidence |
124
124
 
@@ -128,8 +128,8 @@ The migrations create indexes for:
128
128
 
129
129
  - Content hash (unique)
130
130
  - Content type
131
- - Full-text search on raw_text
132
- - Entity canonical name
131
+ - Full-text search on content
132
+ - Entity name
133
133
  - Entity type
134
134
  - Fact status
135
135
  - Temporal range queries (valid_at, invalid_at)
@@ -158,12 +158,20 @@ Configure the connection pool for your workload:
158
158
 
159
159
  ```ruby
160
160
  FactDb.configure do |config|
161
- config.database_url = ENV['DATABASE_URL']
162
- config.database_pool_size = 10 # Default: 5
163
- config.database_timeout = 60_000 # Default: 30000ms
161
+ config.database.url = ENV['DATABASE_URL']
162
+ config.database.pool_size = 10 # Default: 5
163
+ config.database.timeout = 60_000 # Default: 30000ms
164
164
  end
165
165
  ```
166
166
 
167
+ Or via environment variables:
168
+
169
+ ```bash
170
+ export FDB_DATABASE__URL="postgresql://localhost/fact_db"
171
+ export FDB_DATABASE__POOL_SIZE=10
172
+ export FDB_DATABASE__TIMEOUT=60000
173
+ ```
174
+
167
175
  ## Next Steps
168
176
 
169
177
  - [Quick Start](quick-start.md) - Start using FactDb
@@ -55,17 +55,17 @@ require 'fact_db'
55
55
 
56
56
  # Configure
57
57
  FactDb.configure do |config|
58
- config.database_url = ENV['DATABASE_URL']
58
+ config.database.url = ENV['DATABASE_URL']
59
59
  end
60
60
 
61
61
  # Create a facts instance
62
62
  facts = FactDb.new
63
63
 
64
64
  # Ingest content
65
- content = facts.ingest("Important information...", type: :document)
65
+ source = facts.ingest("Important information...", type: :document)
66
66
 
67
67
  # Extract and query facts
68
- extracted = facts.extract_facts(content.id)
68
+ extracted = facts.extract_facts(source.id)
69
69
  ```
70
70
 
71
71
  Continue to the [Installation Guide](installation.md) to begin.
@@ -9,27 +9,30 @@ Create a configuration file or use environment variables:
9
9
  === "Environment Variables"
10
10
 
11
11
  ```bash
12
- export EVENT_CLOCK_DATABASE_URL="postgresql://localhost/fact_db"
13
- export EVENT_CLOCK_LLM_PROVIDER="openai"
14
- export EVENT_CLOCK_LLM_API_KEY="sk-..."
12
+ export FDB_DATABASE__URL="postgresql://localhost/fact_db"
13
+ export FDB_LLM__PROVIDER="openai"
14
+ export FDB_LLM__API_KEY="sk-..."
15
15
  ```
16
16
 
17
17
  === "YAML Config"
18
18
 
19
19
  ```yaml
20
20
  # config/fact_db.yml
21
- database_url: postgresql://localhost/fact_db
22
- llm_provider: openai
23
- llm_api_key: <%= ENV['OPENAI_API_KEY'] %>
21
+ database:
22
+ url: postgresql://localhost/fact_db
23
+
24
+ llm:
25
+ provider: openai
26
+ api_key: <%= ENV['OPENAI_API_KEY'] %>
24
27
  ```
25
28
 
26
29
  === "Ruby Block"
27
30
 
28
31
  ```ruby
29
32
  FactDb.configure do |config|
30
- config.database_url = "postgresql://localhost/fact_db"
31
- config.llm_provider = :openai
32
- config.llm_api_key = ENV['OPENAI_API_KEY']
33
+ config.database.url = "postgresql://localhost/fact_db"
34
+ config.llm.provider = :openai
35
+ config.llm.api_key = ENV['OPENAI_API_KEY']
33
36
  end
34
37
  ```
35
38
 
@@ -41,7 +44,7 @@ Run the migrations:
41
44
  require 'fact_db'
42
45
 
43
46
  FactDb.configure do |config|
44
- config.database_url = ENV['DATABASE_URL']
47
+ config.database.url = ENV['DATABASE_URL']
45
48
  end
46
49
 
47
50
  # Run migrations
@@ -60,14 +63,14 @@ facts = FactDb.new
60
63
 
61
64
  ```ruby
62
65
  # Ingest an email
63
- content = facts.ingest(
66
+ source = facts.ingest(
64
67
  "Hi team, Paula Chen has accepted our offer and will join as Principal Engineer starting January 10, 2024. She'll be reporting to Sarah in the Platform team.",
65
- type: :email,
68
+ kind: :email,
66
69
  title: "New Hire Announcement",
67
70
  captured_at: Time.current
68
71
  )
69
72
 
70
- puts "Ingested content: #{content.id}"
73
+ puts "Ingested source: #{source.id}"
71
74
  ```
72
75
 
73
76
  ## 5. Create Entities
@@ -76,19 +79,19 @@ puts "Ingested content: #{content.id}"
76
79
  # Create entities for people and organizations
77
80
  paula = facts.entity_service.create(
78
81
  "Paula Chen",
79
- type: :person,
82
+ kind: :person,
80
83
  aliases: ["Paula", "P. Chen"]
81
84
  )
82
85
 
83
86
  sarah = facts.entity_service.create(
84
87
  "Sarah Johnson",
85
- type: :person,
88
+ kind: :person,
86
89
  aliases: ["Sarah"]
87
90
  )
88
91
 
89
92
  platform_team = facts.entity_service.create(
90
93
  "Platform Team",
91
- type: :organization
94
+ kind: :organization
92
95
  )
93
96
  ```
94
97
 
@@ -104,7 +107,7 @@ fact = facts.fact_service.create(
104
107
  { entity: paula, role: "subject", text: "Paula Chen" }
105
108
  ],
106
109
  sources: [
107
- { content: content, type: "primary" }
110
+ { source: source, type: "primary" }
108
111
  ]
109
112
  )
110
113
  ```
@@ -113,10 +116,10 @@ fact = facts.fact_service.create(
113
116
 
114
117
  ```ruby
115
118
  # Extract facts automatically using LLM
116
- extracted = facts.extract_facts(content.id, extractor: :llm)
119
+ extracted = facts.extract_facts(source.id, extractor: :llm)
117
120
 
118
121
  extracted.each do |fact|
119
- puts "Extracted: #{fact.fact_text}"
122
+ puts "Extracted: #{fact.text}"
120
123
  puts " Valid from: #{fact.valid_at}"
121
124
  end
122
125
  ```
@@ -126,7 +129,7 @@ end
126
129
  ```ruby
127
130
  # Get current facts about Paula
128
131
  current = facts.current_facts_for(paula.id)
129
- current.each { |f| puts f.fact_text }
132
+ current.each { |f| puts f.text }
130
133
 
131
134
  # Get facts valid at a specific date
132
135
  historical = facts.facts_at(
@@ -144,7 +147,7 @@ team_facts = facts.query_facts(topic: "Platform Team")
144
147
  timeline = facts.timeline_for(paula.id)
145
148
 
146
149
  timeline.each do |entry|
147
- puts "#{entry[:date]}: #{entry[:fact].fact_text}"
150
+ puts "#{entry[:date]}: #{entry[:fact].text}"
148
151
  end
149
152
  ```
150
153
 
@@ -155,32 +158,32 @@ require 'fact_db'
155
158
 
156
159
  # Configure
157
160
  FactDb.configure do |config|
158
- config.database_url = ENV['DATABASE_URL']
159
- config.llm_provider = :openai
160
- config.llm_api_key = ENV['OPENAI_API_KEY']
161
+ config.database.url = ENV['DATABASE_URL']
162
+ config.llm.provider = :openai
163
+ config.llm.api_key = ENV['OPENAI_API_KEY']
161
164
  end
162
165
 
163
166
  # Create facts instance
164
167
  facts = FactDb.new
165
168
 
166
169
  # Ingest content
167
- content = facts.ingest(
170
+ source = facts.ingest(
168
171
  "Paula Chen joined Microsoft as Principal Engineer on January 10, 2024.",
169
- type: :announcement,
172
+ kind: :announcement,
170
173
  captured_at: Time.current
171
174
  )
172
175
 
173
176
  # Create entities
174
- paula = facts.entity_service.create("Paula Chen", type: :person)
175
- microsoft = facts.entity_service.create("Microsoft", type: :organization)
177
+ paula = facts.entity_service.create("Paula Chen", kind: :person)
178
+ microsoft = facts.entity_service.create("Microsoft", kind: :organization)
176
179
 
177
180
  # Extract facts via LLM
178
- extracted = facts.extract_facts(content.id, extractor: :llm)
181
+ extracted = facts.extract_facts(source.id, extractor: :llm)
179
182
 
180
183
  # Query
181
184
  puts "Current facts about Paula:"
182
185
  facts.current_facts_for(paula.id).each do |fact|
183
- puts " - #{fact.fact_text}"
186
+ puts " - #{fact.text}"
184
187
  end
185
188
  ```
186
189
 
@@ -20,10 +20,10 @@ Process content one at a time:
20
20
  ```ruby
21
21
  facts = FactDb.new
22
22
 
23
- content_ids = [content1.id, content2.id, content3.id]
23
+ source_ids = [source1.id, source2.id, source3.id]
24
24
 
25
25
  results = facts.batch_extract(
26
- content_ids,
26
+ source_ids,
27
27
  extractor: :llm,
28
28
  parallel: false
29
29
  )
@@ -35,13 +35,13 @@ Process content concurrently (default):
35
35
 
36
36
  ```ruby
37
37
  results = facts.batch_extract(
38
- content_ids,
38
+ source_ids,
39
39
  extractor: :llm,
40
40
  parallel: true # default
41
41
  )
42
42
 
43
43
  results.each do |result|
44
- puts "Content #{result[:content_id]}:"
44
+ puts "Source #{result[:source_id]}:"
45
45
  puts " Facts extracted: #{result[:facts].count}"
46
46
  puts " Error: #{result[:error]}" if result[:error]
47
47
  end
@@ -51,7 +51,7 @@ end
51
51
 
52
52
  ```ruby
53
53
  result = {
54
- content_id: 123,
54
+ source_id: 123,
55
55
  facts: [<Fact>, <Fact>, ...], # Extracted facts
56
56
  error: nil # Error message if failed
57
57
  }
@@ -75,7 +75,7 @@ results = facts.batch_resolve_entities(names, type: nil)
75
75
  results.each do |result|
76
76
  case result[:status]
77
77
  when :resolved
78
- puts "#{result[:name]} -> #{result[:entity].canonical_name}"
78
+ puts "#{result[:name]} -> #{result[:entity].name}"
79
79
  when :not_found
80
80
  puts "#{result[:name]} -> Not found"
81
81
  when :error
@@ -104,9 +104,9 @@ results.each do |result|
104
104
  if result[:conflict_count] > 0
105
105
  puts "Entity #{result[:entity_id]} has #{result[:conflict_count]} conflicts:"
106
106
  result[:conflicts].each do |conflict|
107
- puts " #{conflict[:fact1].fact_text}"
107
+ puts " #{conflict[:fact1].text}"
108
108
  puts " vs"
109
- puts " #{conflict[:fact2].fact_text}"
109
+ puts " #{conflict[:fact2].text}"
110
110
  puts " Similarity: #{conflict[:similarity]}"
111
111
  end
112
112
  end
@@ -153,7 +153,7 @@ pipeline = SimpleFlow::Pipeline.new do
153
153
  # Step 1: Validate
154
154
  step ->(result) {
155
155
  content = result.value
156
- if content.raw_text.blank?
156
+ if content.content.blank?
157
157
  result.halt("Empty content")
158
158
  else
159
159
  result.continue(content)
@@ -182,7 +182,7 @@ result = pipeline.call(SimpleFlow::Result.new(content))
182
182
  ### Graceful Degradation
183
183
 
184
184
  ```ruby
185
- results = facts.batch_extract(content_ids, extractor: :llm)
185
+ results = facts.batch_extract(source_ids, extractor: :llm)
186
186
 
187
187
  successful = results.select { |r| r[:error].nil? }
188
188
  failed = results.reject { |r| r[:error].nil? }
@@ -192,7 +192,7 @@ puts "Failed: #{failed.count}"
192
192
 
193
193
  # Retry failed items with different extractor
194
194
  if failed.any?
195
- retry_ids = failed.map { |r| r[:content_id] }
195
+ retry_ids = failed.map { |r| r[:source_id] }
196
196
  retry_results = facts.batch_extract(retry_ids, extractor: :rule_based)
197
197
  end
198
198
  ```
@@ -204,7 +204,7 @@ results.each do |result|
204
204
  if result[:error]
205
205
  logger.error(
206
206
  "Extraction failed",
207
- content_id: result[:content_id],
207
+ source_id: result[:source_id],
208
208
  error: result[:error]
209
209
  )
210
210
  end
@@ -217,7 +217,7 @@ end
217
217
 
218
218
  ```ruby
219
219
  # Process in batches of 10-50 for optimal performance
220
- content_ids.each_slice(25) do |batch|
220
+ source_ids.each_slice(25) do |batch|
221
221
  results = facts.batch_extract(batch, parallel: true)
222
222
  process_results(results)
223
223
  end
@@ -228,7 +228,7 @@ end
228
228
  For LLM extraction, add delays between batches:
229
229
 
230
230
  ```ruby
231
- content_ids.each_slice(10) do |batch|
231
+ source_ids.each_slice(10) do |batch|
232
232
  results = facts.batch_extract(batch, extractor: :llm)
233
233
  process_results(results)
234
234
  sleep(2) # Rate limit
@@ -239,7 +239,7 @@ end
239
239
 
240
240
  ```ruby
241
241
  # Process results immediately to avoid memory buildup
242
- content_ids.each_slice(50) do |batch|
242
+ source_ids.each_slice(50) do |batch|
243
243
  results = facts.batch_extract(batch)
244
244
 
245
245
  results.each do |result|
@@ -259,7 +259,7 @@ Track batch processing metrics:
259
259
  ```ruby
260
260
  start_time = Time.now
261
261
 
262
- results = facts.batch_extract(content_ids, parallel: true)
262
+ results = facts.batch_extract(source_ids, parallel: true)
263
263
 
264
264
  duration = Time.now - start_time
265
265
  success_rate = results.count { |r| r[:error].nil? }.to_f / results.count
@@ -275,18 +275,18 @@ puts "Items/second: #{(results.count / duration).round(2)}"
275
275
 
276
276
  ```ruby
277
277
  # Sequential for small batches (< 5 items)
278
- if content_ids.count < 5
279
- results = facts.batch_extract(content_ids, parallel: false)
278
+ if source_ids.count < 5
279
+ results = facts.batch_extract(source_ids, parallel: false)
280
280
  else
281
- results = facts.batch_extract(content_ids, parallel: true)
281
+ results = facts.batch_extract(source_ids, parallel: true)
282
282
  end
283
283
  ```
284
284
 
285
285
  ### 2. Handle Partial Failures
286
286
 
287
287
  ```ruby
288
- def process_batch(content_ids)
289
- results = facts.batch_extract(content_ids)
288
+ def process_batch(source_ids)
289
+ results = facts.batch_extract(source_ids)
290
290
 
291
291
  {
292
292
  successful: results.select { |r| r[:error].nil? },
@@ -294,17 +294,17 @@ def process_batch(content_ids)
294
294
  }
295
295
  end
296
296
 
297
- batch_result = process_batch(content_ids)
297
+ batch_result = process_batch(source_ids)
298
298
  retry_failed(batch_result[:failed]) if batch_result[:failed].any?
299
299
  ```
300
300
 
301
301
  ### 3. Log Progress
302
302
 
303
303
  ```ruby
304
- total = content_ids.count
304
+ total = source_ids.count
305
305
  processed = 0
306
306
 
307
- content_ids.each_slice(25) do |batch|
307
+ source_ids.each_slice(25) do |batch|
308
308
  results = facts.batch_extract(batch)
309
309
  processed += batch.count
310
310
 
@@ -316,10 +316,10 @@ end
316
316
 
317
317
  ```ruby
318
318
  # LLM for complex documents
319
- complex_docs = contents.select { |c| c.raw_text.length > 1000 }
319
+ complex_docs = sources.select { |s| s.content.length > 1000 }
320
320
  facts.batch_extract(complex_docs.map(&:id), extractor: :llm)
321
321
 
322
322
  # Rule-based for simple, structured content
323
- simple_docs = contents.select { |c| c.raw_text.length <= 1000 }
323
+ simple_docs = sources.select { |s| s.content.length <= 1000 }
324
324
  facts.batch_extract(simple_docs.map(&:id), extractor: :rule_based)
325
325
  ```