fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
data/lib/fact_db.rb CHANGED
@@ -9,7 +9,7 @@ require_relative "fact_db/config"
9
9
  require_relative "fact_db/database"
10
10
 
11
11
  # Models
12
- require_relative "fact_db/models/content"
12
+ require_relative "fact_db/models/source"
13
13
  require_relative "fact_db/models/entity"
14
14
  require_relative "fact_db/models/entity_alias"
15
15
  require_relative "fact_db/models/fact"
@@ -19,11 +19,15 @@ require_relative "fact_db/models/fact_source"
19
19
  # Temporal queries
20
20
  require_relative "fact_db/temporal/query"
21
21
  require_relative "fact_db/temporal/timeline"
22
+ require_relative "fact_db/temporal/query_builder"
22
23
 
23
24
  # Resolution
24
25
  require_relative "fact_db/resolution/entity_resolver"
25
26
  require_relative "fact_db/resolution/fact_resolver"
26
27
 
28
+ # Validation
29
+ require_relative "fact_db/validation/alias_filter"
30
+
27
31
  # Extractors
28
32
  require_relative "fact_db/extractors/base"
29
33
  require_relative "fact_db/extractors/manual_extractor"
@@ -38,31 +42,49 @@ require_relative "fact_db/pipeline/extraction_pipeline"
38
42
  require_relative "fact_db/pipeline/resolution_pipeline"
39
43
 
40
44
  # Services
41
- require_relative "fact_db/services/content_service"
45
+ require_relative "fact_db/services/source_service"
42
46
  require_relative "fact_db/services/entity_service"
43
47
  require_relative "fact_db/services/fact_service"
44
48
 
49
+ # Transformers (output formatting)
50
+ require_relative "fact_db/transformers/base"
51
+ require_relative "fact_db/transformers/raw_transformer"
52
+ require_relative "fact_db/transformers/json_transformer"
53
+ require_relative "fact_db/transformers/triple_transformer"
54
+ require_relative "fact_db/transformers/cypher_transformer"
55
+ require_relative "fact_db/transformers/text_transformer"
56
+
57
+ # Query Result
58
+ require_relative "fact_db/query_result"
59
+
45
60
  module FactDb
46
61
  class Facts
47
- attr_reader :config, :content_service, :entity_service, :fact_service,
62
+ # Available output formats for LLM consumption
63
+ FORMATS = %i[raw json triples cypher text].freeze
64
+
65
+ # Available retrieval strategies
66
+ STRATEGIES = %i[auto semantic fulltext graph temporal hybrid].freeze
67
+
68
+ attr_reader :config, :source_service, :entity_service, :fact_service,
48
69
  :extraction_pipeline, :resolution_pipeline
49
70
 
50
71
  def initialize(config: nil)
51
72
  @config = config || FactDb.config
52
73
  Database.establish_connection!(@config)
53
74
 
54
- @content_service = Services::ContentService.new(@config)
75
+ @source_service = Services::SourceService.new(@config)
55
76
  @entity_service = Services::EntityService.new(@config)
56
77
  @fact_service = Services::FactService.new(@config)
57
78
  @extraction_pipeline = Pipeline::ExtractionPipeline.new(@config)
58
79
  @resolution_pipeline = Pipeline::ResolutionPipeline.new(@config)
80
+ @transformers = build_transformers
59
81
  end
60
82
 
61
- # Ingest raw content into the event clock
62
- def ingest(raw_text, type:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
63
- @content_service.create(
64
- raw_text,
65
- type: type,
83
+ # Ingest raw content
84
+ def ingest(content, kind:, captured_at: Time.current, metadata: {}, title: nil, source_uri: nil)
85
+ @source_service.create(
86
+ content,
87
+ kind: kind,
66
88
  captured_at: captured_at,
67
89
  metadata: metadata,
68
90
  title: title,
@@ -70,19 +92,27 @@ module FactDb
70
92
  )
71
93
  end
72
94
 
73
- # Extract facts from content
74
- def extract_facts(content_id, extractor: @config.default_extractor)
75
- @fact_service.extract_from_content(content_id, extractor: extractor)
95
+ # Extract facts from source
96
+ def extract_facts(source_id, extractor: @config.default_extractor)
97
+ @fact_service.extract_from_source(source_id, extractor: extractor)
76
98
  end
77
99
 
78
100
  # Query facts with temporal and entity filtering
79
- def query_facts(topic: nil, at: nil, entity: nil, status: :canonical)
80
- @fact_service.query(topic: topic, at: at, entity: entity, status: status)
101
+ #
102
+ # @param topic [String, nil] Topic to search for
103
+ # @param at [Date, Time, String, nil] Point in time for temporal query
104
+ # @param entity [Integer, nil] Entity ID to filter by
105
+ # @param status [Symbol] Fact status (:canonical, :superseded, :synthesized, :all)
106
+ # @param format [Symbol] Output format (:raw, :json, :triples, :cypher, :text)
107
+ # @return [QueryResult, Array, String, Hash] QueryResult for :json; transformer output for other formats
108
+ def query_facts(topic: nil, at: nil, entity: nil, status: :canonical, format: :json)
109
+ results = @fact_service.query(topic: topic, at: at, entity: entity, status: status)
110
+ transform_results(results, topic: topic, format: format)
81
111
  end
82
112
 
83
113
  # Resolve a name to an entity
84
- def resolve_entity(name, type: nil)
85
- @entity_service.resolve(name, type: type)
114
+ def resolve_entity(name, kind: nil)
115
+ @entity_service.resolve(name, kind: kind)
86
116
  end
87
117
 
88
118
  # Build a timeline for an entity
@@ -91,37 +121,152 @@ module FactDb
91
121
  end
92
122
 
93
123
  # Get currently valid facts about an entity
94
- def current_facts_for(entity_id)
95
- query_facts(entity: entity_id, at: nil, status: :canonical)
124
+ #
125
+ # @param entity_id [Integer] Entity ID
126
+ # @param format [Symbol] Output format
127
+ # @return [QueryResult, Array, String, Hash] QueryResult for :json; transformer output for other formats
128
+ def current_facts_for(entity_id, format: :json)
129
+ results = @fact_service.current_facts(entity: entity_id)
130
+ transform_results(results, topic: "entity_#{entity_id}", format: format)
96
131
  end
97
132
 
98
133
  # Get facts valid at a specific point in time
99
- def facts_at(at, entity: nil, topic: nil)
100
- query_facts(at: at, entity: entity, topic: topic, status: :canonical)
134
+ #
135
+ # @param at [Date, Time, String] Point in time
136
+ # @param entity [Integer, nil] Entity ID to filter by
137
+ # @param topic [String, nil] Topic to search for
138
+ # @param format [Symbol] Output format
139
+ # @return [QueryResult, Array, String, Hash] QueryResult for :json; transformer output for other formats
140
+ def facts_at(at, entity: nil, topic: nil, format: :json)
141
+ results = @fact_service.facts_at(at, entity: entity, topic: topic)
142
+ transform_results(results, topic: topic || "facts_at_#{at}", format: format)
143
+ end
144
+
145
+ # Temporal query builder - query at a specific point in time
146
+ #
147
+ # @param date [Date, Time, String] Point in time
148
+ # @return [Temporal::QueryBuilder] Scoped query builder
149
+ #
150
+ # @example
151
+ # facts.at("2024-01-15").query("Paula's role", format: :cypher)
152
+ # facts.at("2024-01-15").facts_for(entity_id)
153
+ # facts.at("2024-01-15").compare_to("2024-06-15")
154
+ def at(date)
155
+ Temporal::QueryBuilder.new(self, parse_date(date))
156
+ end
157
+
158
+ # Compare what changed between two dates
159
+ #
160
+ # @param topic [String, nil] Topic to compare (nil for all facts)
161
+ # @param from [Date, Time, String] Start date
162
+ # @param to [Date, Time, String] End date
163
+ # @return [Hash] Comparison with :topic, :from, :to, :added, :removed, :unchanged keys
164
+ def diff(topic = nil, from:, to:)
165
+ from_date = parse_date(from)
166
+ to_date = parse_date(to)
167
+
168
+ from_results = @fact_service.query(topic: topic, at: from_date, status: :canonical)
169
+ to_results = @fact_service.query(topic: topic, at: to_date, status: :canonical)
170
+
171
+ from_set = facts_to_comparable(from_results)
172
+ to_set = facts_to_comparable(to_results)
173
+
174
+ {
175
+ topic: topic,
176
+ from: from_date,
177
+ to: to_date,
178
+ added: to_results.select { |f| !from_set.include?(comparable_key(f)) },
179
+ removed: from_results.select { |f| !to_set.include?(comparable_key(f)) },
180
+ unchanged: from_results.select { |f| to_set.include?(comparable_key(f)) }
181
+ }
182
+ end
183
+
184
+ # Introspect the schema - what does the layer know about?
185
+ #
186
+ # @param topic [String, nil] Optional topic to introspect specifically
187
+ # @return [Hash] Schema information or topic-specific coverage
188
+ def introspect(topic = nil)
189
+ topic ? introspect_topic(topic) : introspect_schema
190
+ end
191
+
192
+ # Suggest queries based on what's stored for a topic
193
+ #
194
+ # @param topic [String] Topic to get suggestions for
195
+ # @return [Array<String>] Suggested queries
196
+ def suggest_queries(topic)
197
+ resolved = resolve_entity(topic)
198
+ return [] unless resolved
199
+
200
+ entity = resolved.respond_to?(:entity) ? resolved.entity : resolved
201
+ suggestions = []
202
+
203
+ entity_kind = entity.respond_to?(:kind) ? entity.kind : nil
204
+ suggestions << "current status" if entity_kind == "person"
205
+
206
+ # Check relationships
207
+ relationships = @entity_service.relationship_types_for(entity.id)
208
+ suggestions << "employment history" if relationships.include?(:works_at) || relationships.include?(:object)
209
+ suggestions << "team members" if relationships.include?(:works_with)
210
+ suggestions << "reporting chain" if relationships.include?(:reports_to)
211
+
212
+ # Check fact coverage
213
+ fact_stats = @fact_service.fact_stats(entity.id)
214
+ suggestions << "timeline" if fact_stats[:canonical]&.positive?
215
+ suggestions << "historical changes" if fact_stats[:superseded]&.positive?
216
+
217
+ suggestions
218
+ end
219
+
220
+ # Suggest retrieval strategies for a query
221
+ #
222
+ # @param query_text [String] The query
223
+ # @return [Array<Hash>] Strategy options with descriptions
224
+ def suggest_strategies(query_text)
225
+ strategies = []
226
+
227
+ # Check for temporal keywords
228
+ if query_text.match?(/\b(yesterday|last\s+week|last\s+month|ago|since|before|after|between)\b/i)
229
+ strategies << { strategy: :temporal, description: "Filter by date range" }
230
+ end
231
+
232
+ # Check for semantic intent
233
+ if query_text.match?(/\b(about|related|similar|like)\b/i)
234
+ strategies << { strategy: :semantic, description: "Search by semantic similarity" }
235
+ end
236
+
237
+ # Check for entity focus
238
+ if query_text.match?(/\b(who|what|where)\b/i)
239
+ strategies << { strategy: :graph, description: "Traverse from entity node" }
240
+ end
241
+
242
+ # Default: hybrid
243
+ strategies << { strategy: :hybrid, description: "Combine multiple strategies" }
244
+
245
+ strategies
101
246
  end
102
247
 
103
- # Batch extract facts from multiple content items
248
+ # Batch extract facts from multiple sources
104
249
  #
105
- # @param content_ids [Array<Integer>] Content IDs to process
250
+ # @param source_ids [Array<Integer>] Source IDs to process
106
251
  # @param extractor [Symbol] Extractor type (:manual, :llm, :rule_based)
107
252
  # @param parallel [Boolean] Whether to use parallel processing
108
- # @return [Array<Hash>] Results with extracted facts per content
109
- def batch_extract(content_ids, extractor: @config.default_extractor, parallel: true)
110
- contents = Models::Content.where(id: content_ids).to_a
253
+ # @return [Array<Hash>] Results with extracted facts per source
254
+ def batch_extract(source_ids, extractor: @config.default_extractor, parallel: true)
255
+ sources = Models::Source.where(id: source_ids).to_a
111
256
  if parallel
112
- @extraction_pipeline.process_parallel(contents, extractor: extractor)
257
+ @extraction_pipeline.process_parallel(sources, extractor: extractor)
113
258
  else
114
- @extraction_pipeline.process(contents, extractor: extractor)
259
+ @extraction_pipeline.process(sources, extractor: extractor)
115
260
  end
116
261
  end
117
262
 
118
263
  # Batch resolve entity names
119
264
  #
120
265
  # @param names [Array<String>] Entity names to resolve
121
- # @param type [Symbol, nil] Entity type filter
266
+ # @param kind [Symbol, nil] Entity kind filter
122
267
  # @return [Array<Hash>] Resolution results
123
- def batch_resolve_entities(names, type: nil)
124
- @resolution_pipeline.resolve_entities(names, type: type)
268
+ def batch_resolve_entities(names, kind: nil)
269
+ @resolution_pipeline.resolve_entities(names, kind: kind)
125
270
  end
126
271
 
127
272
  # Detect fact conflicts for multiple entities
@@ -131,6 +276,112 @@ module FactDb
131
276
  def detect_fact_conflicts(entity_ids)
132
277
  @resolution_pipeline.detect_conflicts(entity_ids)
133
278
  end
279
+
280
+ private
281
+
282
+ def build_transformers
283
+ {
284
+ raw: Transformers::RawTransformer.new,
285
+ json: Transformers::JsonTransformer.new,
286
+ triples: Transformers::TripleTransformer.new,
287
+ cypher: Transformers::CypherTransformer.new,
288
+ text: Transformers::TextTransformer.new
289
+ }
290
+ end
291
+
292
+ def transform_results(results, topic:, format:)
293
+ validate_format!(format)
294
+
295
+ query_result = QueryResult.new(query: topic || "query")
296
+ query_result.add_facts(results)
297
+ query_result.resolve_entities(@entity_service)
298
+
299
+ # Return QueryResult directly for :json format to support fluent API methods
300
+ # like each_fact, fact_count, etc. Use query_result.to_h for Hash output.
301
+ return query_result if format == :json
302
+
303
+ @transformers[format].transform(query_result)
304
+ end
305
+
306
+ def validate_format!(format)
307
+ return if FORMATS.include?(format)
308
+
309
+ raise ArgumentError, "Unknown format: #{format}. Available: #{FORMATS.join(', ')}"
310
+ end
311
+
312
+ def parse_date(date)
313
+ return nil if date.nil?
314
+ return date if date.is_a?(Date) || date.is_a?(Time)
315
+
316
+ Date.parse(date.to_s)
317
+ rescue ArgumentError
318
+ nil
319
+ end
320
+
321
+ def introspect_schema
322
+ {
323
+ capabilities: collect_capabilities,
324
+ entity_kinds: Models::Entity.distinct.pluck(:kind).compact,
325
+ fact_statuses: %w[canonical superseded corroborated synthesized],
326
+ extraction_methods: %w[manual llm rule_based],
327
+ output_formats: FORMATS,
328
+ retrieval_strategies: STRATEGIES,
329
+ statistics: collect_statistics
330
+ }
331
+ end
332
+
333
+ def introspect_topic(topic)
334
+ resolved = resolve_entity(topic)
335
+ return nil unless resolved
336
+
337
+ entity = resolved.respond_to?(:entity) ? resolved.entity : resolved
338
+
339
+ {
340
+ entity: entity_info(entity),
341
+ coverage: {
342
+ facts: @fact_service.fact_stats(entity.id),
343
+ timespan: @entity_service.timespan_for(entity.id)
344
+ },
345
+ relationships: @entity_service.relationship_types_for(entity.id),
346
+ suggested_queries: suggest_queries(topic)
347
+ }
348
+ end
349
+
350
+ def collect_capabilities
351
+ capabilities = [:temporal_query, :entity_resolution, :introspection]
352
+
353
+ capabilities << :semantic_search if @config.embedding_generator
354
+ capabilities << :llm_extraction if @config.llm_client || @config.llm&.provider
355
+
356
+ capabilities
357
+ end
358
+
359
+ def collect_statistics
360
+ {
361
+ facts: @fact_service.stats,
362
+ entities: @entity_service.stats,
363
+ sources: @source_service.stats
364
+ }
365
+ end
366
+
367
+ def entity_info(entity)
368
+ {
369
+ id: entity.id,
370
+ name: entity.name,
371
+ kind: entity.kind,
372
+ resolution_status: entity.resolution_status,
373
+ aliases: entity.aliases.map { |a| { name: a.name, kind: a.kind } }
374
+ }
375
+ end
376
+
377
+ def facts_to_comparable(facts)
378
+ facts.map { |f| comparable_key(f) }.to_set
379
+ end
380
+
381
+ def comparable_key(fact)
382
+ text = fact.respond_to?(:text) ? fact.text : fact[:text]
383
+ "#{text}".downcase.strip
384
+ end
134
385
  end
135
386
 
136
387
  class << self
data/mkdocs.yml CHANGED
@@ -174,12 +174,12 @@ nav:
174
174
  - Facts: api/facts.md
175
175
  - Models:
176
176
  - api/models/index.md
177
- - Content: api/models/content.md
177
+ - Source: api/models/source.md
178
178
  - Entity: api/models/entity.md
179
179
  - Fact: api/models/fact.md
180
180
  - Services:
181
181
  - api/services/index.md
182
- - ContentService: api/services/content-service.md
182
+ - SourceService: api/services/source-service.md
183
183
  - EntityService: api/services/entity-service.md
184
184
  - FactService: api/services/fact-service.md
185
185
  - Extractors:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fact_db
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dewayne VanHoozer
@@ -52,19 +52,19 @@ dependencies:
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0.3'
54
54
  - !ruby/object:Gem::Dependency
55
- name: anyway_config
55
+ name: myway_config
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - ">="
58
+ - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '2.0'
60
+ version: 0.1.1
61
61
  type: :runtime
62
62
  prerelease: false
63
63
  version_requirements: !ruby/object:Gem::Requirement
64
64
  requirements:
65
- - - ">="
65
+ - - "~>"
66
66
  - !ruby/object:Gem::Version
67
- version: '2.0'
67
+ version: 0.1.1
68
68
  - !ruby/object:Gem::Dependency
69
69
  name: chronic
70
70
  requirement: !ruby/object:Gem::Requirement
@@ -149,6 +149,20 @@ dependencies:
149
149
  - - ">="
150
150
  - !ruby/object:Gem::Version
151
151
  version: '0'
152
+ - !ruby/object:Gem::Dependency
153
+ name: timecop
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ type: :development
160
+ prerelease: false
161
+ version_requirements: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
152
166
  - !ruby/object:Gem::Dependency
153
167
  name: yard
154
168
  requirement: !ruby/object:Gem::Requirement
@@ -175,12 +189,13 @@ extensions: []
175
189
  extra_rdoc_files: []
176
190
  files:
177
191
  - ".envrc"
192
+ - ".yardopts"
178
193
  - CHANGELOG.md
179
194
  - COMMITS.md
180
195
  - README.md
181
196
  - Rakefile
182
197
  - db/migrate/001_enable_extensions.rb
183
- - db/migrate/002_create_contents.rb
198
+ - db/migrate/002_create_sources.rb
184
199
  - db/migrate/003_create_entities.rb
185
200
  - db/migrate/004_create_entity_aliases.rb
186
201
  - db/migrate/005_create_facts.rb
@@ -192,17 +207,17 @@ files:
192
207
  - docs/api/extractors/rule-based.md
193
208
  - docs/api/facts.md
194
209
  - docs/api/index.md
195
- - docs/api/models/content.md
196
210
  - docs/api/models/entity.md
197
211
  - docs/api/models/fact.md
198
212
  - docs/api/models/index.md
213
+ - docs/api/models/source.md
199
214
  - docs/api/pipeline/extraction.md
200
215
  - docs/api/pipeline/index.md
201
216
  - docs/api/pipeline/resolution.md
202
- - docs/api/services/content-service.md
203
217
  - docs/api/services/entity-service.md
204
218
  - docs/api/services/fact-service.md
205
219
  - docs/api/services/index.md
220
+ - docs/api/services/source-service.md
206
221
  - docs/architecture/database-schema.md
207
222
  - docs/architecture/entity-resolution.md
208
223
  - docs/architecture/index.md
@@ -229,14 +244,34 @@ files:
229
244
  - docs/guides/llm-integration.md
230
245
  - docs/guides/temporal-queries.md
231
246
  - docs/index.md
247
+ - examples/.envrc
248
+ - examples/.gitignore
249
+ - examples/001_configuration.rb
250
+ - examples/010_basic_usage.rb
251
+ - examples/020_entity_management.rb
252
+ - examples/030_temporal_queries.rb
253
+ - examples/040_output_formats.rb
254
+ - examples/050_rule_based_extraction.rb
255
+ - examples/060_fluent_temporal_api.rb
256
+ - examples/070_introspection.rb
257
+ - examples/080_hr_system.rb
258
+ - examples/090_ingest_demo.rb
259
+ - examples/100_query_context.rb
260
+ - examples/110_prove_it.rb
261
+ - examples/120_dump_database.rb
262
+ - examples/130_rag_feedback_loop.rb
232
263
  - examples/README.md
233
- - examples/basic_usage.rb
234
- - examples/entity_management.rb
235
- - examples/hr_system.rb
236
- - examples/rule_based_extraction.rb
237
- - examples/temporal_queries.rb
264
+ - examples/data/lincoln_associates.md
265
+ - examples/data/lincoln_biography.md
266
+ - examples/data/lincoln_cabinet.md
267
+ - examples/data/lincoln_family.md
268
+ - examples/data/lincoln_military.md
269
+ - examples/data/lincoln_todd_family.md
270
+ - examples/ingest_reporter.rb
271
+ - examples/utilities.rb
238
272
  - lib/fact_db.rb
239
273
  - lib/fact_db/config.rb
274
+ - lib/fact_db/config/defaults.yml
240
275
  - lib/fact_db/database.rb
241
276
  - lib/fact_db/errors.rb
242
277
  - lib/fact_db/extractors/base.rb
@@ -244,21 +279,30 @@ files:
244
279
  - lib/fact_db/extractors/manual_extractor.rb
245
280
  - lib/fact_db/extractors/rule_based_extractor.rb
246
281
  - lib/fact_db/llm/adapter.rb
247
- - lib/fact_db/models/content.rb
248
282
  - lib/fact_db/models/entity.rb
249
283
  - lib/fact_db/models/entity_alias.rb
250
284
  - lib/fact_db/models/entity_mention.rb
251
285
  - lib/fact_db/models/fact.rb
252
286
  - lib/fact_db/models/fact_source.rb
287
+ - lib/fact_db/models/source.rb
253
288
  - lib/fact_db/pipeline/extraction_pipeline.rb
254
289
  - lib/fact_db/pipeline/resolution_pipeline.rb
290
+ - lib/fact_db/query_result.rb
255
291
  - lib/fact_db/resolution/entity_resolver.rb
256
292
  - lib/fact_db/resolution/fact_resolver.rb
257
- - lib/fact_db/services/content_service.rb
258
293
  - lib/fact_db/services/entity_service.rb
259
294
  - lib/fact_db/services/fact_service.rb
295
+ - lib/fact_db/services/source_service.rb
260
296
  - lib/fact_db/temporal/query.rb
297
+ - lib/fact_db/temporal/query_builder.rb
261
298
  - lib/fact_db/temporal/timeline.rb
299
+ - lib/fact_db/transformers/base.rb
300
+ - lib/fact_db/transformers/cypher_transformer.rb
301
+ - lib/fact_db/transformers/json_transformer.rb
302
+ - lib/fact_db/transformers/raw_transformer.rb
303
+ - lib/fact_db/transformers/text_transformer.rb
304
+ - lib/fact_db/transformers/triple_transformer.rb
305
+ - lib/fact_db/validation/alias_filter.rb
262
306
  - lib/fact_db/version.rb
263
307
  - mkdocs.yml
264
308
  homepage: https://github.com/MadBomber/fact_db
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class CreateContents < ActiveRecord::Migration[7.0]
4
- def change
5
- create_table :fact_db_contents do |t|
6
- # Content identification
7
- t.string :content_hash, null: false, limit: 64
8
- t.string :content_type, null: false, limit: 50
9
-
10
- # The raw content (immutable)
11
- t.text :raw_text, null: false
12
- t.string :title, limit: 500
13
-
14
- # Source metadata
15
- t.text :source_uri
16
- t.jsonb :source_metadata, null: false, default: {}
17
-
18
- # Vector embedding for semantic search
19
- t.vector :embedding, limit: 1536
20
-
21
- # Timestamps
22
- t.timestamptz :captured_at, null: false
23
- t.timestamps
24
- end
25
-
26
- add_index :fact_db_contents, :content_hash, unique: true
27
- add_index :fact_db_contents, :captured_at
28
- add_index :fact_db_contents, :content_type
29
- add_index :fact_db_contents, :source_metadata, using: :gin
30
-
31
- # Full-text search index
32
- execute <<-SQL
33
- CREATE INDEX idx_contents_fulltext ON fact_db_contents
34
- USING gin(to_tsvector('english', raw_text));
35
- SQL
36
-
37
- # HNSW index for vector similarity search (if pgvector supports it)
38
- # This creates a cosine similarity index for fast nearest neighbor queries
39
- execute <<-SQL
40
- CREATE INDEX idx_contents_embedding ON fact_db_contents
41
- USING hnsw (embedding vector_cosine_ops);
42
- SQL
43
- end
44
- end
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module FactDb
4
- module Models
5
- class Content < ActiveRecord::Base
6
- self.table_name = "fact_db_contents"
7
-
8
- has_many :fact_sources, class_name: "FactDb::Models::FactSource",
9
- foreign_key: :content_id, dependent: :destroy
10
- has_many :facts, through: :fact_sources
11
-
12
- validates :content_hash, presence: true, uniqueness: true
13
- validates :content_type, presence: true
14
- validates :raw_text, presence: true
15
- validates :captured_at, presence: true
16
-
17
- before_validation :generate_content_hash, on: :create
18
-
19
- # Content types
20
- TYPES = %w[email transcript document slack meeting_notes contract report].freeze
21
-
22
- validates :content_type, inclusion: { in: TYPES }, allow_nil: false
23
-
24
- scope :by_type, ->(type) { where(content_type: type) }
25
- scope :captured_between, ->(from, to) { where(captured_at: from..to) }
26
- scope :captured_after, ->(date) { where("captured_at >= ?", date) }
27
- scope :captured_before, ->(date) { where("captured_at <= ?", date) }
28
-
29
- # Full-text search
30
- scope :search_text, lambda { |query|
31
- where("to_tsvector('english', raw_text) @@ plainto_tsquery('english', ?)", query)
32
- }
33
-
34
- # Vector similarity search (requires neighbor gem configured)
35
- def self.nearest_neighbors(embedding, limit: 10)
36
- return none unless embedding
37
-
38
- order(Arel.sql("embedding <=> '#{embedding}'")).limit(limit)
39
- end
40
-
41
- def immutable?
42
- true
43
- end
44
-
45
- def word_count
46
- raw_text.split.size
47
- end
48
-
49
- def preview(length: 200)
50
- return raw_text if raw_text.length <= length
51
-
52
- "#{raw_text[0, length]}..."
53
- end
54
-
55
- private
56
-
57
- def generate_content_hash
58
- self.content_hash = Digest::SHA256.hexdigest(raw_text) if raw_text.present?
59
- end
60
- end
61
- end
62
- end