fact_db 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +2 -0
  3. data/.yardopts +5 -0
  4. data/CHANGELOG.md +64 -0
  5. data/README.md +107 -6
  6. data/Rakefile +243 -10
  7. data/db/migrate/001_enable_extensions.rb +1 -0
  8. data/db/migrate/002_create_sources.rb +49 -0
  9. data/db/migrate/003_create_entities.rb +27 -15
  10. data/db/migrate/004_create_entity_aliases.rb +20 -7
  11. data/db/migrate/005_create_facts.rb +37 -21
  12. data/db/migrate/006_create_entity_mentions.rb +14 -6
  13. data/db/migrate/007_create_fact_sources.rb +16 -8
  14. data/docs/api/extractors/index.md +5 -5
  15. data/docs/api/extractors/llm.md +17 -17
  16. data/docs/api/extractors/rule-based.md +14 -14
  17. data/docs/api/facts.md +20 -20
  18. data/docs/api/index.md +4 -4
  19. data/docs/api/models/entity.md +21 -21
  20. data/docs/api/models/fact.md +15 -15
  21. data/docs/api/models/index.md +7 -7
  22. data/docs/api/models/{content.md → source.md} +29 -29
  23. data/docs/api/pipeline/extraction.md +25 -25
  24. data/docs/api/pipeline/index.md +1 -1
  25. data/docs/api/pipeline/resolution.md +4 -4
  26. data/docs/api/services/entity-service.md +20 -20
  27. data/docs/api/services/fact-service.md +12 -12
  28. data/docs/api/services/index.md +5 -5
  29. data/docs/api/services/{content-service.md → source-service.md} +27 -27
  30. data/docs/architecture/database-schema.md +46 -46
  31. data/docs/architecture/entity-resolution.md +6 -6
  32. data/docs/architecture/index.md +10 -10
  33. data/docs/architecture/temporal-facts.md +5 -5
  34. data/docs/architecture/three-layer-model.md +17 -17
  35. data/docs/concepts.md +6 -6
  36. data/docs/examples/basic-usage.md +20 -20
  37. data/docs/examples/hr-onboarding.md +17 -17
  38. data/docs/examples/index.md +4 -4
  39. data/docs/examples/news-analysis.md +23 -23
  40. data/docs/getting-started/database-setup.md +28 -20
  41. data/docs/getting-started/index.md +3 -3
  42. data/docs/getting-started/quick-start.md +33 -30
  43. data/docs/guides/batch-processing.md +26 -26
  44. data/docs/guides/configuration.md +158 -77
  45. data/docs/guides/entity-management.md +14 -14
  46. data/docs/guides/extracting-facts.md +28 -28
  47. data/docs/guides/ingesting-content.md +14 -14
  48. data/docs/guides/llm-integration.md +40 -32
  49. data/docs/guides/temporal-queries.md +11 -11
  50. data/docs/index.md +6 -2
  51. data/examples/.envrc +4 -0
  52. data/examples/.gitignore +1 -0
  53. data/examples/001_configuration.rb +312 -0
  54. data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
  55. data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
  56. data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
  57. data/examples/040_output_formats.rb +177 -0
  58. data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
  59. data/examples/060_fluent_temporal_api.rb +217 -0
  60. data/examples/070_introspection.rb +252 -0
  61. data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
  62. data/examples/090_ingest_demo.rb +515 -0
  63. data/examples/100_query_context.rb +668 -0
  64. data/examples/110_prove_it.rb +204 -0
  65. data/examples/120_dump_database.rb +358 -0
  66. data/examples/130_rag_feedback_loop.rb +858 -0
  67. data/examples/README.md +229 -15
  68. data/examples/data/lincoln_associates.md +201 -0
  69. data/examples/data/lincoln_biography.md +66 -0
  70. data/examples/data/lincoln_cabinet.md +243 -0
  71. data/examples/data/lincoln_family.md +163 -0
  72. data/examples/data/lincoln_military.md +241 -0
  73. data/examples/data/lincoln_todd_family.md +136 -0
  74. data/examples/ingest_reporter.rb +335 -0
  75. data/examples/utilities.rb +182 -0
  76. data/lib/fact_db/config/defaults.yml +254 -0
  77. data/lib/fact_db/config.rb +94 -35
  78. data/lib/fact_db/database.rb +98 -8
  79. data/lib/fact_db/extractors/base.rb +106 -21
  80. data/lib/fact_db/extractors/llm_extractor.rb +35 -63
  81. data/lib/fact_db/extractors/manual_extractor.rb +46 -6
  82. data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
  83. data/lib/fact_db/llm/adapter.rb +3 -3
  84. data/lib/fact_db/models/entity.rb +94 -22
  85. data/lib/fact_db/models/entity_alias.rb +41 -7
  86. data/lib/fact_db/models/entity_mention.rb +34 -1
  87. data/lib/fact_db/models/fact.rb +259 -28
  88. data/lib/fact_db/models/fact_source.rb +43 -9
  89. data/lib/fact_db/models/source.rb +113 -0
  90. data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
  91. data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
  92. data/lib/fact_db/query_result.rb +202 -0
  93. data/lib/fact_db/resolution/entity_resolver.rb +139 -39
  94. data/lib/fact_db/resolution/fact_resolver.rb +86 -14
  95. data/lib/fact_db/services/entity_service.rb +246 -37
  96. data/lib/fact_db/services/fact_service.rb +254 -17
  97. data/lib/fact_db/services/source_service.rb +164 -0
  98. data/lib/fact_db/temporal/query.rb +71 -7
  99. data/lib/fact_db/temporal/query_builder.rb +69 -0
  100. data/lib/fact_db/temporal/timeline.rb +102 -11
  101. data/lib/fact_db/transformers/base.rb +77 -0
  102. data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
  103. data/lib/fact_db/transformers/json_transformer.rb +17 -0
  104. data/lib/fact_db/transformers/raw_transformer.rb +35 -0
  105. data/lib/fact_db/transformers/text_transformer.rb +114 -0
  106. data/lib/fact_db/transformers/triple_transformer.rb +138 -0
  107. data/lib/fact_db/validation/alias_filter.rb +185 -0
  108. data/lib/fact_db/version.rb +1 -1
  109. data/lib/fact_db.rb +281 -30
  110. data/mkdocs.yml +2 -2
  111. metadata +60 -16
  112. data/db/migrate/002_create_contents.rb +0 -44
  113. data/lib/fact_db/models/content.rb +0 -62
  114. data/lib/fact_db/services/content_service.rb +0 -93
@@ -2,13 +2,38 @@
2
2
 
3
3
  module FactDb
4
4
  module Temporal
5
+ # Executes temporal queries on facts with time-based filtering
6
+ #
7
+ # Provides methods for querying facts at specific points in time,
8
+ # comparing states between dates, and searching with temporal constraints.
9
+ #
10
+ # @example Query current facts about an entity
11
+ # query = Query.new
12
+ # facts = query.current_facts(entity_id: person.id)
13
+ #
14
+ # @example Compare facts at two points in time
15
+ # diff = query.diff(entity_id: person.id, from_date: Date.parse("2023-01-01"), to_date: Date.today)
16
+ # puts "Added: #{diff[:added].count}, Removed: #{diff[:removed].count}"
17
+ #
5
18
  class Query
19
+ # @return [ActiveRecord::Relation] the base scope for queries
6
20
  attr_reader :scope
7
21
 
22
+ # Initializes a new Query with an optional base scope
23
+ #
24
+ # @param scope [ActiveRecord::Relation] base fact scope (defaults to all facts)
8
25
  def initialize(scope = Models::Fact.all)
9
26
  @scope = scope
10
27
  end
11
28
 
29
+ # Executes a temporal query with multiple filters
30
+ #
31
+ # @param topic [String, nil] text to search for in fact content
32
+ # @param at [Date, Time, nil] point in time (nil for currently valid)
33
+ # @param entity_id [Integer, nil] filter by entity
34
+ # @param status [Symbol] fact status filter (:canonical, :superseded, :synthesized, :all)
35
+ # @param limit [Integer, nil] maximum number of results
36
+ # @return [ActiveRecord::Relation] matching facts ordered by valid_at desc
12
37
  def execute(topic: nil, at: nil, entity_id: nil, status: :canonical, limit: nil)
13
38
  result = @scope
14
39
 
@@ -33,31 +58,56 @@ module FactDb
33
58
  result
34
59
  end
35
60
 
36
- # Currently valid facts about an entity
61
+ # Returns currently valid canonical facts about an entity
62
+ #
63
+ # @param entity_id [Integer] the entity to query
64
+ # @return [ActiveRecord::Relation] currently valid facts mentioning the entity
37
65
  def current_facts(entity_id:)
38
66
  execute(entity_id: entity_id, at: nil, status: :canonical)
39
67
  end
40
68
 
41
- # Facts valid at a specific point in time
69
+ # Returns facts valid at a specific point in time
70
+ #
71
+ # @param date [Date, Time] the point in time to query
72
+ # @param entity_id [Integer, nil] optional entity filter
73
+ # @return [ActiveRecord::Relation] facts valid at the given date
42
74
  def facts_at(date, entity_id: nil)
43
75
  execute(at: date, entity_id: entity_id, status: :canonical)
44
76
  end
45
77
 
46
- # Facts that became valid in a date range
78
+ # Returns facts that became valid within a date range
79
+ #
80
+ # @param from [Date, Time] start of range (inclusive)
81
+ # @param to [Date, Time] end of range (inclusive)
82
+ # @param entity_id [Integer, nil] optional entity filter
83
+ # @return [ActiveRecord::Relation] facts created in the range, ordered by valid_at asc
47
84
  def facts_created_between(from:, to:, entity_id: nil)
48
85
  result = @scope.canonical.became_valid_between(from, to)
49
86
  result = result.mentioning_entity(entity_id) if entity_id
50
87
  result.order(valid_at: :asc)
51
88
  end
52
89
 
53
- # Facts that became invalid in a date range
90
+ # Returns facts that became invalid within a date range
91
+ #
92
+ # @param from [Date, Time] start of range (inclusive)
93
+ # @param to [Date, Time] end of range (inclusive)
94
+ # @param entity_id [Integer, nil] optional entity filter
95
+ # @return [ActiveRecord::Relation] facts invalidated in the range, ordered by invalid_at asc
54
96
  def facts_invalidated_between(from:, to:, entity_id: nil)
55
97
  result = @scope.became_invalid_between(from, to)
56
98
  result = result.mentioning_entity(entity_id) if entity_id
57
99
  result.order(invalid_at: :asc)
58
100
  end
59
101
 
60
- # Semantic search with temporal filtering
102
+ # Searches facts by text with temporal filtering
103
+ #
104
+ # Uses PostgreSQL full-text search with optional point-in-time filtering.
105
+ #
106
+ # @param query [String] text to search for
107
+ # @param at [Date, Time, nil] point in time (nil for currently valid)
108
+ # @param entity_id [Integer, nil] optional entity filter
109
+ # @param limit [Integer] maximum number of results (default: 20)
110
+ # @return [ActiveRecord::Relation] matching facts
61
111
  def semantic_search(query:, at: nil, entity_id: nil, limit: 20)
62
112
  result = @scope.canonical.search_text(query)
63
113
  result = apply_temporal_filter(result, at)
@@ -65,14 +115,28 @@ module FactDb
65
115
  result.limit(limit)
66
116
  end
67
117
 
68
- # Find facts where entity has a specific role
118
+ # Returns facts where an entity has a specific mention role
119
+ #
120
+ # @param entity_id [Integer] the entity to query
121
+ # @param role [String, Symbol] the mention role (e.g., :subject, :object)
122
+ # @param at [Date, Time, nil] point in time (nil for currently valid)
123
+ # @return [ActiveRecord::Relation] facts with the entity in the specified role
69
124
  def facts_with_entity_role(entity_id:, role:, at: nil)
70
125
  result = @scope.canonical.with_role(entity_id, role)
71
126
  result = apply_temporal_filter(result, at)
72
127
  result.order(valid_at: :desc)
73
128
  end
74
129
 
75
- # Compare facts at two points in time
130
+ # Compares facts at two points in time to find changes
131
+ #
132
+ # @param entity_id [Integer] the entity to compare
133
+ # @param from_date [Date, Time] the earlier point in time
134
+ # @param to_date [Date, Time] the later point in time
135
+ # @return [Hash] hash with :added, :removed, and :unchanged arrays of facts
136
+ #
137
+ # @example
138
+ # diff = query.diff(entity_id: 1, from_date: 1.year.ago, to_date: Date.today)
139
+ # puts "#{diff[:added].count} new facts, #{diff[:removed].count} removed"
76
140
  def diff(entity_id:, from_date:, to_date:)
77
141
  facts_at_from = facts_at(from_date, entity_id: entity_id).to_a
78
142
  facts_at_to = facts_at(to_date, entity_id: entity_id).to_a
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FactDb
4
+ module Temporal
5
+ # A scoped query builder for temporal queries.
6
+ # Allows chaining: facts.at("2024-01-15").query("Paula's role")
7
+ #
8
+ # @example Basic usage
9
+ # facts.at("2024-01-15").query("Paula's role", format: :cypher)
10
+ # facts.at("2024-01-15").facts_for(entity_id)
11
+ # facts.at("2024-01-15").compare_to("2024-06-15")
12
+ #
13
class QueryBuilder
  # @return [Object] the point in time this builder is bound to, exactly as
  #   it was passed to the constructor
  attr_reader :date

  # Binds a facts facade to a single point in time.
  #
  # The date is stored untouched and forwarded as-is to the facade on every
  # call; no parsing or validation happens here.
  #
  # @param facts [#query_facts, #facts_at, #diff] the object queries are delegated to
  # @param date [Date, String] the point in time
  def initialize(facts, date)
    @facts = facts
    @date = date
  end

  # Runs a topic query at the bound date.
  #
  # @param topic [String] the query topic
  # @param format [Symbol] output format, forwarded to the facade (default :json)
  # @param options [Hash] extra keyword arguments forwarded unchanged
  # @return [Object] whatever the facade's #query_facts returns for the format
  def query(topic, format: :json, **options)
    @facts.query_facts(topic: topic, at: @date, format: format, **options)
  end

  # Fetches the facts at the bound date without an entity filter.
  #
  # @param format [Symbol] output format, forwarded to the facade (default :json)
  # @param options [Hash] extra keyword arguments forwarded unchanged
  # @return [Object] whatever the facade's #facts_at returns for the format
  def facts(format: :json, **options)
    @facts.facts_at(@date, format: format, **options)
  end

  # Fetches the facts for one entity at the bound date.
  #
  # @param entity_id [Integer] entity ID, passed to the facade as `entity:`
  # @param format [Symbol] output format, forwarded to the facade (default :json)
  # @param options [Hash] extra keyword arguments forwarded unchanged
  # @return [Object] whatever the facade's #facts_at returns for the format
  def facts_for(entity_id, format: :json, **options)
    @facts.facts_at(@date, entity: entity_id, format: format, **options)
  end

  # Compares the bound date (as `from`) with another date (as `to`).
  #
  # @param other_date [Date, String] the date to compare to
  # @param topic [String, nil] optional topic, passed positionally to the facade's #diff
  # @return [Object] whatever the facade's #diff returns
  def compare_to(other_date, topic: nil)
    @facts.diff(topic, from: @date, to: other_date)
  end

  # Fetches the state of one entity at the bound date.
  #
  # Issues the same facade call as #facts_for. It now accepts and forwards
  # extra options so both methods share one signature; existing callers
  # that pass only `format:` are unaffected.
  #
  # @param entity_id [Integer] entity ID, passed to the facade as `entity:`
  # @param format [Symbol] output format, forwarded to the facade (default :json)
  # @param options [Hash] extra keyword arguments forwarded unchanged
  # @return [Object] whatever the facade's #facts_at returns for the format
  def state_for(entity_id, format: :json, **options)
    @facts.facts_at(@date, entity: entity_id, format: format, **options)
  end
end
68
+ end
69
+ end
@@ -2,59 +2,106 @@
2
2
 
3
3
  module FactDb
4
4
  module Temporal
5
+ # Builds and analyzes temporal timelines of facts for an entity
6
+ #
7
+ # Provides methods to view an entity's history, group events by time periods,
8
+ # find overlapping facts, and compare states at different points in time.
9
+ # Includes Enumerable for easy iteration over timeline events.
10
+ #
11
+ # @example Build a timeline for an entity
12
+ # timeline = Timeline.new.build(entity_id: person.id)
13
+ # timeline.by_year.each { |year, events| puts "#{year}: #{events.count} events" }
14
+ #
15
+ # @example Find currently active facts
16
+ # active_facts = timeline.active
17
+ #
5
18
  class Timeline
6
19
  include Enumerable
7
20
 
21
+ # @return [Array<TimelineEvent>] the timeline events
8
22
  attr_reader :events
9
23
 
24
+ # Initializes a new empty Timeline
10
25
  def initialize
11
26
  @events = []
12
27
  end
13
28
 
29
+ # Iterates over timeline event hashes
30
+ #
31
+ # @yield [Hash] each event as a hash
32
+ # @return [Enumerator] if no block given
14
33
  def each(&block)
15
34
  to_hash.each(&block)
16
35
  end
17
36
 
37
+ # Builds a timeline of facts for an entity
38
+ #
39
+ # @param entity_id [Integer] the entity to build timeline for
40
+ # @param from [Date, Time, nil] start of date range (optional)
41
+ # @param to [Date, Time, nil] end of date range (optional)
42
+ # @return [Timeline] self for method chaining
18
43
  def build(entity_id:, from: nil, to: nil)
19
44
  facts = fetch_facts(entity_id, from, to)
20
45
  @events = facts.map { |fact| TimelineEvent.new(fact) }
21
46
  self
22
47
  end
23
48
 
49
+ # Returns events sorted by valid_at date
50
+ #
51
+ # @return [Array<TimelineEvent>] sorted events
24
52
  def to_a
25
53
  @events.sort_by(&:valid_at)
26
54
  end
27
55
 
56
+ # Returns events as an array of hashes
57
+ #
58
+ # @return [Array<Hash>] events converted to hash format
28
59
  def to_hash
29
60
  to_a.map(&:to_hash)
30
61
  end
31
62
 
32
- # Group events by year
63
+ # Groups events by year
64
+ #
65
+ # @return [Hash<Integer, Array<TimelineEvent>>] events grouped by year
33
66
  def by_year
34
67
  to_a.group_by { |event| event.valid_at.year }
35
68
  end
36
69
 
37
- # Group events by month
70
+ # Groups events by month
71
+ #
72
+ # @return [Hash<String, Array<TimelineEvent>>] events grouped by "YYYY-MM" key
38
73
  def by_month
39
74
  to_a.group_by { |event| event.valid_at.strftime("%Y-%m") }
40
75
  end
41
76
 
42
- # Get events in a specific date range
77
+ # Returns events in a specific date range
78
+ #
79
+ # @param from [Date, Time] start of range (inclusive)
80
+ # @param to [Date, Time] end of range (inclusive)
81
+ # @return [Array<TimelineEvent>] events within the range
43
82
  def between(from, to)
44
83
  to_a.select { |event| event.valid_at >= from && event.valid_at <= to }
45
84
  end
46
85
 
47
- # Get currently active events
86
+ # Returns currently active (valid) events
87
+ #
88
+ # @return [Array<TimelineEvent>] events with no invalid_at date
48
89
  def active
49
90
  to_a.select(&:currently_valid?)
50
91
  end
51
92
 
52
- # Get historical (no longer valid) events
93
+ # Returns historical (no longer valid) events
94
+ #
95
+ # @return [Array<TimelineEvent>] events that have been invalidated
53
96
  def historical
54
97
  to_a.reject(&:currently_valid?)
55
98
  end
56
99
 
57
- # Find overlapping events
100
+ # Finds pairs of overlapping events
101
+ #
102
+ # Two events overlap if their validity periods intersect.
103
+ #
104
+ # @return [Array<Array<TimelineEvent, TimelineEvent>>] pairs of overlapping events
58
105
  def overlapping
59
106
  result = []
60
107
  sorted = to_a
@@ -68,12 +115,17 @@ module FactDb
68
115
  result
69
116
  end
70
117
 
71
- # Get the state at a specific point in time
118
+ # Returns the state (valid events) at a specific point in time
119
+ #
120
+ # @param date [Date, Time] the point in time to query
121
+ # @return [Array<TimelineEvent>] events valid at the given date
72
122
  def state_at(date)
73
123
  to_a.select { |event| event.valid_at?(date) }
74
124
  end
75
125
 
76
- # Generate a summary of changes
126
+ # Generates a summary of changes between consecutive events
127
+ #
128
+ # @return [Array<Hash>] array of hashes with :from, :to, and :gap_days keys
77
129
  def changes_summary
78
130
  sorted = to_a
79
131
 
@@ -103,29 +155,68 @@ module FactDb
103
155
  end
104
156
  end
105
157
 
158
+ # Wraps a Fact as a timeline event with convenience methods
159
+ #
160
+ # Provides a simplified interface for timeline operations,
161
+ # delegating most methods to the underlying fact.
162
+ #
106
163
  class TimelineEvent
164
+ # @return [FactDb::Models::Fact] the underlying fact
107
165
  attr_reader :fact
108
166
 
109
- delegate :id, :fact_text, :valid_at, :invalid_at, :status,
167
+ # @!method id
168
+ # @return [Integer] the fact ID
169
+ # @!method text
170
+ # @return [String] the fact text
171
+ # @!method valid_at
172
+ # @return [Time] when the fact became valid
173
+ # @!method invalid_at
174
+ # @return [Time, nil] when the fact became invalid
175
+ # @!method status
176
+ # @return [String] the fact status
177
+ # @!method currently_valid?
178
+ # @return [Boolean] true if fact is currently valid
179
+ # @!method valid_at?(date)
180
+ # @param date [Date, Time] the point in time
181
+ # @return [Boolean] true if valid at the given date
182
+ # @!method duration
183
+ # @return [ActiveSupport::Duration, nil] validity duration
184
+ # @!method duration_days
185
+ # @return [Integer, nil] validity duration in days
186
+ # @!method entities
187
+ # @return [Array<Entity>] mentioned entities
188
+ # @!method source_contents
189
+ # @return [Array<Source>] source documents
190
+ delegate :id, :text, :valid_at, :invalid_at, :status,
110
191
  :currently_valid?, :valid_at?, :duration, :duration_days,
111
192
  :entities, :source_contents, to: :fact
112
193
 
194
+ # Initializes a new TimelineEvent
195
+ #
196
+ # @param fact [FactDb::Models::Fact] the fact to wrap
113
197
  def initialize(fact)
114
198
  @fact = fact
115
199
  end
116
200
 
201
+ # Converts the event to a hash representation
202
+ #
203
+ # @return [Hash] hash with :id, :text, :valid_at, :invalid_at, :status, :duration_days, :entities
117
204
  def to_hash
118
205
  {
119
206
  id: id,
120
- fact_text: fact_text,
207
+ text: text,
121
208
  valid_at: valid_at,
122
209
  invalid_at: invalid_at,
123
210
  status: status,
124
211
  duration_days: duration_days,
125
- entities: entities.map(&:canonical_name)
212
+ entities: entities.map(&:name)
126
213
  }
127
214
  end
128
215
 
216
+ # Compares events by valid_at date for sorting
217
+ #
218
+ # @param other [TimelineEvent] the event to compare with
219
+ # @return [Integer] -1, 0, or 1
129
220
  def <=>(other)
130
221
  valid_at <=> other.valid_at
131
222
  end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FactDb
4
+ module Transformers
5
+ # Base transformer class providing common utilities.
6
+ # Subclasses implement specific output formats for LLM consumption.
7
class Base
  # Characters that #escape_string rewrites, with their replacements.
  # The backslash is included so that a backslash in the input cannot
  # combine with the following character (e.g. swallow a closing quote).
  ESCAPE_SEQUENCES = { "\\" => "\\\\", "\"" => "\\\"", "\n" => "\\n" }.freeze
  ESCAPE_PATTERN = /[\\"\n]/.freeze
  # Longest variable name #to_variable will produce.
  MAX_VARIABLE_LENGTH = 30
  # Marker appended by #truncate when text is cut.
  ELLIPSIS = "..."
  private_constant :ESCAPE_SEQUENCES, :ESCAPE_PATTERN, :MAX_VARIABLE_LENGTH, :ELLIPSIS

  # Transform query results to the target format.
  #
  # The base implementation is a pass-through; subclasses override it.
  #
  # @param results [QueryResult] The query results
  # @return [QueryResult] the same object that was passed in
  def transform(results)
    results
  end

  protected

  # Reads +key+ from a Hash (symbol key first, then its string form) or
  # from any object that publicly responds to +key+.
  #
  # A stored +false+ under the symbol key is returned as-is; only a +nil+
  # symbol-key lookup falls through to the string key.
  #
  # @param obj [Hash, Object] Source object
  # @param key [Symbol] Key to retrieve
  # @return [Object, nil] The value, or nil when it cannot be found
  def get_value(obj, key)
    if obj.is_a?(Hash)
      value = obj[key]
      value.nil? ? obj[key.to_s] : value
    elsif obj.respond_to?(key)
      obj.public_send(key)
    end
  end

  # Formats a date as YYYY-MM-DD.
  #
  # Anything that does not respond to +strftime+ (e.g. a String) is
  # returned via +to_s+ without reformatting.
  #
  # @param date [Date, Time, String, nil] Date to format
  # @return [String, nil] Formatted date string, or nil for nil input
  def format_date(date)
    return nil if date.nil?

    date.respond_to?(:strftime) ? date.strftime("%Y-%m-%d") : date.to_s
  end

  # Escapes a value for embedding inside a double-quoted literal.
  #
  # Backslashes, double quotes and newlines are backslash-escaped in a
  # single pass, so escapes introduced for one character are never
  # re-escaped by a later replacement.
  #
  # @param str [#to_s] String to escape (nil becomes "")
  # @return [String] Escaped string
  def escape_string(str)
    str.to_s.gsub(ESCAPE_PATTERN, ESCAPE_SEQUENCES)
  end

  # Derives a lowercase, underscore-separated identifier from a string.
  #
  # Runs of characters outside a-z/0-9 collapse to a single underscore,
  # edge underscores are removed, and the result is capped at 30
  # characters. The cap can expose a trailing underscore, which is removed.
  # The result is empty when the input has no ASCII letters or digits.
  #
  # @param str [#to_s] Source string
  # @return [String] Identifier of at most 30 characters (possibly empty)
  def to_variable(str)
    slug = str.to_s.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_|_\z/, "")
    slug.slice(0, MAX_VARIABLE_LENGTH).chomp("_")
  end

  # Truncates text to at most +length+ characters, ending in "..." when cut.
  #
  # Input that already fits is returned unchanged (the original object,
  # not a copy). When +length+ is too small to hold any text plus the
  # ellipsis, the text is hard-cut so the result never exceeds +length+.
  #
  # @param str [#to_s] String to truncate
  # @param length [Integer] Maximum length
  # @return [String, Object] Truncated string, or +str+ itself when it fits
  def truncate(str, length)
    text = str.to_s
    return str if text.length <= length
    return text[0, [length, 0].max] if length < ELLIPSIS.length

    "#{text[0, length - ELLIPSIS.length]}#{ELLIPSIS}"
  end
end
76
+ end
77
+ end
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FactDb
4
+ module Transformers
5
+ # Transforms results into Cypher-like graph notation.
6
+ # This format is readable by both humans and LLMs, and encodes
7
+ # nodes, relationships, and properties explicitly.
8
+ #
9
+ # @example Output format
10
+ # (paula:Person {name: "Paula Chen"})
11
+ # (microsoft:Organization {name: "Microsoft"})
12
+ # (paula)-[:WORKS_AT {since: "2024-01-10", role: "Principal Engineer"}]->(microsoft)
13
class CypherTransformer < Base
  # Ordered [pattern, relationship type] pairs. The first matching pattern
  # wins, so the specific verbs must stay ahead of the generic "is"/"has".
  RELATIONSHIP_PATTERNS = [
    [/\bworks?\s+(at|for)\b/i, "WORKS_AT"],
    [/\bworked\s+(at|for)\b/i, "WORKED_AT"],
    [/\breports?\s+to\b/i, "REPORTS_TO"],
    [/\bis\s+(a|an|the)\b/i, "IS_A"],
    [/\bis\s+\w+/i, "IS"],
    [/\bhas\b/i, "HAS"],
    [/\bdecided\b/i, "DECIDED"],
    [/\bjoined\b/i, "JOINED"],
    [/\bleft\b/i, "LEFT"]
  ].freeze
  # Relationship type used when no pattern matches the fact text.
  FALLBACK_RELATIONSHIP = "RELATES_TO"
  private_constant :RELATIONSHIP_PATTERNS, :FALLBACK_RELATIONSHIP

  # Transform results to Cypher format.
  #
  # Entity nodes are emitted first (identical definitions only once), then
  # one relationship line per fact. A fact that refers to a node variable
  # not declared yet gets a generic `:Entity` node emitted just before it.
  #
  # @param results [QueryResult] The query results; must provide
  #   #each_entity, #each_fact and #entities (indexed by entity id)
  # @return [String] Cypher-like graph notation, one statement per line
  def transform(results)
    lines = []
    emitted_defs = Set.new   # full node definitions already written
    declared_vars = Set.new  # node variable names already written

    results.each_entity do |entity|
      node_def = entity_to_cypher(entity)
      next unless node_def && emitted_defs.add?(node_def)

      lines << node_def
      declared_vars << to_variable(get_value(entity, :name))
    end

    results.each_fact do |fact|
      relationship = fact_to_cypher(fact, results.entities, declared_vars, lines)
      lines << relationship if relationship
    end

    lines.join("\n")
  end

  private

  # Builds the node definition for an entity.
  #
  # @param entity [Hash, Object] entity exposing :name and optionally :kind / :aliases
  # @return [String, nil] `(var:Label {props})`, or nil when the entity has no name
  def entity_to_cypher(entity)
    name = get_value(entity, :name)
    return nil unless name

    label = (get_value(entity, :kind) || "Entity").to_s.capitalize
    props = { name: name }

    aliases = get_value(entity, :aliases)
    props[:aliases] = aliases.map { |entry| alias_text(entry) } if aliases && !aliases.empty?

    "(#{to_variable(name)}:#{label} #{format_props(props)})"
  end

  # Extracts the display text of one alias entry.
  #
  # Hashes are read through #get_value (symbol or string :name key).
  # Other objects use their public #name reader when they have one and
  # fall back to #to_s otherwise.
  # NOTE(review): assumes alias records expose their text as #name, matching
  # the Hash form — confirm against the alias model.
  #
  # @param entry [Hash, Object] alias entry
  # @return [String, nil] alias text
  def alias_text(entry)
    return get_value(entry, :name) if entry.is_a?(Hash)

    entry.respond_to?(:name) ? entry.name : entry.to_s
  end

  # Builds the relationship line for a fact, declaring any missing nodes.
  #
  # The subject comes from the mention whose role is "subject", or is
  # parsed from the fact text when there is no such mention. The object is
  # the first mention with any other role; without one, the remainder of
  # the fact text is used as a quoted literal target.
  #
  # @param fact [Hash, Object] fact exposing :text and optionally
  #   :entity_mentions, :valid_at, :invalid_at, :status, :confidence
  # @param entities [#[]] entities indexed by entity id
  # @param declared_vars [Set<String>] node variables already written (mutated)
  # @param lines [Array<String>] output lines; node definitions are appended here
  # @return [String, nil] relationship line, or nil when there is no text or subject
  def fact_to_cypher(fact, entities, declared_vars, lines)
    text = get_value(fact, :text) || ""
    return nil if text.empty?

    mentions = get_value(fact, :entity_mentions) || []
    subject_mention = mentions.find { |mention| subject_role?(mention) }
    object_mention = mentions.find { |mention| !subject_role?(mention) }

    subject_name = subject_mention ? mention_name(subject_mention, entities) : extract_subject(text)
    return nil if subject_name.nil? || subject_name.empty?

    subject_var = declare_node(subject_name, declared_vars, lines)

    props = relationship_props(fact)
    props_str = props.empty? ? "" : " #{format_props(props)}"
    rel_type = extract_relationship_type(text)

    if object_mention
      # Relationship to another entity
      object_var = declare_node(mention_name(object_mention, entities), declared_vars, lines)
      "(#{subject_var})-[:#{rel_type}#{props_str}]->(#{object_var})"
    else
      # Relationship to a literal value
      object_value = extract_object_value(text, subject_name)
      "(#{subject_var})-[:#{rel_type}#{props_str}]->(\"#{escape_string(object_value)}\")"
    end
  end

  # Tells whether a mention is the subject of its fact. The role is
  # compared as text so Symbol and String roles are treated alike.
  #
  # @param mention [Hash, Object] mention exposing :mention_role
  # @return [Boolean]
  def subject_role?(mention)
    get_value(mention, :mention_role).to_s == "subject"
  end

  # Resolves the entity name behind a mention.
  #
  # @param mention [Hash, Object] mention exposing :entity_id
  # @param entities [#[]] entities indexed by entity id
  # @return [String, nil] the entity's name, or "Entity_<id>" when the
  #   entity is not among the results
  def mention_name(mention, entities)
    entity_id = get_value(mention, :entity_id)
    entity = entities[entity_id]
    entity ? get_value(entity, :name) : "Entity_#{entity_id}"
  end

  # Ensures a node for +name+ has been written and returns its variable.
  #
  # Tracks variable names in a Set instead of scanning the text of every
  # emitted definition, so a property value that happens to contain
  # "(var:" can no longer be mistaken for a declaration.
  #
  # @param name [String, nil] entity name
  # @param declared_vars [Set<String>] node variables already written (mutated)
  # @param lines [Array<String>] output lines (mutated)
  # @return [String] the node variable
  def declare_node(name, declared_vars, lines)
    var = to_variable(name)
    lines << "(#{var}:Entity {name: \"#{escape_string(name)}\"})" if declared_vars.add?(var)
    var
  end

  # Collects the relationship properties present on a fact.
  #
  # @param fact [Hash, Object] the fact
  # @return [Hash] subset of :since, :until, :status, :confidence
  def relationship_props(fact)
    props = {}

    valid_at = get_value(fact, :valid_at)
    props[:since] = format_date(valid_at) if valid_at

    invalid_at = get_value(fact, :invalid_at)
    props[:until] = format_date(invalid_at) if invalid_at

    status = get_value(fact, :status)
    props[:status] = status if status

    confidence = get_value(fact, :confidence)
    props[:confidence] = confidence if confidence

    props
  end

  # Picks a relationship type from keywords in the fact text.
  #
  # @param text [String] fact text
  # @return [String] relationship type, "RELATES_TO" when nothing matches
  def extract_relationship_type(text)
    match = RELATIONSHIP_PATTERNS.find { |pattern, _type| text.match?(pattern) }
    match ? match.last : FALLBACK_RELATIONSHIP
  end

  # Takes the words before the first recognised verb as the subject.
  #
  # @param text [String] fact text
  # @return [String] leading words joined by single spaces (the whole text
  #   when no recognised verb is present)
  def extract_subject(text)
    words = text.split(/\s+/)
    words.take_while { |w| !w.match?(/^(is|are|was|were|has|have|works|worked|reports)$/i) }.join(" ")
  end

  # Strips the subject and a leading verb phrase from the fact text.
  #
  # @param text [String] fact text
  # @param subject [String] subject name to remove from the start
  # @return [String] the remaining text
  def extract_object_value(text, subject)
    remainder = text.sub(/^#{Regexp.escape(subject)}\s*/i, "")
    remainder.sub(/^(is|are|was|were|has|have|works?|worked|reports?)\s+(at|for|to|a|an|the)?\s*/i, "")
  end

  # Renders a property hash as `{key: value, ...}`.
  #
  # @param props [Hash] properties
  # @return [String] rendered property map ("{}" when empty)
  def format_props(props)
    return "{}" if props.empty?

    pairs = props.map { |key, value| "#{key}: #{format_prop_value(value)}" }
    "{#{pairs.join(", ")}}"
  end

  # Renders one property value: Strings and Array items are quoted and
  # escaped, nil becomes null, anything else is written via #to_s.
  #
  # @param value [Object] property value
  # @return [String] rendered value
  def format_prop_value(value)
    case value
    when String then "\"#{escape_string(value)}\""
    when Array then "[#{value.map { |item| "\"#{escape_string(item)}\"" }.join(", ")}]"
    when nil then "null"
    else value.to_s
    end
  end
end
184
+ end
185
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module FactDb
4
+ module Transformers
5
+ # JSON transformer - returns results as structured hash.
6
+ # This is the default pass-through format.
7
class JsonTransformer < Base
  # Converts query results into a plain hash by delegating to the
  # result object's own #to_h; nothing is reformatted here.
  #
  # @param results [QueryResult] The query results
  # @return [Hash] whatever results.to_h returns
  def transform(results)
    payload = results.to_h
    payload
  end
end
16
+ end
17
+ end