fact_db 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +2 -0
- data/.yardopts +5 -0
- data/CHANGELOG.md +64 -0
- data/README.md +107 -6
- data/Rakefile +243 -10
- data/db/migrate/001_enable_extensions.rb +1 -0
- data/db/migrate/002_create_sources.rb +49 -0
- data/db/migrate/003_create_entities.rb +27 -15
- data/db/migrate/004_create_entity_aliases.rb +20 -7
- data/db/migrate/005_create_facts.rb +37 -21
- data/db/migrate/006_create_entity_mentions.rb +14 -6
- data/db/migrate/007_create_fact_sources.rb +16 -8
- data/docs/api/extractors/index.md +5 -5
- data/docs/api/extractors/llm.md +17 -17
- data/docs/api/extractors/rule-based.md +14 -14
- data/docs/api/facts.md +20 -20
- data/docs/api/index.md +4 -4
- data/docs/api/models/entity.md +21 -21
- data/docs/api/models/fact.md +15 -15
- data/docs/api/models/index.md +7 -7
- data/docs/api/models/{content.md → source.md} +29 -29
- data/docs/api/pipeline/extraction.md +25 -25
- data/docs/api/pipeline/index.md +1 -1
- data/docs/api/pipeline/resolution.md +4 -4
- data/docs/api/services/entity-service.md +20 -20
- data/docs/api/services/fact-service.md +12 -12
- data/docs/api/services/index.md +5 -5
- data/docs/api/services/{content-service.md → source-service.md} +27 -27
- data/docs/architecture/database-schema.md +46 -46
- data/docs/architecture/entity-resolution.md +6 -6
- data/docs/architecture/index.md +10 -10
- data/docs/architecture/temporal-facts.md +5 -5
- data/docs/architecture/three-layer-model.md +17 -17
- data/docs/concepts.md +6 -6
- data/docs/examples/basic-usage.md +20 -20
- data/docs/examples/hr-onboarding.md +17 -17
- data/docs/examples/index.md +4 -4
- data/docs/examples/news-analysis.md +23 -23
- data/docs/getting-started/database-setup.md +28 -20
- data/docs/getting-started/index.md +3 -3
- data/docs/getting-started/quick-start.md +33 -30
- data/docs/guides/batch-processing.md +26 -26
- data/docs/guides/configuration.md +158 -77
- data/docs/guides/entity-management.md +14 -14
- data/docs/guides/extracting-facts.md +28 -28
- data/docs/guides/ingesting-content.md +14 -14
- data/docs/guides/llm-integration.md +40 -32
- data/docs/guides/temporal-queries.md +11 -11
- data/docs/index.md +6 -2
- data/examples/.envrc +4 -0
- data/examples/.gitignore +1 -0
- data/examples/001_configuration.rb +312 -0
- data/examples/{basic_usage.rb → 010_basic_usage.rb} +47 -56
- data/examples/{entity_management.rb → 020_entity_management.rb} +57 -72
- data/examples/{temporal_queries.rb → 030_temporal_queries.rb} +39 -59
- data/examples/040_output_formats.rb +177 -0
- data/examples/{rule_based_extraction.rb → 050_rule_based_extraction.rb} +39 -45
- data/examples/060_fluent_temporal_api.rb +217 -0
- data/examples/070_introspection.rb +252 -0
- data/examples/{hr_system.rb → 080_hr_system.rb} +56 -75
- data/examples/090_ingest_demo.rb +515 -0
- data/examples/100_query_context.rb +668 -0
- data/examples/110_prove_it.rb +204 -0
- data/examples/120_dump_database.rb +358 -0
- data/examples/130_rag_feedback_loop.rb +858 -0
- data/examples/README.md +229 -15
- data/examples/data/lincoln_associates.md +201 -0
- data/examples/data/lincoln_biography.md +66 -0
- data/examples/data/lincoln_cabinet.md +243 -0
- data/examples/data/lincoln_family.md +163 -0
- data/examples/data/lincoln_military.md +241 -0
- data/examples/data/lincoln_todd_family.md +136 -0
- data/examples/ingest_reporter.rb +335 -0
- data/examples/utilities.rb +182 -0
- data/lib/fact_db/config/defaults.yml +254 -0
- data/lib/fact_db/config.rb +94 -35
- data/lib/fact_db/database.rb +98 -8
- data/lib/fact_db/extractors/base.rb +106 -21
- data/lib/fact_db/extractors/llm_extractor.rb +35 -63
- data/lib/fact_db/extractors/manual_extractor.rb +46 -6
- data/lib/fact_db/extractors/rule_based_extractor.rb +136 -25
- data/lib/fact_db/llm/adapter.rb +3 -3
- data/lib/fact_db/models/entity.rb +94 -22
- data/lib/fact_db/models/entity_alias.rb +41 -7
- data/lib/fact_db/models/entity_mention.rb +34 -1
- data/lib/fact_db/models/fact.rb +259 -28
- data/lib/fact_db/models/fact_source.rb +43 -9
- data/lib/fact_db/models/source.rb +113 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +35 -35
- data/lib/fact_db/pipeline/resolution_pipeline.rb +5 -5
- data/lib/fact_db/query_result.rb +202 -0
- data/lib/fact_db/resolution/entity_resolver.rb +139 -39
- data/lib/fact_db/resolution/fact_resolver.rb +86 -14
- data/lib/fact_db/services/entity_service.rb +246 -37
- data/lib/fact_db/services/fact_service.rb +254 -17
- data/lib/fact_db/services/source_service.rb +164 -0
- data/lib/fact_db/temporal/query.rb +71 -7
- data/lib/fact_db/temporal/query_builder.rb +69 -0
- data/lib/fact_db/temporal/timeline.rb +102 -11
- data/lib/fact_db/transformers/base.rb +77 -0
- data/lib/fact_db/transformers/cypher_transformer.rb +185 -0
- data/lib/fact_db/transformers/json_transformer.rb +17 -0
- data/lib/fact_db/transformers/raw_transformer.rb +35 -0
- data/lib/fact_db/transformers/text_transformer.rb +114 -0
- data/lib/fact_db/transformers/triple_transformer.rb +138 -0
- data/lib/fact_db/validation/alias_filter.rb +185 -0
- data/lib/fact_db/version.rb +1 -1
- data/lib/fact_db.rb +281 -30
- data/mkdocs.yml +2 -2
- metadata +60 -16
- data/db/migrate/002_create_contents.rb +0 -44
- data/lib/fact_db/models/content.rb +0 -62
- data/lib/fact_db/services/content_service.rb +0 -93
|
@@ -2,13 +2,38 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Temporal
|
|
5
|
+
# Executes temporal queries on facts with time-based filtering
|
|
6
|
+
#
|
|
7
|
+
# Provides methods for querying facts at specific points in time,
|
|
8
|
+
# comparing states between dates, and searching with temporal constraints.
|
|
9
|
+
#
|
|
10
|
+
# @example Query current facts about an entity
|
|
11
|
+
# query = Query.new
|
|
12
|
+
# facts = query.current_facts(entity_id: person.id)
|
|
13
|
+
#
|
|
14
|
+
# @example Compare facts at two points in time
|
|
15
|
+
# diff = query.diff(entity_id: person.id, from_date: Date.parse("2023-01-01"), to_date: Date.today)
|
|
16
|
+
# puts "Added: #{diff[:added].count}, Removed: #{diff[:removed].count}"
|
|
17
|
+
#
|
|
5
18
|
class Query
|
|
19
|
+
# @return [ActiveRecord::Relation] the base scope for queries
|
|
6
20
|
attr_reader :scope
|
|
7
21
|
|
|
22
|
+
# Initializes a new Query with an optional base scope
|
|
23
|
+
#
|
|
24
|
+
# @param scope [ActiveRecord::Relation] base fact scope (defaults to all facts)
|
|
8
25
|
def initialize(scope = Models::Fact.all)
|
|
9
26
|
@scope = scope
|
|
10
27
|
end
|
|
11
28
|
|
|
29
|
+
# Executes a temporal query with multiple filters
|
|
30
|
+
#
|
|
31
|
+
# @param topic [String, nil] text to search for in fact content
|
|
32
|
+
# @param at [Date, Time, nil] point in time (nil for currently valid)
|
|
33
|
+
# @param entity_id [Integer, nil] filter by entity
|
|
34
|
+
# @param status [Symbol] fact status filter (:canonical, :superseded, :synthesized, :all)
|
|
35
|
+
# @param limit [Integer, nil] maximum number of results
|
|
36
|
+
# @return [ActiveRecord::Relation] matching facts ordered by valid_at desc
|
|
12
37
|
def execute(topic: nil, at: nil, entity_id: nil, status: :canonical, limit: nil)
|
|
13
38
|
result = @scope
|
|
14
39
|
|
|
@@ -33,31 +58,56 @@ module FactDb
|
|
|
33
58
|
result
|
|
34
59
|
end
|
|
35
60
|
|
|
36
|
-
#
|
|
61
|
+
# Returns currently valid canonical facts about an entity
|
|
62
|
+
#
|
|
63
|
+
# @param entity_id [Integer] the entity to query
|
|
64
|
+
# @return [ActiveRecord::Relation] currently valid facts mentioning the entity
|
|
37
65
|
def current_facts(entity_id:)
|
|
38
66
|
execute(entity_id: entity_id, at: nil, status: :canonical)
|
|
39
67
|
end
|
|
40
68
|
|
|
41
|
-
#
|
|
69
|
+
# Returns facts valid at a specific point in time
|
|
70
|
+
#
|
|
71
|
+
# @param date [Date, Time] the point in time to query
|
|
72
|
+
# @param entity_id [Integer, nil] optional entity filter
|
|
73
|
+
# @return [ActiveRecord::Relation] facts valid at the given date
|
|
42
74
|
def facts_at(date, entity_id: nil)
|
|
43
75
|
execute(at: date, entity_id: entity_id, status: :canonical)
|
|
44
76
|
end
|
|
45
77
|
|
|
46
|
-
#
|
|
78
|
+
# Returns facts that became valid within a date range
|
|
79
|
+
#
|
|
80
|
+
# @param from [Date, Time] start of range (inclusive)
|
|
81
|
+
# @param to [Date, Time] end of range (inclusive)
|
|
82
|
+
# @param entity_id [Integer, nil] optional entity filter
|
|
83
|
+
# @return [ActiveRecord::Relation] facts created in the range, ordered by valid_at asc
|
|
47
84
|
def facts_created_between(from:, to:, entity_id: nil)
|
|
48
85
|
result = @scope.canonical.became_valid_between(from, to)
|
|
49
86
|
result = result.mentioning_entity(entity_id) if entity_id
|
|
50
87
|
result.order(valid_at: :asc)
|
|
51
88
|
end
|
|
52
89
|
|
|
53
|
-
#
|
|
90
|
+
# Returns facts that became invalid within a date range
|
|
91
|
+
#
|
|
92
|
+
# @param from [Date, Time] start of range (inclusive)
|
|
93
|
+
# @param to [Date, Time] end of range (inclusive)
|
|
94
|
+
# @param entity_id [Integer, nil] optional entity filter
|
|
95
|
+
# @return [ActiveRecord::Relation] facts invalidated in the range, ordered by invalid_at asc
|
|
54
96
|
def facts_invalidated_between(from:, to:, entity_id: nil)
|
|
55
97
|
result = @scope.became_invalid_between(from, to)
|
|
56
98
|
result = result.mentioning_entity(entity_id) if entity_id
|
|
57
99
|
result.order(invalid_at: :asc)
|
|
58
100
|
end
|
|
59
101
|
|
|
60
|
-
#
|
|
102
|
+
# Searches facts by text with temporal filtering
|
|
103
|
+
#
|
|
104
|
+
# Uses PostgreSQL full-text search with optional point-in-time filtering.
|
|
105
|
+
#
|
|
106
|
+
# @param query [String] text to search for
|
|
107
|
+
# @param at [Date, Time, nil] point in time (nil for currently valid)
|
|
108
|
+
# @param entity_id [Integer, nil] optional entity filter
|
|
109
|
+
# @param limit [Integer] maximum number of results (default: 20)
|
|
110
|
+
# @return [ActiveRecord::Relation] matching facts
|
|
61
111
|
def semantic_search(query:, at: nil, entity_id: nil, limit: 20)
|
|
62
112
|
result = @scope.canonical.search_text(query)
|
|
63
113
|
result = apply_temporal_filter(result, at)
|
|
@@ -65,14 +115,28 @@ module FactDb
|
|
|
65
115
|
result.limit(limit)
|
|
66
116
|
end
|
|
67
117
|
|
|
68
|
-
#
|
|
118
|
+
# Returns facts where an entity has a specific mention role
|
|
119
|
+
#
|
|
120
|
+
# @param entity_id [Integer] the entity to query
|
|
121
|
+
# @param role [String, Symbol] the mention role (e.g., :subject, :object)
|
|
122
|
+
# @param at [Date, Time, nil] point in time (nil for currently valid)
|
|
123
|
+
# @return [ActiveRecord::Relation] facts with the entity in the specified role
|
|
69
124
|
def facts_with_entity_role(entity_id:, role:, at: nil)
|
|
70
125
|
result = @scope.canonical.with_role(entity_id, role)
|
|
71
126
|
result = apply_temporal_filter(result, at)
|
|
72
127
|
result.order(valid_at: :desc)
|
|
73
128
|
end
|
|
74
129
|
|
|
75
|
-
#
|
|
130
|
+
# Compares facts at two points in time to find changes
|
|
131
|
+
#
|
|
132
|
+
# @param entity_id [Integer] the entity to compare
|
|
133
|
+
# @param from_date [Date, Time] the earlier point in time
|
|
134
|
+
# @param to_date [Date, Time] the later point in time
|
|
135
|
+
# @return [Hash] hash with :added, :removed, and :unchanged arrays of facts
|
|
136
|
+
#
|
|
137
|
+
# @example
|
|
138
|
+
# diff = query.diff(entity_id: 1, from_date: 1.year.ago, to_date: Date.today)
|
|
139
|
+
# puts "#{diff[:added].count} new facts, #{diff[:removed].count} removed"
|
|
76
140
|
def diff(entity_id:, from_date:, to_date:)
|
|
77
141
|
facts_at_from = facts_at(from_date, entity_id: entity_id).to_a
|
|
78
142
|
facts_at_to = facts_at(to_date, entity_id: entity_id).to_a
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
  module Temporal
    # A query builder scoped to a single point in time.
    #
    # Wraps a Facts instance together with a date so calls can be chained,
    # e.g. facts.at("2024-01-15").query("Paula's role"). Every method simply
    # forwards to the wrapped Facts instance with the bound date filled in;
    # the date is passed through exactly as given (nothing is parsed here).
    #
    # @example Basic usage
    #   facts.at("2024-01-15").query("Paula's role", format: :cypher)
    #   facts.at("2024-01-15").facts_for(entity_id)
    #   facts.at("2024-01-15").compare_to("2024-06-15")
    #
    class QueryBuilder
      # @return [Date, String] the point in time this builder is bound to
      attr_reader :date

      # Initialize with a Facts instance and date
      #
      # @param facts [FactDb::Facts] The Facts instance every call is forwarded to
      # @param date [Date, String] The point in time (stored as given)
      def initialize(facts, date)
        @facts = facts
        @date = date
      end

      # Execute a query at this point in time
      #
      # @param topic [String] The query topic
      # @param format [Symbol] Output format (:json, :triples, :cypher, :text, :prolog)
      # @param options [Hash] Extra keyword arguments forwarded to Facts#query_facts
      # @return [Array, String, Hash] Results at this point in time
      def query(topic, format: :json, **options)
        @facts.query_facts(topic: topic, at: @date, format: format, **options)
      end

      # Get all facts valid at this date
      #
      # @param format [Symbol] Output format
      # @param options [Hash] Extra keyword arguments forwarded to Facts#facts_at
      # @return [Array, String, Hash] Results
      def facts(format: :json, **options)
        @facts.facts_at(@date, format: format, **options)
      end

      # Get facts for a specific entity at this date
      #
      # @param entity_id [Integer] Entity ID
      # @param format [Symbol] Output format
      # @param options [Hash] Extra keyword arguments forwarded to Facts#facts_at
      # @return [Array, String, Hash] Results
      def facts_for(entity_id, format: :json, **options)
        @facts.facts_at(@date, entity: entity_id, format: format, **options)
      end

      # Compare this date to another
      #
      # The bound date is passed as +from+ and +other_date+ as +to+.
      #
      # @param other_date [Date, String] The date to compare to
      # @param topic [String, nil] Optional topic to compare
      # @return [Hash] Differences with :added, :removed, :unchanged keys
      def compare_to(other_date, topic: nil)
        @facts.diff(topic, from: @date, to: other_date)
      end

      # Get the timeline state at this date
      #
      # Same lookup as {#facts_for}; accepts the same extra options so the
      # two entry points stay interchangeable.
      #
      # @param entity_id [Integer] Entity ID
      # @param format [Symbol] Output format
      # @param options [Hash] Extra keyword arguments forwarded to Facts#facts_at
      # @return [Array, String, Hash] Facts valid at this date for the entity
      def state_for(entity_id, format: :json, **options)
        facts_for(entity_id, format: format, **options)
      end
    end
  end
end
|
|
@@ -2,59 +2,106 @@
|
|
|
2
2
|
|
|
3
3
|
module FactDb
|
|
4
4
|
module Temporal
|
|
5
|
+
# Builds and analyzes temporal timelines of facts for an entity
|
|
6
|
+
#
|
|
7
|
+
# Provides methods to view an entity's history, group events by time periods,
|
|
8
|
+
# find overlapping facts, and compare states at different points in time.
|
|
9
|
+
# Includes Enumerable for easy iteration over timeline events.
|
|
10
|
+
#
|
|
11
|
+
# @example Build a timeline for an entity
|
|
12
|
+
# timeline = Timeline.new.build(entity_id: person.id)
|
|
13
|
+
# timeline.by_year.each { |year, events| puts "#{year}: #{events.count} events" }
|
|
14
|
+
#
|
|
15
|
+
# @example Find currently active facts
|
|
16
|
+
# active_facts = timeline.active
|
|
17
|
+
#
|
|
5
18
|
class Timeline
|
|
6
19
|
include Enumerable
|
|
7
20
|
|
|
21
|
+
# @return [Array<TimelineEvent>] the timeline events
|
|
8
22
|
attr_reader :events
|
|
9
23
|
|
|
24
|
+
# Initializes a new empty Timeline
|
|
10
25
|
def initialize
|
|
11
26
|
@events = []
|
|
12
27
|
end
|
|
13
28
|
|
|
29
|
+
# Iterates over timeline event hashes
|
|
30
|
+
#
|
|
31
|
+
# @yield [Hash] each event as a hash
|
|
32
|
+
# @return [Enumerator] if no block given
|
|
14
33
|
def each(&block)
|
|
15
34
|
to_hash.each(&block)
|
|
16
35
|
end
|
|
17
36
|
|
|
37
|
+
# Builds a timeline of facts for an entity
|
|
38
|
+
#
|
|
39
|
+
# @param entity_id [Integer] the entity to build timeline for
|
|
40
|
+
# @param from [Date, Time, nil] start of date range (optional)
|
|
41
|
+
# @param to [Date, Time, nil] end of date range (optional)
|
|
42
|
+
# @return [Timeline] self for method chaining
|
|
18
43
|
def build(entity_id:, from: nil, to: nil)
|
|
19
44
|
facts = fetch_facts(entity_id, from, to)
|
|
20
45
|
@events = facts.map { |fact| TimelineEvent.new(fact) }
|
|
21
46
|
self
|
|
22
47
|
end
|
|
23
48
|
|
|
49
|
+
# Returns events sorted by valid_at date
|
|
50
|
+
#
|
|
51
|
+
# @return [Array<TimelineEvent>] sorted events
|
|
24
52
|
def to_a
|
|
25
53
|
@events.sort_by(&:valid_at)
|
|
26
54
|
end
|
|
27
55
|
|
|
56
|
+
# Returns events as an array of hashes
|
|
57
|
+
#
|
|
58
|
+
# @return [Array<Hash>] events converted to hash format
|
|
28
59
|
def to_hash
|
|
29
60
|
to_a.map(&:to_hash)
|
|
30
61
|
end
|
|
31
62
|
|
|
32
|
-
#
|
|
63
|
+
# Groups events by year
|
|
64
|
+
#
|
|
65
|
+
# @return [Hash<Integer, Array<TimelineEvent>>] events grouped by year
|
|
33
66
|
def by_year
|
|
34
67
|
to_a.group_by { |event| event.valid_at.year }
|
|
35
68
|
end
|
|
36
69
|
|
|
37
|
-
#
|
|
70
|
+
# Groups events by month
|
|
71
|
+
#
|
|
72
|
+
# @return [Hash<String, Array<TimelineEvent>>] events grouped by "YYYY-MM" key
|
|
38
73
|
def by_month
|
|
39
74
|
to_a.group_by { |event| event.valid_at.strftime("%Y-%m") }
|
|
40
75
|
end
|
|
41
76
|
|
|
42
|
-
#
|
|
77
|
+
# Returns events in a specific date range
|
|
78
|
+
#
|
|
79
|
+
# @param from [Date, Time] start of range (inclusive)
|
|
80
|
+
# @param to [Date, Time] end of range (inclusive)
|
|
81
|
+
# @return [Array<TimelineEvent>] events within the range
|
|
43
82
|
def between(from, to)
|
|
44
83
|
to_a.select { |event| event.valid_at >= from && event.valid_at <= to }
|
|
45
84
|
end
|
|
46
85
|
|
|
47
|
-
#
|
|
86
|
+
# Returns currently active (valid) events
|
|
87
|
+
#
|
|
88
|
+
# @return [Array<TimelineEvent>] events with no invalid_at date
|
|
48
89
|
def active
|
|
49
90
|
to_a.select(&:currently_valid?)
|
|
50
91
|
end
|
|
51
92
|
|
|
52
|
-
#
|
|
93
|
+
# Returns historical (no longer valid) events
|
|
94
|
+
#
|
|
95
|
+
# @return [Array<TimelineEvent>] events that have been invalidated
|
|
53
96
|
def historical
|
|
54
97
|
to_a.reject(&:currently_valid?)
|
|
55
98
|
end
|
|
56
99
|
|
|
57
|
-
#
|
|
100
|
+
# Finds pairs of overlapping events
|
|
101
|
+
#
|
|
102
|
+
# Two events overlap if their validity periods intersect.
|
|
103
|
+
#
|
|
104
|
+
# @return [Array<Array<TimelineEvent, TimelineEvent>>] pairs of overlapping events
|
|
58
105
|
def overlapping
|
|
59
106
|
result = []
|
|
60
107
|
sorted = to_a
|
|
@@ -68,12 +115,17 @@ module FactDb
|
|
|
68
115
|
result
|
|
69
116
|
end
|
|
70
117
|
|
|
71
|
-
#
|
|
118
|
+
# Returns the state (valid events) at a specific point in time
|
|
119
|
+
#
|
|
120
|
+
# @param date [Date, Time] the point in time to query
|
|
121
|
+
# @return [Array<TimelineEvent>] events valid at the given date
|
|
72
122
|
def state_at(date)
|
|
73
123
|
to_a.select { |event| event.valid_at?(date) }
|
|
74
124
|
end
|
|
75
125
|
|
|
76
|
-
#
|
|
126
|
+
# Generates a summary of changes between consecutive events
|
|
127
|
+
#
|
|
128
|
+
# @return [Array<Hash>] array of hashes with :from, :to, and :gap_days keys
|
|
77
129
|
def changes_summary
|
|
78
130
|
sorted = to_a
|
|
79
131
|
|
|
@@ -103,29 +155,68 @@ module FactDb
|
|
|
103
155
|
end
|
|
104
156
|
end
|
|
105
157
|
|
|
158
|
+
# Wraps a Fact as a timeline event with convenience methods
|
|
159
|
+
#
|
|
160
|
+
# Provides a simplified interface for timeline operations,
|
|
161
|
+
# delegating most methods to the underlying fact.
|
|
162
|
+
#
|
|
106
163
|
class TimelineEvent
|
|
164
|
+
# @return [FactDb::Models::Fact] the underlying fact
|
|
107
165
|
attr_reader :fact
|
|
108
166
|
|
|
109
|
-
|
|
167
|
+
# @!method id
|
|
168
|
+
# @return [Integer] the fact ID
|
|
169
|
+
# @!method text
|
|
170
|
+
# @return [String] the fact text
|
|
171
|
+
# @!method valid_at
|
|
172
|
+
# @return [Time] when the fact became valid
|
|
173
|
+
# @!method invalid_at
|
|
174
|
+
# @return [Time, nil] when the fact became invalid
|
|
175
|
+
# @!method status
|
|
176
|
+
# @return [String] the fact status
|
|
177
|
+
# @!method currently_valid?
|
|
178
|
+
# @return [Boolean] true if fact is currently valid
|
|
179
|
+
# @!method valid_at?(date)
|
|
180
|
+
# @param date [Date, Time] the point in time
|
|
181
|
+
# @return [Boolean] true if valid at the given date
|
|
182
|
+
# @!method duration
|
|
183
|
+
# @return [ActiveSupport::Duration, nil] validity duration
|
|
184
|
+
# @!method duration_days
|
|
185
|
+
# @return [Integer, nil] validity duration in days
|
|
186
|
+
# @!method entities
|
|
187
|
+
# @return [Array<Entity>] mentioned entities
|
|
188
|
+
# @!method source_contents
|
|
189
|
+
# @return [Array<Source>] source documents
|
|
190
|
+
delegate :id, :text, :valid_at, :invalid_at, :status,
|
|
110
191
|
:currently_valid?, :valid_at?, :duration, :duration_days,
|
|
111
192
|
:entities, :source_contents, to: :fact
|
|
112
193
|
|
|
194
|
+
# Initializes a new TimelineEvent
|
|
195
|
+
#
|
|
196
|
+
# @param fact [FactDb::Models::Fact] the fact to wrap
|
|
113
197
|
def initialize(fact)
|
|
114
198
|
@fact = fact
|
|
115
199
|
end
|
|
116
200
|
|
|
201
|
+
# Converts the event to a hash representation
|
|
202
|
+
#
|
|
203
|
+
# @return [Hash] hash with :id, :text, :valid_at, :invalid_at, :status, :duration_days, :entities
|
|
117
204
|
def to_hash
|
|
118
205
|
{
|
|
119
206
|
id: id,
|
|
120
|
-
|
|
207
|
+
text: text,
|
|
121
208
|
valid_at: valid_at,
|
|
122
209
|
invalid_at: invalid_at,
|
|
123
210
|
status: status,
|
|
124
211
|
duration_days: duration_days,
|
|
125
|
-
entities: entities.map(&:
|
|
212
|
+
entities: entities.map(&:name)
|
|
126
213
|
}
|
|
127
214
|
end
|
|
128
215
|
|
|
216
|
+
# Compares events by valid_at date for sorting
|
|
217
|
+
#
|
|
218
|
+
# @param other [TimelineEvent] the event to compare with
|
|
219
|
+
# @return [Integer] -1, 0, or 1
|
|
129
220
|
def <=>(other)
|
|
130
221
|
valid_at <=> other.valid_at
|
|
131
222
|
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
  module Transformers
    # Base transformer class providing common utilities.
    # Subclasses implement specific output formats for LLM consumption.
    class Base
      # Replacement table used by #escape_string. The backslash entry is what
      # keeps an input ending in "\" from swallowing the closing quote.
      STRING_ESCAPES = {
        "\\" => "\\\\",
        "\"" => "\\\"",
        "\n" => "\\n"
      }.freeze
      private_constant :STRING_ESCAPES

      # Transform query results to the target format.
      #
      # The base implementation is a no-op and hands the results back as-is.
      #
      # @param results [QueryResult] The query results
      # @return [QueryResult] Transformed results (may modify in place)
      def transform(results)
        results
      end

      protected

      # Safely get a value from hash or object
      #
      # For hashes the symbol key is tried first and the string form of the
      # key is used only when that lookup yields nil, so a stored +false+ is
      # returned rather than discarded. For other objects the method named
      # +key+ is called when the object responds to it.
      #
      # @param obj [Hash, Object] Source object
      # @param key [Symbol] Key to retrieve
      # @return [Object, nil] The value, or nil when it cannot be found
      def get_value(obj, key)
        if obj.is_a?(Hash)
          value = obj[key]
          value.nil? ? obj[key.to_s] : value
        elsif obj.respond_to?(key)
          obj.public_send(key)
        end
      end

      # Format dates consistently
      #
      # Anything responding to +strftime+ is rendered as YYYY-MM-DD; other
      # values are converted with +to_s+.
      #
      # @param date [Date, Time, String, nil] Date to format
      # @return [String, nil] Formatted date string (nil for nil input)
      def format_date(date)
        return nil if date.nil?

        date.respond_to?(:strftime) ? date.strftime("%Y-%m-%d") : date.to_s
      end

      # Escape strings for output
      #
      # Escapes backslashes, double quotes and newlines so the result can be
      # placed between double quotes. Backslashes must be escaped too,
      # otherwise a value such as +foo\+ would escape the closing quote.
      #
      # @param str [String] String to escape (converted with +to_s+)
      # @return [String] Escaped string
      def escape_string(str)
        # Hash replacement inserts the mapped value verbatim (no backreference
        # processing), and a single pass avoids double-escaping.
        str.to_s.gsub(/[\\"\n]/, STRING_ESCAPES)
      end

      # Create a variable name from a string
      #
      # Lower-cases the input, collapses every run of characters outside
      # a-z/0-9 into one underscore, limits the result to 30 characters and
      # strips an underscore left at either end (including one exposed by the
      # 30-character cut).
      #
      # @param str [String] Source string
      # @return [String] Variable name (may be empty if nothing usable remains)
      def to_variable(str)
        str.to_s
           .downcase
           .gsub(/[^a-z0-9]+/, "_")
           .sub(/\A_/, "")
           .slice(0, 30)
           .sub(/_\z/, "")
      end

      # Truncate string to specified length
      #
      # Values that already fit are returned untouched. Longer values are cut
      # and suffixed with "..." so the result is exactly +length+ characters;
      # when +length+ is below 3 there is no room for the ellipsis and the
      # text is hard-cut instead.
      #
      # @param str [String] String to truncate
      # @param length [Integer] Maximum length
      # @return [String] Truncated string
      def truncate(str, length)
        text = str.to_s
        return str if text.length <= length
        return text[0, [length, 0].max] if length < 3

        "#{text[0, length - 3]}..."
      end
    end
  end
end
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set" # Set is only autoloaded from Ruby 3.2 onwards

module FactDb
  module Transformers
    # Transforms results into Cypher-like graph notation.
    # This format is readable by both humans and LLMs, and encodes
    # nodes, relationships, and properties explicitly.
    #
    # Entities become node definitions; each fact becomes one relationship
    # line whose type and (when no second entity is mentioned) literal object
    # are derived from the fact text with simple keyword heuristics.
    #
    # @example Output format
    #   (paula:Person {name: "Paula Chen"})
    #   (microsoft:Organization {name: "Microsoft"})
    #   (paula)-[:WORKS_AT {since: "2024-01-10", role: "Principal Engineer"}]->(microsoft)
    class CypherTransformer < Base
      # Ordered keyword patterns used to pick a relationship type; the first
      # pattern matching the fact text wins.
      RELATIONSHIP_PATTERNS = [
        [/\bworks?\s+(at|for)\b/i, "WORKS_AT"],
        [/\bworked\s+(at|for)\b/i, "WORKED_AT"],
        [/\breports?\s+to\b/i, "REPORTS_TO"],
        [/\bis\s+(a|an|the)\b/i, "IS_A"],
        [/\bis\s+\w+/i, "IS"],
        [/\bhas\b/i, "HAS"],
        [/\bdecided\b/i, "DECIDED"],
        [/\bjoined\b/i, "JOINED"],
        [/\bleft\b/i, "LEFT"]
      ].freeze

      # A word that ends the subject when parsing it out of the fact text.
      SUBJECT_BOUNDARY = /^(is|are|was|were|has|have|works|worked|reports)$/i

      # Leading verb plus an optional article/preposition. The \b after the
      # optional word is essential: without it "is available" would lose its
      # leading "a" and become "vailable".
      OBJECT_PREFIX = /^(?:is|are|was|were|has|have|works?|worked|reports?)\s+(?:(?:at|for|to|a|an|the)\b)?\s*/i

      private_constant :RELATIONSHIP_PATTERNS, :SUBJECT_BOUNDARY, :OBJECT_PREFIX

      # Transform results to Cypher format.
      #
      # Node definitions for all entities come first (identical definitions
      # are emitted once), followed by one relationship per fact. Nodes for
      # subjects/objects that were not among the entities are defined just
      # before the relationship that needs them.
      #
      # @param results [QueryResult] The query results
      # @return [String] Cypher-like graph notation
      def transform(results)
        lines = []
        defined_nodes = Set.new

        # Define entity nodes
        results.each_entity do |entity|
          node_def = entity_to_cypher(entity)
          # Set#add? returns nil when the definition was already present.
          lines << node_def if node_def && defined_nodes.add?(node_def)
        end

        # Define relationships from facts
        results.each_fact do |fact|
          relationship = fact_to_cypher(fact, results.entities, defined_nodes, lines)
          lines << relationship if relationship
        end

        lines.join("\n")
      end

      private

      # Build the node definition for an entity.
      #
      # @param entity [Hash, Object] entity exposing :name and optionally :kind / :aliases
      # @return [String, nil] node definition, or nil when the entity has no name
      def entity_to_cypher(entity)
        name = get_value(entity, :name)
        return nil unless name

        var = to_variable(name)
        label = (get_value(entity, :kind) || "Entity").to_s.capitalize

        props = { name: name }

        # Add aliases if present
        aliases = get_value(entity, :aliases)
        if aliases && !aliases.empty?
          # Hash aliases are read via get_value so string keys work as well.
          props[:aliases] = aliases.map { |a| a.is_a?(Hash) ? get_value(a, :name) : a.to_s }
        end

        "(#{var}:#{label} #{format_props(props)})"
      end

      # Build the relationship line for a fact, defining any missing nodes.
      #
      # @param fact [Hash, Object] fact exposing :text and optionally
      #   :entity_mentions, :valid_at, :invalid_at, :status, :confidence
      # @param entities [#[]] lookup from entity id to entity
      # @param defined_nodes [Set<String>] node definitions emitted so far (mutated)
      # @param lines [Array<String>] output lines (node definitions are appended)
      # @return [String, nil] relationship line, or nil when the fact has no
      #   text or no subject can be determined
      def fact_to_cypher(fact, entities, defined_nodes, lines)
        text = get_value(fact, :text) || ""
        return nil if text.empty?

        mentions = get_value(fact, :entity_mentions) || []

        # Find subject and object from mentions if available
        subject_mention = mentions.find { |m| subject_role?(m) }
        object_mention = mentions.find { |m| !subject_role?(m) }

        # Get subject - from mentions or parse from text
        subject_name = subject_mention ? mentioned_entity_name(subject_mention, entities) : extract_subject(text)
        return nil if subject_name.nil? || subject_name.empty?

        subject_var = to_variable(subject_name)
        ensure_node_defined(subject_var, subject_name, defined_nodes, lines)

        rel_props = relationship_props(fact)
        rel_type = extract_relationship_type(text)
        props_str = rel_props.empty? ? "" : " #{format_props(rel_props)}"

        if object_mention
          # Relationship to another entity
          object_name = mentioned_entity_name(object_mention, entities)
          object_var = to_variable(object_name)
          ensure_node_defined(object_var, object_name, defined_nodes, lines)

          "(#{subject_var})-[:#{rel_type}#{props_str}]->(#{object_var})"
        else
          # Relationship to a literal value
          object_value = extract_object_value(text, subject_name)
          "(#{subject_var})-[:#{rel_type}#{props_str}]->(\"#{escape_string(object_value)}\")"
        end
      end

      # True when the mention's role is "subject" (String or Symbol).
      def subject_role?(mention)
        get_value(mention, :mention_role).to_s == "subject"
      end

      # Resolve the display name for a mention, falling back to "Entity_<id>"
      # when the id is not present in the entity lookup.
      def mentioned_entity_name(mention, entities)
        entity_id = get_value(mention, :entity_id)
        entity = entities[entity_id]
        entity ? get_value(entity, :name) : "Entity_#{entity_id}"
      end

      # Emit a generic :Entity node for +var+ unless a node with that
      # variable has already been defined.
      def ensure_node_defined(var, name, defined_nodes, lines)
        prefix = "(#{var}:"
        # Anchor at the start so text inside a property value cannot be
        # mistaken for an existing definition.
        return if defined_nodes.any? { |node| node.start_with?(prefix) }

        node_def = "(#{var}:Entity {name: \"#{escape_string(name)}\"})"
        lines << node_def
        defined_nodes << node_def
      end

      # Collect the temporal/status properties attached to a relationship.
      # Only values that are present on the fact are included.
      def relationship_props(fact)
        props = {}

        valid_at = get_value(fact, :valid_at)
        props[:since] = format_date(valid_at) if valid_at

        invalid_at = get_value(fact, :invalid_at)
        props[:until] = format_date(invalid_at) if invalid_at

        status = get_value(fact, :status)
        props[:status] = status if status

        confidence = get_value(fact, :confidence)
        props[:confidence] = confidence if confidence

        props
      end

      # Pick a relationship type from keywords in the fact text.
      #
      # @param text [String] fact text
      # @return [String] relationship type, "RELATES_TO" when nothing matches
      def extract_relationship_type(text)
        _pattern, type = RELATIONSHIP_PATTERNS.find { |pattern, _| text.match?(pattern) }
        type || "RELATES_TO"
      end

      # Take the words preceding the first boundary verb as the subject.
      #
      # @param text [String] fact text
      # @return [String] subject (empty when the text starts with a boundary verb)
      def extract_subject(text)
        text.split(/\s+/).take_while { |word| !word.match?(SUBJECT_BOUNDARY) }.join(" ")
      end

      # Strip the subject and the leading verb phrase from the fact text,
      # leaving the literal object value.
      #
      # @param text [String] fact text
      # @param subject [String] subject to remove from the front of the text
      # @return [String] remaining object text
      def extract_object_value(text, subject)
        remainder = text.sub(/^#{Regexp.escape(subject)}\s*/i, "")
        remainder.sub(OBJECT_PREFIX, "")
      end

      # Render a property hash as "{key: value, ...}".
      #
      # Strings are quoted and escaped, arrays become lists of quoted
      # strings, nil becomes null and everything else is rendered with to_s.
      #
      # @param props [Hash] properties to render
      # @return [String] Cypher-style property map ("{}" when empty)
      def format_props(props)
        return "{}" if props.empty?

        pairs = props.map do |key, value|
          rendered = case value
                     when String then "\"#{escape_string(value)}\""
                     when Array then "[#{value.map { |e| "\"#{escape_string(e)}\"" }.join(", ")}]"
                     when nil then "null"
                     else value.to_s
                     end
          "#{key}: #{rendered}"
        end

        "{#{pairs.join(", ")}}"
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
  module Transformers
    # Pass-through transformer behind the default JSON output format.
    #
    # No reshaping happens here: the query result is converted with +to_h+
    # and handed back.
    class JsonTransformer < Base
      # Convert query results into a JSON-ready hash.
      #
      # @param results [QueryResult] query results; must respond to +to_h+
      # @return [Hash] the value of +results.to_h+
      def transform(results)
        results.to_h
      end
    end
  end
end
|