fact_db 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/CHANGELOG.md +48 -0
- data/COMMITS.md +196 -0
- data/README.md +102 -0
- data/Rakefile +41 -0
- data/db/migrate/001_enable_extensions.rb +7 -0
- data/db/migrate/002_create_contents.rb +44 -0
- data/db/migrate/003_create_entities.rb +36 -0
- data/db/migrate/004_create_entity_aliases.rb +18 -0
- data/db/migrate/005_create_facts.rb +65 -0
- data/db/migrate/006_create_entity_mentions.rb +18 -0
- data/db/migrate/007_create_fact_sources.rb +18 -0
- data/docs/api/extractors/index.md +71 -0
- data/docs/api/extractors/llm.md +162 -0
- data/docs/api/extractors/manual.md +92 -0
- data/docs/api/extractors/rule-based.md +165 -0
- data/docs/api/facts.md +300 -0
- data/docs/api/index.md +66 -0
- data/docs/api/models/content.md +165 -0
- data/docs/api/models/entity.md +202 -0
- data/docs/api/models/fact.md +270 -0
- data/docs/api/models/index.md +77 -0
- data/docs/api/pipeline/extraction.md +175 -0
- data/docs/api/pipeline/index.md +72 -0
- data/docs/api/pipeline/resolution.md +209 -0
- data/docs/api/services/content-service.md +166 -0
- data/docs/api/services/entity-service.md +202 -0
- data/docs/api/services/fact-service.md +223 -0
- data/docs/api/services/index.md +55 -0
- data/docs/architecture/database-schema.md +293 -0
- data/docs/architecture/entity-resolution.md +293 -0
- data/docs/architecture/index.md +149 -0
- data/docs/architecture/temporal-facts.md +268 -0
- data/docs/architecture/three-layer-model.md +242 -0
- data/docs/assets/css/custom.css +137 -0
- data/docs/assets/fact_db.jpg +0 -0
- data/docs/assets/images/fact_db.jpg +0 -0
- data/docs/concepts.md +183 -0
- data/docs/examples/basic-usage.md +235 -0
- data/docs/examples/hr-onboarding.md +312 -0
- data/docs/examples/index.md +64 -0
- data/docs/examples/news-analysis.md +288 -0
- data/docs/getting-started/database-setup.md +170 -0
- data/docs/getting-started/index.md +71 -0
- data/docs/getting-started/installation.md +98 -0
- data/docs/getting-started/quick-start.md +191 -0
- data/docs/guides/batch-processing.md +325 -0
- data/docs/guides/configuration.md +243 -0
- data/docs/guides/entity-management.md +364 -0
- data/docs/guides/extracting-facts.md +299 -0
- data/docs/guides/index.md +22 -0
- data/docs/guides/ingesting-content.md +252 -0
- data/docs/guides/llm-integration.md +299 -0
- data/docs/guides/temporal-queries.md +315 -0
- data/docs/index.md +121 -0
- data/examples/README.md +130 -0
- data/examples/basic_usage.rb +164 -0
- data/examples/entity_management.rb +216 -0
- data/examples/hr_system.rb +428 -0
- data/examples/rule_based_extraction.rb +258 -0
- data/examples/temporal_queries.rb +245 -0
- data/lib/fact_db/config.rb +71 -0
- data/lib/fact_db/database.rb +45 -0
- data/lib/fact_db/errors.rb +10 -0
- data/lib/fact_db/extractors/base.rb +117 -0
- data/lib/fact_db/extractors/llm_extractor.rb +179 -0
- data/lib/fact_db/extractors/manual_extractor.rb +53 -0
- data/lib/fact_db/extractors/rule_based_extractor.rb +228 -0
- data/lib/fact_db/llm/adapter.rb +109 -0
- data/lib/fact_db/models/content.rb +62 -0
- data/lib/fact_db/models/entity.rb +84 -0
- data/lib/fact_db/models/entity_alias.rb +26 -0
- data/lib/fact_db/models/entity_mention.rb +33 -0
- data/lib/fact_db/models/fact.rb +192 -0
- data/lib/fact_db/models/fact_source.rb +35 -0
- data/lib/fact_db/pipeline/extraction_pipeline.rb +146 -0
- data/lib/fact_db/pipeline/resolution_pipeline.rb +129 -0
- data/lib/fact_db/resolution/entity_resolver.rb +261 -0
- data/lib/fact_db/resolution/fact_resolver.rb +259 -0
- data/lib/fact_db/services/content_service.rb +93 -0
- data/lib/fact_db/services/entity_service.rb +150 -0
- data/lib/fact_db/services/fact_service.rb +193 -0
- data/lib/fact_db/temporal/query.rb +125 -0
- data/lib/fact_db/temporal/timeline.rb +134 -0
- data/lib/fact_db/version.rb +5 -0
- data/lib/fact_db.rb +141 -0
- data/mkdocs.yml +198 -0
- metadata +288 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
class Fact < ActiveRecord::Base
|
|
6
|
+
# Table that stores fact rows.
self.table_name = "fact_db_facts"

# Allowed values for the +status+ and +extraction_method+ columns.
STATUSES = %w[canonical superseded corroborated synthesized].freeze
EXTRACTION_METHODS = %w[manual llm rule_based].freeze

# Entities referenced by a fact, reached through the mention join rows.
has_many :entity_mentions,
         class_name: "FactDb::Models::EntityMention",
         foreign_key: :fact_id,
         dependent: :destroy
has_many :entities, through: :entity_mentions

# Content records cited by a fact, reached through the source join rows.
has_many :fact_sources,
         class_name: "FactDb::Models::FactSource",
         foreign_key: :fact_id,
         dependent: :destroy
has_many :source_contents, through: :fact_sources, source: :content

# Self-referential links: the fact that replaced this one, and the facts
# this one replaced. Both use the superseded_by_id column.
belongs_to :superseded_by,
           class_name: "FactDb::Models::Fact",
           foreign_key: :superseded_by_id,
           optional: true
has_many :supersedes,
         class_name: "FactDb::Models::Fact",
         foreign_key: :superseded_by_id

validates :fact_text, :fact_hash, :valid_at, :status, presence: true

# fact_hash is filled in from fact_text before a new record is validated.
before_validation :generate_fact_hash, on: :create

validates :status, inclusion: { in: STATUSES }
validates :extraction_method, inclusion: { in: EXTRACTION_METHODS }, allow_nil: true

# Status scopes
scope :canonical, -> { where(status: "canonical") }
scope :superseded, -> { where(status: "superseded") }
scope :synthesized, -> { where(status: "synthesized") }

# Temporal scopes: a fact is open-ended while invalid_at is NULL.
scope :currently_valid, -> { where(invalid_at: nil) }
scope :historical, -> { where.not(invalid_at: nil) }

# Facts whose validity window contains +date+ (start inclusive, end exclusive).
scope :valid_at, ->(date) {
  where("valid_at <= ?", date)
    .where("invalid_at > ? OR invalid_at IS NULL", date)
}

# Facts whose validity window overlaps the span from +from+ to +to+.
scope :valid_between, ->(from, to) {
  where("valid_at <= ? AND (invalid_at > ? OR invalid_at IS NULL)", to, from)
}

# Facts whose start / end timestamp falls inside the given range.
scope :became_valid_between, ->(from, to) { where(valid_at: from..to) }
scope :became_invalid_between, ->(from, to) { where(invalid_at: from..to) }

# Entity filtering through the mentions join table.
scope :mentioning_entity, ->(entity_id) {
  joins(:entity_mentions).where(fact_db_entity_mentions: { entity_id: entity_id })
}

scope :with_role, ->(entity_id, role) {
  joins(:entity_mentions).where(
    fact_db_entity_mentions: { entity_id: entity_id, mention_role: role }
  )
}

# Full-text search over fact_text using PostgreSQL's English configuration.
scope :search_text, ->(query) {
  where("to_tsvector('english', fact_text) @@ plainto_tsquery('english', ?)", query)
}

# Extraction method (two names for the same filter).
scope :extracted_by, ->(method) { where(extraction_method: method) }
scope :by_extraction_method, ->(method) { where(extraction_method: method) }

# Confidence filtering
scope :high_confidence, -> { where("confidence >= ?", 0.9) }
scope :low_confidence, -> { where("confidence < ?", 0.5) }
|
|
84
|
+
|
|
85
|
+
# Whether the fact is still open-ended, i.e. no end-of-validity
# timestamp has been recorded.
#
# @return [Boolean]
def currently_valid?
  !invalid_at
end
|
|
88
|
+
|
|
89
|
+
# Whether the fact's validity window contains +date+.
# The start is inclusive and the end is exclusive; a nil invalid_at
# means the window is still open.
#
# @param date [Time, Date] point in time to test
# @return [Boolean] false when the record has no valid_at yet
def valid_at?(date)
  starts_at = valid_at
  # An unsaved record may not have valid_at set; treat it as not valid
  # rather than raising NoMethodError on nil.
  return false if starts_at.nil?

  ends_at = invalid_at
  starts_at <= date && (ends_at.nil? || ends_at > date)
end
|
|
92
|
+
|
|
93
|
+
# Length of the validity window, computed as invalid_at minus valid_at.
#
# @return [Object, nil] the difference, or nil while the fact is open-ended
def duration
  ended_at = invalid_at
  ended_at.nil? ? nil : ended_at - valid_at
end
|
|
98
|
+
|
|
99
|
+
# Whole number of calendar days between the start and end dates.
#
# @return [Integer, nil] nil while the fact is open-ended
def duration_days
  return if invalid_at.nil?

  first_day = valid_at.to_date
  last_day = invalid_at.to_date
  (last_day - first_day).to_i
end
|
|
104
|
+
|
|
105
|
+
# Whether the status column marks this fact as superseded.
#
# @return [Boolean]
def superseded?
  status.eql?("superseded")
end
|
|
108
|
+
|
|
109
|
+
# Whether the status column marks this fact as synthesized.
#
# @return [Boolean]
def synthesized?
  status.eql?("synthesized")
end
|
|
112
|
+
|
|
113
|
+
# Closes the validity window by persisting an end timestamp.
#
# @param at [Time] end of validity; defaults to the current time
# @return [Object] whatever update! returns
def invalidate!(at: Time.current)
  closing = { invalid_at: at }
  update!(closing)
end
|
|
116
|
+
|
|
117
|
+
# Replaces this fact with a new canonical fact inside one transaction.
# The new fact inherits this fact's extraction_method; this fact is marked
# "superseded", linked to the replacement, and closed at the replacement's
# start time.
#
# @param new_fact_text [String] text of the replacement fact
# @param valid_at [Time] start of the replacement's validity
# @return [Object] the newly created fact
def supersede_with!(new_fact_text, valid_at:)
  transaction do
    replacement_attrs = {
      fact_text: new_fact_text,
      valid_at: valid_at,
      status: "canonical",
      extraction_method: extraction_method
    }

    self.class.create!(replacement_attrs).tap do |replacement|
      update!(
        status: "superseded",
        superseded_by_id: replacement.id,
        invalid_at: valid_at
      )
    end
  end
end
|
|
135
|
+
|
|
136
|
+
# Finds or creates the mention row for an entity / mention-text pair.
# Role and confidence are only assigned inside the creation block.
#
# @param entity [Object] entity being mentioned
# @param text [String] text of the mention
# @param role [Object, nil] value for mention_role
# @param confidence [Numeric] value for confidence
# @return [Object] the found or created mention
def add_mention(entity:, text:, role: nil, confidence: 1.0)
  lookup = { entity: entity, mention_text: text }

  entity_mentions.find_or_create_by!(lookup) do |mention|
    mention.mention_role = role
    mention.confidence = confidence
  end
end
|
|
142
|
+
|
|
143
|
+
# Finds or creates the source row linking this fact to +content+.
# Type, excerpt and confidence are only assigned inside the creation block.
#
# @param content [Object] content record cited as evidence
# @param type [String] value for source_type
# @param excerpt [String, nil] supporting excerpt
# @param confidence [Numeric] value for confidence
# @return [Object] the found or created source row
def add_source(content:, type: "primary", excerpt: nil, confidence: 1.0)
  lookup = { content: content }

  fact_sources.find_or_create_by!(lookup) do |source|
    source.source_type = type
    source.excerpt = excerpt
    source.confidence = confidence
  end
end
|
|
150
|
+
|
|
151
|
+
# Get source facts for synthesized facts
|
|
152
|
+
# Facts listed in derived_from_ids.
#
# @return [Object] Fact.none when there are no ids, otherwise Fact.where(id: ...)
def source_facts
  # Array() lets a nil derived_from_ids behave like an empty list
  # instead of raising NoMethodError on nil.any?.
  ids = Array(derived_from_ids)
  return Fact.none unless ids.any?

  Fact.where(id: ids)
end
|
|
157
|
+
|
|
158
|
+
# Get facts that corroborate this one
|
|
159
|
+
# Facts listed in corroborated_by_ids.
#
# @return [Object] Fact.none when there are no ids, otherwise Fact.where(id: ...)
def corroborating_facts
  # Array() lets a nil corroborated_by_ids behave like an empty list
  # instead of raising NoMethodError on nil.any?.
  ids = Array(corroborated_by_ids)
  return Fact.none unless ids.any?

  Fact.where(id: ids)
end
|
|
164
|
+
|
|
165
|
+
# Evidence chain - trace back to original content
|
|
166
|
+
# Collects the content records behind this fact. For synthesized facts it
# also walks the facts they were derived from, depth-first, in the same
# order the recursive version produced.
#
# Each fact is visited once, so a cycle in derived_from_ids (A derived
# from B derived from A) terminates instead of recursing without bound.
#
# @return [Array] unique content records, own sources first
def evidence_chain
  collected = []
  visited = {}
  pending = [self]

  until pending.empty?
    fact = pending.pop
    # Keyed by the fact itself so model equality, not object identity,
    # decides whether a fact was already seen.
    next if visited.key?(fact)

    visited[fact] = true
    collected.concat(fact.source_contents.to_a)

    # Array() tolerates a nil derived_from_ids.
    next unless fact.synthesized? && Array(fact.derived_from_ids).any?

    # Reversed push keeps pre-order traversal when popping from the end.
    pending.concat(fact.source_facts.to_a.reverse)
  end

  collected.uniq
end
|
|
177
|
+
|
|
178
|
+
# Vector similarity search
|
|
179
|
+
# Orders facts by the `<=>` distance between the embedding column and the
# given embedding, nearest first.
#
# The embedding is bound through sanitize_sql_array instead of being
# interpolated into the SQL string, so a value containing a quote cannot
# break out of the literal.
#
# @param embedding [Object, nil] value whose #to_s is the vector literal
# @param limit [Integer] maximum number of rows
# @return [Object] relation; `none` when embedding is nil or false
def self.nearest_neighbors(embedding, limit: 10)
  return none unless embedding

  # to_s first: an Array bound to "?" would be expanded into a value list
  # instead of the single literal the original interpolation produced.
  distance_sql = sanitize_sql_array(["embedding <=> ?", embedding.to_s])
  order(Arel.sql(distance_sql)).limit(limit)
end
|
|
184
|
+
|
|
185
|
+
private
|
|
186
|
+
|
|
187
|
+
# Stores the SHA-256 hex digest of fact_text in fact_hash.
# Does nothing when fact_text is not present.
def generate_fact_hash
  return unless fact_text.present?

  self.fact_hash = Digest::SHA256.hexdigest(fact_text)
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FactDb
|
|
4
|
+
module Models
|
|
5
|
+
class FactSource < ActiveRecord::Base
|
|
6
|
+
# Table that stores fact-to-content source links.
self.table_name = "fact_db_fact_sources"

# Allowed values for the +source_type+ column.
TYPES = %w[primary supporting corroborating].freeze

belongs_to :fact, class_name: "FactDb::Models::Fact"
belongs_to :content, class_name: "FactDb::Models::Content"

# One source row per fact/content pair.
validates :fact_id, uniqueness: { scope: :content_id }
validates :source_type, inclusion: { in: TYPES }

# Filters by source type.
scope :primary, -> { where(source_type: "primary") }
scope :supporting, -> { where(source_type: "supporting") }
scope :corroborating, -> { where(source_type: "corroborating") }

# Sources with confidence of at least 0.9.
scope :high_confidence, -> { where("confidence >= ?", 0.9) }
|
|
22
|
+
|
|
23
|
+
# Whether source_type is "primary".
#
# @return [Boolean]
def primary?
  source_type.eql?("primary")
end
|
|
26
|
+
|
|
27
|
+
# Shortened form of the excerpt for display.
#
# @param length [Integer] maximum number of characters kept before "..."
# @return [String, nil] nil without an excerpt; the excerpt itself when it
#   already fits; otherwise the first +length+ characters followed by "..."
def excerpt_preview(length: 100)
  text = excerpt
  return nil if text.nil?

  text.length > length ? "#{text[0, length]}..." : text
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "simple_flow"
|
|
4
|
+
|
|
5
|
+
module FactDb
|
|
6
|
+
module Pipeline
|
|
7
|
+
# Pipeline for extracting facts from content using SimpleFlow
|
|
8
|
+
# Supports parallel processing of multiple content items
|
|
9
|
+
#
|
|
10
|
+
# @example Sequential extraction
|
|
11
|
+
# pipeline = ExtractionPipeline.new(config)
|
|
12
|
+
# results = pipeline.process([content1, content2], extractor: :llm)
|
|
13
|
+
#
|
|
14
|
+
# @example Parallel extraction
|
|
15
|
+
# pipeline = ExtractionPipeline.new(config)
|
|
16
|
+
# results = pipeline.process_parallel([content1, content2, content3], extractor: :llm)
|
|
17
|
+
#
|
|
18
|
+
class ExtractionPipeline
|
|
19
|
+
attr_reader :config
|
|
20
|
+
|
|
21
|
+
# Stores the configuration later passed to the extractors.
#
# @param config [Object] configuration object; defaults to FactDb.config
def initialize(config = FactDb.config)
  @config = config
end
|
|
24
|
+
|
|
25
|
+
# Process multiple content items sequentially
|
|
26
|
+
#
|
|
27
|
+
# @param contents [Array<Models::Content>] Content records to process
|
|
28
|
+
# @param extractor [Symbol] Extractor type (:manual, :llm, :rule_based)
|
|
29
|
+
# @return [Array<Hash>] Results with extracted facts per content
|
|
30
|
+
def process(contents, extractor: config.default_extractor)
  pipeline = build_extraction_pipeline(extractor)

  contents.map do |content|
    # NOTE(review): success?/halted?/error are assumed to exist on the
    # object the pipeline returns — confirm against the SimpleFlow version
    # in use.
    result = pipeline.call(SimpleFlow::Result.new(content))
    {
      # Safe navigation: the pipeline's first step already halts on a nil
      # item, so report it as a failed entry instead of raising
      # NoMethodError while building the summary.
      content_id: content&.id,
      facts: result.success? ? result.value : [],
      error: result.halted? ? result.error : nil
    }
  end
end
|
|
42
|
+
|
|
43
|
+
# Process multiple content items in parallel
|
|
44
|
+
# Uses SimpleFlow's parallel execution capabilities
|
|
45
|
+
#
|
|
46
|
+
# @param contents [Array<Models::Content>] Content records to process
|
|
47
|
+
# @param extractor [Symbol] Extractor type (:manual, :llm, :rule_based)
|
|
48
|
+
# @return [Array<Hash>] Results with extracted facts per content
|
|
49
|
+
def process_parallel(contents, extractor: config.default_extractor)
  pipeline = build_parallel_pipeline(contents, extractor)

  # Explicit braces keep the seed a positional Hash. Written brace-less,
  # Ruby 3 treats it as keyword arguments whenever Result#initialize also
  # accepts keyword options, and the value is never set.
  seed = { contents: contents, results: {} }
  final_result = pipeline.call(SimpleFlow::Result.new(seed))
  per_content = final_result.value[:results]

  contents.map do |content|
    entry = per_content[content.id]
    {
      content_id: content.id,
      # Items with no recorded entry are reported with no facts and no error.
      facts: entry&.dig(:facts) || [],
      error: entry&.dig(:error)
    }
  end
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
# Builds the three-stage pipeline applied to a single content item:
# validate the content, run the extractor, keep only valid facts.
#
# The stages are plain lambdas closing over local variables only, so they
# behave the same whatever `self` is inside the Pipeline block.
#
# @param extractor [Symbol, String] extractor type handed to get_extractor
# @return [SimpleFlow::Pipeline]
def build_extraction_pipeline(extractor)
  extractor_instance = get_extractor(extractor)

  # Stage 1: halt when there is nothing to extract from.
  ensure_content = lambda do |result|
    content = result.value
    next result.halt("Content is empty or missing") if content.nil? || content.raw_text.blank?

    result.continue(content)
  end

  # Stage 2: run the extractor; any StandardError becomes a halt message.
  run_extractor = lambda do |result|
    begin
      result.continue(extractor_instance.extract(result.value))
    rescue StandardError => e
      result.halt("Extraction failed: #{e.message}")
    end
  end

  # Stage 3: drop facts that do not report themselves as valid.
  keep_valid = ->(result) { result.continue(result.value.select(&:valid?)) }

  SimpleFlow::Pipeline.new do
    step ensure_content
    step run_extractor
    step keep_valid
  end
end
|
|
100
|
+
|
|
101
|
+
# Builds a pipeline with one independent "extract_<id>" step per content
# item, followed by an "aggregate" step that depends on all of them.
#
# Each extract step stores { facts:, error: } under the content id in the
# :results hash of the pipeline value. An extractor failure is stored as
# that item's error message instead of being raised.
#
# NOTE(review): every step merges into the :results it received; if
# SimpleFlow hands each independent step the same initial value, entries
# from sibling steps may not be visible to one another — confirm how
# results of dependency-free steps are combined.
#
# @param contents [Array] content records, each responding to #id
# @param extractor [Symbol, String] extractor type handed to get_extractor
# @return [SimpleFlow::Pipeline]
def build_parallel_pipeline(contents, extractor)
  extractor_instance = get_extractor(extractor)
  step_names = contents.map { |content| "extract_#{content.id}" }

  SimpleFlow::Pipeline.new do
    contents.zip(step_names).each do |content, step_name|
      step step_name, depends_on: [] do |result|
        outcome =
          begin
            extracted = extractor_instance.extract(content)
            { facts: extracted.select(&:valid?), error: nil }
          rescue StandardError => e
            { facts: [], error: e.message }
          end

        merged = result.value[:results].merge(content.id => outcome)
        result.continue(result.value.merge(results: merged))
      end
    end

    # Passes the accumulated value through once all extract steps are done.
    step "aggregate", depends_on: step_names do |result|
      result.continue(result.value)
    end
  end
end
|
|
131
|
+
|
|
132
|
+
# Instantiates the extractor registered under the given type.
#
# The type is compared by its string form, so nil, numbers and other
# objects without #to_sym raise ConfigurationError like any other unknown
# type, rather than an unrelated NoMethodError.
#
# @param extractor [Symbol, String] one of manual, llm, rule_based
# @return [Object] extractor built with this pipeline's config
# @raise [ConfigurationError] when the type is not recognised
def get_extractor(extractor)
  case extractor.to_s
  when "manual"
    Extractors::ManualExtractor.new(config)
  when "llm"
    Extractors::LLMExtractor.new(config)
  when "rule_based"
    Extractors::RuleBasedExtractor.new(config)
  else
    raise ConfigurationError, "Unknown extractor: #{extractor}"
  end
end
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "simple_flow"
|
|
4
|
+
|
|
5
|
+
module FactDb
|
|
6
|
+
module Pipeline
|
|
7
|
+
# Pipeline for resolving entities and facts using SimpleFlow
|
|
8
|
+
# Supports parallel resolution of multiple items
|
|
9
|
+
#
|
|
10
|
+
# @example Resolve entities in parallel
|
|
11
|
+
# pipeline = ResolutionPipeline.new(config)
|
|
12
|
+
# results = pipeline.resolve_entities(["John Smith", "Jane Doe", "Acme Corp"])
|
|
13
|
+
#
|
|
14
|
+
class ResolutionPipeline
|
|
15
|
+
attr_reader :config, :entity_resolver, :fact_resolver
|
|
16
|
+
|
|
17
|
+
# Stores the configuration and builds one entity resolver and one fact
# resolver from it.
#
# @param config [Object] configuration object; defaults to FactDb.config
def initialize(config = FactDb.config)
  @config = config
  @entity_resolver = Resolution::EntityResolver.new(@config)
  @fact_resolver = Resolution::FactResolver.new(@config)
end
|
|
22
|
+
|
|
23
|
+
# Resolve multiple entity names in parallel
|
|
24
|
+
#
|
|
25
|
+
# @param names [Array<String>] Entity names to resolve
|
|
26
|
+
# @param type [Symbol, nil] Entity type filter
|
|
27
|
+
# @return [Array<Hash>] Resolution results
|
|
28
|
+
def resolve_entities(names, type: nil)
  pipeline = build_entity_resolution_pipeline(names, type)

  # Explicit braces keep the seed a positional Hash. Written brace-less,
  # Ruby 3 treats it as keyword arguments whenever Result#initialize also
  # accepts keyword options, and the value is never set.
  seed = { names: names, resolved: {} }
  final_result = pipeline.call(SimpleFlow::Result.new(seed))
  resolved = final_result.value[:resolved]

  names.map do |name|
    resolution = resolved[name]
    {
      name: name,
      entity: resolution&.dig(:entity),
      # A name with no recorded entry is reported as :failed.
      status: resolution&.dig(:status) || :failed,
      error: resolution&.dig(:error)
    }
  end
end
|
|
44
|
+
|
|
45
|
+
# Find and resolve conflicts for multiple entities in parallel
|
|
46
|
+
#
|
|
47
|
+
# @param entity_ids [Array<Integer>] Entity IDs to check for conflicts
|
|
48
|
+
# @return [Array<Hash>] Conflict detection results
|
|
49
|
+
def detect_conflicts(entity_ids)
  pipeline = build_conflict_detection_pipeline(entity_ids)

  # Explicit braces keep the seed a positional Hash. Written brace-less,
  # Ruby 3 treats it as keyword arguments whenever Result#initialize also
  # accepts keyword options, and the value is never set.
  seed = { entity_ids: entity_ids, conflicts: {} }
  final_result = pipeline.call(SimpleFlow::Result.new(seed))
  by_entity = final_result.value[:conflicts]

  entity_ids.map do |entity_id|
    conflicts = by_entity[entity_id]
    {
      entity_id: entity_id,
      # An entity with no recorded entry is reported with no conflicts.
      conflicts: conflicts || [],
      conflict_count: conflicts&.size || 0
    }
  end
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
# Builds a pipeline with one independent resolve step per name, followed
# by an "aggregate" step that depends on all of them.
#
# Each resolve step stores { entity:, status:, error: } under the name in
# the :resolved hash of the pipeline value. Status is :resolved when the
# resolver returns something, :not_found when it returns nil, and :error
# (with the message) when it raises a StandardError.
#
# @param names [Array<String>] entity names to resolve
# @param type [Symbol, nil] entity type passed through to the resolver
# @return [SimpleFlow::Pipeline]
def build_entity_resolution_pipeline(names, type)
  # Copied to a local because the Pipeline block below calls `step`
  # without a receiver, so instance variables of this object are not
  # relied on inside it.
  resolver = @entity_resolver
  step_names = names.map { |name| "resolve_#{name.hash.abs}" }

  SimpleFlow::Pipeline.new do
    names.zip(step_names).each do |name, step_name|
      step step_name, depends_on: [] do |result|
        outcome =
          begin
            entity = resolver.resolve(name, type: type)
            { entity: entity, status: entity ? :resolved : :not_found, error: nil }
          rescue StandardError => e
            { entity: nil, status: :error, error: e.message }
          end

        merged = result.value[:resolved].merge(name => outcome)
        result.continue(result.value.merge(resolved: merged))
      end
    end

    # Passes the accumulated value through once all resolve steps are done.
    step "aggregate", depends_on: step_names do |result|
      result.continue(result.value)
    end
  end
end
|
|
97
|
+
|
|
98
|
+
# Builds a pipeline with one independent "conflicts_<id>" step per entity,
# followed by an "aggregate" step that depends on all of them.
#
# Each step stores the resolver's conflicts under the entity id in the
# :conflicts hash of the pipeline value. When the resolver raises a
# StandardError the entity still gets an empty conflict list (so existing
# readers of :conflicts are unaffected), but the message is also kept
# under :errors so a failed check can be told apart from "no conflicts".
#
# @param entity_ids [Array] ids handed to find_conflicts(entity_id:)
# @return [SimpleFlow::Pipeline]
def build_conflict_detection_pipeline(entity_ids)
  # Copied to a local because the Pipeline block below calls `step`
  # without a receiver, so instance variables of this object are not
  # relied on inside it.
  resolver = @fact_resolver
  step_names = entity_ids.map { |id| "conflicts_#{id}" }

  SimpleFlow::Pipeline.new do
    entity_ids.zip(step_names).each do |entity_id, step_name|
      step step_name, depends_on: [] do |result|
        found = []
        failure = nil
        begin
          found = resolver.find_conflicts(entity_id: entity_id)
        rescue StandardError => e
          failure = e.message
        end

        updated = result.value.merge(
          conflicts: result.value[:conflicts].merge(entity_id => found)
        )
        if failure
          # :errors is absent from the seed value, hence the fetch default.
          updated = updated.merge(
            errors: result.value.fetch(:errors, {}).merge(entity_id => failure)
          )
        end

        result.continue(updated)
      end
    end

    # Passes the accumulated value through once all checks are done.
    step "aggregate", depends_on: step_names do |result|
      result.continue(result.value)
    end
  end
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|