phronomy 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +8 -7
- data/CHANGELOG.md +151 -1
- data/README.md +170 -47
- data/Rakefile +33 -0
- data/benchmark/baseline.json +1 -1
- data/benchmark/bench_context_assembler.rb +2 -2
- data/benchmark/bench_regression.rb +6 -5
- data/benchmark/bench_token_estimator.rb +5 -5
- data/benchmark/bench_tool_schema.rb +1 -1
- data/benchmark/bench_vector_store.rb +1 -1
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
- data/docs/decisions/006-no-built-in-guardrails.md +20 -2
- data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
- data/lib/phronomy/agent/base.rb +285 -137
- data/lib/phronomy/agent/checkpoint.rb +118 -0
- data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
- data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
- data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
- data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
- data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
- data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
- data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
- data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
- data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
- data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
- data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
- data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
- data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
- data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
- data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
- data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
- data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
- data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
- data/lib/phronomy/agent/fsm.rb +42 -65
- data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
- data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
- data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
- data/lib/phronomy/agent/react_agent.rb +27 -14
- data/lib/phronomy/agent/runner.rb +2 -2
- data/lib/phronomy/agent/tool_executor.rb +108 -0
- data/lib/phronomy/concurrency/async_queue.rb +157 -0
- data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
- data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
- data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
- data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
- data/lib/phronomy/concurrency/deadline.rb +65 -0
- data/lib/phronomy/concurrency/gate_registry.rb +52 -0
- data/lib/phronomy/concurrency/pool_registry.rb +57 -0
- data/lib/phronomy/configuration.rb +142 -0
- data/lib/phronomy/context.rb +2 -8
- data/lib/phronomy/diagnostics.rb +62 -0
- data/lib/phronomy/embeddings.rb +2 -2
- data/lib/phronomy/eval/runner.rb +13 -9
- data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
- data/lib/phronomy/event_loop.rb +184 -46
- data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
- data/lib/phronomy/invocation_context.rb +152 -0
- data/lib/phronomy/knowledge_source.rb +0 -5
- data/lib/phronomy/llm_adapter/base.rb +104 -0
- data/lib/phronomy/llm_adapter/ruby_llm.rb +47 -0
- data/lib/phronomy/llm_adapter.rb +20 -0
- data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
- data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
- data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
- data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
- data/lib/phronomy/loader.rb +4 -4
- data/lib/phronomy/metrics.rb +38 -0
- data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
- data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +151 -126
- data/lib/phronomy/multi_agent/parallel_tool_chat.rb +149 -0
- data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
- data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
- data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
- data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
- data/lib/phronomy/runtime/scheduler.rb +98 -0
- data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
- data/lib/phronomy/runtime/task_registry.rb +48 -0
- data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
- data/lib/phronomy/runtime/timer_queue.rb +106 -0
- data/lib/phronomy/runtime/timer_service.rb +42 -0
- data/lib/phronomy/runtime.rb +389 -0
- data/lib/phronomy/splitter.rb +3 -3
- data/lib/phronomy/task/backend.rb +80 -0
- data/lib/phronomy/task/fiber_backend.rb +157 -0
- data/lib/phronomy/task/immediate_backend.rb +89 -0
- data/lib/phronomy/task/thread_backend.rb +84 -0
- data/lib/phronomy/task.rb +275 -0
- data/lib/phronomy/task_group.rb +265 -0
- data/lib/phronomy/testing/fake_clock.rb +109 -0
- data/lib/phronomy/testing/fake_scheduler.rb +104 -0
- data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
- data/lib/phronomy/testing.rb +12 -0
- data/lib/phronomy/tool/base.rb +156 -7
- data/lib/phronomy/tool/mcp_tool.rb +47 -16
- data/lib/phronomy/tool/scope_policy.rb +50 -0
- data/lib/phronomy/tracing/null_tracer.rb +3 -1
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
- data/lib/phronomy/vector_store.rb +2 -2
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +52 -5
- data/lib/phronomy/workflow_context.rb +37 -2
- data/lib/phronomy/workflow_runner.rb +28 -77
- data/lib/phronomy.rb +43 -0
- metadata +73 -33
- data/lib/phronomy/agent/parallel_tool_chat.rb +0 -92
- data/lib/phronomy/cancellation_token.rb +0 -92
- data/lib/phronomy/context/compaction_context.rb +0 -111
- data/lib/phronomy/context/trigger_context.rb +0 -39
- data/lib/phronomy/context/trim_context.rb +0 -75
- data/lib/phronomy/embeddings/base.rb +0 -22
- data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
- data/lib/phronomy/fsm_session.rb +0 -201
- data/lib/phronomy/knowledge_source/base.rb +0 -36
- data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
- data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
- data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
- data/lib/phronomy/loader/base.rb +0 -25
- data/lib/phronomy/loader/csv_loader.rb +0 -56
- data/lib/phronomy/loader/markdown_loader.rb +0 -76
- data/lib/phronomy/loader/plain_text_loader.rb +0 -22
- data/lib/phronomy/prompt_template.rb +0 -96
- data/lib/phronomy/splitter/base.rb +0 -47
- data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
- data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
- data/lib/phronomy/vector_store/base.rb +0 -82
- data/lib/phronomy/vector_store/in_memory.rb +0 -93
- data/lib/phronomy/vector_store/pgvector.rb +0 -127
- data/lib/phronomy/vector_store/redis_search.rb +0 -192
data/lib/phronomy/fsm_session.rb
DELETED
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
# Event-driven execution wrapper for a single workflow run.
|
|
5
|
-
#
|
|
6
|
-
# Created by WorkflowRunner and registered with EventLoop. All public methods
|
|
7
|
-
# are called from the EventLoop thread — FSMSession is NOT thread-safe and must
|
|
8
|
-
# not be accessed concurrently from multiple threads.
|
|
9
|
-
#
|
|
10
|
-
# == Lifecycle
|
|
11
|
-
#
|
|
12
|
-
# register(session) → EventLoop posts :start → session.start
|
|
13
|
-
# ↓ (auto-transition present)
|
|
14
|
-
# EventLoop posts :state_completed → session.handle
|
|
15
|
-
# ↓ (repeat)
|
|
16
|
-
# session posts :finished or :halted
|
|
17
|
-
# ↓
|
|
18
|
-
# EventLoop pushes ctx to completion_queue → caller unblocks
|
|
19
|
-
#
|
|
20
|
-
# == Async IO pattern (EventLoop mode only)
|
|
21
|
-
#
|
|
22
|
-
# When a state has no auto-transition and is not a wait_state, but has an
|
|
23
|
-
# external event registered (e.g. +transition from: :fetching, on: :fetch_done+),
|
|
24
|
-
# the FSMSession stays registered in the EventLoop and waits for that event.
|
|
25
|
-
# The entry action is expected to spawn an IO thread that posts the event back:
|
|
26
|
-
#
|
|
27
|
-
# entry :fetching, ->(ctx) {
|
|
28
|
-
# Thread.new {
|
|
29
|
-
# ctx.result = http.get(ctx.url)
|
|
30
|
-
# Phronomy::EventLoop.instance.post(
|
|
31
|
-
# Phronomy::Event.new(type: :fetch_done, target_id: ctx.thread_id, payload: nil)
|
|
32
|
-
# )
|
|
33
|
-
# }
|
|
34
|
-
# }
|
|
35
|
-
# transition from: :fetching, on: :fetch_done, to: :process
|
|
36
|
-
class FSMSession
|
|
37
|
-
FINISH = WorkflowRunner::FINISH
|
|
38
|
-
|
|
39
|
-
# @return [String] workflow thread_id (matches WorkflowContext#thread_id)
|
|
40
|
-
attr_reader :id
|
|
41
|
-
|
|
42
|
-
# @param id [String]
|
|
43
|
-
# @param context [Object] includes Phronomy::WorkflowContext
|
|
44
|
-
# @param entry_point [Symbol] initial state name
|
|
45
|
-
# @param entry_actions [Hash] { state_name => [callable, ...] }
|
|
46
|
-
# @param auto_state_set [Hash] { state_name => true }
|
|
47
|
-
# @param declared_states [Array<Symbol>] all action state names
|
|
48
|
-
# @param wait_state_names [Array<Symbol>]
|
|
49
|
-
# @param external_events [Hash] { event_name => [{from:, to:, guard:}] }
|
|
50
|
-
# @param phase_machine_class [Class] state_machines-backed phase tracker class
|
|
51
|
-
# @param recursion_limit [Integer]
|
|
52
|
-
# @param resume_event [Symbol, nil] external event to fire when resuming
|
|
53
|
-
# @param resume_phase [Symbol, nil] wait state name to resume from
|
|
54
|
-
# @api private
|
|
55
|
-
def initialize(id:, context:, entry_point:, entry_actions:, auto_state_set:,
|
|
56
|
-
declared_states:, wait_state_names:, external_events:, phase_machine_class:,
|
|
57
|
-
recursion_limit:, resume_event: nil, resume_phase: nil)
|
|
58
|
-
@id = id
|
|
59
|
-
@ctx = context
|
|
60
|
-
@entry_point = entry_point
|
|
61
|
-
@entry_actions = entry_actions
|
|
62
|
-
@auto_state_set = auto_state_set
|
|
63
|
-
@declared_states = declared_states
|
|
64
|
-
@wait_state_names = wait_state_names
|
|
65
|
-
@external_events = external_events
|
|
66
|
-
@phase_machine_class = phase_machine_class
|
|
67
|
-
@recursion_limit = recursion_limit
|
|
68
|
-
@resume_event = resume_event
|
|
69
|
-
@resume_phase = resume_phase
|
|
70
|
-
@step = 0
|
|
71
|
-
@done = false
|
|
72
|
-
@current_state = nil
|
|
73
|
-
@tracker = nil
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
# Begins workflow execution. Called by EventLoop on :start event.
|
|
77
|
-
def start
|
|
78
|
-
if @resume_event
|
|
79
|
-
# Resume from wait state: position tracker at the wait state, then fire the
|
|
80
|
-
# external event. state_machines fires before_transition (exit) and
|
|
81
|
-
# after_transition (entry) callbacks, so both actions execute here.
|
|
82
|
-
@current_state = @resume_phase
|
|
83
|
-
@tracker = build_tracker(@current_state)
|
|
84
|
-
@tracker.context = @ctx
|
|
85
|
-
fire_and_advance!(@resume_event)
|
|
86
|
-
else
|
|
87
|
-
# Fresh start: state_machines does not fire callbacks on initialization,
|
|
88
|
-
# so we invoke the entry action for the initial state manually.
|
|
89
|
-
@current_state = @entry_point
|
|
90
|
-
@tracker = build_tracker(@current_state)
|
|
91
|
-
@tracker.context = @ctx
|
|
92
|
-
(@entry_actions[@current_state] || []).each do |c|
|
|
93
|
-
result = c.call(@ctx)
|
|
94
|
-
@ctx = result if result.is_a?(Phronomy::WorkflowContext)
|
|
95
|
-
end
|
|
96
|
-
@tracker.context = @ctx
|
|
97
|
-
advance_or_halt
|
|
98
|
-
end
|
|
99
|
-
rescue => e
|
|
100
|
-
finish_with_error(e)
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
# Processes an event dispatched from EventLoop.
|
|
104
|
-
# Called for :state_completed and all user-defined external events.
|
|
105
|
-
#
|
|
106
|
-
# @param event [Phronomy::Event]
|
|
107
|
-
# @api private
|
|
108
|
-
def handle(event)
|
|
109
|
-
return if @done
|
|
110
|
-
|
|
111
|
-
fire_and_advance!(event.type)
|
|
112
|
-
rescue => e
|
|
113
|
-
finish_with_error(e)
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
private
|
|
117
|
-
|
|
118
|
-
# Fires event_name on the phase tracker, updates @current_state, then
|
|
119
|
-
# calls advance_or_halt to decide what to do next.
|
|
120
|
-
def fire_and_advance!(event_name)
|
|
121
|
-
if @step >= @recursion_limit
|
|
122
|
-
raise Phronomy::RecursionLimitError,
|
|
123
|
-
"Recursion limit (#{@recursion_limit}) exceeded"
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
fire_event!(@tracker, event_name, @current_state)
|
|
127
|
-
@ctx = @tracker.context
|
|
128
|
-
next_phase = @tracker.phase.to_sym
|
|
129
|
-
# When next_phase == @current_state, no transition matched → treat as terminal.
|
|
130
|
-
@current_state = (next_phase == @current_state) ? FINISH : next_phase
|
|
131
|
-
@step += 1
|
|
132
|
-
advance_or_halt
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
# Determines the next action after the FSM has entered @current_state.
|
|
136
|
-
def advance_or_halt
|
|
137
|
-
return finish! if @current_state == FINISH
|
|
138
|
-
|
|
139
|
-
if @wait_state_names.include?(@current_state)
|
|
140
|
-
return halt!
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
if @auto_state_set.key?(@current_state)
|
|
144
|
-
event_loop.post(Event.new(type: :state_completed, target_id: @id, payload: nil))
|
|
145
|
-
return
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
if has_external_event_from?(@current_state)
|
|
149
|
-
# Async IO pattern: the entry action spawned an IO thread that will post
|
|
150
|
-
# an external event back. Stay registered; do nothing here.
|
|
151
|
-
return
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# No transition declared — validate the state is known, then treat as terminal.
|
|
155
|
-
unless @declared_states.include?(@current_state)
|
|
156
|
-
raise ArgumentError, "State #{@current_state.inspect} is not defined"
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
finish!
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
def finish!
|
|
163
|
-
@done = true
|
|
164
|
-
@ctx.set_graph_metadata(thread_id: @id, phase: :__end__)
|
|
165
|
-
event_loop.post(Event.new(type: :finished, target_id: @id, payload: @ctx))
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
def halt!
|
|
169
|
-
@done = true
|
|
170
|
-
@ctx.set_graph_metadata(thread_id: @id, phase: @current_state)
|
|
171
|
-
event_loop.post(Event.new(type: :halted, target_id: @id, payload: @ctx))
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
def finish_with_error(err)
|
|
175
|
-
@done = true
|
|
176
|
-
event_loop.post(Event.new(type: :error, target_id: @id, payload: err))
|
|
177
|
-
end
|
|
178
|
-
|
|
179
|
-
def fire_event!(tracker, event_name, from_state)
|
|
180
|
-
return if tracker.send(event_name)
|
|
181
|
-
|
|
182
|
-
raise ArgumentError,
|
|
183
|
-
"Transition from #{from_state.inspect} via event #{event_name.inspect} failed. " \
|
|
184
|
-
"Ensure at least one guard matches or add a fallback (no-guard) transition."
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def has_external_event_from?(state)
|
|
188
|
-
@external_events.any? { |_, transitions| transitions.any? { |t| t[:from] == state } }
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
def build_tracker(from_state)
|
|
192
|
-
machine = @phase_machine_class.new
|
|
193
|
-
machine.instance_variable_set(:@phase, from_state.to_s)
|
|
194
|
-
machine
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
def event_loop
|
|
198
|
-
Phronomy::EventLoop.instance
|
|
199
|
-
end
|
|
200
|
-
end
|
|
201
|
-
end
|
|
data/lib/phronomy/knowledge_source/base.rb
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module KnowledgeSource
|
|
5
|
-
# Abstract base class for all KnowledgeSource implementations.
|
|
6
|
-
#
|
|
7
|
-
# Subclasses must implement #fetch(query:) and return an Array of chunk Hashes.
|
|
8
|
-
# Each chunk Hash must contain:
|
|
9
|
-
# :content [String] the text to inject into the context
|
|
10
|
-
# :type [Symbol] semantic tag (e.g. :static, :rag, :entity)
|
|
11
|
-
class Base
|
|
12
|
-
# Retrieve knowledge chunks relevant to the given query.
|
|
13
|
-
#
|
|
14
|
-
# @param query [String, nil] the current user input used to select relevant chunks
|
|
15
|
-
# @param cancellation_token [Phronomy::CancellationToken, nil] optional token; raises CancellationError when cancelled
|
|
16
|
-
# @return [Array<Hash>] array of { content: String, type: Symbol }
|
|
17
|
-
# @api public
|
|
18
|
-
def fetch(query: nil, cancellation_token: nil)
|
|
19
|
-
cancellation_token&.raise_if_cancelled!
|
|
20
|
-
raise NotImplementedError, "#{self.class}#fetch is not implemented"
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Returns true when this source's content is considered static (i.e. does
|
|
24
|
-
# not change between agent invocations). Static sources are eligible for
|
|
25
|
-
# fingerprint-based caching in ContextVersionCache.
|
|
26
|
-
#
|
|
27
|
-
# Override in subclasses that return fixed content.
|
|
28
|
-
#
|
|
29
|
-
# @return [Boolean]
|
|
30
|
-
# @api public
|
|
31
|
-
def static?
|
|
32
|
-
false
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
end
|
|
data/lib/phronomy/knowledge_source/entity_knowledge.rb
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module KnowledgeSource
|
|
5
|
-
# A KnowledgeSource that extracts named-entity facts from conversation history.
|
|
6
|
-
#
|
|
7
|
-
# This is the knowledge-injection counterpart of the old EntityMemory.
|
|
8
|
-
# It scans saved user messages with a regex heuristic (no LLM call) and
|
|
9
|
-
# returns the discovered facts as a single knowledge chunk tagged :entity.
|
|
10
|
-
#
|
|
11
|
-
# EntityKnowledge is stateful: it accumulates extracted facts via #update(messages:)
|
|
12
|
-
# which should be called each time new messages are saved.
|
|
13
|
-
#
|
|
14
|
-
# Supported extraction patterns (case-insensitive):
|
|
15
|
-
# "my name is Alice" → { name: "Alice" }
|
|
16
|
-
# "I am Alice" → { identity: "Alice" }
|
|
17
|
-
# "I'm a software engineer" → { occupation: "software engineer" }
|
|
18
|
-
# "I work at / for Acme" → { workplace: "Acme" }
|
|
19
|
-
# "I live in Tokyo" → { location: "Tokyo" }
|
|
20
|
-
# "I'm from Tokyo" → { location: "Tokyo" }
|
|
21
|
-
# "I like / love Ruby" → { preference: "Ruby" }
|
|
22
|
-
#
|
|
23
|
-
# @example
|
|
24
|
-
# ks = Phronomy::KnowledgeSource::EntityKnowledge.new
|
|
25
|
-
# ks.update(messages: chat_messages)
|
|
26
|
-
# agent.invoke("What is my name?", config: { knowledge_sources: [ks] })
|
|
27
|
-
class EntityKnowledge < Base
|
|
28
|
-
PATTERNS = [
|
|
29
|
-
[:name, /\bmy name is\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
|
|
30
|
-
[:identity, /\bI\s+am\s+([A-Z][A-Za-z0-9 \-']+)/],
|
|
31
|
-
[:occupation, /\bI(?:'m| am) a(?:n)?\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
|
|
32
|
-
[:workplace, /\bI (?:work|worked) (?:at|for|in)\s+([A-Za-z0-9][A-Za-z0-9 \-'.&,]*)/i],
|
|
33
|
-
[:location, /\bI live in\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
|
|
34
|
-
[:location, /\bI(?:'m| am) from\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
|
|
35
|
-
[:preference, /\bI (?:like|love|enjoy)\s+([A-Za-z][A-Za-z0-9 \-']*)/i]
|
|
36
|
-
].freeze
|
|
37
|
-
|
|
38
|
-
def initialize
|
|
39
|
-
@entities = {}
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# Scan messages and accumulate entity facts.
|
|
43
|
-
# Call this after saving a new set of messages (e.g. from a ConversationManager save hook).
|
|
44
|
-
#
|
|
45
|
-
# @param messages [Array] message objects responding to #role and #content
|
|
46
|
-
# @api public
|
|
47
|
-
def update(messages:)
|
|
48
|
-
messages.each do |msg|
|
|
49
|
-
next unless msg.role.to_sym == :user
|
|
50
|
-
|
|
51
|
-
extract(msg.content.to_s).each { |key, value| @entities[key] = value }
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Returns a single chunk containing all known entity facts in XML context format.
|
|
56
|
-
# Returns an empty array when no entities have been discovered.
|
|
57
|
-
#
|
|
58
|
-
# @param query [String, nil] unused — entity knowledge is always fully injected
|
|
59
|
-
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
60
|
-
# @return [Array<Hash>]
|
|
61
|
-
# @api public
|
|
62
|
-
def fetch(query: nil, cancellation_token: nil)
|
|
63
|
-
cancellation_token&.raise_if_cancelled!
|
|
64
|
-
return [] if @entities.empty?
|
|
65
|
-
|
|
66
|
-
lines = @entities.map { |key, value| "- #{key}: #{value}" }.join("\n")
|
|
67
|
-
content = <<~CONTENT.chomp
|
|
68
|
-
Known facts about the user:
|
|
69
|
-
#{lines}
|
|
70
|
-
CONTENT
|
|
71
|
-
[{content: content, type: :entity}]
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
# Returns the current entity store (primarily for testing).
|
|
75
|
-
#
|
|
76
|
-
# @return [Hash]
|
|
77
|
-
# @api public
|
|
78
|
-
def entities
|
|
79
|
-
@entities.dup
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
private
|
|
83
|
-
|
|
84
|
-
def extract(text)
|
|
85
|
-
found = {}
|
|
86
|
-
PATTERNS.each do |key, pattern|
|
|
87
|
-
if (match = text.match(pattern))
|
|
88
|
-
value = match[1].strip.sub(/[.!?]\s+.*$/, "").gsub(/[.,;!?]+$/, "")
|
|
89
|
-
found[key] = value unless value.empty?
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
found
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
end
|
|
data/lib/phronomy/knowledge_source/rag_knowledge.rb
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module KnowledgeSource
|
|
5
|
-
# A KnowledgeSource that retrieves semantically relevant chunks from a VectorStore.
|
|
6
|
-
#
|
|
7
|
-
# On each #fetch call, the query is embedded and the k nearest documents are
|
|
8
|
-
# returned as knowledge chunks.
|
|
9
|
-
#
|
|
10
|
-
# @example
|
|
11
|
-
# store = Phronomy::VectorStore::InMemory.new
|
|
12
|
-
# embeddings = Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
|
|
13
|
-
# ks = Phronomy::KnowledgeSource::RAGKnowledge.new(
|
|
14
|
-
# store: store,
|
|
15
|
-
# embeddings: embeddings,
|
|
16
|
-
# k: 5
|
|
17
|
-
# )
|
|
18
|
-
class RAGKnowledge < Base
|
|
19
|
-
# @param store [Phronomy::VectorStore::Base] vector store holding documents
|
|
20
|
-
# @param embeddings [Phronomy::Embeddings::Base] embeddings adapter
|
|
21
|
-
# @param k [Integer] number of chunks to retrieve
|
|
22
|
-
# @param type [Symbol] semantic tag (default :rag)
|
|
23
|
-
# @param source [String, nil] default source label; falls back to
|
|
24
|
-
# each document's :source metadata when nil
|
|
25
|
-
# @api public
|
|
26
|
-
def initialize(store:, embeddings:, k: 5, type: :rag, source: nil)
|
|
27
|
-
@store = store
|
|
28
|
-
@embeddings = embeddings
|
|
29
|
-
@k = k
|
|
30
|
-
@type = type
|
|
31
|
-
@source = source
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Embed the query and retrieve the k nearest chunks from the vector store.
|
|
35
|
-
#
|
|
36
|
-
# Returns an empty array when query is nil or blank.
|
|
37
|
-
#
|
|
38
|
-
# @param query [String, nil]
|
|
39
|
-
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
40
|
-
# @return [Array<Hash>]
|
|
41
|
-
# @api public
|
|
42
|
-
def fetch(query: nil, cancellation_token: nil)
|
|
43
|
-
cancellation_token&.raise_if_cancelled!
|
|
44
|
-
return [] if query.nil? || query.strip.empty?
|
|
45
|
-
|
|
46
|
-
vector = @embeddings.embed(query, cancellation_token)
|
|
47
|
-
results = @store.search(query_embedding: vector, k: @k, cancellation_token: cancellation_token)
|
|
48
|
-
results.map do |doc|
|
|
49
|
-
chunk = {content: doc[:metadata][:content], type: @type}
|
|
50
|
-
src = @source || doc[:metadata][:source]
|
|
51
|
-
chunk[:source] = src if src
|
|
52
|
-
chunk
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|
|
data/lib/phronomy/knowledge_source/static_knowledge.rb
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module KnowledgeSource
|
|
5
|
-
# A KnowledgeSource backed by fixed text provided at construction time.
|
|
6
|
-
#
|
|
7
|
-
# Useful for injecting static documents, policy files, or configuration
|
|
8
|
-
# knowledge that does not change per request.
|
|
9
|
-
#
|
|
10
|
-
# @example
|
|
11
|
-
# ks = Phronomy::KnowledgeSource::StaticKnowledge.new(
|
|
12
|
-
# "Our refund policy: ...",
|
|
13
|
-
# type: :policy
|
|
14
|
-
# )
|
|
15
|
-
# agent.invoke("What is the refund policy?", config: { knowledge_sources: [ks] })
|
|
16
|
-
class StaticKnowledge < Base
|
|
17
|
-
# @param text [String] the static knowledge text to inject
|
|
18
|
-
# @param type [Symbol] semantic tag for the chunk (default :static)
|
|
19
|
-
# @param source [String, nil] label identifying where this knowledge came from
|
|
20
|
-
# (e.g. a filename). Included in the context XML tag and exposed to the LLM
|
|
21
|
-
# so that agents can produce grounded citations.
|
|
22
|
-
# @api public
|
|
23
|
-
def initialize(text, type: :static, source: nil)
|
|
24
|
-
@text = text.to_s
|
|
25
|
-
@type = type
|
|
26
|
-
@source = source
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Returns the fixed text as a single chunk, regardless of query.
|
|
30
|
-
#
|
|
31
|
-
# @param query [String, nil] ignored for static knowledge
|
|
32
|
-
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
33
|
-
# @return [Array<Hash>]
|
|
34
|
-
# @api public
|
|
35
|
-
def fetch(query: nil, cancellation_token: nil)
|
|
36
|
-
cancellation_token&.raise_if_cancelled!
|
|
37
|
-
return [] if @text.empty?
|
|
38
|
-
|
|
39
|
-
chunk = {content: @text, type: @type}
|
|
40
|
-
chunk[:source] = @source if @source
|
|
41
|
-
[chunk]
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Static knowledge content never changes between invocations.
|
|
45
|
-
# @return [true]
|
|
46
|
-
# @api public
|
|
47
|
-
def static?
|
|
48
|
-
true
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
end
|
data/lib/phronomy/loader/base.rb
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Loader
|
|
5
|
-
# Abstract base class for document loaders.
|
|
6
|
-
#
|
|
7
|
-
# A loader converts an external source (file path, URL, etc.) into an
|
|
8
|
-
# Array of document hashes understood by the rest of the pipeline:
|
|
9
|
-
#
|
|
10
|
-
# [{ text: String, metadata: Hash }, ...]
|
|
11
|
-
#
|
|
12
|
-
# Subclasses must implement {#load}.
|
|
13
|
-
class Base
|
|
14
|
-
# Load documents from +source+ and return an array of document hashes.
|
|
15
|
-
#
|
|
16
|
-
# @param source [String] file path, URL, or other source identifier
|
|
17
|
-
# @return [Array<Hash>] array of <tt>{ text: String, metadata: Hash }</tt>
|
|
18
|
-
# @raise [NotImplementedError] when not overridden by a subclass
|
|
19
|
-
# @api public
|
|
20
|
-
def load(source)
|
|
21
|
-
raise NotImplementedError, "#{self.class}#load is not implemented"
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
end
|
|
data/lib/phronomy/loader/csv_loader.rb
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "csv"
|
|
4
|
-
|
|
5
|
-
module Phronomy
|
|
6
|
-
module Loader
|
|
7
|
-
# Loads a CSV file, converting each row into a separate document.
|
|
8
|
-
#
|
|
9
|
-
# By default the first row is treated as a header and column names are
|
|
10
|
-
# available in the document metadata. The full row is serialised to
|
|
11
|
-
# a human-readable "key: value" string for embedding.
|
|
12
|
-
#
|
|
13
|
-
# @example
|
|
14
|
-
# loader = Phronomy::Loader::CsvLoader.new
|
|
15
|
-
# docs = loader.load("products.csv")
|
|
16
|
-
# # => [
|
|
17
|
-
# # { text: "name: Widget\nprice: 9.99", metadata: { source: "...", row: 1, name: "Widget", price: "9.99" } },
|
|
18
|
-
# # ...
|
|
19
|
-
# # ]
|
|
20
|
-
class CsvLoader < Base
|
|
21
|
-
# @param headers [Boolean] treat the first row as headers (default: true)
|
|
22
|
-
# @param text_column [String, nil] if set, use only this column as the document text
|
|
23
|
-
# @api public
|
|
24
|
-
def initialize(headers: true, text_column: nil)
|
|
25
|
-
@headers = headers
|
|
26
|
-
@text_column = text_column
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# @param source [String] path to a CSV file
|
|
30
|
-
# @return [Array<Hash>]
|
|
31
|
-
# @raise [Errno::ENOENT] if the file does not exist
|
|
32
|
-
# @api public
|
|
33
|
-
def load(source)
|
|
34
|
-
rows = CSV.read(source, headers: @headers, encoding: "UTF-8")
|
|
35
|
-
|
|
36
|
-
if @headers
|
|
37
|
-
rows.each_with_index.map do |row, idx|
|
|
38
|
-
row_hash = row.to_h
|
|
39
|
-
text = if @text_column
|
|
40
|
-
row_hash[@text_column].to_s
|
|
41
|
-
else
|
|
42
|
-
row_hash.map { |k, v| "#{k}: #{v}" }.join("\n")
|
|
43
|
-
end
|
|
44
|
-
metadata = row_hash.transform_keys(&:to_sym).merge(source: source, row: idx + 1)
|
|
45
|
-
{text: text, metadata: metadata}
|
|
46
|
-
end
|
|
47
|
-
else
|
|
48
|
-
rows.each_with_index.map do |row, idx|
|
|
49
|
-
text = row.join(", ")
|
|
50
|
-
{text: text, metadata: {source: source, row: idx + 1}}
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
data/lib/phronomy/loader/markdown_loader.rb
DELETED
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Loader
|
|
5
|
-
# Loads a Markdown file, optionally splitting on top-level headings.
|
|
6
|
-
#
|
|
7
|
-
# When +split_on_headings:+ is true (the default), each H1/H2 section
|
|
8
|
-
# becomes a separate document so that embeddings capture section semantics
|
|
9
|
-
# rather than the full file at once.
|
|
10
|
-
#
|
|
11
|
-
# @example Single document (heading split disabled)
|
|
12
|
-
# loader = Phronomy::Loader::MarkdownLoader.new(split_on_headings: false)
|
|
13
|
-
# docs = loader.load("README.md")
|
|
14
|
-
# # => [{ text: "# Title\n...", metadata: { source: "README.md" } }]
|
|
15
|
-
#
|
|
16
|
-
# @example Split per heading (default)
|
|
17
|
-
# loader = Phronomy::Loader::MarkdownLoader.new
|
|
18
|
-
# docs = loader.load("guide.md")
|
|
19
|
-
# # => [
|
|
20
|
-
# # { text: "# Section 1\n...", metadata: { source: "guide.md", section: "Section 1" } },
|
|
21
|
-
# # { text: "## Sub-section\n...", metadata: { source: "guide.md", section: "Sub-section" } },
|
|
22
|
-
# # ]
|
|
23
|
-
class MarkdownLoader < Base
|
|
24
|
-
HEADING_RE = /^(\#{1,6})\s+(.+)$/
|
|
25
|
-
|
|
26
|
-
# @param split_on_headings [Boolean] split on H1–H6 boundaries (default: true)
|
|
27
|
-
# @api public
|
|
28
|
-
def initialize(split_on_headings: true)
|
|
29
|
-
@split_on_headings = split_on_headings
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# @param source [String] path to a Markdown file
|
|
33
|
-
# @return [Array<Hash>]
|
|
34
|
-
# @raise [Errno::ENOENT] if the file does not exist
|
|
35
|
-
# @api public
|
|
36
|
-
def load(source)
|
|
37
|
-
content = File.read(source, encoding: "UTF-8")
|
|
38
|
-
return [{text: content, metadata: {source: source}}] unless @split_on_headings
|
|
39
|
-
|
|
40
|
-
split_by_headings(content, source)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
private
|
|
44
|
-
|
|
45
|
-
def split_by_headings(content, source)
|
|
46
|
-
sections = []
|
|
47
|
-
current_lines = []
|
|
48
|
-
current_heading = nil
|
|
49
|
-
|
|
50
|
-
content.each_line do |line|
|
|
51
|
-
if (m = HEADING_RE.match(line.chomp))
|
|
52
|
-
flush_section(sections, current_lines, current_heading, source) if current_lines.any?
|
|
53
|
-
current_heading = m[2].strip
|
|
54
|
-
current_lines = [line]
|
|
55
|
-
else
|
|
56
|
-
current_lines << line
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
flush_section(sections, current_lines, current_heading, source) if current_lines.any?
|
|
61
|
-
|
|
62
|
-
# Fall back to single document if no headings were found
|
|
63
|
-
sections.empty? ? [{text: content, metadata: {source: source}}] : sections
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def flush_section(sections, lines, heading, source)
|
|
67
|
-
text = lines.join
|
|
68
|
-
return if text.strip.empty?
|
|
69
|
-
|
|
70
|
-
metadata = {source: source}
|
|
71
|
-
metadata[:section] = heading if heading
|
|
72
|
-
sections << {text: text, metadata: metadata}
|
|
73
|
-
end
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
data/lib/phronomy/loader/plain_text_loader.rb
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Phronomy
|
|
4
|
-
module Loader
|
|
5
|
-
# Loads a plain-text file as a single document.
|
|
6
|
-
#
|
|
7
|
-
# @example
|
|
8
|
-
# loader = Phronomy::Loader::PlainTextLoader.new
|
|
9
|
-
# docs = loader.load("/path/to/file.txt")
|
|
10
|
-
# # => [{ text: "...", metadata: { source: "/path/to/file.txt" } }]
|
|
11
|
-
class PlainTextLoader < Base
|
|
12
|
-
# @param source [String] absolute or relative path to a text file
|
|
13
|
-
# @return [Array<Hash>] single-element array with the file contents
|
|
14
|
-
# @raise [Errno::ENOENT] if the file does not exist
|
|
15
|
-
# @api public
|
|
16
|
-
def load(source)
|
|
17
|
-
text = File.read(source, encoding: "UTF-8")
|
|
18
|
-
[{text: text, metadata: {source: source}}]
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|