phronomy 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/README.md +16 -16
  3. data/benchmark/bench_context_assembler.rb +2 -2
  4. data/benchmark/bench_regression.rb +5 -5
  5. data/benchmark/bench_token_estimator.rb +5 -5
  6. data/benchmark/bench_tool_schema.rb +1 -1
  7. data/benchmark/bench_vector_store.rb +1 -1
  8. data/lib/phronomy/agent/base.rb +86 -123
  9. data/lib/phronomy/agent/checkpoint.rb +118 -0
  10. data/lib/phronomy/agent/context/conversation/compaction_context.rb +117 -0
  11. data/lib/phronomy/agent/context/conversation/trigger_context.rb +43 -0
  12. data/lib/phronomy/agent/context/conversation/trim_context.rb +82 -0
  13. data/lib/phronomy/agent/context/instruction/prompt_template.rb +102 -0
  14. data/lib/phronomy/agent/context/knowledge/embeddings/base.rb +45 -0
  15. data/lib/phronomy/agent/context/knowledge/embeddings/ruby_llm_embeddings.rb +51 -0
  16. data/lib/phronomy/agent/context/knowledge/loader/base.rb +31 -0
  17. data/lib/phronomy/agent/context/knowledge/loader/csv_loader.rb +62 -0
  18. data/lib/phronomy/agent/context/knowledge/loader/markdown_loader.rb +82 -0
  19. data/lib/phronomy/agent/context/knowledge/loader/plain_text_loader.rb +28 -0
  20. data/lib/phronomy/agent/context/knowledge/source/base.rb +60 -0
  21. data/lib/phronomy/agent/context/knowledge/source/entity_knowledge.rb +102 -0
  22. data/lib/phronomy/agent/context/knowledge/source/rag_knowledge.rb +63 -0
  23. data/lib/phronomy/agent/context/knowledge/source/static_knowledge.rb +58 -0
  24. data/lib/phronomy/agent/context/knowledge/splitter/base.rb +53 -0
  25. data/lib/phronomy/agent/context/knowledge/splitter/fixed_size_splitter.rb +57 -0
  26. data/lib/phronomy/agent/context/knowledge/splitter/recursive_splitter.rb +111 -0
  27. data/lib/phronomy/agent/context/knowledge/vector_store/async_backend.rb +116 -0
  28. data/lib/phronomy/agent/context/knowledge/vector_store/base.rb +95 -0
  29. data/lib/phronomy/agent/context/knowledge/vector_store/in_memory.rb +109 -0
  30. data/lib/phronomy/agent/context/knowledge/vector_store/pgvector.rb +133 -0
  31. data/lib/phronomy/agent/context/knowledge/vector_store/redis_search.rb +198 -0
  32. data/lib/phronomy/agent/fsm.rb +1 -1
  33. data/lib/phronomy/agent/invocation_pipeline.rb +99 -0
  34. data/lib/phronomy/agent/lifecycle/fsm_session.rb +251 -0
  35. data/lib/phronomy/agent/lifecycle/phase_machine_builder.rb +249 -0
  36. data/lib/phronomy/agent/react_agent.rb +19 -14
  37. data/lib/phronomy/agent/runner.rb +2 -2
  38. data/lib/phronomy/agent/tool_executor.rb +108 -0
  39. data/lib/phronomy/concurrency/async_queue.rb +157 -0
  40. data/lib/phronomy/concurrency/blocking_adapter_pool.rb +443 -0
  41. data/lib/phronomy/concurrency/cancellation_scope.rb +125 -0
  42. data/lib/phronomy/concurrency/cancellation_token.rb +140 -0
  43. data/lib/phronomy/concurrency/concurrency_gate.rb +157 -0
  44. data/lib/phronomy/concurrency/deadline.rb +65 -0
  45. data/lib/phronomy/{runtime → concurrency}/gate_registry.rb +1 -1
  46. data/lib/phronomy/{runtime → concurrency}/pool_registry.rb +1 -1
  47. data/lib/phronomy/context.rb +2 -8
  48. data/lib/phronomy/embeddings.rb +2 -2
  49. data/lib/phronomy/eval/runner.rb +4 -0
  50. data/lib/phronomy/eval/scorer/llm_judge.rb +12 -1
  51. data/lib/phronomy/event_loop.rb +7 -7
  52. data/lib/phronomy/invocation_context.rb +3 -3
  53. data/lib/phronomy/knowledge_source.rb +0 -5
  54. data/lib/phronomy/llm_adapter/ruby_llm.rb +17 -11
  55. data/lib/phronomy/{context → llm_context_window}/assembler.rb +18 -3
  56. data/lib/phronomy/{context → llm_context_window}/context_version_cache.rb +1 -1
  57. data/lib/phronomy/{context → llm_context_window}/token_budget.rb +7 -4
  58. data/lib/phronomy/{context → llm_context_window}/token_estimator.rb +3 -3
  59. data/lib/phronomy/loader.rb +4 -4
  60. data/lib/phronomy/{agent → multi_agent}/handoff.rb +2 -2
  61. data/lib/phronomy/{agent → multi_agent}/orchestrator.rb +6 -6
  62. data/lib/phronomy/{agent → multi_agent}/parallel_tool_chat.rb +4 -4
  63. data/lib/phronomy/{agent → multi_agent}/team_coordinator.rb +2 -2
  64. data/lib/phronomy/runtime.rb +19 -4
  65. data/lib/phronomy/splitter.rb +3 -3
  66. data/lib/phronomy/task_group.rb +1 -1
  67. data/lib/phronomy/tool/base.rb +50 -9
  68. data/lib/phronomy/tracing/null_tracer.rb +3 -1
  69. data/lib/phronomy/vector_store.rb +2 -2
  70. data/lib/phronomy/version.rb +1 -1
  71. data/lib/phronomy/workflow_context.rb +8 -0
  72. data/lib/phronomy/workflow_runner.rb +11 -131
  73. data/lib/phronomy.rb +1 -0
  74. metadata +44 -42
  75. data/lib/phronomy/async_queue.rb +0 -155
  76. data/lib/phronomy/blocking_adapter_pool.rb +0 -435
  77. data/lib/phronomy/cancellation_scope.rb +0 -123
  78. data/lib/phronomy/cancellation_token.rb +0 -133
  79. data/lib/phronomy/concurrency_gate.rb +0 -155
  80. data/lib/phronomy/context/compaction_context.rb +0 -111
  81. data/lib/phronomy/context/trigger_context.rb +0 -39
  82. data/lib/phronomy/context/trim_context.rb +0 -75
  83. data/lib/phronomy/deadline.rb +0 -63
  84. data/lib/phronomy/embeddings/base.rb +0 -39
  85. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +0 -45
  86. data/lib/phronomy/fsm_session.rb +0 -247
  87. data/lib/phronomy/knowledge_source/base.rb +0 -54
  88. data/lib/phronomy/knowledge_source/entity_knowledge.rb +0 -96
  89. data/lib/phronomy/knowledge_source/rag_knowledge.rb +0 -57
  90. data/lib/phronomy/knowledge_source/static_knowledge.rb +0 -52
  91. data/lib/phronomy/loader/base.rb +0 -25
  92. data/lib/phronomy/loader/csv_loader.rb +0 -56
  93. data/lib/phronomy/loader/markdown_loader.rb +0 -76
  94. data/lib/phronomy/loader/plain_text_loader.rb +0 -22
  95. data/lib/phronomy/prompt_template.rb +0 -96
  96. data/lib/phronomy/splitter/base.rb +0 -47
  97. data/lib/phronomy/splitter/fixed_size_splitter.rb +0 -51
  98. data/lib/phronomy/splitter/recursive_splitter.rb +0 -105
  99. data/lib/phronomy/tool_executor.rb +0 -106
  100. data/lib/phronomy/vector_store/async_backend.rb +0 -110
  101. data/lib/phronomy/vector_store/base.rb +0 -89
  102. data/lib/phronomy/vector_store/in_memory.rb +0 -93
  103. data/lib/phronomy/vector_store/pgvector.rb +0 -127
  104. data/lib/phronomy/vector_store/redis_search.rb +0 -192
@@ -1,247 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- # Event-driven execution wrapper for a single workflow run.
5
- #
6
- # Created by WorkflowRunner and registered with EventLoop. All public methods
7
- # are called from the EventLoop thread — FSMSession is NOT thread-safe and must
8
- # not be accessed concurrently from multiple threads.
9
- #
10
- # == Lifecycle
11
- #
12
- # register(session) → EventLoop posts :start → session.start
13
- # ↓ (auto-transition present)
14
- # EventLoop posts :state_completed → session.handle
15
- # ↓ (repeat)
16
- # session posts :finished or :halted
17
- # ↓
18
- # EventLoop pushes ctx to completion_queue → caller unblocks
19
- #
20
- # == Async IO pattern (EventLoop mode only)
21
- #
22
- # When a state has no auto-transition and is not a wait_state, but has an
23
- # external event registered (e.g. +transition from: :fetching, on: :fetch_done+),
24
- # the FSMSession stays registered in the EventLoop and waits for that event.
25
- # The entry action is expected to spawn an IO thread that posts the event back:
26
- #
27
- # entry :fetching, ->(ctx) {
28
- # Thread.new {
29
- # ctx.result = http.get(ctx.url)
30
- # Phronomy::EventLoop.instance.post(
31
- # Phronomy::Event.new(type: :fetch_done, target_id: ctx.thread_id, payload: nil)
32
- # )
33
- # }
34
- # }
35
- # transition from: :fetching, on: :fetch_done, to: :process
36
- class FSMSession
37
- FINISH = WorkflowRunner::FINISH
38
-
39
- # @return [String] workflow thread_id (matches WorkflowContext#thread_id)
40
- attr_reader :id
41
-
42
- # @param id [String]
43
- # @param context [Object] includes Phronomy::WorkflowContext
44
- # @param entry_point [Symbol] initial state name
45
- # @param entry_actions [Hash] { state_name => [callable, ...] }
46
- # @param auto_state_set [Hash] { state_name => true }
47
- # @param declared_states [Array<Symbol>] all action state names
48
- # @param wait_state_names [Array<Symbol>]
49
- # @param external_events [Hash] { event_name => [{from:, to:, guard:}] }
50
- # @param phase_machine_class [Class] state_machines-backed phase tracker class
51
- # @param recursion_limit [Integer]
52
- # @param action_timeouts [Hash] { state_name => seconds }
53
- # @param resume_event [Symbol, nil] external event to fire when resuming
54
- # @param resume_phase [Symbol, nil] wait state name to resume from
55
- # @api private
56
- def initialize(id:, context:, entry_point:, entry_actions:, auto_state_set:,
57
- declared_states:, wait_state_names:, external_events:, phase_machine_class:,
58
- recursion_limit:, action_timeouts: {}, resume_event: nil, resume_phase: nil)
59
- @id = id
60
- @ctx = context
61
- @entry_point = entry_point
62
- @entry_actions = entry_actions
63
- @auto_state_set = auto_state_set
64
- @declared_states = declared_states
65
- @wait_state_names = wait_state_names
66
- @external_events = external_events
67
- @phase_machine_class = phase_machine_class
68
- @recursion_limit = recursion_limit
69
- @action_timeouts = action_timeouts
70
- @resume_event = resume_event
71
- @resume_phase = resume_phase
72
- @step = 0
73
- @done = false
74
- @current_state = nil
75
- @tracker = nil
76
- end
77
-
78
- # Begins workflow execution. Called by EventLoop on :start event.
79
- def start
80
- if @resume_event
81
- # Resume from wait state: position tracker at the wait state, then fire the
82
- # external event. state_machines fires before_transition (exit) and
83
- # after_transition (entry) callbacks, so both actions execute here.
84
- @current_state = @resume_phase
85
- @tracker = build_tracker(@current_state)
86
- @tracker.context = @ctx
87
- fire_and_advance!(@resume_event)
88
- else
89
- # Fresh start: state_machines does not fire callbacks on initialization,
90
- # so we invoke the entry action for the initial state manually.
91
- @current_state = @entry_point
92
- @tracker = build_tracker(@current_state)
93
- @tracker.context = @ctx
94
- (@entry_actions[@current_state] || []).each do |c|
95
- result = c.call(@ctx)
96
- if result.is_a?(Phronomy::Task)
97
- # Awaitable action: spawn a task to await without blocking EventLoop.
98
- @tracker.async_pending = true
99
- session_id = @id
100
- current_state_name = @current_state
101
- timeout_secs = @action_timeouts[current_state_name]
102
- Phronomy::Runtime.instance.spawn(name: "fsm-await-#{session_id}") do
103
- if timeout_secs
104
- if result.join(timeout_secs).nil?
105
- result.cancel!
106
- raise Phronomy::ActionTimeoutError,
107
- "Action in state #{current_state_name.inspect} timed out after #{timeout_secs}s"
108
- end
109
- end
110
- task_result = result.await
111
- if task_result.is_a?(Phronomy::WorkflowContext)
112
- event_loop.post(Event.new(type: :action_completed, target_id: session_id, payload: task_result))
113
- else
114
- event_loop.post(Event.new(type: :state_completed, target_id: session_id, payload: nil))
115
- end
116
- rescue => e
117
- event_loop.post(Event.new(type: :error, target_id: session_id, payload: e))
118
- end
119
- break # Only one async action at a time per state
120
- elsif result.is_a?(Phronomy::WorkflowContext)
121
- @ctx = result
122
- end
123
- end
124
- @tracker.context = @ctx
125
- advance_or_halt unless @tracker.async_pending
126
- end
127
- rescue => e
128
- finish_with_error(e)
129
- end
130
-
131
- # Processes an event dispatched from EventLoop.
132
- # Called for :state_completed, :action_completed, and all user-defined external events.
133
- #
134
- # @param event [Phronomy::Event]
135
- # @api private
136
- def handle(event)
137
- return if @done
138
-
139
- if event.type == :action_completed
140
- # An awaitable entry action completed: update context and advance.
141
- @ctx = event.payload if event.payload.is_a?(Phronomy::WorkflowContext)
142
- @tracker.context = @ctx
143
- @tracker.async_pending = false # Reset flag set by start or fire_and_advance!
144
- advance_or_halt
145
- return
146
- end
147
-
148
- fire_and_advance!(event.type)
149
- rescue => e
150
- finish_with_error(e)
151
- end
152
-
153
- private
154
-
155
- # Fires event_name on the phase tracker, updates @current_state, then
156
- # calls advance_or_halt to decide what to do next.
157
- def fire_and_advance!(event_name)
158
- if @step >= @recursion_limit
159
- raise Phronomy::RecursionLimitError,
160
- "Recursion limit (#{@recursion_limit}) exceeded"
161
- end
162
-
163
- fire_event!(@tracker, event_name, @current_state)
164
- @ctx = @tracker.context
165
- next_phase = @tracker.phase.to_sym
166
- # When next_phase == @current_state, no transition matched → treat as terminal.
167
- @current_state = (next_phase == @current_state) ? FINISH : next_phase
168
- @step += 1
169
-
170
- # If an entry action returned a Task, the after_transition callback set
171
- # async_pending = true and spawned a thread. Skip advance_or_halt — the
172
- # background thread will post :action_completed or :state_completed.
173
- if @tracker.async_pending
174
- @tracker.async_pending = false
175
- return
176
- end
177
-
178
- advance_or_halt
179
- end
180
-
181
- # Determines the next action after the FSM has entered @current_state.
182
- def advance_or_halt
183
- return finish! if @current_state == FINISH
184
-
185
- if @wait_state_names.include?(@current_state)
186
- return halt!
187
- end
188
-
189
- if @auto_state_set.key?(@current_state)
190
- event_loop.post(Event.new(type: :state_completed, target_id: @id, payload: nil))
191
- return
192
- end
193
-
194
- if has_external_event_from?(@current_state)
195
- # Async IO pattern: the entry action spawned an IO thread that will post
196
- # an external event back. Stay registered; do nothing here.
197
- return
198
- end
199
-
200
- # No transition declared — validate the state is known, then treat as terminal.
201
- unless @declared_states.include?(@current_state)
202
- raise ArgumentError, "State #{@current_state.inspect} is not defined"
203
- end
204
-
205
- finish!
206
- end
207
-
208
- def finish!
209
- @done = true
210
- @ctx.set_graph_metadata(thread_id: @id, phase: :__end__)
211
- event_loop.post(Event.new(type: :finished, target_id: @id, payload: @ctx))
212
- end
213
-
214
- def halt!
215
- @done = true
216
- @ctx.set_graph_metadata(thread_id: @id, phase: @current_state)
217
- event_loop.post(Event.new(type: :halted, target_id: @id, payload: @ctx))
218
- end
219
-
220
- def finish_with_error(err)
221
- @done = true
222
- event_loop.post(Event.new(type: :error, target_id: @id, payload: err))
223
- end
224
-
225
- def fire_event!(tracker, event_name, from_state)
226
- return if tracker.send(event_name)
227
-
228
- raise ArgumentError,
229
- "Transition from #{from_state.inspect} via event #{event_name.inspect} failed. " \
230
- "Ensure at least one guard matches or add a fallback (no-guard) transition."
231
- end
232
-
233
- def has_external_event_from?(state)
234
- @external_events.any? { |_, transitions| transitions.any? { |t| t[:from] == state } }
235
- end
236
-
237
- def build_tracker(from_state)
238
- machine = @phase_machine_class.new
239
- machine.instance_variable_set(:@phase, from_state.to_s)
240
- machine
241
- end
242
-
243
- def event_loop
244
- Phronomy::EventLoop.instance
245
- end
246
- end
247
- end
@@ -1,54 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module KnowledgeSource
5
- # Abstract base class for all KnowledgeSource implementations.
6
- #
7
- # Subclasses must implement #fetch(query:) and return an Array of chunk Hashes.
8
- # Each chunk Hash must contain:
9
- # :content [String] the text to inject into the context
10
- # :type [Symbol] semantic tag (e.g. :static, :rag, :entity)
11
- class Base
12
- # Retrieve knowledge chunks relevant to the given query.
13
- #
14
- # @param query [String, nil] the current user input used to select relevant chunks
15
- # @param cancellation_token [Phronomy::CancellationToken, nil] optional token; raises CancellationError when cancelled
16
- # @return [Array<Hash>] array of { content: String, type: Symbol }
17
- # @api public
18
- def fetch(query: nil, cancellation_token: nil)
19
- cancellation_token&.raise_if_cancelled!
20
- raise NotImplementedError, "#{self.class}#fetch is not implemented"
21
- end
22
-
23
- # Submits a {#fetch} call to {BlockingAdapterPool} and returns a
24
- # {BlockingAdapterPool::PendingOperation}.
25
- # Callers can fan out multiple fetches in parallel and await them all.
26
- #
27
- # @param query [String, nil]
28
- # @param cancellation_token [Phronomy::CancellationToken, nil]
29
- # @param timeout [Numeric, nil] seconds before the operation is abandoned
30
- # @return [BlockingAdapterPool::PendingOperation]
31
- # @api public
32
- def fetch_async(query: nil, cancellation_token: nil, timeout: nil)
33
- Phronomy::Runtime.instance.blocking_io.submit(
34
- timeout: timeout,
35
- cancellation_token: cancellation_token
36
- ) do
37
- fetch(query: query, cancellation_token: cancellation_token)
38
- end
39
- end
40
-
41
- # Returns true when this source's content is considered static (i.e. does
42
- # not change between agent invocations). Static sources are eligible for
43
- # fingerprint-based caching in ContextVersionCache.
44
- #
45
- # Override in subclasses that return fixed content.
46
- #
47
- # @return [Boolean]
48
- # @api public
49
- def static?
50
- false
51
- end
52
- end
53
- end
54
- end
@@ -1,96 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module KnowledgeSource
5
- # A KnowledgeSource that extracts named-entity facts from conversation history.
6
- #
7
- # This is the knowledge-injection counterpart of the old EntityMemory.
8
- # It scans saved user messages with a regex heuristic (no LLM call) and
9
- # returns the discovered facts as a single knowledge chunk tagged :entity.
10
- #
11
- # EntityKnowledge is stateful: it accumulates extracted facts via #update(messages:)
12
- # which should be called each time new messages are saved.
13
- #
14
- # Supported extraction patterns (case-insensitive):
15
- # "my name is Alice" → { name: "Alice" }
16
- # "I am Alice" → { identity: "Alice" }
17
- # "I'm a software engineer" → { occupation: "software engineer" }
18
- # "I work at / for Acme" → { workplace: "Acme" }
19
- # "I live in Tokyo" → { location: "Tokyo" }
20
- # "I'm from Tokyo" → { location: "Tokyo" }
21
- # "I like / love Ruby" → { preference: "Ruby" }
22
- #
23
- # @example
24
- # ks = Phronomy::KnowledgeSource::EntityKnowledge.new
25
- # ks.update(messages: chat_messages)
26
- # agent.invoke("What is my name?", config: { knowledge_sources: [ks] })
27
- class EntityKnowledge < Base
28
- PATTERNS = [
29
- [:name, /\bmy name is\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
30
- [:identity, /\bI\s+am\s+([A-Z][A-Za-z0-9 \-']+)/],
31
- [:occupation, /\bI(?:'m| am) a(?:n)?\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
32
- [:workplace, /\bI (?:work|worked) (?:at|for|in)\s+([A-Za-z0-9][A-Za-z0-9 \-'.&,]*)/i],
33
- [:location, /\bI live in\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
34
- [:location, /\bI(?:'m| am) from\s+([A-Za-z][A-Za-z0-9 \-']*)/i],
35
- [:preference, /\bI (?:like|love|enjoy)\s+([A-Za-z][A-Za-z0-9 \-']*)/i]
36
- ].freeze
37
-
38
- def initialize
39
- @entities = {}
40
- end
41
-
42
- # Scan messages and accumulate entity facts.
43
- # Call this after saving a new set of messages (e.g. from a ConversationManager save hook).
44
- #
45
- # @param messages [Array] message objects responding to #role and #content
46
- # @api public
47
- def update(messages:)
48
- messages.each do |msg|
49
- next unless msg.role.to_sym == :user
50
-
51
- extract(msg.content.to_s).each { |key, value| @entities[key] = value }
52
- end
53
- end
54
-
55
- # Returns a single chunk containing all known entity facts in XML context format.
56
- # Returns an empty array when no entities have been discovered.
57
- #
58
- # @param query [String, nil] unused — entity knowledge is always fully injected
59
- # @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
60
- # @return [Array<Hash>]
61
- # @api public
62
- def fetch(query: nil, cancellation_token: nil)
63
- cancellation_token&.raise_if_cancelled!
64
- return [] if @entities.empty?
65
-
66
- lines = @entities.map { |key, value| "- #{key}: #{value}" }.join("\n")
67
- content = <<~CONTENT.chomp
68
- Known facts about the user:
69
- #{lines}
70
- CONTENT
71
- [{content: content, type: :entity}]
72
- end
73
-
74
- # Returns the current entity store (primarily for testing).
75
- #
76
- # @return [Hash]
77
- # @api public
78
- def entities
79
- @entities.dup
80
- end
81
-
82
- private
83
-
84
- def extract(text)
85
- found = {}
86
- PATTERNS.each do |key, pattern|
87
- if (match = text.match(pattern))
88
- value = match[1].strip.sub(/[.!?]\s+.*$/, "").gsub(/[.,;!?]+$/, "")
89
- found[key] = value unless value.empty?
90
- end
91
- end
92
- found
93
- end
94
- end
95
- end
96
- end
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module KnowledgeSource
5
- # A KnowledgeSource that retrieves semantically relevant chunks from a VectorStore.
6
- #
7
- # On each #fetch call, the query is embedded and the k nearest documents are
8
- # returned as knowledge chunks.
9
- #
10
- # @example
11
- # store = Phronomy::VectorStore::InMemory.new
12
- # embeddings = Phronomy::Embeddings::RubyLLMEmbeddings.new(model: "text-embedding-3-small")
13
- # ks = Phronomy::KnowledgeSource::RAGKnowledge.new(
14
- # store: store,
15
- # embeddings: embeddings,
16
- # k: 5
17
- # )
18
- class RAGKnowledge < Base
19
- # @param store [Phronomy::VectorStore::Base] vector store holding documents
20
- # @param embeddings [Phronomy::Embeddings::Base] embeddings adapter
21
- # @param k [Integer] number of chunks to retrieve
22
- # @param type [Symbol] semantic tag (default :rag)
23
- # @param source [String, nil] default source label; falls back to
24
- # each document's :source metadata when nil
25
- # @api public
26
- def initialize(store:, embeddings:, k: 5, type: :rag, source: nil)
27
- @store = store
28
- @embeddings = embeddings
29
- @k = k
30
- @type = type
31
- @source = source
32
- end
33
-
34
- # Embed the query and retrieve the k nearest chunks from the vector store.
35
- #
36
- # Returns an empty array when query is nil or blank.
37
- #
38
- # @param query [String, nil]
39
- # @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
40
- # @return [Array<Hash>]
41
- # @api public
42
- def fetch(query: nil, cancellation_token: nil)
43
- cancellation_token&.raise_if_cancelled!
44
- return [] if query.nil? || query.strip.empty?
45
-
46
- vector = @embeddings.embed(query, cancellation_token)
47
- results = @store.search(query_embedding: vector, k: @k, cancellation_token: cancellation_token)
48
- results.map do |doc|
49
- chunk = {content: doc[:metadata][:content], type: @type}
50
- src = @source || doc[:metadata][:source]
51
- chunk[:source] = src if src
52
- chunk
53
- end
54
- end
55
- end
56
- end
57
- end
@@ -1,52 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module KnowledgeSource
5
- # A KnowledgeSource backed by fixed text provided at construction time.
6
- #
7
- # Useful for injecting static documents, policy files, or configuration
8
- # knowledge that does not change per request.
9
- #
10
- # @example
11
- # ks = Phronomy::KnowledgeSource::StaticKnowledge.new(
12
- # "Our refund policy: ...",
13
- # type: :policy
14
- # )
15
- # agent.invoke("What is the refund policy?", config: { knowledge_sources: [ks] })
16
- class StaticKnowledge < Base
17
- # @param text [String] the static knowledge text to inject
18
- # @param type [Symbol] semantic tag for the chunk (default :static)
19
- # @param source [String, nil] label identifying where this knowledge came from
20
- # (e.g. a filename). Included in the context XML tag and exposed to the LLM
21
- # so that agents can produce grounded citations.
22
- # @api public
23
- def initialize(text, type: :static, source: nil)
24
- @text = text.to_s
25
- @type = type
26
- @source = source
27
- end
28
-
29
- # Returns the fixed text as a single chunk, regardless of query.
30
- #
31
- # @param query [String, nil] ignored for static knowledge
32
- # @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
33
- # @return [Array<Hash>]
34
- # @api public
35
- def fetch(query: nil, cancellation_token: nil)
36
- cancellation_token&.raise_if_cancelled!
37
- return [] if @text.empty?
38
-
39
- chunk = {content: @text, type: @type}
40
- chunk[:source] = @source if @source
41
- [chunk]
42
- end
43
-
44
- # Static knowledge content never changes between invocations.
45
- # @return [true]
46
- # @api public
47
- def static?
48
- true
49
- end
50
- end
51
- end
52
- end
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module Loader
5
- # Abstract base class for document loaders.
6
- #
7
- # A loader converts an external source (file path, URL, etc.) into an
8
- # Array of document hashes understood by the rest of the pipeline:
9
- #
10
- # [{ text: String, metadata: Hash }, ...]
11
- #
12
- # Subclasses must implement {#load}.
13
- class Base
14
- # Load documents from +source+ and return an array of document hashes.
15
- #
16
- # @param source [String] file path, URL, or other source identifier
17
- # @return [Array<Hash>] array of <tt>{ text: String, metadata: Hash }</tt>
18
- # @raise [NotImplementedError] when not overridden by a subclass
19
- # @api public
20
- def load(source)
21
- raise NotImplementedError, "#{self.class}#load is not implemented"
22
- end
23
- end
24
- end
25
- end
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "csv"
4
-
5
- module Phronomy
6
- module Loader
7
- # Loads a CSV file, converting each row into a separate document.
8
- #
9
- # By default the first row is treated as a header and column names are
10
- # available in the document metadata. The full row is serialised to
11
- # a human-readable "key: value" string for embedding.
12
- #
13
- # @example
14
- # loader = Phronomy::Loader::CsvLoader.new
15
- # docs = loader.load("products.csv")
16
- # # => [
17
- # # { text: "name: Widget\nprice: 9.99", metadata: { source: "...", row: 1, name: "Widget", price: "9.99" } },
18
- # # ...
19
- # # ]
20
- class CsvLoader < Base
21
- # @param headers [Boolean] treat the first row as headers (default: true)
22
- # @param text_column [String, nil] if set, use only this column as the document text
23
- # @api public
24
- def initialize(headers: true, text_column: nil)
25
- @headers = headers
26
- @text_column = text_column
27
- end
28
-
29
- # @param source [String] path to a CSV file
30
- # @return [Array<Hash>]
31
- # @raise [Errno::ENOENT] if the file does not exist
32
- # @api public
33
- def load(source)
34
- rows = CSV.read(source, headers: @headers, encoding: "UTF-8")
35
-
36
- if @headers
37
- rows.each_with_index.map do |row, idx|
38
- row_hash = row.to_h
39
- text = if @text_column
40
- row_hash[@text_column].to_s
41
- else
42
- row_hash.map { |k, v| "#{k}: #{v}" }.join("\n")
43
- end
44
- metadata = row_hash.transform_keys(&:to_sym).merge(source: source, row: idx + 1)
45
- {text: text, metadata: metadata}
46
- end
47
- else
48
- rows.each_with_index.map do |row, idx|
49
- text = row.join(", ")
50
- {text: text, metadata: {source: source, row: idx + 1}}
51
- end
52
- end
53
- end
54
- end
55
- end
56
- end
@@ -1,76 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module Loader
5
- # Loads a Markdown file, optionally splitting on top-level headings.
6
- #
7
- # When +split_on_headings:+ is true (the default), each H1/H2 section
8
- # becomes a separate document so that embeddings capture section semantics
9
- # rather than the full file at once.
10
- #
11
- # @example Single document (heading split disabled)
12
- # loader = Phronomy::Loader::MarkdownLoader.new(split_on_headings: false)
13
- # docs = loader.load("README.md")
14
- # # => [{ text: "# Title\n...", metadata: { source: "README.md" } }]
15
- #
16
- # @example Split per heading (default)
17
- # loader = Phronomy::Loader::MarkdownLoader.new
18
- # docs = loader.load("guide.md")
19
- # # => [
20
- # # { text: "# Section 1\n...", metadata: { source: "guide.md", section: "Section 1" } },
21
- # # { text: "## Sub-section\n...", metadata: { source: "guide.md", section: "Sub-section" } },
22
- # # ]
23
- class MarkdownLoader < Base
24
- HEADING_RE = /^(\#{1,6})\s+(.+)$/
25
-
26
- # @param split_on_headings [Boolean] split on H1–H6 boundaries (default: true)
27
- # @api public
28
- def initialize(split_on_headings: true)
29
- @split_on_headings = split_on_headings
30
- end
31
-
32
- # @param source [String] path to a Markdown file
33
- # @return [Array<Hash>]
34
- # @raise [Errno::ENOENT] if the file does not exist
35
- # @api public
36
- def load(source)
37
- content = File.read(source, encoding: "UTF-8")
38
- return [{text: content, metadata: {source: source}}] unless @split_on_headings
39
-
40
- split_by_headings(content, source)
41
- end
42
-
43
- private
44
-
45
- def split_by_headings(content, source)
46
- sections = []
47
- current_lines = []
48
- current_heading = nil
49
-
50
- content.each_line do |line|
51
- if (m = HEADING_RE.match(line.chomp))
52
- flush_section(sections, current_lines, current_heading, source) if current_lines.any?
53
- current_heading = m[2].strip
54
- current_lines = [line]
55
- else
56
- current_lines << line
57
- end
58
- end
59
-
60
- flush_section(sections, current_lines, current_heading, source) if current_lines.any?
61
-
62
- # Fall back to single document if no headings were found
63
- sections.empty? ? [{text: content, metadata: {source: source}}] : sections
64
- end
65
-
66
- def flush_section(sections, lines, heading, source)
67
- text = lines.join
68
- return if text.strip.empty?
69
-
70
- metadata = {source: source}
71
- metadata[:section] = heading if heading
72
- sections << {text: text, metadata: metadata}
73
- end
74
- end
75
- end
76
- end
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Phronomy
4
- module Loader
5
- # Loads a plain-text file as a single document.
6
- #
7
- # @example
8
- # loader = Phronomy::Loader::PlainTextLoader.new
9
- # docs = loader.load("/path/to/file.txt")
10
- # # => [{ text: "...", metadata: { source: "/path/to/file.txt" } }]
11
- class PlainTextLoader < Base
12
- # @param source [String] absolute or relative path to a text file
13
- # @return [Array<Hash>] single-element array with the file contents
14
- # @raise [Errno::ENOENT] if the file does not exist
15
- # @api public
16
- def load(source)
17
- text = File.read(source, encoding: "UTF-8")
18
- [{text: text, metadata: {source: source}}]
19
- end
20
- end
21
- end
22
- end