phronomy 0.5.4 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +21 -0
- data/CHANGELOG.md +379 -0
- data/CONTRIBUTING.md +102 -0
- data/README.md +262 -48
- data/RELEASE_CHECKLIST.md +86 -0
- data/SECURITY.md +80 -0
- data/benchmark/baseline.json +9 -0
- data/benchmark/bench_agent_invoke.rb +105 -0
- data/benchmark/bench_context_assembler.rb +46 -0
- data/benchmark/bench_regression.rb +171 -0
- data/benchmark/bench_token_estimator.rb +44 -0
- data/benchmark/bench_tool_schema.rb +69 -0
- data/benchmark/bench_vector_store.rb +39 -0
- data/benchmark/bench_workflow.rb +55 -0
- data/benchmark/run_all.rb +118 -0
- data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
- data/docs/decisions/002-workflow-context-immutability.md +42 -0
- data/docs/decisions/003-event-loop-singleton.md +48 -0
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +51 -0
- data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
- data/docs/decisions/006-no-built-in-guardrails.md +48 -0
- data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
- data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
- data/docs/decisions/009-state-store-abstraction.md +141 -0
- data/lib/phronomy/agent/base.rb +281 -13
- data/lib/phronomy/agent/before_completion_context.rb +1 -0
- data/lib/phronomy/agent/checkpoint.rb +1 -0
- data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
- data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
- data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
- data/lib/phronomy/agent/concerns/retryable.rb +12 -1
- data/lib/phronomy/agent/concerns/suspendable.rb +4 -0
- data/lib/phronomy/agent/fsm.rb +180 -0
- data/lib/phronomy/agent/handoff.rb +3 -0
- data/lib/phronomy/agent/orchestrator.rb +123 -11
- data/lib/phronomy/agent/parallel_tool_chat.rb +92 -0
- data/lib/phronomy/agent/react_agent.rb +8 -6
- data/lib/phronomy/agent/runner.rb +2 -0
- data/lib/phronomy/agent/shared_state.rb +11 -0
- data/lib/phronomy/agent/suspend_signal.rb +2 -0
- data/lib/phronomy/agent/team_coordinator.rb +17 -5
- data/lib/phronomy/cancellation_token.rb +92 -0
- data/lib/phronomy/configuration.rb +32 -2
- data/lib/phronomy/context/assembler.rb +6 -0
- data/lib/phronomy/context/compaction_context.rb +2 -0
- data/lib/phronomy/context/context_version_cache.rb +2 -0
- data/lib/phronomy/context/token_budget.rb +3 -0
- data/lib/phronomy/context/token_estimator.rb +9 -2
- data/lib/phronomy/context/trigger_context.rb +1 -0
- data/lib/phronomy/context/trim_context.rb +4 -0
- data/lib/phronomy/context.rb +0 -1
- data/lib/phronomy/embeddings/base.rb +5 -2
- data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
- data/lib/phronomy/eval/comparison.rb +2 -0
- data/lib/phronomy/eval/dataset.rb +4 -0
- data/lib/phronomy/eval/metrics.rb +6 -0
- data/lib/phronomy/eval/runner.rb +2 -0
- data/lib/phronomy/eval/scorer/base.rb +1 -0
- data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
- data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
- data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
- data/lib/phronomy/event.rb +14 -0
- data/lib/phronomy/event_loop.rb +254 -0
- data/lib/phronomy/fsm_session.rb +201 -0
- data/lib/phronomy/generator_verifier.rb +24 -22
- data/lib/phronomy/guardrail/base.rb +3 -0
- data/lib/phronomy/guardrail.rb +0 -1
- data/lib/phronomy/knowledge_source/base.rb +6 -2
- data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
- data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
- data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
- data/lib/phronomy/loader/base.rb +1 -0
- data/lib/phronomy/loader/csv_loader.rb +2 -0
- data/lib/phronomy/loader/markdown_loader.rb +2 -0
- data/lib/phronomy/loader/plain_text_loader.rb +1 -0
- data/lib/phronomy/output_parser/base.rb +1 -0
- data/lib/phronomy/output_parser/json_parser.rb +22 -3
- data/lib/phronomy/output_parser/structured_parser.rb +2 -0
- data/lib/phronomy/prompt_template.rb +5 -0
- data/lib/phronomy/runnable.rb +20 -3
- data/lib/phronomy/splitter/base.rb +2 -0
- data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
- data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
- data/lib/phronomy/state_store/base.rb +48 -0
- data/lib/phronomy/state_store/in_memory.rb +62 -0
- data/lib/phronomy/tool/agent_tool.rb +1 -0
- data/lib/phronomy/tool/base.rb +189 -27
- data/lib/phronomy/tool/mcp_tool.rb +68 -13
- data/lib/phronomy/tracing/base.rb +3 -0
- data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +2 -0
- data/lib/phronomy/vector_store/base.rb +33 -7
- data/lib/phronomy/vector_store/in_memory.rb +16 -7
- data/lib/phronomy/vector_store/pgvector.rb +40 -9
- data/lib/phronomy/vector_store/redis_search.rb +29 -8
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +175 -74
- data/lib/phronomy/workflow_context.rb +55 -5
- data/lib/phronomy/workflow_runner.rb +197 -114
- data/lib/phronomy.rb +74 -1
- data/scripts/api_snapshot.rb +91 -0
- data/scripts/check_api_annotations.rb +68 -0
- data/scripts/check_private_enforcement.rb +93 -0
- data/scripts/check_readme_runnable.rb +98 -0
- data/scripts/run_mutation.sh +46 -0
- metadata +50 -6
- data/lib/phronomy/context/builder.rb +0 -92
- data/lib/phronomy/guardrail/builtin/pii_pattern_detector.rb +0 -100
- data/lib/phronomy/guardrail/builtin/prompt_injection_detector.rb +0 -67
- data/lib/phronomy/guardrail/builtin.rb +0 -16
|
@@ -43,6 +43,7 @@ module Phronomy
|
|
|
43
43
|
# Call this after saving a new set of messages (e.g. from a ConversationManager save hook).
|
|
44
44
|
#
|
|
45
45
|
# @param messages [Array] message objects responding to #role and #content
|
|
46
|
+
# @api public
|
|
46
47
|
def update(messages:)
|
|
47
48
|
messages.each do |msg|
|
|
48
49
|
next unless msg.role.to_sym == :user
|
|
@@ -54,9 +55,12 @@ module Phronomy
|
|
|
54
55
|
# Returns a single chunk containing all known entity facts in XML context format.
|
|
55
56
|
# Returns an empty array when no entities have been discovered.
|
|
56
57
|
#
|
|
57
|
-
# @param query
|
|
58
|
+
# @param query [String, nil] unused — entity knowledge is always fully injected
|
|
59
|
+
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
58
60
|
# @return [Array<Hash>]
|
|
59
|
-
|
|
61
|
+
# @api public
|
|
62
|
+
def fetch(query: nil, cancellation_token: nil)
|
|
63
|
+
cancellation_token&.raise_if_cancelled!
|
|
60
64
|
return [] if @entities.empty?
|
|
61
65
|
|
|
62
66
|
lines = @entities.map { |key, value| "- #{key}: #{value}" }.join("\n")
|
|
@@ -70,6 +74,7 @@ module Phronomy
|
|
|
70
74
|
# Returns the current entity store (primarily for testing).
|
|
71
75
|
#
|
|
72
76
|
# @return [Hash]
|
|
77
|
+
# @api public
|
|
73
78
|
def entities
|
|
74
79
|
@entities.dup
|
|
75
80
|
end
|
|
@@ -22,6 +22,7 @@ module Phronomy
|
|
|
22
22
|
# @param type [Symbol] semantic tag (default :rag)
|
|
23
23
|
# @param source [String, nil] default source label; falls back to
|
|
24
24
|
# each document's :source metadata when nil
|
|
25
|
+
# @api public
|
|
25
26
|
def initialize(store:, embeddings:, k: 5, type: :rag, source: nil)
|
|
26
27
|
@store = store
|
|
27
28
|
@embeddings = embeddings
|
|
@@ -34,13 +35,16 @@ module Phronomy
|
|
|
34
35
|
#
|
|
35
36
|
# Returns an empty array when query is nil or blank.
|
|
36
37
|
#
|
|
37
|
-
# @param query
|
|
38
|
+
# @param query [String, nil]
|
|
39
|
+
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
38
40
|
# @return [Array<Hash>]
|
|
39
|
-
|
|
41
|
+
# @api public
|
|
42
|
+
def fetch(query: nil, cancellation_token: nil)
|
|
43
|
+
cancellation_token&.raise_if_cancelled!
|
|
40
44
|
return [] if query.nil? || query.strip.empty?
|
|
41
45
|
|
|
42
|
-
vector = @embeddings.embed(query)
|
|
43
|
-
results = @store.search(query_embedding: vector, k: @k)
|
|
46
|
+
vector = @embeddings.embed(query, cancellation_token)
|
|
47
|
+
results = @store.search(query_embedding: vector, k: @k, cancellation_token: cancellation_token)
|
|
44
48
|
results.map do |doc|
|
|
45
49
|
chunk = {content: doc[:metadata][:content], type: @type}
|
|
46
50
|
src = @source || doc[:metadata][:source]
|
|
@@ -19,6 +19,7 @@ module Phronomy
|
|
|
19
19
|
# @param source [String, nil] label identifying where this knowledge came from
|
|
20
20
|
# (e.g. a filename). Included in the context XML tag and exposed to the LLM
|
|
21
21
|
# so that agents can produce grounded citations.
|
|
22
|
+
# @api public
|
|
22
23
|
def initialize(text, type: :static, source: nil)
|
|
23
24
|
@text = text.to_s
|
|
24
25
|
@type = type
|
|
@@ -27,9 +28,12 @@ module Phronomy
|
|
|
27
28
|
|
|
28
29
|
# Returns the fixed text as a single chunk, regardless of query.
|
|
29
30
|
#
|
|
30
|
-
# @param query
|
|
31
|
+
# @param query [String, nil] ignored for static knowledge
|
|
32
|
+
# @param cancellation_token [Phronomy::CancellationToken, nil] optional; raises CancellationError when cancelled
|
|
31
33
|
# @return [Array<Hash>]
|
|
32
|
-
|
|
34
|
+
# @api public
|
|
35
|
+
def fetch(query: nil, cancellation_token: nil)
|
|
36
|
+
cancellation_token&.raise_if_cancelled!
|
|
33
37
|
return [] if @text.empty?
|
|
34
38
|
|
|
35
39
|
chunk = {content: @text, type: @type}
|
|
@@ -39,6 +43,7 @@ module Phronomy
|
|
|
39
43
|
|
|
40
44
|
# Static knowledge content never changes between invocations.
|
|
41
45
|
# @return [true]
|
|
46
|
+
# @api public
|
|
42
47
|
def static?
|
|
43
48
|
true
|
|
44
49
|
end
|
data/lib/phronomy/loader/base.rb
CHANGED
|
@@ -16,6 +16,7 @@ module Phronomy
|
|
|
16
16
|
# @param source [String] file path, URL, or other source identifier
|
|
17
17
|
# @return [Array<Hash>] array of <tt>{ text: String, metadata: Hash }</tt>
|
|
18
18
|
# @raise [NotImplementedError] when not overridden by a subclass
|
|
19
|
+
# @api public
|
|
19
20
|
def load(source)
|
|
20
21
|
raise NotImplementedError, "#{self.class}#load is not implemented"
|
|
21
22
|
end
|
|
@@ -20,6 +20,7 @@ module Phronomy
|
|
|
20
20
|
class CsvLoader < Base
|
|
21
21
|
# @param headers [Boolean] treat the first row as headers (default: true)
|
|
22
22
|
# @param text_column [String, nil] if set, use only this column as the document text
|
|
23
|
+
# @api public
|
|
23
24
|
def initialize(headers: true, text_column: nil)
|
|
24
25
|
@headers = headers
|
|
25
26
|
@text_column = text_column
|
|
@@ -28,6 +29,7 @@ module Phronomy
|
|
|
28
29
|
# @param source [String] path to a CSV file
|
|
29
30
|
# @return [Array<Hash>]
|
|
30
31
|
# @raise [Errno::ENOENT] if the file does not exist
|
|
32
|
+
# @api public
|
|
31
33
|
def load(source)
|
|
32
34
|
rows = CSV.read(source, headers: @headers, encoding: "UTF-8")
|
|
33
35
|
|
|
@@ -24,6 +24,7 @@ module Phronomy
|
|
|
24
24
|
HEADING_RE = /^(\#{1,6})\s+(.+)$/
|
|
25
25
|
|
|
26
26
|
# @param split_on_headings [Boolean] split on H1–H6 boundaries (default: true)
|
|
27
|
+
# @api public
|
|
27
28
|
def initialize(split_on_headings: true)
|
|
28
29
|
@split_on_headings = split_on_headings
|
|
29
30
|
end
|
|
@@ -31,6 +32,7 @@ module Phronomy
|
|
|
31
32
|
# @param source [String] path to a Markdown file
|
|
32
33
|
# @return [Array<Hash>]
|
|
33
34
|
# @raise [Errno::ENOENT] if the file does not exist
|
|
35
|
+
# @api public
|
|
34
36
|
def load(source)
|
|
35
37
|
content = File.read(source, encoding: "UTF-8")
|
|
36
38
|
return [{text: content, metadata: {source: source}}] unless @split_on_headings
|
|
@@ -12,6 +12,7 @@ module Phronomy
|
|
|
12
12
|
# @param source [String] absolute or relative path to a text file
|
|
13
13
|
# @return [Array<Hash>] single-element array with the file contents
|
|
14
14
|
# @raise [Errno::ENOENT] if the file does not exist
|
|
15
|
+
# @api public
|
|
15
16
|
def load(source)
|
|
16
17
|
text = File.read(source, encoding: "UTF-8")
|
|
17
18
|
[{text: text, metadata: {source: source}}]
|
|
@@ -10,6 +10,7 @@ module Phronomy
|
|
|
10
10
|
# @param text [String]
|
|
11
11
|
# @return [Hash, Array] result parsed with symbolize_names: true
|
|
12
12
|
# @raise [Phronomy::ParseError] raised when JSON parsing fails
|
|
13
|
+
# @api public
|
|
13
14
|
def parse(text)
|
|
14
15
|
json_str = extract_json(text)
|
|
15
16
|
JSON.parse(json_str, symbolize_names: true)
|
|
@@ -19,10 +20,28 @@ module Phronomy
|
|
|
19
20
|
|
|
20
21
|
private
|
|
21
22
|
|
|
22
|
-
# Extracts
|
|
23
|
-
#
|
|
23
|
+
# Extracts a JSON string from the LLM response text.
|
|
24
|
+
#
|
|
25
|
+
# Strategy (in order):
|
|
26
|
+
# 1. Try each ```json ... ``` or ``` ... ``` code fence in document order,
|
|
27
|
+
# returning the content of the first one that parses as valid JSON.
|
|
28
|
+
# 2. Try the raw text stripped of leading/trailing whitespace.
|
|
29
|
+
#
|
|
30
|
+
# This handles:
|
|
31
|
+
# - Single JSON code fence (common case)
|
|
32
|
+
# - Multiple code fences — the first parseable JSON block wins
|
|
33
|
+
# - No fence — LLM omitted the backticks but returned valid JSON
|
|
24
34
|
def extract_json(text)
|
|
25
|
-
text.
|
|
35
|
+
text.scan(/```(?:json)?\s*\n?(.*?)\n?```/m).each do |captures|
|
|
36
|
+
candidate = captures.first.strip
|
|
37
|
+
JSON.parse(candidate)
|
|
38
|
+
return candidate
|
|
39
|
+
rescue JSON::ParserError
|
|
40
|
+
next
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Fallback: no valid fence found — try the raw text
|
|
44
|
+
text.strip
|
|
26
45
|
end
|
|
27
46
|
end
|
|
28
47
|
end
|
|
@@ -9,6 +9,7 @@ module Phronomy
|
|
|
9
9
|
# parser.parse('{"name":"Alice","age":30}') #=> #<struct PersonSchema name="Alice", age=30>
|
|
10
10
|
class StructuredParser < Base
|
|
11
11
|
# @param schema_class [Class] Struct with keyword_init: true or equivalent
|
|
12
|
+
# @api public
|
|
12
13
|
def initialize(schema_class)
|
|
13
14
|
@schema_class = schema_class
|
|
14
15
|
end
|
|
@@ -16,6 +17,7 @@ module Phronomy
|
|
|
16
17
|
# @param text [String]
|
|
17
18
|
# @return [Object] instance of schema_class
|
|
18
19
|
# @raise [Phronomy::ParseError] raised when JSON parsing or schema instantiation fails
|
|
20
|
+
# @api public
|
|
19
21
|
def parse(text)
|
|
20
22
|
data = JsonParser.new.parse(text)
|
|
21
23
|
@schema_class.new(**data)
|
|
@@ -27,6 +27,7 @@ module Phronomy
|
|
|
27
27
|
|
|
28
28
|
# @param template [String] human message template with {{var}} placeholders
|
|
29
29
|
# @param system_template [String, nil] optional system message template
|
|
30
|
+
# @api public
|
|
30
31
|
def initialize(template:, system_template: nil)
|
|
31
32
|
@template = template
|
|
32
33
|
@system_template = system_template
|
|
@@ -36,6 +37,7 @@ module Phronomy
|
|
|
36
37
|
#
|
|
37
38
|
# @param variables [Hash{Symbol => String}]
|
|
38
39
|
# @return [String]
|
|
40
|
+
# @api public
|
|
39
41
|
def format(**variables)
|
|
40
42
|
substitute(@template, variables)
|
|
41
43
|
end
|
|
@@ -45,6 +47,7 @@ module Phronomy
|
|
|
45
47
|
#
|
|
46
48
|
# @param variables [Hash{Symbol => String}]
|
|
47
49
|
# @return [String, nil]
|
|
50
|
+
# @api public
|
|
48
51
|
def format_system(**variables)
|
|
49
52
|
@system_template && substitute(@system_template, variables)
|
|
50
53
|
end
|
|
@@ -54,6 +57,7 @@ module Phronomy
|
|
|
54
57
|
#
|
|
55
58
|
# @param input [Hash{Symbol => String}]
|
|
56
59
|
# @return [Hash]
|
|
60
|
+
# @api public
|
|
57
61
|
def invoke(input, config: {})
|
|
58
62
|
vars = normalize_input(input)
|
|
59
63
|
result = {prompt: format(**vars)}
|
|
@@ -65,6 +69,7 @@ module Phronomy
|
|
|
65
69
|
# Returns the list of placeholder names found in both templates.
|
|
66
70
|
#
|
|
67
71
|
# @return [Array<Symbol>]
|
|
72
|
+
# @api public
|
|
68
73
|
def variables
|
|
69
74
|
names = @template.scan(PLACEHOLDER).flatten
|
|
70
75
|
names += @system_template.scan(PLACEHOLDER).flatten if @system_template
|
data/lib/phronomy/runnable.rb
CHANGED
|
@@ -25,13 +25,30 @@ module Phronomy
|
|
|
25
25
|
# Yields a span; the block must return [result, usage] where usage is a
|
|
26
26
|
# Phronomy::TokenUsage or nil. Returns only the result value.
|
|
27
27
|
#
|
|
28
|
+
# When +trace_pii+ is disabled, both the input and the output (LLM response,
|
|
29
|
+
# tool result) are replaced with the literal string "[REDACTED]" before being
|
|
30
|
+
# forwarded to the tracing backend. The actual result is still returned to
|
|
31
|
+
# the caller — only the copy sent to the tracer is redacted.
|
|
32
|
+
#
|
|
28
33
|
# @example
|
|
29
34
|
# trace("my_chain", input: input) { [invoke(input), nil] }
|
|
35
|
+
# @api public
|
|
30
36
|
def trace(name, input: nil, **meta, &block)
|
|
31
|
-
# Redact user input from spans when trace_pii is disabled to prevent
|
|
32
|
-
# accidental PII transmission to external tracing backends.
|
|
33
37
|
traced_input = Phronomy.configuration.trace_pii ? input : "[REDACTED]"
|
|
34
|
-
|
|
38
|
+
|
|
39
|
+
if Phronomy.configuration.trace_pii
|
|
40
|
+
# PII recording is allowed: pass through unchanged.
|
|
41
|
+
Phronomy.configuration.tracer.trace(name, input: traced_input, **meta, &block)
|
|
42
|
+
else
|
|
43
|
+
# Redact both input (above) and output before forwarding to the tracer.
|
|
44
|
+
# Capture the real result so callers receive the unredacted value.
|
|
45
|
+
real_result = nil
|
|
46
|
+
Phronomy.configuration.tracer.trace(name, input: traced_input, **meta) do |span|
|
|
47
|
+
real_result, usage = block.call(span)
|
|
48
|
+
["[REDACTED]", usage]
|
|
49
|
+
end
|
|
50
|
+
real_result
|
|
51
|
+
end
|
|
35
52
|
end
|
|
36
53
|
end
|
|
37
54
|
end
|
|
@@ -18,6 +18,7 @@ module Phronomy
|
|
|
18
18
|
# returned by a Loader, or a plain String.
|
|
19
19
|
# @return [Array<Hash>] array of <tt>{ text: String, metadata: Hash }</tt>
|
|
20
20
|
# @raise [NotImplementedError] when not overridden by a subclass
|
|
21
|
+
# @api public
|
|
21
22
|
def split(document)
|
|
22
23
|
raise NotImplementedError, "#{self.class}#split is not implemented"
|
|
23
24
|
end
|
|
@@ -26,6 +27,7 @@ module Phronomy
|
|
|
26
27
|
#
|
|
27
28
|
# @param documents [Array<Hash, String>]
|
|
28
29
|
# @return [Array<Hash>]
|
|
30
|
+
# @api public
|
|
29
31
|
def split_all(documents)
|
|
30
32
|
documents.flat_map { |doc| split(doc) }
|
|
31
33
|
end
|
|
@@ -15,6 +15,7 @@ module Phronomy
|
|
|
15
15
|
# @param chunk_size [Integer] maximum characters per chunk (default: 1000)
|
|
16
16
|
# @param chunk_overlap [Integer] characters to repeat at the start of each
|
|
17
17
|
# subsequent chunk (default: 200); must be less than chunk_size
|
|
18
|
+
# @api public
|
|
18
19
|
def initialize(chunk_size: 1000, chunk_overlap: 200)
|
|
19
20
|
raise ArgumentError, "chunk_overlap must be less than chunk_size" if chunk_overlap >= chunk_size
|
|
20
21
|
|
|
@@ -24,6 +25,7 @@ module Phronomy
|
|
|
24
25
|
|
|
25
26
|
# @param document [Hash, String]
|
|
26
27
|
# @return [Array<Hash>]
|
|
28
|
+
# @api public
|
|
27
29
|
def split(document)
|
|
28
30
|
doc = normalise(document)
|
|
29
31
|
text = doc[:text]
|
|
@@ -25,6 +25,7 @@ module Phronomy
|
|
|
25
25
|
# @param chunk_size [Integer] maximum characters per chunk (default: 1000)
|
|
26
26
|
# @param chunk_overlap [Integer] overlap characters (default: 200)
|
|
27
27
|
# @param separators [Array<String>] separator list in priority order
|
|
28
|
+
# @api public
|
|
28
29
|
def initialize(chunk_size: 1000, chunk_overlap: 200, separators: DEFAULT_SEPARATORS)
|
|
29
30
|
raise ArgumentError, "chunk_overlap must be less than chunk_size" if chunk_overlap >= chunk_size
|
|
30
31
|
|
|
@@ -35,6 +36,7 @@ module Phronomy
|
|
|
35
36
|
|
|
36
37
|
# @param document [Hash, String]
|
|
37
38
|
# @return [Array<Hash>]
|
|
39
|
+
# @api public
|
|
38
40
|
def split(document)
|
|
39
41
|
doc = normalise(document)
|
|
40
42
|
texts = recursive_split(doc[:text], @separators)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module StateStore
|
|
5
|
+
# Abstract base class for workflow state persistence backends.
|
|
6
|
+
#
|
|
7
|
+
# Subclasses must implement {#load}, {#save}, and {#delete}.
|
|
8
|
+
# A snapshot is a plain +Hash+ with two keys:
|
|
9
|
+
# +:fields+ — output of +context.to_h+
|
|
10
|
+
# +:phase+ — +context.phase.to_s+
|
|
11
|
+
#
|
|
12
|
+
# @example Implementing a custom backend
|
|
13
|
+
# class MyStore < Phronomy::StateStore::Base
|
|
14
|
+
# def load(thread_id) = MyRecord.find_by(thread_id:)&.to_h
|
|
15
|
+
# def save(thread_id, snapshot) = MyRecord.upsert(thread_id:, data: snapshot)
|
|
16
|
+
# def delete(thread_id) = MyRecord.where(thread_id:).delete_all
|
|
17
|
+
# end
|
|
18
|
+
class Base
|
|
19
|
+
# Load the stored snapshot for +thread_id+.
|
|
20
|
+
#
|
|
21
|
+
# @param thread_id [String]
|
|
22
|
+
# @return [Hash, nil] stored snapshot hash, or +nil+ if absent
|
|
23
|
+
# @api public
|
|
24
|
+
def load(thread_id)
|
|
25
|
+
raise NotImplementedError, "#{self.class}#load is not implemented"
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Persist +snapshot+ for +thread_id+. Overwrites any existing snapshot.
|
|
29
|
+
#
|
|
30
|
+
# @param thread_id [String]
|
|
31
|
+
# @param snapshot [Hash] serialisable hash of workflow state
|
|
32
|
+
# @return [void]
|
|
33
|
+
# @api public
|
|
34
|
+
def save(thread_id, snapshot)
|
|
35
|
+
raise NotImplementedError, "#{self.class}#save is not implemented"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Delete the stored snapshot for +thread_id+. No-op if absent.
|
|
39
|
+
#
|
|
40
|
+
# @param thread_id [String]
|
|
41
|
+
# @return [void]
|
|
42
|
+
# @api public
|
|
43
|
+
def delete(thread_id)
|
|
44
|
+
raise NotImplementedError, "#{self.class}#delete is not implemented"
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module StateStore
|
|
5
|
+
# Thread-safe in-process state store backed by a plain Ruby Hash.
|
|
6
|
+
#
|
|
7
|
+
# Used as the recommended default for single-process applications and tests.
|
|
8
|
+
# State does not survive process restart.
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# store = Phronomy::StateStore::InMemory.new
|
|
12
|
+
# store.save("t1", { fields: { count: 1 }, phase: "__end__" })
|
|
13
|
+
# store.load("t1") # => { fields: { count: 1 }, phase: "__end__" }
|
|
14
|
+
# store.delete("t1")
|
|
15
|
+
# store.load("t1") # => nil
|
|
16
|
+
class InMemory < Base
|
|
17
|
+
def initialize
|
|
18
|
+
@data = {}
|
|
19
|
+
@mutex = Mutex.new
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# @param thread_id [String]
|
|
23
|
+
# @return [Hash, nil]
|
|
24
|
+
# @api public
|
|
25
|
+
def load(thread_id)
|
|
26
|
+
@mutex.synchronize do
|
|
27
|
+
snap = @data[thread_id]
|
|
28
|
+
snap ? deep_dup(snap) : nil
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# @param thread_id [String]
|
|
33
|
+
# @param snapshot [Hash]
|
|
34
|
+
# @return [void]
|
|
35
|
+
# @api public
|
|
36
|
+
def save(thread_id, snapshot)
|
|
37
|
+
@mutex.synchronize { @data[thread_id] = deep_dup(snapshot) }
|
|
38
|
+
nil
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# @param thread_id [String]
|
|
42
|
+
# @return [void]
|
|
43
|
+
# @api public
|
|
44
|
+
def delete(thread_id)
|
|
45
|
+
@mutex.synchronize { @data.delete(thread_id) }
|
|
46
|
+
nil
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
# Recursively deep-duplicates a plain-data value (Hash, Array, or scalar).
|
|
52
|
+
# Sufficient for snapshot data which consists of JSON-compatible types.
|
|
53
|
+
def deep_dup(val)
|
|
54
|
+
case val
|
|
55
|
+
when Hash then val.each_with_object({}) { |(k, v), h| h[k] = deep_dup(v) }
|
|
56
|
+
when Array then val.map { |v| deep_dup(v) }
|
|
57
|
+
else val.frozen? ? val : (val.dup rescue val) # rubocop:disable Style/RescueModifier
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -35,6 +35,7 @@ module Phronomy
|
|
|
35
35
|
# @param description [String, nil] description exposed to the LLM;
|
|
36
36
|
# defaults to "Delegates to <AgentClassName>"
|
|
37
37
|
# @return [Class] an anonymous Phronomy::Tool::AgentTool subclass
|
|
38
|
+
# @api public
|
|
38
39
|
def from_agent(agent_class, tool_name: nil, description: nil)
|
|
39
40
|
raise ArgumentError, "agent_class must be a Class" unless agent_class.is_a?(Class)
|
|
40
41
|
|