rcrewai 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'flow/state'
4
+ require_relative 'flow/state_store'
5
+
6
+ module RCrewAI
7
# Event-driven workflow engine — CrewAI's second pillar, in Ruby.
#
# Subclass Flow and declare methods with the class-level DSL:
#
#   class GuideFlow < RCrewAI::Flow
#     start :pick_topic
#     def pick_topic = state.topic = 'ruby'
#
#     listen :pick_topic
#     def research(prev) = "researched #{prev}"
#
#     router :research
#     def route(prev) = prev.include?('ruby') ? :publish : :revise
#
#     listen :publish
#     def publish = state.done = true
#   end
#
#   GuideFlow.new.kickoff
#
# Triggers combine with and_/or_. State is a schemaless object with a UUID and
# can be persisted/restored via a state store.
#
# All per-run bookkeeping (completed methods, outputs, router labels and the
# record of which listeners already fired) is reset at the start of every
# #kickoff, so one instance can be kicked off repeatedly.
class Flow
  # --- Trigger descriptors -------------------------------------------------

  # Describes when a listener/router may fire. +mode+ is :single, :or or
  # :and; +names+ is an array of symbols (method names or router labels).
  Trigger = Struct.new(:mode, :names) do
    # @param completed [#include?] names that are currently satisfied
    # @return [Boolean] true when any name (:single/:or) or every name (:and)
    #   is present in +completed+; false for an unknown mode
    def satisfied_by?(completed)
      case mode
      when :single, :or then names.any? { |n| completed.include?(n) }
      when :and then names.all? { |n| completed.include?(n) }
      else false
      end
    end
  end

  # --- Class-level DSL -----------------------------------------------------
  class << self
    # Method names (symbols) run, in declaration order, when a flow kicks off.
    def start_methods
      @start_methods ||= []
    end

    # name => Trigger. Populated when a listen declaration is bound to the
    # next defined method.
    def listeners
      @listeners ||= {}
    end

    # name => Trigger. Populated when a router declaration is bound to the
    # next defined method.
    def routers
      @routers ||= {}
    end

    # Registers +method_name+ as an entry point of the flow.
    def start(method_name)
      start_methods << method_name.to_sym
    end

    # Marks the next defined method as a listener for +trigger+ (a name or a
    # Trigger built with or_/and_).
    def listen(trigger)
      @pending = [:listen, normalize_trigger(trigger)]
    end

    # Marks the next defined method as a router for +trigger+. The router's
    # return value becomes a label other listeners can trigger on.
    def router(trigger)
      @pending = [:router, normalize_trigger(trigger)]
    end

    # Trigger satisfied as soon as any of +names+ is satisfied.
    def or_(*names)
      Trigger.new(:or, names.map(&:to_sym))
    end

    # Trigger satisfied only once all of +names+ are satisfied.
    def and_(*names)
      Trigger.new(:and, names.map(&:to_sym))
    end

    # Binds a pending listen/router declaration to the method just defined.
    def method_added(method_name)
      super
      return unless @pending

      kind, trigger = @pending
      @pending = nil # a declaration applies to exactly one method
      case kind
      when :listen then listeners[method_name.to_sym] = trigger
      when :router then routers[method_name.to_sym] = trigger
      end
    end

    # Merge inherited declarations so subclasses of a Flow subclass compose.
    # Copies are used so a subclass never mutates its parent's registries.
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@start_methods, start_methods.dup)
      subclass.instance_variable_set(:@listeners, listeners.dup)
      subclass.instance_variable_set(:@routers, routers.dup)
    end

    private

    # Wraps a bare name in a :single Trigger; passes Trigger objects through.
    def normalize_trigger(trigger)
      return trigger if trigger.is_a?(Trigger)

      Trigger.new(:single, [trigger.to_sym])
    end
  end

  # --- Instance ------------------------------------------------------------
  attr_reader :state

  # @param state_store [#save, #load, nil] optional persistence backend
  # @param feedback_handler [#call, nil] optional callable used by
  #   #human_feedback instead of prompting on the console
  def initialize(state_store: nil, feedback_handler: nil)
    @state = State.new
    @state_store = state_store
    @feedback_handler = feedback_handler
  end

  # A pause point for human feedback. Calls the configured feedback_handler
  # with the prompt and returns its response; without a handler, prompts on
  # the console. Mirrors CrewAI's @human_feedback.
  def human_feedback(prompt)
    return @feedback_handler.call(prompt) if @feedback_handler

    # Loaded lazily: only needed when falling back to console input.
    require_relative 'human_input'
    response = HumanInput.new.request_input(prompt)
    response.is_a?(Hash) ? response[:input] : response
  end

  # Runs the flow to completion. Optional inputs seed the state.
  #
  # @param inputs [Hash] key/value pairs written into the state before running
  # @return [Object] the flow state after all reachable methods have run
  def kickoff(inputs: {})
    inputs.each { |k, v| @state[k] = v }
    reset_run_tracking

    self.class.start_methods.each { |m| run_method(m) }
    drain_listeners

    persist
    @state
  end

  # Restores state previously persisted under +id+.
  #
  # @raise [FlowError] when no store is configured or nothing is stored for +id+
  def restore(id)
    raise FlowError, 'no state store configured' unless @state_store

    hash = @state_store.load(id)
    raise FlowError, "no persisted state for id #{id}" unless hash

    @state = State.new(symbolize(hash))
    @state
  end

  private

  # Clears all per-run bookkeeping. Without resetting @fired as well, a second
  # kickoff on the same instance would treat every listener as already fired
  # and silently skip it.
  def reset_run_tracking
    @completed = []      # method names that have finished
    @outputs = {}        # method name => return value
    @router_labels = []  # labels emitted by routers, act as pseudo-triggers
    @fired = Hash.new { |h, k| h[k] = [] } # method name => trigger names consumed
  end

  # Runs a start method. If it is also declared as a listener/router, it is
  # passed the output of its most recently completed trigger (nil otherwise).
  def run_method(method_name)
    trigger = self.class.listeners[method_name] || self.class.routers[method_name]
    arg = trigger ? @outputs[last_trigger_name(trigger)] : nil
    invoke_listener(method_name, arg)
  end

  # Repeatedly fire any listeners/routers whose triggers are now satisfied,
  # until no new method runs (fixed point).
  def drain_listeners
    reactive = self.class.listeners.merge(self.class.routers)
    loop do
      ran = false
      reactive.each do |method_name, trigger|
        next if fired_enough?(method_name, trigger)
        next unless trigger.satisfied_by?(satisfied_set)

        run_listener(method_name, trigger)
        ran = true
      end
      break unless ran
    end
  end

  # For :single/:or triggers we fire once per completed trigger name; for :and
  # we fire once. Track how many times each listener has fired.
  def run_listener(method_name, trigger)
    if trigger.mode == :and
      @fired[method_name] << :once
      invoke_listener(method_name, @outputs[trigger.names.last])
    else
      pending = trigger.names.select { |n| satisfied_set.include?(n) } - @fired[method_name]
      pending.each do |name|
        @fired[method_name] << name
        invoke_listener(method_name, @outputs[name])
      end
    end
  end

  # Calls +method_name+ (passing +arg+ unless the method takes no arguments),
  # records its completion and output, and registers a router label when the
  # method is a router that returned a truthy value.
  def invoke_listener(method_name, arg)
    result = arity_for(method_name).zero? ? send(method_name) : send(method_name, arg)
    @completed << method_name
    @outputs[method_name] = result
    @router_labels << result.to_sym if self.class.routers.key?(method_name) && result
  end

  # True when the listener has nothing left to react to: an :and listener
  # that already fired, or a :single/:or listener that has consumed every
  # currently satisfied trigger name.
  def fired_enough?(method_name, trigger)
    case trigger.mode
    when :and then @fired[method_name].any?
    else (trigger.names & satisfied_set).all? { |n| @fired[method_name].include?(n) }
    end
  end

  # Names available to satisfy triggers: completed methods + router labels.
  def satisfied_set
    @completed + @router_labels
  end

  # The last trigger name (in declaration order) that has completed, falling
  # back to the last declared name.
  def last_trigger_name(trigger)
    (trigger.names & @completed).last || trigger.names.last
  end

  def arity_for(method_name)
    method(method_name).arity
  end

  # Saves the state under its id when a store is configured; no-op otherwise.
  def persist
    return unless @state_store

    @state_store.save(@state.id, @state.to_h)
  end

  def symbolize(hash)
    hash.transform_keys(&:to_sym)
  end
end
241
+
242
# Raised by Flow#restore when no state store is configured or when nothing is persisted under the requested id.
+ class FlowError < Error; end
243
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'chunker'
4
+ require_relative 'store'
5
+ require_relative 'sources'
6
+ require_relative 'embedder'
7
+
8
+ module RCrewAI
9
+ module Knowledge
10
# A knowledge base: loads sources, chunks their text, embeds the chunks, and
# answers similarity queries. Attach one to an Agent (role-specific) or a
# Crew (shared) via the +knowledge_sources:+ option.
#
# Chunks are sent to the embedder in batches of +batch_size+ so that large
# corpora do not end up in a single oversized embedding request.
class Base
  # Number of chunks passed to the embedder per #embed call by default.
  DEFAULT_BATCH_SIZE = 100

  attr_reader :sources

  # @param sources [Array<#read>, #read] one or more knowledge sources
  # @param embedder [#embed, nil] defaults to a new Embedder
  # @param chunk_size [Integer] maximum characters per chunk
  # @param overlap [Integer] characters shared between neighbouring chunks
  # @param batch_size [Integer] chunks per embedder call
  # @raise [ArgumentError] if +batch_size+ is not positive
  def initialize(sources: [], embedder: nil, chunk_size: 1000, overlap: 100, batch_size: DEFAULT_BATCH_SIZE)
    raise ArgumentError, 'batch_size must be positive' unless batch_size.positive?

    @sources = Array(sources)
    @embedder = embedder || Embedder.new
    @chunker = Chunker.new(chunk_size: chunk_size, overlap: overlap)
    @store = Store.new
    @batch_size = batch_size
    @built = false
  end

  # Loads, chunks, and embeds all sources. Idempotent.
  #
  # Nothing is written to the store until every batch has been embedded, so
  # a failed build can be retried without leaving duplicate entries behind.
  #
  # @return [self]
  # @raise [EmbeddingError] if the embedder returns fewer vectors than chunks
  def build!
    return self if @built

    chunks = @sources.flat_map { |source| @chunker.chunk(source.read) }
    entries = chunks.each_slice(@batch_size).flat_map { |batch| batch.zip(embed_batch(batch)) }
    entries.each { |text, vector| @store.add(text, vector) }

    @built = true
    self
  end

  # Returns up to k chunks most relevant to the query string.
  def search(query, k: 3)
    build! unless @built
    return [] if @store.empty?

    query_vector = @embedder.embed([query]).first
    @store.search(query_vector, k: k)
  end

  # True when no sources were supplied (regardless of build state).
  def empty?
    @sources.empty?
  end

  private

  # Embeds one batch and checks that every chunk received a vector; a short
  # result would otherwise be zipped in as nil vectors and only fail later,
  # during search.
  def embed_batch(batch)
    vectors = Array(@embedder.embed(batch))
    return vectors if vectors.length >= batch.length

    raise EmbeddingError, "embedder returned #{vectors.length} vectors for #{batch.length} chunks"
  end
end
51
+ end
52
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RCrewAI
4
+ module Knowledge
5
# Splits text into fixed-size, overlapping character windows. Overlap keeps
# context from spilling across chunk boundaries during retrieval.
class Chunker
  # @param chunk_size [Integer] maximum number of characters per chunk
  # @param overlap [Integer] characters repeated at the start of the next chunk
  # @raise [ArgumentError] if chunk_size is not positive, overlap is negative,
  #   or overlap is not smaller than chunk_size
  def initialize(chunk_size: 1000, overlap: 100)
    raise ArgumentError, 'chunk_size must be positive' unless chunk_size.positive?
    # A negative overlap would make the step larger than the window and
    # silently drop the text between consecutive chunks.
    raise ArgumentError, 'overlap must not be negative' if overlap.negative?
    raise ArgumentError, 'overlap must be smaller than chunk_size' if overlap >= chunk_size

    @chunk_size = chunk_size
    @overlap = overlap
  end

  # @param text [#to_s] text to split (nil is treated as empty)
  # @return [Array<String>] chunks in document order; empty for empty text
  def chunk(text)
    text = text.to_s
    return [] if text.empty?
    return [text] if text.length <= @chunk_size

    step = @chunk_size - @overlap
    chunks = []
    start = 0
    loop do
      chunks << text[start, @chunk_size]
      # Stop once this window reaches the end of the text; advancing further
      # would only emit a tail already fully contained in this chunk.
      break if start + @chunk_size >= text.length

      start += step
    end
    chunks
  end
end
30
+ end
31
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'faraday'
5
+
6
+ module RCrewAI
7
+ module Knowledge
8
# Turns text into embedding vectors. Defaults to OpenAI's embeddings API;
# #embed takes an array of strings and returns an array of vectors. Any
# object responding to #embed can be substituted (see specs).
class Embedder
  DEFAULT_MODEL = 'text-embedding-3-small'
  OPENAI_URL = 'https://api.openai.com/v1/embeddings'

  # @param model [String] embedding model name sent with each request
  # @param api_key [String, nil] explicit key; when nil, falls back to
  #   config.openai_api_key and then config.api_key
  # @param config [Object] configuration consulted only when api_key is nil
  def initialize(model: DEFAULT_MODEL, api_key: nil, config: RCrewAI.configuration)
    @model = model
    @api_key = api_key || config.openai_api_key || config.api_key
  end

  # Embeds +texts+ with a single POST to OPENAI_URL.
  #
  # @param texts [Array<String>, String] text(s) to embed
  # @return [Array] one vector per input, in input order; [] for no input
  # @raise [EmbeddingError] if the request fails or the response body is not
  #   JSON containing a "data" array
  def embed(texts)
    texts = Array(texts)
    return [] if texts.empty?

    response = connection.post(OPENAI_URL) do |req|
      req.headers['Authorization'] = "Bearer #{@api_key}"
      req.headers['Content-Type'] = 'application/json'
      req.body = JSON.generate(model: @model, input: texts)
    end

    raise EmbeddingError, "embedding request failed: #{response.status}" unless response.success?

    extract_vectors(response.body)
  end

  private

  # Pulls the vectors out of a response body (a JSON string or an already
  # parsed Hash). Items are ordered by their "index" field when present so
  # the result lines up with the input even if the items arrive out of order.
  def extract_vectors(body)
    body = JSON.parse(body) if body.is_a?(String)
    data = body.is_a?(Hash) ? body['data'] : nil
    raise EmbeddingError, 'embedding response has no "data" array' unless data.is_a?(Array)

    data.each_with_index
        .sort_by { |item, position| item['index'] || position }
        .map { |item, _position| item['embedding'] }
  rescue JSON::ParserError => e
    raise EmbeddingError, "embedding response is not valid JSON: #{e.message}"
  end

  # Memoized Faraday connection using the default adapter.
  def connection
    @connection ||= Faraday.new do |f|
      f.adapter Faraday.default_adapter
    end
  end
end
45
+
46
# Raised by Embedder#embed when the embeddings HTTP request does not succeed.
+ class EmbeddingError < RCrewAI::Error; end
47
+ end
48
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RCrewAI
4
+ module Knowledge
5
# Common interface for knowledge sources: each one exposes its content as
# plain text through #read. The concrete sources below cover in-memory
# strings, plain files, PDFs, CSVs, and URLs.
class Source
  # @return [String] the source's text
  # @raise [NotImplementedError] unless overridden by a subclass
  def read
    raise NotImplementedError, 'Subclasses must implement #read'
  end
end

# Serves text that is already in memory.
class StringSource < Source
  # @param text [#to_s] content, converted to a String up front
  def initialize(text)
    super()
    @text = text.to_s
  end

  def read
    @text
  end
end

# Reads the whole file at +path+ each time #read is called.
class FileSource < Source
  def initialize(path)
    super()
    @path = path
  end

  def read
    File.read(@path)
  end
end

# Extracts the text of every page of a PDF, one page per line group.
class PdfSource < Source
  def initialize(path)
    super()
    @path = path
  end

  def read
    # pdf-reader is loaded on first use rather than at file load.
    require 'pdf-reader'
    page_texts = PDF::Reader.new(@path).pages.map(&:text)
    page_texts.join("\n")
  end
end

# Flattens a CSV file into text: one line per row, fields joined by ", ".
class CsvSource < Source
  def initialize(path)
    super()
    @path = path
  end

  def read
    # csv is loaded on first use rather than at file load.
    require 'csv'
    lines = CSV.read(@path).map { |fields| fields.join(', ') }
    lines.join("\n")
  end
end

# Downloads a page and reduces its HTML to whitespace-normalised text.
class UrlSource < Source
  # @param url [String] page to fetch
  # @param fetcher [#call, nil] optional callable taking the URL and
  #   returning HTML; when nil the page is fetched with Faraday
  def initialize(url, fetcher: nil)
    super()
    @url = url
    @fetcher = fetcher
  end

  def read
    markup = if @fetcher
               @fetcher.call(@url)
             else
               fetch(@url)
             end
    require 'nokogiri'
    document = Nokogiri::HTML(markup)
    # Script and style bodies are not readable content.
    document.search('script, style').remove
    collapsed = document.text.gsub(/\s+/, ' ')
    collapsed.strip
  end

  private

  # Performs a plain GET and returns the response body.
  def fetch(url)
    require 'faraday'
    response = Faraday.get(url)
    response.body
  end
end
82
+ end
83
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RCrewAI
4
+ module Knowledge
5
# In-memory vector store with cosine-similarity search. The default backing
# store for Knowledge — no external service required. The interface
# (#add, #search) is intentionally small so a Chroma/Qdrant-backed store can
# be swapped in later.
class Store
  Entry = Struct.new(:text, :vector)

  def initialize
    @entries = []
  end

  # Appends +text+ with its embedding +vector+ (an array of numbers).
  def add(text, vector)
    @entries << Entry.new(text, vector)
  end

  # Returns the texts of the top-k entries most similar to +query_vector+.
  #
  # @param query_vector [Array<Numeric>]
  # @param k [Integer] maximum number of texts to return
  # @return [Array<String>] texts ordered from most to least similar
  def search(query_vector, k: 3)
    return [] if @entries.empty?

    @entries
      .map { |entry| [entry.text, cosine_similarity(query_vector, entry.vector)] }
      .sort_by { |(_text, score)| -score }
      .first(k)
      .map(&:first)
  end

  def size
    @entries.length
  end

  def empty?
    @entries.empty?
  end

  private

  # Cosine similarity of +a+ and +b+; 0.0 when either has zero magnitude.
  #
  # Vectors of different lengths are compared as if the shorter one were
  # padded with zeros. The loop covers the longer of the two so that extra
  # components on either side count towards that vector's norm; walking only
  # +a+ would ignore the tail of a longer +b+ and inflate its score.
  def cosine_similarity(a, b)
    dot = 0.0
    norm_a = 0.0
    norm_b = 0.0
    [a.length, b.length].max.times do |i|
      ai = (a[i] || 0).to_f
      bi = (b[i] || 0).to_f
      dot += ai * bi
      norm_a += ai * ai
      norm_b += bi * bi
    end
    return 0.0 if norm_a.zero? || norm_b.zero?

    dot / (Math.sqrt(norm_a) * Math.sqrt(norm_b))
  end
end
57
+ end
58
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'knowledge/chunker'
4
+ require_relative 'knowledge/store'
5
+ require_relative 'knowledge/sources'
6
+ require_relative 'knowledge/embedder'
7
+ require_relative 'knowledge/base'
8
+
9
+ module RCrewAI
10
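+ # Retrieval-augmented knowledge for agents and crews. Requiring this file loads the chunker, store, sources, embedder, and base components; Knowledge::Base is the entry point.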
11
+ module Knowledge
12
+ end
13
+ end
@@ -28,6 +28,29 @@ module RCrewAI
28
28
  end
29
29
  end
30
30
 
31
# Resolves a per-agent / per-pass LLM spec into a client.
#   nil            -> global provider
#   Symbol/String  -> that provider, global model
#   Hash           -> { provider:, model:, api_key:, temperature: } overrides;
#                     string keys (e.g. from YAML/JSON) are accepted too
#   client object  -> returned as-is (anything responding to #chat)
#
# @param spec [nil, Symbol, String, Hash, #chat]
# @param config [#with_overrides] base configuration the overrides apply to
# @raise [ConfigurationError] if +spec+ is none of the supported shapes
def self.resolve(spec, config = RCrewAI.configuration)
  case spec
  when nil
    for_provider(nil, config)
  when Symbol, String
    overridden = config.with_overrides(provider: spec)
    for_provider(overridden.llm_provider, overridden)
  when Hash
    # Keyword arguments must be symbols; normalise so string-keyed hashes
    # are not rejected or silently ignored.
    overridden = config.with_overrides(**spec.transform_keys(&:to_sym))
    for_provider(overridden.llm_provider, overridden)
  else
    return spec if spec.respond_to?(:chat)

    raise ConfigurationError,
          "Invalid llm: expected a provider symbol, an options hash, or a client responding to #chat, got #{spec.class}"
  end
end
53
+
31
54
  def self.chat(messages:, **options)
32
55
  client = for_provider
33
56
  client.chat(messages: messages, **options)
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module RCrewAI
6
# Validates and coerces a task's raw string output against a JSON-Schema
# subset (object / type / required / property types). Used by Task for the
# `output_schema:` option. Kept intentionally small: it covers the shapes an
# LLM is realistically asked to emit, not the whole JSON Schema spec.
module OutputSchema
  # Matches a ``` or ```json fence whose content is a JSON object or array.
  FENCED_JSON = /```(?:json)?\s*(\{.*?\}|\[.*?\])\s*```/m

  module_function

  # Returns the validated/coerced object.
  # Raises OutputSchemaError if the string can't be parsed or doesn't conform.
  def coerce(raw, schema)
    data = parse(raw)
    validate!(data, schema)
    data
  end

  # Extracts a JSON document from a string that may contain surrounding prose,
  # then parses it. Prefers a fenced ```json block, then the first balanced
  # object/array, then the whole string.
  def parse(raw)
    candidate = extract_json(raw.to_s)
    JSON.parse(candidate)
  rescue JSON::ParserError => e
    raise OutputSchemaError, "output is not valid JSON: #{e.message}"
  end

  # Picks the substring of +text+ most likely to be the JSON document.
  #
  # Order of preference: the content of a fenced block; the bracket-balanced
  # span starting at the first "{" or "["; the span from that first opener to
  # the last closer (for unbalanced text); otherwise +text+ unchanged.
  #
  # @param text [String]
  # @return [String] candidate JSON (not guaranteed to parse)
  def extract_json(text)
    fenced = text[FENCED_JSON, 1]
    return fenced if fenced

    first = text.index(/[{\[]/)
    return text if first.nil?

    balanced_span(text, first) || greedy_span(text, first) || text
  end

  # Scans from the opener at +start+ to its matching closer, ignoring
  # brackets inside JSON string literals (including escaped quotes). Returns
  # nil if the brackets never balance. Trailing prose that happens to contain
  # "]" or "}" therefore no longer leaks into the candidate.
  def balanced_span(text, start)
    tail = text[start..]
    depth = 0
    in_string = false
    escaped = false

    tail.each_char.with_index do |char, offset|
      if in_string
        if escaped then escaped = false
        elsif char == '\\' then escaped = true
        elsif char == '"' then in_string = false
        end
        next
      end

      case char
      when '"' then in_string = true
      when '{', '[' then depth += 1
      when '}', ']'
        depth -= 1
        return tail[0..offset] if depth.zero?
      end
    end
    nil
  end

  # Fallback for unbalanced text: everything from the opener at +first+ to
  # the last closing bracket, or nil when no closer follows the opener.
  def greedy_span(text, first)
    last = text.rindex(/[}\]]/)
    return nil if last.nil? || last < first

    text[first..last]
  end

  # Checks +data+ against the schema's "type" (symbol or string key). Schemas
  # without a recognised type are accepted as-is.
  #
  # @return [Object] +data+, unchanged
  # @raise [OutputSchemaError] on a type mismatch
  def validate!(data, schema)
    type = (schema[:type] || schema['type'])&.to_s
    case type
    when 'object' then validate_object!(data, schema)
    when 'array' then raise_unless(data.is_a?(Array), 'expected an array')
    when 'string' then raise_unless(data.is_a?(String), 'expected a string')
    when 'integer' then raise_unless(data.is_a?(Integer), 'expected an integer')
    when 'number' then raise_unless(data.is_a?(Numeric), 'expected a number')
    when 'boolean' then raise_unless([true, false].include?(data), 'expected a boolean')
    end
    data
  end

  # Validates an object: it must be a Hash, contain every required key, and
  # each declared property that is present (non-nil) must match its subschema.
  def validate_object!(data, schema)
    raise_unless(data.is_a?(Hash), 'expected a JSON object')

    required = schema[:required] || schema['required'] || []
    required.each do |key|
      raise_unless(data.key?(key.to_s), "missing required property '#{key}'")
    end

    props = schema[:properties] || schema['properties'] || {}
    props.each do |name, subschema|
      value = data[name.to_s]
      next if value.nil? # absent/null optional properties are not type-checked

      validate!(value, subschema)
    end
  end

  def raise_unless(condition, message)
    raise OutputSchemaError, message unless condition
  end
end
77
+
78
# Raised by OutputSchema when output is not valid JSON or does not conform to the schema.
+ class OutputSchemaError < Error; end
79
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative 'output_schema'
5
+
6
+ module RCrewAI
7
# Runs a single planning pass over a crew's tasks before execution. Asks an
# LLM to draft a short, concrete plan for each task and folds that plan into
# the task's description, so the executing agent starts with a game plan.
#
# Mirrors CrewAI's `planning=True`. Best-effort: if the planner errors or
# returns unparseable output, execution proceeds with the original tasks.
class Planning
  # @param crew [#tasks] crew whose tasks are enriched in place
  # @param llm [#chat, nil] planner client; defaults to LLMClient.for_provider
  # @param logger [#warn, nil] receives a warning when planning is skipped
  def initialize(crew, llm: nil, logger: nil)
    @crew = crew
    @llm = llm || LLMClient.for_provider
    @logger = logger
  end

  # Requests a plan and appends "Plan: ..." to every task the planner
  # covered. Tasks with no plan, or a blank one, are left untouched.
  #
  # @return [Array, nil] the crew's tasks when plans were applied; nil when
  #   planning was skipped or failed
  def plan!
    return if @crew.tasks.empty?

    plans = request_plans
    return if plans.nil?

    # The prompt asks for an object; anything else (array, scalar) cannot be
    # looked up by task name, so skip explicitly instead of relying on the
    # rescue below.
    unless plans.is_a?(Hash)
      @logger&.warn("Planning pass returned #{plans.class} instead of a JSON object, continuing without a plan")
      return
    end
    return if plans.empty?

    @crew.tasks.each do |task|
      step = plans[task.name] || plans[task.name.to_s]
      next if step.nil? || step.to_s.strip.empty?

      task.enrich_description("Plan: #{step}")
    end
  rescue StandardError => e
    @logger&.warn("Planning pass failed, continuing without a plan: #{e.message}")
    nil
  end

  private

  # Sends the planning prompt and parses the reply; nil if it is not JSON.
  def request_plans
    response = @llm.chat(messages: [
                           { role: 'system', content: system_prompt },
                           { role: 'user', content: user_prompt }
                         ])
    parse_plans(response_text(response))
  end

  # Accepts a Hash reply keyed by :content or 'content', or any other object
  # (converted with #to_s).
  def response_text(response)
    return response.to_s unless response.is_a?(Hash)

    (response[:content] || response['content']).to_s
  end

  def parse_plans(content)
    OutputSchema.parse(content)
  rescue OutputSchemaError
    nil
  end

  def system_prompt
    'You are a planning assistant. Given a list of tasks, produce a short, ' \
      'concrete plan for each. Respond ONLY with a JSON object mapping each ' \
      'task name to a one-sentence plan string.'
  end

  def user_prompt
    lines = @crew.tasks.map do |t|
      "- #{t.name}: #{t.description} (expected: #{t.expected_output || 'n/a'})"
    end
    "Tasks:\n#{lines.join("\n")}\n\nReturn JSON: { \"<task name>\": \"<plan>\", ... }"
  end
end
65
+ end