scout-ai 0.2.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +155 -9
  3. data/README.md +296 -0
  4. data/Rakefile +3 -0
  5. data/VERSION +1 -1
  6. data/bin/scout-ai +2 -0
  7. data/doc/Agent.md +279 -0
  8. data/doc/Chat.md +258 -0
  9. data/doc/LLM.md +446 -0
  10. data/doc/Model.md +513 -0
  11. data/doc/RAG.md +129 -0
  12. data/lib/scout/llm/agent/chat.rb +74 -0
  13. data/lib/scout/llm/agent/delegate.rb +39 -0
  14. data/lib/scout/llm/agent/iterate.rb +44 -0
  15. data/lib/scout/llm/agent.rb +51 -30
  16. data/lib/scout/llm/ask.rb +63 -21
  17. data/lib/scout/llm/backends/anthropic.rb +147 -0
  18. data/lib/scout/llm/backends/bedrock.rb +129 -0
  19. data/lib/scout/llm/backends/huggingface.rb +6 -21
  20. data/lib/scout/llm/backends/ollama.rb +62 -35
  21. data/lib/scout/llm/backends/openai.rb +77 -33
  22. data/lib/scout/llm/backends/openwebui.rb +1 -1
  23. data/lib/scout/llm/backends/relay.rb +3 -2
  24. data/lib/scout/llm/backends/responses.rb +320 -0
  25. data/lib/scout/llm/chat.rb +703 -0
  26. data/lib/scout/llm/embed.rb +4 -4
  27. data/lib/scout/llm/mcp.rb +28 -0
  28. data/lib/scout/llm/parse.rb +71 -13
  29. data/lib/scout/llm/rag.rb +9 -0
  30. data/lib/scout/llm/tools/call.rb +66 -0
  31. data/lib/scout/llm/tools/knowledge_base.rb +158 -0
  32. data/lib/scout/llm/tools/mcp.rb +59 -0
  33. data/lib/scout/llm/tools/workflow.rb +69 -0
  34. data/lib/scout/llm/tools.rb +112 -76
  35. data/lib/scout/llm/utils.rb +17 -10
  36. data/lib/scout/model/base.rb +19 -0
  37. data/lib/scout/model/python/base.rb +25 -0
  38. data/lib/scout/model/python/huggingface/causal/next_token.rb +23 -0
  39. data/lib/scout/model/python/huggingface/causal.rb +29 -0
  40. data/lib/scout/model/python/huggingface/classification +0 -0
  41. data/lib/scout/model/python/huggingface/classification.rb +50 -0
  42. data/lib/scout/model/python/huggingface.rb +112 -0
  43. data/lib/scout/model/python/torch/dataloader.rb +57 -0
  44. data/lib/scout/model/python/torch/helpers.rb +84 -0
  45. data/lib/scout/model/python/torch/introspection.rb +34 -0
  46. data/lib/scout/model/python/torch/load_and_save.rb +47 -0
  47. data/lib/scout/model/python/torch.rb +94 -0
  48. data/lib/scout/model/util/run.rb +181 -0
  49. data/lib/scout/model/util/save.rb +81 -0
  50. data/lib/scout-ai.rb +4 -1
  51. data/python/scout_ai/__init__.py +35 -0
  52. data/python/scout_ai/huggingface/data.py +48 -0
  53. data/python/scout_ai/huggingface/eval.py +60 -0
  54. data/python/scout_ai/huggingface/model.py +29 -0
  55. data/python/scout_ai/huggingface/rlhf.py +83 -0
  56. data/python/scout_ai/huggingface/train/__init__.py +34 -0
  57. data/python/scout_ai/huggingface/train/next_token.py +315 -0
  58. data/python/scout_ai/util.py +32 -0
  59. data/scout-ai.gemspec +143 -0
  60. data/scout_commands/agent/ask +89 -14
  61. data/scout_commands/agent/kb +15 -0
  62. data/scout_commands/documenter +148 -0
  63. data/scout_commands/llm/ask +71 -12
  64. data/scout_commands/llm/process +4 -2
  65. data/scout_commands/llm/server +319 -0
  66. data/share/server/chat.html +138 -0
  67. data/share/server/chat.js +468 -0
  68. data/test/data/cat.jpg +0 -0
  69. data/test/scout/llm/agent/test_chat.rb +14 -0
  70. data/test/scout/llm/backends/test_anthropic.rb +134 -0
  71. data/test/scout/llm/backends/test_bedrock.rb +60 -0
  72. data/test/scout/llm/backends/test_huggingface.rb +3 -3
  73. data/test/scout/llm/backends/test_ollama.rb +48 -10
  74. data/test/scout/llm/backends/test_openai.rb +134 -10
  75. data/test/scout/llm/backends/test_responses.rb +239 -0
  76. data/test/scout/llm/test_agent.rb +0 -70
  77. data/test/scout/llm/test_ask.rb +4 -1
  78. data/test/scout/llm/test_chat.rb +256 -0
  79. data/test/scout/llm/test_mcp.rb +29 -0
  80. data/test/scout/llm/test_parse.rb +81 -2
  81. data/test/scout/llm/tools/test_call.rb +0 -0
  82. data/test/scout/llm/tools/test_knowledge_base.rb +22 -0
  83. data/test/scout/llm/tools/test_mcp.rb +11 -0
  84. data/test/scout/llm/tools/test_workflow.rb +39 -0
  85. data/test/scout/model/python/huggingface/causal/test_next_token.rb +59 -0
  86. data/test/scout/model/python/huggingface/test_causal.rb +33 -0
  87. data/test/scout/model/python/huggingface/test_classification.rb +30 -0
  88. data/test/scout/model/python/test_base.rb +44 -0
  89. data/test/scout/model/python/test_huggingface.rb +9 -0
  90. data/test/scout/model/python/test_torch.rb +71 -0
  91. data/test/scout/model/python/torch/test_helpers.rb +14 -0
  92. data/test/scout/model/test_base.rb +117 -0
  93. data/test/scout/model/util/test_save.rb +31 -0
  94. metadata +113 -7
  95. data/README.rdoc +0 -18
  96. data/questions/coach +0 -2
@@ -0,0 +1,28 @@
1
+ require 'mcp'
2
+
3
module Workflow
  # Build an MCP server that exposes tasks of this workflow as MCP tools.
  #
  # @param tasks [Array<Symbol, String>] names of the tasks to expose. Nested
  #   arrays are flattened and nils dropped; when nothing is left every key
  #   of +self.tasks+ is exposed.
  # @return [MCP::Server] a server named after the workflow with one tool
  #   per selected task.
  def mcp(*tasks)
    tasks = tasks.flatten.compact
    tasks = self.tasks.keys if tasks.empty?

    # The tool block must not depend on what +self+ or +name+ resolve to at
    # the moment the MCP library runs it, so the workflow is captured here
    # and the task is taken from the enclosing iteration.
    workflow = self

    tools = tasks.collect do |task|
      tool_definition = LLM.task_tool_definition(workflow, task)[:function]

      # Only the parts of the JSON schema that describe the inputs are passed on
      input_schema = tool_definition[:parameters].slice(:properties, :required)

      # NOTE(review): every task is advertised as read-only, non-destructive,
      # idempotent and closed-world regardless of what it does -- confirm
      # that this holds for the tasks being exposed.
      annotations = tool_definition.slice(:title)
      annotations[:read_only_hint] = true
      annotations[:destructive_hint] = false
      annotations[:idempotent_hint] = true
      annotations[:open_world_hint] = false

      MCP::Tool.define(
        name: task.to_s,
        description: tool_definition[:description],
        input_schema: input_schema,
        annotations: annotations
      ) do |parameters, _context|
        # NOTE(review): this returns the job as built by Workflow#job; it is
        # neither run here nor wrapped in an MCP response object -- confirm
        # what the MCP server expects a tool to return.
        workflow.job(task, parameters)
      end
    end

    MCP::Server.new(
      name: self.name,
      version: "1.0.0",
      tools: tools
    )
  end
end
@@ -1,4 +1,34 @@
1
+ require 'scout/llm/utils'
1
2
  module LLM
3
# Expand the +inside+ text of a block found by LLM.parse.
#
# The first line of +inside+ is a header; everything after it is the content
# (a single trailing newline is dropped). An empty header returns the content
# untouched. Otherwise the first word of the header is an action and the rest
# of the header is used as a title:
#
# * +cmd+       - runs the content through CMD.cmd and tags its output as a file
# * +file+      - reads the file named by the content and tags it
# * +directory+ - tags every file found under the directory named by the content
# * anything else is tagged as-is using the action as the tag name
#
# @param inside [String] raw text of the block
# @return [String, nil] the expanded text; +nil+ for the +import+ action
def self.process_inside(inside)
  # /m lets the content span several lines; without it everything after the
  # first content line was silently dropped.
  parts = inside.match(/\A([^\n]*)\n(.*?)\n?\z/m)

  # A block without any newline has no header line to interpret
  return inside if parts.nil?

  header, content = parts.values_at 1, 2
  return content if header.empty?

  action, _sep, rest = header.partition(/\s/)
  case action
  when 'import'
    # NOTE(review): nothing is produced for 'import', so the caller's
    # `content += process_inside(...)` receives nil -- confirm the intended
    # behaviour of this action.
    nil
  when 'cmd'
    title = rest.strip.empty? ? content : rest
    tag('file', title, CMD.cmd(content).read)
  when 'file'
    file = content
    title = rest.strip.empty? ? file : rest
    tag(action, title, Open.read(file))
  when 'directory'
    directory = content
    title = rest.strip.empty? ? directory : rest
    # The recursive glob also yields sub-directories, which cannot be read
    files = Dir.glob(File.join(directory, '**/*')).select { |path| File.file?(path) }
    directory_content = files.collect do |file|
      file_title = Misc.path_relative_to(directory, file)
      tag('file', file_title, Open.read(file))
    end * "\n"
    tag(action, title, directory_content)
  else
    tag(action, rest, content)
  end
end
31
+
2
32
  def self.parse(question, role = nil)
3
33
  role = :user if role.nil?
4
34
 
@@ -12,21 +42,49 @@ module LLM
12
42
  inside = m[2]
13
43
  post = m[3]
14
44
  messages = parse(pre, role)
15
- messages.last[:content] += "\n" + inside
16
- messages.concat parse(post, role)
45
+
46
+ messages = [{role: role, content: ''}] if messages.empty?
47
+ messages.last[:content] += process_inside inside
48
+
49
+ last = parse(post, messages.last[:role])
50
+
51
+ messages.concat last
52
+
53
+ messages
54
+ elsif m = question.match(/(.*?)(```.*?```)(.*)/m)
55
+ pre = m[1]
56
+ inside = m[2]
57
+ post = m[3]
58
+ messages = parse(pre, role)
59
+
60
+ messages = [{role: role, content: ''}] if messages.empty?
61
+ messages.last[:content] += inside
62
+
63
+ last = parse(post, messages.last[:role])
64
+
65
+ if last.first[:role] == messages.last[:role]
66
+ m = last.shift
67
+ messages.last[:content] += m[:content]
68
+ end
69
+
70
+ messages.concat last
71
+
72
+ messages
17
73
  else
18
- question.split("\n").collect do |line|
19
- if line.include?("\t")
20
- question_role, _sep, q = line.partition("\t")
21
- elsif m = line.match(/^([^\s]*): ?(.*)/)
22
- question_role, q = m.values_at 1, 2
23
- else
24
- question_role = role
25
- q = line
74
+ chunks = question.scan(/(.*?)^(\w+):(.*?)(?=^\w+:|\z)/m)
75
+
76
+ if chunks.any?
77
+ messages = []
78
+ messages << {role: role, content: chunks.first.first} if chunks.first and not chunks.first.first.empty?
79
+ chunks.collect do |pre,role,text|
80
+ messages << {role: role, content: text.strip}
26
81
  end
27
- next if q.empty?
28
- {role: question_role, content: q}
29
- end.compact
82
+ messages
83
+ elsif question.strip.empty?
84
+ []
85
+ else
86
+ [{role: role, content: question}]
87
+ end
30
88
  end
31
89
  end
32
90
  end
data/lib/scout/llm/rag.rb CHANGED
@@ -12,5 +12,14 @@ module LLM
12
12
  end
13
13
  t
14
14
  end
15
+
16
# Load a previously saved HNSW index from disk.
#
# @param path [String] location of the saved index
# @param dim [Integer] dimension of the vectors stored in the index
# @return [Hnswlib::HierarchicalNSW] an 'l2' space index with the saved data loaded
def self.load(path, dim)
  require 'hnswlib'

  Hnswlib::HierarchicalNSW.new(space: 'l2', dim: dim).tap do |index|
    index.load_index(path)
  end
end
15
24
  end
16
25
  end
@@ -0,0 +1,66 @@
1
module LLM
  # Extract the id, function name and arguments from a tool call.
  #
  # Two layouts are understood: the name and arguments nested under a
  # "function" key, or placed directly at the top level. The id is read from
  # "call_id" and, failing that, from "id".
  #
  # @param tool_call [Hash] tool call with String keys
  # @return [Array] +[id, name, arguments]+; String arguments are parsed as
  #   JSON with symbol keys, and a blank String becomes an empty Hash
  # @raise [JSON::ParserError] when a non-blank arguments String is not valid JSON
  def self.call_id_name_and_arguments(tool_call)
    tool_call_id = tool_call.dig("call_id") || tool_call.dig("id")
    if tool_call['function']
      function_name = tool_call.dig("function", "name")
      function_arguments = tool_call.dig("function", "arguments")
    else
      function_name = tool_call.dig("name")
      function_arguments = tool_call.dig("arguments")
    end

    if String === function_arguments
      # A blank string means "no arguments"; JSON.parse would raise on it
      function_arguments = if function_arguments.strip.empty?
                             {}
                           else
                             JSON.parse(function_arguments, { symbolize_names: true })
                           end
    end

    [tool_call_id, function_name, function_arguments]
  end

  # Execute a list of tool calls and build the messages that record them.
  #
  # Each entry of +tools+ is an +[executor, definition]+ pair indexed by
  # function name. A Proc executor is called with the name and arguments; a
  # Workflow or KnowledgeBase is dispatched to call_workflow or
  # call_knowledge_base; anything else is handed to the block, if given.
  #
  # @param tools [Hash] function name => +[executor, definition]+
  # @param calls [Array<Hash>] tool calls as accepted by call_id_name_and_arguments
  # @yield [function_name, function_arguments] fallback executor
  # @return [Array<Hash>] for every call a "function_call" message followed
  #   by its "function_call_output" message, both with JSON content
  # @raise [RuntimeError] when a function has no usable executor and no block is given
  def self.process_calls(tools, calls, &block)
    IndiferentHash.setup tools
    calls.collect do |tool_call|
      tool_call_id, function_name, function_arguments = call_id_name_and_arguments(tool_call)

      obj, _definition = tools[function_name]

      function_response = case obj
                          when Proc
                            obj.call function_name, function_arguments
                          when Workflow
                            call_workflow(obj, function_name, function_arguments)
                          when KnowledgeBase
                            call_knowledge_base(obj, function_name, function_arguments)
                          else
                            if block_given?
                              block.call function_name, function_arguments
                            else
                              raise "Unknown executor #{Log.fingerprint obj} for function #{function_name}"
                            end
                          end

      # The content is always a String: text is kept, nil is reported as
      # "success", and anything else is serialized as JSON.
      content = case function_response
                when String
                  function_response
                when nil
                  "success"
                when Exception
                  {exception: function_response.message, stack: function_response.backtrace }.to_json
                else
                  function_response.to_json
                end

      response_message = {
        id: tool_call_id,
        role: "tool",
        content: content
      }

      # Recorded calls always carry their identifier under 'id'
      function_call = tool_call.dup
      function_call['id'] = function_call.delete('call_id') if function_call.dig('call_id')

      [
        {role: "function_call", content: function_call.to_json},
        {role: "function_call_output", content: response_message.to_json},
      ]
    end.flatten
  end
end
@@ -0,0 +1,158 @@
1
+ require 'scout/knowledge_base'
2
+
3
module LLM
  # Tool definition for querying the associations of one database.
  #
  # @param database [String] database name, also used as the tool name
  # @param undirected [Boolean] when true the tool only takes +entities+;
  #   otherwise it also accepts a +reverse+ flag
  # @param database_description [String, nil] optional text appended to the
  #   tool description
  # @return [Hash] the function fields merged with +type: 'function'+ and a
  #   nested +function+ entry, passed through IndiferentHash.setup
  def self.database_tool_definition(database, undirected = false, database_description = nil)
    properties = if undirected
                   {
                     entities: {
                       type: "array",
                       items: { type: :string },
                       description: "Entities for which to find associations"
                     },
                   }
                 else
                   {
                     entities: {
                       type: "array",
                       items: { type: :string },
                       description: "Source entities in the association, or target entities if 'reverse' is 'true'"
                     },
                     reverse: {
                       type: "boolean",
                       description: "Look for targets instead of sources, defaults to 'false'"
                     }
                   }
                 end

    if database_description and not database_description.strip.empty?
      description = <<~EOF
        Find associations for a list of entities in database #{database}: #{database_description}
      EOF
    else
      description = <<~EOF
        Find associations for a list of entities in database #{database}.
      EOF
    end

    if undirected
      description += <<~EOF
        Returns a list in the format entity~partner.
      EOF
    else
      description += <<~EOF
        Returns a list in the format source~target.
      EOF
    end

    function = {
      name: database,
      description: description,
      parameters: {
        type: "object",
        properties: properties,
        required: ['entities']
      }
    }

    IndiferentHash.setup function.merge(type: 'function', function: function)
  end

  # Tool definition for retrieving the detail fields of associations.
  #
  # The tool is named "<database>_association_details". When the database has
  # more than one field the tool also accepts a +fields+ list restricted to
  # those field names, matching what call_knowledge_base iterates over.
  #
  # @param database [String] database name
  # @param undirected [Boolean] whether associations may be given in either order
  # @param fields [Array<String>] names of the detail fields of the database
  # @return [Hash] the function fields merged with +type: 'function'+ and a
  #   nested +function+ entry, passed through IndiferentHash.setup
  def self.database_details_tool_definition(database, undirected, fields)
    association_format = undirected ? "source~target or target~source" : "source~target"

    properties = {
      associations: {
        type: "array",
        items: { type: :string },
        description: "Associations in the form of #{association_format}"
      },
    }

    if fields.length > 1
      # Offering a choice only makes sense when there is more than one field
      properties[:fields] = {
        type: "array",
        items: { type: :string, enum: fields },
        description: "Limit the response to these detail fields"
      }
      description = <<~EOF
        Return details of association as a dictionary object.
        Each key is an association and the value is an array with the values of the different fields you asked for, or for all fields otherwise.
        The fields are: #{fields * ', '}.
        Multiple values may be present and use the character ';' to separate them.
      EOF
    else
      description = <<~EOF
        Return the #{fields.first} of association.
        Multiple values may be present and use the character ';' to separate them.
      EOF
    end

    function = {
      name: "#{database}_association_details",
      description: description,
      parameters: {
        type: "object",
        properties: properties,
        required: ['associations']
      }
    }

    IndiferentHash.setup function.merge(type: 'function', function: function)
  end

  # Tool definitions for the databases of a knowledge base.
  #
  # Every database gets an association tool; databases that have detail
  # fields also get a "<database>_association_details" tool.
  #
  # @param knowledge_base [KnowledgeBase] source of the databases
  # @param databases [Array<String>, nil] databases to expose; defaults to
  #   +knowledge_base.all_databases+
  # @return [Hash] tool name => +[knowledge_base, definition]+
  def self.knowledge_base_tool_definition(knowledge_base, databases = nil)
    databases ||= knowledge_base.all_databases

    databases.each_with_object({}) do |database, tool_definitions|
      database_description = knowledge_base.description(database)
      undirected = knowledge_base.undirected(database)

      definition = self.database_tool_definition(database, undirected, database_description)
      tool_definitions[database] = [knowledge_base, definition]

      fields = knowledge_base.get_database(database).fields
      next if fields.nil? || fields.empty?

      details_definition = self.database_details_tool_definition(database, undirected, fields)
      tool_definitions["#{database}_association_details"] = [knowledge_base, details_definition]
    end
  end

  # Execute one of the tools produced by knowledge_base_tool_definition.
  #
  # Names ending in "_association_details" look the associations up in the
  # index of the database and return their field values; any other name
  # returns the children (or the parents when +reverse+ is set) of the entities.
  #
  # @param knowledge_base [KnowledgeBase] knowledge base to query
  # @param database [String] tool name, as given in the tool definition
  # @param parameters [Hash] arguments of the call: +associations+ and
  #   optionally +fields+ for detail tools, +entities+ and optionally
  #   +reverse+ otherwise
  # @return [Hash, Object] association => values for detail tools (unknown
  #   associations are left out); otherwise whatever +parents+ / +children+ return
  def self.call_knowledge_base(knowledge_base, database, parameters={})
    tool_name = database.to_s
    if tool_name.end_with?('_association_details')
      # Only the trailing marker is removed, never an occurrence inside the name
      database = tool_name.delete_suffix('_association_details')
      associations, fields = IndiferentHash.process_options parameters, :associations, :fields
      index = knowledge_base.get_index(database)

      # Accept a single field name as well as a list; no fields means all of them
      field_pos = Array(fields).collect { |f| index.identify_field f }

      associations.each_with_object({}) do |association, details|
        values = index[association]
        next if values.nil?
        details[association] = field_pos.empty? ? values : values.values_at(*field_pos)
      end
    else
      entities, reverse = IndiferentHash.process_options parameters, :entities, :reverse
      if reverse
        knowledge_base.parents(database, entities)
      else
        knowledge_base.children(database, entities)
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require_relative '../utils'
2
+ require 'mcp_client'
3
+
4
module LLM
  # List the tools offered by an MCP server and wrap them as callable tools.
  #
  # @param url [String] address of the server, or the literal 'stdio' to
  #   build a stdio client configured entirely from +options+
  # @param options [Hash] extra settings merged into the server configuration
  #   handed to MCPClient.create_client
  # @return [Hash] tool name => +[executor, definition]+, where the executor
  #   is a Proc taking +(name, params)+ and the definition is a function-style
  #   tool description
  def self.mcp_tools(url, options = {})
    if url == 'stdio'
      client = MCPClient.create_client(mcp_server_configs: [options.merge(type: 'stdio')])
    else
      # NOTE(review): the value resolved here is not used -- the client below
      # is always configured with type 'http'. The call is kept for whatever
      # effect process_options has on +options+; confirm the intent.
      IndiferentHash.process_options options, :type,
        type: (Open.remote?(url) ? :http : :stdio)

      config = options.merge(type: 'http', url: url)

      if url && Open.remote?(url)
        token = LLM.get_url_config(:key, url, :mcp)
        # Only authenticate when a key is actually configured, and keep any
        # headers supplied by the caller instead of replacing them.
        unless token.nil? || token.to_s.empty?
          config[:headers] = (options[:headers] || {}).merge('Authorization' => "Bearer #{token}")
        end
      end

      client = MCPClient.create_client(mcp_server_configs: [config])
    end

    tool_definitions = IndiferentHash.setup({})

    client.list_tools.each do |tool|
      tool_name = tool.name

      function = {
        name: tool_name,
        description: tool.description,
        parameters: tool.schema
      }
      definition = IndiferentHash.setup function.merge(type: 'function', function: function)

      executor = Proc.new do |called_name, params|
        res = tool.server.call_tool(called_name, params)

        # Peel the response down to its payload: an outer 'content' entry, a
        # single-element list, a nested 'content' entry and finally 'text'.
        res = res['content'] if Hash === res && res['content']
        res = res.first if Array === res and res.length == 1
        res = res['content'] if Hash === res && res['content']
        res = res['text'] if Hash === res && res['text']

        res
      end

      tool_definitions[tool_name] = [executor, definition]
    end

    tool_definitions
  end
end
@@ -0,0 +1,69 @@
1
+ require 'scout/workflow'
2
module LLM
  # Describe a workflow task as a function-style tool definition.
  #
  # @param workflow [Workflow] workflow that answers +task_info+
  # @param task_name [Symbol, String] task to describe
  # @param inputs [Array, nil] when given, only these inputs are described
  # @return [Hash] the function fields merged with +type: 'function'+ and a
  #   nested +function+ entry, passed through IndiferentHash.setup
  def self.task_tool_definition(workflow, task_name, inputs = nil)
    info = workflow.task_info(task_name)

    inputs = inputs.map(&:to_sym) if inputs

    # Workflow input types that are reported under a different name
    type_aliases = { text: :string, select: :string, path: :string, float: :number }

    properties = {}
    info[:inputs].each do |input|
      next if inputs && !inputs.include?(input)

      declared_type = info[:input_types][input]
      spec = {
        type: type_aliases.fetch(declared_type, declared_type),
        description: info[:input_descriptions][input]
      }
      properties[input] = spec

      input_options = info[:input_options][input]
      next unless input_options

      choices = input_options[:select_options]
      next unless choices

      # A Hash of choices contributes its values
      choices = choices.values if Hash === choices
      spec["enum"] = choices
    end

    required_inputs = info[:inputs].select do |input|
      selected = !inputs || inputs.include?(input.to_sym)
      selected && info[:input_options].include?(input) && info[:input_options][input][:required]
    end

    function = {
      name: task_name,
      description: info[:description],
      parameters: {
        type: "object",
        properties: properties,
        required: required_inputs
      }
    }

    IndiferentHash.setup function.merge(type: 'function', function: function)
  end

  # Tool definitions for several tasks of a workflow.
  #
  # @param workflow [Workflow] workflow owning the tasks
  # @param tasks [Array, nil] task names; +nil+ selects +workflow.all_exports+
  #   and an empty list selects +workflow.all_tasks+
  # @return [Hash] task name => +[workflow, definition]+
  def self.workflow_tools(workflow, tasks = nil)
    tasks = workflow.all_exports if tasks.nil?
    tasks = workflow.all_tasks if tasks.empty?

    tasks.each_with_object({}) do |task_name, tool_definitions|
      tool_definitions[task_name] = [workflow, self.task_tool_definition(workflow, task_name)]
    end
  end

  # Run a workflow task on behalf of a tool call.
  #
  # @param workflow [Workflow] workflow owning the task
  # @param task_name [Symbol, String] task to run
  # @param parameters [Hash] task inputs; a +:jobname+ entry is removed from
  #   the hash and used as the job name
  # @return [Object] the result of +exec+ for tasks listed in
  #   +workflow.exec_exports+, of +run+ otherwise
  def self.call_workflow(workflow, task_name, parameters={})
    jobname = parameters.delete :jobname
    direct = workflow.exec_exports.include?(task_name.to_sym)
    job = workflow.job(task_name, jobname, parameters)
    direct ? job.exec : job.run
  end
end
+ end