ollama-client 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/README.md +336 -91
- data/RELEASE_NOTES_v0.2.6.md +41 -0
- data/docs/AREAS_FOR_CONSIDERATION.md +325 -0
- data/docs/EXAMPLE_REORGANIZATION.md +412 -0
- data/docs/FEATURES_ADDED.md +12 -1
- data/docs/GETTING_STARTED.md +361 -0
- data/docs/INTEGRATION_TESTING.md +170 -0
- data/docs/NEXT_STEPS_SUMMARY.md +114 -0
- data/docs/PERSONAS.md +383 -0
- data/docs/QUICK_START.md +195 -0
- data/docs/TESTING.md +392 -170
- data/docs/TEST_CHECKLIST.md +450 -0
- data/examples/README.md +62 -63
- data/examples/basic_chat.rb +33 -0
- data/examples/basic_generate.rb +29 -0
- data/examples/mcp_executor.rb +39 -0
- data/examples/mcp_http_executor.rb +45 -0
- data/examples/tool_calling_parsing.rb +59 -0
- data/examples/tool_dto_example.rb +0 -0
- data/exe/ollama-client +128 -1
- data/lib/ollama/agent/planner.rb +7 -2
- data/lib/ollama/chat_session.rb +101 -0
- data/lib/ollama/client.rb +41 -35
- data/lib/ollama/config.rb +9 -4
- data/lib/ollama/document_loader.rb +1 -1
- data/lib/ollama/embeddings.rb +61 -28
- data/lib/ollama/errors.rb +1 -0
- data/lib/ollama/mcp/http_client.rb +149 -0
- data/lib/ollama/mcp/stdio_client.rb +146 -0
- data/lib/ollama/mcp/tools_bridge.rb +72 -0
- data/lib/ollama/mcp.rb +31 -0
- data/lib/ollama/options.rb +3 -1
- data/lib/ollama/personas.rb +287 -0
- data/lib/ollama/version.rb +1 -1
- data/lib/ollama_client.rb +17 -5
- metadata +22 -48
- data/examples/advanced_complex_schemas.rb +0 -366
- data/examples/advanced_edge_cases.rb +0 -241
- data/examples/advanced_error_handling.rb +0 -200
- data/examples/advanced_multi_step_agent.rb +0 -341
- data/examples/advanced_performance_testing.rb +0 -186
- data/examples/chat_console.rb +0 -143
- data/examples/complete_workflow.rb +0 -245
- data/examples/dhan_console.rb +0 -843
- data/examples/dhanhq/README.md +0 -236
- data/examples/dhanhq/agents/base_agent.rb +0 -74
- data/examples/dhanhq/agents/data_agent.rb +0 -66
- data/examples/dhanhq/agents/orchestrator_agent.rb +0 -120
- data/examples/dhanhq/agents/technical_analysis_agent.rb +0 -252
- data/examples/dhanhq/agents/trading_agent.rb +0 -81
- data/examples/dhanhq/analysis/market_structure.rb +0 -138
- data/examples/dhanhq/analysis/pattern_recognizer.rb +0 -192
- data/examples/dhanhq/analysis/trend_analyzer.rb +0 -88
- data/examples/dhanhq/builders/market_context_builder.rb +0 -67
- data/examples/dhanhq/dhanhq_agent.rb +0 -829
- data/examples/dhanhq/indicators/technical_indicators.rb +0 -158
- data/examples/dhanhq/scanners/intraday_options_scanner.rb +0 -492
- data/examples/dhanhq/scanners/swing_scanner.rb +0 -247
- data/examples/dhanhq/schemas/agent_schemas.rb +0 -61
- data/examples/dhanhq/services/base_service.rb +0 -46
- data/examples/dhanhq/services/data_service.rb +0 -118
- data/examples/dhanhq/services/trading_service.rb +0 -59
- data/examples/dhanhq/technical_analysis_agentic_runner.rb +0 -411
- data/examples/dhanhq/technical_analysis_runner.rb +0 -420
- data/examples/dhanhq/test_tool_calling.rb +0 -538
- data/examples/dhanhq/test_tool_calling_verbose.rb +0 -251
- data/examples/dhanhq/utils/instrument_helper.rb +0 -32
- data/examples/dhanhq/utils/parameter_cleaner.rb +0 -28
- data/examples/dhanhq/utils/parameter_normalizer.rb +0 -45
- data/examples/dhanhq/utils/rate_limiter.rb +0 -23
- data/examples/dhanhq/utils/trading_parameter_normalizer.rb +0 -72
- data/examples/dhanhq_agent.rb +0 -964
- data/examples/dhanhq_tools.rb +0 -1663
- data/examples/multi_step_agent_with_external_data.rb +0 -368
- data/examples/structured_outputs_chat.rb +0 -72
- data/examples/structured_tools.rb +0 -89
- data/examples/test_dhanhq_tool_calling.rb +0 -375
- data/examples/test_tool_calling.rb +0 -160
- data/examples/tool_calling_direct.rb +0 -124
- data/examples/tool_calling_pattern.rb +0 -269
- data/exe/dhan_console +0 -4
data/examples/mcp_executor.rb ADDED

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+# Example: Use a local MCP server's tools with Ollama::Agent::Executor.
+#
+# Prerequisites:
+# - Ollama running (localhost:11434)
+# - Node.js/npx (for @modelcontextprotocol/server-filesystem)
+#
+# Run:
+#   ruby examples/mcp_executor.rb
+#
+# This connects to the MCP filesystem server, fetches its tools, and runs
+# the Executor so the LLM can call those tools (e.g. list directory, read file).
+
+require_relative "../lib/ollama_client"
+
+ollama = Ollama::Client.new
+
+# Local MCP server via stdio; allow /tmp and the project directory
+project_root = File.expand_path("..", __dir__)
+mcp_client = Ollama::MCP::StdioClient.new(
+  command: "npx",
+  args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp", project_root],
+  timeout_seconds: 60
+)
+
+bridge = Ollama::MCP::ToolsBridge.new(stdio_client: mcp_client)
+tools = bridge.tools_for_executor
+
+executor = Ollama::Agent::Executor.new(ollama, tools: tools)
+
+answer = executor.run(
+  system: "You have access to filesystem tools. Use them when the user asks about files or directories.",
+  user: "What files are in ~/project/ollama-client? List a few."
+)
+
+puts answer
+
+mcp_client.close
data/examples/mcp_http_executor.rb ADDED

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+# Example: Use a remote MCP server (HTTP URL) with Ollama::Agent::Executor.
+#
+# Works with GitMCP and any MCP-over-HTTP endpoint:
+#   https://gitmcp.io/owner/repo → MCP server for that GitHub repo
+#
+# Prerequisites:
+# - Ollama running (localhost:11434)
+# - Network access to the MCP URL
+#
+# Run:
+#   ruby examples/mcp_http_executor.rb
+#
+# To add this MCP to Cursor, use ~/.cursor/mcp.json:
+#   {
+#     "mcpServers": {
+#       "agent-runtime Docs": {
+#         "url": "https://gitmcp.io/shubhamtaywade82/agent-runtime"
+#       }
+#     }
+#   }
+
+require "ollama_client"
+
+client = Ollama::Client.new
+
+mcp_client = Ollama::MCP::HttpClient.new(
+  url: "https://gitmcp.io/shubhamtaywade82/agent-runtime",
+  timeout_seconds: 60
+)
+
+bridge = Ollama::MCP::ToolsBridge.new(client: mcp_client)
+tools = bridge.tools_for_executor
+
+executor = Ollama::Agent::Executor.new(client, tools: tools)
+
+answer = executor.run(
+  system: "You have access to the agent-runtime repository docs. Use tools when the user asks about the repo.",
+  user: "What does this repository do? Summarize briefly."
+)
+
+puts answer
+
+mcp_client.close
data/examples/tool_calling_parsing.rb ADDED

@@ -0,0 +1,59 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Example: Tool-call parsing (no execution)
+# Demonstrates client transport layer - tool-call detection and extraction
+# NOTE: This example does NOT execute tools. It only parses tool calls from the LLM response.
+
+require "json"
+require_relative "../lib/ollama_client"
+
+client = Ollama::Client.new
+
+# Define tool using Tool classes
+tool = Ollama::Tool.new(
+  type: "function",
+  function: Ollama::Tool::Function.new(
+    name: "get_weather",
+    description: "Get weather for a location",
+    parameters: Ollama::Tool::Function::Parameters.new(
+      type: "object",
+      properties: {
+        location: Ollama::Tool::Function::Parameters::Property.new(
+          type: "string",
+          description: "The city name"
+        )
+      },
+      required: %w[location]
+    )
+  )
+)
+
+# Request tool call from LLM
+response = client.chat_raw(
+  messages: [{ role: "user", content: "What's the weather in Paris?" }],
+  tools: tool,
+  allow_chat: true
+)
+
+# Parse tool calls (but do NOT execute)
+tool_calls = response.message&.tool_calls
+
+if tool_calls && !tool_calls.empty?
+  puts "Tool calls detected:"
+  tool_calls.each do |call|
+    # Access via method (if available)
+    name = call.respond_to?(:name) ? call.name : call["function"]["name"]
+    args = call.respond_to?(:arguments) ? call.arguments : JSON.parse(call["function"]["arguments"])
+
+    puts "  Tool: #{name}"
+    puts "  Arguments: #{args.inspect}"
+    puts "  (Tool execution would happen here in your agent code)"
+  end
+else
+  puts "No tool calls in response"
+  puts "Response: #{response.message&.content}"
+end
+
+# Alternative: Access via hash
+# tool_calls = response.to_h.dig("message", "tool_calls")
data/examples/tool_dto_example.rb (file without changes)

data/exe/ollama-client CHANGED
@@ -1,4 +1,131 @@
 #!/usr/bin/env ruby
 # frozen_string_literal: true

-
+# Chat console using ChatSession with TTY reader for input only
+# Note: dotenv is automatically loaded by lib/ollama_client.rb
+require_relative "../lib/ollama_client"
+require "tty-reader"
+
+# Build config from environment or defaults
+config = Ollama::Config.new
+config.base_url = ENV["OLLAMA_BASE_URL"] if ENV["OLLAMA_BASE_URL"]
+config.model = ENV["OLLAMA_MODEL"] || config.model
+config.temperature = ENV["OLLAMA_TEMPERATURE"].to_f if ENV["OLLAMA_TEMPERATURE"]
+
+# Enable chat + streaming (required for ChatSession)
+config.allow_chat = true
+config.streaming_enabled = true
+
+client = Ollama::Client.new(config: config)
+
+# Create streaming observer for real-time token display
+observer = Ollama::StreamingObserver.new do |event|
+  case event.type
+  when :token
+    print event.text
+    $stdout.flush
+  when :tool_call_detected
+    puts "\n[Tool call: #{event.name}]"
+  when :final
+    puts "\n"
+  end
+end
+
+# Create chat session with optional system message from env
+system_prompt = ENV.fetch("OLLAMA_SYSTEM", "You are a helpful assistant.")
+chat = Ollama::ChatSession.new(client, system: system_prompt, stream: observer)
+
+# Setup TTY reader for input with history
+def build_reader
+  TTY::Reader.new
+end
+
+def read_input(reader)
+  # Pass prompt directly to read_line - this is how TTY::Reader is designed to work
+  reader.read_line("You: ")
+end
+
+HISTORY_PATH = File.expand_path("~/.ollama_chat_history")
+
+def load_history(reader, path)
+  return unless File.exist?(path)
+
+  File.readlines(path, chomp: true).reverse_each do |line|
+    reader.add_to_history(line) unless line.strip.empty?
+  end
+rescue StandardError
+  # Ignore history loading errors
+end
+
+def save_history(path, text)
+  return if text.strip.empty?
+
+  history = []
+  history = File.readlines(path, chomp: true) if File.exist?(path)
+  history.delete(text)
+  history.unshift(text)
+  history = history.first(200) # Limit history size
+
+  File.write(path, "#{history.join("\n")}\n")
+rescue StandardError
+  # Ignore history saving errors
+end
+
+# Print simple banner
+puts "Ollama Chat Console"
+puts "Model: #{config.model}"
+puts "Base URL: #{config.base_url}"
+puts "Type 'quit' or 'exit' to exit, 'clear' to reset history."
+puts
+
+# Setup reader and load history
+reader = build_reader
+load_history(reader, HISTORY_PATH)
+
+# Main loop
+begin
+  loop do
+    # Use TTY reader with prompt (supports history, arrow keys, editing)
+    input = read_input(reader)
+
+    break if input.nil?
+
+    text = input.strip
+    next if text.empty?
+
+    # Handle commands
+    case text.downcase
+    when "quit", "exit", "/quit", "/exit"
+      puts "\nGoodbye!\n"
+      break
+    when "clear", "/clear"
+      chat.clear
+      puts "Conversation history cleared.\n\n"
+      next
+    end
+
+    # Save to history
+    save_history(HISTORY_PATH, text)
+
+    # Assistant response
+    print "Assistant: "
+
+    begin
+      response = chat.say(text)
+      # Ensure newline after streaming
+      puts "" if response.empty?
+    rescue Ollama::ChatNotAllowedError => e
+      puts "\n❌ Error: #{e.message}"
+      puts "Make sure config.allow_chat = true"
+    rescue Ollama::Error => e
+      puts "\n❌ Error: #{e.message}"
+    end
+
+    puts "" # Blank line between exchanges
+  end
+rescue Interrupt
+  puts "\n\nInterrupted. Goodbye!\n"
+rescue StandardError => e
+  puts "\nUnexpected error: #{e.message}\n"
+  puts "#{e.backtrace.first}\n" if ENV["DEBUG"]
+end
data/lib/ollama/agent/planner.rb CHANGED

@@ -21,17 +21,22 @@ module Ollama
        ]
      }.freeze

-      def initialize(client)
+      def initialize(client, system_prompt: nil)
        @client = client
+        @system_prompt = system_prompt
      end

      # @param prompt [String]
      # @param context [Hash, nil]
      # @param schema [Hash, nil]
+      # @param system_prompt [String, nil] Optional system prompt override for this call
      # @return [Object] Parsed JSON (Hash/Array/String/Number/Boolean/Nil)
-      def run(prompt:, context: nil, schema: nil)
+      def run(prompt:, context: nil, schema: nil, system_prompt: nil)
+        effective_system = system_prompt || @system_prompt
        full_prompt = prompt.to_s

+        full_prompt = "#{effective_system}\n\n#{full_prompt}" if effective_system && !effective_system.empty?
+
        if context && !context.empty?
          full_prompt = "#{full_prompt}\n\nContext (JSON):\n#{JSON.pretty_generate(context)}"
        end
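The Planner now takes an optional system prompt, either at construction time or as a per-call override. A minimal usage sketch of the new parameter; the prompt texts below are illustrative, not from the gem:

require "ollama_client"

client = Ollama::Client.new
planner = Ollama::Agent::Planner.new(client, system_prompt: "You are a terse planning assistant.")

# The per-call keyword takes precedence over the constructor value for that call only.
plan = planner.run(
  prompt: "Plan the steps to summarize a CSV file.",
  system_prompt: "Answer as a numbered plan only."
)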
data/lib/ollama/chat_session.rb ADDED

@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+require_relative "streaming_observer"
+require_relative "agent/messages"
+
+module Ollama
+  # Stateful chat session for human-facing interactions.
+  #
+  # Chat sessions maintain conversation history and support streaming
+  # for presentation purposes. They are isolated from agent internals
+  # to preserve deterministic behavior in schema-first workflows.
+  #
+  # Example:
+  #   client = Ollama::Client.new(config: Ollama::Config.new.tap { |c| c.allow_chat = true })
+  #   observer = Ollama::StreamingObserver.new do |event|
+  #     print event.text if event.type == :token
+  #   end
+  #   chat = Ollama::ChatSession.new(client, system: "You are helpful", stream: observer)
+  #   chat.say("Hello")
+  #   chat.say("Explain Ruby blocks")
+  class ChatSession
+    attr_reader :messages
+
+    def initialize(client, system: nil, stream: nil)
+      @client = client
+      @messages = []
+      @stream = stream
+      @messages << Agent::Messages.system(system) if system
+    end
+
+    # Send a user message and get assistant response.
+    #
+    # @param text [String] User message text
+    # @param model [String, nil] Model override (uses client config if nil)
+    # @param format [Hash, nil] Optional JSON schema for formatting (best-effort, not guaranteed)
+    # @param tools [Tool, Array<Tool>, Array<Hash>, nil] Optional tool definitions
+    # @param options [Hash] Additional options (temperature, top_p, etc.)
+    # @return [String] Assistant response content
+    def say(text, model: nil, format: nil, tools: nil, options: {})
+      @messages << Agent::Messages.user(text)
+
+      response = if @stream
+                   stream_response(model: model, format: format, tools: tools, options: options)
+                 else
+                   non_stream_response(model: model, format: format, tools: tools, options: options)
+                 end
+
+      content = response["message"]&.dig("content") || ""
+      tool_calls = response["message"]&.dig("tool_calls")
+
+      @messages << Agent::Messages.assistant(content, tool_calls: tool_calls) if content || tool_calls
+
+      content
+    end
+
+    # Clear conversation history (keeps system message if present).
+    def clear
+      system_msg = @messages.find { |m| m["role"] == "system" }
+      @messages = system_msg ? [system_msg] : []
+    end
+
+    private
+
+    def stream_response(model:, format:, tools:, options:)
+      @client.chat_raw(
+        messages: @messages,
+        model: model,
+        format: format,
+        tools: tools,
+        options: options,
+        allow_chat: true,
+        stream: true
+      ) do |chunk|
+        delta = chunk.dig("message", "content")
+        @stream.emit(:token, text: delta.to_s) if delta && !delta.to_s.empty?
+
+        calls = chunk.dig("message", "tool_calls")
+        if calls.is_a?(Array)
+          calls.each do |call|
+            name = call.dig("function", "name") || call["name"]
+            @stream.emit(:tool_call_detected, name: name, data: call) if name
+          end
+        end
+
+        # Emit final event when stream completes
+        @stream.emit(:final) if chunk["done"] == true
+      end
+    end
+
+    def non_stream_response(model:, format:, tools:, options:)
+      @client.chat_raw(
+        messages: @messages,
+        model: model,
+        format: format,
+        tools: tools,
+        options: options,
+        allow_chat: true
+      )
+    end
+  end
+end
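Besides the streaming example in the class docs above, ChatSession also works without an observer. A minimal non-streaming sketch; the message contents are illustrative:

require "ollama_client"

config = Ollama::Config.new
config.allow_chat = true

client = Ollama::Client.new(config: config)
chat = Ollama::ChatSession.new(client, system: "You are helpful")

puts chat.say("Name three Ruby web frameworks.")
puts chat.say("Which of those is the oldest?") # history is carried between turns

chat.clear                 # drops the turns but keeps the system message
puts chat.messages.length  # => 1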
data/lib/ollama/client.rb CHANGED

@@ -207,6 +207,8 @@ module Ollama
    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/ParameterLists

    def generate(prompt:, schema: nil, model: nil, strict: false, return_meta: false)
+      validate_generate_params!(prompt, schema)
+
      attempts = 0
      @current_schema = schema # Store for prompt enhancement
      started_at = monotonic_time

@@ -227,39 +229,12 @@ module Ollama
        }
      )

-
-
-
-
-        return {
-          "data" => raw,
-          "meta" => {
-            "endpoint" => "/api/generate",
-            "model" => model || @config.model,
-            "attempts" => attempts,
-            "latency_ms" => elapsed_ms(started_at)
-          }
-        }
-      end
-
-      # Schema provided - parse and validate JSON
-      parsed = parse_json_response(raw)
-
-      # CRITICAL: If schema is provided, free-text output is forbidden
-      raise SchemaViolationError, "Empty or nil response when schema is required" if parsed.nil? || parsed.empty?
-
-      SchemaValidator.validate!(parsed, schema)
-      return parsed unless return_meta
-
-      {
-        "data" => parsed,
-        "meta" => {
-          "endpoint" => "/api/generate",
-          "model" => model || @config.model,
-          "attempts" => attempts,
-          "latency_ms" => elapsed_ms(started_at)
+      meta = {
+        model: model || @config.model,
+        attempts: attempts,
+        started_at: started_at
      }
+      process_generate_response(raw: raw, schema: schema, meta: meta, return_meta: return_meta)
    rescue NotFoundError => e
      # 404 errors are never retried, but we can suggest models
      enhanced_error = enhance_not_found_error(e)

@@ -364,12 +339,43 @@ module Ollama

    private

+    def validate_generate_params!(prompt, _schema)
+      raise ArgumentError, "prompt is required" if prompt.nil?
+      # schema is optional - nil means plain text/markdown response
+    end
+
+    def process_generate_response(raw:, schema:, meta:, return_meta:)
+      response_data = schema ? parse_and_validate_schema_response(raw, schema) : raw
+      return response_data unless return_meta
+
+      {
+        "data" => response_data,
+        "meta" => {
+          "endpoint" => "/api/generate",
+          "model" => meta[:model],
+          "attempts" => meta[:attempts],
+          "latency_ms" => elapsed_ms(meta[:started_at])
+        }
+      }
+    end
+
+    def parse_and_validate_schema_response(raw, schema)
+      parsed = parse_json_response(raw)
+      raise SchemaViolationError, "Empty or nil response when schema is required" if parsed.nil? || parsed.empty?
+
+      SchemaValidator.validate!(parsed, schema)
+      parsed
+    end
+
    def ensure_chat_allowed!(allow_chat:, strict:, method_name:)
-      return if allow_chat || strict
+      return if allow_chat || strict || @config.allow_chat

-      raise
+      raise ChatNotAllowedError,
        "#{method_name}() is intentionally gated because it is easy to misuse inside agents. " \
-        "Prefer generate()
+        "Prefer generate() for deterministic, schema-first workflows. " \
+        "To use #{method_name}(), either: " \
+        "1) Pass allow_chat: true as a parameter, or " \
+        "2) Enable chat in config: config.allow_chat = true"
    end

    # Normalize tools to array of hashes for API
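With this change, ensure_chat_allowed! unlocks chat when any of the three switches is set: the per-call keyword, strict mode, or the new config flag. A minimal sketch of the two explicit opt-ins, assuming the remaining chat_raw keywords keep their defaults; the message content is illustrative:

require "ollama_client"

# Option 1: opt in per call
client = Ollama::Client.new
client.chat_raw(
  messages: [{ role: "user", content: "Hello" }],
  allow_chat: true
)

# Option 2: opt in via config, so every chat_raw call on this client is allowed
config = Ollama::Config.new
config.allow_chat = true
chat_client = Ollama::Client.new(config: config)
chat_client.chat_raw(messages: [{ role: "user", content: "Hello" }])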
data/lib/ollama/config.rb CHANGED

@@ -6,16 +6,19 @@ module Ollama
  # Configuration class with safe defaults for agent-grade usage
  #
  # ⚠️ THREAD SAFETY WARNING:
-  # Global configuration
-  #
-  # configuration
+  # Global configuration access is mutex-protected, but modifying global config
+  # while clients are active can cause race conditions. For concurrent agents
+  # or multi-threaded applications, use per-client configuration (recommended):
  #
  #   config = Ollama::Config.new
  #   config.model = "llama3.1"
  #   client = Ollama::Client.new(config: config)
  #
+  # Each client instance with its own config is thread-safe.
+  #
  class Config
-    attr_accessor :base_url, :model, :timeout, :retries, :temperature, :top_p, :num_ctx, :on_response
+    attr_accessor :base_url, :model, :timeout, :retries, :temperature, :top_p, :num_ctx, :on_response, :allow_chat,
+                  :streaming_enabled

    def initialize
      @base_url = "http://localhost:11434"

@@ -26,6 +29,8 @@ module Ollama
      @top_p = 0.9
      @num_ctx = 8192
      @on_response = nil
+      @allow_chat = false
+      @streaming_enabled = false
    end

    # Load configuration from JSON file (useful for production deployments)
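Both new flags default to false and fit the per-client configuration pattern the warning recommends. A minimal sketch of a per-client config that enables them, mirroring how the new chat console sets them up; the model name is illustrative:

require "ollama_client"

config = Ollama::Config.new
config.model = "llama3.1"
config.allow_chat = true          # unlock chat_raw / ChatSession on this client
config.streaming_enabled = true   # enable streaming consumers such as the chat console

client = Ollama::Client.new(config: config)

Because each client holds its own Config instance, separate threads or agents can use differently configured clients without touching global state.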
data/lib/ollama/embeddings.rb CHANGED

@@ -21,7 +21,8 @@ module Ollama
    # @param input [String, Array<String>] Single text or array of texts
    # @return [Array<Float>, Array<Array<Float>>] Embedding vector(s)
    def embed(model:, input:)
-
+      # Use /api/embed (not /api/embeddings) - the working endpoint
+      uri = URI("#{@config.base_url}/api/embed")
      req = Net::HTTP::Post.new(uri)
      req["Content-Type"] = "application/json"

@@ -42,35 +43,12 @@ module Ollama
      handle_http_error(res, requested_model: model) unless res.is_a?(Net::HTTPSuccess)

      response_body = JSON.parse(res.body)
-
+      # /api/embed returns "embeddings" (plural) as array of arrays
+      embeddings = response_body["embeddings"] || response_body["embedding"]

-
-        raise Error,
-              "Embedding not found in response. Response keys: #{response_body.keys.join(", ")}. Full response: #{response_body.inspect[0..200]}"
-      end
-
-      if embedding.is_a?(Array) && embedding.empty?
-        error_msg = "Empty embedding returned. This usually means:\n"
-        error_msg += "  1. The model may not be properly loaded - try: ollama pull #{model}\n"
-        error_msg += "  2. The model may not support embeddings - verify it's an embedding model\n"
-        error_msg += "  3. Check if the model is working: curl http://localhost:11434/api/embeddings -d '{\"model\":\"#{model}\",\"input\":\"test\"}'\n"
-        error_msg += "Response: #{response_body.inspect[0..300]}"
-        raise Error, error_msg
-      end
+      validate_embedding_response!(embeddings, response_body, model)

-
-      if input.is_a?(Array)
-        # Ollama returns single embedding array even for multiple inputs
-        # We need to check the response structure
-        if embedding.is_a?(Array) && embedding.first.is_a?(Array)
-          embedding
-        else
-          # Single embedding returned, wrap it
-          [embedding]
-        end
-      else
-        embedding
-      end
+      format_embedding_result(embeddings, input)
    rescue JSON::ParserError => e
      raise InvalidJSONError, "Failed to parse embeddings response: #{e.message}"
    rescue Net::ReadTimeout, Net::OpenTimeout

@@ -81,6 +59,61 @@ module Ollama

    private

+    def validate_embedding_response!(embeddings, response_body, model)
+      if embeddings.nil?
+        keys = response_body.keys.join(", ")
+        response_preview = response_body.inspect[0..200]
+        raise Error, "Embeddings not found in response. Response keys: #{keys}. " \
+                     "Full response: #{response_preview}"
+      end
+
+      # Handle both formats: array of arrays [[...]] or single array [...]
+      # Check if it's empty or contains empty arrays
+      if embeddings.is_a?(Array) && (embeddings.empty? || (embeddings.first.is_a?(Array) && embeddings.first.empty?))
+        error_msg = build_empty_embedding_error_message(model, response_body)
+        raise Error, error_msg
+      end
+
+      nil
+    end
+
+    def build_empty_embedding_error_message(model, response_body)
+      curl_command = "curl -X POST http://localhost:11434/api/embed " \
+                     "-d '{\"model\":\"#{model}\",\"input\":\"test\"}'"
+      response_preview = response_body.inspect[0..300]
+
+      # Check for error messages in response
+      error_hint = ""
+      if response_body.is_a?(Hash)
+        if response_body.key?("error")
+          error_hint = "\n Error from Ollama: #{response_body["error"]}"
+        elsif response_body.key?("message")
+          error_hint = "\n Message from Ollama: #{response_body["message"]}"
+        end
+      end
+
+      "Empty embedding returned. This usually means:\n " \
+        "1. The model may not be properly loaded - try: ollama pull #{model}\n " \
+        "2. The model file may be corrupted - try: ollama rm #{model} && ollama pull #{model}\n " \
+        "3. The model may not support embeddings - verify it's an embedding model\n " \
+        "4. Check if the model is working: #{curl_command}#{error_hint}\n" \
+        "Response: #{response_preview}"
+    end
+
+    def format_embedding_result(embeddings, input)
+      # /api/embed returns "embeddings" as array of arrays [[...]]
+      # For single input, it's [[...]], for multiple inputs it's [[...], [...], ...]
+      if embeddings.is_a?(Array) && embeddings.first.is_a?(Array)
+        # Already in correct format (array of arrays)
+        # For single input, return first embedding array
+        # For multiple inputs, return all embedding arrays
+        input.is_a?(Array) ? embeddings : embeddings.first
+      else
+        # Fallback: single array format (shouldn't happen with /api/embed)
+        input.is_a?(Array) ? [embeddings] : embeddings
+      end
+    end
+
    def handle_http_error(res, requested_model: nil)
      status_code = res.code.to_i
      raise NotFoundError.new(res.message, requested_model: requested_model) if status_code == 404
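Per format_embedding_result above, a single string input now yields one vector and an array input yields one vector per text. A minimal sketch of the calling convention; the model name is illustrative, and the client.embed call path is an assumption here (the embed method shown lives in the embeddings helper, and your entry point may differ):

require "ollama_client"

client = Ollama::Client.new

# Single input -> a single vector (Array<Float>)
vector = client.embed(model: "nomic-embed-text", input: "Ruby is a dynamic language")
puts vector.length

# Array input -> one vector per text (Array<Array<Float>>)
vectors = client.embed(model: "nomic-embed-text", input: ["first text", "second text"])
puts vectors.length # => 2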
data/lib/ollama/errors.rb CHANGED