ollama-client 0.2.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.cursor/.gitignore +1 -0
  3. data/.rubocop_todo.yml +66 -0
  4. data/API_CONTRACT.md +166 -0
  5. data/CHANGELOG.md +26 -40
  6. data/CLAUDE.md +56 -0
  7. data/README.md +223 -1383
  8. data/SECURITY.md +17 -0
  9. data/devagent_proper.rb +430 -0
  10. data/docs/API_GAPS.md +143 -0
  11. data/docs/AREAS_FOR_CONSIDERATION.md +3 -3
  12. data/docs/CONSOLE_IMPROVEMENTS.md +1 -1
  13. data/docs/GETTING_STARTED.md +25 -7
  14. data/docs/INTEGRATION_TESTING.md +9 -9
  15. data/docs/PRODUCTION_FIXES.md +1 -1
  16. data/docs/QUICK_START.md +2 -2
  17. data/examples/agent_loop.rb +120 -0
  18. data/examples/failure_modes/invalid_json_repair.rb +42 -0
  19. data/examples/production/rails_agent.rb +62 -0
  20. data/examples/timeout_retry.rb +40 -0
  21. data/exe/ollama-client +187 -110
  22. data/lib/ollama/capabilities.rb +60 -0
  23. data/lib/ollama/client/chat.rb +140 -0
  24. data/lib/ollama/client/generate.rb +389 -0
  25. data/lib/ollama/client/model_management.rb +206 -0
  26. data/lib/ollama/client.rb +51 -804
  27. data/lib/ollama/config.rb +51 -7
  28. data/lib/ollama/embeddings.rb +25 -6
  29. data/lib/ollama/errors.rb +3 -0
  30. data/lib/ollama/options.rb +93 -15
  31. data/lib/ollama/response.rb +89 -7
  32. data/lib/ollama/version.rb +1 -1
  33. data/lib/ollama_client.rb +0 -9
  34. data/market.jpg +0 -0
  35. data/print_capabilities.rb +20 -0
  36. data/schema.json +1 -0
  37. data/test_tool.rb +26 -0
  38. metadata +26 -44
  39. data/examples/README.md +0 -91
  40. data/examples/basic_chat.rb +0 -33
  41. data/examples/basic_generate.rb +0 -29
  42. data/examples/mcp_executor.rb +0 -39
  43. data/examples/mcp_http_executor.rb +0 -45
  44. data/examples/tool_calling_parsing.rb +0 -59
  45. data/examples/tool_dto_example.rb +0 -94
  46. data/lib/ollama/agent/executor.rb +0 -258
  47. data/lib/ollama/agent/messages.rb +0 -31
  48. data/lib/ollama/agent/planner.rb +0 -52
  49. data/lib/ollama/chat_session.rb +0 -101
  50. data/lib/ollama/document_loader.rb +0 -163
  51. data/lib/ollama/mcp/http_client.rb +0 -149
  52. data/lib/ollama/mcp/stdio_client.rb +0 -146
  53. data/lib/ollama/mcp/tools_bridge.rb +0 -72
  54. data/lib/ollama/mcp.rb +0 -31
  55. data/lib/ollama/personas.rb +0 -287
  56. data/lib/ollama/streaming_observer.rb +0 -22
  57. data/lib/ollama/tool/function/parameters/property.rb +0 -72
  58. data/lib/ollama/tool/function/parameters.rb +0 -101
  59. data/lib/ollama/tool/function.rb +0 -78
  60. data/lib/ollama/tool.rb +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 53bd3c1ff3323d004a7ba3549317ef49d3ce9698d41133a1d2150d0a5135fa80
4
- data.tar.gz: 4c726b2a7fabf164c91cd51206266d250de836986d1a36726556cbefb47a6e8e
3
+ metadata.gz: 35dd7d6fe83f10045ed0ea1efa14f5ffc7547b8f132a5a504ad5fcbad40843ec
4
+ data.tar.gz: ca238782d4431c3c0beeee0615dca25699d479a66fe44c88509f8905803656a6
5
5
  SHA512:
6
- metadata.gz: 1cdcc10b54d2fa318daefc2736d338715b65f03cacf94c7c9303c07ec745ea510e6de0ac73c0c668b0301481663989af1368d86727ebfa1252a43edab9d7d711
7
- data.tar.gz: aead7b8ae703d436866cf796e91b102fbcc21ff47e6528c2edf62b13a1bac1f98f36d1146ba3e56a7fa0792bd0aff0880be65450c05b2352336cd20b39b3f9ab
6
+ metadata.gz: 51ece11f047b2ed872a0860b04d3e0cc2cfbbf443259deef5edddbf8511ac94318d8e7ea3ccfb6251945e1d38abe96fc31b366d6905c9ed45cc452530ad01b61
7
+ data.tar.gz: c5f8cf9f1296768c13740e4462abe3bbe0aaa48e936da720a061c55f6c9456362e32b7a1aa5fcaac09c630e0e4beac36b3990e6673367434defb234eb2895dc4
@@ -0,0 +1 @@
1
+ plans/
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,66 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2026-02-22 07:05:15 UTC using RuboCop version 1.82.1.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
11
+ # AllowedMethods: refine
12
+ Metrics/BlockLength:
13
+ Max: 26
14
+
15
+ # Offense count: 0 (resolved via module extraction in lib/ollama/client/)
16
+ # Metrics/ClassLength - no longer needed
17
+
18
+ # Offense count: 1
19
+ # Configuration parameters: CountComments, Max, CountAsOne, AllowedMethods, AllowedPatterns.
20
+ Metrics/MethodLength:
21
+ Exclude:
22
+ - 'spec/**/*'
23
+ - 'examples/**/*'
24
+ - 'devagent_proper.rb'
25
+
26
+ # Offense count: 1
27
+ # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
28
+ Metrics/ParameterLists:
29
+ Max: 6
30
+
31
+ # Offense count: 1
32
+ # This cop supports unsafe autocorrection (--autocorrect-all).
33
+ # Configuration parameters: SkipBlocks, EnforcedStyle, OnlyStaticConstants.
34
+ # SupportedStyles: described_class, explicit
35
+ RSpec/DescribedClass:
36
+ Exclude:
37
+ - 'spec/ollama/errors_spec.rb'
38
+ - 'spec/integration/client_integration_spec.rb'
39
+
40
+ # Offense count: 1
41
+ # Configuration parameters: CustomTransform, IgnoreMethods, IgnoreMetadata, InflectorPath, EnforcedInflector.
42
+ # SupportedInflectors: default, active_support
43
+ RSpec/SpecFilePathFormat:
44
+ Exclude:
45
+ - 'spec/ollama/client_model_suggestions_spec.rb'
46
+ - 'spec/integration/client_integration_spec.rb'
47
+
48
+ # Offense count: 4
49
+ # Configuration parameters: AllowedConstants.
50
+ Style/Documentation:
51
+ Exclude:
52
+ - 'examples/**/*'
53
+ - 'devagent_proper.rb'
54
+
55
+ # Offense count: 2
56
+ # This cop supports safe autocorrection (--autocorrect).
57
+ Style/IfUnlessModifier:
58
+ Exclude:
59
+ - 'lib/ollama/schema_validator.rb'
60
+
61
+ # Offense count: 2
62
+ # This cop supports safe autocorrection (--autocorrect).
63
+ # Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
64
+ # URISchemes: http, https
65
+ Layout/LineLength:
66
+ Max: 136
data/API_CONTRACT.md ADDED
@@ -0,0 +1,166 @@
1
+ # API Contract — v1.0.0
2
+
3
+ This document defines the **public API surface** of `ollama-client` v1.0.
4
+ Everything listed here is guaranteed stable until `v2.0.0`.
5
+
6
+ ## Public Methods
7
+
8
+ ### `Ollama::Client`
9
+
10
+ ```ruby
11
+ client = Ollama::Client.new(config: Ollama::Config.new)
12
+ ```
13
+
14
+ #### Chat
15
+
16
+ | Method | Signature | Returns |
17
+ |---|---|---|
18
+ | `chat` | `(messages:, model: nil, format: nil, tools: nil, stream: nil, think: nil, keep_alive: nil, options: nil, logprobs: nil, top_logprobs: nil, hooks: {})` | `Ollama::Response` |
19
+
20
+ #### Generate
21
+
22
+ | Method | Signature | Returns |
23
+ |---|---|---|
24
+ | `generate` | `(prompt:, schema: nil, model: nil, strict: config.strict_json, return_meta: false, system: nil, images: nil, think: nil, return_reasoning: false, keep_alive: nil, suffix: nil, raw: nil, options: nil, hooks: {})` | `String` (no schema) or `Hash` (with schema) |
25
+
26
+ When `think: true` and `return_reasoning: true`, the return value is a `Hash` with:
27
+
28
+ - `"reasoning"` — the extracted reasoning text (may be empty string)
29
+ - `"final"` — either a `String` (no schema) or a `Hash` (when `schema:` is provided)
30
+
31
+ #### Model Management
32
+
33
+ | Method | Signature | Returns |
34
+ |---|---|---|
35
+ | `list_models` | `()` | `Array<Hash>` |
36
+ | `list_model_names` | `()` | `Array<String>` |
37
+ | `list_running` / `ps` | `()` | `Array<Hash>` |
38
+ | `show_model` | `(model:, verbose: false)` | `Hash` |
39
+ | `pull` | `(model_name)` | `true` |
40
+ | `delete_model` | `(model:)` | `true` |
41
+ | `copy_model` | `(source:, destination:)` | `true` |
42
+ | `create_model` | `(model:, from:, system: nil, template: nil, license: nil, parameters: nil, messages: nil, quantize: nil, stream: false)` | `Hash` |
43
+ | `push_model` | `(model:, insecure: false, stream: false)` | `Hash` |
44
+ | `version` | `()` | `String` |
45
+ | `embeddings` | _(attr_reader)_ | `Ollama::Embeddings` instance |
46
+
47
+ ### `Ollama::Embeddings`
48
+
49
+ ```ruby
50
+ client.embeddings.embed(model: "nomic-embed-text:latest", input: "text")
51
+ ```
52
+
53
+ | Method | Signature | Returns |
54
+ |---|---|---|
55
+ | `embed` | `(model:, input:, truncate: nil, dimensions: nil, keep_alive: nil, options: nil)` | `Array<Float>` (single) or `Array<Array<Float>>` (batch) |
56
+
57
+ ### `Ollama::Response`
58
+
59
+ Returned by `chat`. Wraps the API response with accessor methods:
60
+
61
+ | Method | Returns | Description |
62
+ |---|---|---|
63
+ | `message` | `Ollama::Response::Message` | Message wrapper |
64
+ | `content` | `String` | Shorthand for `message.content` |
65
+ | `done?` | `Boolean` | Whether generation finished |
66
+ | `done_reason` | `String` | Why generation stopped (`"stop"`, etc.) |
67
+ | `model` | `String` | Model name used |
68
+ | `total_duration` | `Integer` | Total time (nanoseconds) |
69
+ | `load_duration` | `Integer` | Model load time |
70
+ | `prompt_eval_count` | `Integer` | Prompt token count |
71
+ | `eval_count` | `Integer` | Response token count |
72
+ | `logprobs` | `Array` | Log probabilities (when enabled) |
73
+
74
+ #### `Ollama::Response::Message`
75
+
76
+ | Method | Returns | Description |
77
+ |---|---|---|
78
+ | `content` | `String` | Message content |
79
+ | `thinking` | `String` | Thinking output (when `think: true`) |
80
+ | `role` | `String` | `"assistant"` |
81
+ | `tool_calls` | `Array<ToolCall>` | Function calls |
82
+ | `images` | `Array<String>` | Base64 images |
83
+
84
+ ### `Ollama::Options`
85
+
86
+ Type-safe runtime options passed via `options:` parameter:
87
+
88
+ ```ruby
89
+ Ollama::Options.new(temperature: 0.7, num_predict: 256)
90
+ ```
91
+
92
+ Valid keys: `temperature`, `top_p`, `top_k`, `num_ctx`, `repeat_penalty`, `seed`, `num_predict`, `stop`, `tfs_z`, `mirostat`, `mirostat_tau`, `mirostat_eta`, `num_gpu`, `num_thread`, `num_keep`, `typical_p`, `presence_penalty`, `frequency_penalty`.
93
+
94
+ ### `Ollama::Config`
95
+
96
+ All attributes are read/write via `attr_accessor`:
97
+
98
+ | Attribute | Type | Default | Description |
99
+ |---|---|---|---|
100
+ | `base_url` | `String` | `"http://localhost:11434"` | Ollama server URL |
101
+ | `api_key` | `String, nil` | `nil` | Optional Bearer token for Ollama Cloud (`https://ollama.com`) |
102
+ | `model` | `String` | `"llama3.2:3b"` | Default model for generation |
103
+ | `timeout` | `Integer` | `30` | HTTP read/open timeout in seconds |
104
+ | `retries` | `Integer` | `2` | Max retry attempts |
105
+ | `strict_json` | `Boolean` | `true` | Enable JSON validation + repair |
106
+ | `temperature` | `Float` | `0.2` | Sampling temperature |
107
+ | `top_p` | `Float` | `0.9` | Nucleus sampling |
108
+ | `num_ctx` | `Integer` | `8192` | Context window size |
109
+ | `on_response` | `Proc/nil` | `nil` | Global response callback |
110
+
111
+ ## Error Classes
112
+
113
+ All errors inherit from `Ollama::Error < StandardError`.
114
+
115
+ | Error | Raised When | Retryable? |
116
+ |---|---|---|
117
+ | `Ollama::Error` | Base class / connection failures | **No** — fast fail |
118
+ | `Ollama::TimeoutError` | `Net::ReadTimeout` / `Net::OpenTimeout` | **Yes** — exponential backoff |
119
+ | `Ollama::InvalidJSONError` | Response cannot be parsed as JSON | **Yes** — repair prompt retry |
120
+ | `Ollama::SchemaViolationError` | Parsed JSON fails schema validation | **Yes** — repair prompt retry |
121
+ | `Ollama::RetryExhaustedError` | All retry attempts exhausted | **No** — terminal |
122
+ | `Ollama::HTTPError` | Non-200 HTTP response | Depends on status code |
123
+ | `Ollama::NotFoundError` | HTTP 404 (model not found) | **Auto-handled** — triggers pull |
124
+ | `Ollama::StreamError` | `{"error": "..."}` in NDJSON stream | **No** — immediate |
125
+
126
+ ## Recovery Behaviors (Guaranteed)
127
+
128
+ | Scenario | Behavior |
129
+ |---|---|
130
+ | Model missing (404) | Auto-pull once → retry original request |
131
+ | Timeout | Exponential backoff: `sleep(2 ** attempt)` |
132
+ | Invalid JSON (strict mode) | Append repair prompt → retry |
133
+ | Schema violation (strict mode) | Append repair prompt → retry |
134
+ | Server unreachable (ECONNREFUSED) | Immediate `Ollama::Error` — no retries |
135
+ | All retries exhausted | `Ollama::RetryExhaustedError` |
136
+ | Streaming error | `Ollama::StreamError` with server message |
137
+
138
+ ## Streaming Hooks
139
+
140
+ Passed via `hooks:` parameter on `generate` and `chat`:
141
+
142
+ ```ruby
143
+ hooks: {
144
+ on_token: ->(token) { ... }, # Called per token chunk
145
+ on_error: ->(error) { ... }, # Called on stream error
146
+ on_complete: -> { ... } # Called when stream finishes
147
+ }
148
+ ```
149
+
150
+ Hooks are **observer-only** — they cannot modify the response. Streaming is auto-enabled when any hook is present.
151
+
152
+ ## What Will NOT Change Before v2.0
153
+
154
+ 1. Method signatures listed above
155
+ 2. Error class hierarchy
156
+ 3. Default config values
157
+ 4. Recovery behaviors (auto-pull, backoff, repair)
158
+ 5. JSON schema validation via `json-schema` gem
159
+ 6. Observer-style hooks interface
160
+
161
+ ## What MAY Change (Minor Versions)
162
+
163
+ - New optional keyword arguments on existing methods
164
+ - New error subclasses (always inheriting from existing hierarchy)
165
+ - Additional config attributes (always with backwards-compatible defaults)
166
+ - Performance improvements to retry/backoff timing
data/CHANGELOG.md CHANGED
@@ -1,49 +1,35 @@
1
- ## [Unreleased]
1
+ # Changelog
2
2
 
3
- ## [0.2.7] - 2026-02-04
3
+ All notable changes to this project will be documented in this file.
4
4
 
5
- - Add MCP (Model Context Protocol) support for local and remote servers
6
- - Add `Ollama::MCP::StdioClient` for local MCP servers over stdio (e.g. `npx @modelcontextprotocol/server-filesystem`)
7
- - Add `Ollama::MCP::HttpClient` for remote MCP servers over HTTP (e.g. [gitmcp.io](https://gitmcp.io)/owner/repo)
8
- - Add `Ollama::MCP::ToolsBridge` to expose MCP tools to `Ollama::Agent::Executor` (`client:` or `stdio_client:`)
9
- - Add examples: `examples/mcp_executor.rb` (stdio), `examples/mcp_http_executor.rb` (URL)
10
- - Document MCP usage and GitMCP URL in README; fix RuboCop offenses in MCP code
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
11
7
 
12
- ## [0.2.6] - 2026-01-26
8
+ ## [1.1.0] - 2026-03-17
13
9
 
14
- - Reorganize examples: move agent examples to separate repository, keep minimal client examples
15
- - Add comprehensive test coverage (increased from 65.66% to 79.59%)
16
- - Add test suite for `Ollama::DocumentLoader` (file loading, context building)
17
- - Add test suite for `Ollama::Embeddings` (API calls, error handling)
18
- - Add test suite for `Ollama::ChatSession` (session management)
19
- - Add test suite for tool classes (`Tool`, `Function`, `Parameters`, `Property`)
20
- - Rewrite testing documentation to focus on client-only testing (transport/protocol)
21
- - Add test checklist with specific test categories (G1-G3, C1-C3, A1-A2, F1-F3)
22
- - Update README with enhanced "What This Gem IS NOT" section
23
- - Fix RuboCop offenses and improve code quality
10
+ ### Added
11
+ - Ollama Cloud support via `Ollama::Config#api_key` and HTTPS `base_url` (e.g. `https://ollama.com`).
12
+ - `Ollama::Config#http_connection_options` to centralize Net::HTTP connection options (including SSL and timeouts).
13
+ - `Ollama::Config#inspect` now redacts `api_key` while keeping other attributes visible.
24
14
 
25
- ## [0.2.5] - 2026-01-22
15
+ ### Changed
16
+ - Chat, generate, embeddings, and model management HTTP calls now share connection-option logic but keep existing behavior.
26
17
 
27
- - Add `Ollama::DocumentLoader` for loading files as context in queries
28
- - Enhance README with context provision methods and examples
29
- - Improve embeddings error handling and model usage guidance
30
- - Add comprehensive Ruby guide documentation
31
- - Update `generate()` method with enhanced functionality and usage examples
32
- - Improve error handling across client and embeddings modules
18
+ ## [1.0.0] - 2026-02-22
33
19
 
34
- ## [0.2.3] - 2026-01-17
20
+ ### Changed
21
+ - **Massive surface area reduction:** Removed `chat`, `chat_raw`, `call_chat_api`, `call_chat_api_raw`, and related endpoints.
22
+ - **Architectural Shift:** Removed all chatbot UI logic (`ChatSession`, `Personas`), abstract Agent implementations (`Planner`, `Executor`), and `DocumentLoader` to enforce strict low-level determinism.
23
+ - **API Contracts:** `Client#generate` now handles strict JSON schemas directly and implements resilient auto-recovery.
24
+ - **Defaults:** Opinionated defaults out-of-the-box (`timeout: 30`, `retries: 2`, `strict_json: true`).
25
+ - **Streaming Hooks:** Deprecated raw SSE streaming over `chat` in favor of safe observer callbacks (`on_token`, `on_error`, `on_complete`) on `generate`.
26
+ - **Model Auto-Pulling:** If `generate` receives a 404 Model Not Found, it attempts to synchronously `/pull` the model once, and then automatically retries generation.
27
+ - **JSON Repair Loop:** When `strict_json: true` is set, if a model emits malformed JSON (for example, output wrapped in Markdown code fences), the client automatically retries with a CRITICAL repair prompt to fix the output.
28
+ - **Backoff:** Encountering a `Net::ReadTimeout` now triggers an exponential backoff sleep (`2 ** attempt`) between retries rather than immediately re-hammering the server.
35
29
 
36
- - Add per-call `model:` override for `Ollama::Client#generate`.
37
- - Document `generate` model override usage in README.
38
- - Add spec to cover per-call `model:` in 404 error path.
30
+ ### Security
31
+ - **Strict Error Boundaries:** Malformed payloads can no longer leak into application state due to strict `SchemaViolationError` bounding.
32
+ - **Fast-fail Networking:** Encountering `Errno::ECONNREFUSED` fast-fails immediately.
39
33
 
40
- ## [0.2.0] - 2026-01-12
41
-
42
- - Add `Ollama::Agent::Planner` (stateless `/api/generate`)
43
- - Add `Ollama::Agent::Executor` (stateful `/api/chat` tool loop)
44
- - Add `Ollama::StreamingObserver` + disciplined streaming support (Executor only)
45
- - Add `Ollama::Client#chat_raw` (full response body, supports tool calls)
46
-
47
- ## [0.1.0] - 2026-01-04
48
-
49
- - Initial release
34
+ ### Rationale
35
+ Version `1.0.0` repositions `ollama-client` away from a bloated general-purpose wrapper toward a production-safe, failure-aware adapter intentionally crafted for Headless Rails Jobs and Agent Systems. By severing chat tools and abstractions, the gem commits to a strictly deterministic API that doesn't collapse under back-pressure, missing models, or temporary JSON formatting hallucinations.
data/CLAUDE.md ADDED
@@ -0,0 +1,56 @@
1
+ # ollama-client
2
+
3
+ Ruby gem — Ollama HTTP client for agent-grade usage. Provides `chat`, `generate`, embeddings, and full model management. Stable public API defined in `API_CONTRACT.md`.
4
+
5
+ ## Stack
6
+
7
+ - Ruby gem (no Rails)
8
+ - Zeitwerk autoloader
9
+ - RSpec + WebMock + Timecop + SimpleCov
10
+ - RuboCop
11
+
12
+ ## Commands
13
+
14
+ ```bash
15
+ bundle exec rspec
16
+ COVERAGE=true bundle exec rspec
17
+ bundle exec rubocop
18
+ bundle exec rake
19
+ ```
20
+
21
+ ## Architecture
22
+
23
+ ```
24
+ lib/ollama/
25
+ client.rb # Top-level entry point
26
+ client/
27
+ chat.rb # chat() method
28
+ generate.rb # generate() method
29
+ model_management.rb # list, pull, delete, copy, create, push, version
30
+ config.rb # Ollama::Config (base_url, model, timeout, retries, strict_json, etc.)
31
+ response.rb # Ollama::Response wrapper
32
+ embeddings.rb # client.embeddings.embed()
33
+ options.rb # Model options (temperature, top_p, num_ctx)
34
+ dto.rb # Data transfer objects
35
+ schema_validator.rb # JSON schema validation for structured output
36
+ schemas/ # Built-in JSON schemas
37
+ capabilities.rb # Model capability detection
38
+ errors.rb # Error hierarchy
39
+ version.rb
40
+ ```
41
+
42
+ ## Public API (stable — see API_CONTRACT.md)
43
+
44
+ - `client.chat(messages:, model:, tools:, stream:, think:, ...)` → `Ollama::Response`
45
+ - `client.generate(prompt:, schema:, model:, strict:, ...)` → `String` or `Hash`
46
+ - `client.embeddings.embed(model:, input:)` → `Array<Float>`
47
+ - Model management: `list_models`, `pull`, `delete_model`, `copy_model`, `create_model`, `push_model`, `version`
48
+
49
+ ## Key rules
50
+
51
+ - **Never break the public API** — changes to method signatures require a version bump and API_CONTRACT.md update
52
+ - All HTTP calls must be mockable with WebMock — never require live Ollama in tests
53
+ - `Ollama::Config` defaults to `localhost:11434` — config is per-client, not global (thread-safe)
54
+ - `generate` with `schema:` returns a parsed Hash; without `schema:` returns raw String — never mix
55
+ - `strict_json: true` (default) — do not disable in production code
56
+ - Thread safety: per-client config is safe; modifying global config while clients are active is not