RubyGems - embedding_util - Versions diffs - 0.1.4 → 0.1.5 - Mend

embedding_util 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/lib/embedding_util/providers/self_hosted.rb +18 -1
data/lib/embedding_util/runtime_command.rb +9 -1
data/lib/embedding_util/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 25117f1c8eb2df3a23d26b45a76df8f850af2e96a9dc7c5ecfc9f2820a8cd913
-  data.tar.gz: a1d173a4933740d65cadbc6dad877cf605859029a0bd351c3123a7899dd49fe3
+  metadata.gz: 5042d545b7971013889232394972d9f444de89cc14f459c2b852b60049f08741
+  data.tar.gz: 305c3b11b11ae626ee14e04950052c02900c502ff6e477616e1641ee01aea0c1
 SHA512:
-  metadata.gz: 5b61c1d0e518af3aa80acbb19db28314412ff24611681dc61ce1a8b2d1a989da0bbe181e73c488ac0ab409d2ab579b598e2ca41b23871667ed19faec59e6e9a1
-  data.tar.gz: 9eb004c7a36be9a82638bd7883d865219f6f09dfe443d016bd73267c8124cac7f41d9f94960d86ea34d02828f7c186b49d5220a00808f0eec1195ff7c40769de
+  metadata.gz: 0d960182799ed19de3510589def9203bdb0b3caa3b0fd477eb46cd2b8077705c1e55304021a9c5473199316d1750047ef42cb03a43dbbde0a38fef13b8088c6b
+  data.tar.gz: 5b7438b8c8f163723728e16dfcbe90c8b46025d18f93831890a5f6d6b3484e554bd7bd8ad86468a3f5179a1809ec6731b2446dd1e929f2780a7aeb1bb035ba1d

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,9 @@
+## [0.1.5] - 2026-06-10
+- Add lean Ramalama defaults with bounded `--ctx-size 4096` and disabled llama.cpp prompt cache via `--cache-ram 0`
+- Split self-hosted embedding arrays into smaller endpoint batches to keep local document indexing within bounded context
+- Preserve embedding output order across self-hosted batches
 ## [0.1.4] - 2026-06-10
 - Stabilize managed Ramalama reranker startup, restart, and idle cleanup

data/lib/embedding_util/providers/self_hosted.rb CHANGED Viewed

@@ -7,6 +7,8 @@ require_relative "endpoint"
 module EmbeddingUtil
   module Providers
     class SelfHosted < Provider
+      EMBEDDING_BATCH_SIZE = 32
       def supported?
         ServerManager.supported?(config)
       end
@@ -25,7 +27,7 @@ module EmbeddingUtil
         manager = ServerManager.new(config: config)
         endpoint = manager.ensure_server(:embedding, profile: profile)
         manager.track_activity(:embedding, profile: profile) do
-          endpoint_provider(embedding_endpoint: endpoint).embed(texts, profile: profile)
+          embed_batches(endpoint, texts, profile)
         end
       end
@@ -50,6 +52,21 @@ module EmbeddingUtil
         Endpoint.new(config: endpoint_config)
       end
+      def embed_batches(endpoint, texts, profile)
+        results = texts.each_slice(EMBEDDING_BATCH_SIZE).map do |batch|
+          endpoint_provider(embedding_endpoint: endpoint).embed(batch, profile: profile)
+        end
+        return results.fetch(0) if results.size == 1
+        EmbeddingResult.new(
+          embedding: results.flat_map(&:embedding),
+          model: results.fetch(0).model,
+          profile: profile.name,
+          provider: provider_name,
+          metadata: { batches: results.size }
+        )
+      end
       def rerank_with_activity(manager, endpoint, query, documents, profile)
         manager.track_activity(:reranker, profile: profile) do
           endpoint_provider(reranker_endpoint: endpoint).rerank(query, documents, profile: profile)

data/lib/embedding_util/runtime_command.rb CHANGED Viewed

@@ -2,6 +2,9 @@
 module EmbeddingUtil
   class RuntimeCommand
+    RAMALAMA_CONTEXT_SIZE = "4096"
+    RAMALAMA_RUNTIME_FLAGS = ["--cache-ram", "0"].freeze
     attr_reader :runtime, :server_model, :host, :port, :server_flags, :ramalama_device
     def initialize(runtime:, server_model:, host:, port:, **options)
@@ -87,13 +90,18 @@ module EmbeddingUtil
         "ramalama", "--runtime=llama.cpp", "serve",
         "--name", server_name,
         *ramalama_device_args,
+        "--ctx-size", RAMALAMA_CONTEXT_SIZE,
         "--host", host,
         "--port", port.to_s,
-        "--runtime-args=#{server_flags.join(' ')}",
+        "--runtime-args=#{ramalama_runtime_flags.join(' ')}",
         huggingface_model
       ]
     end
+    def ramalama_runtime_flags
+      server_flags + RAMALAMA_RUNTIME_FLAGS
+    end
     def llama_server_argv
       [
         "llama-server",

data/lib/embedding_util/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module EmbeddingUtil
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: embedding_util
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - hmdne