embedding_util 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25117f1c8eb2df3a23d26b45a76df8f850af2e96a9dc7c5ecfc9f2820a8cd913
4
- data.tar.gz: a1d173a4933740d65cadbc6dad877cf605859029a0bd351c3123a7899dd49fe3
3
+ metadata.gz: 5042d545b7971013889232394972d9f444de89cc14f459c2b852b60049f08741
4
+ data.tar.gz: 305c3b11b11ae626ee14e04950052c02900c502ff6e477616e1641ee01aea0c1
5
5
  SHA512:
6
- metadata.gz: 5b61c1d0e518af3aa80acbb19db28314412ff24611681dc61ce1a8b2d1a989da0bbe181e73c488ac0ab409d2ab579b598e2ca41b23871667ed19faec59e6e9a1
7
- data.tar.gz: 9eb004c7a36be9a82638bd7883d865219f6f09dfe443d016bd73267c8124cac7f41d9f94960d86ea34d02828f7c186b49d5220a00808f0eec1195ff7c40769de
6
+ metadata.gz: 0d960182799ed19de3510589def9203bdb0b3caa3b0fd477eb46cd2b8077705c1e55304021a9c5473199316d1750047ef42cb03a43dbbde0a38fef13b8088c6b
7
+ data.tar.gz: 5b7438b8c8f163723728e16dfcbe90c8b46025d18f93831890a5f6d6b3484e554bd7bd8ad86468a3f5179a1809ec6731b2446dd1e929f2780a7aeb1bb035ba1d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [0.1.5] - 2026-06-10
2
+
3
+ - Add lean Ramalama defaults with bounded `--ctx-size 4096` and disabled llama.cpp prompt cache via `--cache-ram 0`
4
+ - Split self-hosted embedding arrays into smaller endpoint batches to keep local document indexing within bounded context
5
+ - Preserve embedding output order across self-hosted batches
6
+
1
7
  ## [0.1.4] - 2026-06-10
2
8
 
3
9
  - Stabilize managed Ramalama reranker startup, restart, and idle cleanup
@@ -7,6 +7,8 @@ require_relative "endpoint"
7
7
  module EmbeddingUtil
8
8
  module Providers
9
9
  class SelfHosted < Provider
10
+ EMBEDDING_BATCH_SIZE = 32
11
+
10
12
  def supported?
11
13
  ServerManager.supported?(config)
12
14
  end
@@ -25,7 +27,7 @@ module EmbeddingUtil
25
27
  manager = ServerManager.new(config: config)
26
28
  endpoint = manager.ensure_server(:embedding, profile: profile)
27
29
  manager.track_activity(:embedding, profile: profile) do
28
- endpoint_provider(embedding_endpoint: endpoint).embed(texts, profile: profile)
30
+ embed_batches(endpoint, texts, profile)
29
31
  end
30
32
  end
31
33
 
@@ -50,6 +52,21 @@ module EmbeddingUtil
50
52
  Endpoint.new(config: endpoint_config)
51
53
  end
52
54
 
55
+ def embed_batches(endpoint, texts, profile)
56
+ results = texts.each_slice(EMBEDDING_BATCH_SIZE).map do |batch|
57
+ endpoint_provider(embedding_endpoint: endpoint).embed(batch, profile: profile)
58
+ end
59
+ return results.fetch(0) if results.size == 1
60
+
61
+ EmbeddingResult.new(
62
+ embedding: results.flat_map(&:embedding),
63
+ model: results.fetch(0).model,
64
+ profile: profile.name,
65
+ provider: provider_name,
66
+ metadata: { batches: results.size }
67
+ )
68
+ end
69
+
53
70
  def rerank_with_activity(manager, endpoint, query, documents, profile)
54
71
  manager.track_activity(:reranker, profile: profile) do
55
72
  endpoint_provider(reranker_endpoint: endpoint).rerank(query, documents, profile: profile)
@@ -2,6 +2,9 @@
2
2
 
3
3
  module EmbeddingUtil
4
4
  class RuntimeCommand
5
+ RAMALAMA_CONTEXT_SIZE = "4096"
6
+ RAMALAMA_RUNTIME_FLAGS = ["--cache-ram", "0"].freeze
7
+
5
8
  attr_reader :runtime, :server_model, :host, :port, :server_flags, :ramalama_device
6
9
 
7
10
  def initialize(runtime:, server_model:, host:, port:, **options)
@@ -87,13 +90,18 @@ module EmbeddingUtil
87
90
  "ramalama", "--runtime=llama.cpp", "serve",
88
91
  "--name", server_name,
89
92
  *ramalama_device_args,
93
+ "--ctx-size", RAMALAMA_CONTEXT_SIZE,
90
94
  "--host", host,
91
95
  "--port", port.to_s,
92
- "--runtime-args=#{server_flags.join(' ')}",
96
+ "--runtime-args=#{ramalama_runtime_flags.join(' ')}",
93
97
  huggingface_model
94
98
  ]
95
99
  end
96
100
 
101
+ def ramalama_runtime_flags
102
+ server_flags + RAMALAMA_RUNTIME_FLAGS
103
+ end
104
+
97
105
  def llama_server_argv
98
106
  [
99
107
  "llama-server",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EmbeddingUtil
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embedding_util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - hmdne