liter_llm 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87c0ce7287f3e000b825d496b84662586cb3d8899f11a9f7a8bac55587ddc26c
4
- data.tar.gz: 5099baea9360fb98d4949324773b9ef5b3dafc870150f1992f3887162b79994e
3
+ metadata.gz: 7c5ff27da16bef518f0774730ea20b7365795a36ab51504e8e141be7c8ee0702
4
+ data.tar.gz: 6418ff93511a905374dcdd40767821f50f333a058684ef8379bc21357d17a71c
5
5
  SHA512:
6
- metadata.gz: 9452263146c7206ebad3b3ca9b0bc163e251ad832b3b4b4b7352132356e59d29779f2741ebe095607a3cdf7a668ff3eb5cf7ae0b2e6593a043e1f92922761897
7
- data.tar.gz: 991f0752533062901b10252157ff0c4fa9e6e69bb69ee5e8218e2124ed70d01cc84c51a1a469ef2a167504b6ff369239712116ed80e1873fb0d662da8c9d3bdd
6
+ metadata.gz: ce2371e26960cb849413bf447cd50ec6e985a370d2d663ae23f1c6aa6f5b7d2a38f69c9268b857513d31cdb88cd1abcc6b766c40845aa4cdea34a434c2a9ab50
7
+ data.tar.gz: 556703afe772088cc18963f308c189ccf146e8f789fe154822e3edcbb97c692220529d36a3d37d9d0a4f5604942e2454a1ac5d8ab1faf182ae61091ac08ef5e4
data/README.md CHANGED
@@ -35,6 +35,9 @@
35
35
  <a href="https://github.com/kreuzberg-dev/liter-llm/pkgs/container/liter-llm">
36
36
  <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
37
37
  </a>
38
+ <a href="https://github.com/kreuzberg-dev/homebrew-tap/blob/main/Formula/liter-llm.rb">
39
+ <img src="https://img.shields.io/badge/Homebrew-007ec6?logo=homebrew&logoColor=white" alt="Homebrew">
40
+ </a>
38
41
  <a href="https://github.com/kreuzberg-dev/liter-llm/tree/main/crates/liter-llm-ffi">
39
42
  <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI">
40
43
  </a>
@@ -63,7 +66,7 @@
63
66
  </div>
64
67
 
65
68
 
66
- Universal LLM API client for Ruby. Access 142+ LLM providers through a single interface with idiomatic Ruby API and native performance.
69
+ Universal LLM API client for Ruby. Access 143+ LLM providers through a single interface with idiomatic Ruby API and native performance.
67
70
 
68
71
 
69
72
  ## Installation
@@ -161,7 +164,7 @@ chunks.each { |chunk| puts chunk }
161
164
 
162
165
  ## Features
163
166
 
164
- ### Supported Providers (142+)
167
+ ### Supported Providers (143+)
165
168
 
166
169
  Route to any provider using the `provider/model` prefix convention:
167
170
 
@@ -181,7 +184,8 @@ Route to any provider using the `provider/model` prefix convention:
181
184
 
182
185
  ### Key Capabilities
183
186
 
184
- - **Provider Routing** -- Single client for 142+ LLM providers via `provider/model` prefix
187
+ - **Provider Routing** -- Single client for 143+ LLM providers via `provider/model` prefix
188
+ - **Local LLMs** -- Connect to locally hosted models via Ollama, LM Studio, vLLM, llama.cpp, and other local inference servers
185
189
  - **Unified API** -- Consistent `chat`, `chat_stream`, `embeddings`, `list_models` interface
186
190
 
187
191
  - **Streaming** -- Real-time token streaming via `chat_stream`
@@ -207,7 +211,7 @@ Built on a compiled Rust core for speed and safety:
207
211
 
208
212
  ## Provider Routing
209
213
 
210
- Route to 142+ providers using the `provider/model` prefix convention:
214
+ Route to 143+ providers using the `provider/model` prefix convention:
211
215
 
212
216
  ```text
213
217
  openai/gpt-4o
@@ -235,7 +239,7 @@ See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server
235
239
 
236
240
  - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
237
241
  - **[GitHub Repository](https://github.com/kreuzberg-dev/liter-llm)** -- Source, issues, and discussions
238
- - **[Provider Registry](https://github.com/kreuzberg-dev/liter-llm/blob/main/schemas/providers.json)** -- 142 supported providers
242
+ - **[Provider Registry](https://github.com/kreuzberg-dev/liter-llm/blob/main/schemas/providers.json)** -- 143 supported providers
239
243
 
240
244
  Part of [kreuzberg.dev](https://kreuzberg.dev).
241
245
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "liter-llm-rb"
3
- version = "1.1.1"
3
+ version = "1.2.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["liter-llm", "liter-llm-ffi"]
3
3
 
4
4
  [workspace.package]
5
- version = "1.1.1"
5
+ version = "1.2.1"
6
6
  edition = "2024"
7
7
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
8
8
  license = "MIT"
@@ -19,7 +19,7 @@ clap = { version = "4", features = ["derive", "env"] }
19
19
  dashmap = "6"
20
20
  futures-core = "0.3"
21
21
  futures-util = "0.3"
22
- jsonschema = "0.45"
22
+ jsonschema = "0.46"
23
23
  jsonwebtoken = { version = "10", features = ["use_pem"], default-features = false }
24
24
  magnus = "0.8"
25
25
  memchr = "2"
@@ -34,7 +34,7 @@ pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
34
34
  rayon = "1"
35
35
  rb-sys = "0.9"
36
36
  reqwest = { version = "0.13", features = ["json", "stream", "rustls", "multipart", "form"], default-features = false }
37
- rmcp = { version = "1.3", features = ["server", "macros", "transport-io", "transport-streamable-http-server", "server-side-http"] }
37
+ rmcp = { version = "1.5", features = ["server", "macros", "transport-io", "transport-streamable-http-server", "server-side-http"] }
38
38
  rustler = "0.37"
39
39
  schemars = "1"
40
40
  secrecy = { version = "0.10", features = ["serde"] }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "liter-llm"
3
- version = "1.1.1"
3
+ version = "1.2.1"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  repository.workspace = true
@@ -89,6 +89,6 @@ tracing-opentelemetry = { version = "0.32", optional = true }
89
89
 
90
90
  [dev-dependencies]
91
91
  futures-util = "0.3"
92
- jsonschema = "0.45"
92
+ jsonschema = "0.46"
93
93
  serial_test = "3"
94
94
  tokio = { version = "1", features = ["test-util", "macros"] }
@@ -35,6 +35,9 @@
35
35
  <a href="https://github.com/kreuzberg-dev/liter-llm/pkgs/container/liter-llm">
36
36
  <img src="https://img.shields.io/badge/Docker-007ec6?logo=docker&logoColor=white" alt="Docker">
37
37
  </a>
38
+ <a href="https://github.com/kreuzberg-dev/homebrew-tap/blob/main/Formula/liter-llm.rb">
39
+ <img src="https://img.shields.io/badge/Homebrew-007ec6?logo=homebrew&logoColor=white" alt="Homebrew">
40
+ </a>
38
41
  <a href="https://github.com/kreuzberg-dev/liter-llm/tree/main/crates/liter-llm-ffi">
39
42
  <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI">
40
43
  </a>
@@ -63,7 +66,7 @@
63
66
  </div>
64
67
 
65
68
 
66
- Universal LLM API client for Rust. Access 142+ LLM providers — OpenAI, Anthropic, Groq, Mistral, and more — through a single unified interface. Async/await with Tokio, streaming via BoxStream, composable Tower middleware stack, and compile-time type safety.
69
+ Universal LLM API client for Rust. Access 143+ LLM providers — OpenAI, Anthropic, Groq, Mistral, and more — through a single unified interface. Async/await with Tokio, streaming via BoxStream, composable Tower middleware stack, and compile-time type safety.
67
70
 
68
71
 
69
72
  ## Installation
@@ -174,7 +177,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
174
177
 
175
178
  ## Features
176
179
 
177
- ### Supported Providers (142+)
180
+ ### Supported Providers (143+)
178
181
 
179
182
  Route to any provider using the `provider/model` prefix convention:
180
183
 
@@ -194,7 +197,8 @@ Route to any provider using the `provider/model` prefix convention:
194
197
 
195
198
  ### Key Capabilities
196
199
 
197
- - **Provider Routing** -- Single client for 142+ LLM providers via `provider/model` prefix
200
+ - **Provider Routing** -- Single client for 143+ LLM providers via `provider/model` prefix
201
+ - **Local LLMs** -- Connect to locally hosted models via Ollama, LM Studio, vLLM, llama.cpp, and other local inference servers
198
202
  - **Unified API** -- Consistent `chat`, `chat_stream`, `embeddings`, `list_models` interface
199
203
 
200
204
  - **Streaming** -- Real-time token streaming via `chat_stream`
@@ -220,7 +224,7 @@ Built on a compiled Rust core for speed and safety:
220
224
 
221
225
  ## Provider Routing
222
226
 
223
- Route to 142+ providers using the `provider/model` prefix convention:
227
+ Route to 143+ providers using the `provider/model` prefix convention:
224
228
 
225
229
  ```text
226
230
  openai/gpt-4o
@@ -248,7 +252,7 @@ See the [proxy server documentation](https://docs.liter-llm.kreuzberg.dev/server
248
252
 
249
253
  - **[Documentation](https://docs.liter-llm.kreuzberg.dev)** -- Full docs and API reference
250
254
  - **[GitHub Repository](https://github.com/kreuzberg-dev/liter-llm)** -- Source, issues, and discussions
251
- - **[Provider Registry](https://github.com/kreuzberg-dev/liter-llm/blob/main/schemas/providers.json)** -- 142 supported providers
255
+ - **[Provider Registry](https://github.com/kreuzberg-dev/liter-llm/blob/main/schemas/providers.json)** -- 143 supported providers
252
256
 
253
257
  Part of [kreuzberg.dev](https://kreuzberg.dev).
254
258
 
@@ -17,6 +17,7 @@ use crate::types::files::{CreateFileRequest, DeleteResponse, FileListQuery, File
17
17
  use crate::types::image::{CreateImageRequest, ImagesResponse};
18
18
  use crate::types::moderation::{ModerationRequest, ModerationResponse};
19
19
  use crate::types::ocr::{OcrRequest, OcrResponse};
20
+ use crate::types::raw::{RawExchange, RawStreamExchange};
20
21
  use crate::types::rerank::{RerankRequest, RerankResponse};
21
22
  use crate::types::responses::{CreateResponseRequest, ResponseObject};
22
23
  use crate::types::search::{SearchRequest, SearchResponse};
@@ -112,6 +113,53 @@ pub trait LlmClient: Send + Sync {
112
113
  fn ocr(&self, req: OcrRequest) -> BoxFuture<'_, OcrResponse>;
113
114
  }
114
115
 
116
+ /// Extension of [`LlmClient`] that returns raw request/response data
117
+ /// alongside the typed response.
118
+ ///
119
+ /// Every `_raw` method mirrors its counterpart on [`LlmClient`] but wraps the
120
+ /// result in a [`RawExchange`] that exposes the final request body (after
121
+ /// `transform_request`) and the raw provider response (before
122
+ /// `transform_response`). This is useful for debugging provider-specific
123
+ /// transformations, capturing wire-level data, or implementing custom parsing.
124
+ pub trait LlmClientRaw: LlmClient {
125
+ /// Send a chat completion request and return the raw exchange.
126
+ ///
127
+ /// The `raw_request` field contains the final JSON body sent to the
128
+ /// provider; `raw_response` contains the provider JSON before
129
+ /// normalization.
130
+ fn chat_raw(&self, req: ChatCompletionRequest) -> BoxFuture<'_, RawExchange<ChatCompletionResponse>>;
131
+
132
+ /// Send a streaming chat completion request and return the raw exchange.
133
+ ///
134
+ /// Only `raw_request` is available upfront — the stream itself is
135
+ /// returned in `stream` and consumed incrementally.
136
+ fn chat_stream_raw(
137
+ &self,
138
+ req: ChatCompletionRequest,
139
+ ) -> BoxFuture<'_, RawStreamExchange<BoxStream<'_, ChatCompletionChunk>>>;
140
+
141
+ /// Send an embedding request and return the raw exchange.
142
+ fn embed_raw(&self, req: EmbeddingRequest) -> BoxFuture<'_, RawExchange<EmbeddingResponse>>;
143
+
144
+ /// Generate an image and return the raw exchange.
145
+ fn image_generate_raw(&self, req: CreateImageRequest) -> BoxFuture<'_, RawExchange<ImagesResponse>>;
146
+
147
+ /// Transcribe audio to text and return the raw exchange.
148
+ fn transcribe_raw(&self, req: CreateTranscriptionRequest) -> BoxFuture<'_, RawExchange<TranscriptionResponse>>;
149
+
150
+ /// Check content against moderation policies and return the raw exchange.
151
+ fn moderate_raw(&self, req: ModerationRequest) -> BoxFuture<'_, RawExchange<ModerationResponse>>;
152
+
153
+ /// Rerank documents by relevance to a query and return the raw exchange.
154
+ fn rerank_raw(&self, req: RerankRequest) -> BoxFuture<'_, RawExchange<RerankResponse>>;
155
+
156
+ /// Perform a web/document search and return the raw exchange.
157
+ fn search_raw(&self, req: SearchRequest) -> BoxFuture<'_, RawExchange<SearchResponse>>;
158
+
159
+ /// Extract text from a document via OCR and return the raw exchange.
160
+ fn ocr_raw(&self, req: OcrRequest) -> BoxFuture<'_, RawExchange<OcrResponse>>;
161
+ }
162
+
115
163
  /// File management operations (upload, list, retrieve, delete).
116
164
  pub trait FileClient: Send + Sync {
117
165
  /// Upload a file.
@@ -788,6 +836,390 @@ impl LlmClient for DefaultClient {
788
836
  }
789
837
  }
790
838
 
839
+ #[cfg(feature = "native-http")]
840
+ impl LlmClientRaw for DefaultClient {
841
+ fn chat_raw(&self, req: ChatCompletionRequest) -> BoxFuture<'_, RawExchange<ChatCompletionResponse>> {
842
+ Box::pin(async move {
843
+ let prepared = self.prepare_request(&req, |p| p.chat_completions_path(), &req.model, Some(false))?;
844
+ let raw_request = prepared.body_json.clone();
845
+
846
+ let auth_header = self
847
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
848
+ .await?;
849
+ let all_headers = self.all_headers_for_provider(
850
+ prepared.provider.as_ref(),
851
+ "POST",
852
+ &prepared.url,
853
+ &prepared.body_json,
854
+ &prepared.body_bytes,
855
+ );
856
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
857
+
858
+ let auth = auth_header.as_ref().map(str_pair);
859
+ let mut raw = http::request::post_json_raw(
860
+ &self.http,
861
+ &prepared.url,
862
+ auth,
863
+ &extra,
864
+ prepared.body_bytes,
865
+ self.config.max_retries,
866
+ )
867
+ .await?;
868
+
869
+ let raw_response = Some(raw.clone());
870
+ prepared.provider.transform_response(&mut raw)?;
871
+ let data = serde_json::from_value::<ChatCompletionResponse>(raw).map_err(LiterLlmError::from)?;
872
+
873
+ Ok(RawExchange {
874
+ data,
875
+ raw_request,
876
+ raw_response,
877
+ })
878
+ })
879
+ }
880
+
881
+ fn chat_stream_raw(
882
+ &self,
883
+ req: ChatCompletionRequest,
884
+ ) -> BoxFuture<'_, RawStreamExchange<BoxStream<'_, ChatCompletionChunk>>> {
885
+ Box::pin(async move {
886
+ let prepared = self.prepare_request(&req, |p| p.chat_completions_path(), &req.model, Some(true))?;
887
+ let raw_request = prepared.body_json.clone();
888
+
889
+ let bare_model = prepared.provider.strip_model_prefix(&req.model);
890
+ let url = prepared
891
+ .provider
892
+ .build_stream_url(prepared.provider.chat_completions_path(), bare_model);
893
+
894
+ let auth_header = self
895
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
896
+ .await?;
897
+ let all_headers = self.all_headers_for_provider(
898
+ prepared.provider.as_ref(),
899
+ "POST",
900
+ &url,
901
+ &prepared.body_json,
902
+ &prepared.body_bytes,
903
+ );
904
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
905
+ let auth = auth_header.as_ref().map(str_pair);
906
+
907
+ let stream = match prepared.provider.stream_format() {
908
+ provider::StreamFormat::Sse => {
909
+ let provider = Arc::clone(&prepared.provider);
910
+ let parse_event = move |data: &str| provider.parse_stream_event(data);
911
+ http::streaming::post_stream(
912
+ &self.http,
913
+ &url,
914
+ auth,
915
+ &extra,
916
+ prepared.body_bytes,
917
+ self.config.max_retries,
918
+ parse_event,
919
+ )
920
+ .await?
921
+ }
922
+ provider::StreamFormat::AwsEventStream => {
923
+ http::eventstream::post_eventstream(
924
+ &self.http,
925
+ &url,
926
+ auth,
927
+ &extra,
928
+ prepared.body_bytes,
929
+ self.config.max_retries,
930
+ provider::bedrock::parse_bedrock_stream_event,
931
+ )
932
+ .await?
933
+ }
934
+ };
935
+
936
+ Ok(RawStreamExchange { stream, raw_request })
937
+ })
938
+ }
939
+
940
+ fn embed_raw(&self, req: EmbeddingRequest) -> BoxFuture<'_, RawExchange<EmbeddingResponse>> {
941
+ Box::pin(async move {
942
+ let prepared = self.prepare_request(&req, |p| p.embeddings_path(), &req.model, None)?;
943
+ let raw_request = prepared.body_json.clone();
944
+
945
+ let auth_header = self
946
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
947
+ .await?;
948
+ let all_headers = self.all_headers_for_provider(
949
+ prepared.provider.as_ref(),
950
+ "POST",
951
+ &prepared.url,
952
+ &prepared.body_json,
953
+ &prepared.body_bytes,
954
+ );
955
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
956
+
957
+ let auth = auth_header.as_ref().map(str_pair);
958
+ let mut raw = http::request::post_json_raw(
959
+ &self.http,
960
+ &prepared.url,
961
+ auth,
962
+ &extra,
963
+ prepared.body_bytes,
964
+ self.config.max_retries,
965
+ )
966
+ .await?;
967
+
968
+ let raw_response = Some(raw.clone());
969
+ prepared.provider.transform_response(&mut raw)?;
970
+ let data = serde_json::from_value::<EmbeddingResponse>(raw).map_err(LiterLlmError::from)?;
971
+
972
+ Ok(RawExchange {
973
+ data,
974
+ raw_request,
975
+ raw_response,
976
+ })
977
+ })
978
+ }
979
+
980
+ fn image_generate_raw(&self, req: CreateImageRequest) -> BoxFuture<'_, RawExchange<ImagesResponse>> {
981
+ Box::pin(async move {
982
+ let model = req.model.as_deref().unwrap_or_default();
983
+ let prepared = self.prepare_request(&req, |p| p.image_generations_path(), model, None)?;
984
+ let raw_request = prepared.body_json.clone();
985
+
986
+ let auth_header = self
987
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
988
+ .await?;
989
+ let all_headers = self.all_headers_for_provider(
990
+ prepared.provider.as_ref(),
991
+ "POST",
992
+ &prepared.url,
993
+ &prepared.body_json,
994
+ &prepared.body_bytes,
995
+ );
996
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
997
+
998
+ let auth = auth_header.as_ref().map(str_pair);
999
+ let mut raw = http::request::post_json_raw(
1000
+ &self.http,
1001
+ &prepared.url,
1002
+ auth,
1003
+ &extra,
1004
+ prepared.body_bytes,
1005
+ self.config.max_retries,
1006
+ )
1007
+ .await?;
1008
+
1009
+ let raw_response = Some(raw.clone());
1010
+ prepared.provider.transform_response(&mut raw)?;
1011
+ let data = serde_json::from_value::<ImagesResponse>(raw).map_err(LiterLlmError::from)?;
1012
+
1013
+ Ok(RawExchange {
1014
+ data,
1015
+ raw_request,
1016
+ raw_response,
1017
+ })
1018
+ })
1019
+ }
1020
+
1021
+ fn transcribe_raw(&self, req: CreateTranscriptionRequest) -> BoxFuture<'_, RawExchange<TranscriptionResponse>> {
1022
+ Box::pin(async move {
1023
+ let prepared = self.prepare_request(&req, |p| p.audio_transcriptions_path(), &req.model, None)?;
1024
+ let raw_request = prepared.body_json.clone();
1025
+
1026
+ let auth_header = self
1027
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
1028
+ .await?;
1029
+ let all_headers = self.all_headers_for_provider(
1030
+ prepared.provider.as_ref(),
1031
+ "POST",
1032
+ &prepared.url,
1033
+ &prepared.body_json,
1034
+ &prepared.body_bytes,
1035
+ );
1036
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
1037
+
1038
+ let auth = auth_header.as_ref().map(str_pair);
1039
+ let mut raw = http::request::post_json_raw(
1040
+ &self.http,
1041
+ &prepared.url,
1042
+ auth,
1043
+ &extra,
1044
+ prepared.body_bytes,
1045
+ self.config.max_retries,
1046
+ )
1047
+ .await?;
1048
+
1049
+ let raw_response = Some(raw.clone());
1050
+ prepared.provider.transform_response(&mut raw)?;
1051
+ let data = serde_json::from_value::<TranscriptionResponse>(raw).map_err(LiterLlmError::from)?;
1052
+
1053
+ Ok(RawExchange {
1054
+ data,
1055
+ raw_request,
1056
+ raw_response,
1057
+ })
1058
+ })
1059
+ }
1060
+
1061
+ fn moderate_raw(&self, req: ModerationRequest) -> BoxFuture<'_, RawExchange<ModerationResponse>> {
1062
+ Box::pin(async move {
1063
+ let model = req.model.as_deref().unwrap_or_default();
1064
+ let prepared = self.prepare_request(&req, |p| p.moderations_path(), model, None)?;
1065
+ let raw_request = prepared.body_json.clone();
1066
+
1067
+ let auth_header = self
1068
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
1069
+ .await?;
1070
+ let all_headers = self.all_headers_for_provider(
1071
+ prepared.provider.as_ref(),
1072
+ "POST",
1073
+ &prepared.url,
1074
+ &prepared.body_json,
1075
+ &prepared.body_bytes,
1076
+ );
1077
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
1078
+
1079
+ let auth = auth_header.as_ref().map(str_pair);
1080
+ let mut raw = http::request::post_json_raw(
1081
+ &self.http,
1082
+ &prepared.url,
1083
+ auth,
1084
+ &extra,
1085
+ prepared.body_bytes,
1086
+ self.config.max_retries,
1087
+ )
1088
+ .await?;
1089
+
1090
+ let raw_response = Some(raw.clone());
1091
+ prepared.provider.transform_response(&mut raw)?;
1092
+ let data = serde_json::from_value::<ModerationResponse>(raw).map_err(LiterLlmError::from)?;
1093
+
1094
+ Ok(RawExchange {
1095
+ data,
1096
+ raw_request,
1097
+ raw_response,
1098
+ })
1099
+ })
1100
+ }
1101
+
1102
+ fn rerank_raw(&self, req: RerankRequest) -> BoxFuture<'_, RawExchange<RerankResponse>> {
1103
+ Box::pin(async move {
1104
+ let prepared = self.prepare_request(&req, |p| p.rerank_path(), &req.model, None)?;
1105
+ let raw_request = prepared.body_json.clone();
1106
+
1107
+ let auth_header = self
1108
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
1109
+ .await?;
1110
+ let all_headers = self.all_headers_for_provider(
1111
+ prepared.provider.as_ref(),
1112
+ "POST",
1113
+ &prepared.url,
1114
+ &prepared.body_json,
1115
+ &prepared.body_bytes,
1116
+ );
1117
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
1118
+
1119
+ let auth = auth_header.as_ref().map(str_pair);
1120
+ let mut raw = http::request::post_json_raw(
1121
+ &self.http,
1122
+ &prepared.url,
1123
+ auth,
1124
+ &extra,
1125
+ prepared.body_bytes,
1126
+ self.config.max_retries,
1127
+ )
1128
+ .await?;
1129
+
1130
+ let raw_response = Some(raw.clone());
1131
+ prepared.provider.transform_response(&mut raw)?;
1132
+ let data = serde_json::from_value::<RerankResponse>(raw).map_err(LiterLlmError::from)?;
1133
+
1134
+ Ok(RawExchange {
1135
+ data,
1136
+ raw_request,
1137
+ raw_response,
1138
+ })
1139
+ })
1140
+ }
1141
+
1142
+ fn search_raw(&self, req: SearchRequest) -> BoxFuture<'_, RawExchange<SearchResponse>> {
1143
+ Box::pin(async move {
1144
+ let prepared = self.prepare_request(&req, |p| p.search_path(), &req.model, None)?;
1145
+ let raw_request = prepared.body_json.clone();
1146
+
1147
+ let auth_header = self
1148
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
1149
+ .await?;
1150
+ let all_headers = self.all_headers_for_provider(
1151
+ prepared.provider.as_ref(),
1152
+ "POST",
1153
+ &prepared.url,
1154
+ &prepared.body_json,
1155
+ &prepared.body_bytes,
1156
+ );
1157
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
1158
+
1159
+ let auth = auth_header.as_ref().map(str_pair);
1160
+ let mut raw = http::request::post_json_raw(
1161
+ &self.http,
1162
+ &prepared.url,
1163
+ auth,
1164
+ &extra,
1165
+ prepared.body_bytes,
1166
+ self.config.max_retries,
1167
+ )
1168
+ .await?;
1169
+
1170
+ let raw_response = Some(raw.clone());
1171
+ prepared.provider.transform_response(&mut raw)?;
1172
+ let data = serde_json::from_value::<SearchResponse>(raw).map_err(LiterLlmError::from)?;
1173
+
1174
+ Ok(RawExchange {
1175
+ data,
1176
+ raw_request,
1177
+ raw_response,
1178
+ })
1179
+ })
1180
+ }
1181
+
1182
+ fn ocr_raw(&self, req: OcrRequest) -> BoxFuture<'_, RawExchange<OcrResponse>> {
1183
+ Box::pin(async move {
1184
+ let prepared = self.prepare_request(&req, |p| p.ocr_path(), &req.model, None)?;
1185
+ let raw_request = prepared.body_json.clone();
1186
+
1187
+ let auth_header = self
1188
+ .resolve_auth_header_for_provider(prepared.provider.as_ref())
1189
+ .await?;
1190
+ let all_headers = self.all_headers_for_provider(
1191
+ prepared.provider.as_ref(),
1192
+ "POST",
1193
+ &prepared.url,
1194
+ &prepared.body_json,
1195
+ &prepared.body_bytes,
1196
+ );
1197
+ let extra: Vec<(&str, &str)> = all_headers.iter().map(|(n, v)| (n.as_str(), v.as_str())).collect();
1198
+
1199
+ let auth = auth_header.as_ref().map(str_pair);
1200
+ let mut raw = http::request::post_json_raw(
1201
+ &self.http,
1202
+ &prepared.url,
1203
+ auth,
1204
+ &extra,
1205
+ prepared.body_bytes,
1206
+ self.config.max_retries,
1207
+ )
1208
+ .await?;
1209
+
1210
+ let raw_response = Some(raw.clone());
1211
+ prepared.provider.transform_response(&mut raw)?;
1212
+ let data = serde_json::from_value::<OcrResponse>(raw).map_err(LiterLlmError::from)?;
1213
+
1214
+ Ok(RawExchange {
1215
+ data,
1216
+ raw_request,
1217
+ raw_response,
1218
+ })
1219
+ })
1220
+ }
1221
+ }
1222
+
791
1223
  #[cfg(feature = "native-http")]
792
1224
  impl FileClient for DefaultClient {
793
1225
  fn create_file(&self, req: CreateFileRequest) -> BoxFuture<'_, FileObject> {
@@ -20,7 +20,7 @@ pub mod types;
20
20
  // Re-export key types at crate root.
21
21
  pub use client::{
22
22
  BatchClient, BoxFuture, BoxStream, ClientConfig, ClientConfigBuilder, FileClient, FileConfig, LlmClient,
23
- ResponseClient,
23
+ LlmClientRaw, ResponseClient,
24
24
  };
25
25
  // DefaultClient requires the native HTTP stack (reqwest + tokio).
26
26
  #[cfg(feature = "native-http")]
@@ -917,7 +917,10 @@ mod tests {
917
917
  // ── build_url ─────────────────────────────────────────────────────────────
918
918
 
919
919
  #[test]
920
+ #[serial]
920
921
  fn build_url_chat_completions() {
922
+ // SAFETY: env vars are process-global; `#[serial]` ensures no parallel mutation.
923
+ unsafe { std::env::remove_var("BEDROCK_CROSS_REGION") };
921
924
  let p = provider();
922
925
  let url = p.build_url("/chat/completions", "anthropic.claude-3-sonnet-20240229-v1:0");
923
926
  // Colon must be uppercase-encoded per RFC 3986 §2.1.
@@ -928,7 +931,10 @@ mod tests {
928
931
  }
929
932
 
930
933
  #[test]
934
+ #[serial]
931
935
  fn build_url_embeddings() {
936
+ // SAFETY: env vars are process-global; `#[serial]` ensures no parallel mutation.
937
+ unsafe { std::env::remove_var("BEDROCK_CROSS_REGION") };
932
938
  let p = provider();
933
939
  let url = p.build_url("/embeddings", "amazon.titan-embed-text-v1");
934
940
  assert_eq!(
@@ -938,7 +944,10 @@ mod tests {
938
944
  }
939
945
 
940
946
  #[test]
947
+ #[serial]
941
948
  fn build_url_other_path() {
949
+ // SAFETY: env vars are process-global; `#[serial]` ensures no parallel mutation.
950
+ unsafe { std::env::remove_var("BEDROCK_CROSS_REGION") };
942
951
  let p = provider();
943
952
  let url = p.build_url("/models", "any-model");
944
953
  assert_eq!(url, "https://bedrock-runtime.us-east-1.amazonaws.com/models");
@@ -8,6 +8,7 @@ pub mod image;
8
8
  pub mod models;
9
9
  pub mod moderation;
10
10
  pub mod ocr;
11
+ pub mod raw;
11
12
  pub mod rerank;
12
13
  pub mod responses;
13
14
  pub mod search;
@@ -22,6 +23,7 @@ pub use image::*;
22
23
  pub use models::*;
23
24
  pub use moderation::*;
24
25
  pub use ocr::*;
26
+ pub use raw::*;
25
27
  pub use rerank::*;
26
28
  pub use responses::*;
27
29
  pub use search::*;
@@ -0,0 +1,29 @@
1
+ /// The raw request and response JSON exchanged with the provider,
2
+ /// paired with the typed (normalized) response.
3
+ ///
4
+ /// Returned by every `_raw` method on [`crate::LlmClientRaw`]. Useful for
5
+ /// debugging provider-specific transformations or implementing custom parsing.
6
+ #[derive(Debug, Clone)]
7
+ pub struct RawExchange<T> {
8
+ /// The typed, normalized response.
9
+ pub data: T,
10
+ /// The final request body sent to the provider (after `transform_request`).
11
+ pub raw_request: serde_json::Value,
12
+ /// The raw response body from the provider, before `transform_response`.
13
+ /// `None` for binary endpoints (speech) or when not applicable.
14
+ pub raw_response: Option<serde_json::Value>,
15
+ }
16
+
17
+ /// Raw exchange data for streaming responses.
18
+ ///
19
+ /// Returned by [`crate::LlmClientRaw::chat_stream_raw`]. The stream itself is
20
+ /// not captured in its entirety — only the request body is available upfront.
21
+ /// `RawStreamExchange` intentionally does not implement `Clone` because streams
22
+ /// cannot be duplicated.
23
+ #[derive(Debug)]
24
+ pub struct RawStreamExchange<S> {
25
+ /// The chunk stream, unchanged.
26
+ pub stream: S,
27
+ /// The final request body sent to the provider.
28
+ pub raw_request: serde_json::Value,
29
+ }
@@ -0,0 +1,134 @@
1
+ //! Integration tests against local LLM providers (Ollama).
2
+ //!
3
+ //! These tests require a running Ollama instance with models pulled.
4
+ //! Start with: `task local:up`
5
+ //! Run with: `cargo test -p liter-llm --test local_llm -- --ignored`
6
+
7
+ use futures_util::StreamExt;
8
+ use liter_llm::{
9
+ ChatCompletionRequest, ClientConfigBuilder, DefaultClient, EmbeddingInput, EmbeddingRequest, LlmClient,
10
+ };
11
+
12
+ const OLLAMA_CHAT_MODEL: &str = "ollama/qwen2:0.5b";
13
+ const OLLAMA_EMBED_MODEL: &str = "ollama/all-minilm";
14
+
15
+ /// Check whether an Ollama instance is reachable.
16
+ async fn is_ollama_available() -> bool {
17
+ let base = std::env::var("OLLAMA_BASE_URL").unwrap_or_else(|_| "http://localhost:11434".into());
18
+ reqwest::get(format!("{base}/v1/models")).await.is_ok()
19
+ }
20
+
21
+ fn ollama_client(model_hint: &str) -> DefaultClient {
22
+ let config = ClientConfigBuilder::new("").max_retries(2).build();
23
+ DefaultClient::new(config, Some(model_hint)).expect("failed to build Ollama client")
24
+ }
25
+
26
+ fn simple_chat_request(model: &str) -> ChatCompletionRequest {
27
+ serde_json::from_value(serde_json::json!({
28
+ "model": model,
29
+ "messages": [{"role": "user", "content": "Say hello in one word."}],
30
+ "max_tokens": 16,
31
+ }))
32
+ .expect("failed to build chat request from JSON")
33
+ }
34
+
35
+ fn simple_embed_request(model: &str) -> EmbeddingRequest {
36
+ EmbeddingRequest {
37
+ model: model.into(),
38
+ input: EmbeddingInput::Single("hello world".into()),
39
+ encoding_format: None,
40
+ dimensions: None,
41
+ user: None,
42
+ }
43
+ }
44
+
45
+ #[tokio::test]
46
+ #[ignore]
47
+ async fn local_chat_ollama() {
48
+ if !is_ollama_available().await {
49
+ eprintln!("SKIP: Ollama not available, skipping");
50
+ return;
51
+ }
52
+
53
+ let client = ollama_client(OLLAMA_CHAT_MODEL);
54
+ let resp = client.chat(simple_chat_request(OLLAMA_CHAT_MODEL)).await.unwrap();
55
+
56
+ assert!(!resp.choices.is_empty(), "should have at least one choice");
57
+ let choice = &resp.choices[0];
58
+ assert!(
59
+ choice.message.content.as_ref().is_some_and(|c| !c.is_empty()),
60
+ "first choice content should be non-empty"
61
+ );
62
+ assert!(choice.finish_reason.is_some(), "finish_reason should be present");
63
+ assert!(!resp.model.is_empty(), "model field should be non-empty");
64
+ }
65
+
66
+ #[tokio::test]
67
+ #[ignore]
68
+ async fn local_stream_ollama() {
69
+ if !is_ollama_available().await {
70
+ eprintln!("SKIP: Ollama not available, skipping");
71
+ return;
72
+ }
73
+
74
+ let client = ollama_client(OLLAMA_CHAT_MODEL);
75
+ let mut stream = client
76
+ .chat_stream(simple_chat_request(OLLAMA_CHAT_MODEL))
77
+ .await
78
+ .unwrap();
79
+
80
+ let mut content = String::new();
81
+ let mut chunk_count = 0u32;
82
+ let mut saw_finish = false;
83
+
84
+ while let Some(result) = stream.next().await {
85
+ let chunk = result.unwrap();
86
+ chunk_count += 1;
87
+ if let Some(choice) = chunk.choices.first() {
88
+ if let Some(text) = &choice.delta.content {
89
+ content.push_str(text);
90
+ }
91
+ if choice.finish_reason.is_some() {
92
+ saw_finish = true;
93
+ }
94
+ }
95
+ if chunk_count > 200 {
96
+ break;
97
+ }
98
+ }
99
+
100
+ assert!(chunk_count >= 1, "should receive at least 1 chunk");
101
+ assert!(!content.is_empty(), "concatenated content should be non-empty");
102
+ assert!(saw_finish, "should see a finish_reason in the stream");
103
+ }
104
+
105
+ #[tokio::test]
106
+ #[ignore]
107
+ async fn local_embed_ollama() {
108
+ if !is_ollama_available().await {
109
+ eprintln!("SKIP: Ollama not available, skipping");
110
+ return;
111
+ }
112
+
113
+ let client = ollama_client(OLLAMA_EMBED_MODEL);
114
+ let resp = client.embed(simple_embed_request(OLLAMA_EMBED_MODEL)).await.unwrap();
115
+
116
+ assert!(!resp.data.is_empty(), "should have embedding data");
117
+ assert!(!resp.data[0].embedding.is_empty(), "embedding should have dimensions");
118
+ assert!(!resp.model.is_empty(), "model field should be non-empty");
119
+ }
120
+
121
+ #[tokio::test]
122
+ #[ignore]
123
+ async fn local_list_models_ollama() {
124
+ if !is_ollama_available().await {
125
+ eprintln!("SKIP: Ollama not available, skipping");
126
+ return;
127
+ }
128
+
129
+ let client = ollama_client(OLLAMA_CHAT_MODEL);
130
+ let resp = client.list_models().await.unwrap();
131
+
132
+ assert!(!resp.data.is_empty(), "should list at least one model");
133
+ assert!(!resp.data[0].id.is_empty(), "first model id should be non-empty");
134
+ }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "liter-llm-ffi"
3
- version = "1.1.1"
3
+ version = "1.2.1"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  repository.workspace = true
@@ -20,8 +20,8 @@ default = []
20
20
  base64.workspace = true
21
21
  bytes.workspace = true
22
22
  futures-core.workspace = true
23
- liter-llm = { path = "../liter-llm", version = "1.1.1", features = ["full"] }
24
- liter-llm-bindings-core = { path = "../liter-llm-bindings-core", version = "1.1.1" }
23
+ liter-llm = { path = "../liter-llm", version = "1.2.1", features = ["full"] }
24
+ liter-llm-bindings-core = { path = "../liter-llm-bindings-core", version = "1.2.1" }
25
25
  serde.workspace = true
26
26
  serde_json.workspace = true
27
27
  tokio.workspace = true
@@ -8,9 +8,9 @@
8
8
  /* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */
9
9
 
10
10
  #define LITER_LLM_VERSION_MAJOR 1
11
- #define LITER_LLM_VERSION_MINOR 1
11
+ #define LITER_LLM_VERSION_MINOR 2
12
12
  #define LITER_LLM_VERSION_PATCH 1
13
- #define LITER_LLM_VERSION "1.1.1"
13
+ #define LITER_LLM_VERSION "1.2.1"
14
14
 
15
15
 
16
16
  #include <stdarg.h>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: liter_llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-29 00:00:00.000000000 Z
11
+ date: 2026-04-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -248,6 +248,7 @@ files:
248
248
  - vendor/liter-llm/src/types/models.rs
249
249
  - vendor/liter-llm/src/types/moderation.rs
250
250
  - vendor/liter-llm/src/types/ocr.rs
251
+ - vendor/liter-llm/src/types/raw.rs
251
252
  - vendor/liter-llm/src/types/rerank.rs
252
253
  - vendor/liter-llm/src/types/responses.rs
253
254
  - vendor/liter-llm/src/types/search.rs
@@ -274,6 +275,7 @@ files:
274
275
  - vendor/liter-llm/tests/live_providers/mistral.rs
275
276
  - vendor/liter-llm/tests/live_providers/openai.rs
276
277
  - vendor/liter-llm/tests/live_providers/vertex_ai.rs
278
+ - vendor/liter-llm/tests/local_llm.rs
277
279
  - vendor/liter-llm/tests/middleware_integration.rs
278
280
  - vendor/liter-llm/tests/operations_integration.rs
279
281
  - vendor/liter-llm/tests/routing_integration.rs