inference-proxy 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: inference-proxy
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: "Inference Proxy" is OpenAI-compatible http proxy server for inferencing various LLMs capable of working with Google, Anthropic, OpenAI APIs, local PyTorch inference, etc.
5
5
  License: MIT License
6
6
 
@@ -158,6 +158,10 @@ api_key = "env:OPENAI_API_KEY"
158
158
  api_type = "google_ai_studio"
159
159
  api_key = "env:GOOGLE_API_KEY"
160
160
 
161
+ [connections.anthropic]
162
+ api_type = "anthropic"
163
+ api_key = "env:ANTHROPIC_API_KEY"
164
+
161
165
  # Routing rules (model_pattern = "connection.model")
162
166
  [routing]
163
167
  "gpt*" = "openai.*" # Route all GPT models to OpenAI
@@ -171,6 +175,25 @@ api_keys = [
171
175
  "KEY1",
172
176
  "KEY2"
173
177
  ]
178
+
179
+ # Optional: log every request/response as one JSON line in a file
180
+ [[loggers]]
181
+ class = 'lm_proxy.loggers.BaseLogger'
182
+ [loggers.log_writer]
183
+ class = 'lm_proxy.loggers.log_writers.JsonLogWriter'
184
+ file_name = 'storage/json.log'
185
+ [loggers.entry_transformer]
186
+ class = 'lm_proxy.loggers.LogEntryTransformer'
187
+ completion_tokens = "response.usage.completion_tokens"
188
+ prompt_tokens = "response.usage.prompt_tokens"
189
+ prompt = "request.messages"
190
+ response = "response"
191
+ group = "group"
192
+ connection = "connection"
193
+ api_key_id = "api_key_id"
194
+ remote_addr = "remote_addr"
195
+ created_at = "created_at"
196
+ duration = "duration"
174
197
  ```
175
198
 
176
199
  ### Environment Variables
@@ -184,6 +207,28 @@ api_key = "env:OPENAI_API_KEY"
184
207
 
185
208
  Load these from a `.env` file or set them in your environment before starting the server.
186
209
 
210
+
211
+ ## 🔑 Proxy API Keys vs. Provider API Keys
212
+
213
+ Inference Proxy utilizes two distinct types of API keys to facilitate secure and efficient request handling.
214
+
215
+ - **Proxy API Key (Virtual API Key, Client API Key):**
216
+ A unique key generated and managed within the Inference Proxy.
217
+ Clients use these keys to authenticate their requests to the proxy's API endpoints.
218
+ Each Client API Key is associated with a specific group, which defines the scope of access and permissions for the client's requests.
219
+ These keys allow users to securely interact with the proxy without direct access to external service credentials.
220
+
221
+
222
+
223
+ - **Provider API Key (Upstream API Key):**
224
+ A key provided by external LLM inference providers (e.g., OpenAI, Anthropic, Mistral, etc.) and configured within the Inference Proxy.
225
+ The proxy uses these keys to authenticate and forward validated client requests to the respective external services.
226
+ Provider API Keys remain hidden from end users, ensuring secure and transparent communication with provider APIs.
227
+
228
+ This distinction ensures a clear separation of concerns:
229
+ Virtual API Keys manage user authentication and access within the proxy,
230
+ while Upstream API Keys handle secure communication with external providers.
231
+
187
232
  ## 🔌 API Usage
188
233
 
189
234
  Inference Proxy implements the OpenAI chat completions API endpoint. You can use any OpenAI-compatible client to interact with it.
@@ -112,6 +112,10 @@ api_key = "env:OPENAI_API_KEY"
112
112
  api_type = "google_ai_studio"
113
113
  api_key = "env:GOOGLE_API_KEY"
114
114
 
115
+ [connections.anthropic]
116
+ api_type = "anthropic"
117
+ api_key = "env:ANTHROPIC_API_KEY"
118
+
115
119
  # Routing rules (model_pattern = "connection.model")
116
120
  [routing]
117
121
  "gpt*" = "openai.*" # Route all GPT models to OpenAI
@@ -125,6 +129,25 @@ api_keys = [
125
129
  "KEY1",
126
130
  "KEY2"
127
131
  ]
132
+
133
+ # Optional: log every request/response as one JSON line in a file
134
+ [[loggers]]
135
+ class = 'lm_proxy.loggers.BaseLogger'
136
+ [loggers.log_writer]
137
+ class = 'lm_proxy.loggers.log_writers.JsonLogWriter'
138
+ file_name = 'storage/json.log'
139
+ [loggers.entry_transformer]
140
+ class = 'lm_proxy.loggers.LogEntryTransformer'
141
+ completion_tokens = "response.usage.completion_tokens"
142
+ prompt_tokens = "response.usage.prompt_tokens"
143
+ prompt = "request.messages"
144
+ response = "response"
145
+ group = "group"
146
+ connection = "connection"
147
+ api_key_id = "api_key_id"
148
+ remote_addr = "remote_addr"
149
+ created_at = "created_at"
150
+ duration = "duration"
128
151
  ```
129
152
 
130
153
  ### Environment Variables
@@ -138,6 +161,28 @@ api_key = "env:OPENAI_API_KEY"
138
161
 
139
162
  Load these from a `.env` file or set them in your environment before starting the server.
140
163
 
164
+
165
+ ## 🔑 Proxy API Keys vs. Provider API Keys
166
+
167
+ Inference Proxy utilizes two distinct types of API keys to facilitate secure and efficient request handling.
168
+
169
+ - **Proxy API Key (Virtual API Key, Client API Key):**
170
+ A unique key generated and managed within the Inference Proxy.
171
+ Clients use these keys to authenticate their requests to the proxy's API endpoints.
172
+ Each Client API Key is associated with a specific group, which defines the scope of access and permissions for the client's requests.
173
+ These keys allow users to securely interact with the proxy without direct access to external service credentials.
174
+
175
+
176
+
177
+ - **Provider API Key (Upstream API Key):**
178
+ A key provided by external LLM inference providers (e.g., OpenAI, Anthropic, Mistral, etc.) and configured within the Inference Proxy.
179
+ The proxy uses these keys to authenticate and forward validated client requests to the respective external services.
180
+ Provider API Keys remain hidden from end users, ensuring secure and transparent communication with provider APIs.
181
+
182
+ This distinction ensures a clear separation of concerns:
183
+ Virtual API Keys manage user authentication and access within the proxy,
184
+ while Upstream API Keys handle secure communication with external providers.
185
+
141
186
  ## 🔌 API Usage
142
187
 
143
188
  Inference Proxy implements the OpenAI chat completions API endpoint. You can use any OpenAI-compatible client to interact with it.
@@ -14,7 +14,7 @@ def run_server(
14
14
  config: str = typer.Option(None, help="Path to the configuration file"),
15
15
  debug: bool = typer.Option(False, help="Enable debug mode (more verbose logging)"),
16
16
  ):
17
- bootstrap(config or 'config.toml')
17
+ bootstrap(config or "config.toml")
18
18
  uvicorn.run(
19
19
  "lm_proxy.app:web_app",
20
20
  host=env.config.host,
@@ -25,7 +25,9 @@ def run_server(
25
25
 
26
26
 
27
27
  def web_app():
28
- app = FastAPI(title="LM-Proxy", description="OpenAI-compatible proxy server for LLM inference")
28
+ app = FastAPI(
29
+ title="LM-Proxy", description="OpenAI-compatible proxy server for LLM inference"
30
+ )
29
31
  app.add_api_route(
30
32
  path="/v1/chat/completions",
31
33
  endpoint=chat_completions,
@@ -55,12 +55,13 @@ class Env:
55
55
  env.connections[conn_name] = conn_config
56
56
  else:
57
57
  mc.configure(
58
- **conn_config,
59
- EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE
58
+ **conn_config, EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE
60
59
  )
61
60
  env.connections[conn_name] = mc.env().llm_async_function
62
61
  except mc.LLMConfigError as e:
63
- raise ValueError(f"Error in configuration for connection '{conn_name}': {e}")
62
+ raise ValueError(
63
+ f"Error in configuration for connection '{conn_name}': {e}"
64
+ )
64
65
 
65
66
  logging.info(f"Done initializing {len(env.connections)} connections.")
66
67
 
@@ -68,9 +69,9 @@ class Env:
68
69
  env = Env()
69
70
 
70
71
 
71
- def bootstrap(config: str | Config = 'config.toml'):
72
- load_dotenv('.env', override=True)
73
- debug = '--debug' in sys.argv or get_bool_from_env('LM_PROXY_DEBUG', False)
72
+ def bootstrap(config: str | Config = "config.toml"):
73
+ load_dotenv(".env", override=True)
74
+ debug = "--debug" in sys.argv or get_bool_from_env("LM_PROXY_DEBUG", False)
74
75
  setup_logging(logging.DEBUG if debug else logging.INFO)
75
76
  mc.logging.LoggingConfig.OUTPUT_METHOD = logging.info
76
77
  logging.info(
@@ -2,6 +2,7 @@
2
2
  Configuration models for LM-Proxy settings.
3
3
  This module defines Pydantic models that match the structure of config.toml.
4
4
  """
5
+
5
6
  import os
6
7
  from typing import Union, Callable
7
8
  import tomllib
@@ -10,6 +11,8 @@ import importlib.util
10
11
  from pydantic import BaseModel, Field, ConfigDict
11
12
  from microcore.utils import resolve_callable
12
13
 
14
+ from .utils import resolve_instance_or_callable
15
+
13
16
 
14
17
  class Group(BaseModel):
15
18
  api_keys: list[str] = Field(default_factory=list)
@@ -24,20 +27,30 @@ class Group(BaseModel):
24
27
 
25
28
  class Config(BaseModel):
26
29
  """Main configuration model matching config.toml structure."""
30
+
27
31
  model_config = ConfigDict(extra="forbid")
28
32
  enabled: bool = True
29
33
  host: str = "0.0.0.0"
30
34
  port: int = 8000
31
35
  dev_autoreload: bool = False
32
- connections: dict[str, Union[dict, Callable]]
36
+ connections: dict[str, Union[dict, Callable]] = Field(
37
+ ..., # Required field (no default)
38
+ description="Dictionary of connection configurations",
39
+ examples=[{"openai": {"api_key": "sk-..."}}],
40
+ )
33
41
  routing: dict[str, str] = Field(default_factory=dict)
34
42
  """ model_name_pattern* => connection_name.< model | * >, example: {"gpt-*": "oai.*"} """
35
43
  groups: dict[str, Group] = Field(default_factory=dict)
36
44
  check_api_key: Union[str, Callable] = Field(default="lm_proxy.core.check_api_key")
45
+ loggers: list[Union[str, Callable, dict]] = Field(default_factory=list)
46
+ encryption_key: str = Field(
47
+ default="Eclipse", description="Key for encrypting sensitive data"
48
+ )
37
49
 
38
50
  def __init__(self, **data):
39
51
  super().__init__(**data)
40
52
  self.check_api_key = resolve_callable(self.check_api_key)
53
+ self.loggers = [resolve_instance_or_callable(logger) for logger in self.loggers]
41
54
  if not self.groups:
42
55
  # Default group with no restrictions
43
56
  self.groups = {"default": Group()}
@@ -4,16 +4,20 @@ import json
4
4
  import logging
5
5
  import secrets
6
6
  import time
7
+ import hashlib
7
8
  from typing import List, Optional
8
9
 
9
10
  import microcore as mc
10
11
  from fastapi import HTTPException
12
+ from lm_proxy.loggers import LogEntry
11
13
  from pydantic import BaseModel
12
14
  from starlette.requests import Request
13
15
  from starlette.responses import JSONResponse, Response, StreamingResponse
14
16
 
15
17
  from .bootstrap import env
16
18
  from .config import Config, Group
19
+ from .loggers import log_non_blocking
20
+ from .utils import get_client_ip
17
21
 
18
22
 
19
23
  class ChatCompletionRequest(BaseModel):
@@ -30,7 +34,9 @@ class ChatCompletionRequest(BaseModel):
30
34
  user: Optional[str] = None
31
35
 
32
36
 
33
- def resolve_connection_and_model(config: Config, external_model: str) -> tuple[str, str]:
37
+ def resolve_connection_and_model(
38
+ config: Config, external_model: str
39
+ ) -> tuple[str, str]:
34
40
  for model_match, rule in config.routing.items():
35
41
  if fnmatch.fnmatchcase(external_model, model_match):
36
42
  connection_name, model_part = rule.split(".", 1)
@@ -45,11 +51,14 @@ def resolve_connection_and_model(config: Config, external_model: str) -> tuple[s
45
51
 
46
52
  raise ValueError(
47
53
  f"No routing rule matched model '{external_model}'. "
48
- "Add a catch-all rule like \"*\" = \"openai.gpt-3.5-turbo\" if desired."
54
+ 'Add a catch-all rule like "*" = "openai.gpt-3.5-turbo" if desired.'
49
55
  )
50
56
 
51
57
 
52
- async def process_stream(async_llm_func, prompt, llm_params):
58
+ async def process_stream(
59
+ async_llm_func, request: ChatCompletionRequest, llm_params, log_entry: LogEntry
60
+ ):
61
+ prompt = request.messages
53
62
  queue = asyncio.Queue()
54
63
  stream_id = f"chatcmpl-{secrets.token_hex(12)}"
55
64
  created = int(time.time())
@@ -67,20 +76,18 @@ async def process_stream(async_llm_func, prompt, llm_params):
67
76
  "choices": [{"index": 0, "delta": delta}],
68
77
  }
69
78
  if error is not None:
70
- obj['error'] = {'message': str(error), 'type': type(error).__name__}
79
+ obj["error"] = {"message": str(error), "type": type(error).__name__}
71
80
  if finish_reason is None:
72
- finish_reason = 'error'
81
+ finish_reason = "error"
73
82
  if finish_reason is not None:
74
- obj['choices'][0]['finish_reason'] = finish_reason
83
+ obj["choices"][0]["finish_reason"] = finish_reason
75
84
  return "data: " + json.dumps(obj) + "\n\n"
76
85
 
77
- task = asyncio.create_task(
78
- async_llm_func(prompt, **llm_params, callback=callback)
79
- )
86
+ task = asyncio.create_task(async_llm_func(prompt, **llm_params, callback=callback))
80
87
 
81
88
  try:
82
89
  # Initial chunk: role
83
- yield make_chunk(delta={'role': 'assistant'})
90
+ yield make_chunk(delta={"role": "assistant"})
84
91
 
85
92
  while not task.done():
86
93
  try:
@@ -96,13 +103,16 @@ async def process_stream(async_llm_func, prompt, llm_params):
96
103
 
97
104
  finally:
98
105
  try:
99
- await task
106
+ result = await task
107
+ log_entry.response = result
100
108
  except Exception as e:
101
- yield make_chunk(error={'message': str(e), 'type': type(e).__name__})
109
+ log_entry.error = e
110
+ yield make_chunk(error={"message": str(e), "type": type(e).__name__})
102
111
 
103
112
  # Final chunk: finish_reason
104
- yield make_chunk(finish_reason='stop')
113
+ yield make_chunk(finish_reason="stop")
105
114
  yield "data: [DONE]\n\n"
115
+ await log_non_blocking(log_entry)
106
116
 
107
117
 
108
118
  def read_api_key(request: Request) -> str:
@@ -116,13 +126,33 @@ def read_api_key(request: Request) -> str:
116
126
  return ""
117
127
 
118
128
 
119
- def check_api_key(api_key: Optional[str]) -> Group:
129
def check_api_key(api_key: Optional[str]) -> Optional[str]:
    """
    Validate a Client API key against the configured groups.

    Args:
        api_key (Optional[str]): The Virtual / Client API key to validate.
    Returns:
        Optional[str]: The name of the first configured group whose ``api_keys``
            list contains the key, or None when no group contains it.
    """
    # Note: the group *name* is returned (a key of env.config.groups),
    # not the Group object itself.
    for group_name, group in env.config.groups.items():
        if api_key in group.api_keys:
            return group_name
    return None
143
+
123
144
 
145
def api_key_id(api_key: Optional[str]) -> str | None:
    """
    Derive a stable identifier for a Client API key.

    The identifier is the hex MD5 digest of the key concatenated with
    ``env.config.encryption_key``. Returns None for an empty or missing key.
    """
    if api_key:
        salted_key = api_key + env.config.encryption_key
        digest = hashlib.md5(salted_key.encode("utf-8"))
        return digest.hexdigest()
    return None
124
151
 
125
- async def chat_completions(request: ChatCompletionRequest, raw_request: Request) -> Response:
152
+
153
+ async def chat_completions(
154
+ request: ChatCompletionRequest, raw_request: Request
155
+ ) -> Response:
126
156
  """
127
157
  Endpoint for chat completions that mimics OpenAI's API structure.
128
158
  Streams the response from the LLM using microcore.
@@ -141,13 +171,19 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
141
171
  )
142
172
  api_key = read_api_key(raw_request)
143
173
  group: str | bool | None = (env.config.check_api_key)(api_key)
174
+ log_entry = LogEntry(
175
+ request=request,
176
+ api_key_id=api_key_id(api_key),
177
+ group=group if isinstance(group, str) else None,
178
+ remote_addr=get_client_ip(raw_request),
179
+ )
144
180
  if not group:
145
181
  raise HTTPException(
146
182
  status_code=403,
147
183
  detail={
148
184
  "error": {
149
185
  "message": "Incorrect API key provided: "
150
- "your API key is invalid, expired, or revoked.",
186
+ "your API key is invalid, expired, or revoked.",
151
187
  "type": "invalid_request_error",
152
188
  "param": None,
153
189
  "code": "invalid_api_key",
@@ -155,17 +191,17 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
155
191
  },
156
192
  )
157
193
 
158
- llm_params = request.model_dump(exclude={'messages'}, exclude_none=True)
194
+ llm_params = request.model_dump(exclude={"messages"}, exclude_none=True)
159
195
 
160
196
  connection, llm_params["model"] = resolve_connection_and_model(
161
- env.config,
162
- llm_params.get("model", "default_model")
197
+ env.config, llm_params.get("model", "default_model")
163
198
  )
199
+ log_entry.connection = connection
164
200
  logging.debug(
165
201
  "Resolved routing for [%s] --> connection: %s, model: %s",
166
202
  request.model,
167
203
  connection,
168
- llm_params["model"]
204
+ llm_params["model"],
169
205
  )
170
206
 
171
207
  if not env.config.groups[group].allows_connecting_to(connection):
@@ -186,18 +222,27 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
186
222
  logging.info("Querying LLM... params: %s", llm_params)
187
223
  if request.stream:
188
224
  return StreamingResponse(
189
- process_stream(async_llm_func, request.messages, llm_params),
190
- media_type="text/event-stream"
225
+ process_stream(async_llm_func, request, llm_params, log_entry),
226
+ media_type="text/event-stream",
191
227
  )
192
- out = await async_llm_func(request.messages, **llm_params)
193
- logging.info("LLM response: %s", out)
228
+
229
+ try:
230
+ out = await async_llm_func(request.messages, **llm_params)
231
+ log_entry.response = out
232
+ logging.info("LLM response: %s", out)
233
+ except Exception as e:
234
+ log_entry.error = e
235
+ await log_non_blocking(log_entry)
236
+ raise
237
+ await log_non_blocking(log_entry)
238
+
194
239
  return JSONResponse(
195
240
  {
196
241
  "choices": [
197
242
  {
198
243
  "index": 0,
199
244
  "message": {"role": "assistant", "content": str(out)},
200
- "finish_reason": "stop"
245
+ "finish_reason": "stop",
201
246
  }
202
247
  ]
203
248
  }
@@ -0,0 +1,11 @@
1
+ from .base_logger import BaseLogger, LogEntryTransformer
2
+ from .log_writers import JsonLogWriter
3
+ from .core import LogEntry, log_non_blocking
4
+
5
+ __all__ = [
6
+ "BaseLogger",
7
+ "LogEntryTransformer",
8
+ "JsonLogWriter",
9
+ "LogEntry",
10
+ "log_non_blocking",
11
+ ]
@@ -0,0 +1,56 @@
1
+ import abc
2
+ from dataclasses import dataclass, field
3
+
4
+ from lm_proxy.utils import resolve_instance_or_callable
5
+
6
+ from ..utils import resolve_obj_path
7
+ from .core import LogEntry
8
+
9
+
10
class AbstractLogEntryTransformer(abc.ABC):
    """Interface for callables that turn a LogEntry into a plain dict."""

    @abc.abstractmethod
    def __call__(self, log_entry: LogEntry) -> dict:
        """Return the dict representation of ``log_entry``."""
        raise NotImplementedError
14
+
15
+
16
class LogEntryTransformer(AbstractLogEntryTransformer):
    """
    Builds a dict from a LogEntry using a mapping of output key -> dotted path.

    Each keyword argument given to the constructor names an output key; its
    value is a dotted path resolved against the log entry with
    ``resolve_obj_path``.
    """

    def __init__(self, **kwargs):
        # Output key -> dotted path into the log entry.
        self.mapping = kwargs

    def __call__(self, log_entry: LogEntry) -> dict:
        """Resolve every configured path against ``log_entry``."""
        return {
            output_key: resolve_obj_path(log_entry, source_path)
            for output_key, source_path in self.mapping.items()
        }
25
+
26
+
27
class AbstractLogWriter(abc.ABC):
    """Interface for callables that persist one already-transformed log record."""

    @abc.abstractmethod
    def __call__(self, logged_data: dict) -> None:
        """
        Write ``logged_data`` to the underlying destination.

        The return value is not used by BaseLogger, so implementations
        are expected to return nothing.
        """
        raise NotImplementedError()
31
+
32
+
33
@dataclass
class BaseLogger:
    """
    Logger that converts a LogEntry to a dict and passes it to a log writer.

    Both fields accept either a ready instance/callable or a configuration
    (import path string, or dict with a ``class`` key) that is resolved via
    ``resolve_instance_or_callable`` right after construction.
    """

    # Destination for transformed records (required).
    log_writer: AbstractLogWriter | str | dict
    # Optional transformer; when absent, LogEntry.to_dict() is used instead.
    entry_transformer: AbstractLogEntryTransformer | str | dict | None = field(
        default=None
    )

    def __post_init__(self):
        self.entry_transformer = resolve_instance_or_callable(
            self.entry_transformer,
            debug_name="logging.<logger>.entry_transformer",
        )
        self.log_writer = resolve_instance_or_callable(
            self.log_writer,
            debug_name="logging.<logger>.log_writer",
        )
        # A falsy writer config resolves to None; fail at configuration time
        # instead of with "'NoneType' object is not callable" on every log call.
        if self.log_writer is None:
            raise ValueError(
                "Invalid logging.<logger>.log_writer config: a log writer is required"
            )

    def _transform(self, log_entry: LogEntry) -> dict:
        """Return the dict to be written for ``log_entry``."""
        if self.entry_transformer:
            return self.entry_transformer(log_entry)
        return log_entry.to_dict()

    def __call__(self, log_entry: LogEntry):
        """Transform ``log_entry`` and hand the result to the log writer."""
        self.log_writer(self._transform(log_entry))
@@ -0,0 +1,53 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import Optional, TYPE_CHECKING
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+
7
+ import microcore as mc
8
+ from ..bootstrap import env
9
+
10
+ if TYPE_CHECKING:
11
+ from lm_proxy.core import ChatCompletionRequest, Group
12
+
13
+
14
@dataclass
class LogEntry:
    """Data collected about one proxied chat-completion request."""

    # The incoming request model.
    request: "ChatCompletionRequest" = field()
    # LLM result, set once the upstream call succeeds.
    response: Optional[mc.LLMResponse] = field(default=None)
    # Exception raised by the upstream call, if any.
    error: Optional[Exception] = field(default=None)
    # Name of the group matched by the client API key (a string, not a Group object).
    group: Optional[str] = field(default=None)
    # Name of the connection the request was routed to.
    connection: Optional[str] = field(default=None)
    api_key_id: Optional[str] = field(default=None)
    remote_addr: Optional[str] = field(default=None)
    # Creation time of the entry; used by log() to compute the duration.
    created_at: Optional[datetime] = field(default_factory=datetime.now)
    # Seconds between creation and logging; filled in by log() when left as None.
    duration: Optional[float] = field(default=None)

    def to_dict(self) -> dict:
        """
        Return a shallow dict copy of the entry.

        The request, when present, is replaced by its JSON-mode ``model_dump``;
        all other values are copied as-is.
        """
        data = self.__dict__.copy()
        if self.request:
            data["request"] = self.request.model_dump(mode="json")
        return data
31
+
32
+
33
# Strong references to in-flight async logger tasks: the event loop only keeps
# weak references, so an unreferenced task may be garbage-collected mid-run.
_pending_logger_tasks: set = set()


def _finalize_logger_task(task: asyncio.Task) -> None:
    """Release a finished logger task and report its failure, if any."""
    _pending_logger_tasks.discard(task)
    if not task.cancelled() and task.exception() is not None:
        logging.error("Error in logger handler: %s", task.exception())


async def log(log_entry: LogEntry):
    """
    Pass ``log_entry`` to every logger in ``env.config.loggers``.

    Fills in ``log_entry.duration`` (seconds since ``created_at``) when it is
    still None. Each handler is called with the entry; when the call returns a
    coroutine or future (async function, or callable object with an async
    ``__call__``) it is scheduled as a background task. Synchronous handlers
    run inline in this coroutine.

    Raises:
        Exception: whatever a handler raises when called; it is logged first
            and the remaining handlers are not run.
    """
    if log_entry.duration is None and log_entry.created_at:
        log_entry.duration = (datetime.now() - log_entry.created_at).total_seconds()
    for handler in env.config.loggers:
        try:
            outcome = handler(log_entry)
        except Exception as e:
            logging.error("Error in logger handler: %s", e)
            raise
        # Inspect the result rather than the handler: iscoroutinefunction() is
        # False for callable objects whose __call__ is async.
        if asyncio.iscoroutine(outcome) or asyncio.isfuture(outcome):
            task = asyncio.ensure_future(outcome)
            _pending_logger_tasks.add(task)
            task.add_done_callback(_finalize_logger_task)
46
+
47
+
48
async def log_non_blocking(log_entry: LogEntry) -> Optional[asyncio.Task]:
    """
    Schedule ``log(log_entry)`` as a background task.

    Returns the created task, or None when no loggers are configured
    (in which case nothing is scheduled).
    """
    if not env.config.loggers:
        return None
    return asyncio.create_task(log(log_entry))
@@ -0,0 +1,24 @@
1
+ import os
2
+ import json
3
+ from dataclasses import dataclass
4
+
5
+ from .base_logger import AbstractLogWriter
6
+ from ..utils import CustomJsonEncoder
7
+
8
+
9
@dataclass
class JsonLogWriter(AbstractLogWriter):
    """Log writer that appends each record to a file as one JSON line."""

    # Path of the log file; parent directories are created on construction.
    file_name: str

    def __post_init__(self):
        parent_dir = os.path.dirname(self.file_name)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # Opening in append mode creates the file when it does not exist yet.
        with open(self.file_name, "a", encoding="utf-8"):
            pass

    def __call__(self, logged_data: dict):
        """Serialize ``logged_data`` with CustomJsonEncoder and append it."""
        with open(self.file_name, "a", encoding="utf-8") as log_file:
            line = json.dumps(logged_data, cls=CustomJsonEncoder)
            log_file.write(line + "\n")
@@ -0,0 +1,73 @@
1
+ import json
2
+ import inspect
3
+ from typing import Union, Callable
4
+ from datetime import datetime, date, time
5
+
6
+ from microcore.utils import resolve_callable
7
+ from starlette.requests import Request
8
+
9
+
10
def resolve_obj_path(obj, path: str, default=None):
    """
    Walk a dotted path through nested dicts and objects.

    Each path segment is looked up as a key when the current value is a dict
    and as an attribute otherwise. Returns ``default`` as soon as a segment
    cannot be resolved.
    """
    current = obj
    for segment in path.split("."):
        try:
            current = (
                current[segment]
                if isinstance(current, dict)
                else getattr(current, segment)
            )
        except (AttributeError, KeyError, TypeError):
            return default
    return current
21
+
22
+
23
def resolve_instance_or_callable(
    item: Union[str, Callable, dict, None],
    class_key: str = "class",
    debug_name: Union[str, None] = None,
) -> Union[Callable, None]:
    """
    Turn a configuration value into a ready-to-use callable or instance.

    Args:
        item: One of
            - a falsy value: nothing to resolve, None is returned;
            - a dict: ``item[class_key]`` is resolved with ``resolve_callable``
              and called with the remaining entries as keyword arguments;
            - a str: resolved with ``resolve_callable``; a class is instantiated
              without arguments, anything else is returned as-is;
            - a callable: a class is instantiated without arguments, any other
              callable is returned as-is.
        class_key: Name of the dict key holding the class/callable path.
        debug_name: Label used in error messages (defaults to "item").

    Returns:
        The resolved callable/instance, or None for a falsy ``item``.

    Raises:
        ValueError: if a dict lacks ``class_key`` or ``item`` has an
            unsupported type.
    """
    if not item:
        return None
    if isinstance(item, dict):
        if class_key not in item:
            raise ValueError(
                f"'{class_key}' key is missing in {debug_name or 'item'} config: {item}"
            )
        # Build the kwargs from a filtered copy: popping the key would mutate
        # the caller's config and break a second resolution of the same dict.
        init_kwargs = {key: value for key, value in item.items() if key != class_key}
        constructor = resolve_callable(item[class_key])
        return constructor(**init_kwargs)
    if isinstance(item, str):
        fn = resolve_callable(item)
        return fn() if inspect.isclass(fn) else fn
    if callable(item):
        return item() if inspect.isclass(item) else item
    raise ValueError(f"Invalid {debug_name or 'item'} config: {item}")
43
+
44
+
45
class CustomJsonEncoder(json.JSONEncoder):
    """
    JSON encoder that also handles dates, exceptions and model-like objects.

    Conversion order for values the standard encoder cannot handle:
      1. ``datetime`` / ``date`` / ``time``: ISO 8601 string;
      2. exceptions: ``{"type": <class name>, "message": str(exc)}``;
      3. objects with a callable ``model_dump`` or ``dict``: its result;
      4. objects with ``__dict__``: that attribute dict;
      5. anything else: ``TypeError`` from the base class.
    """

    def default(self, obj):
        if isinstance(obj, (datetime, date, time)):
            return obj.isoformat()
        # An exception's __dict__ is usually empty, which would serialize it
        # as {} and drop both its type and its message.
        if isinstance(obj, BaseException):
            return {"type": type(obj).__name__, "message": str(obj)}
        # Prefer explicit dump methods: nearly every object has __dict__, so
        # checking it first would make these branches unreachable.
        for dump_name in ("model_dump", "dict"):
            dump = getattr(obj, dump_name, None)
            if callable(dump):
                return dump()
        if hasattr(obj, "__dict__"):
            return obj.__dict__
        return super().default(obj)
60
+
61
+
62
def _parse_forwarded_for(header: str) -> str | None:
    """Return the ``for`` value of the first hop in a Forwarded header, if any."""
    first_hop = header.split(",", 1)[0]
    for pair in first_hop.split(";"):
        name, separator, value = pair.partition("=")
        # Parameter names are case-insensitive; values may be quoted.
        if separator and name.strip().lower() == "for":
            return value.strip().strip('"') or None
    return None


def get_client_ip(request: Request) -> str:
    """
    Determine the client address of ``request``.

    Headers are tried in order of preference: the first entry of
    ``X-Forwarded-For``, then ``X-Real-IP``, then the ``for`` parameter of the
    first hop in ``Forwarded`` (RFC 7239). When none yields a value, the
    direct peer address is used, or "unknown" if the request has no client.
    """
    if forwarded_for := request.headers.get("X-Forwarded-For"):
        return forwarded_for.split(",")[0].strip()
    if real_ip := request.headers.get("X-Real-IP"):
        return real_ip
    if forwarded := request.headers.get("Forwarded"):
        # A Forwarded header without a "for" parameter falls through to the
        # direct client instead of raising IndexError.
        if client := _parse_forwarded_for(forwarded):
            return client

    # Fallback to direct client
    return request.client.host if request.client else "unknown"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "inference-proxy"
3
- version = "0.2.2"
3
+ version = "0.3.0"
4
4
  description = "\"Inference Proxy\" is OpenAI-compatible http proxy server for inferencing various LLMs capable of working with Google, Anthropic, OpenAI APIs, local PyTorch inference, etc."
5
5
  readme = "README.md"
6
6
  keywords = ["llm", "large language models", "ai", "gpt", "openai", "proxy", "http", "proxy-server"]
@@ -43,7 +43,11 @@ package-mode = true
43
43
  packages = [{ include = "lm_proxy"}]
44
44
 
45
45
  [tool.poetry.group.test.dependencies]
46
- pytest = "^7.4.3"
46
+ pytest = "~=8.4.2"
47
+ pytest-asyncio = "~=1.2.0"
47
48
 
48
49
  [tool.poetry.scripts]
49
50
  inference-proxy = "lm_proxy.app:cli_app"
51
+
52
+ [tool.pytest.ini_options]
53
+ asyncio_mode = "auto"
File without changes