cli2api-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli2api/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """CLI2API - OpenAI-compatible API over CLI tools."""
+
+ __version__ = "0.1.0"
cli2api/__main__.py ADDED
@@ -0,0 +1,22 @@
+ """Entry point for running CLI2API as a module."""
+
+ import uvicorn
+
+ from cli2api.api.dependencies import get_settings
+
+
+ def main():
+     """Run the CLI2API server."""
+     settings = get_settings()
+
+     uvicorn.run(
+         "cli2api.main:app",
+         host=settings.host,
+         port=settings.port,
+         reload=settings.debug,
+         log_level=settings.log_level.lower(),
+     )
+
+
+ if __name__ == "__main__":
+     main()
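For orientation (not part of the packaged diff): a minimal sketch of launching the server programmatically rather than via `python -m cli2api`. Only the CLI2API_CLAUDE_CLI_PATH variable name is confirmed by this diff; the path value is a placeholder.

    import os

    # Hypothetical path; point this at wherever the Claude CLI binary actually lives.
    os.environ.setdefault("CLI2API_CLAUDE_CLI_PATH", "/usr/local/bin/claude")

    from cli2api.__main__ import main

    main()  # serves "cli2api.main:app" on settings.host:settings.port until interrupted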
cli2api/api/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """API module with FastAPI routers."""
+
+ from cli2api.api.router import api_router
+
+ __all__ = ["api_router"]
cli2api/api/dependencies.py ADDED
@@ -0,0 +1,38 @@
+ """FastAPI dependencies for dependency injection."""
+
+ from functools import lru_cache
+ from pathlib import Path
+
+ from cli2api.config.settings import Settings
+ from cli2api.providers.claude import ClaudeCodeProvider
+
+
+ @lru_cache
+ def get_settings() -> Settings:
+     """Get cached application settings.
+
+     Returns:
+         Singleton Settings instance.
+     """
+     return Settings()
+
+
+ @lru_cache
+ def get_provider() -> ClaudeCodeProvider:
+     """Get cached Claude provider.
+
+     Returns:
+         Singleton ClaudeCodeProvider instance.
+
+     Raises:
+         RuntimeError: If Claude CLI is not configured.
+     """
+     settings = get_settings()
+     if not settings.claude_cli_path:
+         raise RuntimeError("Claude CLI not found. Set CLI2API_CLAUDE_CLI_PATH.")
+
+     return ClaudeCodeProvider(
+         executable_path=Path(settings.claude_cli_path),
+         default_timeout=settings.default_timeout,
+         models=settings.get_claude_models(),
+     )
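A note on the @lru_cache singletons above: in tests you can either override the dependency through FastAPI or clear the cache when environment variables change. A hedged sketch, assuming the app lives at cli2api.main (as the "cli2api.main:app" target in __main__.py suggests) and that a stub provider exposes the same execute()/execute_stream() interface:

    from fastapi.testclient import TestClient

    from cli2api.api.dependencies import get_provider, get_settings
    from cli2api.main import app  # assumed location, per "cli2api.main:app" above


    def stub_provider():
        ...  # return an object exposing execute() / execute_stream()


    app.dependency_overrides[get_provider] = stub_provider
    client = TestClient(app)

    # If a test changes CLI2API_* environment variables instead,
    # drop the cached Settings so the new values are picked up:
    get_settings.cache_clear()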
cli2api/api/router.py ADDED
@@ -0,0 +1,12 @@
+ """Main API router combining all endpoints."""
+
+ from fastapi import APIRouter
+
+ from cli2api.api.v1 import chat, models, responses
+
+ api_router = APIRouter()
+
+ # Include v1 routers
+ api_router.include_router(chat.router, prefix="/v1", tags=["chat"])
+ api_router.include_router(models.router, prefix="/v1", tags=["models"])
+ api_router.include_router(responses.router, prefix="/v1", tags=["responses"])
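cli2api/main.py itself is not shown in this diff, but given the uvicorn target "cli2api.main:app", the wiring presumably looks roughly like this sketch (the title is invented):

    from fastapi import FastAPI

    from cli2api.api import api_router

    app = FastAPI(title="cli2api")  # hypothetical title
    app.include_router(api_router)  # mounts /v1/chat/completions, /v1/models, /v1/responses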
cli2api/api/utils.py ADDED
@@ -0,0 +1,15 @@
+ """Shared utilities for API endpoints."""
+
+
+ def parse_model_name(model_id: str) -> str:
+     """Extract actual model name from prefixed format.
+
+     Args:
+         model_id: Model ID like "claude: sonnet" or "sonnet".
+
+     Returns:
+         Actual model name like "sonnet".
+     """
+     if ": " in model_id:
+         return model_id.split(": ", 1)[1]
+     return model_id
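The prefix handling follows directly from the code: only the two-character separator ": " (colon plus space) triggers a split.

    assert parse_model_name("claude: sonnet") == "sonnet"
    assert parse_model_name("sonnet") == "sonnet"
    assert parse_model_name("claude:sonnet") == "claude:sonnet"  # no space, so left unchanged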
cli2api/api/v1/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """API v1 endpoints."""
+
+ from cli2api.api.v1 import chat, models, responses
+
+ __all__ = ["chat", "models", "responses"]
cli2api/api/v1/chat.py ADDED
@@ -0,0 +1,378 @@
+ """Chat completions endpoint - OpenAI compatible."""
+
+ import time
+ import uuid
+ from typing import AsyncIterator
+
+ from fastapi import APIRouter, Depends, HTTPException
+ from fastapi.responses import StreamingResponse
+
+ from cli2api.api.dependencies import get_provider
+ from cli2api.api.utils import parse_model_name
+ from cli2api.providers.claude import ClaudeCodeProvider
+ from cli2api.schemas.openai import (
+     ChatCompletionChunk,
+     ChatCompletionChoice,
+     ChatCompletionRequest,
+     ChatCompletionResponse,
+     ChatMessage,
+     DeltaContent,
+     ResponseMessage,
+     StreamChoice,
+     ToolCall,
+     ToolCallFunction,
+     UsageInfo,
+ )
+ from cli2api.streaming.sse import sse_encode, sse_error
+ from cli2api.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+ router = APIRouter()
+
+
+ @router.post("/chat/completions")
+ async def chat_completions(
+     request: ChatCompletionRequest,
+     provider: ClaudeCodeProvider = Depends(get_provider),
+ ):
+     """Create a chat completion.
+
+     OpenAI-compatible endpoint supporting both streaming and non-streaming modes.
+
+     Args:
+         request: Chat completion request.
+         provider: Claude provider (injected).
+
+     Returns:
+         ChatCompletionResponse for non-streaming, StreamingResponse for streaming.
+     """
+     actual_model = parse_model_name(request.model)
+     completion_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+
+     if request.stream:
+         return StreamingResponse(
+             stream_completion(
+                 provider=provider,
+                 messages=request.messages,
+                 model=actual_model,
+                 completion_id=completion_id,
+                 tools=request.tools,
+                 reasoning_effort=request.reasoning_effort,
+             ),
+             media_type="text/event-stream",
+             headers={
+                 "Cache-Control": "no-cache",
+                 "Connection": "keep-alive",
+                 "X-Accel-Buffering": "no",
+             },
+         )
+     else:
+         # Non-streaming response
+         try:
+             result = await provider.execute(
+                 messages=request.messages,
+                 model=actual_model,
+                 tools=request.tools,
+             )
+         except TimeoutError as e:
+             raise HTTPException(status_code=504, detail=str(e))
+         except RuntimeError as e:
+             raise HTTPException(status_code=500, detail=str(e))
+
+         # Build usage info
+         usage = UsageInfo()
+         if result.usage:
+             usage = UsageInfo(
+                 prompt_tokens=result.usage.get("input_tokens", 0),
+                 completion_tokens=result.usage.get("output_tokens", 0),
+                 total_tokens=(
+                     result.usage.get("input_tokens", 0)
+                     + result.usage.get("output_tokens", 0)
+                 ),
+             )
+
+         # Check for tool_calls in result
+         if result.tool_calls:
+             tool_calls = [
+                 ToolCall(
+                     id=tc["id"],
+                     type=tc.get("type", "function"),
+                     function=ToolCallFunction(
+                         name=tc["function"]["name"],
+                         arguments=tc["function"]["arguments"],
+                     ),
+                 )
+                 for tc in result.tool_calls
+             ]
+             response = ChatCompletionResponse(
+                 id=completion_id,
+                 model=request.model,
+                 choices=[
+                     ChatCompletionChoice(
+                         index=0,
+                         message=ResponseMessage(
+                             role="assistant",
+                             content=result.content if result.content else None,
+                             tool_calls=tool_calls,
+                         ),
+                         finish_reason="tool_calls",
+                     )
+                 ],
+                 usage=usage,
+             )
+             return response.model_dump(exclude_none=True)
+
+         response = ChatCompletionResponse(
+             id=completion_id,
+             model=request.model,
+             choices=[
+                 ChatCompletionChoice(
+                     index=0,
+                     message=ResponseMessage(role="assistant", content=result.content),
+                     finish_reason="stop",
+                 )
+             ],
+             usage=usage,
+         )
+         return response.model_dump(exclude_none=True)
+
+
+ def split_content_chunks(content: str, max_size: int = 150) -> list[str]:
+     """Split large content into smaller chunks.
+
+     Tries to split on word boundaries for cleaner output.
+
+     Args:
+         content: Content to split.
+         max_size: Maximum chunk size in characters.
+
+     Returns:
+         List of content chunks.
+     """
+     if len(content) <= max_size:
+         return [content]
+
+     chunks = []
+     remaining = content
+
+     while remaining:
+         if len(remaining) <= max_size:
+             chunks.append(remaining)
+             break
+
+         # Try to find a good split point (space, newline)
+         split_at = max_size
+         for sep in [" ", "\n", ".", ",", ";"]:
+             pos = remaining.rfind(sep, 0, max_size)
+             if pos > max_size // 2:  # Don't split too early
+                 split_at = pos + 1
+                 break
+
+         chunks.append(remaining[:split_at])
+         remaining = remaining[split_at:]
+
+     return chunks
+
+
+ async def stream_completion(
+     provider: ClaudeCodeProvider,
+     messages: list[ChatMessage],
+     model: str,
+     completion_id: str,
+     tools: list[dict] | None = None,
+     reasoning_effort: str | None = None,
+ ) -> AsyncIterator[str]:
+     """Generate SSE events for a streaming completion.
+
+     Args:
+         provider: The Claude provider to use.
+         messages: Chat messages.
+         model: Model identifier.
+         completion_id: Unique completion ID.
+         tools: Optional tool definitions.
+         reasoning_effort: Reasoning effort for extended thinking (low/medium/high).
+
+     Yields:
+         SSE-encoded strings.
+     """
+     created = int(time.time())
+     sent_final = False
+
+     logger.info(f"[{completion_id}] Starting stream for model={model}")
+
+     try:
+         # First chunk with role
+         first_chunk = ChatCompletionChunk(
+             id=completion_id,
+             created=created,
+             model=model,
+             choices=[
+                 StreamChoice(
+                     index=0,
+                     delta=DeltaContent(role="assistant"),
+                     finish_reason=None,
+                 )
+             ],
+         )
+         yield sse_encode(first_chunk.model_dump())
+
+         # When tools are provided, buffer content to check for tool_calls before sending
+         content_buffer = "" if tools else None
+
+         # Stream content chunks
+         async for chunk in provider.execute_stream(
+             messages=messages, model=model, tools=tools, reasoning_effort=reasoning_effort
+         ):
+             if chunk.is_final:
+                 if not sent_final:
+                     # Check for tool_calls in chunk or buffered content
+                     tool_calls_data = chunk.tool_calls
+
+                     # If we buffered content, check it for tool_calls
+                     if content_buffer and not tool_calls_data:
+                         from cli2api.tools.handler import ToolHandler
+                         _, parsed_tools = ToolHandler.parse_tool_calls(content_buffer)
+                         if parsed_tools:
+                             tool_calls_data = parsed_tools
+
+                     if tool_calls_data:
+                         tool_calls = [
+                             ToolCall(
+                                 id=tc["id"],
+                                 type=tc.get("type", "function"),
+                                 function=ToolCallFunction(
+                                     name=tc["function"]["name"],
+                                     arguments=tc["function"]["arguments"],
+                                 ),
+                             )
+                             for tc in tool_calls_data
+                         ]
+                         tool_chunk = ChatCompletionChunk(
+                             id=completion_id,
+                             created=created,
+                             model=model,
+                             choices=[
+                                 StreamChoice(
+                                     index=0,
+                                     delta=DeltaContent(tool_calls=tool_calls),
+                                     finish_reason="tool_calls",
+                                 )
+                             ],
+                         )
+                         yield sse_encode(tool_chunk.model_dump())
+                     else:
+                         # No tool_calls - send buffered content if any
+                         if content_buffer:
+                             content_parts = split_content_chunks(content_buffer)
+                             for part in content_parts:
+                                 content_chunk = ChatCompletionChunk(
+                                     id=completion_id,
+                                     created=created,
+                                     model=model,
+                                     choices=[
+                                         StreamChoice(
+                                             index=0,
+                                             delta=DeltaContent(content=part),
+                                             finish_reason=None,
+                                         )
+                                     ],
+                                 )
+                                 yield sse_encode(content_chunk.model_dump())
+
+                         # Normal final chunk
+                         final_chunk = ChatCompletionChunk(
+                             id=completion_id,
+                             created=created,
+                             model=model,
+                             choices=[
+                                 StreamChoice(
+                                     index=0,
+                                     delta=DeltaContent(),
+                                     finish_reason="stop",
+                                 )
+                             ],
+                         )
+                         yield sse_encode(final_chunk.model_dump())
+                     sent_final = True
+
+             elif chunk.reasoning:
+                 # Stream reasoning/thinking content
+                 from cli2api.schemas.openai import ReasoningDetail
+                 reasoning_chunk = ChatCompletionChunk(
+                     id=completion_id,
+                     created=created,
+                     model=model,
+                     choices=[
+                         StreamChoice(
+                             index=0,
+                             delta=DeltaContent(
+                                 reasoning_details=[
+                                     ReasoningDetail(
+                                         type="reasoning.text",
+                                         text=chunk.reasoning,
+                                     )
+                                 ]
+                             ),
+                             finish_reason=None,
+                         )
+                     ],
+                 )
+                 yield sse_encode(reasoning_chunk.model_dump())
+
+             elif chunk.content:
+                 # Check if this is a step indicator (should be streamed immediately)
+                 is_step = chunk.content.startswith("`") and any(
+                     emoji in chunk.content for emoji in ["🤔", "⚡", "🔍", "📄", "🔧", "✏️"]
+                 )
+
+                 if content_buffer is not None and not is_step:
+                     # Buffer regular content when tools are active (might be tool_call JSON)
+                     content_buffer += chunk.content
+                 else:
+                     # Stream step indicators and regular content immediately
+                     content_parts = split_content_chunks(chunk.content)
+                     for part in content_parts:
+                         content_chunk = ChatCompletionChunk(
+                             id=completion_id,
+                             created=created,
+                             model=model,
+                             choices=[
+                                 StreamChoice(
+                                     index=0,
+                                     delta=DeltaContent(content=part),
+                                     finish_reason=None,
+                                 )
+                             ],
+                         )
+                         yield sse_encode(content_chunk.model_dump())
+
+         # Ensure final chunk is sent
+         if not sent_final:
+             final_chunk = ChatCompletionChunk(
+                 id=completion_id,
+                 created=created,
+                 model=model,
+                 choices=[
+                     StreamChoice(
+                         index=0,
+                         delta=DeltaContent(),
+                         finish_reason="stop",
+                     )
+                 ],
+             )
+             yield sse_encode(final_chunk.model_dump())
+
+         # Final [DONE] event
+         logger.info(f"[{completion_id}] Stream completed successfully")
+         yield "data: [DONE]\n\n"
+
+     except RuntimeError as e:
+         logger.error(f"[{completion_id}] Provider error: {e}")
+         yield sse_error(str(e))
+         yield "data: [DONE]\n\n"
+     except Exception as e:
+         logger.error(f"[{completion_id}] Stream error: {e}")
+         yield sse_error(str(e))
+         yield "data: [DONE]\n\n"
cli2api/api/v1/models.py ADDED
@@ -0,0 +1,52 @@
+ """Models endpoint - OpenAI compatible."""
+
+ from fastapi import APIRouter, Depends, HTTPException
+
+ from cli2api.api.dependencies import get_provider
+ from cli2api.providers.claude import ClaudeCodeProvider
+ from cli2api.schemas.openai import ModelInfo, ModelsResponse
+
+ router = APIRouter()
+
+
+ @router.get("/models")
+ async def list_models(
+     provider: ClaudeCodeProvider = Depends(get_provider),
+ ) -> ModelsResponse:
+     """List available models.
+
+     Returns:
+         ModelsResponse with all available Claude models.
+     """
+     models = []
+     for model_id in provider.supported_models:
+         # Format as "claude: model"
+         full_id = f"claude: {model_id}"
+         models.append(ModelInfo(id=full_id, owned_by="claude"))
+     return ModelsResponse(data=sorted(models, key=lambda m: m.id))
+
+
+ @router.get("/models/{model_id:path}")
+ async def get_model(
+     model_id: str,
+     provider: ClaudeCodeProvider = Depends(get_provider),
+ ) -> ModelInfo:
+     """Get information about a specific model.
+
+     Args:
+         model_id: The model identifier (e.g., "claude: sonnet" or "sonnet").
+         provider: Claude provider (injected).
+
+     Returns:
+         ModelInfo for the requested model.
+
+     Raises:
+         HTTPException: If model not found.
+     """
+     # Parse model name
+     actual_model = model_id.split(": ", 1)[1] if ": " in model_id else model_id
+
+     if actual_model in provider.supported_models:
+         return ModelInfo(id=f"claude: {actual_model}", owned_by="claude")
+
+     raise HTTPException(status_code=404, detail=f"Model not found: {model_id}")
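A matching sketch for the models endpoints; the host and port are assumptions:

    import httpx

    resp = httpx.get("http://localhost:8000/v1/models")
    print([m["id"] for m in resp.json()["data"]])  # e.g. ["claude: opus", "claude: sonnet"]

    # A bare name also resolves, since get_model() strips the "claude: " prefix itself:
    print(httpx.get("http://localhost:8000/v1/models/sonnet").json())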