llms-py 2.0.7__tar.gz → 2.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llms_py-2.0.7/llms_py.egg-info → llms_py-2.0.9}/PKG-INFO +124 -39
- {llms_py-2.0.7 → llms_py-2.0.9}/README.md +123 -38
- {llms_py-2.0.7 → llms_py-2.0.9}/llms.json +35 -5
- {llms_py-2.0.7 → llms_py-2.0.9}/llms.py +140 -10
- {llms_py-2.0.7 → llms_py-2.0.9/llms_py.egg-info}/PKG-INFO +124 -39
- {llms_py-2.0.7 → llms_py-2.0.9}/pyproject.toml +1 -1
- {llms_py-2.0.7 → llms_py-2.0.9}/setup.py +1 -1
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/Main.mjs +1 -1
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/Recents.mjs +31 -2
- {llms_py-2.0.7 → llms_py-2.0.9}/LICENSE +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/MANIFEST.in +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/index.html +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/SOURCES.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/dependency_links.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/entry_points.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/not-zip-safe +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/requires.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/top_level.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/requirements.txt +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/setup.cfg +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/App.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/ChatPrompt.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/Sidebar.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/app.css +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/fav.svg +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/highlight.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/idb.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/marked.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/servicestack-client.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/servicestack-vue.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue-router.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue.min.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/markdown.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/tailwind.input.css +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/threadStore.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/typography.css +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui/utils.mjs +0 -0
- {llms_py-2.0.7 → llms_py-2.0.9}/ui.json +0 -0

--- llms_py-2.0.7/llms_py.egg-info/PKG-INFO
+++ llms_py-2.0.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llms-py
-Version: 2.0.7
+Version: 2.0.9
 Summary: A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers
 Home-page: https://github.com/ServiceStack/llms
 Author: ServiceStack
@@ -51,7 +51,7 @@ Configure additional providers and models in [llms.json](llms.json)
 ## Features
 
 - **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
-- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
+- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
 - **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
 - **Configuration Management**: Easy provider enable/disable and configuration management
 - **CLI Interface**: Simple command-line interface for quick interactions
@@ -510,7 +510,52 @@ llms --default grok-4
 
 # Update llms.py to latest version
 llms --update
-
+
+# Pass custom parameters to chat request (URL-encoded)
+llms --args "temperature=0.7&seed=111" "What is 2+2?"
+
+# Multiple parameters with different types
+llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
+
+# URL-encoded special characters (stop sequences)
+llms --args "stop=Two,Words" "Count to 5"
+
+# Combine with other options
+llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
+```
+
+#### Custom Parameters with `--args`
+
+The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
+
+**Parameter Types:**
+- **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
+- **Integers**: `max_completion_tokens=100`
+- **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
+- **Strings**: `stop=one`
+- **Lists**: `stop=two,words`
+
+**Common Parameters:**
+- `temperature`: Controls randomness (0.0 to 2.0)
+- `max_completion_tokens`: Maximum tokens in response
+- `seed`: For reproducible outputs
+- `top_p`: Nucleus sampling parameter
+- `stop`: Stop sequences (URL-encode special chars)
+- `store`: Whether or not to store the output
+- `frequency_penalty`: Penalize new tokens based on frequency
+- `presence_penalty`: Penalize new tokens based on presence
+- `logprobs`: Include log probabilities in response
+- `parallel_tool_calls`: Enable parallel tool calls
+- `prompt_cache_key`: Cache key for prompt
+- `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
+- `safety_identifier`: A string that uniquely identifies each user
+- `seed`: For reproducible outputs
+- `service_tier`: Service tier (free, standard, premium, *default)
+- `top_logprobs`: Number of top logprobs to return
+- `top_p`: Nucleus sampling parameter
+- `verbosity`: Verbosity level (0, 1, 2, 3, *default)
+- `enable_thinking`: Enable thinking mode (Qwen)
+- `stream`: Enable streaming responses
 
 ### Default Model Configuration
 
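
The typing rules above are implemented by the new `parse_args_params` helper this release adds to llms.py (diffed further below). A condensed, runnable sketch of just the value-typing step:

```python
from urllib.parse import parse_qs

def type_value(value):
    # Booleans first (store=true), then ints (seed=111), then floats
    # (temperature=0.7); anything else stays a string. Comma lists such
    # as stop=two,words are split later, when applied to the request.
    if value.lower() in ('true', 'false'):
        return value.lower() == 'true'
    if value.isdigit():
        return int(value)
    try:
        return float(value)
    except ValueError:
        return value

args = {k: type_value(v[0]) for k, v in parse_qs("temperature=0.7&seed=111&store=true").items()}
print(args)  # {'temperature': 0.7, 'seed': 111, 'store': True}
```
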
@@ -558,6 +603,42 @@ llms "Explain quantum computing" | glow
 
 ## Supported Providers
 
+Any OpenAI-compatible providers and their models can be added by configuring them in [llms.json](./llms.json). By default, only AI Providers with free tiers are enabled, and they will only be "available" if their API Key is set.
+
+You can list the available providers, their models, and which are enabled or disabled with:
+
+```bash
+llms ls
+```
+
+They can be enabled/disabled in your `llms.json` file or with:
+
+```bash
+llms --enable <provider>
+llms --disable <provider>
+```
+
+For a provider to be available, it also requires its API Key configured in either your Environment Variables
+or directly in your `llms.json`.
+
+### Environment Variables
+
+| Provider | Variable | Description | Example |
+|-----------------|---------------------------|---------------------|---------|
+| openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...` |
+| groq | `GROQ_API_KEY` | Groq API key | `gsk_...` |
+| google_free | `GOOGLE_FREE_API_KEY` | Google FREE API key | `AIza...` |
+| codestral | `CODESTRAL_API_KEY` | Codestral API key | `...` |
+| ollama | N/A | No API key required | |
+| openrouter | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
+| google | `GOOGLE_API_KEY` | Google API key | `AIza...` |
+| anthropic | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
+| openai | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
+| grok | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
+| qwen | `DASHSCOPE_API_KEY` | Qwen (Alibaba) API key | `sk-...` |
+| z.ai | `ZAI_API_KEY` | Z.ai API key | `sk-...` |
+| mistral | `MISTRAL_API_KEY` | Mistral API key | `...` |
+
 ### OpenAI
 - **Type**: `OpenAiProvider`
 - **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
@@ -588,6 +669,26 @@ export GOOGLE_API_KEY="your-key"
 llms --enable google_free
 ```
 
+### OpenRouter
+- **Type**: `OpenAiProvider`
+- **Models**: 100+ models from various providers
+- **Features**: Access to latest models, free tier available
+
+```bash
+export OPENROUTER_API_KEY="your-key"
+llms --enable openrouter
+```
+
+### Grok (X.AI)
+- **Type**: `OpenAiProvider`
+- **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
+- **Features**: Real-time information, humor, uncensored responses
+
+```bash
+export GROK_API_KEY="your-key"
+llms --enable grok
+```
+
 ### Groq
 - **Type**: `OpenAiProvider`
 - **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
@@ -608,44 +709,44 @@ llms --enable groq
 llms --enable ollama
 ```
 
-### 
+### Qwen (Alibaba Cloud)
 - **Type**: `OpenAiProvider`
-- **Models**: 
-- **Features**: 
+- **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
+- **Features**: Multilingual, vision models, coding, reasoning, audio processing
 
 ```bash
-export 
-llms --enable 
+export DASHSCOPE_API_KEY="your-key"
+llms --enable qwen
 ```
 
-### 
+### Z.ai
 - **Type**: `OpenAiProvider`
-- **Models**: 
-- **Features**: 
+- **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
+- **Features**: Advanced language models with strong reasoning capabilities
 
 ```bash
-export 
-llms --enable 
+export ZAI_API_KEY="your-key"
+llms --enable z.ai
 ```
 
-### 
+### Mistral
 - **Type**: `OpenAiProvider`
-- **Models**: 
-- **Features**: 
+- **Models**: Mistral Large, Codestral, Pixtral, etc.
+- **Features**: Code generation, multilingual
 
 ```bash
-export 
-llms --enable 
+export MISTRAL_API_KEY="your-key"
+llms --enable mistral
 ```
 
-### 
+### Codestral
 - **Type**: `OpenAiProvider`
-- **Models**: 
-- **Features**: 
+- **Models**: Codestral
+- **Features**: Code generation
 
 ```bash
-export 
-llms --enable 
+export CODESTRAL_API_KEY="your-key"
+llms --enable codestral
 ```
 
 ## Model Routing
@@ -654,22 +755,6 @@ The tool automatically routes requests to the first available provider that supports the requested model
 
 Example: If both OpenAI and OpenRouter support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq, then OpenRouter (paid) if the request fails.
 
-## Environment Variables
-
-| Variable | Description | Example |
-|----------|-------------|---------|
-| `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
-| `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
-| `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
-| `GOOGLE_API_KEY` | Google API key | `AIza...` |
-| `GROQ_API_KEY` | Groq API key | `gsk_...` |
-| `MISTRAL_API_KEY` | Mistral API key | `...` |
-| `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
-| `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
-| `CODESTRAL_API_KEY` | Codestral API key | `...` |
-| `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
-| `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
-
 ## Configuration Examples
 
 ### Minimal Configuration

--- llms_py-2.0.7/README.md
+++ llms_py-2.0.9/README.md
@@ -11,7 +11,7 @@ Configure additional providers and models in [llms.json](llms.json)
@@ -470,7 +470,52 @@ llms --default grok-4
@@ -518,6 +563,42 @@ llms "Explain quantum computing" | glow
@@ -548,6 +629,26 @@ export GOOGLE_API_KEY="your-key"
@@ -568,44 +669,44 @@ llms --enable groq
@@ -614,22 +715,6 @@ The tool automatically routes requests to the first available provider that supports the requested model

The hunk bodies are identical to the PKG-INFO hunks above (PKG-INFO embeds the README as its long description); only the line offsets shift by 40.

--- llms_py-2.0.7/llms.json
+++ llms_py-2.0.9/llms.json
@@ -9,7 +9,12 @@
     "messages": [
       {
         "role": "user",
-        "content": 
+        "content": [
+          {
+            "type": "text",
+            "text": ""
+          }
+        ]
       }
     ]
   },
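
The default chat template now uses OpenAI-style typed content parts instead of a bare string, which lets the CLI splice an image, audio, or file part in next to the text prompt. A sketch of the resulting message shape (the attached image and its URL are illustrative, not part of the config):

```python
message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image"},
        # e.g. what an --image attachment adds alongside the prompt
        {"type": "image_url", "image_url": {"url": "https://example.org/photo.png"}},
    ],
}
```
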
@@ -193,6 +198,9 @@
       "nova-micro": "amazon/nova-micro-v1",
       "nova-lite": "amazon/nova-lite-v1",
       "nova-pro": "amazon/nova-pro-v1",
+      "claude-opus-4-1": "anthropic/claude-opus-4.1",
+      "claude-sonnet-4-5": "anthropic/claude-sonnet-4.5",
+      "claude-sonnet-4-0": "anthropic/claude-sonnet-4",
       "gpt-5": "openai/gpt-5",
       "gpt-5-chat": "openai/gpt-5-chat",
       "gpt-5-mini": "openai/gpt-5-mini",
@@ -210,11 +218,13 @@
       "grok-4": "x-ai/grok-4",
       "grok-4-fast": "x-ai/grok-4-fast",
       "grok-code-fast-1": "x-ai/grok-code-fast-1",
+      "glm-4.6": "z-ai/glm-4.6",
       "glm-4.5v": "z-ai/glm-4.5v",
       "glm-4.5": "z-ai/glm-4.5",
       "glm-4.5-air": "z-ai/glm-4.5-air",
       "kimi-k2": "moonshotai/kimi-k2",
       "deepseek-v3.1:671b": "deepseek/deepseek-chat",
+      "deepseek-v3.2-exp": "deepseek/deepseek-v3.2-exp",
       "deepseek-chat-v3.1:671b": "deepseek/deepseek-chat-v3.1:free",
       "deepseek-r1:671b": "deepseek/deepseek-r1",
       "deepseek-v3.1-terminus": "deepseek/deepseek-v3.1-terminus",
@@ -263,6 +273,7 @@
     "models": {
       "claude-opus-4-1": "claude-opus-4-1",
       "claude-opus-4": "claude-opus-4",
+      "claude-sonnet-4-5": "claude-sonnet-4-5",
       "claude-sonnet-4-0": "claude-sonnet-4-0",
       "claude-3-7-sonnet": "claude-3-7-sonnet-latest",
       "claude-3-5-haiku": "claude-3-5-haiku-latest",
@@ -383,7 +394,24 @@
       "qwen2.5-vl:7b": "qwen2.5-vl-7b-instruct",
       "qwen2.5-vl:3b": "qwen2.5-vl-3b-instruct",
       "qwen2.5-omni:7b": "qwen2.5-omni-7b"
-    }
+    },
+    "enable_thinking": false
+  },
+  "z.ai": {
+    "enabled": false,
+    "type": "OpenAiProvider",
+    "base_url": "https://api.z.ai/api/paas/v4",
+    "api_key": "$ZAI_API_KEY",
+    "models": {
+      "glm-4.6": "glm-4.6",
+      "glm-4.5": "glm-4.5",
+      "glm-4.5-air": "glm-4.5-air",
+      "glm-4.5-x": "glm-4.5-x",
+      "glm-4.5-airx": "glm-4.5-airx",
+      "glm-4.5-flash": "glm-4.5-flash",
+      "glm-4:32b": "glm-4-32b-0414-128k"
+    },
+    "temperature": 0.7
   },
   "mistral": {
     "enabled": false,
@@ -396,20 +424,22 @@
       "devstral-medium": "devstral-medium-2507",
       "codestral:22b": "codestral-latest",
       "mistral-ocr": "mistral-ocr-latest",
-      "voxtral-mini": "voxtral-mini-latest",
       "mistral-small3.2:24b": "mistral-small-latest",
       "magistral-small": "magistral-small-latest",
       "devstral-small": "devstral-small-2507",
       "voxtral-small": "voxtral-small-latest",
+      "voxtral-mini": "voxtral-mini-latest",
+      "codestral-embed": "codestral-embed-2505",
+      "mistral-embed": "mistral-embed",
       "mistral-large:123b": "mistral-large-latest",
       "pixtral-large:124b": "pixtral-large-latest",
       "pixtral:12b": "pixtral-12b",
-      "mistral-nemo:12b": "mistral-nemo",
+      "mistral-nemo:12b": "open-mistral-nemo",
       "mistral-saba": "mistral-saba-latest",
       "mistral:7b": "open-mistral-7b",
       "mixtral:8x7b": "open-mixtral-8x7b",
       "mixtral:8x22b": "open-mixtral-8x22b",
-      "ministral:8b": "ministral-
+      "ministral:8b": "ministral-8b-latest",
       "ministral:3b": "ministral-3b-latest"
     }
   }

--- llms_py-2.0.7/llms.py
+++ llms_py-2.0.9/llms.py
@@ -14,6 +14,7 @@ import mimetypes
 import traceback
 import sys
 import site
+from urllib.parse import parse_qs
 
 import aiohttp
 from aiohttp import web
@@ -21,7 +22,7 @@ from aiohttp import web
 from pathlib import Path
 from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)
 
-VERSION = "2.0.7"
+VERSION = "2.0.9"
 _ROOT = None
 g_config_path = None
 g_ui_path = None
@@ -63,7 +64,8 @@ def chat_summary(chat):
         elif 'file' in item:
             if 'file_data' in item['file']:
                 data = item['file']['file_data']
-
+                prefix = url.split(',', 1)[0]
+                item['file']['file_data'] = prefix + f",({len(url) - len(prefix)})"
     return json.dumps(clone, indent=2)
 
 def gemini_chat_summary(gemini_chat):
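
The two added lines keep request logs readable by replacing a file's inlined base64 payload with a placeholder carrying its length. A sketch of the effect, with a hypothetical payload:

```python
# `url` holds a data URI whose base64 body should not be logged verbatim
url = "data:application/pdf;base64," + "A" * 12345
prefix = url.split(',', 1)[0]
print(prefix + f",({len(url) - len(prefix)})")
# -> data:application/pdf;base64,(12346)   (count includes the separating comma)
```
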
@@ -89,6 +91,60 @@ def is_url(url):
 def get_filename(file):
     return file.rsplit('/',1)[1] if '/' in file else 'file'
 
+def parse_args_params(args_str):
+    """Parse URL-encoded parameters and return a dictionary."""
+    if not args_str:
+        return {}
+
+    # Parse the URL-encoded string
+    parsed = parse_qs(args_str, keep_blank_values=True)
+
+    # Convert to simple dict with single values (not lists)
+    result = {}
+    for key, values in parsed.items():
+        if len(values) == 1:
+            value = values[0]
+            # Try to convert to appropriate types
+            if value.lower() == 'true':
+                result[key] = True
+            elif value.lower() == 'false':
+                result[key] = False
+            elif value.isdigit():
+                result[key] = int(value)
+            else:
+                try:
+                    # Try to parse as float
+                    result[key] = float(value)
+                except ValueError:
+                    # Keep as string
+                    result[key] = value
+        else:
+            # Multiple values, keep as list
+            result[key] = values
+
+    return result
+
+def apply_args_to_chat(chat, args_params):
+    """Apply parsed arguments to the chat request."""
+    if not args_params:
+        return chat
+
+    # Apply each parameter to the chat request
+    for key, value in args_params.items():
+        if isinstance(value, str):
+            if key == 'stop':
+                if ',' in value:
+                    value = value.split(',')
+            elif key == 'max_completion_tokens' or key == 'max_tokens' or key == 'n' or key == 'seed' or key == 'top_logprobs':
+                value = int(value)
+            elif key == 'temperature' or key == 'top_p' or key == 'frequency_penalty' or key == 'presence_penalty':
+                value = float(value)
+            elif key == 'store' or key == 'logprobs' or key == 'enable_thinking' or key == 'parallel_tool_calls' or key == 'stream':
+                value = bool(value)
+        chat[key] = value
+
+    return chat
+
 def is_base_64(data):
     try:
         base64.b64decode(data)
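
Used together, the two helpers turn a `--args` string into typed fields on the outgoing request. A usage sketch (assuming llms.py imports as a module named `llms`):

```python
from llms import parse_args_params, apply_args_to_chat  # module name assumed

params = parse_args_params("temperature=0.7&seed=111&stop=Two,Words&store=true")
# -> {'temperature': 0.7, 'seed': 111, 'stop': 'Two,Words', 'store': True}

chat = {"model": "kimi-k2", "messages": []}
apply_args_to_chat(chat, params)
# apply_args_to_chat splits 'stop' on commas, so the request now carries
# temperature=0.7, seed=111, stop=['Two', 'Words'] and store=True
```
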
@@ -190,8 +246,9 @@ async def process_chat(chat):
                     content = f.read()
                 file['filename'] = get_filename(url)
                 file['file_data'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
-            elif 
-
+            elif url.startswith('data:'):
+                if 'filename' not in file:
+                    file['filename'] = 'file'
                 pass # use base64 data as-is
             else:
                 raise Exception(f"Invalid file: {url}")
@@ -219,13 +276,38 @@ class OpenAiProvider:
         self.api_key = api_key
         self.models = models
 
-
+        # check if base_url ends with /v{\d} to handle providers with different versions (e.g. z.ai uses /v4)
+        last_segment = base_url.rsplit('/',1)[1]
+        if last_segment.startswith('v') and last_segment[1:].isdigit():
+            self.chat_url = f"{base_url}/chat/completions"
+        else:
+            self.chat_url = f"{base_url}/v1/chat/completions"
+
         self.headers = kwargs['headers'] if 'headers' in kwargs else {
             "Content-Type": "application/json",
         }
         if api_key is not None:
             self.headers["Authorization"] = f"Bearer {api_key}"
 
+        self.frequency_penalty = float(kwargs['frequency_penalty']) if 'frequency_penalty' in kwargs else None
+        self.max_completion_tokens = int(kwargs['max_completion_tokens']) if 'max_completion_tokens' in kwargs else None
+        self.n = int(kwargs['n']) if 'n' in kwargs else None
+        self.parallel_tool_calls = bool(kwargs['parallel_tool_calls']) if 'parallel_tool_calls' in kwargs else None
+        self.presence_penalty = float(kwargs['presence_penalty']) if 'presence_penalty' in kwargs else None
+        self.prompt_cache_key = kwargs['prompt_cache_key'] if 'prompt_cache_key' in kwargs else None
+        self.reasoning_effort = kwargs['reasoning_effort'] if 'reasoning_effort' in kwargs else None
+        self.safety_identifier = kwargs['safety_identifier'] if 'safety_identifier' in kwargs else None
+        self.seed = int(kwargs['seed']) if 'seed' in kwargs else None
+        self.service_tier = kwargs['service_tier'] if 'service_tier' in kwargs else None
+        self.stop = kwargs['stop'] if 'stop' in kwargs else None
+        self.store = bool(kwargs['store']) if 'store' in kwargs else None
+        self.temperature = float(kwargs['temperature']) if 'temperature' in kwargs else None
+        self.top_logprobs = int(kwargs['top_logprobs']) if 'top_logprobs' in kwargs else None
+        self.top_p = float(kwargs['top_p']) if 'top_p' in kwargs else None
+        self.verbosity = kwargs['verbosity'] if 'verbosity' in kwargs else None
+        self.stream = bool(kwargs['stream']) if 'stream' in kwargs else None
+        self.enable_thinking = bool(kwargs['enable_thinking']) if 'enable_thinking' in kwargs else None
+
     @classmethod
     def test(cls, base_url=None, api_key=None, models={}, **kwargs):
         return base_url is not None and api_key is not None and len(models) > 0
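
The new version check means a `base_url` that already ends in a version segment (like Z.ai's `/v4`) is used as-is, while unversioned bases keep the usual `/v1` prefix. A standalone copy of that logic (the Mistral URL is illustrative):

```python
def chat_url_for(base_url):
    last_segment = base_url.rsplit('/', 1)[1]
    if last_segment.startswith('v') and last_segment[1:].isdigit():
        return f"{base_url}/chat/completions"   # already versioned, e.g. /v4
    return f"{base_url}/v1/chat/completions"    # default OpenAI-style /v1

print(chat_url_for("https://api.z.ai/api/paas/v4"))  # https://api.z.ai/api/paas/v4/chat/completions
print(chat_url_for("https://api.mistral.ai"))        # https://api.mistral.ai/v1/chat/completions
```
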
@@ -241,6 +323,41 @@ class OpenAiProvider:
         # with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
         #     f.write(json.dumps(chat, indent=2))
 
+        if self.frequency_penalty is not None:
+            chat['frequency_penalty'] = self.frequency_penalty
+        if self.max_completion_tokens is not None:
+            chat['max_completion_tokens'] = self.max_completion_tokens
+        if self.n is not None:
+            chat['n'] = self.n
+        if self.parallel_tool_calls is not None:
+            chat['parallel_tool_calls'] = self.parallel_tool_calls
+        if self.presence_penalty is not None:
+            chat['presence_penalty'] = self.presence_penalty
+        if self.prompt_cache_key is not None:
+            chat['prompt_cache_key'] = self.prompt_cache_key
+        if self.reasoning_effort is not None:
+            chat['reasoning_effort'] = self.reasoning_effort
+        if self.safety_identifier is not None:
+            chat['safety_identifier'] = self.safety_identifier
+        if self.seed is not None:
+            chat['seed'] = self.seed
+        if self.service_tier is not None:
+            chat['service_tier'] = self.service_tier
+        if self.stop is not None:
+            chat['stop'] = self.stop
+        if self.store is not None:
+            chat['store'] = self.store
+        if self.temperature is not None:
+            chat['temperature'] = self.temperature
+        if self.top_logprobs is not None:
+            chat['top_logprobs'] = self.top_logprobs
+        if self.top_p is not None:
+            chat['top_p'] = self.top_p
+        if self.verbosity is not None:
+            chat['verbosity'] = self.verbosity
+        if self.enable_thinking is not None:
+            chat['enable_thinking'] = self.enable_thinking
+
         chat = await process_chat(chat)
         _log(f"POST {self.chat_url}")
         _log(chat_summary(chat))
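
These blocks stamp any provider-level defaults onto each outgoing request, which is how extra keys on an llms.json provider entry (such as the z.ai entry's `"temperature": 0.7` above) take effect. A sketch, with constructor arguments inferred from the `test()` classmethod shown earlier:

```python
provider = OpenAiProvider(
    base_url="https://api.z.ai/api/paas/v4",
    api_key="your-key",                # placeholder
    models={"glm-4.6": "glm-4.6"},
    temperature=0.7,                   # extra llms.json keys arrive via **kwargs
)
# provider.temperature == 0.7, and chat() writes it into every request body
```
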
@@ -531,10 +648,14 @@ async def chat_completion(chat):
     # If we get here, all providers failed
     raise first_exception
 
-async def cli_chat(chat, image=None, audio=None, file=None, raw=False):
+async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False):
     if g_default_model:
         chat['model'] = g_default_model
 
+    # Apply args parameters to chat request
+    if args:
+        chat = apply_args_to_chat(chat, args)
+
     # process_chat downloads the image, just adding the reference here
     if image is not None:
         first_message = None
@@ -919,6 +1040,7 @@ def main():
     parser.add_argument('--image', default=None, help='Image input to use in chat completion')
     parser.add_argument('--audio', default=None, help='Audio input to use in chat completion')
    parser.add_argument('--file', default=None, help='File input to use in chat completion')
+    parser.add_argument('--args', default=None, help='URL-encoded parameters to add to chat request (e.g. "temperature=0.7&seed=111")', metavar='PARAMS')
     parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')
 
     parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')
@@ -1250,13 +1372,21 @@
             if len(extra_args) > 0:
                 prompt = ' '.join(extra_args)
                 # replace content of last message if exists, else add
-                last_msg = chat['messages'][-1]
-                if last_msg['role'] == 'user':
-                    last_msg['content'] = prompt
+                last_msg = chat['messages'][-1] if 'messages' in chat else None
+                if last_msg and last_msg['role'] == 'user':
+                    if isinstance(last_msg['content'], list):
+                        last_msg['content'][-1]['text'] = prompt
+                    else:
+                        last_msg['content'] = prompt
                 else:
                     chat['messages'].append({'role': 'user', 'content': prompt})
 
-
+            # Parse args parameters if provided
+            args = None
+            if cli_args.args is not None:
+                args = parse_args_params(cli_args.args)
+
+            asyncio.run(cli_chat(chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, args=args, raw=cli_args.raw))
             exit(0)
         except Exception as e:
             print(f"{cli_args.logprefix}Error: {e}")

--- llms_py-2.0.7/PKG-INFO
+++ llms_py-2.0.9/llms_py.egg-info/PKG-INFO

Identical to the PKG-INFO diff at the top of this report: the same seven hunks apply verbatim.

--- llms_py-2.0.7/pyproject.toml
+++ llms_py-2.0.9/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llms-py"
-version = "2.0.7"
+version = "2.0.9"
 description = "A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers"
 readme = "README.md"
 license = "BSD-3-Clause"

--- llms_py-2.0.7/setup.py
+++ llms_py-2.0.9/setup.py
@@ -16,7 +16,7 @@ with open(os.path.join(this_directory, "requirements.txt"), encoding="utf-8") as
 
 setup(
     name="llms-py",
-    version="2.0.7",
+    version="2.0.9",
     author="ServiceStack",
     author_email="team@servicestack.net",
     description="A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers",

--- llms_py-2.0.7/ui/Main.mjs
+++ llms_py-2.0.9/ui/Main.mjs
@@ -20,7 +20,7 @@ const ProviderStatus = {
                 <span class="text-red-700">{{(config.status.disabled||[]).length}}</span>
             </div>
         </button>
-        <div v-if="showPopover" ref="popoverRef" class="absolute right-0 mt-2 w-72 max-h-
+        <div v-if="showPopover" ref="popoverRef" class="absolute right-0 mt-2 w-72 max-h-116 overflow-y-auto bg-white border border-gray-200 rounded-md shadow-lg z-10">
         <div class="divide-y divide-gray-100">
             <div v-for="p in allProviders" :key="p" class="flex items-center justify-between px-3 py-2">
                 <label :for="'chk_' + p" class="cursor-pointer text-sm text-gray-900 truncate mr-2" :title="p">{{ p }}</label>

--- llms_py-2.0.7/ui/Recents.mjs
+++ llms_py-2.0.9/ui/Recents.mjs
@@ -1,5 +1,5 @@
-import { ref, 
-import { useRouter } from 'vue-router'
+import { ref, onMounted, watch, inject } from 'vue'
+import { useRouter, useRoute } from 'vue-router'
 import { useThreadStore } from './threadStore.mjs'
 import { renderMarkdown } from './markdown.mjs'
 
@@ -169,7 +169,36 @@ export default {
         </div>
     `,
     setup() {
+        const router = useRouter()
+        const route = useRoute()
         const q = ref('')
+
+        // Initialize search query from URL parameter
+        onMounted(() => {
+            const urlQuery = route.query.q || ''
+            q.value = urlQuery
+        })
+
+        // Watch for changes in the search input and update URL
+        watch(q, (newQuery) => {
+            const currentQuery = route.query.q || ''
+            if (newQuery !== currentQuery) {
+                // Update URL without triggering navigation
+                router.replace({
+                    path: route.path,
+                    query: newQuery ? { q: newQuery } : {}
+                })
+            }
+        })
+
+        // Watch for URL changes (browser back/forward) and update search input
+        watch(() => route.query.q, (newQuery) => {
+            const urlQuery = newQuery || ''
+            if (q.value !== urlQuery) {
+                q.value = urlQuery
+            }
+        })
+
         return {
             q,
         }