llms-py 2.0.8__tar.gz → 2.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llms_py-2.0.8/llms_py.egg-info → llms_py-2.0.9}/PKG-INFO +124 -39
- {llms_py-2.0.8 → llms_py-2.0.9}/README.md +123 -38
- {llms_py-2.0.8 → llms_py-2.0.9}/llms.json +15 -6
- {llms_py-2.0.8 → llms_py-2.0.9}/llms.py +133 -9
- {llms_py-2.0.8 → llms_py-2.0.9/llms_py.egg-info}/PKG-INFO +124 -39
- {llms_py-2.0.8 → llms_py-2.0.9}/pyproject.toml +1 -1
- {llms_py-2.0.8 → llms_py-2.0.9}/setup.py +1 -1
- {llms_py-2.0.8 → llms_py-2.0.9}/LICENSE +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/MANIFEST.in +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/index.html +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/SOURCES.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/dependency_links.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/entry_points.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/not-zip-safe +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/requires.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/llms_py.egg-info/top_level.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/requirements.txt +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/setup.cfg +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/App.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/ChatPrompt.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/Main.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/Recents.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/Sidebar.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/app.css +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/fav.svg +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/highlight.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/idb.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/marked.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/servicestack-client.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/servicestack-vue.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/vue-router.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/vue.min.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/lib/vue.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/markdown.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/tailwind.input.css +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/threadStore.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/typography.css +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui/utils.mjs +0 -0
- {llms_py-2.0.8 → llms_py-2.0.9}/ui.json +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llms-py
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.9
|
|
4
4
|
Summary: A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers
|
|
5
5
|
Home-page: https://github.com/ServiceStack/llms
|
|
6
6
|
Author: ServiceStack
|
|
@@ -51,7 +51,7 @@ Configure additional providers and models in [llms.json](llms.json)
|
|
|
51
51
|
## Features
|
|
52
52
|
|
|
53
53
|
- **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
|
|
54
|
-
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
|
|
54
|
+
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
|
|
55
55
|
- **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
|
|
56
56
|
- **Configuration Management**: Easy provider enable/disable and configuration management
|
|
57
57
|
- **CLI Interface**: Simple command-line interface for quick interactions
|
|
@@ -510,7 +510,52 @@ llms --default grok-4
|
|
|
510
510
|
|
|
511
511
|
# Update llms.py to latest version
|
|
512
512
|
llms --update
|
|
513
|
-
|
|
513
|
+
|
|
514
|
+
# Pass custom parameters to chat request (URL-encoded)
|
|
515
|
+
llms --args "temperature=0.7&seed=111" "What is 2+2?"
|
|
516
|
+
|
|
517
|
+
# Multiple parameters with different types
|
|
518
|
+
llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
|
|
519
|
+
|
|
520
|
+
# URL-encoded special characters (stop sequences)
|
|
521
|
+
llms --args "stop=Two,Words" "Count to 5"
|
|
522
|
+
|
|
523
|
+
# Combine with other options
|
|
524
|
+
llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
#### Custom Parameters with `--args`
|
|
528
|
+
|
|
529
|
+
The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
|
|
530
|
+
|
|
531
|
+
**Parameter Types:**
|
|
532
|
+
- **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
|
|
533
|
+
- **Integers**: `max_completion_tokens=100`
|
|
534
|
+
- **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
|
|
535
|
+
- **Strings**: `stop=one`
|
|
536
|
+
- **Lists**: `stop=two,words`
|
|
537
|
+
|
|
538
|
+
**Common Parameters:**
|
|
539
|
+
- `temperature`: Controls randomness (0.0 to 2.0)
|
|
540
|
+
- `max_completion_tokens`: Maximum tokens in response
|
|
541
|
+
- `seed`: For reproducible outputs
|
|
542
|
+
- `top_p`: Nucleus sampling parameter
|
|
543
|
+
- `stop`: Stop sequences (URL-encode special chars)
|
|
544
|
+
- `store`: Whether or not to store the output
|
|
545
|
+
- `frequency_penalty`: Penalize new tokens based on frequency
|
|
546
|
+
- `presence_penalty`: Penalize new tokens based on presence
|
|
547
|
+
- `logprobs`: Include log probabilities in response
|
|
548
|
+
- `parallel_tool_calls`: Enable parallel tool calls
|
|
549
|
+
- `prompt_cache_key`: Cache key for prompt
|
|
550
|
+
- `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
|
|
551
|
+
- `safety_identifier`: A string that uniquely identifies each user
|
|
552
|
+
- `seed`: For reproducible outputs
|
|
553
|
+
- `service_tier`: Service tier (free, standard, premium, *default)
|
|
554
|
+
- `top_logprobs`: Number of top logprobs to return
|
|
555
|
+
- `top_p`: Nucleus sampling parameter
|
|
556
|
+
- `verbosity`: Verbosity level (0, 1, 2, 3, *default)
|
|
557
|
+
- `enable_thinking`: Enable thinking mode (Qwen)
|
|
558
|
+
- `stream`: Enable streaming responses
|
|
514
559
|
|
|
515
560
|
### Default Model Configuration
|
|
516
561
|
|
|
@@ -558,6 +603,42 @@ llms "Explain quantum computing" | glow
|
|
|
558
603
|
|
|
559
604
|
## Supported Providers
|
|
560
605
|
|
|
606
|
+
Any OpenAI-compatible providers and their models can be added by configuring them in [llms.json](./llms.json). By default only AI Providers with free tiers are enabled which will only be "available" if their API Key is set.
|
|
607
|
+
|
|
608
|
+
You can list the available providers, their models and which are enabled or disabled with:
|
|
609
|
+
|
|
610
|
+
```bash
|
|
611
|
+
llms ls
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
They can be enabled/disabled in your `llms.json` file or with:
|
|
615
|
+
|
|
616
|
+
```bash
|
|
617
|
+
llms --enable <provider>
|
|
618
|
+
llms --disable <provider>
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
For a provider to be available, they also require their API Key configured in either your Environment Variables
|
|
622
|
+
or directly in your `llms.json`.
|
|
623
|
+
|
|
624
|
+
### Environment Variables
|
|
625
|
+
|
|
626
|
+
| Provider | Variable | Description | Example |
|
|
627
|
+
|-----------------|---------------------------|---------------------|---------|
|
|
628
|
+
| openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...` |
|
|
629
|
+
| groq | `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
630
|
+
| google_free | `GOOGLE_FREE_API_KEY` | Google FREE API key | `AIza...` |
|
|
631
|
+
| codestral | `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
632
|
+
| ollama | N/A | No API key required | |
|
|
633
|
+
| openrouter | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
634
|
+
| google | `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
635
|
+
| anthropic | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
636
|
+
| openai | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
637
|
+
| grok | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
638
|
+
| qwen | `DASHSCOPE_API_KEY` | Qwen (Alibaba) API key | `sk-...` |
|
|
639
|
+
| z.ai | `ZAI_API_KEY` | Z.ai API key | `sk-...` |
|
|
640
|
+
| mistral | `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
641
|
+
|
|
561
642
|
### OpenAI
|
|
562
643
|
- **Type**: `OpenAiProvider`
|
|
563
644
|
- **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
|
|
@@ -588,6 +669,26 @@ export GOOGLE_API_KEY="your-key"
|
|
|
588
669
|
llms --enable google_free
|
|
589
670
|
```
|
|
590
671
|
|
|
672
|
+
### OpenRouter
|
|
673
|
+
- **Type**: `OpenAiProvider`
|
|
674
|
+
- **Models**: 100+ models from various providers
|
|
675
|
+
- **Features**: Access to latest models, free tier available
|
|
676
|
+
|
|
677
|
+
```bash
|
|
678
|
+
export OPENROUTER_API_KEY="your-key"
|
|
679
|
+
llms --enable openrouter
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
### Grok (X.AI)
|
|
683
|
+
- **Type**: `OpenAiProvider`
|
|
684
|
+
- **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
|
|
685
|
+
- **Features**: Real-time information, humor, uncensored responses
|
|
686
|
+
|
|
687
|
+
```bash
|
|
688
|
+
export GROK_API_KEY="your-key"
|
|
689
|
+
llms --enable grok
|
|
690
|
+
```
|
|
691
|
+
|
|
591
692
|
### Groq
|
|
592
693
|
- **Type**: `OpenAiProvider`
|
|
593
694
|
- **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
|
|
@@ -608,44 +709,44 @@ llms --enable groq
|
|
|
608
709
|
llms --enable ollama
|
|
609
710
|
```
|
|
610
711
|
|
|
611
|
-
###
|
|
712
|
+
### Qwen (Alibaba Cloud)
|
|
612
713
|
- **Type**: `OpenAiProvider`
|
|
613
|
-
- **Models**:
|
|
614
|
-
- **Features**:
|
|
714
|
+
- **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
|
|
715
|
+
- **Features**: Multilingual, vision models, coding, reasoning, audio processing
|
|
615
716
|
|
|
616
717
|
```bash
|
|
617
|
-
export
|
|
618
|
-
llms --enable
|
|
718
|
+
export DASHSCOPE_API_KEY="your-key"
|
|
719
|
+
llms --enable qwen
|
|
619
720
|
```
|
|
620
721
|
|
|
621
|
-
###
|
|
722
|
+
### Z.ai
|
|
622
723
|
- **Type**: `OpenAiProvider`
|
|
623
|
-
- **Models**:
|
|
624
|
-
- **Features**:
|
|
724
|
+
- **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
|
|
725
|
+
- **Features**: Advanced language models with strong reasoning capabilities
|
|
625
726
|
|
|
626
727
|
```bash
|
|
627
|
-
export
|
|
628
|
-
llms --enable
|
|
728
|
+
export ZAI_API_KEY="your-key"
|
|
729
|
+
llms --enable z.ai
|
|
629
730
|
```
|
|
630
731
|
|
|
631
|
-
###
|
|
732
|
+
### Mistral
|
|
632
733
|
- **Type**: `OpenAiProvider`
|
|
633
|
-
- **Models**:
|
|
634
|
-
- **Features**:
|
|
734
|
+
- **Models**: Mistral Large, Codestral, Pixtral, etc.
|
|
735
|
+
- **Features**: Code generation, multilingual
|
|
635
736
|
|
|
636
737
|
```bash
|
|
637
|
-
export
|
|
638
|
-
llms --enable
|
|
738
|
+
export MISTRAL_API_KEY="your-key"
|
|
739
|
+
llms --enable mistral
|
|
639
740
|
```
|
|
640
741
|
|
|
641
|
-
###
|
|
742
|
+
### Codestral
|
|
642
743
|
- **Type**: `OpenAiProvider`
|
|
643
|
-
- **Models**:
|
|
644
|
-
- **Features**:
|
|
744
|
+
- **Models**: Codestral
|
|
745
|
+
- **Features**: Code generation
|
|
645
746
|
|
|
646
747
|
```bash
|
|
647
|
-
export
|
|
648
|
-
llms --enable
|
|
748
|
+
export CODESTRAL_API_KEY="your-key"
|
|
749
|
+
llms --enable codestral
|
|
649
750
|
```
|
|
650
751
|
|
|
651
752
|
## Model Routing
|
|
@@ -654,22 +755,6 @@ The tool automatically routes requests to the first available provider that supp
|
|
|
654
755
|
|
|
655
756
|
Example: If both OpenAI and OpenRouter support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq than OpenRouter (Paid) if requests fails.
|
|
656
757
|
|
|
657
|
-
## Environment Variables
|
|
658
|
-
|
|
659
|
-
| Variable | Description | Example |
|
|
660
|
-
|----------|-------------|---------|
|
|
661
|
-
| `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
|
|
662
|
-
| `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
663
|
-
| `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
664
|
-
| `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
665
|
-
| `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
666
|
-
| `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
667
|
-
| `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
668
|
-
| `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
|
|
669
|
-
| `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
670
|
-
| `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
671
|
-
| `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
|
|
672
|
-
|
|
673
758
|
## Configuration Examples
|
|
674
759
|
|
|
675
760
|
### Minimal Configuration
|
|
@@ -11,7 +11,7 @@ Configure additional providers and models in [llms.json](llms.json)
|
|
|
11
11
|
## Features
|
|
12
12
|
|
|
13
13
|
- **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
|
|
14
|
-
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
|
|
14
|
+
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
|
|
15
15
|
- **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
|
|
16
16
|
- **Configuration Management**: Easy provider enable/disable and configuration management
|
|
17
17
|
- **CLI Interface**: Simple command-line interface for quick interactions
|
|
@@ -470,7 +470,52 @@ llms --default grok-4
|
|
|
470
470
|
|
|
471
471
|
# Update llms.py to latest version
|
|
472
472
|
llms --update
|
|
473
|
-
|
|
473
|
+
|
|
474
|
+
# Pass custom parameters to chat request (URL-encoded)
|
|
475
|
+
llms --args "temperature=0.7&seed=111" "What is 2+2?"
|
|
476
|
+
|
|
477
|
+
# Multiple parameters with different types
|
|
478
|
+
llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
|
|
479
|
+
|
|
480
|
+
# URL-encoded special characters (stop sequences)
|
|
481
|
+
llms --args "stop=Two,Words" "Count to 5"
|
|
482
|
+
|
|
483
|
+
# Combine with other options
|
|
484
|
+
llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
#### Custom Parameters with `--args`
|
|
488
|
+
|
|
489
|
+
The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
|
|
490
|
+
|
|
491
|
+
**Parameter Types:**
|
|
492
|
+
- **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
|
|
493
|
+
- **Integers**: `max_completion_tokens=100`
|
|
494
|
+
- **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
|
|
495
|
+
- **Strings**: `stop=one`
|
|
496
|
+
- **Lists**: `stop=two,words`
|
|
497
|
+
|
|
498
|
+
**Common Parameters:**
|
|
499
|
+
- `temperature`: Controls randomness (0.0 to 2.0)
|
|
500
|
+
- `max_completion_tokens`: Maximum tokens in response
|
|
501
|
+
- `seed`: For reproducible outputs
|
|
502
|
+
- `top_p`: Nucleus sampling parameter
|
|
503
|
+
- `stop`: Stop sequences (URL-encode special chars)
|
|
504
|
+
- `store`: Whether or not to store the output
|
|
505
|
+
- `frequency_penalty`: Penalize new tokens based on frequency
|
|
506
|
+
- `presence_penalty`: Penalize new tokens based on presence
|
|
507
|
+
- `logprobs`: Include log probabilities in response
|
|
508
|
+
- `parallel_tool_calls`: Enable parallel tool calls
|
|
509
|
+
- `prompt_cache_key`: Cache key for prompt
|
|
510
|
+
- `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
|
|
511
|
+
- `safety_identifier`: A string that uniquely identifies each user
|
|
512
|
+
- `seed`: For reproducible outputs
|
|
513
|
+
- `service_tier`: Service tier (free, standard, premium, *default)
|
|
514
|
+
- `top_logprobs`: Number of top logprobs to return
|
|
515
|
+
- `top_p`: Nucleus sampling parameter
|
|
516
|
+
- `verbosity`: Verbosity level (0, 1, 2, 3, *default)
|
|
517
|
+
- `enable_thinking`: Enable thinking mode (Qwen)
|
|
518
|
+
- `stream`: Enable streaming responses
|
|
474
519
|
|
|
475
520
|
### Default Model Configuration
|
|
476
521
|
|
|
@@ -518,6 +563,42 @@ llms "Explain quantum computing" | glow
|
|
|
518
563
|
|
|
519
564
|
## Supported Providers
|
|
520
565
|
|
|
566
|
+
Any OpenAI-compatible providers and their models can be added by configuring them in [llms.json](./llms.json). By default only AI Providers with free tiers are enabled which will only be "available" if their API Key is set.
|
|
567
|
+
|
|
568
|
+
You can list the available providers, their models and which are enabled or disabled with:
|
|
569
|
+
|
|
570
|
+
```bash
|
|
571
|
+
llms ls
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
They can be enabled/disabled in your `llms.json` file or with:
|
|
575
|
+
|
|
576
|
+
```bash
|
|
577
|
+
llms --enable <provider>
|
|
578
|
+
llms --disable <provider>
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
For a provider to be available, they also require their API Key configured in either your Environment Variables
|
|
582
|
+
or directly in your `llms.json`.
|
|
583
|
+
|
|
584
|
+
### Environment Variables
|
|
585
|
+
|
|
586
|
+
| Provider | Variable | Description | Example |
|
|
587
|
+
|-----------------|---------------------------|---------------------|---------|
|
|
588
|
+
| openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...` |
|
|
589
|
+
| groq | `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
590
|
+
| google_free | `GOOGLE_FREE_API_KEY` | Google FREE API key | `AIza...` |
|
|
591
|
+
| codestral | `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
592
|
+
| ollama | N/A | No API key required | |
|
|
593
|
+
| openrouter | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
594
|
+
| google | `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
595
|
+
| anthropic | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
596
|
+
| openai | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
597
|
+
| grok | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
598
|
+
| qwen | `DASHSCOPE_API_KEY` | Qwen (Alibaba) API key | `sk-...` |
|
|
599
|
+
| z.ai | `ZAI_API_KEY` | Z.ai API key | `sk-...` |
|
|
600
|
+
| mistral | `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
601
|
+
|
|
521
602
|
### OpenAI
|
|
522
603
|
- **Type**: `OpenAiProvider`
|
|
523
604
|
- **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
|
|
@@ -548,6 +629,26 @@ export GOOGLE_API_KEY="your-key"
|
|
|
548
629
|
llms --enable google_free
|
|
549
630
|
```
|
|
550
631
|
|
|
632
|
+
### OpenRouter
|
|
633
|
+
- **Type**: `OpenAiProvider`
|
|
634
|
+
- **Models**: 100+ models from various providers
|
|
635
|
+
- **Features**: Access to latest models, free tier available
|
|
636
|
+
|
|
637
|
+
```bash
|
|
638
|
+
export OPENROUTER_API_KEY="your-key"
|
|
639
|
+
llms --enable openrouter
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
### Grok (X.AI)
|
|
643
|
+
- **Type**: `OpenAiProvider`
|
|
644
|
+
- **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
|
|
645
|
+
- **Features**: Real-time information, humor, uncensored responses
|
|
646
|
+
|
|
647
|
+
```bash
|
|
648
|
+
export GROK_API_KEY="your-key"
|
|
649
|
+
llms --enable grok
|
|
650
|
+
```
|
|
651
|
+
|
|
551
652
|
### Groq
|
|
552
653
|
- **Type**: `OpenAiProvider`
|
|
553
654
|
- **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
|
|
@@ -568,44 +669,44 @@ llms --enable groq
|
|
|
568
669
|
llms --enable ollama
|
|
569
670
|
```
|
|
570
671
|
|
|
571
|
-
###
|
|
672
|
+
### Qwen (Alibaba Cloud)
|
|
572
673
|
- **Type**: `OpenAiProvider`
|
|
573
|
-
- **Models**:
|
|
574
|
-
- **Features**:
|
|
674
|
+
- **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
|
|
675
|
+
- **Features**: Multilingual, vision models, coding, reasoning, audio processing
|
|
575
676
|
|
|
576
677
|
```bash
|
|
577
|
-
export
|
|
578
|
-
llms --enable
|
|
678
|
+
export DASHSCOPE_API_KEY="your-key"
|
|
679
|
+
llms --enable qwen
|
|
579
680
|
```
|
|
580
681
|
|
|
581
|
-
###
|
|
682
|
+
### Z.ai
|
|
582
683
|
- **Type**: `OpenAiProvider`
|
|
583
|
-
- **Models**:
|
|
584
|
-
- **Features**:
|
|
684
|
+
- **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
|
|
685
|
+
- **Features**: Advanced language models with strong reasoning capabilities
|
|
585
686
|
|
|
586
687
|
```bash
|
|
587
|
-
export
|
|
588
|
-
llms --enable
|
|
688
|
+
export ZAI_API_KEY="your-key"
|
|
689
|
+
llms --enable z.ai
|
|
589
690
|
```
|
|
590
691
|
|
|
591
|
-
###
|
|
692
|
+
### Mistral
|
|
592
693
|
- **Type**: `OpenAiProvider`
|
|
593
|
-
- **Models**:
|
|
594
|
-
- **Features**:
|
|
694
|
+
- **Models**: Mistral Large, Codestral, Pixtral, etc.
|
|
695
|
+
- **Features**: Code generation, multilingual
|
|
595
696
|
|
|
596
697
|
```bash
|
|
597
|
-
export
|
|
598
|
-
llms --enable
|
|
698
|
+
export MISTRAL_API_KEY="your-key"
|
|
699
|
+
llms --enable mistral
|
|
599
700
|
```
|
|
600
701
|
|
|
601
|
-
###
|
|
702
|
+
### Codestral
|
|
602
703
|
- **Type**: `OpenAiProvider`
|
|
603
|
-
- **Models**:
|
|
604
|
-
- **Features**:
|
|
704
|
+
- **Models**: Codestral
|
|
705
|
+
- **Features**: Code generation
|
|
605
706
|
|
|
606
707
|
```bash
|
|
607
|
-
export
|
|
608
|
-
llms --enable
|
|
708
|
+
export CODESTRAL_API_KEY="your-key"
|
|
709
|
+
llms --enable codestral
|
|
609
710
|
```
|
|
610
711
|
|
|
611
712
|
## Model Routing
|
|
@@ -614,22 +715,6 @@ The tool automatically routes requests to the first available provider that supp
|
|
|
614
715
|
|
|
615
716
|
Example: If both OpenAI and OpenRouter support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq than OpenRouter (Paid) if requests fails.
|
|
616
717
|
|
|
617
|
-
## Environment Variables
|
|
618
|
-
|
|
619
|
-
| Variable | Description | Example |
|
|
620
|
-
|----------|-------------|---------|
|
|
621
|
-
| `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
|
|
622
|
-
| `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
623
|
-
| `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
624
|
-
| `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
625
|
-
| `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
626
|
-
| `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
627
|
-
| `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
628
|
-
| `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
|
|
629
|
-
| `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
630
|
-
| `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
631
|
-
| `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
|
|
632
|
-
|
|
633
718
|
## Configuration Examples
|
|
634
719
|
|
|
635
720
|
### Minimal Configuration
|
|
@@ -9,7 +9,12 @@
|
|
|
9
9
|
"messages": [
|
|
10
10
|
{
|
|
11
11
|
"role": "user",
|
|
12
|
-
"content":
|
|
12
|
+
"content": [
|
|
13
|
+
{
|
|
14
|
+
"type": "text",
|
|
15
|
+
"text": ""
|
|
16
|
+
}
|
|
17
|
+
]
|
|
13
18
|
}
|
|
14
19
|
]
|
|
15
20
|
},
|
|
@@ -389,7 +394,8 @@
|
|
|
389
394
|
"qwen2.5-vl:7b": "qwen2.5-vl-7b-instruct",
|
|
390
395
|
"qwen2.5-vl:3b": "qwen2.5-vl-3b-instruct",
|
|
391
396
|
"qwen2.5-omni:7b": "qwen2.5-omni-7b"
|
|
392
|
-
}
|
|
397
|
+
},
|
|
398
|
+
"enable_thinking": false
|
|
393
399
|
},
|
|
394
400
|
"z.ai": {
|
|
395
401
|
"enabled": false,
|
|
@@ -404,7 +410,8 @@
|
|
|
404
410
|
"glm-4.5-airx": "glm-4.5-airx",
|
|
405
411
|
"glm-4.5-flash": "glm-4.5-flash",
|
|
406
412
|
"glm-4:32b": "glm-4-32b-0414-128k"
|
|
407
|
-
}
|
|
413
|
+
},
|
|
414
|
+
"temperature": 0.7
|
|
408
415
|
},
|
|
409
416
|
"mistral": {
|
|
410
417
|
"enabled": false,
|
|
@@ -417,20 +424,22 @@
|
|
|
417
424
|
"devstral-medium": "devstral-medium-2507",
|
|
418
425
|
"codestral:22b": "codestral-latest",
|
|
419
426
|
"mistral-ocr": "mistral-ocr-latest",
|
|
420
|
-
"voxtral-mini": "voxtral-mini-latest",
|
|
421
427
|
"mistral-small3.2:24b": "mistral-small-latest",
|
|
422
428
|
"magistral-small": "magistral-small-latest",
|
|
423
429
|
"devstral-small": "devstral-small-2507",
|
|
424
430
|
"voxtral-small": "voxtral-small-latest",
|
|
431
|
+
"voxtral-mini": "voxtral-mini-latest",
|
|
432
|
+
"codestral-embed": "codestral-embed-2505",
|
|
433
|
+
"mistral-embed": "mistral-embed",
|
|
425
434
|
"mistral-large:123b": "mistral-large-latest",
|
|
426
435
|
"pixtral-large:124b": "pixtral-large-latest",
|
|
427
436
|
"pixtral:12b": "pixtral-12b",
|
|
428
|
-
"mistral-nemo:12b": "mistral-nemo",
|
|
437
|
+
"mistral-nemo:12b": "open-mistral-nemo",
|
|
429
438
|
"mistral-saba": "mistral-saba-latest",
|
|
430
439
|
"mistral:7b": "open-mistral-7b",
|
|
431
440
|
"mixtral:8x7b": "open-mixtral-8x7b",
|
|
432
441
|
"mixtral:8x22b": "open-mixtral-8x22b",
|
|
433
|
-
"ministral:8b": "ministral-
|
|
442
|
+
"ministral:8b": "ministral-8b-latest",
|
|
434
443
|
"ministral:3b": "ministral-3b-latest"
|
|
435
444
|
}
|
|
436
445
|
}
|
|
@@ -14,6 +14,7 @@ import mimetypes
|
|
|
14
14
|
import traceback
|
|
15
15
|
import sys
|
|
16
16
|
import site
|
|
17
|
+
from urllib.parse import parse_qs
|
|
17
18
|
|
|
18
19
|
import aiohttp
|
|
19
20
|
from aiohttp import web
|
|
@@ -21,7 +22,7 @@ from aiohttp import web
|
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)
|
|
23
24
|
|
|
24
|
-
VERSION = "2.0.
|
|
25
|
+
VERSION = "2.0.9"
|
|
25
26
|
_ROOT = None
|
|
26
27
|
g_config_path = None
|
|
27
28
|
g_ui_path = None
|
|
@@ -63,7 +64,8 @@ def chat_summary(chat):
|
|
|
63
64
|
elif 'file' in item:
|
|
64
65
|
if 'file_data' in item['file']:
|
|
65
66
|
data = item['file']['file_data']
|
|
66
|
-
|
|
67
|
+
prefix = url.split(',', 1)[0]
|
|
68
|
+
item['file']['file_data'] = prefix + f",({len(url) - len(prefix)})"
|
|
67
69
|
return json.dumps(clone, indent=2)
|
|
68
70
|
|
|
69
71
|
def gemini_chat_summary(gemini_chat):
|
|
@@ -89,6 +91,60 @@ def is_url(url):
|
|
|
89
91
|
def get_filename(file):
|
|
90
92
|
return file.rsplit('/',1)[1] if '/' in file else 'file'
|
|
91
93
|
|
|
94
|
+
def parse_args_params(args_str):
|
|
95
|
+
"""Parse URL-encoded parameters and return a dictionary."""
|
|
96
|
+
if not args_str:
|
|
97
|
+
return {}
|
|
98
|
+
|
|
99
|
+
# Parse the URL-encoded string
|
|
100
|
+
parsed = parse_qs(args_str, keep_blank_values=True)
|
|
101
|
+
|
|
102
|
+
# Convert to simple dict with single values (not lists)
|
|
103
|
+
result = {}
|
|
104
|
+
for key, values in parsed.items():
|
|
105
|
+
if len(values) == 1:
|
|
106
|
+
value = values[0]
|
|
107
|
+
# Try to convert to appropriate types
|
|
108
|
+
if value.lower() == 'true':
|
|
109
|
+
result[key] = True
|
|
110
|
+
elif value.lower() == 'false':
|
|
111
|
+
result[key] = False
|
|
112
|
+
elif value.isdigit():
|
|
113
|
+
result[key] = int(value)
|
|
114
|
+
else:
|
|
115
|
+
try:
|
|
116
|
+
# Try to parse as float
|
|
117
|
+
result[key] = float(value)
|
|
118
|
+
except ValueError:
|
|
119
|
+
# Keep as string
|
|
120
|
+
result[key] = value
|
|
121
|
+
else:
|
|
122
|
+
# Multiple values, keep as list
|
|
123
|
+
result[key] = values
|
|
124
|
+
|
|
125
|
+
return result
|
|
126
|
+
|
|
127
|
+
def apply_args_to_chat(chat, args_params):
|
|
128
|
+
"""Apply parsed arguments to the chat request."""
|
|
129
|
+
if not args_params:
|
|
130
|
+
return chat
|
|
131
|
+
|
|
132
|
+
# Apply each parameter to the chat request
|
|
133
|
+
for key, value in args_params.items():
|
|
134
|
+
if isinstance(value, str):
|
|
135
|
+
if key == 'stop':
|
|
136
|
+
if ',' in value:
|
|
137
|
+
value = value.split(',')
|
|
138
|
+
elif key == 'max_completion_tokens' or key == 'max_tokens' or key == 'n' or key == 'seed' or key == 'top_logprobs':
|
|
139
|
+
value = int(value)
|
|
140
|
+
elif key == 'temperature' or key == 'top_p' or key == 'frequency_penalty' or key == 'presence_penalty':
|
|
141
|
+
value = float(value)
|
|
142
|
+
elif key == 'store' or key == 'logprobs' or key == 'enable_thinking' or key == 'parallel_tool_calls' or key == 'stream':
|
|
143
|
+
value = bool(value)
|
|
144
|
+
chat[key] = value
|
|
145
|
+
|
|
146
|
+
return chat
|
|
147
|
+
|
|
92
148
|
def is_base_64(data):
|
|
93
149
|
try:
|
|
94
150
|
base64.b64decode(data)
|
|
@@ -190,8 +246,9 @@ async def process_chat(chat):
|
|
|
190
246
|
content = f.read()
|
|
191
247
|
file['filename'] = get_filename(url)
|
|
192
248
|
file['file_data'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
193
|
-
elif
|
|
194
|
-
|
|
249
|
+
elif url.startswith('data:'):
|
|
250
|
+
if 'filename' not in file:
|
|
251
|
+
file['filename'] = 'file'
|
|
195
252
|
pass # use base64 data as-is
|
|
196
253
|
else:
|
|
197
254
|
raise Exception(f"Invalid file: {url}")
|
|
@@ -232,6 +289,25 @@ class OpenAiProvider:
|
|
|
232
289
|
if api_key is not None:
|
|
233
290
|
self.headers["Authorization"] = f"Bearer {api_key}"
|
|
234
291
|
|
|
292
|
+
self.frequency_penalty = float(kwargs['frequency_penalty']) if 'frequency_penalty' in kwargs else None
|
|
293
|
+
self.max_completion_tokens = int(kwargs['max_completion_tokens']) if 'max_completion_tokens' in kwargs else None
|
|
294
|
+
self.n = int(kwargs['n']) if 'n' in kwargs else None
|
|
295
|
+
self.parallel_tool_calls = bool(kwargs['parallel_tool_calls']) if 'parallel_tool_calls' in kwargs else None
|
|
296
|
+
self.presence_penalty = float(kwargs['presence_penalty']) if 'presence_penalty' in kwargs else None
|
|
297
|
+
self.prompt_cache_key = kwargs['prompt_cache_key'] if 'prompt_cache_key' in kwargs else None
|
|
298
|
+
self.reasoning_effort = kwargs['reasoning_effort'] if 'reasoning_effort' in kwargs else None
|
|
299
|
+
self.safety_identifier = kwargs['safety_identifier'] if 'safety_identifier' in kwargs else None
|
|
300
|
+
self.seed = int(kwargs['seed']) if 'seed' in kwargs else None
|
|
301
|
+
self.service_tier = kwargs['service_tier'] if 'service_tier' in kwargs else None
|
|
302
|
+
self.stop = kwargs['stop'] if 'stop' in kwargs else None
|
|
303
|
+
self.store = bool(kwargs['store']) if 'store' in kwargs else None
|
|
304
|
+
self.temperature = float(kwargs['temperature']) if 'temperature' in kwargs else None
|
|
305
|
+
self.top_logprobs = int(kwargs['top_logprobs']) if 'top_logprobs' in kwargs else None
|
|
306
|
+
self.top_p = float(kwargs['top_p']) if 'top_p' in kwargs else None
|
|
307
|
+
self.verbosity = kwargs['verbosity'] if 'verbosity' in kwargs else None
|
|
308
|
+
self.stream = bool(kwargs['stream']) if 'stream' in kwargs else None
|
|
309
|
+
self.enable_thinking = bool(kwargs['enable_thinking']) if 'enable_thinking' in kwargs else None
|
|
310
|
+
|
|
235
311
|
@classmethod
|
|
236
312
|
def test(cls, base_url=None, api_key=None, models={}, **kwargs):
|
|
237
313
|
return base_url is not None and api_key is not None and len(models) > 0
|
|
@@ -247,6 +323,41 @@ class OpenAiProvider:
|
|
|
247
323
|
# with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
|
|
248
324
|
# f.write(json.dumps(chat, indent=2))
|
|
249
325
|
|
|
326
|
+
if self.frequency_penalty is not None:
|
|
327
|
+
chat['frequency_penalty'] = self.frequency_penalty
|
|
328
|
+
if self.max_completion_tokens is not None:
|
|
329
|
+
chat['max_completion_tokens'] = self.max_completion_tokens
|
|
330
|
+
if self.n is not None:
|
|
331
|
+
chat['n'] = self.n
|
|
332
|
+
if self.parallel_tool_calls is not None:
|
|
333
|
+
chat['parallel_tool_calls'] = self.parallel_tool_calls
|
|
334
|
+
if self.presence_penalty is not None:
|
|
335
|
+
chat['presence_penalty'] = self.presence_penalty
|
|
336
|
+
if self.prompt_cache_key is not None:
|
|
337
|
+
chat['prompt_cache_key'] = self.prompt_cache_key
|
|
338
|
+
if self.reasoning_effort is not None:
|
|
339
|
+
chat['reasoning_effort'] = self.reasoning_effort
|
|
340
|
+
if self.safety_identifier is not None:
|
|
341
|
+
chat['safety_identifier'] = self.safety_identifier
|
|
342
|
+
if self.seed is not None:
|
|
343
|
+
chat['seed'] = self.seed
|
|
344
|
+
if self.service_tier is not None:
|
|
345
|
+
chat['service_tier'] = self.service_tier
|
|
346
|
+
if self.stop is not None:
|
|
347
|
+
chat['stop'] = self.stop
|
|
348
|
+
if self.store is not None:
|
|
349
|
+
chat['store'] = self.store
|
|
350
|
+
if self.temperature is not None:
|
|
351
|
+
chat['temperature'] = self.temperature
|
|
352
|
+
if self.top_logprobs is not None:
|
|
353
|
+
chat['top_logprobs'] = self.top_logprobs
|
|
354
|
+
if self.top_p is not None:
|
|
355
|
+
chat['top_p'] = self.top_p
|
|
356
|
+
if self.verbosity is not None:
|
|
357
|
+
chat['verbosity'] = self.verbosity
|
|
358
|
+
if self.enable_thinking is not None:
|
|
359
|
+
chat['enable_thinking'] = self.enable_thinking
|
|
360
|
+
|
|
250
361
|
chat = await process_chat(chat)
|
|
251
362
|
_log(f"POST {self.chat_url}")
|
|
252
363
|
_log(chat_summary(chat))
|
|
@@ -537,10 +648,14 @@ async def chat_completion(chat):
|
|
|
537
648
|
# If we get here, all providers failed
|
|
538
649
|
raise first_exception
|
|
539
650
|
|
|
540
|
-
async def cli_chat(chat, image=None, audio=None, file=None, raw=False):
|
|
651
|
+
async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False):
|
|
541
652
|
if g_default_model:
|
|
542
653
|
chat['model'] = g_default_model
|
|
543
654
|
|
|
655
|
+
# Apply args parameters to chat request
|
|
656
|
+
if args:
|
|
657
|
+
chat = apply_args_to_chat(chat, args)
|
|
658
|
+
|
|
544
659
|
# process_chat downloads the image, just adding the reference here
|
|
545
660
|
if image is not None:
|
|
546
661
|
first_message = None
|
|
@@ -925,6 +1040,7 @@ def main():
|
|
|
925
1040
|
parser.add_argument('--image', default=None, help='Image input to use in chat completion')
|
|
926
1041
|
parser.add_argument('--audio', default=None, help='Audio input to use in chat completion')
|
|
927
1042
|
parser.add_argument('--file', default=None, help='File input to use in chat completion')
|
|
1043
|
+
parser.add_argument('--args', default=None, help='URL-encoded parameters to add to chat request (e.g. "temperature=0.7&seed=111")', metavar='PARAMS')
|
|
928
1044
|
parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')
|
|
929
1045
|
|
|
930
1046
|
parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')
|
|
@@ -1256,13 +1372,21 @@ def main():
|
|
|
1256
1372
|
if len(extra_args) > 0:
|
|
1257
1373
|
prompt = ' '.join(extra_args)
|
|
1258
1374
|
# replace content of last message if exists, else add
|
|
1259
|
-
last_msg = chat['messages'][-1]
|
|
1260
|
-
if last_msg['role'] == 'user':
|
|
1261
|
-
last_msg['content']
|
|
1375
|
+
last_msg = chat['messages'][-1] if 'messages' in chat else None
|
|
1376
|
+
if last_msg and last_msg['role'] == 'user':
|
|
1377
|
+
if isinstance(last_msg['content'], list):
|
|
1378
|
+
last_msg['content'][-1]['text'] = prompt
|
|
1379
|
+
else:
|
|
1380
|
+
last_msg['content'] = prompt
|
|
1262
1381
|
else:
|
|
1263
1382
|
chat['messages'].append({'role': 'user', 'content': prompt})
|
|
1264
1383
|
|
|
1265
|
-
|
|
1384
|
+
# Parse args parameters if provided
|
|
1385
|
+
args = None
|
|
1386
|
+
if cli_args.args is not None:
|
|
1387
|
+
args = parse_args_params(cli_args.args)
|
|
1388
|
+
|
|
1389
|
+
asyncio.run(cli_chat(chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, args=args, raw=cli_args.raw))
|
|
1266
1390
|
exit(0)
|
|
1267
1391
|
except Exception as e:
|
|
1268
1392
|
print(f"{cli_args.logprefix}Error: {e}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llms-py
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.9
|
|
4
4
|
Summary: A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers
|
|
5
5
|
Home-page: https://github.com/ServiceStack/llms
|
|
6
6
|
Author: ServiceStack
|
|
@@ -51,7 +51,7 @@ Configure additional providers and models in [llms.json](llms.json)
|
|
|
51
51
|
## Features
|
|
52
52
|
|
|
53
53
|
- **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
|
|
54
|
-
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
|
|
54
|
+
- **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
|
|
55
55
|
- **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
|
|
56
56
|
- **Configuration Management**: Easy provider enable/disable and configuration management
|
|
57
57
|
- **CLI Interface**: Simple command-line interface for quick interactions
|
|
@@ -510,7 +510,52 @@ llms --default grok-4
|
|
|
510
510
|
|
|
511
511
|
# Update llms.py to latest version
|
|
512
512
|
llms --update
|
|
513
|
-
|
|
513
|
+
|
|
514
|
+
# Pass custom parameters to chat request (URL-encoded)
|
|
515
|
+
llms --args "temperature=0.7&seed=111" "What is 2+2?"
|
|
516
|
+
|
|
517
|
+
# Multiple parameters with different types
|
|
518
|
+
llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
|
|
519
|
+
|
|
520
|
+
# URL-encoded special characters (stop sequences)
|
|
521
|
+
llms --args "stop=Two,Words" "Count to 5"
|
|
522
|
+
|
|
523
|
+
# Combine with other options
|
|
524
|
+
llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
#### Custom Parameters with `--args`
|
|
528
|
+
|
|
529
|
+
The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
|
|
530
|
+
|
|
531
|
+
**Parameter Types:**
|
|
532
|
+
- **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
|
|
533
|
+
- **Integers**: `max_completion_tokens=100`
|
|
534
|
+
- **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
|
|
535
|
+
- **Strings**: `stop=one`
|
|
536
|
+
- **Lists**: `stop=two,words`
|
|
537
|
+
|
|
538
|
+
**Common Parameters:**
|
|
539
|
+
- `temperature`: Controls randomness (0.0 to 2.0)
|
|
540
|
+
- `max_completion_tokens`: Maximum tokens in response
|
|
541
|
+
- `seed`: For reproducible outputs
|
|
542
|
+
- `top_p`: Nucleus sampling parameter
|
|
543
|
+
- `stop`: Stop sequences (URL-encode special chars)
|
|
544
|
+
- `store`: Whether or not to store the output
|
|
545
|
+
- `frequency_penalty`: Penalize new tokens based on frequency
|
|
546
|
+
- `presence_penalty`: Penalize new tokens based on presence
|
|
547
|
+
- `logprobs`: Include log probabilities in response
|
|
548
|
+
- `parallel_tool_calls`: Enable parallel tool calls
|
|
549
|
+
- `prompt_cache_key`: Cache key for prompt
|
|
550
|
+
- `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
|
|
551
|
+
- `safety_identifier`: A string that uniquely identifies each user
|
|
552
|
+
- `seed`: For reproducible outputs
|
|
553
|
+
- `service_tier`: Service tier (free, standard, premium, *default)
|
|
554
|
+
- `top_logprobs`: Number of top logprobs to return
|
|
555
|
+
- `top_p`: Nucleus sampling parameter
|
|
556
|
+
- `verbosity`: Verbosity level (0, 1, 2, 3, *default)
|
|
557
|
+
- `enable_thinking`: Enable thinking mode (Qwen)
|
|
558
|
+
- `stream`: Enable streaming responses
|
|
514
559
|
|
|
515
560
|
### Default Model Configuration
|
|
516
561
|
|
|
@@ -558,6 +603,42 @@ llms "Explain quantum computing" | glow
|
|
|
558
603
|
|
|
559
604
|
## Supported Providers
|
|
560
605
|
|
|
606
|
+
Any OpenAI-compatible providers and their models can be added by configuring them in [llms.json](./llms.json). By default only AI Providers with free tiers are enabled, which will only be "available" if their API Key is set.
|
|
607
|
+
|
|
608
|
+
You can list the available providers, their models and which are enabled or disabled with:
|
|
609
|
+
|
|
610
|
+
```bash
|
|
611
|
+
llms ls
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
They can be enabled/disabled in your `llms.json` file or with:
|
|
615
|
+
|
|
616
|
+
```bash
|
|
617
|
+
llms --enable <provider>
|
|
618
|
+
llms --disable <provider>
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
For a provider to be available, they also require their API Key configured in either your Environment Variables
|
|
622
|
+
or directly in your `llms.json`.
|
|
623
|
+
|
|
624
|
+
### Environment Variables
|
|
625
|
+
|
|
626
|
+
| Provider | Variable | Description | Example |
|
|
627
|
+
|-----------------|---------------------------|---------------------|---------|
|
|
628
|
+
| openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...` |
|
|
629
|
+
| groq | `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
630
|
+
| google_free | `GOOGLE_FREE_API_KEY` | Google FREE API key | `AIza...` |
|
|
631
|
+
| codestral | `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
632
|
+
| ollama | N/A | No API key required | |
|
|
633
|
+
| openrouter | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
634
|
+
| google | `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
635
|
+
| anthropic | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
636
|
+
| openai | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
637
|
+
| grok | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
638
|
+
| qwen | `DASHSCOPE_API_KEY` | Qwen (Alibaba) API key | `sk-...` |
|
|
639
|
+
| z.ai | `ZAI_API_KEY` | Z.ai API key | `sk-...` |
|
|
640
|
+
| mistral | `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
641
|
+
|
|
561
642
|
### OpenAI
|
|
562
643
|
- **Type**: `OpenAiProvider`
|
|
563
644
|
- **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
|
|
@@ -588,6 +669,26 @@ export GOOGLE_API_KEY="your-key"
|
|
|
588
669
|
llms --enable google_free
|
|
589
670
|
```
|
|
590
671
|
|
|
672
|
+
### OpenRouter
|
|
673
|
+
- **Type**: `OpenAiProvider`
|
|
674
|
+
- **Models**: 100+ models from various providers
|
|
675
|
+
- **Features**: Access to latest models, free tier available
|
|
676
|
+
|
|
677
|
+
```bash
|
|
678
|
+
export OPENROUTER_API_KEY="your-key"
|
|
679
|
+
llms --enable openrouter
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
### Grok (X.AI)
|
|
683
|
+
- **Type**: `OpenAiProvider`
|
|
684
|
+
- **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
|
|
685
|
+
- **Features**: Real-time information, humor, uncensored responses
|
|
686
|
+
|
|
687
|
+
```bash
|
|
688
|
+
export GROK_API_KEY="your-key"
|
|
689
|
+
llms --enable grok
|
|
690
|
+
```
|
|
691
|
+
|
|
591
692
|
### Groq
|
|
592
693
|
- **Type**: `OpenAiProvider`
|
|
593
694
|
- **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
|
|
@@ -608,44 +709,44 @@ llms --enable groq
|
|
|
608
709
|
llms --enable ollama
|
|
609
710
|
```
|
|
610
711
|
|
|
611
|
-
###
|
|
712
|
+
### Qwen (Alibaba Cloud)
|
|
612
713
|
- **Type**: `OpenAiProvider`
|
|
613
|
-
- **Models**:
|
|
614
|
-
- **Features**:
|
|
714
|
+
- **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
|
|
715
|
+
- **Features**: Multilingual, vision models, coding, reasoning, audio processing
|
|
615
716
|
|
|
616
717
|
```bash
|
|
617
|
-
export
|
|
618
|
-
llms --enable
|
|
718
|
+
export DASHSCOPE_API_KEY="your-key"
|
|
719
|
+
llms --enable qwen
|
|
619
720
|
```
|
|
620
721
|
|
|
621
|
-
###
|
|
722
|
+
### Z.ai
|
|
622
723
|
- **Type**: `OpenAiProvider`
|
|
623
|
-
- **Models**:
|
|
624
|
-
- **Features**:
|
|
724
|
+
- **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
|
|
725
|
+
- **Features**: Advanced language models with strong reasoning capabilities
|
|
625
726
|
|
|
626
727
|
```bash
|
|
627
|
-
export
|
|
628
|
-
llms --enable
|
|
728
|
+
export ZAI_API_KEY="your-key"
|
|
729
|
+
llms --enable z.ai
|
|
629
730
|
```
|
|
630
731
|
|
|
631
|
-
###
|
|
732
|
+
### Mistral
|
|
632
733
|
- **Type**: `OpenAiProvider`
|
|
633
|
-
- **Models**:
|
|
634
|
-
- **Features**:
|
|
734
|
+
- **Models**: Mistral Large, Codestral, Pixtral, etc.
|
|
735
|
+
- **Features**: Code generation, multilingual
|
|
635
736
|
|
|
636
737
|
```bash
|
|
637
|
-
export
|
|
638
|
-
llms --enable
|
|
738
|
+
export MISTRAL_API_KEY="your-key"
|
|
739
|
+
llms --enable mistral
|
|
639
740
|
```
|
|
640
741
|
|
|
641
|
-
###
|
|
742
|
+
### Codestral
|
|
642
743
|
- **Type**: `OpenAiProvider`
|
|
643
|
-
- **Models**:
|
|
644
|
-
- **Features**:
|
|
744
|
+
- **Models**: Codestral
|
|
745
|
+
- **Features**: Code generation
|
|
645
746
|
|
|
646
747
|
```bash
|
|
647
|
-
export
|
|
648
|
-
llms --enable
|
|
748
|
+
export CODESTRAL_API_KEY="your-key"
|
|
749
|
+
llms --enable codestral
|
|
649
750
|
```
|
|
650
751
|
|
|
651
752
|
## Model Routing
|
|
@@ -654,22 +755,6 @@ The tool automatically routes requests to the first available provider that supp
|
|
|
654
755
|
|
|
655
756
|
Example: If both OpenAI and OpenRouter support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq, then OpenRouter (Paid) if the request fails.
|
|
656
757
|
|
|
657
|
-
## Environment Variables
|
|
658
|
-
|
|
659
|
-
| Variable | Description | Example |
|
|
660
|
-
|----------|-------------|---------|
|
|
661
|
-
| `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
|
|
662
|
-
| `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
|
|
663
|
-
| `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
|
|
664
|
-
| `GOOGLE_API_KEY` | Google API key | `AIza...` |
|
|
665
|
-
| `GROQ_API_KEY` | Groq API key | `gsk_...` |
|
|
666
|
-
| `MISTRAL_API_KEY` | Mistral API key | `...` |
|
|
667
|
-
| `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
|
|
668
|
-
| `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
|
|
669
|
-
| `CODESTRAL_API_KEY` | Codestral API key | `...` |
|
|
670
|
-
| `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
|
|
671
|
-
| `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
|
|
672
|
-
|
|
673
758
|
## Configuration Examples
|
|
674
759
|
|
|
675
760
|
### Minimal Configuration
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "llms-py"
|
|
7
|
-
version = "2.0.
|
|
7
|
+
version = "2.0.9"
|
|
8
8
|
description = "A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "BSD-3-Clause"
|
|
@@ -16,7 +16,7 @@ with open(os.path.join(this_directory, "requirements.txt"), encoding="utf-8") as
|
|
|
16
16
|
|
|
17
17
|
setup(
|
|
18
18
|
name="llms-py",
|
|
19
|
-
version="2.0.
|
|
19
|
+
version="2.0.9",
|
|
20
20
|
author="ServiceStack",
|
|
21
21
|
author_email="team@servicestack.net",
|
|
22
22
|
description="A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|