llms-py 2.0.7__tar.gz → 2.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {llms_py-2.0.7/llms_py.egg-info → llms_py-2.0.9}/PKG-INFO +124 -39
  2. {llms_py-2.0.7 → llms_py-2.0.9}/README.md +123 -38
  3. {llms_py-2.0.7 → llms_py-2.0.9}/llms.json +35 -5
  4. {llms_py-2.0.7 → llms_py-2.0.9}/llms.py +140 -10
  5. {llms_py-2.0.7 → llms_py-2.0.9/llms_py.egg-info}/PKG-INFO +124 -39
  6. {llms_py-2.0.7 → llms_py-2.0.9}/pyproject.toml +1 -1
  7. {llms_py-2.0.7 → llms_py-2.0.9}/setup.py +1 -1
  8. {llms_py-2.0.7 → llms_py-2.0.9}/ui/Main.mjs +1 -1
  9. {llms_py-2.0.7 → llms_py-2.0.9}/ui/Recents.mjs +31 -2
  10. {llms_py-2.0.7 → llms_py-2.0.9}/LICENSE +0 -0
  11. {llms_py-2.0.7 → llms_py-2.0.9}/MANIFEST.in +0 -0
  12. {llms_py-2.0.7 → llms_py-2.0.9}/index.html +0 -0
  13. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/SOURCES.txt +0 -0
  14. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/dependency_links.txt +0 -0
  15. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/entry_points.txt +0 -0
  16. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/not-zip-safe +0 -0
  17. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/requires.txt +0 -0
  18. {llms_py-2.0.7 → llms_py-2.0.9}/llms_py.egg-info/top_level.txt +0 -0
  19. {llms_py-2.0.7 → llms_py-2.0.9}/requirements.txt +0 -0
  20. {llms_py-2.0.7 → llms_py-2.0.9}/setup.cfg +0 -0
  21. {llms_py-2.0.7 → llms_py-2.0.9}/ui/App.mjs +0 -0
  22. {llms_py-2.0.7 → llms_py-2.0.9}/ui/ChatPrompt.mjs +0 -0
  23. {llms_py-2.0.7 → llms_py-2.0.9}/ui/Sidebar.mjs +0 -0
  24. {llms_py-2.0.7 → llms_py-2.0.9}/ui/app.css +0 -0
  25. {llms_py-2.0.7 → llms_py-2.0.9}/ui/fav.svg +0 -0
  26. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/highlight.min.mjs +0 -0
  27. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/idb.min.mjs +0 -0
  28. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/marked.min.mjs +0 -0
  29. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/servicestack-client.min.mjs +0 -0
  30. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/servicestack-vue.min.mjs +0 -0
  31. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue-router.min.mjs +0 -0
  32. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue.min.mjs +0 -0
  33. {llms_py-2.0.7 → llms_py-2.0.9}/ui/lib/vue.mjs +0 -0
  34. {llms_py-2.0.7 → llms_py-2.0.9}/ui/markdown.mjs +0 -0
  35. {llms_py-2.0.7 → llms_py-2.0.9}/ui/tailwind.input.css +0 -0
  36. {llms_py-2.0.7 → llms_py-2.0.9}/ui/threadStore.mjs +0 -0
  37. {llms_py-2.0.7 → llms_py-2.0.9}/ui/typography.css +0 -0
  38. {llms_py-2.0.7 → llms_py-2.0.9}/ui/utils.mjs +0 -0
  39. {llms_py-2.0.7 → llms_py-2.0.9}/ui.json +0 -0
--- llms_py-2.0.7/llms_py.egg-info/PKG-INFO
+++ llms_py-2.0.9/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llms-py
- Version: 2.0.7
+ Version: 2.0.9
  Summary: A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers
  Home-page: https://github.com/ServiceStack/llms
  Author: ServiceStack
@@ -51,7 +51,7 @@ Configure additional providers and models in [llms.json](llms.json)
  ## Features

  - **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
- - **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
+ - **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
  - **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
  - **Configuration Management**: Easy provider enable/disable and configuration management
  - **CLI Interface**: Simple command-line interface for quick interactions
@@ -510,7 +510,52 @@ llms --default grok-4

  # Update llms.py to latest version
  llms --update
- ```
+
+ # Pass custom parameters to chat request (URL-encoded)
+ llms --args "temperature=0.7&seed=111" "What is 2+2?"
+
+ # Multiple parameters with different types
+ llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
+
+ # URL-encoded special characters (stop sequences)
+ llms --args "stop=Two,Words" "Count to 5"
+
+ # Combine with other options
+ llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
+ ```
+
+ #### Custom Parameters with `--args`
+
+ The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
+
+ **Parameter Types:**
+ - **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
+ - **Integers**: `max_completion_tokens=100`
+ - **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
+ - **Strings**: `stop=one`
+ - **Lists**: `stop=two,words`
+
+ **Common Parameters:**
+ - `temperature`: Controls randomness (0.0 to 2.0)
+ - `max_completion_tokens`: Maximum tokens in response
+ - `seed`: For reproducible outputs
+ - `top_p`: Nucleus sampling parameter
+ - `stop`: Stop sequences (URL-encode special chars)
+ - `store`: Whether or not to store the output
+ - `frequency_penalty`: Penalize new tokens based on frequency
+ - `presence_penalty`: Penalize new tokens based on presence
+ - `logprobs`: Include log probabilities in response
+ - `parallel_tool_calls`: Enable parallel tool calls
+ - `prompt_cache_key`: Cache key for prompt
+ - `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
+ - `safety_identifier`: A string that uniquely identifies each user
+ - `seed`: For reproducible outputs
+ - `service_tier`: Service tier (free, standard, premium, *default)
+ - `top_logprobs`: Number of top logprobs to return
+ - `top_p`: Nucleus sampling parameter
+ - `verbosity`: Verbosity level (0, 1, 2, 3, *default)
+ - `enable_thinking`: Enable thinking mode (Qwen)
+ - `stream`: Enable streaming responses

  ### Default Model Configuration

@@ -558,6 +603,42 @@ llms "Explain quantum computing" | glow

  ## Supported Providers

+ Any OpenAI-compatible providers and their models can be added by configuring them in [llms.json](./llms.json). By default only AI Providers with free tiers are enabled which will only be "available" if their API Key is set.
+
+ You can list the available providers, their models and which are enabled or disabled with:
+
+ ```bash
+ llms ls
+ ```
+
+ They can be enabled/disabled in your `llms.json` file or with:
+
+ ```bash
+ llms --enable <provider>
+ llms --disable <provider>
+ ```
+
+ For a provider to be available, they also require their API Key configured in either your Environment Variables
+ or directly in your `llms.json`.
+
+ ### Environment Variables
+
+ | Provider | Variable | Description | Example |
+ |-----------------|---------------------------|---------------------|---------|
+ | openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...` |
+ | groq | `GROQ_API_KEY` | Groq API key | `gsk_...` |
+ | google_free | `GOOGLE_FREE_API_KEY` | Google FREE API key | `AIza...` |
+ | codestral | `CODESTRAL_API_KEY` | Codestral API key | `...` |
+ | ollama | N/A | No API key required | |
+ | openrouter | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
+ | google | `GOOGLE_API_KEY` | Google API key | `AIza...` |
+ | anthropic | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
+ | openai | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
+ | grok | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
+ | qwen | `DASHSCOPE_API_KEY` | Qwen (Alibaba) API key | `sk-...` |
+ | z.ai | `ZAI_API_KEY` | Z.ai API key | `sk-...` |
+ | mistral | `MISTRAL_API_KEY` | Mistral API key | `...` |
+
  ### OpenAI
  - **Type**: `OpenAiProvider`
  - **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
@@ -588,6 +669,26 @@ export GOOGLE_API_KEY="your-key"
  llms --enable google_free
  ```

+ ### OpenRouter
+ - **Type**: `OpenAiProvider`
+ - **Models**: 100+ models from various providers
+ - **Features**: Access to latest models, free tier available
+
+ ```bash
+ export OPENROUTER_API_KEY="your-key"
+ llms --enable openrouter
+ ```
+
+ ### Grok (X.AI)
+ - **Type**: `OpenAiProvider`
+ - **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
+ - **Features**: Real-time information, humor, uncensored responses
+
+ ```bash
+ export GROK_API_KEY="your-key"
+ llms --enable grok
+ ```
+
  ### Groq
  - **Type**: `OpenAiProvider`
  - **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
@@ -608,44 +709,44 @@ llms --enable groq
  llms --enable ollama
  ```

- ### OpenRouter
+ ### Qwen (Alibaba Cloud)
  - **Type**: `OpenAiProvider`
- - **Models**: 100+ models from various providers
- - **Features**: Access to latest models, free tier available
+ - **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
+ - **Features**: Multilingual, vision models, coding, reasoning, audio processing

  ```bash
- export OPENROUTER_API_KEY="your-key"
- llms --enable openrouter
+ export DASHSCOPE_API_KEY="your-key"
+ llms --enable qwen
  ```

- ### Mistral
+ ### Z.ai
  - **Type**: `OpenAiProvider`
- - **Models**: Mistral Large, Codestral, Pixtral, etc.
- - **Features**: Code generation, multilingual
+ - **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
+ - **Features**: Advanced language models with strong reasoning capabilities

  ```bash
- export MISTRAL_API_KEY="your-key"
- llms --enable mistral
+ export ZAI_API_KEY="your-key"
+ llms --enable z.ai
  ```

- ### Grok (X.AI)
+ ### Mistral
  - **Type**: `OpenAiProvider`
- - **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
- - **Features**: Real-time information, humor, uncensored responses
+ - **Models**: Mistral Large, Codestral, Pixtral, etc.
+ - **Features**: Code generation, multilingual

  ```bash
- export GROK_API_KEY="your-key"
- llms --enable grok
+ export MISTRAL_API_KEY="your-key"
+ llms --enable mistral
  ```

- ### Qwen (Alibaba Cloud)
+ ### Codestral
  - **Type**: `OpenAiProvider`
- - **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
- - **Features**: Multilingual, vision models, coding, reasoning, audio processing
+ - **Models**: Codestral
+ - **Features**: Code generation

  ```bash
- export DASHSCOPE_API_KEY="your-key"
- llms --enable qwen
+ export CODESTRAL_API_KEY="your-key"
+ llms --enable codestral
  ```

  ## Model Routing
@@ -654,22 +755,6 @@ The tool automatically routes requests to the first available provider that supp

  Example: If both OpenAI and OpenRouter support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq than OpenRouter (Paid) if requests fails.

- ## Environment Variables
-
- | Variable | Description | Example |
- |----------|-------------|---------|
- | `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
- | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
- | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
- | `GOOGLE_API_KEY` | Google API key | `AIza...` |
- | `GROQ_API_KEY` | Groq API key | `gsk_...` |
- | `MISTRAL_API_KEY` | Mistral API key | `...` |
- | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
- | `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
- | `CODESTRAL_API_KEY` | Codestral API key | `...` |
- | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
- | `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
-
  ## Configuration Examples

  ### Minimal Configuration
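For illustration (an aside, not part of the diff): under the coercion rules documented above, a hypothetical invocation maps each URL-encoded value onto a typed field of the JSON request body. The model name and prompt below are made up.

```python
# Hypothetical request body produced by:
#   llms --args "temperature=0.7&seed=111&stop=Two,Words" "Count to 5"
# Floats and ints are inferred from the values, and a comma-separated
# `stop` value becomes a list, per the Parameter Types list above.
body = {
    "model": "kimi-k2",  # assumed default model, for illustration only
    "messages": [{"role": "user", "content": "Count to 5"}],
    "temperature": 0.7,        # coerced to float
    "seed": 111,               # coerced to int
    "stop": ["Two", "Words"],  # comma-separated value split into a list
}
```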
--- llms_py-2.0.7/README.md
+++ llms_py-2.0.9/README.md
(The README.md changes are identical to the PKG-INFO changes above, since the README is embedded in PKG-INFO; each hunk simply sits 40 lines earlier: @@ -11,7 +11,7 @@, @@ -470,7 +470,52 @@, @@ -518,6 +563,42 @@, @@ -548,6 +629,26 @@, @@ -568,44 +669,44 @@ and @@ -614,22 +715,6 @@.)
--- llms_py-2.0.7/llms.json
+++ llms_py-2.0.9/llms.json
@@ -9,7 +9,12 @@
      "messages": [
        {
          "role": "user",
-         "content": ""
+         "content": [
+           {
+             "type": "text",
+             "text": ""
+           }
+         ]
        }
      ]
    },
@@ -193,6 +198,9 @@
      "nova-micro": "amazon/nova-micro-v1",
      "nova-lite": "amazon/nova-lite-v1",
      "nova-pro": "amazon/nova-pro-v1",
+     "claude-opus-4-1": "anthropic/claude-opus-4.1",
+     "claude-sonnet-4-5": "anthropic/claude-sonnet-4.5",
+     "claude-sonnet-4-0": "anthropic/claude-sonnet-4",
      "gpt-5": "openai/gpt-5",
      "gpt-5-chat": "openai/gpt-5-chat",
      "gpt-5-mini": "openai/gpt-5-mini",
@@ -210,11 +218,13 @@
      "grok-4": "x-ai/grok-4",
      "grok-4-fast": "x-ai/grok-4-fast",
      "grok-code-fast-1": "x-ai/grok-code-fast-1",
+     "glm-4.6": "z-ai/glm-4.6",
      "glm-4.5v": "z-ai/glm-4.5v",
      "glm-4.5": "z-ai/glm-4.5",
      "glm-4.5-air": "z-ai/glm-4.5-air",
      "kimi-k2": "moonshotai/kimi-k2",
      "deepseek-v3.1:671b": "deepseek/deepseek-chat",
+     "deepseek-v3.2-exp": "deepseek/deepseek-v3.2-exp",
      "deepseek-chat-v3.1:671b": "deepseek/deepseek-chat-v3.1:free",
      "deepseek-r1:671b": "deepseek/deepseek-r1",
      "deepseek-v3.1-terminus": "deepseek/deepseek-v3.1-terminus",
@@ -263,6 +273,7 @@
      "models": {
        "claude-opus-4-1": "claude-opus-4-1",
        "claude-opus-4": "claude-opus-4",
+       "claude-sonnet-4-5": "claude-sonnet-4-5",
        "claude-sonnet-4-0": "claude-sonnet-4-0",
        "claude-3-7-sonnet": "claude-3-7-sonnet-latest",
        "claude-3-5-haiku": "claude-3-5-haiku-latest",
@@ -383,7 +394,24 @@
        "qwen2.5-vl:7b": "qwen2.5-vl-7b-instruct",
        "qwen2.5-vl:3b": "qwen2.5-vl-3b-instruct",
        "qwen2.5-omni:7b": "qwen2.5-omni-7b"
-     }
+     },
+     "enable_thinking": false
+   },
+   "z.ai": {
+     "enabled": false,
+     "type": "OpenAiProvider",
+     "base_url": "https://api.z.ai/api/paas/v4",
+     "api_key": "$ZAI_API_KEY",
+     "models": {
+       "glm-4.6": "glm-4.6",
+       "glm-4.5": "glm-4.5",
+       "glm-4.5-air": "glm-4.5-air",
+       "glm-4.5-x": "glm-4.5-x",
+       "glm-4.5-airx": "glm-4.5-airx",
+       "glm-4.5-flash": "glm-4.5-flash",
+       "glm-4:32b": "glm-4-32b-0414-128k"
+     },
+     "temperature": 0.7
    },
    "mistral": {
      "enabled": false,
@@ -396,20 +424,22 @@
      "devstral-medium": "devstral-medium-2507",
      "codestral:22b": "codestral-latest",
      "mistral-ocr": "mistral-ocr-latest",
-     "voxtral-mini": "voxtral-mini-latest",
      "mistral-small3.2:24b": "mistral-small-latest",
      "magistral-small": "magistral-small-latest",
      "devstral-small": "devstral-small-2507",
      "voxtral-small": "voxtral-small-latest",
+     "voxtral-mini": "voxtral-mini-latest",
+     "codestral-embed": "codestral-embed-2505",
+     "mistral-embed": "mistral-embed",
      "mistral-large:123b": "mistral-large-latest",
      "pixtral-large:124b": "pixtral-large-latest",
      "pixtral:12b": "pixtral-12b",
-     "mistral-nemo:12b": "mistral-nemo",
+     "mistral-nemo:12b": "open-mistral-nemo",
      "mistral-saba": "mistral-saba-latest",
      "mistral:7b": "open-mistral-7b",
      "mixtral:8x7b": "open-mixtral-8x7b",
      "mixtral:8x22b": "open-mixtral-8x22b",
-     "ministral:8b": "ministral-3b-latest",
+     "ministral:8b": "ministral-8b-latest",
      "ministral:3b": "ministral-3b-latest"
    }
  }
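As an aside, a minimal sketch of how the new `z.ai` entry could be consumed. It assumes provider entries sit under a top-level `"providers"` key and that `"$VAR"` `api_key` values resolve against environment variables; the `"api_key": "$ZAI_API_KEY"` line suggests this convention, but the loader itself is not part of this diff.

```python
import json
import os

def available_providers(path="llms.json"):
    """Sketch (not llms.py's actual loader): enabled providers with a resolvable key."""
    with open(path) as f:
        config = json.load(f)
    names = []
    for name, provider in config.get("providers", {}).items():  # top-level key assumed
        key = provider.get("api_key")
        if isinstance(key, str) and key.startswith("$"):
            key = os.environ.get(key[1:])  # "$ZAI_API_KEY" -> env var lookup
        if provider.get("enabled") and (key or name == "ollama"):
            names.append(name)
    return names

# With ZAI_API_KEY set and "z.ai" enabled, the list would include "z.ai".
```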
--- llms_py-2.0.7/llms.py
+++ llms_py-2.0.9/llms.py
@@ -14,6 +14,7 @@ import mimetypes
  import traceback
  import sys
  import site
+ from urllib.parse import parse_qs

  import aiohttp
  from aiohttp import web
@@ -21,7 +22,7 @@ from aiohttp import web
  from pathlib import Path
  from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)

- VERSION = "2.0.7"
+ VERSION = "2.0.9"
  _ROOT = None
  g_config_path = None
  g_ui_path = None
@@ -63,7 +64,8 @@ def chat_summary(chat):
          elif 'file' in item:
              if 'file_data' in item['file']:
                  data = item['file']['file_data']
-                 item['file']['file_data'] = f"({len(data)})"
+                 prefix = url.split(',', 1)[0]
+                 item['file']['file_data'] = prefix + f",({len(url) - len(prefix)})"
      return json.dumps(clone, indent=2)

  def gemini_chat_summary(gemini_chat):
@@ -89,6 +91,60 @@ def is_url(url):
  def get_filename(file):
      return file.rsplit('/',1)[1] if '/' in file else 'file'

+ def parse_args_params(args_str):
+     """Parse URL-encoded parameters and return a dictionary."""
+     if not args_str:
+         return {}
+
+     # Parse the URL-encoded string
+     parsed = parse_qs(args_str, keep_blank_values=True)
+
+     # Convert to simple dict with single values (not lists)
+     result = {}
+     for key, values in parsed.items():
+         if len(values) == 1:
+             value = values[0]
+             # Try to convert to appropriate types
+             if value.lower() == 'true':
+                 result[key] = True
+             elif value.lower() == 'false':
+                 result[key] = False
+             elif value.isdigit():
+                 result[key] = int(value)
+             else:
+                 try:
+                     # Try to parse as float
+                     result[key] = float(value)
+                 except ValueError:
+                     # Keep as string
+                     result[key] = value
+         else:
+             # Multiple values, keep as list
+             result[key] = values
+
+     return result
+
+ def apply_args_to_chat(chat, args_params):
+     """Apply parsed arguments to the chat request."""
+     if not args_params:
+         return chat
+
+     # Apply each parameter to the chat request
+     for key, value in args_params.items():
+         if isinstance(value, str):
+             if key == 'stop':
+                 if ',' in value:
+                     value = value.split(',')
+             elif key == 'max_completion_tokens' or key == 'max_tokens' or key == 'n' or key == 'seed' or key == 'top_logprobs':
+                 value = int(value)
+             elif key == 'temperature' or key == 'top_p' or key == 'frequency_penalty' or key == 'presence_penalty':
+                 value = float(value)
+             elif key == 'store' or key == 'logprobs' or key == 'enable_thinking' or key == 'parallel_tool_calls' or key == 'stream':
+                 value = bool(value)
+         chat[key] = value
+
+     return chat
+
  def is_base_64(data):
      try:
          base64.b64decode(data)
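Read together, the two new helpers behave as follows; this doctest-style check is derived directly from the hunk above:

```python
# Assumes parse_args_params and apply_args_to_chat from the new llms.py.
chat = {"model": "glm-4.6", "messages": []}
params = parse_args_params("temperature=0.7&seed=111&store=true&stop=Two,Words")
# parse_qs does not treat commas as separators, so `stop` is still a string here:
assert params == {"temperature": 0.7, "seed": 111, "store": True, "stop": "Two,Words"}

apply_args_to_chat(chat, params)
# apply_args_to_chat is where the comma-separated `stop` becomes a list:
assert chat["stop"] == ["Two", "Words"]
assert chat["temperature"] == 0.7 and chat["seed"] == 111 and chat["store"] is True
```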
@@ -190,8 +246,9 @@ async def process_chat(chat):
                  content = f.read()
                  file['filename'] = get_filename(url)
                  file['file_data'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
-             elif is_base_64(url):
-                 file['filename'] = 'file'
+             elif url.startswith('data:'):
+                 if 'filename' not in file:
+                     file['filename'] = 'file'
                  pass # use base64 data as-is
              else:
                  raise Exception(f"Invalid file: {url}")
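The effect of this change, distilled: a `data:` URI is now recognised by its scheme rather than by a speculative base64 decode, and a caller-supplied `filename` is no longer overwritten. A small self-contained illustration (hypothetical payload):

```python
# Hypothetical file payload; the base64 body is truncated for brevity.
file = {
    "filename": "report.pdf",                               # supplied by caller
    "file_data": "data:application/pdf;base64,JVBERi0...",  # data: URI
}
url = file["file_data"]
if url.startswith("data:"):          # new scheme check
    if "filename" not in file:       # only fill in a default when missing
        file["filename"] = "file"
assert file["filename"] == "report.pdf"  # preserved under the new logic
```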
@@ -219,13 +276,38 @@ class OpenAiProvider:
          self.api_key = api_key
          self.models = models

-         self.chat_url = f"{base_url}/v1/chat/completions"
+         # check if base_url ends with /v{\d} to handle providers with different versions (e.g. z.ai uses /v4)
+         last_segment = base_url.rsplit('/',1)[1]
+         if last_segment.startswith('v') and last_segment[1:].isdigit():
+             self.chat_url = f"{base_url}/chat/completions"
+         else:
+             self.chat_url = f"{base_url}/v1/chat/completions"
+
          self.headers = kwargs['headers'] if 'headers' in kwargs else {
              "Content-Type": "application/json",
          }
          if api_key is not None:
              self.headers["Authorization"] = f"Bearer {api_key}"

+         self.frequency_penalty = float(kwargs['frequency_penalty']) if 'frequency_penalty' in kwargs else None
+         self.max_completion_tokens = int(kwargs['max_completion_tokens']) if 'max_completion_tokens' in kwargs else None
+         self.n = int(kwargs['n']) if 'n' in kwargs else None
+         self.parallel_tool_calls = bool(kwargs['parallel_tool_calls']) if 'parallel_tool_calls' in kwargs else None
+         self.presence_penalty = float(kwargs['presence_penalty']) if 'presence_penalty' in kwargs else None
+         self.prompt_cache_key = kwargs['prompt_cache_key'] if 'prompt_cache_key' in kwargs else None
+         self.reasoning_effort = kwargs['reasoning_effort'] if 'reasoning_effort' in kwargs else None
+         self.safety_identifier = kwargs['safety_identifier'] if 'safety_identifier' in kwargs else None
+         self.seed = int(kwargs['seed']) if 'seed' in kwargs else None
+         self.service_tier = kwargs['service_tier'] if 'service_tier' in kwargs else None
+         self.stop = kwargs['stop'] if 'stop' in kwargs else None
+         self.store = bool(kwargs['store']) if 'store' in kwargs else None
+         self.temperature = float(kwargs['temperature']) if 'temperature' in kwargs else None
+         self.top_logprobs = int(kwargs['top_logprobs']) if 'top_logprobs' in kwargs else None
+         self.top_p = float(kwargs['top_p']) if 'top_p' in kwargs else None
+         self.verbosity = kwargs['verbosity'] if 'verbosity' in kwargs else None
+         self.stream = bool(kwargs['stream']) if 'stream' in kwargs else None
+         self.enable_thinking = bool(kwargs['enable_thinking']) if 'enable_thinking' in kwargs else None
+
      @classmethod
      def test(cls, base_url=None, api_key=None, models={}, **kwargs):
          return base_url is not None and api_key is not None and len(models) > 0
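The new URL rule, restated as a standalone sketch with the same logic as the constructor above (the Mistral base URL is a hypothetical input):

```python
def chat_url_for(base_url: str) -> str:
    # A trailing /v<digits> segment is kept as-is; otherwise /v1 is appended.
    last = base_url.rsplit('/', 1)[1]
    if last.startswith('v') and last[1:].isdigit():
        return f"{base_url}/chat/completions"
    return f"{base_url}/v1/chat/completions"

assert chat_url_for("https://api.z.ai/api/paas/v4") == \
    "https://api.z.ai/api/paas/v4/chat/completions"
assert chat_url_for("https://api.mistral.ai") == \
    "https://api.mistral.ai/v1/chat/completions"
```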
@@ -241,6 +323,41 @@
          # with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
          #     f.write(json.dumps(chat, indent=2))

+         if self.frequency_penalty is not None:
+             chat['frequency_penalty'] = self.frequency_penalty
+         if self.max_completion_tokens is not None:
+             chat['max_completion_tokens'] = self.max_completion_tokens
+         if self.n is not None:
+             chat['n'] = self.n
+         if self.parallel_tool_calls is not None:
+             chat['parallel_tool_calls'] = self.parallel_tool_calls
+         if self.presence_penalty is not None:
+             chat['presence_penalty'] = self.presence_penalty
+         if self.prompt_cache_key is not None:
+             chat['prompt_cache_key'] = self.prompt_cache_key
+         if self.reasoning_effort is not None:
+             chat['reasoning_effort'] = self.reasoning_effort
+         if self.safety_identifier is not None:
+             chat['safety_identifier'] = self.safety_identifier
+         if self.seed is not None:
+             chat['seed'] = self.seed
+         if self.service_tier is not None:
+             chat['service_tier'] = self.service_tier
+         if self.stop is not None:
+             chat['stop'] = self.stop
+         if self.store is not None:
+             chat['store'] = self.store
+         if self.temperature is not None:
+             chat['temperature'] = self.temperature
+         if self.top_logprobs is not None:
+             chat['top_logprobs'] = self.top_logprobs
+         if self.top_p is not None:
+             chat['top_p'] = self.top_p
+         if self.verbosity is not None:
+             chat['verbosity'] = self.verbosity
+         if self.enable_thinking is not None:
+             chat['enable_thinking'] = self.enable_thinking
+
          chat = await process_chat(chat)
          _log(f"POST {self.chat_url}")
          _log(chat_summary(chat))
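One consequence of this block worth noting: a provider-level setting from `llms.json` is written into the request body last, so it takes precedence over a value supplied by the client or via `--args`. A toy illustration using z.ai's configured `"temperature": 0.7`:

```python
chat = {"model": "glm-4.6", "temperature": 0.2}  # value from the client or --args
provider_temperature = 0.7  # self.temperature, set from llms.json kwargs
if provider_temperature is not None:
    chat["temperature"] = provider_temperature  # provider config wins
assert chat["temperature"] == 0.7
```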
@@ -531,10 +648,14 @@ async def chat_completion(chat):
      # If we get here, all providers failed
      raise first_exception

- async def cli_chat(chat, image=None, audio=None, file=None, raw=False):
+ async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False):
      if g_default_model:
          chat['model'] = g_default_model

+     # Apply args parameters to chat request
+     if args:
+         chat = apply_args_to_chat(chat, args)
+
      # process_chat downloads the image, just adding the reference here
      if image is not None:
          first_message = None
@@ -919,6 +1040,7 @@ def main():
      parser.add_argument('--image', default=None, help='Image input to use in chat completion')
      parser.add_argument('--audio', default=None, help='Audio input to use in chat completion')
      parser.add_argument('--file', default=None, help='File input to use in chat completion')
+     parser.add_argument('--args', default=None, help='URL-encoded parameters to add to chat request (e.g. "temperature=0.7&seed=111")', metavar='PARAMS')
      parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')

      parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')
@@ -1250,13 +1372,21 @@ def main():
          if len(extra_args) > 0:
              prompt = ' '.join(extra_args)
              # replace content of last message if exists, else add
-             last_msg = chat['messages'][-1]
-             if last_msg['role'] == 'user':
-                 last_msg['content'] = prompt
+             last_msg = chat['messages'][-1] if 'messages' in chat else None
+             if last_msg and last_msg['role'] == 'user':
+                 if isinstance(last_msg['content'], list):
+                     last_msg['content'][-1]['text'] = prompt
+                 else:
+                     last_msg['content'] = prompt
              else:
                  chat['messages'].append({'role': 'user', 'content': prompt})

-             asyncio.run(cli_chat(chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, raw=cli_args.raw))
+             # Parse args parameters if provided
+             args = None
+             if cli_args.args is not None:
+                 args = parse_args_params(cli_args.args)
+
+             asyncio.run(cli_chat(chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, args=args, raw=cli_args.raw))
              exit(0)
      except Exception as e:
          print(f"{cli_args.logprefix}Error: {e}")
--- llms_py-2.0.7/PKG-INFO
+++ llms_py-2.0.9/llms_py.egg-info/PKG-INFO
(Identical to the PKG-INFO diff above, hunk for hunk: same offsets, same content.)
--- llms_py-2.0.7/pyproject.toml
+++ llms_py-2.0.9/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "llms-py"
- version = "2.0.7"
+ version = "2.0.9"
  description = "A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers"
  readme = "README.md"
  license = "BSD-3-Clause"
--- llms_py-2.0.7/setup.py
+++ llms_py-2.0.9/setup.py
@@ -16,7 +16,7 @@ with open(os.path.join(this_directory, "requirements.txt"), encoding="utf-8") as

  setup(
      name="llms-py",
-     version="2.0.7",
+     version="2.0.9",
      author="ServiceStack",
      author_email="team@servicestack.net",
      description="A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers",
--- llms_py-2.0.7/ui/Main.mjs
+++ llms_py-2.0.9/ui/Main.mjs
@@ -20,7 +20,7 @@ const ProviderStatus = {
              <span class="text-red-700">{{(config.status.disabled||[]).length}}</span>
          </div>
      </button>
-     <div v-if="showPopover" ref="popoverRef" class="absolute right-0 mt-2 w-72 max-h-112 overflow-y-auto bg-white border border-gray-200 rounded-md shadow-lg z-10">
+     <div v-if="showPopover" ref="popoverRef" class="absolute right-0 mt-2 w-72 max-h-116 overflow-y-auto bg-white border border-gray-200 rounded-md shadow-lg z-10">
          <div class="divide-y divide-gray-100">
              <div v-for="p in allProviders" :key="p" class="flex items-center justify-between px-3 py-2">
                  <label :for="'chk_' + p" class="cursor-pointer text-sm text-gray-900 truncate mr-2" :title="p">{{ p }}</label>
--- llms_py-2.0.7/ui/Recents.mjs
+++ llms_py-2.0.9/ui/Recents.mjs
@@ -1,5 +1,5 @@
- import { ref, computed, onMounted, watch, inject } from 'vue'
- import { useRouter } from 'vue-router'
+ import { ref, onMounted, watch, inject } from 'vue'
+ import { useRouter, useRoute } from 'vue-router'
  import { useThreadStore } from './threadStore.mjs'
  import { renderMarkdown } from './markdown.mjs'

@@ -169,7 +169,36 @@ export default {
          </div>
      `,
      setup() {
+         const router = useRouter()
+         const route = useRoute()
          const q = ref('')
+
+         // Initialize search query from URL parameter
+         onMounted(() => {
+             const urlQuery = route.query.q || ''
+             q.value = urlQuery
+         })
+
+         // Watch for changes in the search input and update URL
+         watch(q, (newQuery) => {
+             const currentQuery = route.query.q || ''
+             if (newQuery !== currentQuery) {
+                 // Update URL without triggering navigation
+                 router.replace({
+                     path: route.path,
+                     query: newQuery ? { q: newQuery } : {}
+                 })
+             }
+         })
+
+         // Watch for URL changes (browser back/forward) and update search input
+         watch(() => route.query.q, (newQuery) => {
+             const urlQuery = newQuery || ''
+             if (q.value !== urlQuery) {
+                 q.value = urlQuery
+             }
+         })
+
          return {
              q,
          }