llms-py 2.0.8__tar.gz → 2.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {llms_py-2.0.8/llms_py.egg-info → llms_py-2.0.10}/PKG-INFO +124 -39
  2. {llms_py-2.0.8 → llms_py-2.0.10}/README.md +123 -38
  3. llms_py-2.0.10/index.html +80 -0
  4. {llms_py-2.0.8 → llms_py-2.0.10}/llms.json +16 -10
  5. {llms_py-2.0.8 → llms_py-2.0.10}/llms.py +144 -13
  6. {llms_py-2.0.8 → llms_py-2.0.10/llms_py.egg-info}/PKG-INFO +124 -39
  7. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/SOURCES.txt +12 -2
  8. {llms_py-2.0.8 → llms_py-2.0.10}/pyproject.toml +1 -1
  9. {llms_py-2.0.8 → llms_py-2.0.10}/setup.py +16 -6
  10. llms_py-2.0.10/ui/Avatar.mjs +28 -0
  11. llms_py-2.0.10/ui/Brand.mjs +23 -0
  12. {llms_py-2.0.8 → llms_py-2.0.10}/ui/ChatPrompt.mjs +101 -69
  13. {llms_py-2.0.8 → llms_py-2.0.10}/ui/Main.mjs +43 -183
  14. llms_py-2.0.10/ui/ModelSelector.mjs +29 -0
  15. llms_py-2.0.10/ui/ProviderStatus.mjs +105 -0
  16. {llms_py-2.0.8 → llms_py-2.0.10}/ui/Recents.mjs +2 -1
  17. llms_py-2.0.10/ui/SettingsDialog.mjs +374 -0
  18. {llms_py-2.0.8 → llms_py-2.0.10}/ui/Sidebar.mjs +11 -27
  19. llms_py-2.0.10/ui/SignIn.mjs +64 -0
  20. llms_py-2.0.10/ui/SystemPromptEditor.mjs +31 -0
  21. llms_py-2.0.10/ui/SystemPromptSelector.mjs +36 -0
  22. llms_py-2.0.10/ui/Welcome.mjs +8 -0
  23. llms_py-2.0.10/ui/ai.mjs +80 -0
  24. {llms_py-2.0.8 → llms_py-2.0.10}/ui/app.css +76 -10
  25. llms_py-2.0.10/ui/lib/servicestack-vue.mjs +37 -0
  26. {llms_py-2.0.8 → llms_py-2.0.10}/ui/markdown.mjs +9 -2
  27. {llms_py-2.0.8 → llms_py-2.0.10}/ui/tailwind.input.css +13 -4
  28. {llms_py-2.0.8 → llms_py-2.0.10}/ui/threadStore.mjs +2 -2
  29. {llms_py-2.0.8 → llms_py-2.0.10}/ui/typography.css +109 -1
  30. {llms_py-2.0.8 → llms_py-2.0.10}/ui/utils.mjs +8 -2
  31. llms_py-2.0.8/index.html +0 -64
  32. llms_py-2.0.8/ui/lib/servicestack-vue.min.mjs +0 -37
  33. {llms_py-2.0.8 → llms_py-2.0.10}/LICENSE +0 -0
  34. {llms_py-2.0.8 → llms_py-2.0.10}/MANIFEST.in +0 -0
  35. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/dependency_links.txt +0 -0
  36. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/entry_points.txt +0 -0
  37. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/not-zip-safe +0 -0
  38. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/requires.txt +0 -0
  39. {llms_py-2.0.8 → llms_py-2.0.10}/llms_py.egg-info/top_level.txt +0 -0
  40. {llms_py-2.0.8 → llms_py-2.0.10}/requirements.txt +0 -0
  41. {llms_py-2.0.8 → llms_py-2.0.10}/setup.cfg +0 -0
  42. {llms_py-2.0.8 → llms_py-2.0.10}/ui/App.mjs +0 -0
  43. {llms_py-2.0.8 → llms_py-2.0.10}/ui/fav.svg +0 -0
  44. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/highlight.min.mjs +0 -0
  45. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/idb.min.mjs +0 -0
  46. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/marked.min.mjs +0 -0
  47. /llms_py-2.0.8/ui/lib/servicestack-client.min.mjs → /llms_py-2.0.10/ui/lib/servicestack-client.mjs +0 -0
  48. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/vue-router.min.mjs +0 -0
  49. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/vue.min.mjs +0 -0
  50. {llms_py-2.0.8 → llms_py-2.0.10}/ui/lib/vue.mjs +0 -0
  51. {llms_py-2.0.8 → llms_py-2.0.10}/ui.json +0 -0
{llms_py-2.0.8/llms_py.egg-info → llms_py-2.0.10}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llms-py
- Version: 2.0.8
+ Version: 2.0.10
  Summary: A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers
  Home-page: https://github.com/ServiceStack/llms
  Author: ServiceStack
@@ -51,7 +51,7 @@ Configure additional providers and models in [llms.json](llms.json)
  ## Features

  - **Lightweight**: Single [llms.py](llms.py) Python file with single `aiohttp` dependency
- - **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Mistral
+ - **Multi-Provider Support**: OpenRouter, Ollama, Anthropic, Google, OpenAI, Grok, Groq, Qwen, Z.ai, Mistral
  - **OpenAI-Compatible API**: Works with any client that supports OpenAI's chat completion API
  - **Configuration Management**: Easy provider enable/disable and configuration management
  - **CLI Interface**: Simple command-line interface for quick interactions
@@ -510,7 +510,50 @@ llms --default grok-4

  # Update llms.py to latest version
  llms --update
- ```
+
+ # Pass custom parameters to chat request (URL-encoded)
+ llms --args "temperature=0.7&seed=111" "What is 2+2?"
+
+ # Multiple parameters with different types
+ llms --args "temperature=0.5&max_completion_tokens=50" "Tell me a joke"
+
+ # URL-encoded special characters (stop sequences)
+ llms --args "stop=Two,Words" "Count to 5"
+
+ # Combine with other options
+ llms --system "You are helpful" --args "temperature=0.3" --raw "Hello"
+ ```
+
+ #### Custom Parameters with `--args`
+
+ The `--args` option allows you to pass URL-encoded parameters to customize the chat request sent to LLM providers:
+
+ **Parameter Types:**
+ - **Floats**: `temperature=0.7`, `frequency_penalty=0.2`
+ - **Integers**: `max_completion_tokens=100`
+ - **Booleans**: `store=true`, `verbose=false`, `logprobs=true`
+ - **Strings**: `stop=one`
+ - **Lists**: `stop=two,words`
+
+ **Common Parameters:**
+ - `temperature`: Controls randomness (0.0 to 2.0)
+ - `max_completion_tokens`: Maximum tokens in response
+ - `seed`: For reproducible outputs
+ - `top_p`: Nucleus sampling parameter
+ - `stop`: Stop sequences (URL-encode special chars)
+ - `store`: Whether or not to store the output
+ - `frequency_penalty`: Penalize new tokens based on frequency
+ - `presence_penalty`: Penalize new tokens based on presence
+ - `logprobs`: Include log probabilities in response
+ - `parallel_tool_calls`: Enable parallel tool calls
+ - `prompt_cache_key`: Cache key for prompt
+ - `reasoning_effort`: Reasoning effort (low, medium, high, *minimal, *none, *default)
+ - `safety_identifier`: A string that uniquely identifies each user
+ - `service_tier`: Service tier (free, standard, premium, *default)
+ - `top_logprobs`: Number of top logprobs to return
+ - `verbosity`: Verbosity level (0, 1, 2, 3, *default)
+ - `enable_thinking`: Enable thinking mode (Qwen)
+ - `stream`: Enable streaming responses

  ### Default Model Configuration

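To make the type coercion above concrete, here is a minimal sketch of parsing an `--args` string with only the standard library. The `parse_args` helper and its coercion rules are assumptions for illustration, not the code llms.py actually ships:

```python
# Minimal sketch of URL-encoded --args parsing; hypothetical helper,
# not the parser llms.py actually ships.
from urllib.parse import parse_qsl

def parse_args(args: str) -> dict:
    """Parse e.g. "temperature=0.7&seed=111&stop=two,words" into typed values."""
    params = {}
    for key, value in parse_qsl(args):       # also URL-decodes %-escapes
        if value.lower() in ("true", "false"):          # booleans
            params[key] = value.lower() == "true"
        elif "," in value:                              # lists like stop=two,words
            params[key] = value.split(",")
        else:
            try:
                params[key] = int(value)                # integers
            except ValueError:
                try:
                    params[key] = float(value)          # floats
                except ValueError:
                    params[key] = value                 # plain strings
    # assumption: the typed values are merged into the chat request body
    return params

print(parse_args("temperature=0.7&seed=111&store=true&stop=Two,Words"))
# {'temperature': 0.7, 'seed': 111, 'store': True, 'stop': ['Two', 'Words']}
```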
@@ -558,6 +603,42 @@ llms "Explain quantum computing" | glow

  ## Supported Providers

+ Any OpenAI-compatible provider and its models can be added by configuring them in [llms.json](./llms.json). By default, only AI providers with free tiers are enabled, and they are only "available" once their API key is set.
+
+ You can list the available providers, their models, and which are enabled or disabled with:
+
+ ```bash
+ llms ls
+ ```
+
+ Providers can be enabled or disabled in your `llms.json` file or with:
+
+ ```bash
+ llms --enable <provider>
+ llms --disable <provider>
+ ```
+
+ For a provider to be available, its API key must also be configured, either in your environment variables
+ or directly in your `llms.json`.
+
+ ### Environment Variables
+
+ | Provider        | Variable                  | Description                    | Example      |
+ |-----------------|---------------------------|--------------------------------|--------------|
+ | openrouter_free | `OPENROUTER_FREE_API_KEY` | OpenRouter FREE models API key | `sk-or-...`  |
+ | groq            | `GROQ_API_KEY`            | Groq API key                   | `gsk_...`    |
+ | google_free     | `GOOGLE_FREE_API_KEY`     | Google FREE API key            | `AIza...`    |
+ | codestral       | `CODESTRAL_API_KEY`       | Codestral API key              | `...`        |
+ | ollama          | N/A                       | No API key required            |              |
+ | openrouter      | `OPENROUTER_API_KEY`      | OpenRouter API key             | `sk-or-...`  |
+ | google          | `GOOGLE_API_KEY`          | Google API key                 | `AIza...`    |
+ | anthropic       | `ANTHROPIC_API_KEY`       | Anthropic API key              | `sk-ant-...` |
+ | openai          | `OPENAI_API_KEY`          | OpenAI API key                 | `sk-...`     |
+ | grok            | `GROK_API_KEY`            | Grok (X.AI) API key            | `xai-...`    |
+ | qwen            | `DASHSCOPE_API_KEY`       | Qwen (Alibaba) API key         | `sk-...`     |
+ | z.ai            | `ZAI_API_KEY`             | Z.ai API key                   | `sk-...`     |
+ | mistral         | `MISTRAL_API_KEY`         | Mistral API key                | `...`        |
+
  ### OpenAI
  - **Type**: `OpenAiProvider`
  - **Models**: GPT-5, GPT-5 Codex, GPT-4o, GPT-4o-mini, o3, etc.
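The availability rule described above (a provider must be enabled and, except for ollama, have its API key set) can be pictured with a short sketch. The `is_available` helper and the trimmed-down mapping are illustrative assumptions, not llms.py's actual logic:

```python
# Hypothetical availability check mirroring the table above;
# the real llms.py logic may differ.
import os

ENV_KEYS = {
    "openrouter_free": "OPENROUTER_FREE_API_KEY",
    "groq": "GROQ_API_KEY",
    "google_free": "GOOGLE_FREE_API_KEY",
    "anthropic": "ANTHROPIC_API_KEY",
    "openai": "OPENAI_API_KEY",
    "ollama": None,  # local server, no API key required
}

def is_available(provider: str, enabled: bool) -> bool:
    """A provider is available when it is enabled and its API key is set."""
    if not enabled:
        return False
    env_var = ENV_KEYS.get(provider)
    return env_var is None or bool(os.environ.get(env_var))
```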
@@ -588,6 +669,26 @@ export GOOGLE_API_KEY="your-key"
  llms --enable google_free
  ```

+ ### OpenRouter
+ - **Type**: `OpenAiProvider`
+ - **Models**: 100+ models from various providers
+ - **Features**: Access to latest models, free tier available
+
+ ```bash
+ export OPENROUTER_API_KEY="your-key"
+ llms --enable openrouter
+ ```
+
+ ### Grok (X.AI)
+ - **Type**: `OpenAiProvider`
+ - **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
+ - **Features**: Real-time information, humor, uncensored responses
+
+ ```bash
+ export GROK_API_KEY="your-key"
+ llms --enable grok
+ ```
+
  ### Groq
  - **Type**: `OpenAiProvider`
  - **Models**: Llama 3.3, Gemma 2, Kimi K2, etc.
@@ -608,44 +709,44 @@ llms --enable groq
  llms --enable ollama
  ```

- ### OpenRouter
+ ### Qwen (Alibaba Cloud)
  - **Type**: `OpenAiProvider`
- - **Models**: 100+ models from various providers
- - **Features**: Access to latest models, free tier available
+ - **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
+ - **Features**: Multilingual, vision models, coding, reasoning, audio processing

  ```bash
- export OPENROUTER_API_KEY="your-key"
- llms --enable openrouter
+ export DASHSCOPE_API_KEY="your-key"
+ llms --enable qwen
  ```

- ### Mistral
+ ### Z.ai
  - **Type**: `OpenAiProvider`
- - **Models**: Mistral Large, Codestral, Pixtral, etc.
- - **Features**: Code generation, multilingual
+ - **Models**: GLM-4.6, GLM-4.5, GLM-4.5-air, GLM-4.5-x, GLM-4.5-airx, GLM-4.5-flash, GLM-4:32b
+ - **Features**: Advanced language models with strong reasoning capabilities

  ```bash
- export MISTRAL_API_KEY="your-key"
- llms --enable mistral
+ export ZAI_API_KEY="your-key"
+ llms --enable z.ai
  ```

- ### Grok (X.AI)
+ ### Mistral
  - **Type**: `OpenAiProvider`
- - **Models**: Grok-4, Grok-3, Grok-3-mini, Grok-code-fast-1, etc.
- - **Features**: Real-time information, humor, uncensored responses
+ - **Models**: Mistral Large, Codestral, Pixtral, etc.
+ - **Features**: Code generation, multilingual

  ```bash
- export GROK_API_KEY="your-key"
- llms --enable grok
+ export MISTRAL_API_KEY="your-key"
+ llms --enable mistral
  ```

- ### Qwen (Alibaba Cloud)
+ ### Codestral
  - **Type**: `OpenAiProvider`
- - **Models**: Qwen3-max, Qwen-max, Qwen-plus, Qwen2.5-VL, QwQ-plus, etc.
- - **Features**: Multilingual, vision models, coding, reasoning, audio processing
+ - **Models**: Codestral
+ - **Features**: Code generation

  ```bash
- export DASHSCOPE_API_KEY="your-key"
- llms --enable qwen
+ export CODESTRAL_API_KEY="your-key"
+ llms --enable codestral
  ```

  ## Model Routing
@@ -654,22 +755,6 @@ The tool automatically routes requests to the first available provider that supp

  Example: If multiple providers support `kimi-k2`, the request will first try OpenRouter (free), then fall back to Groq, then to OpenRouter (paid) if a request fails.

- ## Environment Variables
-
- | Variable | Description | Example |
- |----------|-------------|---------|
- | `LLMS_CONFIG_PATH` | Custom config file path | `/path/to/llms.json` |
- | `OPENAI_API_KEY` | OpenAI API key | `sk-...` |
- | `ANTHROPIC_API_KEY` | Anthropic API key | `sk-ant-...` |
- | `GOOGLE_API_KEY` | Google API key | `AIza...` |
- | `GROQ_API_KEY` | Groq API key | `gsk_...` |
- | `MISTRAL_API_KEY` | Mistral API key | `...` |
- | `OPENROUTER_API_KEY` | OpenRouter API key | `sk-or-...` |
- | `OPENROUTER_FREE_API_KEY` | OpenRouter free tier key | `sk-or-...` |
- | `CODESTRAL_API_KEY` | Codestral API key | `...` |
- | `GROK_API_KEY` | Grok (X.AI) API key | `xai-...` |
- | `DASHSCOPE_API_KEY` | Qwen (Alibaba Cloud) API key | `sk-...` |
-
  ## Configuration Examples

  ### Minimal Configuration
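A schematic sketch of this first-available-provider routing follows. The `route_chat` function, the `provider.chat` call, and the attribute names are assumptions for illustration only:

```python
# Schematic model routing: try each enabled provider that lists the
# model, in configured order, falling back on failure. Illustrative only.
async def route_chat(providers: list, model: str, request: dict) -> dict:
    last_error = None
    for provider in providers:            # e.g. openrouter_free, groq, openrouter
        if not provider.available or model not in provider.models:
            continue
        try:
            return await provider.chat(model, request)  # assumed provider API
        except Exception as e:            # request failed: try the next provider
            last_error = e
    raise RuntimeError(f"No provider could serve {model!r}") from last_error
```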
{llms_py-2.0.8 → llms_py-2.0.10}/README.md

(The README.md diff repeats the PKG-INFO changes above, since PKG-INFO embeds the README; only the hunk offsets differ, shifted by 40 lines.)
llms_py-2.0.10/index.html
@@ -0,0 +1,80 @@
+ <html>
+ <head>
+ <title>llms.py</title>
+ <link rel="stylesheet" href="/ui/typography.css">
+ <link rel="stylesheet" href="/ui/app.css">
+ <link rel="icon" type="image/svg" href="/ui/fav.svg">
+ <style>
+ [type='button'],button[type='submit']{cursor:pointer}
+ [type='checkbox'].switch:checked:hover,
+ [type='checkbox'].switch:checked:focus,
+ [type='checkbox'].switch:checked,
+ [type='checkbox'].switch:focus,
+ [type='checkbox'].switch
+ {
+ border: none;
+ background: none;
+ outline: none;
+ box-shadow: none;
+ cursor: pointer;
+ }
+ </style>
+ </head>
+ <script type="importmap">
+ {
+ "imports": {
+ "vue": "/ui/lib/vue.min.mjs",
+ "vue-router": "/ui/lib/vue-router.min.mjs",
+ "@servicestack/client": "/ui/lib/servicestack-client.mjs",
+ "@servicestack/vue": "/ui/lib/servicestack-vue.mjs",
+ "idb": "/ui/lib/idb.min.mjs",
+ "marked": "/ui/lib/marked.min.mjs",
+ "highlight.js": "/ui/lib/highlight.min.mjs"
+ }
+ }
+ </script>
+ <body>
+ <div id="app"></div>
+ </body>
+ <script type="module">
+ import { createApp, defineAsyncComponent } from 'vue'
+ import { createWebHistory, createRouter } from "vue-router"
+ import ServiceStackVue from "@servicestack/vue"
+ import App from '/ui/App.mjs'
+ import ai from '/ui/ai.mjs'
+ import SettingsDialog from '/ui/SettingsDialog.mjs'
+
+ const { config, models } = await ai.init()
+ const MainComponent = defineAsyncComponent(() => import(ai.base + '/ui/Main.mjs'))
+ const RecentsComponent = defineAsyncComponent(() => import(ai.base + '/ui/Recents.mjs'))
+
+ const Components = {
+ SettingsDialog,
+ }
+
+ const routes = [
+ { path: '/', component: MainComponent },
+ { path: '/c/:id', component: MainComponent },
+ { path: '/recents', component: RecentsComponent },
+ { path: '/:fallback(.*)*', component: MainComponent }
+ ]
+ routes.forEach(r => r.path = ai.base + r.path)
+ const router = createRouter({
+ history: createWebHistory(),
+ routes,
+ })
+ const app = createApp(App, { config, models })
+ app.use(router)
+ app.use(ServiceStackVue)
+ app.provide('ai', ai)
+ app.provide('config', config)
+ app.provide('models', models)
+ Object.keys(Components).forEach(name => {
+ app.component(name, Components[name])
+ })
+
+ window.ai = app.config.globalProperties.$ai = ai
+
+ app.mount('#app')
+ </script>
+ </html>
{llms_py-2.0.8 → llms_py-2.0.10}/llms.json
@@ -9,7 +9,12 @@
  "messages": [
  {
  "role": "user",
- "content": ""
+ "content": [
+ {
+ "type": "text",
+ "text": ""
+ }
+ ]
  }
  ]
  },
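This hunk changes the default message `content` from a plain string to OpenAI-style typed content parts, the shape multimodal requests use. A small sketch of building such a message (the `text_message` helper is hypothetical):

```python
# Sketch: a user message whose content is a list of typed parts rather
# than a bare string, matching the new llms.json default. Hypothetical helper.
def text_message(text: str, image_url: str | None = None) -> dict:
    parts = [{"type": "text", "text": text}]
    if image_url:  # further parts (e.g. images) can sit alongside the text part
        parts.append({"type": "image_url", "image_url": {"url": image_url}})
    return {"role": "user", "content": parts}

print(text_message("Describe this image", "https://example.org/cat.png"))
```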
@@ -90,10 +95,8 @@
  "deepseek-r1:671b": "deepseek/deepseek-r1-0528:free",
  "gemini-2.0-flash": "google/gemini-2.0-flash-exp:free",
  "glm-4.5-air": "z-ai/glm-4.5-air:free",
- "grok-4-fast": "x-ai/grok-4-fast:free",
  "mai-ds-r1": "microsoft/mai-ds-r1:free",
  "llama3.3:70b": "meta-llama/llama-3.3-70b-instruct:free",
- "kimi-k2": "moonshotai/kimi-k2:free",
  "nemotron-nano:9b": "nvidia/nemotron-nano-9b-v2:free",
  "deepseek-r1-distill-llama:70b": "deepseek/deepseek-r1-distill-llama-70b:free",
  "gpt-oss:20b": "openai/gpt-oss-20b:free",
@@ -102,7 +105,6 @@
  "devstral-small": "mistralai/devstral-small-2505:free",
  "venice-uncensored:24b": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
  "llama3.3:8b": "meta-llama/llama-3.3-8b-instruct:free",
- "llama3.1:405b": "meta-llama/llama-3.1-405b-instruct:free",
  "kimi-dev:72b": "moonshotai/kimi-dev-72b:free",
  "gemma3:27b": "google/gemma-3-27b-it:free",
  "qwen3-coder": "qwen/qwen3-coder:free",
@@ -171,7 +173,7 @@
  }
  },
  "ollama": {
- "enabled": false,
+ "enabled": true,
  "type": "OllamaProvider",
  "base_url": "http://localhost:11434",
  "models": {},
@@ -389,7 +391,8 @@
  "qwen2.5-vl:7b": "qwen2.5-vl-7b-instruct",
  "qwen2.5-vl:3b": "qwen2.5-vl-3b-instruct",
  "qwen2.5-omni:7b": "qwen2.5-omni-7b"
- }
+ },
+ "enable_thinking": false
  },
  "z.ai": {
  "enabled": false,
@@ -404,7 +407,8 @@
  "glm-4.5-airx": "glm-4.5-airx",
  "glm-4.5-flash": "glm-4.5-flash",
  "glm-4:32b": "glm-4-32b-0414-128k"
- }
+ },
+ "temperature": 0.7
  },
  "mistral": {
  "enabled": false,
@@ -417,20 +421,22 @@
  "devstral-medium": "devstral-medium-2507",
  "codestral:22b": "codestral-latest",
  "mistral-ocr": "mistral-ocr-latest",
- "voxtral-mini": "voxtral-mini-latest",
  "mistral-small3.2:24b": "mistral-small-latest",
  "magistral-small": "magistral-small-latest",
  "devstral-small": "devstral-small-2507",
  "voxtral-small": "voxtral-small-latest",
+ "voxtral-mini": "voxtral-mini-latest",
+ "codestral-embed": "codestral-embed-2505",
+ "mistral-embed": "mistral-embed",
  "mistral-large:123b": "mistral-large-latest",
  "pixtral-large:124b": "pixtral-large-latest",
  "pixtral:12b": "pixtral-12b",
- "mistral-nemo:12b": "mistral-nemo",
+ "mistral-nemo:12b": "open-mistral-nemo",
  "mistral-saba": "mistral-saba-latest",
  "mistral:7b": "open-mistral-7b",
  "mixtral:8x7b": "open-mixtral-8x7b",
  "mixtral:8x22b": "open-mixtral-8x22b",
- "ministral:8b": "ministral-3b-latest",
+ "ministral:8b": "ministral-8b-latest",
  "ministral:3b": "ministral-3b-latest"
  }
  }
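The remaining hunks fix alias-to-model-id mappings, notably `ministral:8b`, which previously pointed at `ministral-3b-latest`. Each provider's `models` map translates a short local alias into the provider's own model id, roughly as sketched below (the `resolve` helper is assumed):

```python
# Sketch of alias resolution against a provider's "models" map from llms.json.
MISTRAL_MODELS = {
    "ministral:8b": "ministral-8b-latest",   # fixed in 2.0.10
    "ministral:3b": "ministral-3b-latest",
    "mistral-nemo:12b": "open-mistral-nemo",
}

def resolve(alias: str, models: dict) -> str:
    """Translate a short alias to the provider's model id; assumed helper."""
    if alias not in models:
        raise KeyError(f"model {alias!r} not offered by this provider")
    return models[alias]

assert resolve("ministral:8b", MISTRAL_MODELS) == "ministral-8b-latest"
```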