@botpress/cognitive 0.1.41 → 0.1.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/dist/index.cjs +1132 -23
- package/dist/index.cjs.map +4 -4
- package/dist/index.d.ts +26 -6
- package/dist/index.mjs +1130 -22
- package/dist/index.mjs.map +4 -4
- package/package.json +3 -2
- package/refresh-models.ts +100 -0
package/dist/index.mjs
CHANGED
@@ -630,9 +630,1101 @@ var getExtendedClient = (_client) => {
   };
 };
 
-// src/
+// src/cognitive-v2/index.ts
 var import_exponential_backoff = __toESM(require_backoff());
 import axios from "axios";
+
+// src/cognitive-v2/models.ts
+var models = {
+  "openai:gpt-5-2025-08-07": {
+    id: "openai:gpt-5-2025-08-07",
+    name: "GPT-5",
+    description: "GPT-5 is OpenAI's latest and most advanced AI model. It is a reasoning model that chooses the best way to respond based on task complexity and user intent. GPT-5 delivers expert-level performance across coding, math, writing, health, and visual perception, with improved accuracy, speed, and reduced hallucinations. It excels in complex tasks, long-context understanding, multimodal inputs (text and images), and safe, nuanced responses.",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 1.25
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-5-mini-2025-08-07": {
+    id: "openai:gpt-5-mini-2025-08-07",
+    name: "GPT-5 Mini",
+    description: "GPT-5 Mini is a lightweight and cost-effective version of GPT-5, optimized for applications where speed and efficiency matter more than full advanced capabilities. It is designed for cost-sensitive use cases such as chatbots, content generation, and high-volume usage, striking a balance between performance and affordability, making it suitable for simpler tasks that do not require deep multi-step reasoning or the full reasoning power of GPT-5",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 0.25
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2
+    },
+    tags: ["recommended", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-5-nano-2025-08-07": {
+    id: "openai:gpt-5-nano-2025-08-07",
+    name: "GPT-5 Nano",
+    description: "GPT-5 Nano is an ultra-lightweight version of GPT-5 optimized for speed and very low latency, making it ideal for use cases like simple chatbots, basic content generation, summarization, and classification tasks.",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o4-mini-2025-04-16": {
+    id: "openai:o4-mini-2025-04-16",
+    name: "GPT o4-mini",
+    description: "o4-mini is OpenAI's latest small o-series model. It's optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "openai:o3-2025-04-16": {
+    id: "openai:o3-2025-04-16",
+    name: "GPT o3",
+    description: "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 2
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 8
+    },
+    tags: ["reasoning", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-2025-04-14": {
+    id: "openai:gpt-4.1-2025-04-14",
+    name: "GPT 4.1",
+    description: "GPT 4.1 is our flagship model for complex tasks. It is well suited for problem solving across domains. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 2
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 8
+    },
+    tags: ["recommended", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-mini-2025-04-14": {
+    id: "openai:gpt-4.1-mini-2025-04-14",
+    name: "GPT 4.1 Mini",
+    description: "GPT 4.1 mini provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 0.4
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 1.6
+    },
+    tags: ["recommended", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-nano-2025-04-14": {
+    id: "openai:gpt-4.1-nano-2025-04-14",
+    name: "GPT 4.1 Nano",
+    description: "GPT-4.1 nano is the fastest, most cost-effective GPT 4.1 model. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o3-mini-2025-01-31": {
+    id: "openai:o3-mini-2025-01-31",
+    name: "GPT o3-mini",
+    description: "o3-mini is the most recent small reasoning model from OpenAI, providing high intelligence at the same cost and latency targets of o1-mini. Also supports key developer features like Structured Outputs and function calling.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "openai:o1-2024-12-17": {
+    id: "openai:o1-2024-12-17",
+    name: "GPT o1",
+    description: "The o1 model is designed to solve hard problems across domains. Trained with reinforcement learning to perform complex reasoning with a long internal chain of thought.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 15
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 60
+    },
+    tags: ["reasoning", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o1-mini-2024-09-12": {
+    id: "openai:o1-mini-2024-09-12",
+    name: "GPT o1-mini",
+    description: "The o1-mini model is a fast and affordable reasoning model for specialized tasks. Trained with reinforcement learning to perform complex reasoning.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-mini-2024-07-18": {
+    id: "openai:gpt-4o-mini-2024-07-18",
+    name: "GPT-4o Mini",
+    description: "GPT-4o mini is OpenAI's most advanced model in the small models category, and their cheapest model yet. Multimodal with higher intelligence than gpt-3.5-turbo but just as fast.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.6
+    },
+    tags: ["recommended", "vision", "low-cost", "general-purpose", "function-calling"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-2024-11-20": {
+    id: "openai:gpt-4o-2024-11-20",
+    name: "GPT-4o (November 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2.5
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-2024-08-06": {
+    id: "openai:gpt-4o-2024-08-06",
+    name: "GPT-4o (August 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2.5
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 10
+    },
+    tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-4o-2024-05-13": {
+    id: "openai:gpt-4o-2024-05-13",
+    name: "GPT-4o (May 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 5
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-4-turbo-2024-04-09": {
+    id: "openai:gpt-4-turbo-2024-04-09",
+    name: "GPT-4 Turbo",
+    description: "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy than previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 10
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 30
+    },
+    tags: ["deprecated", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-3.5-turbo-0125": {
+    id: "openai:gpt-3.5-turbo-0125",
+    name: "GPT-3.5 Turbo",
+    description: "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.5
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 1.5
+    },
+    tags: ["deprecated", "general-purpose", "low-cost"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-sonnet-4-20250514": {
+    id: "anthropic:claude-sonnet-4-20250514",
+    name: "Claude Sonnet 4",
+    description: "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-sonnet-4-reasoning-20250514": {
+    id: "anthropic:claude-sonnet-4-reasoning-20250514",
+    name: "Claude Sonnet 4 (Reasoning Mode)",
+    description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability.',
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-3-7-sonnet-20250219": {
+    id: "anthropic:claude-3-7-sonnet-20250219",
+    name: "Claude 3.7 Sonnet",
+    description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-7-sonnet-reasoning-20250219": {
+    id: "anthropic:claude-3-7-sonnet-reasoning-20250219",
+    name: "Claude 3.7 Sonnet (Reasoning Mode)",
+    description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.',
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-3-5-haiku-20241022": {
+    id: "anthropic:claude-3-5-haiku-20241022",
+    name: "Claude 3.5 Haiku",
+    description: "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 0.8
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 4
+    },
+    tags: ["general-purpose", "low-cost"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-5-sonnet-20241022": {
+    id: "anthropic:claude-3-5-sonnet-20241022",
+    name: "Claude 3.5 Sonnet (October 2024)",
+    description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 15
+    },
+    tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-5-sonnet-20240620": {
+    id: "anthropic:claude-3-5-sonnet-20240620",
+    name: "Claude 3.5 Sonnet (June 2024)",
+    description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 15
+    },
+    tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-haiku-20240307": {
+    id: "anthropic:claude-3-haiku-20240307",
+    name: "Claude 3 Haiku",
+    description: "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 0.25
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 1.25
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "google-ai:gemini-2.5-flash": {
+    id: "google-ai:gemini-2.5-flash",
+    name: "Gemini 2.5 Flash",
+    description: `Google's state-of-the-art workhorse model with advanced reasoning, coding, mathematics, and scientific capabilities. Includes built-in "thinking" capabilities for enhanced accuracy.`,
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.3
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 2.5
+    },
+    tags: ["recommended", "reasoning", "agents", "general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "google-ai:gemini-2.5-pro": {
+    id: "google-ai:gemini-2.5-pro",
+    name: "Gemini 2.5 Pro",
+    description: `Google's most advanced AI model designed for complex reasoning, coding, mathematics, and scientific tasks. Features "thinking" capabilities for superior human-preference alignment and problem-solving.`,
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.25
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "reasoning", "agents", "general-purpose", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "google-ai:models/gemini-2.0-flash": {
+    id: "google-ai:models/gemini-2.0-flash",
+    name: "Gemini 2.0 Flash",
+    description: "Next-gen Gemini model with improved capabilities, superior speed, native tool use, multimodal generation, and 1M token context window.",
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "cerebras:gpt-oss-120b": {
+    id: "cerebras:gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.35
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "cerebras:qwen-3-32b": {
+    id: "cerebras:qwen-3-32b",
+    name: "Qwen3 32B",
+    description: "Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning, making it an exceptional model for a wide range of production use cases.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.4
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.8
+    },
+    tags: ["general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "cerebras:llama-4-scout-17b-16e-instruct": {
+    id: "cerebras:llama-4-scout-17b-16e-instruct",
+    name: "Llama 4 Scout 17B",
+    description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages.",
+    input: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.65
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.85
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "cerebras:llama3.1-8b": {
+    id: "cerebras:llama3.1-8b",
+    name: "Llama 3.1 8B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.1
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "cerebras:llama3.3-70b": {
+    id: "cerebras:llama3.3-70b",
+    name: "Llama 3.3 70B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.85
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 1.2
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live"
+  },
+  "groq:openai/gpt-oss-20b": {
+    id: "groq:openai/gpt-oss-20b",
+    name: "GPT-OSS 20B (Preview)",
+    description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.5
+    },
+    tags: ["preview", "general-purpose", "reasoning", "low-cost"],
+    lifecycle: "live"
+  },
+  "groq:openai/gpt-oss-120b": {
+    id: "groq:openai/gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "groq:deepseek-r1-distill-llama-70b": {
+    id: "groq:deepseek-r1-distill-llama-70b",
+    name: "DeepSeek R1-Distill Llama 3.3 70B (Preview)",
+    description: "A fine-tuned version of Llama 3.3 70B using samples generated by DeepSeek-R1, making it smarter than the original Llama 70B, particularly for tasks requiring mathematical and factual precision.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.75
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.99
+    },
+    tags: ["general-purpose", "reasoning", "preview"],
+    lifecycle: "live"
+  },
+  "groq:llama-3.3-70b-versatile": {
+    id: "groq:llama-3.3-70b-versatile",
+    name: "LLaMA 3.3 70B",
+    description: "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.59
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.79
+    },
+    tags: ["recommended", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "groq:llama-3.2-1b-preview": {
+    id: "groq:llama-3.2-1b-preview",
+    name: "LLaMA 3.2 1B (Preview)",
+    description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.04
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.04
+    },
+    tags: ["low-cost", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-3b-preview": {
+    id: "groq:llama-3.2-3b-preview",
+    name: "LLaMA 3.2 3B (Preview)",
+    description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.06
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.06
+    },
+    tags: ["low-cost", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-11b-vision-preview": {
+    id: "groq:llama-3.2-11b-vision-preview",
+    name: "LLaMA 3.2 11B Vision (Preview)",
+    description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.18
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.18
+    },
+    tags: ["low-cost", "vision", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-90b-vision-preview": {
+    id: "groq:llama-3.2-90b-vision-preview",
+    name: "LLaMA 3.2 90B Vision (Preview)",
+    description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.9
+    },
+    tags: ["vision", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.1-8b-instant": {
+    id: "groq:llama-3.1-8b-instant",
+    name: "LLaMA 3.1 8B",
+    description: "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.08
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "groq:llama3-8b-8192": {
+    id: "groq:llama3-8b-8192",
+    name: "LLaMA 3 8B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.08
+    },
+    tags: ["low-cost", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama3-70b-8192": {
+    id: "groq:llama3-70b-8192",
+    name: "LLaMA 3 70B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.59
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.79
+    },
+    tags: ["general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:gemma2-9b-it": {
+    id: "groq:gemma2-9b-it",
+    name: "Gemma2 9B",
+    description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "xai:grok-code-fast-1": {
+    id: "xai:grok-code-fast-1",
+    name: "Grok Code Fast 1",
+    description: "Fast coding-optimized Grok model with large context window.",
+    input: {
+      maxTokens: 256e3,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 1.5
+    },
+    tags: ["coding", "general-purpose", "low-cost"],
+    lifecycle: "live"
+  },
+  "xai:grok-4-fast-reasoning": {
+    id: "xai:grok-4-fast-reasoning",
+    name: "Grok 4 Fast (Reasoning)",
+    description: "Advanced fast Grok model with reasoning and very large context.",
+    input: {
+      maxTokens: 2e6,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.5
+    },
+    tags: ["reasoning", "recommended", "general-purpose"],
+    lifecycle: "live"
+  },
+  "xai:grok-4-fast-non-reasoning": {
+    id: "xai:grok-4-fast-non-reasoning",
+    name: "Grok 4 Fast (Non-Reasoning)",
+    description: "Fast, cost-effective Grok model for non-reasoning tasks.",
+    input: {
+      maxTokens: 2e6,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.5
+    },
+    tags: ["low-cost", "recommended", "general-purpose"],
+    lifecycle: "live"
+  },
+  "xai:grok-4-0709": {
+    id: "xai:grok-4-0709",
+    name: "Grok 4 (0709)",
+    description: "Comprehensive Grok 4 model for general-purpose tasks.",
+    input: {
+      maxTokens: 256e3,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 15
+    },
+    tags: ["reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "xai:grok-3-mini": {
+    id: "xai:grok-3-mini",
+    name: "Grok 3 Mini",
+    description: "Lightweight Grok model for cost-sensitive workloads.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.3
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.5
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "xai:grok-3": {
+    id: "xai:grok-3",
+    name: "Grok 3",
+    description: "Enterprise-grade Grok model for general-purpose tasks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 15
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live"
+  },
+  "openrouter:gpt-oss-120b": {
+    id: "openrouter:gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "fireworks-ai:gpt-oss-20b": {
+    id: "fireworks-ai:gpt-oss-20b",
+    name: "GPT-OSS 20B",
+    description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.07
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.3
+    },
+    tags: ["general-purpose", "reasoning", "low-cost"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/gpt-oss-20b"]
+  },
+  "fireworks-ai:gpt-oss-120b": {
+    id: "fireworks-ai:gpt-oss-120b",
+    name: "GPT-OSS 120B",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.6
+    },
+    tags: ["general-purpose", "reasoning"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/gpt-oss-120b"]
+  },
+  "fireworks-ai:deepseek-r1-0528": {
+    id: "fireworks-ai:deepseek-r1-0528",
+    name: "DeepSeek R1 0528",
+    description: "The updated DeepSeek R1 0528 model delivers major improvements in reasoning, inference, and accuracy through enhanced post-training optimization and greater computational resources. It now performs at a level approaching top-tier models like OpenAI o3 and Gemini 2.5 Pro, with notable gains in complex tasks such as math and programming. The update also reduces hallucinations, improves function calling, and enhances the coding experience.",
+    input: {
+      maxTokens: 16e4,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 8
+    },
+    tags: ["recommended", "reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1-0528"]
+  },
+  "fireworks-ai:deepseek-v3-0324": {
+    id: "fireworks-ai:deepseek-v3-0324",
+    name: "DeepSeek V3 0324",
+    description: "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the DeepSeek V3 model and performs really well on a variety of tasks.",
+    input: {
+      maxTokens: 16e4,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.9
+    },
+    tags: ["recommended", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-v3-0324"]
+  },
+  "fireworks-ai:llama4-maverick-instruct-basic": {
+    id: "fireworks-ai:llama4-maverick-instruct-basic",
+    name: "Llama 4 Maverick Instruct (Basic)",
+    description: "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction, and suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
+    input: {
+      maxTokens: 1e6,
+      costPer1MTokens: 0.22
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.88
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama4-maverick-instruct-basic"]
+  },
+  "fireworks-ai:llama4-scout-instruct-basic": {
+    id: "fireworks-ai:llama4-scout-instruct-basic",
+    name: "Llama 4 Scout Instruct (Basic)",
+    description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, it is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks.",
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.6
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama4-scout-instruct-basic"]
+  },
+  "fireworks-ai:llama-v3p3-70b-instruct": {
+    id: "fireworks-ai:llama-v3p3-70b-instruct",
+    name: "Llama 3.3 70B Instruct",
+    description: "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. The model improves upon Llama 3.1 70B (released July 2024) with advances in tool calling, multilingual text support, math and coding. The model achieves industry leading results in reasoning, math and instruction following and provides similar performance as 3.1 405B but with significant speed and cost improvements.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.9
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama-v3p3-70b-instruct"]
+  },
+  "fireworks-ai:deepseek-r1": {
+    id: "fireworks-ai:deepseek-r1",
+    name: "DeepSeek R1 (Fast)",
+    description: "This version of the R1 model has a perfect balance between speed and cost-efficiency for real-time interactive experiences, with speeds up to 90 tokens per second.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 8
+    },
+    tags: ["reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1"]
+  },
+  "fireworks-ai:deepseek-r1-basic": {
+    id: "fireworks-ai:deepseek-r1-basic",
+    name: "DeepSeek R1 (Basic)",
+    description: 'This version of the R1 model is optimized for throughput and cost-effectiveness and has a lower cost but slightly higher latency than the "Fast" version of the model.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.',
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.55
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 2.19
+    },
+    tags: ["recommended", "reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1-basic"]
+  },
+  "fireworks-ai:deepseek-v3": {
+    id: "fireworks-ai:deepseek-v3",
+    name: "DeepSeek V3",
+    description: "A a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 8e3,
+      costPer1MTokens: 0.9
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/deepseek-v3"]
+  },
+  "fireworks-ai:llama-v3p1-405b-instruct": {
+    id: "fireworks-ai:llama-v3p1-405b-instruct",
+    name: "Llama 3.1 405B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 3
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/llama-v3p1-405b-instruct"]
+  },
+  "fireworks-ai:llama-v3p1-70b-instruct": {
+    id: "fireworks-ai:llama-v3p1-70b-instruct",
+    name: "Llama 3.1 70B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/llama-v3p1-70b-instruct"]
+  },
+  "fireworks-ai:llama-v3p1-8b-instruct": {
+    id: "fireworks-ai:llama-v3p1-8b-instruct",
+    name: "Llama 3.1 8B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.2
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama-v3p1-8b-instruct"]
+  },
+  "fireworks-ai:mixtral-8x22b-instruct": {
+    id: "fireworks-ai:mixtral-8x22b-instruct",
+    name: "Mixtral MoE 8x22B Instruct",
+    description: "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
+    input: {
+      maxTokens: 65536,
+      costPer1MTokens: 1.2
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 1.2
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mixtral-8x22b-instruct"]
+  },
+  "fireworks-ai:mixtral-8x7b-instruct": {
+    id: "fireworks-ai:mixtral-8x7b-instruct",
+    name: "Mixtral MoE 8x7B Instruct",
+    description: "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following",
+    input: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.5
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.5
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mixtral-8x7b-instruct"]
+  },
+  "fireworks-ai:mythomax-l2-13b": {
+    id: "fireworks-ai:mythomax-l2-13b",
+    name: "MythoMax L2 13b",
+    description: "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
+    input: {
+      maxTokens: 4096,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 0.2
+    },
+    tags: ["roleplay", "storytelling", "low-cost"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mythomax-l2-13b"]
+  },
+  "fireworks-ai:gemma2-9b-it": {
+    id: "fireworks-ai:gemma2-9b-it",
+    name: "Gemma 2 9B Instruct",
+    description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    tags: ["deprecated", "low-cost", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/gemma2-9b-it"]
+  }
+};
+var knownTags = [
+  "auto",
+  "best",
+  "fast",
+  "reasoning",
+  "cheapest",
+  "balance",
+  "recommended",
+  "reasoning",
+  "general-purpose",
+  "low-cost",
+  "vision",
+  "coding",
+  "function-calling",
+  "agents",
+  "storytelling",
+  "preview",
+  "roleplay"
+];
+var defaultModel = {
+  id: "",
+  name: "",
+  description: "",
+  input: {
+    costPer1MTokens: 0,
+    maxTokens: 1e6
+  },
+  output: {
+    costPer1MTokens: 0,
+    maxTokens: 1e6
+  },
+  tags: [],
+  lifecycle: "live"
+};
+
+// src/cognitive-v2/index.ts
 var isBrowser = () => typeof window !== "undefined" && typeof window.fetch === "function";
 var CognitiveBeta = class {
   _axiosClient;
@@ -667,15 +1759,11 @@ var CognitiveBeta = class {
     );
     return data;
   }
-  async listModels(
-    const signal = options.signal ?? AbortSignal.timeout(this._timeout);
+  async listModels() {
     const { data } = await this._withServerRetry(
-      () => this._axiosClient.
-        signal,
-        timeout: options.timeout ?? this._timeout
-      })
+      () => this._axiosClient.get("/v2/cognitive/models")
     );
-    return data;
+    return data.models;
   }
   async *generateTextStream(request, options = {}) {
     const signal = options.signal ?? AbortSignal.timeout(this._timeout);
@@ -785,6 +1873,19 @@ var CognitiveBeta = class {
     });
   }
 };
+var getCognitiveV2Model = (model) => {
+  if (models[model]) {
+    return models[model];
+  }
+  const alias = Object.values(models).find((x) => x.aliases?.includes(model));
+  if (alias) {
+    return alias;
+  }
+  if (knownTags.includes(model)) {
+    return { ...defaultModel, id: model, name: model };
+  }
+  return void 0;
+};
 
 // src/errors.ts
 var getActionFromError = (error) => {
@@ -879,7 +1980,7 @@ var scoreModel = (model, type, boosts = {}) => {
   const scores = [
     ["input price penalty", model.input.costPer1MTokens > InputPricePenalty, -1],
     ["output price penalty", model.output.costPer1MTokens > OutputPricePenalty, -1],
-    ["low tokens penalty", (model.input.maxTokens ?? 0 + model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
+    ["low tokens penalty", (model.input.maxTokens ?? 0) + (model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
     ["recommended", isRecommended(model), 2],
     ["deprecated", isDeprecated(model), -2],
     ["vision support", hasVisionSupport(model), 1],
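The `low tokens penalty` change above fixes a JavaScript operator-precedence bug: `??` binds looser than `+`, so the old expression evaluated to `model.input.maxTokens` alone whenever it was defined, and the output budget never contributed to the score. A minimal TypeScript sketch of the two parses (variable names are illustrative, not from the package):

```ts
const maxIn: number | undefined = 200_000
const maxOut: number | undefined = 100_000

// Old parse: `maxIn ?? 0 + maxOut ?? 0` groups as `maxIn ?? ((0 + maxOut) ?? 0)`,
// so a defined maxIn short-circuits the whole expression and maxOut is ignored.
const before = maxIn !== undefined ? maxIn : 0 + (maxOut ?? 0) // 200000

// Fixed parse: default each operand, then sum.
const after = (maxIn ?? 0) + (maxOut ?? 0) // 300000
```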
@@ -902,10 +2003,10 @@ var scoreModel = (model, type, boosts = {}) => {
   }
   return score;
 };
-var getBestModels = (
-var getFastModels = (
-var pickModel = (
-  const copy = [...
+var getBestModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "best", boosts) - scoreModel(a, "best", boosts));
+var getFastModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "fast", boosts) - scoreModel(a, "fast", boosts));
+var pickModel = (models2, downtimes = []) => {
+  const copy = [...models2];
   const elasped = (date) => (/* @__PURE__ */ new Date()).getTime() - new Date(date).getTime();
   const DOWNTIME_THRESHOLD = 1e3 * 60 * DOWNTIME_THRESHOLD_MINUTES;
   if (!copy.length) {
@@ -920,7 +2021,7 @@ var pickModel = (models, downtimes = []) => {
       return ref;
     }
   }
-  throw new Error(`All models are down: ${
+  throw new Error(`All models are down: ${models2.join(", ")}`);
 };
 var ModelProvider = class {
 };
@@ -944,7 +2045,7 @@ var RemoteModelProvider = class extends ModelProvider {
   }
   async fetchInstalledModels() {
     const integrationNames = await this._fetchInstalledIntegrationNames();
-    const
+    const models2 = [];
     await Promise.allSettled(
       integrationNames.map(async (integration) => {
         const { output } = await this._client.callAction({
@@ -956,7 +2057,7 @@ var RemoteModelProvider = class extends ModelProvider {
         }
         for (const model of output.models) {
           if (model.name && model.id && model.input && model.tags) {
-
+            models2.push({
              ref: `${integration}:${model.id}`,
              integration,
              id: model.id,
@@ -970,7 +2071,7 @@ var RemoteModelProvider = class extends ModelProvider {
        }
      })
    );
-    return
+    return models2;
  }
  async fetchModelPreferences() {
    try {
@@ -1081,10 +2182,10 @@ var Cognitive = class _Cognitive {
     if (this._preferences) {
       return this._preferences;
     }
-    const
+    const models2 = await this.fetchInstalledModels();
     this._preferences = {
-      best: getBestModels(
-      fast: getFastModels(
+      best: getBestModels(models2).map((m) => m.ref),
+      fast: getFastModels(models2).map((m) => m.ref),
       downtimes: []
     };
     await this._provider.saveModelPreferences(this._preferences);
@@ -1123,6 +2224,12 @@ var Cognitive = class _Cognitive {
     return parseRef(pickModel([ref, ...preferences.best, ...preferences.fast], downtimes));
   }
   async getModelDetails(model) {
+    if (this._useBeta) {
+      const resolvedModel = getCognitiveV2Model(model);
+      if (resolvedModel) {
+        return { ...resolvedModel, ref: resolvedModel.id, integration: "cognitive-v2" };
+      }
+    }
     await this.fetchInstalledModels();
     const { integration, model: modelName } = await this._selectModel(model);
     const def = this._models.find((m) => m.integration === integration && (m.name === modelName || m.id === modelName));
@@ -1132,7 +2239,7 @@ var Cognitive = class _Cognitive {
     return def;
   }
   async generateContent(input) {
-    if (!this._useBeta) {
+    if (!this._useBeta || !getCognitiveV2Model(input.model)) {
       return this._generateContent(input);
     }
     const betaClient = new CognitiveBeta(this._client.config);
@@ -1251,6 +2358,7 @@ export {
   Cognitive,
   CognitiveBeta,
   ModelProvider,
-  RemoteModelProvider
+  RemoteModelProvider,
+  getCognitiveV2Model
 };
 //# sourceMappingURL=index.mjs.map
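Taken together, these changes give the beta client a static model catalog plus a resolver, `getCognitiveV2Model`, which is now exported. Per the diff above, resolution tries an exact catalog key, then provider aliases, then falls back to a placeholder for known routing tags; anything else returns `undefined`, which is what keeps `generateContent` on the non-beta path. A minimal TypeScript sketch of that lookup order, with a trimmed one-entry table standing in for the full catalog shipped in the package:

```ts
type ModelEntry = {
  id: string
  name: string
  lifecycle: "live" | "deprecated" | "discontinued"
  aliases?: string[]
}

// Trimmed stand-in for the shipped catalog.
const models: Record<string, ModelEntry> = {
  "fireworks-ai:gpt-oss-120b": {
    id: "fireworks-ai:gpt-oss-120b",
    name: "GPT-OSS 120B",
    lifecycle: "live",
    aliases: ["accounts/fireworks/models/gpt-oss-120b"],
  },
}
const knownTags = ["auto", "best", "fast", "reasoning", "cheapest", "balance"]
const defaultModel: ModelEntry = { id: "", name: "", lifecycle: "live" }

const getCognitiveV2Model = (model: string): ModelEntry | undefined => {
  if (models[model]) return models[model] // 1. exact catalog id
  const alias = Object.values(models).find((x) => x.aliases?.includes(model))
  if (alias) return alias // 2. provider-native alias
  if (knownTags.includes(model)) return { ...defaultModel, id: model, name: model } // 3. routing tag placeholder
  return undefined // 4. unknown -> caller falls back to the legacy path
}

// getCognitiveV2Model("accounts/fireworks/models/gpt-oss-120b")?.id
//   -> "fireworks-ai:gpt-oss-120b"
// getCognitiveV2Model("best") -> placeholder entry
// getCognitiveV2Model("some-unknown-ref") -> undefined
```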