@botpress/cognitive 0.1.41 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -581,7 +581,8 @@ __export(index_exports, {
  Cognitive: () => Cognitive,
  CognitiveBeta: () => CognitiveBeta,
  ModelProvider: () => ModelProvider,
- RemoteModelProvider: () => RemoteModelProvider
+ RemoteModelProvider: () => RemoteModelProvider,
+ getCognitiveV2Model: () => getCognitiveV2Model
  });
  module.exports = __toCommonJS(index_exports);
 
@@ -645,9 +646,1101 @@ var getExtendedClient = (_client) => {
  };
  };
 
- // src/cognitive_beta/index.ts
+ // src/cognitive-v2/index.ts
  var import_axios = __toESM(require("axios"));
  var import_exponential_backoff = __toESM(require_backoff());
+
+ // src/cognitive-v2/models.ts
+ var models = {
+ "openai:gpt-5-2025-08-07": {
+ id: "openai:gpt-5-2025-08-07",
+ name: "GPT-5",
+ description: "GPT-5 is OpenAI's latest and most advanced AI model. It is a reasoning model that chooses the best way to respond based on task complexity and user intent. GPT-5 delivers expert-level performance across coding, math, writing, health, and visual perception, with improved accuracy, speed, and reduced hallucinations. It excels in complex tasks, long-context understanding, multimodal inputs (text and images), and safe, nuanced responses.",
+ input: {
+ maxTokens: 4e5,
+ costPer1MTokens: 1.25
+ },
+ output: {
+ maxTokens: 128e3,
+ costPer1MTokens: 10
+ },
+ tags: ["recommended", "reasoning", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:gpt-5-mini-2025-08-07": {
+ id: "openai:gpt-5-mini-2025-08-07",
+ name: "GPT-5 Mini",
+ description: "GPT-5 Mini is a lightweight and cost-effective version of GPT-5, optimized for applications where speed and efficiency matter more than full advanced capabilities. It is designed for cost-sensitive use cases such as chatbots, content generation, and high-volume usage, striking a balance between performance and affordability, making it suitable for simpler tasks that do not require deep multi-step reasoning or the full reasoning power of GPT-5",
+ input: {
+ maxTokens: 4e5,
+ costPer1MTokens: 0.25
+ },
+ output: {
+ maxTokens: 128e3,
+ costPer1MTokens: 2
+ },
+ tags: ["recommended", "reasoning", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:gpt-5-nano-2025-08-07": {
+ id: "openai:gpt-5-nano-2025-08-07",
+ name: "GPT-5 Nano",
+ description: "GPT-5 Nano is an ultra-lightweight version of GPT-5 optimized for speed and very low latency, making it ideal for use cases like simple chatbots, basic content generation, summarization, and classification tasks.",
+ input: {
+ maxTokens: 4e5,
+ costPer1MTokens: 0.05
+ },
+ output: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.4
+ },
+ tags: ["low-cost", "reasoning", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:o4-mini-2025-04-16": {
+ id: "openai:o4-mini-2025-04-16",
+ name: "GPT o4-mini",
+ description: "o4-mini is OpenAI's latest small o-series model. It's optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 1.1
+ },
+ output: {
+ maxTokens: 1e5,
+ costPer1MTokens: 4.4
+ },
+ tags: ["reasoning", "vision", "coding"],
+ lifecycle: "live"
+ },
+ "openai:o3-2025-04-16": {
+ id: "openai:o3-2025-04-16",
+ name: "GPT o3",
+ description: "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 2
+ },
+ output: {
+ maxTokens: 1e5,
+ costPer1MTokens: 8
+ },
+ tags: ["reasoning", "vision", "coding"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4.1-2025-04-14": {
+ id: "openai:gpt-4.1-2025-04-14",
+ name: "GPT 4.1",
+ description: "GPT 4.1 is our flagship model for complex tasks. It is well suited for problem solving across domains. The knowledge cutoff is June 2024.",
+ input: {
+ maxTokens: 1047576,
+ costPer1MTokens: 2
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 8
+ },
+ tags: ["recommended", "vision", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4.1-mini-2025-04-14": {
+ id: "openai:gpt-4.1-mini-2025-04-14",
+ name: "GPT 4.1 Mini",
+ description: "GPT 4.1 mini provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases. The knowledge cutoff is June 2024.",
+ input: {
+ maxTokens: 1047576,
+ costPer1MTokens: 0.4
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 1.6
+ },
+ tags: ["recommended", "vision", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4.1-nano-2025-04-14": {
+ id: "openai:gpt-4.1-nano-2025-04-14",
+ name: "GPT 4.1 Nano",
+ description: "GPT-4.1 nano is the fastest, most cost-effective GPT 4.1 model. The knowledge cutoff is June 2024.",
+ input: {
+ maxTokens: 1047576,
+ costPer1MTokens: 0.1
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 0.4
+ },
+ tags: ["low-cost", "vision", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:o3-mini-2025-01-31": {
+ id: "openai:o3-mini-2025-01-31",
+ name: "GPT o3-mini",
+ description: "o3-mini is the most recent small reasoning model from OpenAI, providing high intelligence at the same cost and latency targets of o1-mini. Also supports key developer features like Structured Outputs and function calling.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 1.1
+ },
+ output: {
+ maxTokens: 1e5,
+ costPer1MTokens: 4.4
+ },
+ tags: ["reasoning", "general-purpose", "coding"],
+ lifecycle: "live"
+ },
+ "openai:o1-2024-12-17": {
+ id: "openai:o1-2024-12-17",
+ name: "GPT o1",
+ description: "The o1 model is designed to solve hard problems across domains. Trained with reinforcement learning to perform complex reasoning with a long internal chain of thought.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 15
+ },
+ output: {
+ maxTokens: 1e5,
+ costPer1MTokens: 60
+ },
+ tags: ["reasoning", "vision", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:o1-mini-2024-09-12": {
+ id: "openai:o1-mini-2024-09-12",
+ name: "GPT o1-mini",
+ description: "The o1-mini model is a fast and affordable reasoning model for specialized tasks. Trained with reinforcement learning to perform complex reasoning.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 1.1
+ },
+ output: {
+ maxTokens: 65536,
+ costPer1MTokens: 4.4
+ },
+ tags: ["reasoning", "vision", "general-purpose"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4o-mini-2024-07-18": {
+ id: "openai:gpt-4o-mini-2024-07-18",
+ name: "GPT-4o Mini",
+ description: "GPT-4o mini is OpenAI's most advanced model in the small models category, and their cheapest model yet. Multimodal with higher intelligence than gpt-3.5-turbo but just as fast.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.15
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.6
+ },
+ tags: ["recommended", "vision", "low-cost", "general-purpose", "function-calling"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4o-2024-11-20": {
+ id: "openai:gpt-4o-2024-11-20",
+ name: "GPT-4o (November 2024)",
+ description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 2.5
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 10
+ },
+ tags: ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"],
+ lifecycle: "live"
+ },
+ "openai:gpt-4o-2024-08-06": {
+ id: "openai:gpt-4o-2024-08-06",
+ name: "GPT-4o (August 2024)",
+ description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 2.5
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 10
+ },
+ tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+ lifecycle: "deprecated"
+ },
+ "openai:gpt-4o-2024-05-13": {
+ id: "openai:gpt-4o-2024-05-13",
+ name: "GPT-4o (May 2024)",
+ description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 5
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 15
+ },
+ tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+ lifecycle: "deprecated"
+ },
+ "openai:gpt-4-turbo-2024-04-09": {
+ id: "openai:gpt-4-turbo-2024-04-09",
+ name: "GPT-4 Turbo",
+ description: "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy than previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 10
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 30
+ },
+ tags: ["deprecated", "general-purpose", "coding", "agents", "function-calling"],
+ lifecycle: "deprecated"
+ },
+ "openai:gpt-3.5-turbo-0125": {
+ id: "openai:gpt-3.5-turbo-0125",
+ name: "GPT-3.5 Turbo",
+ description: "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.5
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 1.5
+ },
+ tags: ["deprecated", "general-purpose", "low-cost"],
+ lifecycle: "deprecated"
+ },
+ "anthropic:claude-sonnet-4-20250514": {
+ id: "anthropic:claude-sonnet-4-20250514",
+ name: "Claude Sonnet 4",
+ description: "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 64e3,
+ costPer1MTokens: 15
+ },
+ tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+ lifecycle: "live"
+ },
+ "anthropic:claude-sonnet-4-reasoning-20250514": {
+ id: "anthropic:claude-sonnet-4-reasoning-20250514",
+ name: "Claude Sonnet 4 (Reasoning Mode)",
+ description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability.',
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 64e3,
+ costPer1MTokens: 15
+ },
+ tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+ lifecycle: "deprecated"
+ },
+ "anthropic:claude-3-7-sonnet-20250219": {
+ id: "anthropic:claude-3-7-sonnet-20250219",
+ name: "Claude 3.7 Sonnet",
+ description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 64e3,
+ costPer1MTokens: 15
+ },
+ tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+ lifecycle: "live"
+ },
+ "anthropic:claude-3-7-sonnet-reasoning-20250219": {
+ id: "anthropic:claude-3-7-sonnet-reasoning-20250219",
+ name: "Claude 3.7 Sonnet (Reasoning Mode)",
+ description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.',
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 64e3,
+ costPer1MTokens: 15
+ },
+ tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+ lifecycle: "deprecated"
+ },
+ "anthropic:claude-3-5-haiku-20241022": {
+ id: "anthropic:claude-3-5-haiku-20241022",
+ name: "Claude 3.5 Haiku",
+ description: "Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 0.8
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 4
+ },
+ tags: ["general-purpose", "low-cost"],
+ lifecycle: "live"
+ },
+ "anthropic:claude-3-5-sonnet-20241022": {
+ id: "anthropic:claude-3-5-sonnet-20241022",
+ name: "Claude 3.5 Sonnet (October 2024)",
+ description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 15
+ },
+ tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+ lifecycle: "live"
+ },
+ "anthropic:claude-3-5-sonnet-20240620": {
+ id: "anthropic:claude-3-5-sonnet-20240620",
+ name: "Claude 3.5 Sonnet (June 2024)",
+ description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 15
+ },
+ tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+ lifecycle: "live"
+ },
+ "anthropic:claude-3-haiku-20240307": {
+ id: "anthropic:claude-3-haiku-20240307",
+ name: "Claude 3 Haiku",
+ description: "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 0.25
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 1.25
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live"
+ },
+ "google-ai:gemini-2.5-flash": {
+ id: "google-ai:gemini-2.5-flash",
+ name: "Gemini 2.5 Flash",
+ description: `Google's state-of-the-art workhorse model with advanced reasoning, coding, mathematics, and scientific capabilities. Includes built-in "thinking" capabilities for enhanced accuracy.`,
+ input: {
+ maxTokens: 1048576,
+ costPer1MTokens: 0.3
+ },
+ output: {
+ maxTokens: 65536,
+ costPer1MTokens: 2.5
+ },
+ tags: ["recommended", "reasoning", "agents", "general-purpose", "vision"],
+ lifecycle: "live"
+ },
+ "google-ai:gemini-2.5-pro": {
+ id: "google-ai:gemini-2.5-pro",
+ name: "Gemini 2.5 Pro",
+ description: `Google's most advanced AI model designed for complex reasoning, coding, mathematics, and scientific tasks. Features "thinking" capabilities for superior human-preference alignment and problem-solving.`,
+ input: {
+ maxTokens: 2e5,
+ costPer1MTokens: 1.25
+ },
+ output: {
+ maxTokens: 65536,
+ costPer1MTokens: 10
+ },
+ tags: ["recommended", "reasoning", "agents", "general-purpose", "vision", "coding"],
+ lifecycle: "live"
+ },
+ "google-ai:models/gemini-2.0-flash": {
+ id: "google-ai:models/gemini-2.0-flash",
+ name: "Gemini 2.0 Flash",
+ description: "Next-gen Gemini model with improved capabilities, superior speed, native tool use, multimodal generation, and 1M token context window.",
+ input: {
+ maxTokens: 1048576,
+ costPer1MTokens: 0.1
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.4
+ },
+ tags: ["low-cost", "general-purpose", "vision"],
+ lifecycle: "live"
+ },
+ "cerebras:gpt-oss-120b": {
+ id: "cerebras:gpt-oss-120b",
+ name: "GPT-OSS 120B (Preview)",
+ description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+ input: {
+ maxTokens: 131e3,
+ costPer1MTokens: 0.35
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.75
+ },
+ tags: ["preview", "general-purpose", "reasoning"],
+ lifecycle: "live"
+ },
+ "cerebras:qwen-3-32b": {
+ id: "cerebras:qwen-3-32b",
+ name: "Qwen3 32B",
+ description: "Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning, making it an exceptional model for a wide range of production use cases.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.4
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.8
+ },
+ tags: ["general-purpose", "reasoning"],
+ lifecycle: "live"
+ },
+ "cerebras:llama-4-scout-17b-16e-instruct": {
+ id: "cerebras:llama-4-scout-17b-16e-instruct",
+ name: "Llama 4 Scout 17B",
+ description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages.",
+ input: {
+ maxTokens: 32e3,
+ costPer1MTokens: 0.65
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.85
+ },
+ tags: ["general-purpose", "vision"],
+ lifecycle: "live"
+ },
+ "cerebras:llama3.1-8b": {
+ id: "cerebras:llama3.1-8b",
+ name: "Llama 3.1 8B",
+ description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 32e3,
+ costPer1MTokens: 0.1
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.1
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live"
+ },
+ "cerebras:llama3.3-70b": {
+ id: "cerebras:llama3.3-70b",
+ name: "Llama 3.3 70B",
+ description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.85
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 1.2
+ },
+ tags: ["general-purpose"],
+ lifecycle: "live"
+ },
+ "groq:openai/gpt-oss-20b": {
+ id: "groq:openai/gpt-oss-20b",
+ name: "GPT-OSS 20B (Preview)",
+ description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+ input: {
+ maxTokens: 131e3,
+ costPer1MTokens: 0.1
+ },
+ output: {
+ maxTokens: 32e3,
+ costPer1MTokens: 0.5
+ },
+ tags: ["preview", "general-purpose", "reasoning", "low-cost"],
+ lifecycle: "live"
+ },
+ "groq:openai/gpt-oss-120b": {
+ id: "groq:openai/gpt-oss-120b",
+ name: "GPT-OSS 120B (Preview)",
+ description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+ input: {
+ maxTokens: 131e3,
+ costPer1MTokens: 0.15
+ },
+ output: {
+ maxTokens: 32e3,
+ costPer1MTokens: 0.75
+ },
+ tags: ["preview", "general-purpose", "reasoning"],
+ lifecycle: "live"
+ },
+ "groq:deepseek-r1-distill-llama-70b": {
+ id: "groq:deepseek-r1-distill-llama-70b",
+ name: "DeepSeek R1-Distill Llama 3.3 70B (Preview)",
+ description: "A fine-tuned version of Llama 3.3 70B using samples generated by DeepSeek-R1, making it smarter than the original Llama 70B, particularly for tasks requiring mathematical and factual precision.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.75
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 0.99
+ },
+ tags: ["general-purpose", "reasoning", "preview"],
+ lifecycle: "live"
+ },
+ "groq:llama-3.3-70b-versatile": {
+ id: "groq:llama-3.3-70b-versatile",
+ name: "LLaMA 3.3 70B",
+ description: "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.59
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 0.79
+ },
+ tags: ["recommended", "general-purpose", "coding"],
+ lifecycle: "live"
+ },
+ "groq:llama-3.2-1b-preview": {
+ id: "groq:llama-3.2-1b-preview",
+ name: "LLaMA 3.2 1B (Preview)",
+ description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.04
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.04
+ },
+ tags: ["low-cost", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:llama-3.2-3b-preview": {
+ id: "groq:llama-3.2-3b-preview",
+ name: "LLaMA 3.2 3B (Preview)",
+ description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.06
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.06
+ },
+ tags: ["low-cost", "general-purpose", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:llama-3.2-11b-vision-preview": {
+ id: "groq:llama-3.2-11b-vision-preview",
+ name: "LLaMA 3.2 11B Vision (Preview)",
+ description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.18
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.18
+ },
+ tags: ["low-cost", "vision", "general-purpose", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:llama-3.2-90b-vision-preview": {
+ id: "groq:llama-3.2-90b-vision-preview",
+ name: "LLaMA 3.2 90B Vision (Preview)",
+ description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.9
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.9
+ },
+ tags: ["vision", "general-purpose", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:llama-3.1-8b-instant": {
+ id: "groq:llama-3.1-8b-instant",
+ name: "LLaMA 3.1 8B",
+ description: "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.05
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.08
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live"
+ },
+ "groq:llama3-8b-8192": {
+ id: "groq:llama3-8b-8192",
+ name: "LLaMA 3 8B",
+ description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.05
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.08
+ },
+ tags: ["low-cost", "general-purpose", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:llama3-70b-8192": {
+ id: "groq:llama3-70b-8192",
+ name: "LLaMA 3 70B",
+ description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.59
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.79
+ },
+ tags: ["general-purpose", "deprecated"],
+ lifecycle: "discontinued"
+ },
+ "groq:gemma2-9b-it": {
+ id: "groq:gemma2-9b-it",
+ name: "Gemma2 9B",
+ description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+ input: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.2
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live"
+ },
+ "xai:grok-code-fast-1": {
+ id: "xai:grok-code-fast-1",
+ name: "Grok Code Fast 1",
+ description: "Fast coding-optimized Grok model with large context window.",
+ input: {
+ maxTokens: 256e3,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 1.5
+ },
+ tags: ["coding", "general-purpose", "low-cost"],
+ lifecycle: "live"
+ },
+ "xai:grok-4-fast-reasoning": {
+ id: "xai:grok-4-fast-reasoning",
+ name: "Grok 4 Fast (Reasoning)",
+ description: "Advanced fast Grok model with reasoning and very large context.",
+ input: {
+ maxTokens: 2e6,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.5
+ },
+ tags: ["reasoning", "recommended", "general-purpose"],
+ lifecycle: "live"
+ },
+ "xai:grok-4-fast-non-reasoning": {
+ id: "xai:grok-4-fast-non-reasoning",
+ name: "Grok 4 Fast (Non-Reasoning)",
+ description: "Fast, cost-effective Grok model for non-reasoning tasks.",
+ input: {
+ maxTokens: 2e6,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.5
+ },
+ tags: ["low-cost", "recommended", "general-purpose"],
+ lifecycle: "live"
+ },
+ "xai:grok-4-0709": {
+ id: "xai:grok-4-0709",
+ name: "Grok 4 (0709)",
+ description: "Comprehensive Grok 4 model for general-purpose tasks.",
+ input: {
+ maxTokens: 256e3,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 15
+ },
+ tags: ["reasoning", "general-purpose"],
+ lifecycle: "live"
+ },
+ "xai:grok-3-mini": {
+ id: "xai:grok-3-mini",
+ name: "Grok 3 Mini",
+ description: "Lightweight Grok model for cost-sensitive workloads.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.3
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.5
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live"
+ },
+ "xai:grok-3": {
+ id: "xai:grok-3",
+ name: "Grok 3",
+ description: "Enterprise-grade Grok model for general-purpose tasks.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 15
+ },
+ tags: ["general-purpose"],
+ lifecycle: "live"
+ },
+ "openrouter:gpt-oss-120b": {
+ id: "openrouter:gpt-oss-120b",
+ name: "GPT-OSS 120B (Preview)",
+ description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+ input: {
+ maxTokens: 131e3,
+ costPer1MTokens: 0.15
+ },
+ output: {
+ maxTokens: 32e3,
+ costPer1MTokens: 0.75
+ },
+ tags: ["preview", "general-purpose", "reasoning"],
+ lifecycle: "live"
+ },
+ "fireworks-ai:gpt-oss-20b": {
+ id: "fireworks-ai:gpt-oss-20b",
+ name: "GPT-OSS 20B",
+ description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.07
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.3
+ },
+ tags: ["general-purpose", "reasoning", "low-cost"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/gpt-oss-20b"]
+ },
+ "fireworks-ai:gpt-oss-120b": {
+ id: "fireworks-ai:gpt-oss-120b",
+ name: "GPT-OSS 120B",
+ description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.15
+ },
+ output: {
+ maxTokens: 16e3,
+ costPer1MTokens: 0.6
+ },
+ tags: ["general-purpose", "reasoning"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/gpt-oss-120b"]
+ },
+ "fireworks-ai:deepseek-r1-0528": {
+ id: "fireworks-ai:deepseek-r1-0528",
+ name: "DeepSeek R1 0528",
+ description: "The updated DeepSeek R1 0528 model delivers major improvements in reasoning, inference, and accuracy through enhanced post-training optimization and greater computational resources. It now performs at a level approaching top-tier models like OpenAI o3 and Gemini 2.5 Pro, with notable gains in complex tasks such as math and programming. The update also reduces hallucinations, improves function calling, and enhances the coding experience.",
+ input: {
+ maxTokens: 16e4,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 8
+ },
+ tags: ["recommended", "reasoning", "general-purpose", "coding"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/deepseek-r1-0528"]
+ },
+ "fireworks-ai:deepseek-v3-0324": {
+ id: "fireworks-ai:deepseek-v3-0324",
+ name: "DeepSeek V3 0324",
+ description: "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the DeepSeek V3 model and performs really well on a variety of tasks.",
+ input: {
+ maxTokens: 16e4,
+ costPer1MTokens: 0.9
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.9
+ },
+ tags: ["recommended", "general-purpose"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/deepseek-v3-0324"]
+ },
+ "fireworks-ai:llama4-maverick-instruct-basic": {
+ id: "fireworks-ai:llama4-maverick-instruct-basic",
+ name: "Llama 4 Maverick Instruct (Basic)",
+ description: "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction, and suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
+ input: {
+ maxTokens: 1e6,
+ costPer1MTokens: 0.22
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.88
+ },
+ tags: ["general-purpose", "vision"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/llama4-maverick-instruct-basic"]
+ },
+ "fireworks-ai:llama4-scout-instruct-basic": {
+ id: "fireworks-ai:llama4-scout-instruct-basic",
+ name: "Llama 4 Scout Instruct (Basic)",
+ description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, it is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks.",
+ input: {
+ maxTokens: 1048576,
+ costPer1MTokens: 0.15
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.6
+ },
+ tags: ["general-purpose", "vision"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/llama4-scout-instruct-basic"]
+ },
+ "fireworks-ai:llama-v3p3-70b-instruct": {
+ id: "fireworks-ai:llama-v3p3-70b-instruct",
+ name: "Llama 3.3 70B Instruct",
+ description: "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. The model improves upon Llama 3.1 70B (released July 2024) with advances in tool calling, multilingual text support, math and coding. The model achieves industry leading results in reasoning, math and instruction following and provides similar performance as 3.1 405B but with significant speed and cost improvements.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.9
+ },
+ output: {
+ maxTokens: 16384,
+ costPer1MTokens: 0.9
+ },
+ tags: ["general-purpose"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/llama-v3p3-70b-instruct"]
+ },
+ "fireworks-ai:deepseek-r1": {
+ id: "fireworks-ai:deepseek-r1",
+ name: "DeepSeek R1 (Fast)",
+ description: "This version of the R1 model has a perfect balance between speed and cost-efficiency for real-time interactive experiences, with speeds up to 90 tokens per second.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 8
+ },
+ tags: ["reasoning", "general-purpose", "coding"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/deepseek-r1"]
+ },
+ "fireworks-ai:deepseek-r1-basic": {
+ id: "fireworks-ai:deepseek-r1-basic",
+ name: "DeepSeek R1 (Basic)",
+ description: 'This version of the R1 model is optimized for throughput and cost-effectiveness and has a lower cost but slightly higher latency than the "Fast" version of the model.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.',
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.55
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 2.19
+ },
+ tags: ["recommended", "reasoning", "general-purpose", "coding"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/deepseek-r1-basic"]
+ },
+ "fireworks-ai:deepseek-v3": {
+ id: "fireworks-ai:deepseek-v3",
+ name: "DeepSeek V3",
+ description: "A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
+ input: {
+ maxTokens: 128e3,
+ costPer1MTokens: 0.9
+ },
+ output: {
+ maxTokens: 8e3,
+ costPer1MTokens: 0.9
+ },
+ tags: ["deprecated", "general-purpose"],
+ lifecycle: "deprecated",
+ aliases: ["accounts/fireworks/models/deepseek-v3"]
+ },
+ "fireworks-ai:llama-v3p1-405b-instruct": {
+ id: "fireworks-ai:llama-v3p1-405b-instruct",
+ name: "Llama 3.1 405B Instruct",
+ description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 3
+ },
+ output: {
+ maxTokens: 131072,
+ costPer1MTokens: 3
+ },
+ tags: ["deprecated", "general-purpose"],
+ lifecycle: "deprecated",
+ aliases: ["accounts/fireworks/models/llama-v3p1-405b-instruct"]
+ },
+ "fireworks-ai:llama-v3p1-70b-instruct": {
+ id: "fireworks-ai:llama-v3p1-70b-instruct",
+ name: "Llama 3.1 70B Instruct",
+ description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.9
+ },
+ output: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.9
+ },
+ tags: ["deprecated", "general-purpose"],
+ lifecycle: "deprecated",
+ aliases: ["accounts/fireworks/models/llama-v3p1-70b-instruct"]
+ },
+ "fireworks-ai:llama-v3p1-8b-instruct": {
+ id: "fireworks-ai:llama-v3p1-8b-instruct",
+ name: "Llama 3.1 8B Instruct",
+ description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+ input: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 131072,
+ costPer1MTokens: 0.2
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/llama-v3p1-8b-instruct"]
+ },
+ "fireworks-ai:mixtral-8x22b-instruct": {
+ id: "fireworks-ai:mixtral-8x22b-instruct",
+ name: "Mixtral MoE 8x22B Instruct",
+ description: "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
+ input: {
+ maxTokens: 65536,
+ costPer1MTokens: 1.2
+ },
+ output: {
+ maxTokens: 65536,
+ costPer1MTokens: 1.2
+ },
+ tags: ["general-purpose"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/mixtral-8x22b-instruct"]
+ },
+ "fireworks-ai:mixtral-8x7b-instruct": {
+ id: "fireworks-ai:mixtral-8x7b-instruct",
+ name: "Mixtral MoE 8x7B Instruct",
+ description: "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following",
+ input: {
+ maxTokens: 32768,
+ costPer1MTokens: 0.5
+ },
+ output: {
+ maxTokens: 32768,
+ costPer1MTokens: 0.5
+ },
+ tags: ["low-cost", "general-purpose"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/mixtral-8x7b-instruct"]
+ },
+ "fireworks-ai:mythomax-l2-13b": {
+ id: "fireworks-ai:mythomax-l2-13b",
+ name: "MythoMax L2 13b",
+ description: "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
+ input: {
+ maxTokens: 4096,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 4096,
+ costPer1MTokens: 0.2
+ },
+ tags: ["roleplay", "storytelling", "low-cost"],
+ lifecycle: "live",
+ aliases: ["accounts/fireworks/models/mythomax-l2-13b"]
+ },
+ "fireworks-ai:gemma2-9b-it": {
+ id: "fireworks-ai:gemma2-9b-it",
+ name: "Gemma 2 9B Instruct",
+ description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+ input: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.2
+ },
+ output: {
+ maxTokens: 8192,
+ costPer1MTokens: 0.2
+ },
+ tags: ["deprecated", "low-cost", "general-purpose"],
+ lifecycle: "deprecated",
+ aliases: ["accounts/fireworks/models/gemma2-9b-it"]
+ }
+ };
+ var knownTags = [
+ "auto",
+ "best",
+ "fast",
+ "reasoning",
+ "cheapest",
+ "balance",
+ "recommended",
+ "reasoning",
+ "general-purpose",
+ "low-cost",
+ "vision",
+ "coding",
+ "function-calling",
+ "agents",
+ "storytelling",
+ "preview",
+ "roleplay"
+ ];
+ var defaultModel = {
+ id: "",
+ name: "",
+ description: "",
+ input: {
+ costPer1MTokens: 0,
+ maxTokens: 1e6
+ },
+ output: {
+ costPer1MTokens: 0,
+ maxTokens: 1e6
+ },
+ tags: [],
+ lifecycle: "live"
+ };
+
+ // src/cognitive-v2/index.ts
  var isBrowser = () => typeof window !== "undefined" && typeof window.fetch === "function";
  var CognitiveBeta = class {
  _axiosClient;
@@ -682,15 +1775,11 @@ var CognitiveBeta = class {
  );
  return data;
  }
- async listModels(input, options = {}) {
- const signal = options.signal ?? AbortSignal.timeout(this._timeout);
+ async listModels() {
  const { data } = await this._withServerRetry(
- () => this._axiosClient.post("/v2/cognitive/models", input, {
- signal,
- timeout: options.timeout ?? this._timeout
- })
+ () => this._axiosClient.get("/v2/cognitive/models")
  );
- return data;
+ return data.models;
  }
  async *generateTextStream(request, options = {}) {
  const signal = options.signal ?? AbortSignal.timeout(this._timeout);
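
Note on the hunk above: listModels is now a parameterless GET against /v2/cognitive/models instead of a POST that took a request body plus signal/timeout options, and it unwraps the response, returning data.models rather than the raw payload. A minimal before/after sketch (the betaClient variable name is illustrative, not from the diff):

// 0.1.41: request body and per-call abort options
// const payload = await betaClient.listModels(input, { timeout: 5000 })

// 0.1.43: no arguments; the models array is returned directly
const models = await betaClient.listModels()
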
@@ -800,6 +1889,19 @@ var CognitiveBeta = class {
  });
  }
  };
+ var getCognitiveV2Model = (model) => {
+ if (models[model]) {
+ return models[model];
+ }
+ const alias = Object.values(models).find((x) => x.aliases?.includes(model));
+ if (alias) {
+ return alias;
+ }
+ if (knownTags.includes(model)) {
+ return { ...defaultModel, id: model, name: model };
+ }
+ return void 0;
+ };
 
  // src/errors.ts
  var getActionFromError = (error) => {
@@ -894,7 +1996,7 @@ var scoreModel = (model, type, boosts = {}) => {
  const scores = [
  ["input price penalty", model.input.costPer1MTokens > InputPricePenalty, -1],
  ["output price penalty", model.output.costPer1MTokens > OutputPricePenalty, -1],
- ["low tokens penalty", (model.input.maxTokens ?? 0 + model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
+ ["low tokens penalty", (model.input.maxTokens ?? 0) + (model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
  ["recommended", isRecommended(model), 2],
  ["deprecated", isDeprecated(model), -2],
  ["vision support", hasVisionSupport(model), 1],
@@ -917,10 +2019,10 @@ var scoreModel = (model, type, boosts = {}) => {
  }
  return score;
  };
- var getBestModels = (models, boosts = {}) => models.sort((a, b) => scoreModel(b, "best", boosts) - scoreModel(a, "best", boosts));
- var getFastModels = (models, boosts = {}) => models.sort((a, b) => scoreModel(b, "fast", boosts) - scoreModel(a, "fast", boosts));
- var pickModel = (models, downtimes = []) => {
- const copy = [...models];
+ var getBestModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "best", boosts) - scoreModel(a, "best", boosts));
+ var getFastModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "fast", boosts) - scoreModel(a, "fast", boosts));
+ var pickModel = (models2, downtimes = []) => {
+ const copy = [...models2];
  const elasped = (date) => (/* @__PURE__ */ new Date()).getTime() - new Date(date).getTime();
  const DOWNTIME_THRESHOLD = 1e3 * 60 * DOWNTIME_THRESHOLD_MINUTES;
  if (!copy.length) {
@@ -935,7 +2037,7 @@ var pickModel = (models, downtimes = []) => {
  return ref;
  }
  }
- throw new Error(`All models are down: ${models.join(", ")}`);
+ throw new Error(`All models are down: ${models2.join(", ")}`);
  };
  var ModelProvider = class {
  };
@@ -959,7 +2061,7 @@ var RemoteModelProvider = class extends ModelProvider {
  }
  async fetchInstalledModels() {
  const integrationNames = await this._fetchInstalledIntegrationNames();
- const models = [];
+ const models2 = [];
  await Promise.allSettled(
  integrationNames.map(async (integration) => {
  const { output } = await this._client.callAction({
@@ -971,7 +2073,7 @@ var RemoteModelProvider = class extends ModelProvider {
  }
  for (const model of output.models) {
  if (model.name && model.id && model.input && model.tags) {
- models.push({
+ models2.push({
  ref: `${integration}:${model.id}`,
  integration,
  id: model.id,
@@ -985,7 +2087,7 @@ var RemoteModelProvider = class extends ModelProvider {
  }
  })
  );
- return models;
+ return models2;
  }
  async fetchModelPreferences() {
  try {
@@ -1096,10 +2198,10 @@ var Cognitive = class _Cognitive {
  if (this._preferences) {
  return this._preferences;
  }
- const models = await this.fetchInstalledModels();
+ const models2 = await this.fetchInstalledModels();
  this._preferences = {
- best: getBestModels(models).map((m) => m.ref),
- fast: getFastModels(models).map((m) => m.ref),
+ best: getBestModels(models2).map((m) => m.ref),
+ fast: getFastModels(models2).map((m) => m.ref),
  downtimes: []
  };
  await this._provider.saveModelPreferences(this._preferences);
@@ -1138,6 +2240,12 @@ var Cognitive = class _Cognitive {
  return parseRef(pickModel([ref, ...preferences.best, ...preferences.fast], downtimes));
  }
  async getModelDetails(model) {
+ if (this._useBeta) {
+ const resolvedModel = getCognitiveV2Model(model);
+ if (resolvedModel) {
+ return { ...resolvedModel, ref: resolvedModel.id, integration: "cognitive-v2" };
+ }
+ }
  await this.fetchInstalledModels();
  const { integration, model: modelName } = await this._selectModel(model);
  const def = this._models.find((m) => m.integration === integration && (m.name === modelName || m.id === modelName));
@@ -1147,7 +2255,7 @@ var Cognitive = class _Cognitive {
  return def;
  }
  async generateContent(input) {
- if (!this._useBeta) {
+ if (!this._useBeta || !getCognitiveV2Model(input.model)) {
  return this._generateContent(input);
  }
  const betaClient = new CognitiveBeta(this._client.config);
@@ -1267,6 +2375,7 @@ var Cognitive = class _Cognitive {
  Cognitive,
  CognitiveBeta,
  ModelProvider,
- RemoteModelProvider
+ RemoteModelProvider,
+ getCognitiveV2Model
  });
  //# sourceMappingURL=index.cjs.map
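
Usage note: the headline change in this release is the bundled model catalog (src/cognitive-v2/models.ts) and the newly exported getCognitiveV2Model resolver, which getModelDetails and generateContent now consult before falling back to the installed-integration lookup. The resolver tries an exact catalog id, then a provider alias, then a known routing tag, and returns undefined otherwise. A short sketch of the resolution order (the sample ids come from the catalog above; treat the snippet as an illustration, not official documentation):

import { getCognitiveV2Model } from '@botpress/cognitive'

// 1. Exact catalog id
getCognitiveV2Model('openai:gpt-5-2025-08-07')?.name // 'GPT-5'

// 2. Provider alias (the Fireworks entries declare aliases)
getCognitiveV2Model('accounts/fireworks/models/deepseek-r1')?.id // 'fireworks-ai:deepseek-r1'

// 3. Known routing tag: falls back to a zero-cost placeholder model
getCognitiveV2Model('fast') // { id: 'fast', name: 'fast', ... }

// Anything else resolves to undefined, which sends generateContent
// down the legacy (non-beta) path.
getCognitiveV2Model('no-such-model') // undefined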