@botpress/cognitive 0.1.41 → 0.1.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/dist/index.cjs +1042 -23
- package/dist/index.cjs.map +4 -4
- package/dist/index.d.ts +26 -6
- package/dist/index.mjs +1040 -22
- package/dist/index.mjs.map +4 -4
- package/package.json +3 -2
- package/refresh-models.ts +100 -0
package/dist/index.cjs
CHANGED
@@ -581,7 +581,8 @@ __export(index_exports, {
   Cognitive: () => Cognitive,
   CognitiveBeta: () => CognitiveBeta,
   ModelProvider: () => ModelProvider,
-  RemoteModelProvider: () => RemoteModelProvider
+  RemoteModelProvider: () => RemoteModelProvider,
+  getCognitiveV2Model: () => getCognitiveV2Model
 });
 module.exports = __toCommonJS(index_exports);
 
@@ -645,9 +646,1011 @@ var getExtendedClient = (_client) => {
   };
 };
 
-// src/
+// src/cognitive-v2/index.ts
 var import_axios = __toESM(require("axios"));
 var import_exponential_backoff = __toESM(require_backoff());
+
+// src/cognitive-v2/models.ts
+var models = {
+  "openai:gpt-5-2025-08-07": {
+    id: "openai:gpt-5-2025-08-07",
+    name: "GPT-5",
+    description: "GPT-5 is OpenAI's latest and most advanced AI model. It is a reasoning model that chooses the best way to respond based on task complexity and user intent. GPT-5 delivers expert-level performance across coding, math, writing, health, and visual perception, with improved accuracy, speed, and reduced hallucinations. It excels in complex tasks, long-context understanding, multimodal inputs (text and images), and safe, nuanced responses.",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 1.25
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-5-mini-2025-08-07": {
+    id: "openai:gpt-5-mini-2025-08-07",
+    name: "GPT-5 Mini",
+    description: "GPT-5 Mini is a lightweight and cost-effective version of GPT-5, optimized for applications where speed and efficiency matter more than full advanced capabilities. It is designed for cost-sensitive use cases such as chatbots, content generation, and high-volume usage, striking a balance between performance and affordability, making it suitable for simpler tasks that do not require deep multi-step reasoning or the full reasoning power of GPT-5",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 0.25
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2
+    },
+    tags: ["recommended", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-5-nano-2025-08-07": {
+    id: "openai:gpt-5-nano-2025-08-07",
+    name: "GPT-5 Nano",
+    description: "GPT-5 Nano is an ultra-lightweight version of GPT-5 optimized for speed and very low latency, making it ideal for use cases like simple chatbots, basic content generation, summarization, and classification tasks.",
+    input: {
+      maxTokens: 4e5,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "reasoning", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o4-mini-2025-04-16": {
+    id: "openai:o4-mini-2025-04-16",
+    name: "GPT o4-mini",
+    description: "o4-mini is OpenAI's latest small o-series model. It's optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "openai:o3-2025-04-16": {
+    id: "openai:o3-2025-04-16",
+    name: "GPT o3",
+    description: "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 2
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 8
+    },
+    tags: ["reasoning", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-2025-04-14": {
+    id: "openai:gpt-4.1-2025-04-14",
+    name: "GPT 4.1",
+    description: "GPT 4.1 is our flagship model for complex tasks. It is well suited for problem solving across domains. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 2
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 8
+    },
+    tags: ["recommended", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-mini-2025-04-14": {
+    id: "openai:gpt-4.1-mini-2025-04-14",
+    name: "GPT 4.1 Mini",
+    description: "GPT 4.1 mini provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 0.4
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 1.6
+    },
+    tags: ["recommended", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4.1-nano-2025-04-14": {
+    id: "openai:gpt-4.1-nano-2025-04-14",
+    name: "GPT 4.1 Nano",
+    description: "GPT-4.1 nano is the fastest, most cost-effective GPT 4.1 model. The knowledge cutoff is June 2024.",
+    input: {
+      maxTokens: 1047576,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o3-mini-2025-01-31": {
+    id: "openai:o3-mini-2025-01-31",
+    name: "GPT o3-mini",
+    description: "o3-mini is the most recent small reasoning model from OpenAI, providing high intelligence at the same cost and latency targets of o1-mini. Also supports key developer features like Structured Outputs and function calling.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "openai:o1-2024-12-17": {
+    id: "openai:o1-2024-12-17",
+    name: "GPT o1",
+    description: "The o1 model is designed to solve hard problems across domains. Trained with reinforcement learning to perform complex reasoning with a long internal chain of thought.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 15
+    },
+    output: {
+      maxTokens: 1e5,
+      costPer1MTokens: 60
+    },
+    tags: ["reasoning", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:o1-mini-2024-09-12": {
+    id: "openai:o1-mini-2024-09-12",
+    name: "GPT o1-mini",
+    description: "The o1-mini model is a fast and affordable reasoning model for specialized tasks. Trained with reinforcement learning to perform complex reasoning.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 1.1
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 4.4
+    },
+    tags: ["reasoning", "vision", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-mini-2024-07-18": {
+    id: "openai:gpt-4o-mini-2024-07-18",
+    name: "GPT-4o Mini",
+    description: "GPT-4o mini is OpenAI's most advanced model in the small models category, and their cheapest model yet. Multimodal with higher intelligence than gpt-3.5-turbo but just as fast.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.6
+    },
+    tags: ["recommended", "vision", "low-cost", "general-purpose", "function-calling"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-2024-11-20": {
+    id: "openai:gpt-4o-2024-11-20",
+    name: "GPT-4o (November 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2.5
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "live"
+  },
+  "openai:gpt-4o-2024-08-06": {
+    id: "openai:gpt-4o-2024-08-06",
+    name: "GPT-4o (August 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 2.5
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 10
+    },
+    tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-4o-2024-05-13": {
+    id: "openai:gpt-4o-2024-05-13",
+    name: "GPT-4o (May 2024)",
+    description: "GPT-4o is OpenAI's most advanced model. Multimodal with the same high intelligence as GPT-4 Turbo but cheaper and more efficient.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 5
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-4-turbo-2024-04-09": {
+    id: "openai:gpt-4-turbo-2024-04-09",
+    name: "GPT-4 Turbo",
+    description: "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy than previous models, thanks to its broader general knowledge and advanced reasoning capabilities.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 10
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 30
+    },
+    tags: ["deprecated", "general-purpose", "coding", "agents", "function-calling"],
+    lifecycle: "deprecated"
+  },
+  "openai:gpt-3.5-turbo-0125": {
+    id: "openai:gpt-3.5-turbo-0125",
+    name: "GPT-3.5 Turbo",
+    description: "GPT-3.5 Turbo can understand and generate natural language or code and has been optimized for chat but works well for non-chat tasks as well.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.5
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 1.5
+    },
+    tags: ["deprecated", "general-purpose", "low-cost"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-sonnet-4-20250514": {
+    id: "anthropic:claude-sonnet-4-20250514",
+    name: "Claude Sonnet 4",
+    description: "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-sonnet-4-reasoning-20250514": {
+    id: "anthropic:claude-sonnet-4-reasoning-20250514",
+    name: "Claude Sonnet 4 (Reasoning Mode)",
+    description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability.',
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-3-7-sonnet-20250219": {
+    id: "anthropic:claude-3-7-sonnet-20250219",
+    name: "Claude 3.7 Sonnet",
+    description: "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["recommended", "reasoning", "agents", "vision", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-7-sonnet-reasoning-20250219": {
+    id: "anthropic:claude-3-7-sonnet-reasoning-20250219",
+    name: "Claude 3.7 Sonnet (Reasoning Mode)",
+    description: 'This model uses the "Extended Thinking" mode and will use a significantly higher amount of output tokens than the Standard Mode, so this model should only be used for tasks that actually require it.\n\nClaude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.',
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 64e3,
+      costPer1MTokens: 15
+    },
+    tags: ["deprecated", "vision", "reasoning", "general-purpose", "agents", "coding"],
+    lifecycle: "deprecated"
+  },
+  "anthropic:claude-3-5-haiku-20241022": {
+    id: "anthropic:claude-3-5-haiku-20241022",
+    name: "Claude 3.5 Haiku",
+    description: "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 0.8
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 4
+    },
+    tags: ["general-purpose", "low-cost"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-5-sonnet-20241022": {
+    id: "anthropic:claude-3-5-sonnet-20241022",
+    name: "Claude 3.5 Sonnet (October 2024)",
+    description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 15
+    },
+    tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-5-sonnet-20240620": {
+    id: "anthropic:claude-3-5-sonnet-20240620",
+    name: "Claude 3.5 Sonnet (June 2024)",
+    description: "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at coding, data science, visual processing, and agentic tasks.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 15
+    },
+    tags: ["vision", "general-purpose", "agents", "coding", "function-calling", "storytelling"],
+    lifecycle: "live"
+  },
+  "anthropic:claude-3-haiku-20240307": {
+    id: "anthropic:claude-3-haiku-20240307",
+    name: "Claude 3 Haiku",
+    description: "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.",
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 0.25
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 1.25
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "google-ai:gemini-2.5-flash": {
+    id: "google-ai:gemini-2.5-flash",
+    name: "Gemini 2.5 Flash",
+    description: `Google's state-of-the-art workhorse model with advanced reasoning, coding, mathematics, and scientific capabilities. Includes built-in "thinking" capabilities for enhanced accuracy.`,
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.3
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 2.5
+    },
+    tags: ["recommended", "reasoning", "agents", "general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "google-ai:gemini-2.5-pro": {
+    id: "google-ai:gemini-2.5-pro",
+    name: "Gemini 2.5 Pro",
+    description: `Google's most advanced AI model designed for complex reasoning, coding, mathematics, and scientific tasks. Features "thinking" capabilities for superior human-preference alignment and problem-solving.`,
+    input: {
+      maxTokens: 2e5,
+      costPer1MTokens: 1.25
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 10
+    },
+    tags: ["recommended", "reasoning", "agents", "general-purpose", "vision", "coding"],
+    lifecycle: "live"
+  },
+  "google-ai:models/gemini-2.0-flash": {
+    id: "google-ai:models/gemini-2.0-flash",
+    name: "Gemini 2.0 Flash",
+    description: "Next-gen Gemini model with improved capabilities, superior speed, native tool use, multimodal generation, and 1M token context window.",
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.4
+    },
+    tags: ["low-cost", "general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "cerebras:gpt-oss-120b": {
+    id: "cerebras:gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.35
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "cerebras:qwen-3-32b": {
+    id: "cerebras:qwen-3-32b",
+    name: "Qwen3 32B",
+    description: "Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning, making it an exceptional model for a wide range of production use cases.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.4
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.8
+    },
+    tags: ["general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "cerebras:llama-4-scout-17b-16e-instruct": {
+    id: "cerebras:llama-4-scout-17b-16e-instruct",
+    name: "Llama 4 Scout 17B",
+    description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages.",
+    input: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.65
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.85
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live"
+  },
+  "cerebras:llama3.1-8b": {
+    id: "cerebras:llama3.1-8b",
+    name: "Llama 3.1 8B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.1
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "cerebras:llama3.3-70b": {
+    id: "cerebras:llama3.3-70b",
+    name: "Llama 3.3 70B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8B and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.85
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 1.2
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live"
+  },
+  "groq:openai/gpt-oss-20b": {
+    id: "groq:openai/gpt-oss-20b",
+    name: "GPT-OSS 20B (Preview)",
+    description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.1
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.5
+    },
+    tags: ["preview", "general-purpose", "reasoning", "low-cost"],
+    lifecycle: "live"
+  },
+  "groq:openai/gpt-oss-120b": {
+    id: "groq:openai/gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "groq:deepseek-r1-distill-llama-70b": {
+    id: "groq:deepseek-r1-distill-llama-70b",
+    name: "DeepSeek R1-Distill Llama 3.3 70B (Preview)",
+    description: "A fine-tuned version of Llama 3.3 70B using samples generated by DeepSeek-R1, making it smarter than the original Llama 70B, particularly for tasks requiring mathematical and factual precision.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.75
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.99
+    },
+    tags: ["general-purpose", "reasoning", "preview"],
+    lifecycle: "live"
+  },
+  "groq:llama-3.3-70b-versatile": {
+    id: "groq:llama-3.3-70b-versatile",
+    name: "LLaMA 3.3 70B",
+    description: "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.59
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.79
+    },
+    tags: ["recommended", "general-purpose", "coding"],
+    lifecycle: "live"
+  },
+  "groq:llama-3.2-1b-preview": {
+    id: "groq:llama-3.2-1b-preview",
+    name: "LLaMA 3.2 1B (Preview)",
+    description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.04
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.04
+    },
+    tags: ["low-cost", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-3b-preview": {
+    id: "groq:llama-3.2-3b-preview",
+    name: "LLaMA 3.2 3B (Preview)",
+    description: "The Llama 3.2 instruction-tuned, text-only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.06
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.06
+    },
+    tags: ["low-cost", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-11b-vision-preview": {
+    id: "groq:llama-3.2-11b-vision-preview",
+    name: "LLaMA 3.2 11B Vision (Preview)",
+    description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.18
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.18
+    },
+    tags: ["low-cost", "vision", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.2-90b-vision-preview": {
+    id: "groq:llama-3.2-90b-vision-preview",
+    name: "LLaMA 3.2 90B Vision (Preview)",
+    description: "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.9
+    },
+    tags: ["vision", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama-3.1-8b-instant": {
+    id: "groq:llama-3.1-8b-instant",
+    name: "LLaMA 3.1 8B",
+    description: "The Llama 3.1 instruction-tuned, text-only models are optimized for multilingual dialogue use cases.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.08
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "groq:llama3-8b-8192": {
+    id: "groq:llama3-8b-8192",
+    name: "LLaMA 3 8B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.05
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.08
+    },
+    tags: ["low-cost", "general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:llama3-70b-8192": {
+    id: "groq:llama3-70b-8192",
+    name: "LLaMA 3 70B",
+    description: "Meta developed and released the Meta Llama 3 family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.59
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.79
+    },
+    tags: ["general-purpose", "deprecated"],
+    lifecycle: "discontinued"
+  },
+  "groq:gemma2-9b-it": {
+    id: "groq:gemma2-9b-it",
+    name: "Gemma2 9B",
+    description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live"
+  },
+  "openrouter:gpt-oss-120b": {
+    id: "openrouter:gpt-oss-120b",
+    name: "GPT-OSS 120B (Preview)",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 131e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 32e3,
+      costPer1MTokens: 0.75
+    },
+    tags: ["preview", "general-purpose", "reasoning"],
+    lifecycle: "live"
+  },
+  "fireworks:gpt-oss-20b": {
+    id: "fireworks:gpt-oss-20b",
+    name: "GPT-OSS 20B",
+    description: "gpt-oss-20b is a compact, open-weight language model optimized for low-latency. It shares the same training foundation and capabilities as the GPT-OSS 120B model, with faster responses and lower cost.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.07
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.3
+    },
+    tags: ["general-purpose", "reasoning", "low-cost"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/gpt-oss-20b"]
+  },
+  "fireworks:gpt-oss-120b": {
+    id: "fireworks:gpt-oss-120b",
+    name: "GPT-OSS 120B",
+    description: "gpt-oss-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16e3,
+      costPer1MTokens: 0.6
+    },
+    tags: ["general-purpose", "reasoning"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/gpt-oss-120b"]
+  },
+  "fireworks:deepseek-r1-0528": {
+    id: "fireworks:deepseek-r1-0528",
+    name: "DeepSeek R1 0528",
+    description: "The updated DeepSeek R1 0528 model delivers major improvements in reasoning, inference, and accuracy through enhanced post-training optimization and greater computational resources. It now performs at a level approaching top-tier models like OpenAI o3 and Gemini 2.5 Pro, with notable gains in complex tasks such as math and programming. The update also reduces hallucinations, improves function calling, and enhances the coding experience.",
+    input: {
+      maxTokens: 16e4,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 8
+    },
+    tags: ["recommended", "reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1-0528"]
+  },
+  "fireworks:deepseek-v3-0324": {
+    id: "fireworks:deepseek-v3-0324",
+    name: "DeepSeek V3 0324",
+    description: "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the DeepSeek V3 model and performs really well on a variety of tasks.",
+    input: {
+      maxTokens: 16e4,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.9
+    },
+    tags: ["recommended", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-v3-0324"]
+  },
+  "fireworks:llama4-maverick-instruct-basic": {
+    id: "fireworks:llama4-maverick-instruct-basic",
+    name: "Llama 4 Maverick Instruct (Basic)",
+    description: "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction, and suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.",
+    input: {
+      maxTokens: 1e6,
+      costPer1MTokens: 0.22
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.88
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama4-maverick-instruct-basic"]
+  },
+  "fireworks:llama4-scout-instruct-basic": {
+    id: "fireworks:llama4-scout-instruct-basic",
+    name: "Llama 4 Scout Instruct (Basic)",
+    description: "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, uses 16 experts per forward pass, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, it is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks.",
+    input: {
+      maxTokens: 1048576,
+      costPer1MTokens: 0.15
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.6
+    },
+    tags: ["general-purpose", "vision"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama4-scout-instruct-basic"]
+  },
+  "fireworks:llama-v3p3-70b-instruct": {
+    id: "fireworks:llama-v3p3-70b-instruct",
+    name: "Llama 3.3 70B Instruct",
+    description: "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. The model improves upon Llama 3.1 70B (released July 2024) with advances in tool calling, multilingual text support, math and coding. The model achieves industry leading results in reasoning, math and instruction following and provides similar performance as 3.1 405B but with significant speed and cost improvements.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 16384,
+      costPer1MTokens: 0.9
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama-v3p3-70b-instruct"]
+  },
+  "fireworks:deepseek-r1": {
+    id: "fireworks:deepseek-r1",
+    name: "DeepSeek R1 (Fast)",
+    description: "This version of the R1 model has a perfect balance between speed and cost-efficiency for real-time interactive experiences, with speeds up to 90 tokens per second.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 8
+    },
+    tags: ["reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1"]
+  },
+  "fireworks:deepseek-r1-basic": {
+    id: "fireworks:deepseek-r1-basic",
+    name: "DeepSeek R1 (Basic)",
+    description: 'This version of the R1 model is optimized for throughput and cost-effectiveness and has a lower cost but slightly higher latency than the "Fast" version of the model.\n\nDeepSeek-R1 is a state-of-the-art large language model optimized with reinforcement learning and cold-start data for exceptional reasoning, math, and code performance. **Note**: This model will always use a temperature of 0.6 as recommended by DeepSeek.',
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.55
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 2.19
+    },
+    tags: ["recommended", "reasoning", "general-purpose", "coding"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/deepseek-r1-basic"]
+  },
+  "fireworks:deepseek-v3": {
+    id: "fireworks:deepseek-v3",
+    name: "DeepSeek V3",
+    description: "A a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
+    input: {
+      maxTokens: 128e3,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 8e3,
+      costPer1MTokens: 0.9
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/deepseek-v3"]
+  },
+  "fireworks:llama-v3p1-405b-instruct": {
+    id: "fireworks:llama-v3p1-405b-instruct",
+    name: "Llama 3.1 405B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 3
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 3
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/llama-v3p1-405b-instruct"]
+  },
+  "fireworks:llama-v3p1-70b-instruct": {
+    id: "fireworks:llama-v3p1-70b-instruct",
+    name: "Llama 3.1 70B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.9
+    },
+    tags: ["deprecated", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/llama-v3p1-70b-instruct"]
+  },
+  "fireworks:llama-v3p1-8b-instruct": {
+    id: "fireworks:llama-v3p1-8b-instruct",
+    name: "Llama 3.1 8B Instruct",
+    description: "The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes. The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.",
+    input: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 131072,
+      costPer1MTokens: 0.2
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/llama-v3p1-8b-instruct"]
+  },
+  "fireworks:mixtral-8x22b-instruct": {
+    id: "fireworks:mixtral-8x22b-instruct",
+    name: "Mixtral MoE 8x22B Instruct",
+    description: "Mistral MoE 8x22B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following.",
+    input: {
+      maxTokens: 65536,
+      costPer1MTokens: 1.2
+    },
+    output: {
+      maxTokens: 65536,
+      costPer1MTokens: 1.2
+    },
+    tags: ["general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mixtral-8x22b-instruct"]
+  },
+  "fireworks:mixtral-8x7b-instruct": {
+    id: "fireworks:mixtral-8x7b-instruct",
+    name: "Mixtral MoE 8x7B Instruct",
+    description: "Mistral MoE 8x7B Instruct v0.1 model with Sparse Mixture of Experts. Fine tuned for instruction following",
+    input: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.5
+    },
+    output: {
+      maxTokens: 32768,
+      costPer1MTokens: 0.5
+    },
+    tags: ["low-cost", "general-purpose"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mixtral-8x7b-instruct"]
+  },
+  "fireworks:mythomax-l2-13b": {
+    id: "fireworks:mythomax-l2-13b",
+    name: "MythoMax L2 13b",
+    description: "MythoMax L2 is designed to excel at both roleplaying and storytelling, and is an improved variant of the previous MythoMix model, combining the MythoLogic-L2 and Huginn models.",
+    input: {
+      maxTokens: 4096,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 4096,
+      costPer1MTokens: 0.2
+    },
+    tags: ["roleplay", "storytelling", "low-cost"],
+    lifecycle: "live",
+    aliases: ["accounts/fireworks/models/mythomax-l2-13b"]
+  },
+  "fireworks:gemma2-9b-it": {
+    id: "fireworks:gemma2-9b-it",
+    name: "Gemma 2 9B Instruct",
+    description: "Redesigned for outsized performance and unmatched efficiency, Gemma 2 optimizes for blazing-fast inference on diverse hardware. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.",
+    input: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    output: {
+      maxTokens: 8192,
+      costPer1MTokens: 0.2
+    },
+    tags: ["deprecated", "low-cost", "general-purpose"],
+    lifecycle: "deprecated",
+    aliases: ["accounts/fireworks/models/gemma2-9b-it"]
+  }
+};
+var knownTags = [
+  "auto",
+  "best",
+  "fast",
+  "reasoning",
+  "cheapest",
+  "balance",
+  "recommended",
+  "reasoning",
+  "general-purpose",
+  "low-cost",
+  "vision",
+  "coding",
+  "function-calling",
+  "agents",
+  "storytelling",
+  "preview",
+  "roleplay"
+];
+var defaultModel = {
+  id: "",
+  name: "",
+  description: "",
+  input: {
+    costPer1MTokens: 0,
+    maxTokens: 1e6
+  },
+  output: {
+    costPer1MTokens: 0,
+    maxTokens: 1e6
+  },
+  tags: [],
+  lifecycle: "live"
+};
+
+// src/cognitive-v2/index.ts
 var isBrowser = () => typeof window !== "undefined" && typeof window.fetch === "function";
 var CognitiveBeta = class {
   _axiosClient;
@@ -682,15 +1685,11 @@ var CognitiveBeta = class {
     );
     return data;
   }
-  async listModels(
-    const signal = options.signal ?? AbortSignal.timeout(this._timeout);
+  async listModels() {
     const { data } = await this._withServerRetry(
-      () => this._axiosClient.
-        signal,
-        timeout: options.timeout ?? this._timeout
-      })
+      () => this._axiosClient.get("/v2/cognitive/models")
     );
-    return data;
+    return data.models;
   }
   async *generateTextStream(request, options = {}) {
     const signal = options.signal ?? AbortSignal.timeout(this._timeout);
@@ -800,6 +1799,19 @@ var CognitiveBeta = class {
     });
   }
 };
+var getCognitiveV2Model = (model) => {
+  if (models[model]) {
+    return models[model];
+  }
+  const alias = Object.values(models).find((x) => x.aliases?.includes(model));
+  if (alias) {
+    return alias;
+  }
+  if (knownTags.includes(model)) {
+    return { ...defaultModel, id: model, name: model };
+  }
+  return void 0;
+};
 
 // src/errors.ts
 var getActionFromError = (error) => {
@@ -894,7 +1906,7 @@ var scoreModel = (model, type, boosts = {}) => {
   const scores = [
     ["input price penalty", model.input.costPer1MTokens > InputPricePenalty, -1],
     ["output price penalty", model.output.costPer1MTokens > OutputPricePenalty, -1],
-    ["low tokens penalty", (model.input.maxTokens ?? 0 + model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
+    ["low tokens penalty", (model.input.maxTokens ?? 0) + (model.output.maxTokens ?? 0) < LowTokensPenalty, -1],
     ["recommended", isRecommended(model), 2],
     ["deprecated", isDeprecated(model), -2],
     ["vision support", hasVisionSupport(model), 1],
@@ -917,10 +1929,10 @@ var scoreModel = (model, type, boosts = {}) => {
   }
   return score;
 };
-var getBestModels = (
-var getFastModels = (
-var pickModel = (
-  const copy = [...
+var getBestModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "best", boosts) - scoreModel(a, "best", boosts));
+var getFastModels = (models2, boosts = {}) => models2.sort((a, b) => scoreModel(b, "fast", boosts) - scoreModel(a, "fast", boosts));
+var pickModel = (models2, downtimes = []) => {
+  const copy = [...models2];
   const elasped = (date) => (/* @__PURE__ */ new Date()).getTime() - new Date(date).getTime();
   const DOWNTIME_THRESHOLD = 1e3 * 60 * DOWNTIME_THRESHOLD_MINUTES;
   if (!copy.length) {
@@ -935,7 +1947,7 @@ var pickModel = (models, downtimes = []) => {
       return ref;
     }
   }
-  throw new Error(`All models are down: ${
+  throw new Error(`All models are down: ${models2.join(", ")}`);
 };
 var ModelProvider = class {
 };
@@ -959,7 +1971,7 @@ var RemoteModelProvider = class extends ModelProvider {
   }
   async fetchInstalledModels() {
     const integrationNames = await this._fetchInstalledIntegrationNames();
-    const
+    const models2 = [];
     await Promise.allSettled(
       integrationNames.map(async (integration) => {
         const { output } = await this._client.callAction({
@@ -971,7 +1983,7 @@
         }
         for (const model of output.models) {
           if (model.name && model.id && model.input && model.tags) {
-
+            models2.push({
              ref: `${integration}:${model.id}`,
              integration,
              id: model.id,
@@ -985,7 +1997,7 @@
          }
      })
    );
-    return
+    return models2;
   }
   async fetchModelPreferences() {
     try {
@@ -1096,10 +2108,10 @@ var Cognitive = class _Cognitive {
     if (this._preferences) {
       return this._preferences;
     }
-    const
+    const models2 = await this.fetchInstalledModels();
     this._preferences = {
-      best: getBestModels(
-      fast: getFastModels(
+      best: getBestModels(models2).map((m) => m.ref),
+      fast: getFastModels(models2).map((m) => m.ref),
       downtimes: []
     };
     await this._provider.saveModelPreferences(this._preferences);
@@ -1138,6 +2150,12 @@
     return parseRef(pickModel([ref, ...preferences.best, ...preferences.fast], downtimes));
   }
   async getModelDetails(model) {
+    if (this._useBeta) {
+      const resolvedModel = getCognitiveV2Model(model);
+      if (resolvedModel) {
+        return { ...resolvedModel, ref: resolvedModel.id, integration: "cognitive-v2" };
+      }
+    }
     await this.fetchInstalledModels();
     const { integration, model: modelName } = await this._selectModel(model);
     const def = this._models.find((m) => m.integration === integration && (m.name === modelName || m.id === modelName));
@@ -1147,7 +2165,7 @@
     return def;
   }
   async generateContent(input) {
-    if (!this._useBeta) {
+    if (!this._useBeta || !getCognitiveV2Model(input.model)) {
       return this._generateContent(input);
     }
     const betaClient = new CognitiveBeta(this._client.config);
@@ -1267,6 +2285,7 @@
   Cognitive,
   CognitiveBeta,
   ModelProvider,
-  RemoteModelProvider
+  RemoteModelProvider,
+  getCognitiveV2Model
 });
 //# sourceMappingURL=index.cjs.map
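
Taken together, the last three hunks mean: with the beta flag set, model lookups are served from the static cognitive-v2 catalog, and `generateContent` only takes the beta path for model strings the catalog can resolve; everything else falls back to the legacy `_generateContent` path. Since `getCognitiveV2Model` is now exported from the package root, that eligibility check can be reproduced directly (a sketch based on the export list in this diff):

```ts
import { getCognitiveV2Model } from "@botpress/cognitive";

// Same guard generateContent now applies before delegating to CognitiveBeta:
const betaEligible = getCognitiveV2Model("openai:gpt-5-mini-2025-08-07") !== undefined; // true
const legacyOnly = getCognitiveV2Model("my-custom-model") === undefined;                // true -> legacy path
```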