@gammatech/aijsx 0.2.0-beta.3 → 0.2.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -34,6 +34,10 @@ declare function OpenAIChatCompletion(props: OpenAIChatCompletionProps, { logger
34
34
  type ValidOpenAIVisionModel = 'gpt-4-vision-preview';
35
35
  declare const ContentTypeImage: (_props: {
36
36
  url: string;
37
+ dimensions?: {
38
+ width: number;
39
+ height: number;
40
+ };
37
41
  detail?: 'auto' | 'high' | 'low';
38
42
  }) => null;
39
43
  type OpenAIVisionChatCompletionProps = {
package/dist/index.d.ts CHANGED
@@ -34,6 +34,10 @@ declare function OpenAIChatCompletion(props: OpenAIChatCompletionProps, { logger
34
34
  type ValidOpenAIVisionModel = 'gpt-4-vision-preview';
35
35
  declare const ContentTypeImage: (_props: {
36
36
  url: string;
37
+ dimensions?: {
38
+ width: number;
39
+ height: number;
40
+ };
37
41
  detail?: 'auto' | 'high' | 'low';
38
42
  }) => null;
39
43
  type OpenAIVisionChatCompletionProps = {
package/dist/index.js CHANGED
@@ -740,7 +740,8 @@ function tokenCountForOpenAIMessage(message) {
740
740
  case "system":
741
741
  case "user":
742
742
  return (
743
- // TODO this isn't working for vision
743
+ // NOTE: this function should only be called for non vision requests,
744
+ // so message.content will be a string and not ChatCompletionContentPart[]
744
745
  TOKENS_PER_MESSAGE + tokenizer.encode(message.content).length
745
746
  );
746
747
  }
@@ -772,6 +773,24 @@ function tokenCountForOpenAIVisionMessage(message) {
772
773
  }
773
774
  }
774
775
 
776
+ // src/lib/openai/utils.ts
777
+ var renderChatMessageContent = (content) => {
778
+ if (content == null) {
779
+ return "";
780
+ }
781
+ if (typeof content === "string") {
782
+ return content;
783
+ }
784
+ return content.map((part) => {
785
+ if (part.type === "text") {
786
+ return part.text;
787
+ } else if (part.type === "image_url") {
788
+ return `<ContentTypeImage url="${part.image_url.url}" detail="${part.image_url.detail || "auto"}" />`;
789
+ }
790
+ throw new Error("Invalid ChatCompletionContentPart type");
791
+ }).join("\n\n");
792
+ };
793
+
775
794
  // src/lib/openai/OpenAI.tsx
776
795
  var defaultClient = null;
777
796
  var OpenAIClientContext = createContext(() => {
@@ -826,8 +845,7 @@ async function* OpenAIChatCompletion(props, { logger, render, getContext }) {
826
845
  const renderedMessages = openAIMessages.map((message) => {
827
846
  return {
828
847
  role: message.role,
829
- // TODO support gpt4 vision
830
- content: message.content,
848
+ content: renderChatMessageContent(message.content),
831
849
  tokens: tokenCountForOpenAIMessage(message)
832
850
  };
833
851
  });
@@ -915,6 +933,7 @@ function buildOpenAIVisionChatMessages(childrenXml) {
915
933
  if (!topLevelValid) {
916
934
  throw new Error("Invalid top level chat message tags");
917
935
  }
936
+ const dimensions = /* @__PURE__ */ new WeakMap();
918
937
  for (const node of parsed.childNodes) {
919
938
  if (node.nodeName === "UserMessage") {
920
939
  const parts = node.childNodes.map((n) => {
@@ -924,13 +943,15 @@ function buildOpenAIVisionChatMessages(childrenXml) {
924
943
  text: n.value
925
944
  };
926
945
  } else if (n.nodeName === "ContentTypeImage") {
927
- return {
946
+ const imagePart = {
928
947
  type: "image_url",
929
948
  image_url: {
930
949
  url: n.attributes.url,
931
950
  detail: n.attributes.detail || "auto"
932
951
  }
933
952
  };
953
+ dimensions.set(imagePart, n.attributes.dimensions);
954
+ return imagePart;
934
955
  }
935
956
  throw new Error(
936
957
  "Invalid ChatCompletionContentPart, expecting text or ContentTypeImage"
@@ -952,7 +973,7 @@ function buildOpenAIVisionChatMessages(childrenXml) {
952
973
  });
953
974
  }
954
975
  }
955
- return messages;
976
+ return { messages, dimensions };
956
977
  }
957
978
  async function* OpenAIVisionChatCompletion(props, { logger, render, getContext }) {
958
979
  const startTime = performance.now();
@@ -961,38 +982,61 @@ async function* OpenAIVisionChatCompletion(props, { logger, render, getContext }
961
982
  if (!client) {
962
983
  throw new Error("[OpenAI] must supply OpenAI model via context");
963
984
  }
964
- const openAIMessages = buildOpenAIVisionChatMessages(
985
+ const { messages: openAIMessages, dimensions } = buildOpenAIVisionChatMessages(
965
986
  await render(props.children, {
966
987
  preserveTags: true,
967
988
  renderedProps: {
968
989
  ContentTypeImage: {
969
990
  url: true,
991
+ dimensions: true,
970
992
  detail: true
971
993
  }
972
994
  }
973
995
  })
974
996
  );
975
997
  const renderedMessages = openAIMessages.map((message) => {
976
- const renderContent = (content2) => {
977
- if (content2 == null) {
978
- return "";
979
- }
980
- if (typeof content2 === "string") {
981
- return content2;
998
+ if (message.role === "user") {
999
+ if (typeof message.content === "string") {
1000
+ return {
1001
+ role: message.role,
1002
+ content: message.content,
1003
+ tokens: tokenCountForOpenAIMessage(message)
1004
+ };
982
1005
  }
983
- return content2.map((part) => {
1006
+ const BASE_COST = 85;
1007
+ const tokens = message.content.reduce((acc, part) => {
984
1008
  if (part.type === "text") {
985
- return part.text;
986
- } else if (part.type === "image_url") {
987
- return `<ContentTypeImage url="${part.image_url.url}" detail="${part.image_url.detail || "auto"}" />`;
1009
+ return acc + tokenCountForOpenAIMessage({
1010
+ role: message.role,
1011
+ content: part.text
1012
+ });
988
1013
  }
989
- throw new Error("Invalid ChatCompletionContentPart type");
990
- }).join(" ");
991
- };
1014
+ const detail = part.image_url.detail || "auto";
1015
+ if (detail === "low") {
1016
+ return acc + BASE_COST;
1017
+ } else if (detail === "high") {
1018
+ const dim = dimensions.get(part);
1019
+ if (!dim) {
1020
+ return acc + (170 * 4 + BASE_COST);
1021
+ }
1022
+ const area = dim.width * dim.height;
1023
+ const num512Images = area / (512 * 512);
1024
+ const highCost = num512Images * 170;
1025
+ return acc + highCost + BASE_COST;
1026
+ } else {
1027
+ return acc + (170 * 4 + BASE_COST);
1028
+ }
1029
+ }, 0);
1030
+ return {
1031
+ role: message.role,
1032
+ content: renderChatMessageContent(message.content),
1033
+ tokens
1034
+ };
1035
+ }
992
1036
  return {
993
1037
  role: message.role,
994
- content: renderContent(message.content),
995
- tokens: tokenCountForOpenAIVisionMessage(message)
1038
+ content: renderChatMessageContent(message.content),
1039
+ tokens: tokenCountForOpenAIMessage(message)
996
1040
  };
997
1041
  });
998
1042
  const chatCompletionRequest = {
package/dist/index.mjs CHANGED
@@ -657,7 +657,8 @@ function tokenCountForOpenAIMessage(message) {
657
657
  case "system":
658
658
  case "user":
659
659
  return (
660
- // TODO this isn't working for vision
660
+ // NOTE: this function should only be called for non vision requests,
661
+ // so message.content will be a string and not ChatCompletionContentPart[]
661
662
  TOKENS_PER_MESSAGE + tokenizer.encode(message.content).length
662
663
  );
663
664
  }
@@ -689,6 +690,24 @@ function tokenCountForOpenAIVisionMessage(message) {
689
690
  }
690
691
  }
691
692
 
693
+ // src/lib/openai/utils.ts
694
+ var renderChatMessageContent = (content) => {
695
+ if (content == null) {
696
+ return "";
697
+ }
698
+ if (typeof content === "string") {
699
+ return content;
700
+ }
701
+ return content.map((part) => {
702
+ if (part.type === "text") {
703
+ return part.text;
704
+ } else if (part.type === "image_url") {
705
+ return `<ContentTypeImage url="${part.image_url.url}" detail="${part.image_url.detail || "auto"}" />`;
706
+ }
707
+ throw new Error("Invalid ChatCompletionContentPart type");
708
+ }).join("\n\n");
709
+ };
710
+
692
711
  // src/lib/openai/OpenAI.tsx
693
712
  var defaultClient = null;
694
713
  var OpenAIClientContext = createContext(() => {
@@ -743,8 +762,7 @@ async function* OpenAIChatCompletion(props, { logger, render, getContext }) {
743
762
  const renderedMessages = openAIMessages.map((message) => {
744
763
  return {
745
764
  role: message.role,
746
- // TODO support gpt4 vision
747
- content: message.content,
765
+ content: renderChatMessageContent(message.content),
748
766
  tokens: tokenCountForOpenAIMessage(message)
749
767
  };
750
768
  });
@@ -832,6 +850,7 @@ function buildOpenAIVisionChatMessages(childrenXml) {
832
850
  if (!topLevelValid) {
833
851
  throw new Error("Invalid top level chat message tags");
834
852
  }
853
+ const dimensions = /* @__PURE__ */ new WeakMap();
835
854
  for (const node of parsed.childNodes) {
836
855
  if (node.nodeName === "UserMessage") {
837
856
  const parts = node.childNodes.map((n) => {
@@ -841,13 +860,15 @@ function buildOpenAIVisionChatMessages(childrenXml) {
841
860
  text: n.value
842
861
  };
843
862
  } else if (n.nodeName === "ContentTypeImage") {
844
- return {
863
+ const imagePart = {
845
864
  type: "image_url",
846
865
  image_url: {
847
866
  url: n.attributes.url,
848
867
  detail: n.attributes.detail || "auto"
849
868
  }
850
869
  };
870
+ dimensions.set(imagePart, n.attributes.dimensions);
871
+ return imagePart;
851
872
  }
852
873
  throw new Error(
853
874
  "Invalid ChatCompletionContentPart, expecting text or ContentTypeImage"
@@ -869,7 +890,7 @@ function buildOpenAIVisionChatMessages(childrenXml) {
869
890
  });
870
891
  }
871
892
  }
872
- return messages;
893
+ return { messages, dimensions };
873
894
  }
874
895
  async function* OpenAIVisionChatCompletion(props, { logger, render, getContext }) {
875
896
  const startTime = performance.now();
@@ -878,38 +899,61 @@ async function* OpenAIVisionChatCompletion(props, { logger, render, getContext }
878
899
  if (!client) {
879
900
  throw new Error("[OpenAI] must supply OpenAI model via context");
880
901
  }
881
- const openAIMessages = buildOpenAIVisionChatMessages(
902
+ const { messages: openAIMessages, dimensions } = buildOpenAIVisionChatMessages(
882
903
  await render(props.children, {
883
904
  preserveTags: true,
884
905
  renderedProps: {
885
906
  ContentTypeImage: {
886
907
  url: true,
908
+ dimensions: true,
887
909
  detail: true
888
910
  }
889
911
  }
890
912
  })
891
913
  );
892
914
  const renderedMessages = openAIMessages.map((message) => {
893
- const renderContent = (content2) => {
894
- if (content2 == null) {
895
- return "";
896
- }
897
- if (typeof content2 === "string") {
898
- return content2;
915
+ if (message.role === "user") {
916
+ if (typeof message.content === "string") {
917
+ return {
918
+ role: message.role,
919
+ content: message.content,
920
+ tokens: tokenCountForOpenAIMessage(message)
921
+ };
899
922
  }
900
- return content2.map((part) => {
923
+ const BASE_COST = 85;
924
+ const tokens = message.content.reduce((acc, part) => {
901
925
  if (part.type === "text") {
902
- return part.text;
903
- } else if (part.type === "image_url") {
904
- return `<ContentTypeImage url="${part.image_url.url}" detail="${part.image_url.detail || "auto"}" />`;
926
+ return acc + tokenCountForOpenAIMessage({
927
+ role: message.role,
928
+ content: part.text
929
+ });
905
930
  }
906
- throw new Error("Invalid ChatCompletionContentPart type");
907
- }).join(" ");
908
- };
931
+ const detail = part.image_url.detail || "auto";
932
+ if (detail === "low") {
933
+ return acc + BASE_COST;
934
+ } else if (detail === "high") {
935
+ const dim = dimensions.get(part);
936
+ if (!dim) {
937
+ return acc + (170 * 4 + BASE_COST);
938
+ }
939
+ const area = dim.width * dim.height;
940
+ const num512Images = area / (512 * 512);
941
+ const highCost = num512Images * 170;
942
+ return acc + highCost + BASE_COST;
943
+ } else {
944
+ return acc + (170 * 4 + BASE_COST);
945
+ }
946
+ }, 0);
947
+ return {
948
+ role: message.role,
949
+ content: renderChatMessageContent(message.content),
950
+ tokens
951
+ };
952
+ }
909
953
  return {
910
954
  role: message.role,
911
- content: renderContent(message.content),
912
- tokens: tokenCountForOpenAIVisionMessage(message)
955
+ content: renderChatMessageContent(message.content),
956
+ tokens: tokenCountForOpenAIMessage(message)
913
957
  };
914
958
  });
915
959
  const chatCompletionRequest = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gammatech/aijsx",
3
- "version": "0.2.0-beta.3",
3
+ "version": "0.2.0-beta.5",
4
4
  "description": "Rewrite of aijsx",
5
5
  "author": "Jordan Garcia",
6
6
  "license": "MIT",