ag2 0.4.1__py3-none-any.whl → 0.4.2b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ag2 might be problematic. Click here for more details.

Files changed (161) hide show
  1. ag2-0.4.2b1.dist-info/METADATA +19 -0
  2. ag2-0.4.2b1.dist-info/RECORD +6 -0
  3. ag2-0.4.2b1.dist-info/top_level.txt +1 -0
  4. ag2-0.4.1.dist-info/METADATA +0 -500
  5. ag2-0.4.1.dist-info/RECORD +0 -158
  6. ag2-0.4.1.dist-info/top_level.txt +0 -1
  7. autogen/__init__.py +0 -17
  8. autogen/_pydantic.py +0 -116
  9. autogen/agentchat/__init__.py +0 -42
  10. autogen/agentchat/agent.py +0 -142
  11. autogen/agentchat/assistant_agent.py +0 -85
  12. autogen/agentchat/chat.py +0 -306
  13. autogen/agentchat/contrib/__init__.py +0 -0
  14. autogen/agentchat/contrib/agent_builder.py +0 -788
  15. autogen/agentchat/contrib/agent_eval/agent_eval.py +0 -107
  16. autogen/agentchat/contrib/agent_eval/criterion.py +0 -47
  17. autogen/agentchat/contrib/agent_eval/critic_agent.py +0 -47
  18. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +0 -42
  19. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +0 -48
  20. autogen/agentchat/contrib/agent_eval/task.py +0 -43
  21. autogen/agentchat/contrib/agent_optimizer.py +0 -450
  22. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  23. autogen/agentchat/contrib/capabilities/agent_capability.py +0 -21
  24. autogen/agentchat/contrib/capabilities/generate_images.py +0 -297
  25. autogen/agentchat/contrib/capabilities/teachability.py +0 -406
  26. autogen/agentchat/contrib/capabilities/text_compressors.py +0 -72
  27. autogen/agentchat/contrib/capabilities/transform_messages.py +0 -92
  28. autogen/agentchat/contrib/capabilities/transforms.py +0 -565
  29. autogen/agentchat/contrib/capabilities/transforms_util.py +0 -120
  30. autogen/agentchat/contrib/capabilities/vision_capability.py +0 -217
  31. autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
  32. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +0 -41
  33. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +0 -29
  34. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +0 -29
  35. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +0 -29
  36. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +0 -22
  37. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +0 -31
  38. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +0 -26
  39. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +0 -55
  40. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +0 -54
  41. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +0 -39
  42. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +0 -22
  43. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +0 -35
  44. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +0 -61
  45. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +0 -62
  46. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +0 -48
  47. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +0 -34
  48. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +0 -22
  49. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +0 -36
  50. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +0 -22
  51. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +0 -19
  52. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +0 -29
  53. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +0 -32
  54. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +0 -17
  55. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +0 -26
  56. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +0 -24
  57. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +0 -28
  58. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +0 -29
  59. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +0 -35
  60. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +0 -40
  61. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +0 -23
  62. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +0 -37
  63. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +0 -16
  64. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +0 -16
  65. autogen/agentchat/contrib/captainagent/tools/requirements.txt +0 -10
  66. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +0 -34
  67. autogen/agentchat/contrib/captainagent.py +0 -490
  68. autogen/agentchat/contrib/gpt_assistant_agent.py +0 -545
  69. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  70. autogen/agentchat/contrib/graph_rag/document.py +0 -30
  71. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +0 -111
  72. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +0 -81
  73. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +0 -56
  74. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +0 -64
  75. autogen/agentchat/contrib/img_utils.py +0 -390
  76. autogen/agentchat/contrib/llamaindex_conversable_agent.py +0 -123
  77. autogen/agentchat/contrib/llava_agent.py +0 -176
  78. autogen/agentchat/contrib/math_user_proxy_agent.py +0 -471
  79. autogen/agentchat/contrib/multimodal_conversable_agent.py +0 -128
  80. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +0 -325
  81. autogen/agentchat/contrib/retrieve_assistant_agent.py +0 -56
  82. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +0 -705
  83. autogen/agentchat/contrib/society_of_mind_agent.py +0 -203
  84. autogen/agentchat/contrib/swarm_agent.py +0 -463
  85. autogen/agentchat/contrib/text_analyzer_agent.py +0 -76
  86. autogen/agentchat/contrib/tool_retriever.py +0 -120
  87. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  88. autogen/agentchat/contrib/vectordb/base.py +0 -243
  89. autogen/agentchat/contrib/vectordb/chromadb.py +0 -326
  90. autogen/agentchat/contrib/vectordb/mongodb.py +0 -559
  91. autogen/agentchat/contrib/vectordb/pgvectordb.py +0 -958
  92. autogen/agentchat/contrib/vectordb/qdrant.py +0 -334
  93. autogen/agentchat/contrib/vectordb/utils.py +0 -126
  94. autogen/agentchat/contrib/web_surfer.py +0 -305
  95. autogen/agentchat/conversable_agent.py +0 -2908
  96. autogen/agentchat/groupchat.py +0 -1668
  97. autogen/agentchat/user_proxy_agent.py +0 -109
  98. autogen/agentchat/utils.py +0 -207
  99. autogen/browser_utils.py +0 -291
  100. autogen/cache/__init__.py +0 -10
  101. autogen/cache/abstract_cache_base.py +0 -78
  102. autogen/cache/cache.py +0 -182
  103. autogen/cache/cache_factory.py +0 -85
  104. autogen/cache/cosmos_db_cache.py +0 -150
  105. autogen/cache/disk_cache.py +0 -109
  106. autogen/cache/in_memory_cache.py +0 -61
  107. autogen/cache/redis_cache.py +0 -128
  108. autogen/code_utils.py +0 -745
  109. autogen/coding/__init__.py +0 -22
  110. autogen/coding/base.py +0 -113
  111. autogen/coding/docker_commandline_code_executor.py +0 -262
  112. autogen/coding/factory.py +0 -45
  113. autogen/coding/func_with_reqs.py +0 -203
  114. autogen/coding/jupyter/__init__.py +0 -22
  115. autogen/coding/jupyter/base.py +0 -32
  116. autogen/coding/jupyter/docker_jupyter_server.py +0 -164
  117. autogen/coding/jupyter/embedded_ipython_code_executor.py +0 -182
  118. autogen/coding/jupyter/jupyter_client.py +0 -224
  119. autogen/coding/jupyter/jupyter_code_executor.py +0 -161
  120. autogen/coding/jupyter/local_jupyter_server.py +0 -168
  121. autogen/coding/local_commandline_code_executor.py +0 -410
  122. autogen/coding/markdown_code_extractor.py +0 -44
  123. autogen/coding/utils.py +0 -57
  124. autogen/exception_utils.py +0 -46
  125. autogen/extensions/__init__.py +0 -0
  126. autogen/formatting_utils.py +0 -76
  127. autogen/function_utils.py +0 -362
  128. autogen/graph_utils.py +0 -148
  129. autogen/io/__init__.py +0 -15
  130. autogen/io/base.py +0 -105
  131. autogen/io/console.py +0 -43
  132. autogen/io/websockets.py +0 -213
  133. autogen/logger/__init__.py +0 -11
  134. autogen/logger/base_logger.py +0 -140
  135. autogen/logger/file_logger.py +0 -287
  136. autogen/logger/logger_factory.py +0 -29
  137. autogen/logger/logger_utils.py +0 -42
  138. autogen/logger/sqlite_logger.py +0 -459
  139. autogen/math_utils.py +0 -356
  140. autogen/oai/__init__.py +0 -33
  141. autogen/oai/anthropic.py +0 -428
  142. autogen/oai/bedrock.py +0 -606
  143. autogen/oai/cerebras.py +0 -270
  144. autogen/oai/client.py +0 -1148
  145. autogen/oai/client_utils.py +0 -167
  146. autogen/oai/cohere.py +0 -453
  147. autogen/oai/completion.py +0 -1216
  148. autogen/oai/gemini.py +0 -469
  149. autogen/oai/groq.py +0 -281
  150. autogen/oai/mistral.py +0 -279
  151. autogen/oai/ollama.py +0 -582
  152. autogen/oai/openai_utils.py +0 -811
  153. autogen/oai/together.py +0 -343
  154. autogen/retrieve_utils.py +0 -487
  155. autogen/runtime_logging.py +0 -163
  156. autogen/token_count_utils.py +0 -259
  157. autogen/types.py +0 -20
  158. autogen/version.py +0 -7
  159. {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/LICENSE +0 -0
  160. {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/NOTICE.md +0 -0
  161. {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/WHEEL +0 -0
@@ -1,81 +0,0 @@
1
- # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from typing import Any, Dict, List, Optional, Tuple, Union
6
-
7
- from autogen import Agent, ConversableAgent, UserProxyAgent
8
-
9
- from .falkor_graph_query_engine import FalkorGraphQueryEngine
10
- from .graph_query_engine import GraphStoreQueryResult
11
- from .graph_rag_capability import GraphRagCapability
12
-
13
-
14
class FalkorGraphRagCapability(GraphRagCapability):
    """
    The FalkorDB GraphRAG capability integrates FalkorDB with graphrag_sdk version: 0.1.3b0.
    Ref: https://github.com/FalkorDB/GraphRAG-SDK/tree/2-move-away-from-sql-to-json-ontology-detection

    For usage, please refer to example notebook/agentchat_graph_rag_falkordb.ipynb
    """

    # Reply used whenever there is nothing to ask or the query engine returns no answer.
    _NO_ANSWER_REPLY = "I'm sorry, I don't have an answer for that."

    def __init__(self, query_engine: FalkorGraphQueryEngine):
        """
        Initialize the GraphRAG capability with a graph query engine.

        Args:
            query_engine: The engine whose ``query`` method answers incoming questions.
        """
        self.query_engine = query_engine

    def add_to_agent(self, agent: UserProxyAgent):
        """
        Add FalkorDB GraphRAG capability to a UserProxyAgent.

        The agent must not carry an LLM configuration, so that the returned message
        contains information retrieved from the graph DB instead of a reply
        generated by the agent's own LLM.

        Args:
            agent: The agent that will answer using the FalkorDB query engine.

        Raises:
            Exception: If ``agent.llm_config`` is neither ``None`` nor ``False``.
        """
        self.graph_rag_agent = agent

        # Validate the agent config
        if agent.llm_config not in (None, False):
            raise Exception(
                "Agents with GraphRAG capabilities do not use an LLM configuration. Please set your llm_config to None or False."
            )

        # Register method to generate the reply using a FalkorDB query.
        # All other reply methods are removed so this is the only reply path.
        agent.register_reply(
            [ConversableAgent, None], self._reply_using_falkordb_query, position=0, remove_other_reply_funcs=True
        )

    def _reply_using_falkordb_query(
        self,
        recipient: ConversableAgent,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """
        Answer the last message of the conversation by querying the graph query engine.

        If there is no message to answer, the last message carries no content, or the
        engine returns no answer, a default message is returned:
        "I'm sorry, I don't have an answer for that."

        Args:
            recipient: The agent instance that will receive the message.
            messages: A list of messages in the conversation history with the sender.
            sender: The agent instance that sent the message.
            config: Optional configuration for message processing.

        Returns:
            A tuple ``(True, reply)``; the first element marks the reply as final.
        """
        # `messages` defaults to None and may be empty; indexing it would raise.
        if not messages:
            return True, self._NO_ANSWER_REPLY

        question = self._get_last_question(messages[-1])
        if question is None:
            # Nothing to ask: do not hand a None question to the query engine.
            return True, self._NO_ANSWER_REPLY

        result: GraphStoreQueryResult = self.query_engine.query(question)

        return True, result.answer if result.answer else self._NO_ANSWER_REPLY

    def _get_last_question(self, message: Union[Dict, str]):
        """
        Extract the question text from a single message.

        Args:
            message: Either the raw message string or a message dict.

        Returns:
            The string itself, the dict's ``"content"`` value, or ``None`` when the
            message is neither a string nor a dict with a ``"content"`` key.
        """
        if isinstance(message, str):
            return message
        if isinstance(message, dict) and "content" in message:
            return message["content"]
        return None
@@ -1,56 +0,0 @@
1
- # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
- #
5
- # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
- # SPDX-License-Identifier: MIT
7
- from dataclasses import dataclass, field
8
- from typing import List, Optional, Protocol
9
-
10
- from .document import Document
11
-
12
-
13
@dataclass
class GraphStoreQueryResult:
    """
    Container for the outcome of a single graph store query.

    Attributes:
        answer: Human readable answer to the question/query; defaults to None.
        results: Intermediate results for the question/query, e.g. node entities;
            defaults to a fresh empty list per instance.
    """

    answer: Optional[str] = None
    results: list = field(default_factory=list)
24
-
25
-
26
class GraphQueryEngine(Protocol):
    """A structural interface (Protocol) for a graph query engine on top of an underlying graph database.

    This interface defines the basic methods for graph-based RAG.
    """

    # NOTE: `Optional[...]` is used instead of `X | None` because the annotation is
    # evaluated at definition time and the `|` form fails on Python < 3.10.
    def init_db(self, input_doc: Optional[List[Document]] = None):
        """
        This method initializes the graph database with the input documents or records.
        Usually, it takes the following steps,
        1. connect to a graph database.
        2. extract graph nodes and edges based on input data, graph schema, etc.
        3. build indexes, etc.

        Args:
            input_doc: a list of input documents that are used to build the graph in database.

        """
        pass

    def add_records(self, new_records: List) -> bool:
        """
        Add new records to the underlying database and add to the graph if required.
        """
        pass

    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
        """
        This method transforms a string format question into a database query and returns the result.
        """
        pass
@@ -1,64 +0,0 @@
1
- # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
- #
5
- # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
- # SPDX-License-Identifier: MIT
7
- from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
8
- from autogen.agentchat.conversable_agent import ConversableAgent
9
-
10
- from .graph_query_engine import GraphQueryEngine
11
-
12
-
13
class GraphRagCapability(AgentCapability):
    """
    Gives a conversable agent graph-based RAG ability by means of a graph query engine.

    An agent class with graph-based RAG capability could
    1. create a graph in the underlying database with input documents.
    2. retrieve relevant information based on messages received by the agent.
    3. generate answers from retrieved information and send messages back.

    For example,
    graph_query_engine = GraphQueryEngine(...)
    graph_query_engine.init_db([Document(doc1), Document(doc2), ...])

    graph_rag_agent = ConversableAgent(
        name="graph_rag_agent",
        max_consecutive_auto_reply=3,
        ...
    )
    graph_rag_capability = GraphRagCapability(graph_query_engine)
    graph_rag_capability.add_to_agent(graph_rag_agent)

    user_proxy = UserProxyAgent(
        name="user_proxy",
        code_execution_config=False,
        is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
        human_input_mode="ALWAYS",
    )
    user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'")

    # ChatResult(
        # chat_id=None,
        # chat_history=[
            # {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'},
            # {'content': 'A few actors who have played in The Matrix are:
            #   - Keanu Reeves
            #   - Laurence Fishburne
            #   - Carrie-Anne Moss
            #   - Hugo Weaving',
            #   'role': 'user_proxy'},
        # ...)

    """

    def __init__(self, query_engine: GraphQueryEngine):
        """
        Set up the graph-based RAG capability around the given graph query engine.
        """
        ...

    def add_to_agent(self, agent: ConversableAgent):
        """Attach this capability to the given agent."""
        ...
@@ -1,390 +0,0 @@
1
- # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
- #
5
- # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
- # SPDX-License-Identifier: MIT
7
- import base64
8
- import copy
9
- import os
10
- import re
11
- from io import BytesIO
12
- from math import ceil
13
- from typing import Dict, List, Tuple, Union
14
-
15
- import requests
16
- from PIL import Image
17
-
18
- from autogen.agentchat import utils
19
-
20
# Per-model parameters used when counting the tokens an image costs.
# Keys are the model families matched by `num_tokens_from_gpt_image`.
MODEL_PARAMS = {
    "gpt-4-vision": {
        "max_edge": 2048,
        "min_edge": 768,
        "tile_size": 512,
        "base_token_count": 85,
        "token_multiplier": 170,
    },
    "gpt-4o-mini": {
        "max_edge": 2048,
        "min_edge": 768,
        "tile_size": 512,
        "base_token_count": 2833,
        "token_multiplier": 5667,
    },
    "gpt-4o": {
        "max_edge": 2048,
        "min_edge": 768,
        "tile_size": 512,
        "base_token_count": 85,
        "token_multiplier": 170,
    },
}
38
-
39
-
40
def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image:
    """
    Loads an image from a file, URL, data URI or base64 string and returns a PIL Image object.

    Parameters:
        image_file (str, or Image): The filename, URL, URI, or base64 string of the image file.
            A PIL Image is returned unchanged.

    Returns:
        Image.Image: The PIL Image object. Images loaded from a string are converted to RGB.

    Raises:
        requests.RequestException: If the URL cannot be fetched, times out, or
            answers with an HTTP error status.
    """
    if isinstance(image_file, Image.Image):
        # Already a PIL Image object
        return image_file

    # Remove one pair of surrounding quotes, if present
    if image_file.startswith('"') and image_file.endswith('"'):
        image_file = image_file[1:-1]
    if image_file.startswith("'") and image_file.endswith("'"):
        image_file = image_file[1:-1]

    if image_file.startswith(("http://", "https://")):
        # A URL file. Bound the wait and fail on HTTP errors instead of
        # handing an error page to the image decoder.
        response = requests.get(image_file, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    elif re.match(r"data:image/[\w.+-]+;base64,", image_file):
        # A data URI of any image subtype (png, jpeg, gif, webp, ...).
        # Drop the prefix and decode the base64 payload.
        base64_data = image_file.split(",", 1)[1]
        image = _to_pil(base64_data)
    elif os.path.exists(image_file):
        # A local file
        image = Image.open(image_file)
    else:
        # base64 encoded string
        image = _to_pil(image_file)

    return image.convert("RGB")
77
-
78
-
79
def get_image_data(image_file: Union[str, Image.Image], use_b64=True) -> Union[bytes, str]:
    """
    Loads an image and returns its PNG data either as raw bytes or as a base64-encoded string.

    The image is loaded with `get_pil_image`, re-encoded in memory as PNG, and the
    resulting binary content is returned directly or base64-encoded, depending on
    `use_b64`.

    Parameters:
        image_file (str, or Image): The path to the image file, a URL to an image, or a base64-encoded
                        string of the image.
        use_b64 (bool): If True, the function returns a base64-encoded string of the image data.
                        If False, it returns the raw byte data of the image. Defaults to True.

    Returns:
        Union[bytes, str]: The raw PNG bytes if `use_b64` is False, or a base64-encoded
               `str` (decoded as UTF-8) if `use_b64` is True.
    """
    image = get_pil_image(image_file)

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    content = buffered.getvalue()

    if use_b64:
        return base64.b64encode(content).decode("utf-8")
    return content
108
-
109
-
110
def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]:
    """
    Formats the input prompt by replacing image tags and returns the new prompt along with the loaded images.

    Each `<img ...>` tag whose image loads successfully is replaced by `<image>`
    (or `<image N>` when `order_image_tokens` is True). A tag whose image cannot be
    loaded is removed from the prompt and a warning is printed.

    Parameters:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - order_image_tokens (bool, optional): Whether to order the image tokens with numbers.
            It will be useful for GPT-4V. Defaults to False.

    Returns:
        - Tuple[str, List[str]]: A tuple containing the formatted string and a list of images (loaded in b64 format).
    """
    new_prompt = prompt
    images = []
    image_count = 0

    # Regular expression pattern for matching <img ...> tags
    img_tag_pattern = re.compile(r"<img ([^>]+)>")

    # Tags are located in the original prompt, but replaced in the running copy.
    for match in img_tag_pattern.finditer(prompt):
        image_location = match.group(1)

        try:
            img_data = get_image_data(image_location)
        except Exception as e:
            # Remove the token
            print(f"Warning! Unable to load image from {image_location}, because of {e}")
            new_prompt = new_prompt.replace(match.group(0), "", 1)
            continue

        images.append(img_data)

        # Replace the tag in the prompt, then advance the image counter
        new_token = f"<image {image_count}>" if order_image_tokens else "<image>"
        new_prompt = new_prompt.replace(match.group(0), new_token, 1)
        image_count += 1

    return new_prompt, images
154
-
155
-
156
def pil_to_data_uri(image: Image.Image) -> str:
    """
    Encode a PIL Image as PNG and wrap it in a data URI.

    Parameters:
        image (Image.Image): The PIL Image object.

    Returns:
        str: The data URI string.
    """
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    encoded_png = base64.b64encode(png_buffer.getvalue())
    return convert_base64_to_data_uri(encoded_png.decode("utf-8"))
170
-
171
-
172
def convert_base64_to_data_uri(base64_image):
    """
    Build a data URI from a base64 encoded image.

    The MIME type is sniffed from the leading bytes of the decoded payload
    (JPEG, PNG, GIF and WEBP signatures are recognised).

    Parameters:
        base64_image: The base64 encoded image data.

    Returns:
        The data URI in the form ``data:<mime type>;base64,<base64_image>``.
    """
    raw = base64.b64decode(base64_image)

    if raw.startswith(b"\x89PNG\r\n\x1a\n"):
        mime_type = "image/png"
    elif raw.startswith((b"GIF87a", b"GIF89a")):
        mime_type = "image/gif"
    elif raw.startswith(b"RIFF") and raw[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        # Covers the JPEG signature (FF D8 FF) as well as unknown formats,
        # for which jpeg is used as the best guess.
        mime_type = "image/jpeg"

    return f"data:{mime_type};base64,{base64_image}"
190
-
191
-
192
def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dict]]:
    """
    Formats the input prompt by replacing image tags and returns a list of text and images.

    A tag whose image cannot be loaded is skipped with a printed warning. Because the
    text offset is not advanced for a skipped tag, its raw text stays inside the
    next text item.

    Args:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - img_format (str): what image format should be used. One of "uri", "url", "pil".

    Returns:
        - List[Union[str, dict]]: A list of alternating text and image dictionary items.
          The list always ends with a text item holding the remaining text (possibly empty).
    """
    assert img_format in ["uri", "url", "pil"]

    output = []
    last_index = 0

    # Find all image tags
    for parsed_tag in utils.parse_tags_from_content("img", prompt):
        image_location = parsed_tag["attr"]["src"]
        try:
            if img_format == "pil":
                img_data = get_pil_image(image_location)
            elif img_format == "uri":
                img_data = get_image_data(image_location)
                img_data = convert_base64_to_data_uri(img_data)
            elif img_format == "url":
                img_data = image_location
            else:
                # Only reachable when the assertion above is disabled (python -O).
                raise ValueError(f"Unknown image format {img_format}")
        except Exception as e:
            # Warning and skip this token
            print(f"Warning! Unable to load image from {image_location}, because {e}")
            continue

        # Add text before this image tag to output list
        output.append({"type": "text", "text": prompt[last_index : parsed_tag["match"].start()]})

        # Add image data to output list
        output.append({"type": "image_url", "image_url": {"url": img_data}})

        last_index = parsed_tag["match"].end()

    # Add remaining text to output list
    output.append({"type": "text", "text": prompt[last_index:]})
    return output
239
-
240
-
241
def extract_img_paths(paragraph: str) -> list:
    """
    Collect image paths (URLs or local paths) mentioned in a text paragraph.

    A path is recognised by its extension (jpg, jpeg, png, gif or bmp),
    matched case-insensitively.

    Parameters:
        paragraph (str): The input text paragraph.

    Returns:
        list: A list of extracted image paths, in order of appearance.
    """
    return re.findall(
        r"\b(?:http[s]?://\S+\.(?:jpg|jpeg|png|gif|bmp)|\S+\.(?:jpg|jpeg|png|gif|bmp))\b",
        paragraph,
        flags=re.IGNORECASE,
    )
259
-
260
-
261
def _to_pil(data: str) -> Image.Image:
    """
    Turn a base64 encoded image string into a PIL Image object.

    The string is base64-decoded to bytes, the bytes are wrapped in an in-memory
    buffer, and the buffer is opened as a PIL Image.

    Parameters:
        data (str): The encoded image data string.

    Returns:
        Image.Image: The PIL Image object created from the input data.
    """
    raw_bytes = base64.b64decode(data)
    stream = BytesIO(raw_bytes)
    return Image.open(stream)
275
-
276
-
277
def message_formatter_pil_to_b64(messages: List[Dict]) -> List[Dict]:
    """
    Replace the PIL images found in the messages with base64 encoded data URIs.

    Every message whose 'content' is a list is deep-copied, and each dict item of
    that list carrying an 'image_url' key gets its ['image_url']['url'] value passed
    through `pil_to_data_uri`. All other messages are passed along as the same object.

    Parameters:
        messages (List[Dict]): A list of message dictionaries. Each dictionary
                               may contain a 'content' key with a list of items,
                               some of which might be image URLs.

    Returns:
        List[Dict]: A new list of message dictionaries with PIL image URLs in the
                    'image_url' key converted to base64 encoded data URIs.

    Example Input:
        [
            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
            {'content': [
                {'type': 'text', 'text': "What's the breed of this dog here? \\n"},
                {'type': 'image_url', 'image_url': {'url': a PIL.Image.Image}},
                {'type': 'text', 'text': '.'}],
            'role': 'user'}
        ]

    Example Output:
        [
            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
            {'content': [
                {'type': 'text', 'text': "What's the breed of this dog here? \\n"},
                {'type': 'image_url', 'image_url': {'url': a B64 Image}},
                {'type': 'text', 'text': '.'}],
            'role': 'user'}
        ]
    """
    converted = []
    for entry in messages:
        # Only the list-style ("new GPT") content format can carry images.
        is_multipart = isinstance(entry, dict) and isinstance(entry.get("content"), list)
        if not is_multipart:
            converted.append(entry)
            continue

        # Work on a copy so the caller's message is left untouched.
        entry = copy.deepcopy(entry)
        for part in entry["content"]:
            if not isinstance(part, dict) or "image_url" not in part:
                continue
            image_ref = part["image_url"]
            image_ref["url"] = pil_to_data_uri(image_ref["url"])
        converted.append(entry)

    return converted
327
-
328
-
329
def num_tokens_from_gpt_image(
    image_data: Union[str, Image.Image], model: str = "gpt-4-vision", low_quality: bool = False
) -> int:
    """
    Calculate the number of tokens required to process an image based on its dimensions
    after scaling for different GPT models. Supports "gpt-4-vision", "gpt-4o", and "gpt-4o-mini".
    This function scales the image so that its longest edge is at most 2048 pixels and its shortest
    edge is at most 768 pixels (for "gpt-4-vision"). It then calculates the number of 512x512 tiles
    needed to cover the scaled image and computes the total tokens based on the number of these tiles.

    Reference: https://openai.com/api/pricing/

    Args:
        image_data : Union[str, Image.Image]: The image data which can either be a base64
            encoded string, a URL, a file path, or a PIL Image object.
        model: str: The model being used for image processing. Can be "gpt-4-vision", "gpt-4o", or "gpt-4o-mini".
        low_quality: bool: If True, only the model's base token count is returned and
            the image dimensions are ignored. Defaults to False.

    Returns:
        int: The total number of tokens required for processing the image.

    Raises:
        ValueError: If `model` does not match any supported model family.

    Examples:
    --------
    >>> from PIL import Image
    >>> img = Image.new('RGB', (2500, 2500), color = 'red')
    >>> num_tokens_from_gpt_image(img, model="gpt-4-vision")
    765
    """

    image = get_pil_image(image_data)  # PIL Image
    width, height = image.size

    # Determine model parameters. "gpt-4o-mini" must be tested before "gpt-4o",
    # because the latter is a substring of the former.
    if "gpt-4-vision" in model or "gpt-4-turbo" in model or "gpt-4v" in model or "gpt-4-v" in model:
        params = MODEL_PARAMS["gpt-4-vision"]
    elif "gpt-4o-mini" in model:
        params = MODEL_PARAMS["gpt-4o-mini"]
    elif "gpt-4o" in model:
        params = MODEL_PARAMS["gpt-4o"]
    else:
        raise ValueError(
            f"Model {model} is not supported. Choose 'gpt-4-vision', 'gpt-4-turbo', 'gpt-4v', 'gpt-4-v', 'gpt-4o', or 'gpt-4o-mini'."
        )

    if low_quality:
        return params["base_token_count"]

    # 1. Constrain the longest edge.
    #    Each scaled edge is kept at 1 pixel or more: truncation would otherwise
    #    collapse the short edge of an extreme-aspect image to 0 and yield zero tiles.
    if max(width, height) > params["max_edge"]:
        scale_factor = params["max_edge"] / max(width, height)
        width, height = max(1, int(width * scale_factor)), max(1, int(height * scale_factor))

    # 2. Further constrain the shortest edge
    if min(width, height) > params["min_edge"]:
        scale_factor = params["min_edge"] / min(width, height)
        width, height = max(1, int(width * scale_factor)), max(1, int(height * scale_factor))

    # 3. Count how many tiles are needed to cover the image
    tiles_width = ceil(width / params["tile_size"])
    tiles_height = ceil(height / params["tile_size"])
    total_tokens = params["base_token_count"] + params["token_multiplier"] * (tiles_width * tiles_height)

    return total_tokens