ag2 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of ag2 has been flagged as a potentially problematic release.

Files changed (160)
  1. {ag2-0.4.1.dist-info → ag2-0.5.0.dist-info}/METADATA +5 -146
  2. ag2-0.5.0.dist-info/RECORD +6 -0
  3. ag2-0.5.0.dist-info/top_level.txt +1 -0
  4. ag2-0.4.1.dist-info/RECORD +0 -158
  5. ag2-0.4.1.dist-info/top_level.txt +0 -1
  6. autogen/__init__.py +0 -17
  7. autogen/_pydantic.py +0 -116
  8. autogen/agentchat/__init__.py +0 -42
  9. autogen/agentchat/agent.py +0 -142
  10. autogen/agentchat/assistant_agent.py +0 -85
  11. autogen/agentchat/chat.py +0 -306
  12. autogen/agentchat/contrib/__init__.py +0 -0
  13. autogen/agentchat/contrib/agent_builder.py +0 -788
  14. autogen/agentchat/contrib/agent_eval/agent_eval.py +0 -107
  15. autogen/agentchat/contrib/agent_eval/criterion.py +0 -47
  16. autogen/agentchat/contrib/agent_eval/critic_agent.py +0 -47
  17. autogen/agentchat/contrib/agent_eval/quantifier_agent.py +0 -42
  18. autogen/agentchat/contrib/agent_eval/subcritic_agent.py +0 -48
  19. autogen/agentchat/contrib/agent_eval/task.py +0 -43
  20. autogen/agentchat/contrib/agent_optimizer.py +0 -450
  21. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  22. autogen/agentchat/contrib/capabilities/agent_capability.py +0 -21
  23. autogen/agentchat/contrib/capabilities/generate_images.py +0 -297
  24. autogen/agentchat/contrib/capabilities/teachability.py +0 -406
  25. autogen/agentchat/contrib/capabilities/text_compressors.py +0 -72
  26. autogen/agentchat/contrib/capabilities/transform_messages.py +0 -92
  27. autogen/agentchat/contrib/capabilities/transforms.py +0 -565
  28. autogen/agentchat/contrib/capabilities/transforms_util.py +0 -120
  29. autogen/agentchat/contrib/capabilities/vision_capability.py +0 -217
  30. autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
  31. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +0 -41
  32. autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +0 -29
  33. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +0 -29
  34. autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +0 -29
  35. autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +0 -22
  36. autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +0 -31
  37. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +0 -26
  38. autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +0 -55
  39. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +0 -54
  40. autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +0 -39
  41. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +0 -22
  42. autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +0 -35
  43. autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +0 -61
  44. autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +0 -62
  45. autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +0 -48
  46. autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +0 -34
  47. autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +0 -22
  48. autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +0 -36
  49. autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +0 -22
  50. autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +0 -19
  51. autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +0 -29
  52. autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +0 -32
  53. autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +0 -17
  54. autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +0 -26
  55. autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +0 -24
  56. autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +0 -28
  57. autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +0 -29
  58. autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +0 -35
  59. autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +0 -40
  60. autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +0 -23
  61. autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +0 -37
  62. autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +0 -16
  63. autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +0 -16
  64. autogen/agentchat/contrib/captainagent/tools/requirements.txt +0 -10
  65. autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +0 -34
  66. autogen/agentchat/contrib/captainagent.py +0 -490
  67. autogen/agentchat/contrib/gpt_assistant_agent.py +0 -545
  68. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  69. autogen/agentchat/contrib/graph_rag/document.py +0 -30
  70. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +0 -111
  71. autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +0 -81
  72. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +0 -56
  73. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +0 -64
  74. autogen/agentchat/contrib/img_utils.py +0 -390
  75. autogen/agentchat/contrib/llamaindex_conversable_agent.py +0 -123
  76. autogen/agentchat/contrib/llava_agent.py +0 -176
  77. autogen/agentchat/contrib/math_user_proxy_agent.py +0 -471
  78. autogen/agentchat/contrib/multimodal_conversable_agent.py +0 -128
  79. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +0 -325
  80. autogen/agentchat/contrib/retrieve_assistant_agent.py +0 -56
  81. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +0 -705
  82. autogen/agentchat/contrib/society_of_mind_agent.py +0 -203
  83. autogen/agentchat/contrib/swarm_agent.py +0 -463
  84. autogen/agentchat/contrib/text_analyzer_agent.py +0 -76
  85. autogen/agentchat/contrib/tool_retriever.py +0 -120
  86. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  87. autogen/agentchat/contrib/vectordb/base.py +0 -243
  88. autogen/agentchat/contrib/vectordb/chromadb.py +0 -326
  89. autogen/agentchat/contrib/vectordb/mongodb.py +0 -559
  90. autogen/agentchat/contrib/vectordb/pgvectordb.py +0 -958
  91. autogen/agentchat/contrib/vectordb/qdrant.py +0 -334
  92. autogen/agentchat/contrib/vectordb/utils.py +0 -126
  93. autogen/agentchat/contrib/web_surfer.py +0 -305
  94. autogen/agentchat/conversable_agent.py +0 -2908
  95. autogen/agentchat/groupchat.py +0 -1668
  96. autogen/agentchat/user_proxy_agent.py +0 -109
  97. autogen/agentchat/utils.py +0 -207
  98. autogen/browser_utils.py +0 -291
  99. autogen/cache/__init__.py +0 -10
  100. autogen/cache/abstract_cache_base.py +0 -78
  101. autogen/cache/cache.py +0 -182
  102. autogen/cache/cache_factory.py +0 -85
  103. autogen/cache/cosmos_db_cache.py +0 -150
  104. autogen/cache/disk_cache.py +0 -109
  105. autogen/cache/in_memory_cache.py +0 -61
  106. autogen/cache/redis_cache.py +0 -128
  107. autogen/code_utils.py +0 -745
  108. autogen/coding/__init__.py +0 -22
  109. autogen/coding/base.py +0 -113
  110. autogen/coding/docker_commandline_code_executor.py +0 -262
  111. autogen/coding/factory.py +0 -45
  112. autogen/coding/func_with_reqs.py +0 -203
  113. autogen/coding/jupyter/__init__.py +0 -22
  114. autogen/coding/jupyter/base.py +0 -32
  115. autogen/coding/jupyter/docker_jupyter_server.py +0 -164
  116. autogen/coding/jupyter/embedded_ipython_code_executor.py +0 -182
  117. autogen/coding/jupyter/jupyter_client.py +0 -224
  118. autogen/coding/jupyter/jupyter_code_executor.py +0 -161
  119. autogen/coding/jupyter/local_jupyter_server.py +0 -168
  120. autogen/coding/local_commandline_code_executor.py +0 -410
  121. autogen/coding/markdown_code_extractor.py +0 -44
  122. autogen/coding/utils.py +0 -57
  123. autogen/exception_utils.py +0 -46
  124. autogen/extensions/__init__.py +0 -0
  125. autogen/formatting_utils.py +0 -76
  126. autogen/function_utils.py +0 -362
  127. autogen/graph_utils.py +0 -148
  128. autogen/io/__init__.py +0 -15
  129. autogen/io/base.py +0 -105
  130. autogen/io/console.py +0 -43
  131. autogen/io/websockets.py +0 -213
  132. autogen/logger/__init__.py +0 -11
  133. autogen/logger/base_logger.py +0 -140
  134. autogen/logger/file_logger.py +0 -287
  135. autogen/logger/logger_factory.py +0 -29
  136. autogen/logger/logger_utils.py +0 -42
  137. autogen/logger/sqlite_logger.py +0 -459
  138. autogen/math_utils.py +0 -356
  139. autogen/oai/__init__.py +0 -33
  140. autogen/oai/anthropic.py +0 -428
  141. autogen/oai/bedrock.py +0 -606
  142. autogen/oai/cerebras.py +0 -270
  143. autogen/oai/client.py +0 -1148
  144. autogen/oai/client_utils.py +0 -167
  145. autogen/oai/cohere.py +0 -453
  146. autogen/oai/completion.py +0 -1216
  147. autogen/oai/gemini.py +0 -469
  148. autogen/oai/groq.py +0 -281
  149. autogen/oai/mistral.py +0 -279
  150. autogen/oai/ollama.py +0 -582
  151. autogen/oai/openai_utils.py +0 -811
  152. autogen/oai/together.py +0 -343
  153. autogen/retrieve_utils.py +0 -487
  154. autogen/runtime_logging.py +0 -163
  155. autogen/token_count_utils.py +0 -259
  156. autogen/types.py +0 -20
  157. autogen/version.py +0 -7
  158. {ag2-0.4.1.dist-info → ag2-0.5.0.dist-info}/LICENSE +0 -0
  159. {ag2-0.4.1.dist-info → ag2-0.5.0.dist-info}/NOTICE.md +0 -0
  160. {ag2-0.4.1.dist-info → ag2-0.5.0.dist-info}/WHEEL +0 -0

autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py
@@ -1,81 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-from autogen import Agent, ConversableAgent, UserProxyAgent
-
-from .falkor_graph_query_engine import FalkorGraphQueryEngine
-from .graph_query_engine import GraphStoreQueryResult
-from .graph_rag_capability import GraphRagCapability
-
-
-class FalkorGraphRagCapability(GraphRagCapability):
-    """
-    The FalkorDB GraphRAG capability integrates FalkorDB with graphrag_sdk version 0.1.3b0.
-    Ref: https://github.com/FalkorDB/GraphRAG-SDK/tree/2-move-away-from-sql-to-json-ontology-detection
-
-    For usage, please refer to the example notebook/agentchat_graph_rag_falkordb.ipynb
-    """
-
-    def __init__(self, query_engine: FalkorGraphQueryEngine):
-        """
-        Initialize the GraphRAG capability with a graph query engine.
-        """
-        self.query_engine = query_engine
-
-    def add_to_agent(self, agent: UserProxyAgent):
-        """
-        Add the FalkorDB GraphRAG capability to a UserProxyAgent.
-        The restriction to a UserProxyAgent ensures the returned message contains information retrieved from the graph DB rather than from an LLM.
-        """
-        self.graph_rag_agent = agent
-
-        # Validate the agent config
-        if agent.llm_config not in (None, False):
-            raise Exception(
-                "Agents with GraphRAG capabilities do not use an LLM configuration. Please set your llm_config to None or False."
-            )
-
-        # Register method to generate the reply using a FalkorDB query
-        # All other reply methods will be removed
-        agent.register_reply(
-            [ConversableAgent, None], self._reply_using_falkordb_query, position=0, remove_other_reply_funcs=True
-        )
-
-    def _reply_using_falkordb_query(
-        self,
-        recipient: ConversableAgent,
-        messages: Optional[List[Dict]] = None,
-        sender: Optional[Agent] = None,
-        config: Optional[Any] = None,
-    ) -> Tuple[bool, Union[str, Dict, None]]:
-        """
-        Query FalkorDB and return the message. Internally, it utilises OpenAI to generate a reply based on the given messages.
-        The history with FalkorDB is also logged and updated.
-
-        If no results are found, a default message is returned: "I'm sorry, I don't have an answer for that."
-
-        Args:
-            recipient: The agent instance that will receive the message.
-            messages: A list of messages in the conversation history with the sender.
-            sender: The agent instance that sent the message.
-            config: Optional configuration for message processing.
-
-        Returns:
-            A tuple containing a boolean indicating success and the assistant's reply.
-        """
-        question = self._get_last_question(messages[-1])
-        result: GraphStoreQueryResult = self.query_engine.query(question)
-
-        return True, result.answer if result.answer else "I'm sorry, I don't have an answer for that."
-
-    def _get_last_question(self, message: Union[Dict, str]):
-        """Retrieves the question from the last message in the conversation history."""
-        if isinstance(message, str):
-            return message
-        if isinstance(message, Dict):
-            if "content" in message:
-                return message["content"]
-        return None
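
For context, the removed FalkorGraphRagCapability was typically wired up as in the short sketch below. This is a minimal, illustrative example against the ag2 0.4.x layout shown in this diff; the FalkorGraphQueryEngine constructor arguments are hypothetical placeholders, not taken from the diff.

    # Minimal usage sketch (ag2 0.4.x); engine arguments are placeholders.
    from autogen import UserProxyAgent
    from autogen.agentchat.contrib.graph_rag.falkor_graph_query_engine import FalkorGraphQueryEngine
    from autogen.agentchat.contrib.graph_rag.falkor_graph_rag_capability import FalkorGraphRagCapability

    query_engine = FalkorGraphQueryEngine(name="movies")  # hypothetical constructor arguments

    # llm_config must be None or False: add_to_agent() raises otherwise, so that
    # replies come only from the graph query, never from an LLM.
    rag_agent = UserProxyAgent(
        name="falkor_rag_agent",
        llm_config=False,
        human_input_mode="NEVER",
        code_execution_config=False,
    )

    FalkorGraphRagCapability(query_engine).add_to_agent(rag_agent)
    # rag_agent now answers incoming messages via _reply_using_falkordb_query only.
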

autogen/agentchat/contrib/graph_rag/graph_query_engine.py
@@ -1,56 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
-# SPDX-License-Identifier: MIT
-from dataclasses import dataclass, field
-from typing import List, Optional, Protocol
-
-from .document import Document
-
-
-@dataclass
-class GraphStoreQueryResult:
-    """
-    A wrapper of graph store query results.
-
-    answer: human-readable answer to the question/query.
-    results: intermediate results of the question/query, e.g. node entities.
-    """
-
-    answer: Optional[str] = None
-    results: list = field(default_factory=list)
-
-
-class GraphQueryEngine(Protocol):
-    """An abstract base class that represents a graph query engine on top of an underlying graph database.
-
-    This interface defines the basic methods for graph-based RAG.
-    """
-
-    def init_db(self, input_doc: List[Document] | None = None):
-        """
-        This method initializes the graph database with the input documents or records.
-        Usually, it takes the following steps:
-        1. connect to a graph database.
-        2. extract graph nodes and edges from the input data, the graph schema, etc.
-        3. build indexes, etc.
-
-        Args:
-            input_doc: a list of input documents that are used to build the graph in the database.
-
-        """
-        pass
-
-    def add_records(self, new_records: List) -> bool:
-        """
-        Add new records to the underlying database and add them to the graph if required.
-        """
-        pass
-
-    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
-        """
-        This method transforms a string-format question into a database query and returns the result.
-        """
-        pass
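
Because GraphQueryEngine is declared as a typing.Protocol, any class with structurally matching methods satisfies it; no inheritance is needed. The toy in-memory engine below is not part of the package and assumes Document exposes a path_or_url attribute (not shown in this diff); it is only a sketch of how the removed interface was meant to be implemented:

    # Toy engine satisfying the removed GraphQueryEngine protocol (illustrative only).
    from typing import List, Optional

    from autogen.agentchat.contrib.graph_rag.document import Document
    from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphStoreQueryResult

    class KeywordGraphQueryEngine:
        """Keeps records in memory and 'queries' them by keyword overlap."""

        def __init__(self):
            self._records: List[str] = []

        def init_db(self, input_doc: Optional[List[Document]] = None):
            # Assumes Document carries a path_or_url attribute (an assumption, not from this diff).
            self._records = [str(doc.path_or_url) for doc in (input_doc or [])]

        def add_records(self, new_records: List) -> bool:
            self._records.extend(str(r) for r in new_records)
            return True

        def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
            hits = [r for r in self._records if any(w.lower() in r.lower() for w in question.split())]
            return GraphStoreQueryResult(answer=hits[0] if hits else None, results=hits[:n_results])
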

autogen/agentchat/contrib/graph_rag/graph_rag_capability.py
@@ -1,64 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
-# SPDX-License-Identifier: MIT
-from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
-from autogen.agentchat.conversable_agent import ConversableAgent
-
-from .graph_query_engine import GraphQueryEngine
-
-
-class GraphRagCapability(AgentCapability):
-    """
-    A graph-based RAG capability uses a graph query engine to give a conversable agent the graph-based RAG ability.
-
-    An agent class with graph-based RAG capability could
-    1. create a graph in the underlying database with input documents.
-    2. retrieve relevant information based on messages received by the agent.
-    3. generate answers from retrieved information and send messages back.
-
-    For example,
-        graph_query_engine = GraphQueryEngine(...)
-        graph_query_engine.init_db([Document(doc1), Document(doc2), ...])
-
-        graph_rag_agent = ConversableAgent(
-            name="graph_rag_agent",
-            max_consecutive_auto_reply=3,
-            ...
-        )
-        graph_rag_capability = GraphRagCapability(graph_query_engine)
-        graph_rag_capability.add_to_agent(graph_rag_agent)
-
-        user_proxy = UserProxyAgent(
-            name="user_proxy",
-            code_execution_config=False,
-            is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
-            human_input_mode="ALWAYS",
-        )
-        user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'")
-
-        # ChatResult(
-        #     chat_id=None,
-        #     chat_history=[
-        #         {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'},
-        #         {'content': 'A few actors who have played in The Matrix are:
-        #             - Keanu Reeves
-        #             - Laurence Fishburne
-        #             - Carrie-Anne Moss
-        #             - Hugo Weaving',
-        #          'role': 'user_proxy'},
-        #     ...)
-
-    """
-
-    def __init__(self, query_engine: GraphQueryEngine):
-        """
-        Initialize the graph-based RAG capability with a graph query engine.
-        """
-        ...
-
-    def add_to_agent(self, agent: ConversableAgent):
-        """Add the capability to an agent."""
-        ...
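
The base class above leaves __init__ and add_to_agent as stubs. A concrete subclass normally stores the query engine and registers a reply function on the agent, mirroring the FalkorGraphRagCapability pattern earlier in this diff; the sketch below is illustrative, not code from the package:

    # Illustrative subclass of the removed GraphRagCapability base class.
    from typing import Any, Dict, List, Optional, Tuple, Union

    from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphQueryEngine, GraphStoreQueryResult
    from autogen.agentchat.contrib.graph_rag.graph_rag_capability import GraphRagCapability
    from autogen.agentchat.conversable_agent import ConversableAgent

    class SimpleGraphRagCapability(GraphRagCapability):
        def __init__(self, query_engine: GraphQueryEngine):
            self.query_engine = query_engine

        def add_to_agent(self, agent: ConversableAgent):
            # Route every incoming message through the graph query engine.
            agent.register_reply([ConversableAgent, None], self._reply_from_graph, position=0)

        def _reply_from_graph(
            self,
            recipient: ConversableAgent,
            messages: Optional[List[Dict]] = None,
            sender=None,
            config: Optional[Any] = None,
        ) -> Tuple[bool, Union[str, Dict, None]]:
            question = messages[-1].get("content", "") if messages else ""
            result: GraphStoreQueryResult = self.query_engine.query(question)
            return True, result.answer or "No answer found in the graph."
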

autogen/agentchat/contrib/img_utils.py
@@ -1,390 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
-# SPDX-License-Identifier: MIT
-import base64
-import copy
-import os
-import re
-from io import BytesIO
-from math import ceil
-from typing import Dict, List, Tuple, Union
-
-import requests
-from PIL import Image
-
-from autogen.agentchat import utils
-
-# Parameters for token counting for images for different models
-MODEL_PARAMS = {
-    "gpt-4-vision": {
-        "max_edge": 2048,
-        "min_edge": 768,
-        "tile_size": 512,
-        "base_token_count": 85,
-        "token_multiplier": 170,
-    },
-    "gpt-4o-mini": {
-        "max_edge": 2048,
-        "min_edge": 768,
-        "tile_size": 512,
-        "base_token_count": 2833,
-        "token_multiplier": 5667,
-    },
-    "gpt-4o": {"max_edge": 2048, "min_edge": 768, "tile_size": 512, "base_token_count": 85, "token_multiplier": 170},
-}
-
-
-def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image:
-    """
-    Loads an image from a file and returns a PIL Image object.
-
-    Parameters:
-        image_file (str, or Image): The filename, URL, URI, or base64 string of the image file.
-
-    Returns:
-        Image.Image: The PIL Image object.
-    """
-    if isinstance(image_file, Image.Image):
-        # Already a PIL Image object
-        return image_file
-
-    # Remove surrounding quotes if present
-    if image_file.startswith('"') and image_file.endswith('"'):
-        image_file = image_file[1:-1]
-    if image_file.startswith("'") and image_file.endswith("'"):
-        image_file = image_file[1:-1]
-
-    if image_file.startswith("http://") or image_file.startswith("https://"):
-        # A URL file
-        response = requests.get(image_file)
-        content = BytesIO(response.content)
-        image = Image.open(content)
-    elif re.match(r"data:image/(?:png|jpeg);base64,", image_file):
-        # A URI. Remove the prefix and decode the base64 string.
-        base64_data = re.sub(r"data:image/(?:png|jpeg);base64,", "", image_file)
-        image = _to_pil(base64_data)
-    elif os.path.exists(image_file):
-        # A local file
-        image = Image.open(image_file)
-    else:
-        # base64 encoded string
-        image = _to_pil(image_file)
-
-    return image.convert("RGB")
-
-
-def get_image_data(image_file: Union[str, Image.Image], use_b64=True) -> bytes:
-    """
-    Loads an image and returns its data either as raw bytes or in base64-encoded format.
-
-    This function first loads an image from the specified file, URL, or base64 string using
-    the `get_pil_image` function. It then saves this image in memory in PNG format and
-    retrieves its binary content. Depending on the `use_b64` flag, this binary content is
-    either returned directly or as a base64-encoded string.
-
-    Parameters:
-        image_file (str, or Image): The path to the image file, a URL to an image, or a base64-encoded
-            string of the image.
-        use_b64 (bool): If True, the function returns a base64-encoded string of the image data.
-            If False, it returns the raw byte data of the image. Defaults to True.
-
-    Returns:
-        bytes: The image data in raw bytes if `use_b64` is False, or a base64-encoded string
-            if `use_b64` is True.
-    """
-    image = get_pil_image(image_file)
-
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    content = buffered.getvalue()
-
-    if use_b64:
-        return base64.b64encode(content).decode("utf-8")
-    else:
-        return content
-
-
-def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]:
-    """
-    Formats the input prompt by replacing image tags and returns the new prompt along with image locations.
-
-    Parameters:
-        - prompt (str): The input string that may contain image tags like <img ...>.
-        - order_image_tokens (bool, optional): Whether to order the image tokens with numbers.
-            It will be useful for GPT-4V. Defaults to False.
-
-    Returns:
-        - Tuple[str, List[str]]: A tuple containing the formatted string and a list of images (loaded in b64 format).
-    """
-
-    # Initialize variables
-    new_prompt = prompt
-    image_locations = []
-    images = []
-    image_count = 0
-
-    # Regular expression pattern for matching <img ...> tags
-    img_tag_pattern = re.compile(r"<img ([^>]+)>")
-
-    # Find all image tags
-    for match in img_tag_pattern.finditer(prompt):
-        image_location = match.group(1)
-
-        try:
-            img_data = get_image_data(image_location)
-        except Exception as e:
-            # Remove the token
-            print(f"Warning! Unable to load image from {image_location}, because of {e}")
-            new_prompt = new_prompt.replace(match.group(0), "", 1)
-            continue
-
-        image_locations.append(image_location)
-        images.append(img_data)
-
-        # Increment the image count and replace the tag in the prompt
-        new_token = f"<image {image_count}>" if order_image_tokens else "<image>"
-
-        new_prompt = new_prompt.replace(match.group(0), new_token, 1)
-        image_count += 1
-
-    return new_prompt, images
-
-
-def pil_to_data_uri(image: Image.Image) -> str:
-    """
-    Converts a PIL Image object to a data URI.
-
-    Parameters:
-        image (Image.Image): The PIL Image object.
-
-    Returns:
-        str: The data URI string.
-    """
-    buffered = BytesIO()
-    image.save(buffered, format="PNG")
-    content = buffered.getvalue()
-    return convert_base64_to_data_uri(base64.b64encode(content).decode("utf-8"))
-
-
-def convert_base64_to_data_uri(base64_image):
-    def _get_mime_type_from_data_uri(base64_image):
-        # Decode the base64 string
-        image_data = base64.b64decode(base64_image)
-        # Check the first few bytes for known signatures
-        if image_data.startswith(b"\xff\xd8\xff"):
-            return "image/jpeg"
-        elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
-            return "image/png"
-        elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
-            return "image/gif"
-        elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
-            return "image/webp"
-        return "image/jpeg"  # use jpeg for unknown formats, best guess.
-
-    mime_type = _get_mime_type_from_data_uri(base64_image)
-    data_uri = f"data:{mime_type};base64,{base64_image}"
-    return data_uri
-
-
-def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dict]]:
-    """
-    Formats the input prompt by replacing image tags and returns a list of text and images.
-
-    Args:
-        - prompt (str): The input string that may contain image tags like <img ...>.
-        - img_format (str): what image format should be used. One of "uri", "url", "pil".
-
-    Returns:
-        - List[Union[str, dict]]: A list of alternating text and image dictionary items.
-    """
-    assert img_format in ["uri", "url", "pil"]
-
-    output = []
-    last_index = 0
-    image_count = 0
-
-    # Find all image tags
-    for parsed_tag in utils.parse_tags_from_content("img", prompt):
-        image_location = parsed_tag["attr"]["src"]
-        try:
-            if img_format == "pil":
-                img_data = get_pil_image(image_location)
-            elif img_format == "uri":
-                img_data = get_image_data(image_location)
-                img_data = convert_base64_to_data_uri(img_data)
-            elif img_format == "url":
-                img_data = image_location
-            else:
-                raise ValueError(f"Unknown image format {img_format}")
-        except Exception as e:
-            # Warning and skip this token
-            print(f"Warning! Unable to load image from {image_location}, because {e}")
-            continue
-
-        # Add text before this image tag to output list
-        output.append({"type": "text", "text": prompt[last_index : parsed_tag["match"].start()]})
-
-        # Add image data to output list
-        output.append({"type": "image_url", "image_url": {"url": img_data}})
-
-        last_index = parsed_tag["match"].end()
-        image_count += 1
-
-    # Add remaining text to output list
-    output.append({"type": "text", "text": prompt[last_index:]})
-    return output
-
-
-def extract_img_paths(paragraph: str) -> list:
-    """
-    Extract image paths (URLs or local paths) from a text paragraph.
-
-    Parameters:
-        paragraph (str): The input text paragraph.
-
-    Returns:
-        list: A list of extracted image paths.
-    """
-    # Regular expression to match image URLs and file paths
-    img_path_pattern = re.compile(
-        r"\b(?:http[s]?://\S+\.(?:jpg|jpeg|png|gif|bmp)|\S+\.(?:jpg|jpeg|png|gif|bmp))\b", re.IGNORECASE
-    )
-
-    # Find all matches in the paragraph
-    img_paths = re.findall(img_path_pattern, paragraph)
-    return img_paths
-
-
-def _to_pil(data: str) -> Image.Image:
-    """
-    Converts a base64 encoded image data string to a PIL Image object.
-
-    This function first decodes the base64 encoded string to bytes, then creates a BytesIO object from the bytes,
-    and finally creates and returns a PIL Image object from the BytesIO object.
-
-    Parameters:
-        data (str): The encoded image data string.
-
-    Returns:
-        Image.Image: The PIL Image object created from the input data.
-    """
-    return Image.open(BytesIO(base64.b64decode(data)))
-
-
-def message_formatter_pil_to_b64(messages: List[Dict]) -> List[Dict]:
-    """
-    Converts the PIL image URLs in the messages to base64 encoded data URIs.
-
-    This function iterates over a list of message dictionaries. For each message,
-    if it contains a 'content' key with a list of items, it looks for items
-    with an 'image_url' key. The function then converts the PIL image URL
-    (pointed to by 'image_url') to a base64 encoded data URI.
-
-    Parameters:
-        messages (List[Dict]): A list of message dictionaries. Each dictionary
-            may contain a 'content' key with a list of items,
-            some of which might be image URLs.
-
-    Returns:
-        List[Dict]: A new list of message dictionaries with PIL image URLs in the
-            'image_url' key converted to base64 encoded data URIs.
-
-    Example Input:
-        [
-            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
-            {'content': [
-                {'type': 'text', 'text': "What's the breed of this dog here? \n"},
-                {'type': 'image_url', 'image_url': {'url': a PIL.Image.Image}},
-                {'type': 'text', 'text': '.'}],
-             'role': 'user'}
-        ]
-
-    Example Output:
-        [
-            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
-            {'content': [
-                {'type': 'text', 'text': "What's the breed of this dog here? \n"},
-                {'type': 'image_url', 'image_url': {'url': a B64 Image}},
-                {'type': 'text', 'text': '.'}],
-             'role': 'user'}
-        ]
-    """
-    new_messages = []
-    for message in messages:
-        # Handle the new GPT messages format.
-        if isinstance(message, dict) and "content" in message and isinstance(message["content"], list):
-            message = copy.deepcopy(message)
-            for item in message["content"]:
-                if isinstance(item, dict) and "image_url" in item:
-                    item["image_url"]["url"] = pil_to_data_uri(item["image_url"]["url"])
-
-        new_messages.append(message)
-
-    return new_messages
-
-
-def num_tokens_from_gpt_image(
-    image_data: Union[str, Image.Image], model: str = "gpt-4-vision", low_quality: bool = False
-) -> int:
-    """
-    Calculate the number of tokens required to process an image based on its dimensions
-    after scaling for different GPT models. Supports "gpt-4-vision", "gpt-4o", and "gpt-4o-mini".
-    This function scales the image so that its longest edge is at most 2048 pixels and its shortest
-    edge is at most 768 pixels (for "gpt-4-vision"). It then calculates the number of 512x512 tiles
-    needed to cover the scaled image and computes the total tokens based on the number of these tiles.
-
-    Reference: https://openai.com/api/pricing/
-
-    Args:
-        image_data : Union[str, Image.Image]: The image data which can either be a base64
-            encoded string, a URL, a file path, or a PIL Image object.
-        model: str: The model being used for image processing. Can be "gpt-4-vision", "gpt-4o", or "gpt-4o-mini".
-
-    Returns:
-        int: The total number of tokens required for processing the image.
-
-    Examples:
-    --------
-    >>> from PIL import Image
-    >>> img = Image.new('RGB', (2500, 2500), color = 'red')
-    >>> num_tokens_from_gpt_image(img, model="gpt-4-vision")
-    765
-    """
-
-    image = get_pil_image(image_data)  # PIL Image
-    width, height = image.size
-
-    # Determine model parameters
-    if "gpt-4-vision" in model or "gpt-4-turbo" in model or "gpt-4v" in model or "gpt-4-v" in model:
-        params = MODEL_PARAMS["gpt-4-vision"]
-    elif "gpt-4o-mini" in model:
-        params = MODEL_PARAMS["gpt-4o-mini"]
-    elif "gpt-4o" in model:
-        params = MODEL_PARAMS["gpt-4o"]
-    else:
-        raise ValueError(
-            f"Model {model} is not supported. Choose 'gpt-4-vision', 'gpt-4-turbo', 'gpt-4v', 'gpt-4-v', 'gpt-4o', or 'gpt-4o-mini'."
-        )
-
-    if low_quality:
-        return params["base_token_count"]
-
-    # 1. Constrain the longest edge
-    if max(width, height) > params["max_edge"]:
-        scale_factor = params["max_edge"] / max(width, height)
-        width, height = int(width * scale_factor), int(height * scale_factor)
-
-    # 2. Further constrain the shortest edge
-    if min(width, height) > params["min_edge"]:
-        scale_factor = params["min_edge"] / min(width, height)
-        width, height = int(width * scale_factor), int(height * scale_factor)
-
-    # 3. Count how many tiles are needed to cover the image
-    tiles_width = ceil(width / params["tile_size"])
-    tiles_height = ceil(height / params["tile_size"])
-    total_tokens = params["base_token_count"] + params["token_multiplier"] * (tiles_width * tiles_height)
-
-    return total_tokens
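
As a sanity check on the docstring example above: a 2500x2500 image is first scaled so its longest edge is 2048 (giving 2048x2048), then so its shortest edge is 768 (giving 768x768); covering that requires ceil(768/512) * ceil(768/512) = 4 tiles, so the gpt-4-vision cost is 85 + 170 * 4 = 765 tokens. The short sketch below exercises the removed helpers with this arithmetic (requires Pillow; values follow MODEL_PARAMS as shown in the hunk):

    # Exercise the removed img_utils helpers (ag2 0.4.x).
    from PIL import Image

    from autogen.agentchat.contrib.img_utils import num_tokens_from_gpt_image, pil_to_data_uri

    img = Image.new("RGB", (2500, 2500), color="red")

    # 2500x2500 -> 2048x2048 -> 768x768 -> 2x2 tiles -> 85 + 170 * 4
    print(num_tokens_from_gpt_image(img, model="gpt-4-vision"))  # 765
    # Same tiling, gpt-4o-mini parameters: 2833 + 5667 * 4
    print(num_tokens_from_gpt_image(img, model="gpt-4o-mini"))   # 25501
    # low_quality short-circuits to the base token count
    print(num_tokens_from_gpt_image(img, low_quality=True))      # 85

    # PNG bytes wrapped into a data URI (signature detection picks image/png)
    print(pil_to_data_uri(img)[:22])  # data:image/png;base64,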