ag2 0.4.1__py3-none-any.whl → 0.4.2b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic. Click here for more details.
- ag2-0.4.2b1.dist-info/METADATA +19 -0
- ag2-0.4.2b1.dist-info/RECORD +6 -0
- ag2-0.4.2b1.dist-info/top_level.txt +1 -0
- ag2-0.4.1.dist-info/METADATA +0 -500
- ag2-0.4.1.dist-info/RECORD +0 -158
- ag2-0.4.1.dist-info/top_level.txt +0 -1
- autogen/__init__.py +0 -17
- autogen/_pydantic.py +0 -116
- autogen/agentchat/__init__.py +0 -42
- autogen/agentchat/agent.py +0 -142
- autogen/agentchat/assistant_agent.py +0 -85
- autogen/agentchat/chat.py +0 -306
- autogen/agentchat/contrib/__init__.py +0 -0
- autogen/agentchat/contrib/agent_builder.py +0 -788
- autogen/agentchat/contrib/agent_eval/agent_eval.py +0 -107
- autogen/agentchat/contrib/agent_eval/criterion.py +0 -47
- autogen/agentchat/contrib/agent_eval/critic_agent.py +0 -47
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +0 -42
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +0 -48
- autogen/agentchat/contrib/agent_eval/task.py +0 -43
- autogen/agentchat/contrib/agent_optimizer.py +0 -450
- autogen/agentchat/contrib/capabilities/__init__.py +0 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +0 -21
- autogen/agentchat/contrib/capabilities/generate_images.py +0 -297
- autogen/agentchat/contrib/capabilities/teachability.py +0 -406
- autogen/agentchat/contrib/capabilities/text_compressors.py +0 -72
- autogen/agentchat/contrib/capabilities/transform_messages.py +0 -92
- autogen/agentchat/contrib/capabilities/transforms.py +0 -565
- autogen/agentchat/contrib/capabilities/transforms_util.py +0 -120
- autogen/agentchat/contrib/capabilities/vision_capability.py +0 -217
- autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +0 -41
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +0 -31
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +0 -55
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +0 -54
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +0 -39
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +0 -61
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +0 -62
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +0 -48
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +0 -34
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +0 -36
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +0 -19
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +0 -32
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +0 -17
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +0 -24
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +0 -28
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +0 -40
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +0 -23
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +0 -37
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +0 -10
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +0 -34
- autogen/agentchat/contrib/captainagent.py +0 -490
- autogen/agentchat/contrib/gpt_assistant_agent.py +0 -545
- autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
- autogen/agentchat/contrib/graph_rag/document.py +0 -30
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +0 -111
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +0 -81
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +0 -56
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +0 -64
- autogen/agentchat/contrib/img_utils.py +0 -390
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +0 -123
- autogen/agentchat/contrib/llava_agent.py +0 -176
- autogen/agentchat/contrib/math_user_proxy_agent.py +0 -471
- autogen/agentchat/contrib/multimodal_conversable_agent.py +0 -128
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +0 -325
- autogen/agentchat/contrib/retrieve_assistant_agent.py +0 -56
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +0 -705
- autogen/agentchat/contrib/society_of_mind_agent.py +0 -203
- autogen/agentchat/contrib/swarm_agent.py +0 -463
- autogen/agentchat/contrib/text_analyzer_agent.py +0 -76
- autogen/agentchat/contrib/tool_retriever.py +0 -120
- autogen/agentchat/contrib/vectordb/__init__.py +0 -0
- autogen/agentchat/contrib/vectordb/base.py +0 -243
- autogen/agentchat/contrib/vectordb/chromadb.py +0 -326
- autogen/agentchat/contrib/vectordb/mongodb.py +0 -559
- autogen/agentchat/contrib/vectordb/pgvectordb.py +0 -958
- autogen/agentchat/contrib/vectordb/qdrant.py +0 -334
- autogen/agentchat/contrib/vectordb/utils.py +0 -126
- autogen/agentchat/contrib/web_surfer.py +0 -305
- autogen/agentchat/conversable_agent.py +0 -2908
- autogen/agentchat/groupchat.py +0 -1668
- autogen/agentchat/user_proxy_agent.py +0 -109
- autogen/agentchat/utils.py +0 -207
- autogen/browser_utils.py +0 -291
- autogen/cache/__init__.py +0 -10
- autogen/cache/abstract_cache_base.py +0 -78
- autogen/cache/cache.py +0 -182
- autogen/cache/cache_factory.py +0 -85
- autogen/cache/cosmos_db_cache.py +0 -150
- autogen/cache/disk_cache.py +0 -109
- autogen/cache/in_memory_cache.py +0 -61
- autogen/cache/redis_cache.py +0 -128
- autogen/code_utils.py +0 -745
- autogen/coding/__init__.py +0 -22
- autogen/coding/base.py +0 -113
- autogen/coding/docker_commandline_code_executor.py +0 -262
- autogen/coding/factory.py +0 -45
- autogen/coding/func_with_reqs.py +0 -203
- autogen/coding/jupyter/__init__.py +0 -22
- autogen/coding/jupyter/base.py +0 -32
- autogen/coding/jupyter/docker_jupyter_server.py +0 -164
- autogen/coding/jupyter/embedded_ipython_code_executor.py +0 -182
- autogen/coding/jupyter/jupyter_client.py +0 -224
- autogen/coding/jupyter/jupyter_code_executor.py +0 -161
- autogen/coding/jupyter/local_jupyter_server.py +0 -168
- autogen/coding/local_commandline_code_executor.py +0 -410
- autogen/coding/markdown_code_extractor.py +0 -44
- autogen/coding/utils.py +0 -57
- autogen/exception_utils.py +0 -46
- autogen/extensions/__init__.py +0 -0
- autogen/formatting_utils.py +0 -76
- autogen/function_utils.py +0 -362
- autogen/graph_utils.py +0 -148
- autogen/io/__init__.py +0 -15
- autogen/io/base.py +0 -105
- autogen/io/console.py +0 -43
- autogen/io/websockets.py +0 -213
- autogen/logger/__init__.py +0 -11
- autogen/logger/base_logger.py +0 -140
- autogen/logger/file_logger.py +0 -287
- autogen/logger/logger_factory.py +0 -29
- autogen/logger/logger_utils.py +0 -42
- autogen/logger/sqlite_logger.py +0 -459
- autogen/math_utils.py +0 -356
- autogen/oai/__init__.py +0 -33
- autogen/oai/anthropic.py +0 -428
- autogen/oai/bedrock.py +0 -606
- autogen/oai/cerebras.py +0 -270
- autogen/oai/client.py +0 -1148
- autogen/oai/client_utils.py +0 -167
- autogen/oai/cohere.py +0 -453
- autogen/oai/completion.py +0 -1216
- autogen/oai/gemini.py +0 -469
- autogen/oai/groq.py +0 -281
- autogen/oai/mistral.py +0 -279
- autogen/oai/ollama.py +0 -582
- autogen/oai/openai_utils.py +0 -811
- autogen/oai/together.py +0 -343
- autogen/retrieve_utils.py +0 -487
- autogen/runtime_logging.py +0 -163
- autogen/token_count_utils.py +0 -259
- autogen/types.py +0 -20
- autogen/version.py +0 -7
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/LICENSE +0 -0
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/NOTICE.md +0 -0
- {ag2-0.4.1.dist-info → ag2-0.4.2b1.dist-info}/WHEEL +0 -0
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
6
|
-
|
|
7
|
-
from autogen import Agent, ConversableAgent, UserProxyAgent
|
|
8
|
-
|
|
9
|
-
from .falkor_graph_query_engine import FalkorGraphQueryEngine
|
|
10
|
-
from .graph_query_engine import GraphStoreQueryResult
|
|
11
|
-
from .graph_rag_capability import GraphRagCapability
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class FalkorGraphRagCapability(GraphRagCapability):
    """
    The FalkorDB GraphRAG capability integrates FalkorDB with graphrag_sdk version: 0.1.3b0.
    Ref: https://github.com/FalkorDB/GraphRAG-SDK/tree/2-move-away-from-sql-to-json-ontology-detection

    For usage, please refer to example notebook/agentchat_graph_rag_falkordb.ipynb
    """

    def __init__(self, query_engine: FalkorGraphQueryEngine):
        """
        Initialize the GraphRAG capability with a graph query engine.

        Args:
            query_engine: The engine whose `query` method answers incoming questions.
        """
        self.query_engine = query_engine

    def add_to_agent(self, agent: UserProxyAgent):
        """
        Add FalkorDB GraphRAG capability to a UserProxyAgent.

        The agent must not carry an LLM configuration: every other reply function is
        removed, so replies come from the graph query engine only.

        Args:
            agent: The agent that will answer using the graph query engine.

        Raises:
            Exception: If `agent.llm_config` is anything other than None or False.
        """
        self.graph_rag_agent = agent

        # Validate the agent config
        if agent.llm_config not in (None, False):
            raise Exception(
                "Agents with GraphRAG capabilities do not use an LLM configuration. Please set your llm_config to None or False."
            )

        # Register method to generate the reply using a FalkorDB query
        # All other reply methods will be removed
        agent.register_reply(
            [ConversableAgent, None], self._reply_using_falkordb_query, position=0, remove_other_reply_funcs=True
        )

    def _reply_using_falkordb_query(
        self,
        recipient: ConversableAgent,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """
        Query FalkorDB with the content of the last message and return the answer.

        If there is no message to answer, the last message has no content, or the
        engine returns no answer, a default message is returned:
        "I'm sorry, I don't have an answer for that."

        Args:
            recipient: The agent instance that will receive the message.
            messages: A list of messages in the conversation history with the sender.
            sender: The agent instance that sent the message.
            config: Optional configuration for message processing.

        Returns:
            A tuple containing a boolean indicating success and the assistant's reply.
        """
        no_answer = "I'm sorry, I don't have an answer for that."

        # `messages` defaults to None and may be empty; indexing it blindly would raise.
        question = self._get_last_question(messages[-1]) if messages else None
        if not question:
            return True, no_answer

        result: GraphStoreQueryResult = self.query_engine.query(question)

        return True, result.answer if result.answer else no_answer

    def _get_last_question(self, message: Union[Dict, str]):
        """Return the text of a message: the string itself, or its "content" entry (None if absent)."""
        if isinstance(message, str):
            return message
        if isinstance(message, dict):
            return message.get("content")
        return None
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
#
|
|
5
|
-
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
-
# SPDX-License-Identifier: MIT
|
|
7
|
-
from dataclasses import dataclass, field
|
|
8
|
-
from typing import List, Optional, Protocol
|
|
9
|
-
|
|
10
|
-
from .document import Document
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
class GraphStoreQueryResult:
    """
    Container for what a graph store hands back for one question/query.

    answer: the human readable answer, or None when there is none.
    results: intermediate results for the question/query, e.g. node entities.
    """

    # No answer until the engine supplies one.
    answer: Optional[str] = None
    # default_factory gives every instance its own list.
    results: list = field(default_factory=list)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class GraphQueryEngine(Protocol):
    """A structural interface (typing.Protocol) for a graph query engine on top of an underlying graph database.

    This interface defines the basic methods for graph-based RAG.
    """

    # Optional[...] instead of `List[Document] | None`: the annotation is evaluated when the
    # class body runs, and `|` on typing generics raises TypeError before Python 3.10.
    def init_db(self, input_doc: Optional[List[Document]] = None):
        """
        This method initializes the graph database with the input documents or records.
        Usually, it takes the following steps,
        1. connecting to a graph database.
        2. extract graph nodes, edges based on input data, graph schema and etc.
        3. build indexes etc.

        Args:
            input_doc: a list of input documents that are used to build the graph in database.

        """
        pass

    def add_records(self, new_records: List) -> bool:
        """
        Add new records to the underlying database and add to the graph if required.
        """
        pass

    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
        """
        This method transforms a string format question into a database query and returns the result.
        """
        pass
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
#
|
|
5
|
-
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
-
# SPDX-License-Identifier: MIT
|
|
7
|
-
from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
|
|
8
|
-
from autogen.agentchat.conversable_agent import ConversableAgent
|
|
9
|
-
|
|
10
|
-
from .graph_query_engine import GraphQueryEngine
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class GraphRagCapability(AgentCapability):
    """
    A graph-based RAG capability uses a graph query engine to give a conversable agent the graph-based RAG ability.

    An agent class with graph-based RAG capability could
    1. create a graph in the underlying database with input documents.
    2. retrieve relevant information based on messages received by the agent.
    3. generate answers from retrieved information and send messages back.

    For example,
    graph_query_engine = GraphQueryEngine(...)
    graph_query_engine.init_db([Document(doc1), Document(doc2), ...])

    graph_rag_agent = ConversableAgent(
        name="graph_rag_agent",
        max_consecutive_auto_reply=3,
        ...
    )
    graph_rag_capability = GraphRagCapability(graph_query_engine)
    graph_rag_capability.add_to_agent(graph_rag_agent)

    user_proxy = UserProxyAgent(
        name="user_proxy",
        code_execution_config=False,
        is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
        human_input_mode="ALWAYS",
    )
    user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'")

    # ChatResult(
        # chat_id=None,
        # chat_history=[
            # {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'},
            # {'content': 'A few actors who have played in The Matrix are:
            #   - Keanu Reeves
            #   - Laurence Fishburne
            #   - Carrie-Anne Moss
            #   - Hugo Weaving',
            #   'role': 'user_proxy'},
        # ...)

    """

    def __init__(self, query_engine: GraphQueryEngine):
        """
        Initialize graph-based RAG capability with a graph query engine
        """
        # Intentionally empty in this base class.
        ...

    def add_to_agent(self, agent: ConversableAgent):
        """Add the capability to an agent"""
        # Intentionally empty in this base class.
        ...
|
|
@@ -1,390 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
#
|
|
5
|
-
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
|
6
|
-
# SPDX-License-Identifier: MIT
|
|
7
|
-
import base64
|
|
8
|
-
import copy
|
|
9
|
-
import os
|
|
10
|
-
import re
|
|
11
|
-
from io import BytesIO
|
|
12
|
-
from math import ceil
|
|
13
|
-
from typing import Dict, List, Tuple, Union
|
|
14
|
-
|
|
15
|
-
import requests
|
|
16
|
-
from PIL import Image
|
|
17
|
-
|
|
18
|
-
from autogen.agentchat import utils
|
|
19
|
-
|
|
20
|
-
# Parameters for token counting for images for different models
|
|
21
|
-
MODEL_PARAMS = {
    # One entry per model family: edge limits in pixels, tile edge in pixels,
    # and the flat / per-tile token costs used by the image token estimate.
    "gpt-4-vision": dict(max_edge=2048, min_edge=768, tile_size=512, base_token_count=85, token_multiplier=170),
    "gpt-4o-mini": dict(max_edge=2048, min_edge=768, tile_size=512, base_token_count=2833, token_multiplier=5667),
    "gpt-4o": dict(max_edge=2048, min_edge=768, tile_size=512, base_token_count=85, token_multiplier=170),
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image:
    """
    Loads an image and returns a PIL Image object.

    Parameters:
        image_file (str, or Image): The filename, URL, data URI, or base64 string of the image,
            or an already loaded PIL image.

    Returns:
        Image.Image: The PIL Image object. A PIL image passed in is returned unchanged;
            anything loaded from a string is converted to RGB.

    Raises:
        requests.RequestException: If the URL cannot be fetched (timeout, connection error,
            or an HTTP error status).
    """
    if isinstance(image_file, Image.Image):
        # Already a PIL Image object
        return image_file

    # Remove one pair of surrounding quotes if present (double first, then single)
    if image_file.startswith('"') and image_file.endswith('"'):
        image_file = image_file[1:-1]
    if image_file.startswith("'") and image_file.endswith("'"):
        image_file = image_file[1:-1]

    # Any `data:image/<subtype>;base64,` prefix, not just png/jpeg: otherwise e.g. a gif or
    # webp data URI falls through to the raw-base64 branch with its prefix still attached.
    data_uri_prefix = re.match(r"data:image/[\w.+-]+;base64,", image_file)

    if image_file.startswith(("http://", "https://")):
        # A URL file. Without a timeout requests can wait on a stalled server indefinitely,
        # and without the status check an error page would be handed to PIL as image bytes.
        response = requests.get(image_file, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    elif data_uri_prefix:
        # A data URI. Drop the prefix and decode the base64 payload.
        image = _to_pil(image_file[data_uri_prefix.end() :])
    elif os.path.exists(image_file):
        # A local file
        image = Image.open(image_file)
    else:
        # base64 encoded string
        image = _to_pil(image_file)

    return image.convert("RGB")
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def get_image_data(image_file: Union[str, Image.Image], use_b64=True) -> Union[str, bytes]:
    """
    Loads an image and returns its PNG data either as raw bytes or as a base64-encoded string.

    The image is loaded with `get_pil_image`, saved in memory in PNG format, and the
    resulting binary content is returned directly or base64-encoded, depending on `use_b64`.

    Parameters:
        image_file (str, or Image): The path to the image file, a URL to an image, a base64-encoded
            string of the image, or a PIL image.
        use_b64 (bool): If True, the function returns a base64-encoded string of the image data.
            If False, it returns the raw byte data of the image. Defaults to True.

    Returns:
        str: The base64-encoded PNG data (UTF-8 decoded) if `use_b64` is True.
        bytes: The raw PNG bytes if `use_b64` is False.
    """
    image = get_pil_image(image_file)

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    content = buffered.getvalue()

    if use_b64:
        return base64.b64encode(content).decode("utf-8")
    return content
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]:
    """
    Formats the input prompt by replacing image tags and returns the new prompt along with the loaded images.

    Each `<img ...>` tag whose image loads is replaced by `<image>` (or `<image N>`, numbered
    from 0 in order of successful loads). A tag whose image cannot be loaded is removed from
    the prompt and a warning is printed.

    Parameters:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - order_image_tokens (bool, optional): Whether to order the image tokens with numbers.
            It will be useful for GPT-4V. Defaults to False.

    Returns:
        - Tuple[str, List[str]]: A tuple containing the formatted string and a list of images
            (as returned by `get_image_data`, one per replaced tag).
    """
    images = []

    def _swap_tag(match):
        # Produce the replacement text for one <img ...> tag, collecting its image data.
        image_location = match.group(1)
        try:
            img_data = get_image_data(image_location)
        except Exception as e:
            # Remove the token
            print(f"Warning! Unable to load image from {image_location}, because of {e}")
            return ""

        # The index is the number of images loaded so far, so failed tags do not leave gaps.
        new_token = f"<image {len(images)}>" if order_image_tokens else "<image>"
        images.append(img_data)
        return new_token

    # A single substitution pass rewrites each tag at its own position, rather than
    # searching the partially rewritten prompt for the tag text again.
    new_prompt = re.sub(r"<img ([^>]+)>", _swap_tag, prompt)

    return new_prompt, images
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
def pil_to_data_uri(image: Image.Image) -> str:
    """
    Serialise a PIL Image as PNG and wrap it in a data URI.

    Parameters:
        image (Image.Image): The PIL Image object.

    Returns:
        str: The data URI string produced by `convert_base64_to_data_uri`.
    """
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
    return convert_base64_to_data_uri(encoded_png)
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def convert_base64_to_data_uri(base64_image):
    """
    Wrap a base64-encoded image in a `data:<mime>;base64,<payload>` URI.

    The MIME type is guessed from the leading bytes of the decoded payload
    (JPEG, PNG, GIF, WEBP); anything unrecognised is labelled image/jpeg.
    """

    def _get_mime_type_from_data_uri(base64_image):
        # Sniff the decoded bytes for a known file signature.
        raw = base64.b64decode(base64_image)
        if raw.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if raw.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if raw.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if raw.startswith(b"RIFF") and raw[8:12] == b"WEBP":
            return "image/webp"
        # use jpeg for unknown formats, best guess.
        return "image/jpeg"

    return f"data:{_get_mime_type_from_data_uri(base64_image)};base64,{base64_image}"
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dict]]:
    """
    Split a prompt containing image tags into a list of text and image items.

    Args:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - img_format (str): what image format should be used. One of "uri", "url", "pil".

    Returns:
        - List[Union[str, dict]]: A list of alternating text and image dictionary items,
            always ending with a text item for whatever follows the last kept tag.
    """
    assert img_format in ["uri", "url", "pil"]

    segments = []
    cursor = 0

    # Walk over every image tag found in the prompt
    for tag in utils.parse_tags_from_content("img", prompt):
        image_location = tag["attr"]["src"]
        try:
            if img_format == "pil":
                payload = get_pil_image(image_location)
            elif img_format == "uri":
                payload = convert_base64_to_data_uri(get_image_data(image_location))
            elif img_format == "url":
                payload = image_location
            else:
                raise ValueError(f"Unknown image format {img_format}")
        except Exception as e:
            # Warn and skip this tag; its text stays part of the next text item.
            print(f"Warning! Unable to load image from {image_location}, because {e}")
            continue

        # Text between the previous kept tag and this one, then the image itself
        segments.append({"type": "text", "text": prompt[cursor : tag["match"].start()]})
        segments.append({"type": "image_url", "image_url": {"url": payload}})
        cursor = tag["match"].end()

    # Whatever text remains after the last kept tag
    segments.append({"type": "text", "text": prompt[cursor:]})
    return segments
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def extract_img_paths(paragraph: str) -> list:
    """
    Pull image paths (URLs or local paths) out of a text paragraph.

    A path is any whitespace-free run ending in .jpg, .jpeg, .png, .gif or .bmp
    (case-insensitive), with or without an http(s):// prefix.

    Parameters:
        paragraph (str): The input text paragraph.

    Returns:
        list: A list of extracted image paths, in order of appearance.
    """
    matcher = re.compile(
        r"\b(?:http[s]?://\S+\.(?:jpg|jpeg|png|gif|bmp)|\S+\.(?:jpg|jpeg|png|gif|bmp))\b", re.IGNORECASE
    )
    return matcher.findall(paragraph)
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
def _to_pil(data: str) -> Image.Image:
    """
    Build a PIL Image from base64 encoded image data.

    The string is base64-decoded to bytes, the bytes are wrapped in a BytesIO,
    and that buffer is opened with PIL.

    Parameters:
        data (str): The encoded image data string.

    Returns:
        Image.Image: The PIL Image object created from the input data.
    """
    raw_bytes = base64.b64decode(data)
    stream = BytesIO(raw_bytes)
    return Image.open(stream)
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
def message_formatter_pil_to_b64(messages: List[Dict]) -> List[Dict]:
    """
    Converts the PIL images in the messages to base64 encoded data URIs.

    This function iterates over a list of message dictionaries. For each message,
    if it contains a 'content' key with a list of items, it looks for items
    with an 'image_url' key. When the 'url' under 'image_url' is a PIL image, it is
    replaced by a base64 encoded data URI. URLs that are not PIL images (for example
    a plain URL or an existing data URI string) are left untouched.

    Messages with list content are deep-copied before being modified, so the input
    is never mutated; all other messages are passed through as the same object.

    Parameters:
        messages (List[Dict]): A list of message dictionaries. Each dictionary
                               may contain a 'content' key with a list of items,
                               some of which might be image URLs.

    Returns:
        List[Dict]: A new list of message dictionaries with PIL images in the
                    'image_url' key converted to base64 encoded data URIs.

    Example Input:
        [
            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
            {'content': [
                {'type': 'text', 'text': "What's the breed of this dog here? \n"},
                {'type': 'image_url', 'image_url': {'url': a PIL.Image.Image}},
                {'type': 'text', 'text': '.'}],
            'role': 'user'}
        ]

    Example Output:
        [
            {'content': [{'type': 'text', 'text': 'You are a helpful AI assistant.'}], 'role': 'system'},
            {'content': [
                {'type': 'text', 'text': "What's the breed of this dog here? \n"},
                {'type': 'image_url', 'image_url': {'url': a B64 Image}},
                {'type': 'text', 'text': '.'}],
            'role': 'user'}
        ]
    """
    new_messages = []
    for message in messages:
        # Handle the new GPT messages format.
        if isinstance(message, dict) and "content" in message and isinstance(message["content"], list):
            message = copy.deepcopy(message)
            for item in message["content"]:
                if not (isinstance(item, dict) and "image_url" in item):
                    continue
                image_url = item["image_url"]
                # Only PIL images need converting; a string URL has no `.save` and would crash.
                if isinstance(image_url, dict) and isinstance(image_url.get("url"), Image.Image):
                    image_url["url"] = pil_to_data_uri(image_url["url"])

        new_messages.append(message)

    return new_messages
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def num_tokens_from_gpt_image(
    image_data: Union[str, Image.Image], model: str = "gpt-4-vision", low_quality: bool = False
) -> int:
    """
    Estimate how many tokens an image costs for a GPT vision model.

    The image size is first scaled down so the longest edge does not exceed the model's
    `max_edge`, then so the shortest edge does not exceed its `min_edge`. The result is
    covered with square tiles of `tile_size` pixels and the total is
    `base_token_count + token_multiplier * number_of_tiles`, all taken from MODEL_PARAMS.

    Reference: https://openai.com/api/pricing/

    Args:
        image_data : Union[str, Image.Image]: The image data which can either be a base64
            encoded string, a URL, a file path, or a PIL Image object.
        model: str: The model being used for image processing. Matched by substring against
            "gpt-4-vision", "gpt-4-turbo", "gpt-4v", "gpt-4-v", "gpt-4o-mini" and "gpt-4o".
        low_quality: bool: If True, only the model's base token count is returned.

    Returns:
        int: The total number of tokens required for processing the image.

    Raises:
        ValueError: If `model` matches none of the supported model names.

    Examples:
    --------
    >>> from PIL import Image
    >>> img = Image.new('RGB', (2500, 2500), color = 'red')
    >>> num_tokens_from_gpt_image(img, model="gpt-4-vision")
    765
    """

    image = get_pil_image(image_data)  # PIL Image
    width, height = image.size

    # Substring markers per family, checked in order ("gpt-4o-mini" must precede "gpt-4o").
    families = (
        (("gpt-4-vision", "gpt-4-turbo", "gpt-4v", "gpt-4-v"), "gpt-4-vision"),
        (("gpt-4o-mini",), "gpt-4o-mini"),
        (("gpt-4o",), "gpt-4o"),
    )
    for markers, family in families:
        if any(marker in model for marker in markers):
            params = MODEL_PARAMS[family]
            break
    else:
        raise ValueError(
            f"Model {model} is not supported. Choose 'gpt-4-vision', 'gpt-4-turbo', 'gpt-4v', 'gpt-4-v', 'gpt-4o', or 'gpt-4o-mini'."
        )

    if low_quality:
        return params["base_token_count"]

    # 1. Shrink so the longest edge fits
    longest = max(width, height)
    if longest > params["max_edge"]:
        ratio = params["max_edge"] / longest
        width, height = int(width * ratio), int(height * ratio)

    # 2. Shrink further so the shortest edge fits
    shortest = min(width, height)
    if shortest > params["min_edge"]:
        ratio = params["min_edge"] / shortest
        width, height = int(width * ratio), int(height * ratio)

    # 3. Tiles needed to cover the scaled image
    tile = params["tile_size"]
    tile_count = ceil(width / tile) * ceil(height / tile)

    return params["base_token_count"] + params["token_multiplier"] * tile_count
|