ag2 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ag2 might be problematic. Click here for more details.

Files changed (112) hide show
  1. ag2-0.3.2.dist-info/LICENSE +201 -0
  2. ag2-0.3.2.dist-info/METADATA +490 -0
  3. ag2-0.3.2.dist-info/NOTICE.md +19 -0
  4. ag2-0.3.2.dist-info/RECORD +112 -0
  5. ag2-0.3.2.dist-info/WHEEL +5 -0
  6. ag2-0.3.2.dist-info/top_level.txt +1 -0
  7. autogen/__init__.py +17 -0
  8. autogen/_pydantic.py +116 -0
  9. autogen/agentchat/__init__.py +26 -0
  10. autogen/agentchat/agent.py +142 -0
  11. autogen/agentchat/assistant_agent.py +85 -0
  12. autogen/agentchat/chat.py +306 -0
  13. autogen/agentchat/contrib/__init__.py +0 -0
  14. autogen/agentchat/contrib/agent_builder.py +785 -0
  15. autogen/agentchat/contrib/agent_optimizer.py +450 -0
  16. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  17. autogen/agentchat/contrib/capabilities/agent_capability.py +21 -0
  18. autogen/agentchat/contrib/capabilities/generate_images.py +297 -0
  19. autogen/agentchat/contrib/capabilities/teachability.py +406 -0
  20. autogen/agentchat/contrib/capabilities/text_compressors.py +72 -0
  21. autogen/agentchat/contrib/capabilities/transform_messages.py +92 -0
  22. autogen/agentchat/contrib/capabilities/transforms.py +565 -0
  23. autogen/agentchat/contrib/capabilities/transforms_util.py +120 -0
  24. autogen/agentchat/contrib/capabilities/vision_capability.py +217 -0
  25. autogen/agentchat/contrib/gpt_assistant_agent.py +545 -0
  26. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  27. autogen/agentchat/contrib/graph_rag/document.py +24 -0
  28. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +76 -0
  29. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +50 -0
  30. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +56 -0
  31. autogen/agentchat/contrib/img_utils.py +390 -0
  32. autogen/agentchat/contrib/llamaindex_conversable_agent.py +114 -0
  33. autogen/agentchat/contrib/llava_agent.py +176 -0
  34. autogen/agentchat/contrib/math_user_proxy_agent.py +471 -0
  35. autogen/agentchat/contrib/multimodal_conversable_agent.py +128 -0
  36. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  37. autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
  38. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +701 -0
  39. autogen/agentchat/contrib/society_of_mind_agent.py +203 -0
  40. autogen/agentchat/contrib/text_analyzer_agent.py +76 -0
  41. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  42. autogen/agentchat/contrib/vectordb/base.py +243 -0
  43. autogen/agentchat/contrib/vectordb/chromadb.py +326 -0
  44. autogen/agentchat/contrib/vectordb/mongodb.py +559 -0
  45. autogen/agentchat/contrib/vectordb/pgvectordb.py +958 -0
  46. autogen/agentchat/contrib/vectordb/qdrant.py +334 -0
  47. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  48. autogen/agentchat/contrib/web_surfer.py +305 -0
  49. autogen/agentchat/conversable_agent.py +2904 -0
  50. autogen/agentchat/groupchat.py +1666 -0
  51. autogen/agentchat/user_proxy_agent.py +109 -0
  52. autogen/agentchat/utils.py +207 -0
  53. autogen/browser_utils.py +291 -0
  54. autogen/cache/__init__.py +10 -0
  55. autogen/cache/abstract_cache_base.py +78 -0
  56. autogen/cache/cache.py +182 -0
  57. autogen/cache/cache_factory.py +85 -0
  58. autogen/cache/cosmos_db_cache.py +150 -0
  59. autogen/cache/disk_cache.py +109 -0
  60. autogen/cache/in_memory_cache.py +61 -0
  61. autogen/cache/redis_cache.py +128 -0
  62. autogen/code_utils.py +745 -0
  63. autogen/coding/__init__.py +22 -0
  64. autogen/coding/base.py +113 -0
  65. autogen/coding/docker_commandline_code_executor.py +262 -0
  66. autogen/coding/factory.py +45 -0
  67. autogen/coding/func_with_reqs.py +203 -0
  68. autogen/coding/jupyter/__init__.py +22 -0
  69. autogen/coding/jupyter/base.py +32 -0
  70. autogen/coding/jupyter/docker_jupyter_server.py +164 -0
  71. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  72. autogen/coding/jupyter/jupyter_client.py +224 -0
  73. autogen/coding/jupyter/jupyter_code_executor.py +161 -0
  74. autogen/coding/jupyter/local_jupyter_server.py +168 -0
  75. autogen/coding/local_commandline_code_executor.py +410 -0
  76. autogen/coding/markdown_code_extractor.py +44 -0
  77. autogen/coding/utils.py +57 -0
  78. autogen/exception_utils.py +46 -0
  79. autogen/extensions/__init__.py +0 -0
  80. autogen/formatting_utils.py +76 -0
  81. autogen/function_utils.py +362 -0
  82. autogen/graph_utils.py +148 -0
  83. autogen/io/__init__.py +15 -0
  84. autogen/io/base.py +105 -0
  85. autogen/io/console.py +43 -0
  86. autogen/io/websockets.py +213 -0
  87. autogen/logger/__init__.py +11 -0
  88. autogen/logger/base_logger.py +140 -0
  89. autogen/logger/file_logger.py +287 -0
  90. autogen/logger/logger_factory.py +29 -0
  91. autogen/logger/logger_utils.py +42 -0
  92. autogen/logger/sqlite_logger.py +459 -0
  93. autogen/math_utils.py +356 -0
  94. autogen/oai/__init__.py +33 -0
  95. autogen/oai/anthropic.py +428 -0
  96. autogen/oai/bedrock.py +600 -0
  97. autogen/oai/cerebras.py +264 -0
  98. autogen/oai/client.py +1148 -0
  99. autogen/oai/client_utils.py +167 -0
  100. autogen/oai/cohere.py +453 -0
  101. autogen/oai/completion.py +1216 -0
  102. autogen/oai/gemini.py +469 -0
  103. autogen/oai/groq.py +281 -0
  104. autogen/oai/mistral.py +279 -0
  105. autogen/oai/ollama.py +576 -0
  106. autogen/oai/openai_utils.py +810 -0
  107. autogen/oai/together.py +343 -0
  108. autogen/retrieve_utils.py +487 -0
  109. autogen/runtime_logging.py +163 -0
  110. autogen/token_count_utils.py +257 -0
  111. autogen/types.py +20 -0
  112. autogen/version.py +7 -0
@@ -0,0 +1,334 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import abc
8
+ import logging
9
+ import os
10
+ from typing import Callable, List, Optional, Sequence, Tuple, Union
11
+
12
+ from .base import Document, ItemID, QueryResults, VectorDB
13
+ from .utils import get_logger
14
+
15
+ try:
16
+ from qdrant_client import QdrantClient, models
17
+ except ImportError:
18
+ raise ImportError("Please install qdrant-client: `pip install qdrant-client`")
19
+
20
# Module-level logger for this file, built by the package's `get_logger` helper.
logger = get_logger(__name__)

# Type alias for one embedding vector: a sequence of floats or a sequence of ints.
Embeddings = Union[Sequence[float], Sequence[int]]
23
+
24
+
25
class EmbeddingFunction(abc.ABC):
    """Abstract callable that turns a batch of texts into embedding vectors.

    Concrete subclasses must override `__call__`; the class cannot be
    instantiated until they do.
    """

    @abc.abstractmethod
    def __call__(self, inputs: List[str]) -> List[Embeddings]:
        """Embed every text in `inputs`.

        Args:
            inputs (List[str]): The texts to embed.

        Returns:
            List[Embeddings]: The embedding vectors for `inputs`.

        Raises:
            NotImplementedError: Always, when the base implementation itself is invoked.
        """
        raise NotImplementedError()
29
+
30
+
31
class FastEmbedEmbeddingFunction(EmbeddingFunction):
    """Embedding function backed by FastEmbed - https://qdrant.github.io/fastembed."""

    def __init__(
        self,
        model_name: str = "BAAI/bge-small-en-v1.5",
        batch_size: int = 256,
        cache_dir: Optional[str] = None,
        threads: Optional[int] = None,
        parallel: Optional[int] = None,
        **kwargs,
    ):
        """Create the underlying `fastembed.TextEmbedding` model.

        Args:
            model_name (str): Name of the model to load. Defaults to `"BAAI/bge-small-en-v1.5"`.
            batch_size (int): Batch size used when encoding. Larger values use more memory but are
                faster. Defaults to 256.
            cache_dir (str, optional): Path of the model cache directory. It can also be set through
                the `FASTEMBED_CACHE_PATH` env variable.
            threads (int, optional): Number of threads a single onnxruntime session can use.
            parallel (int, optional): If `>1`, data-parallel encoding is used (recommended for large
                datasets). If `0`, all available cores are used. If `None`, no data-parallel
                processing is done and the default onnxruntime threading applies. Defaults to None.
            **kwargs: Additional options passed through to `fastembed.TextEmbedding`.

        Raises:
            ValueError: If the `fastembed` package cannot be imported. The previous documentation
                also listed a malformed `model_name` (expected `<org>/<model>`); that check is not
                performed here and would presumably come from fastembed itself.
        """
        # Imported lazily so fastembed is only required when this class is actually used.
        try:
            from fastembed import TextEmbedding
        except ImportError as exc:
            message = "The 'fastembed' package is not installed. Please install it with `pip install fastembed`"
            raise ValueError(message) from exc

        self._batch_size = batch_size
        self._parallel = parallel
        self._model = TextEmbedding(model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs)

    def __call__(self, inputs: List[str]) -> List[Embeddings]:
        """Embed `inputs` with the configured batch size and parallelism.

        Args:
            inputs (List[str]): The texts to embed.

        Returns:
            List[Embeddings]: One plain Python list per embedding yielded by the model.
        """
        vectors = []
        for vector in self._model.embed(inputs, batch_size=self._batch_size, parallel=self._parallel):
            # Each yielded embedding is converted with `.tolist()` into a plain list.
            vectors.append(vector.tolist())
        return vectors
74
+
75
+
76
class QdrantVectorDB(VectorDB):
    """
    A vector database implementation that uses Qdrant as the backend.
    """

    def __init__(
        self,
        *,
        client=None,
        embedding_function: EmbeddingFunction = None,
        content_payload_key: str = "_content",
        metadata_payload_key: str = "_metadata",
        collection_options: Optional[dict] = None,
        **kwargs,
    ) -> None:
        """
        Initialize the vector database.

        Args:
            client: qdrant_client.QdrantClient | An instance of QdrantClient. Defaults to an in-memory client.
            embedding_function: Callable | The embedding function used to generate the vector representation
                of the documents. Defaults to FastEmbedEmbeddingFunction.
            content_payload_key: str | The payload key under which the document content is stored.
            metadata_payload_key: str | The payload key under which the document metadata is stored.
            collection_options: dict | Extra keyword arguments passed to `QdrantClient.create_collection`.
                Defaults to no extra options.
            kwargs: dict | Additional keyword arguments. They are accepted but not used.
        """
        self.client: QdrantClient = client or QdrantClient(location=":memory:")
        self.embedding_function = embedding_function or FastEmbedEmbeddingFunction()
        # Build a fresh dict per instance; a shared `{}` default would leak mutations across instances.
        self.collection_options = {} if collection_options is None else collection_options
        self.content_payload_key = content_payload_key
        self.metadata_payload_key = metadata_payload_key
        self.type = "qdrant"

    def create_collection(self, collection_name: str, overwrite: bool = False, get_or_create: bool = True) -> None:
        """
        Create a collection in the vector database.
        Case 1. if the collection does not exist, create the collection.
        Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
        Case 3. the collection exists and overwrite is False, if get_or_create is True, the existing collection
            is left untouched, otherwise a ValueError is raised.

        Args:
            collection_name: str | The name of the collection.
            overwrite: bool | Whether to overwrite the collection if it exists. Default is False.
            get_or_create: bool | Whether to accept an already existing collection. Default is True.

        Returns:
            None

        Raises:
            ValueError: If the collection exists, overwrite is False and get_or_create is False.
        """
        # The vector size is discovered by embedding a probe string. This happens before any deletion,
        # so a failing embedding function cannot leave the collection deleted but not recreated.
        embeddings_size = len(self.embedding_function(["test"])[0])

        if self.client.collection_exists(collection_name) and overwrite:
            self.client.delete_collection(collection_name)

        if not self.client.collection_exists(collection_name):
            self.client.create_collection(
                collection_name,
                vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE),
                **self.collection_options,
            )
        elif not get_or_create:
            raise ValueError(f"Collection {collection_name} already exists.")

    def get_collection(self, collection_name: str = None):
        """
        Get the collection from the vector database.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            Any | Whatever `QdrantClient.get_collection` returns for the collection.

        Raises:
            ValueError: If collection_name is None.
        """
        if collection_name is None:
            raise ValueError("The collection name is required.")

        return self.client.get_collection(collection_name)

    def delete_collection(self, collection_name: str) -> None:
        """Delete the collection from the vector database.

        Args:
            collection_name: str | The name of the collection.

        Returns:
            Any | The result of `QdrantClient.delete_collection`.
        """
        return self.client.delete_collection(collection_name)

    def insert_docs(self, docs: List[Document], collection_name: str = None, upsert: bool = False) -> None:
        """
        Insert documents into the collection of the vector database.

        Args:
            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
            collection_name: str | The name of the collection. Default is None.
            upsert: bool | Whether to update the document if it exists. Default is False.
                When False and any of the ids already exist, a warning is logged and nothing is written.

        Returns:
            None

        Raises:
            ValueError: If any document has no content or no id.
        """
        if not docs:
            return
        if any(doc.get("content") is None for doc in docs):
            raise ValueError("The document content is required.")
        if any(doc.get("id") is None for doc in docs):
            raise ValueError("The document id is required.")

        if not upsert and not self._validate_upsert_ids(collection_name, [doc["id"] for doc in docs]):
            logger.warning("Some IDs already exist. Skipping insert")
            # Actually skip: without this return, existing documents would be overwritten.
            return

        self.client.upsert(collection_name, points=self._documents_to_points(docs))

    def update_docs(self, docs: List[Document], collection_name: str = None) -> None:
        """
        Update existing documents in the collection of the vector database.

        Args:
            docs: List[Document] | A list of documents. Each document is a TypedDict `Document`.
            collection_name: str | The name of the collection. Default is None.

        Returns:
            Any | The result of `QdrantClient.upsert`, or None when docs is empty.

        Raises:
            ValueError: If any document has no id or no content, or if some ids are not in the collection.
        """
        if not docs:
            return
        if any(doc.get("id") is None for doc in docs):
            raise ValueError("The document id is required.")
        if any(doc.get("content") is None for doc in docs):
            raise ValueError("The document content is required.")
        if self._validate_update_ids(collection_name, [doc["id"] for doc in docs]):
            return self.client.upsert(collection_name, points=self._documents_to_points(docs))

        raise ValueError("Some IDs do not exist. Skipping update")

    def delete_docs(self, ids: List[ItemID], collection_name: str = None, **kwargs) -> None:
        """
        Delete documents from the collection of the vector database.

        Args:
            ids: List[ItemID] | A list of document ids. Each id is a typed `ItemID`.
            collection_name: str | The name of the collection. Default is None.
            kwargs: Dict | Additional keyword arguments. They are accepted but not used.

        Returns:
            None
        """
        self.client.delete(collection_name, ids)

    def retrieve_docs(
        self,
        queries: List[str],
        collection_name: str = None,
        n_results: int = 10,
        distance_threshold: float = 0,
        **kwargs,
    ) -> QueryResults:
        """
        Retrieve documents from the collection of the vector database based on the queries.

        Args:
            queries: List[str] | A list of queries. Each query is a string.
            collection_name: str | The name of the collection. Default is None.
            n_results: int | The number of relevant documents to return. Default is 10.
            distance_threshold: float | The threshold passed to Qdrant as `score_threshold` for every query.
                Default is 0.
            kwargs: Dict | Additional keyword arguments. They are accepted but not used.

        Returns:
            QueryResults | The query results. One list per query, each holding tuples of the document and
                its score.
        """
        # NOTE(review): earlier documentation said a negative threshold disables filtering and that
        # smaller distances are kept. The value is always forwarded unchanged as Qdrant's
        # `score_threshold`; confirm the intended semantics.
        embeddings = self.embedding_function(queries)
        requests = [
            models.SearchRequest(
                vector=embedding,
                limit=n_results,
                score_threshold=distance_threshold,
                with_payload=True,
                with_vector=False,
            )
            for embedding in embeddings
        ]

        batch_results = self.client.search_batch(collection_name, requests)
        return [self._scored_points_to_documents(results) for results in batch_results]

    def get_docs_by_ids(
        self, ids: List[ItemID] = None, collection_name: str = None, include=True, **kwargs
    ) -> List[Document]:
        """
        Retrieve documents from the collection of the vector database based on the ids.

        Args:
            ids: List[ItemID] | A list of document ids. If None, will return all the documents. Default is None.
            collection_name: str | The name of the collection. Default is None.
            include: Any | Passed to Qdrant as `with_payload`. Default is True.
            kwargs: dict | Additional keyword arguments. They are accepted but not used.

        Returns:
            List[Document] | The results.
        """
        if ids is None:
            # `scroll` returns one page plus the offset of the next page; follow the offsets until
            # it is None so that every document is returned rather than only the first page.
            results = []
            offset = None
            while True:
                points, offset = self.client.scroll(
                    collection_name=collection_name,
                    with_payload=include,
                    with_vectors=True,
                    offset=offset,
                )
                results.extend(points)
                if offset is None:
                    break
        else:
            results = self.client.retrieve(collection_name, ids=ids, with_payload=include, with_vectors=True)
        return [self._point_to_document(result) for result in results]

    def _point_to_document(self, point) -> Document:
        """Convert a Qdrant point into a `Document` dict with id, content, metadata and embedding."""
        # The payload can be None when it was not requested; treat that as an empty payload.
        payload = point.payload or {}
        return {
            "id": point.id,
            "content": payload.get(self.content_payload_key, ""),
            "metadata": payload.get(self.metadata_payload_key, {}),
            "embedding": point.vector,
        }

    def _points_to_documents(self, points) -> List[Document]:
        """Convert a sequence of Qdrant points into `Document` dicts."""
        return [self._point_to_document(point) for point in points]

    def _scored_point_to_document(self, scored_point: models.ScoredPoint) -> Tuple[Document, float]:
        """Convert a scored point into a `(Document, score)` tuple."""
        return self._point_to_document(scored_point), scored_point.score

    def _documents_to_points(self, documents: List[Document]):
        """Embed the documents' contents and build one `models.PointStruct` per document."""
        contents = [document["content"] for document in documents]
        embeddings = self.embedding_function(contents)
        return [
            models.PointStruct(
                id=document["id"],
                vector=embedding,
                payload={
                    self.content_payload_key: document.get("content"),
                    self.metadata_payload_key: document.get("metadata"),
                },
            )
            for document, embedding in zip(documents, embeddings)
        ]

    def _scored_points_to_documents(self, scored_points: List[models.ScoredPoint]) -> List[Tuple[Document, float]]:
        """Convert a list of scored points into `(Document, score)` tuples."""
        return [self._scored_point_to_document(scored_point) for scored_point in scored_points]

    def _validate_update_ids(self, collection_name: str, ids: List[str]) -> bool:
        """
        Validates all the IDs exist in the collection

        Returns:
            bool | True when every id was found; False (after logging a warning) otherwise.
        """
        retrieved_ids = [
            point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False)
        ]

        if missing_ids := set(ids) - set(retrieved_ids):
            # `Logger.log` takes the level first; use `warning` so the message is not taken as the level.
            logger.warning(f"Missing IDs: {missing_ids}. Skipping update")
            return False

        return True

    def _validate_upsert_ids(self, collection_name: str, ids: List[str]) -> bool:
        """
        Validate none of the IDs exist in the collection

        Returns:
            bool | True when none of the ids were found; False (after logging a warning) otherwise.
        """
        retrieved_ids = [
            point.id for point in self.client.retrieve(collection_name, ids=ids, with_payload=False, with_vectors=False)
        ]

        if existing_ids := set(ids) & set(retrieved_ids):
            # `Logger.log` takes the level first; use `warning` so the message is not taken as the level.
            logger.warning(f"Existing IDs: {existing_ids}.")
            return False

        return True
@@ -0,0 +1,126 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import logging
8
+ from typing import Any, Dict, List
9
+
10
+ from termcolor import colored
11
+
12
+ from .base import QueryResults
13
+
14
+
15
class ColoredLogger(logging.Logger):
    """Logger whose level methods color the message before logging it.

    Each level method accepts an extra keyword-only `color` argument. The
    message and that color are passed to `colored`, and the result is handed
    to the matching `logging.Logger` method together with the remaining
    arguments.
    """

    def __init__(self, name, level=logging.NOTSET):
        super().__init__(name, level)

    def debug(self, msg, *args, color=None, **kwargs):
        """Log `msg` at DEBUG level; no color by default."""
        super().debug(colored(msg, color), *args, **kwargs)

    def info(self, msg, *args, color=None, **kwargs):
        """Log `msg` at INFO level; no color by default."""
        super().info(colored(msg, color), *args, **kwargs)

    def warning(self, msg, *args, color="yellow", **kwargs):
        """Log `msg` at WARNING level; yellow by default."""
        super().warning(colored(msg, color), *args, **kwargs)

    def error(self, msg, *args, color="light_red", **kwargs):
        """Log `msg` at ERROR level; light red by default."""
        super().error(colored(msg, color), *args, **kwargs)

    def critical(self, msg, *args, color="red", **kwargs):
        """Log `msg` at CRITICAL level; red by default."""
        super().critical(colored(msg, color), *args, **kwargs)

    def fatal(self, msg, *args, color="red", **kwargs):
        """Alias of `critical`; red by default."""
        # `logging.Logger.fatal` forwards to `self.critical`, which already colors the message.
        # Coloring here as well would wrap the message twice and drop a custom `color`, so the
        # call is delegated once with the requested color.
        self.critical(msg, *args, color=color, **kwargs)
36
+
37
+
38
def get_logger(name: str, level: int = logging.INFO) -> ColoredLogger:
    """Create a `ColoredLogger` with one formatted stream handler attached.

    Args:
        name (str): The logger name.
        level (int): The logger level. Defaults to `logging.INFO`.

    Returns:
        ColoredLogger: A newly constructed logger. It is instantiated directly, so every call
            builds a new logger object with its own handler.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))

    colored_logger = ColoredLogger(name, level)
    colored_logger.addHandler(stream_handler)
    return colored_logger
45
+
46
+
47
# Module-level logger for this file, built by `get_logger` above.
logger = get_logger(__name__)
48
+
49
+
50
def filter_results_by_distance(results: QueryResults, distance_threshold: float = -1) -> QueryResults:
    """Drop results whose distance is not below a threshold.

    Args:
        results: QueryResults | The query results. List[List[Tuple[Document, float]]]
        distance_threshold: The maximum distance allowed for results. Filtering only happens when
            this value is greater than 0.

    Returns:
        QueryResults | The input object itself when no filtering applies; otherwise new lists that
            contain only the entries whose distance is strictly smaller than the threshold.
    """
    # Guard clause: a threshold that is not positive means "do not filter".
    if not distance_threshold > 0:
        return results

    filtered = []
    for hits in results:
        kept = []
        for document, distance in hits:
            if distance < distance_threshold:
                kept.append((document, distance))
        filtered.append(kept)
    return filtered
65
+
66
+
67
def chroma_results_to_query_results(data_dict: Dict[str, List[List[Any]]], special_key="distances") -> QueryResults:
    """Converts a dictionary with list-of-list values to a list of lists of tuples.

    Args:
        data_dict: A dictionary where keys map to lists of lists or None.
        special_key: The key in the dictionary containing the special values
            for each tuple.

    Returns:
        One list per entry of `data_dict[special_key]`. Each inner list holds
        tuples of a sub-dictionary and the value from the special_key. The
        sub-dictionary takes its keys from the other list-of-list entries of
        `data_dict`, with the last character of each key (the plural "s")
        removed. Entries that are None, empty, or not lists of lists are
        ignored.

    Raises:
        KeyError: If `special_key` is not in `data_dict`.

    Example:
        data_dict = {
            "key1s": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            "key2s": [["a", "b", "c"], ["c", "d", "e"], ["e", "f", "g"]],
            "key3s": None,
            "key4s": [["x", "y", "z"], ["1", "2", "3"], ["4", "5", "6"]],
            "distances": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
        }

        results = [
            [
                ({"key1": 1, "key2": "a", "key4": "x"}, 0.1),
                ({"key1": 2, "key2": "b", "key4": "y"}, 0.2),
                ({"key1": 3, "key2": "c", "key4": "z"}, 0.3),
            ],
            [
                ({"key1": 4, "key2": "c", "key4": "1"}, 0.4),
                ({"key1": 5, "key2": "d", "key4": "2"}, 0.5),
                ({"key1": 6, "key2": "e", "key4": "3"}, 0.6),
            ],
            [
                ({"key1": 7, "key2": "e", "key4": "4"}, 0.7),
                ({"key1": 8, "key2": "f", "key4": "5"}, 0.8),
                ({"key1": 9, "key2": "g", "key4": "6"}, 0.9),
            ],
        ]
    """
    # The emptiness check comes before `value[0]` so that an empty list is skipped instead of
    # raising IndexError.
    keys = [
        key
        for key, value in data_dict.items()
        if key != special_key and value is not None and len(value) > 0 and isinstance(value[0], list)
    ]

    result = []
    for i, distances in enumerate(data_dict[special_key]):
        sub_result = []
        for j, distance in enumerate(distances):
            sub_dict = {
                key[:-1]: data_dict[key][i][j]  # remove 's' in the end from key
                for key in keys
                if len(data_dict[key]) > i
            }
            sub_result.append((sub_dict, distance))
        result.append(sub_result)

    return result