flock-core 0.5.0b5__py3-none-any.whl → 0.5.0b6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic. Click here for more details.
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/METADATA +2 -41
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/RECORD +5 -15
- flock/tools/__init__.py +0 -0
- flock/tools/azure_tools.py +0 -781
- flock/tools/code_tools.py +0 -167
- flock/tools/file_tools.py +0 -149
- flock/tools/github_tools.py +0 -157
- flock/tools/markdown_tools.py +0 -204
- flock/tools/system_tools.py +0 -9
- flock/tools/text_tools.py +0 -809
- flock/tools/web_tools.py +0 -90
- flock/tools/zendesk_tools.py +0 -147
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/WHEEL +0 -0
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.0b5.dist-info → flock_core-0.5.0b6.dist-info}/licenses/LICENSE +0 -0
flock/tools/azure_tools.py
DELETED
|
@@ -1,781 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from typing import Any
|
|
3
|
-
|
|
4
|
-
from azure.core.credentials import AzureKeyCredential
|
|
5
|
-
from azure.search.documents import SearchClient
|
|
6
|
-
from azure.search.documents.indexes import SearchIndexClient
|
|
7
|
-
from azure.search.documents.indexes.models import (
|
|
8
|
-
ExhaustiveKnnAlgorithmConfiguration,
|
|
9
|
-
HnswAlgorithmConfiguration,
|
|
10
|
-
SearchableField,
|
|
11
|
-
SearchField,
|
|
12
|
-
SearchFieldDataType,
|
|
13
|
-
SearchIndex,
|
|
14
|
-
SimpleField,
|
|
15
|
-
VectorSearch,
|
|
16
|
-
VectorSearchProfile,
|
|
17
|
-
)
|
|
18
|
-
from azure.search.documents.models import VectorizedQuery
|
|
19
|
-
from azure.storage.blob import (
|
|
20
|
-
BlobServiceClient,
|
|
21
|
-
ContentSettings,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
from flock.core.logging.trace_and_logged import traced_and_logged
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _get_default_endpoint() -> str:
    """Return the Azure Search endpoint configured in the environment.

    Returns:
        The value of the AZURE_SEARCH_ENDPOINT environment variable.

    Raises:
        ValueError: If the variable is missing or set to an empty string.
    """
    configured = os.environ.get("AZURE_SEARCH_ENDPOINT")
    if configured:
        return configured
    raise ValueError("AZURE_SEARCH_ENDPOINT environment variable is not set")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _get_default_api_key() -> str:
    """Return the Azure Search API key configured in the environment.

    Returns:
        The value of the AZURE_SEARCH_API_KEY environment variable.

    Raises:
        ValueError: If the variable is missing or set to an empty string.
    """
    configured = os.environ.get("AZURE_SEARCH_API_KEY")
    if configured:
        return configured
    raise ValueError("AZURE_SEARCH_API_KEY environment variable is not set")
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def _get_default_index_name() -> str:
    """Return the Azure Search index name configured in the environment.

    Returns:
        The value of the AZURE_SEARCH_INDEX_NAME environment variable.

    Raises:
        ValueError: If the variable is missing or set to an empty string.
    """
    configured = os.environ.get("AZURE_SEARCH_INDEX_NAME")
    if configured:
        return configured
    raise ValueError("AZURE_SEARCH_INDEX_NAME environment variable is not set")
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
@traced_and_logged
def azure_search_initialize_clients(
    endpoint: str | None = None,
    api_key: str | None = None,
    index_name: str | None = None,
) -> dict[str, Any]:
    """Build the Azure AI Search client objects used by the search tools.

    Args:
        endpoint: Service endpoint URL. When falsy, AZURE_SEARCH_ENDPOINT is used.
        api_key: Service API key. When falsy, AZURE_SEARCH_API_KEY is used.
        index_name: Index to bind a SearchClient to. When None, a non-empty
            AZURE_SEARCH_INDEX_NAME environment value is used if present.

    Returns:
        A dictionary that always holds "index_client" and additionally holds
        "search_client" when an index name could be resolved.

    Raises:
        ValueError: If the endpoint or API key is neither passed nor set in
            the environment.
    """
    resolved_endpoint = endpoint or _get_default_endpoint()
    key_credential = AzureKeyCredential(api_key or _get_default_api_key())

    clients = {
        "index_client": SearchIndexClient(
            endpoint=resolved_endpoint, credential=key_credential
        ),
    }

    # Only an explicit None triggers the environment fallback; an empty
    # string passed by the caller is left alone and yields no search client.
    if index_name is None:
        index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME") or None

    if not index_name:
        return clients

    clients["search_client"] = SearchClient(
        endpoint=resolved_endpoint,
        index_name=index_name,
        credential=key_credential,
    )
    return clients
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
@traced_and_logged
def azure_search_create_index(
    index_name: str | None = None,
    fields: list[SearchField] | None = None,
    vector_search: VectorSearch | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Create (or update) a search index in Azure AI Search.

    Args:
        index_name: Name of the search index to create (defaults to AZURE_SEARCH_INDEX_NAME env var)
        fields: List of field definitions for the index; required and non-empty
        vector_search: Optional vector search configuration
        endpoint: The Azure AI Search service endpoint URL (defaults to AZURE_SEARCH_ENDPOINT env var)
        api_key: The Azure AI Search API key (defaults to AZURE_SEARCH_API_KEY env var)

    Returns:
        Dictionary with "index_name", the list of field names under "fields",
        and "created" set to True.

    Raises:
        ValueError: If a required setting is missing from both the arguments
            and the environment, or if no fields are supplied.
    """
    # Use environment variables as defaults if not provided
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    # Reject both None and an empty list locally instead of sending a
    # field-less index definition to the service.
    if not fields:
        raise ValueError("Fields must be provided for index creation")

    index_client = azure_search_initialize_clients(endpoint, api_key)["index_client"]

    index = SearchIndex(
        name=index_name, fields=fields, vector_search=vector_search
    )
    result = index_client.create_or_update_index(index)

    return {
        "index_name": result.name,
        "fields": [field.name for field in result.fields],
        "created": True,
    }
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
@traced_and_logged
def azure_search_upload_documents(
    documents: list[dict[str, Any]],
    index_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Upload a batch of documents to an Azure AI Search index.

    Args:
        documents: Documents to upload, each given as a dictionary.
        index_name: Target index (falls back to AZURE_SEARCH_INDEX_NAME).
        endpoint: Service endpoint URL (falls back to AZURE_SEARCH_ENDPOINT).
        api_key: Service API key (falls back to AZURE_SEARCH_API_KEY).

    Returns:
        Dictionary with the "succeeded", "failed" and "total" counts computed
        from the per-document results returned by the client.
    """
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    search_client = azure_search_initialize_clients(
        endpoint, api_key, index_name
    )["search_client"]

    outcomes = search_client.upload_documents(documents=documents)

    total = len(outcomes)
    ok_count = len([item for item in outcomes if item.succeeded])

    return {
        "succeeded": ok_count,
        "failed": total - ok_count,
        "total": total,
    }
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
@traced_and_logged
def azure_search_query(
    search_text: str | None = None,
    filter: str | None = None,
    select: list[str] | None = None,
    top: int | None = 50,
    vector: list[float] | None = None,
    vector_field: str | None = None,
    vector_k: int | None = 10,
    index_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> list[dict[str, Any]]:
    """Search documents in an Azure AI Search index.

    Args:
        search_text: Optional text to search for (keyword search)
        filter: Optional OData filter expression (the name shadows the
            builtin but is kept for caller compatibility)
        select: Optional list of fields to return
        top: Maximum number of results to return
        vector: Optional vector for vector search
        vector_field: Name of the field containing vectors for vector search
        vector_k: Number of nearest neighbors to retrieve in vector search
        index_name: Name of the search index (defaults to AZURE_SEARCH_INDEX_NAME env var)
        endpoint: The Azure AI Search service endpoint URL (defaults to AZURE_SEARCH_ENDPOINT env var)
        api_key: The Azure AI Search API key (defaults to AZURE_SEARCH_API_KEY env var)

    Returns:
        List of search results as dictionaries. Every result carries a
        "text_vector" key set to an empty string, whether or not the
        document had that field.
    """
    # Use environment variables as defaults if not provided
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    search_client = azure_search_initialize_clients(
        endpoint, api_key, index_name
    )["search_client"]

    # A vector query is only issued when both the vector and the field to
    # search are supplied; otherwise the request is a plain keyword search.
    vector_queries = None
    if vector and vector_field:
        # `fields` is documented as a string of field names, so the name is
        # passed as-is rather than wrapped in a list.
        vector_queries = [
            VectorizedQuery(vector=vector, k=vector_k, fields=vector_field)
        ]

    results = search_client.search(
        search_text=search_text,
        filter=filter,
        select=select,
        top=top,
        vector_queries=vector_queries,
    )

    # Blank out the "text_vector" entry in each result. This overwrites the
    # value (or adds the key) rather than removing it.
    return [{**dict(result), "text_vector": ""} for result in results]
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
@traced_and_logged
def azure_search_get_document(
    key: str,
    select: list[str] | None = None,
    index_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Fetch a single document from an Azure AI Search index by its key.

    Args:
        key: Key of the document to fetch.
        select: Optional list of fields to return.
        index_name: Target index (falls back to AZURE_SEARCH_INDEX_NAME).
        endpoint: Service endpoint URL (falls back to AZURE_SEARCH_ENDPOINT).
        api_key: Service API key (falls back to AZURE_SEARCH_API_KEY).

    Returns:
        The fetched document converted to a plain dictionary.
    """
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    search_client = azure_search_initialize_clients(
        endpoint, api_key, index_name
    )["search_client"]

    return dict(search_client.get_document(key=key, selected_fields=select))
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
@traced_and_logged
def azure_search_delete_documents(
    keys: list[str],
    key_field_name: str = "id",
    index_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Delete documents from an Azure AI Search index by key.

    Args:
        keys: Keys of the documents to delete.
        key_field_name: Name of the index's key field (defaults to "id").
        index_name: Target index (falls back to AZURE_SEARCH_INDEX_NAME).
        endpoint: Service endpoint URL (falls back to AZURE_SEARCH_ENDPOINT).
        api_key: Service API key (falls back to AZURE_SEARCH_API_KEY).

    Returns:
        Dictionary with the "succeeded", "failed" and "total" counts computed
        from the per-document results returned by the client.
    """
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    search_client = azure_search_initialize_clients(
        endpoint, api_key, index_name
    )["search_client"]

    # Each deletion entry only carries the key field.
    stubs = []
    for doc_key in keys:
        stubs.append({key_field_name: doc_key})

    outcomes = search_client.delete_documents(documents=stubs)

    total = len(outcomes)
    ok_count = len([item for item in outcomes if item.succeeded])

    return {
        "succeeded": ok_count,
        "failed": total - ok_count,
        "total": total,
    }
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
@traced_and_logged
def azure_search_list_indexes(
    endpoint: str | None = None, api_key: str | None = None
) -> list[dict[str, Any]]:
    """Summarize every index in the Azure AI Search service.

    Args:
        endpoint: Service endpoint URL (falls back to AZURE_SEARCH_ENDPOINT).
        api_key: Service API key (falls back to AZURE_SEARCH_API_KEY).

    Returns:
        One dictionary per index with its "name", the list of field names
        under "fields", and the number of fields under "field_count".
    """
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()

    index_client = azure_search_initialize_clients(endpoint, api_key)["index_client"]

    summaries = []
    for search_index in index_client.list_indexes():
        field_names = [field.name for field in search_index.fields]
        summaries.append(
            {
                "name": search_index.name,
                "fields": field_names,
                "field_count": len(search_index.fields),
            }
        )
    return summaries
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
@traced_and_logged
def azure_search_get_index_statistics(
    index_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Report the document count of an Azure AI Search index.

    Args:
        index_name: Target index (falls back to AZURE_SEARCH_INDEX_NAME).
        endpoint: Service endpoint URL (falls back to AZURE_SEARCH_ENDPOINT).
        api_key: Service API key (falls back to AZURE_SEARCH_API_KEY).

    Returns:
        Dictionary with a single "document_count" entry holding the value
        returned by the search client's get_document_count().
    """
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    search_client = azure_search_initialize_clients(
        endpoint, api_key, index_name
    )["search_client"]

    return {"document_count": search_client.get_document_count()}
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
@traced_and_logged
def azure_search_create_vector_index(
    fields: list[dict[str, Any]],
    vector_dimensions: int,
    index_name: str | None = None,
    algorithm_kind: str = "hnsw",
    endpoint: str | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """Create a vector search index in Azure AI Search.

    Args:
        fields: List of field configurations. Each dict needs "name"; non-vector
            fields also need "type" ("string", "int", "double", "boolean" or
            "collection"). Optional flags: "searchable", "filterable",
            "sortable", "key", "vector".
        vector_dimensions: Dimensions applied to every field flagged "vector"
        index_name: Name of the search index (defaults to AZURE_SEARCH_INDEX_NAME env var)
        algorithm_kind: Vector search algorithm ("hnsw"; any other value selects exhaustive KNN)
        endpoint: The Azure AI Search service endpoint URL (defaults to AZURE_SEARCH_ENDPOINT env var)
        api_key: The Azure AI Search API key (defaults to AZURE_SEARCH_API_KEY env var)

    Returns:
        On success, a dictionary with "index_name", "vector_fields",
        "vector_dimensions", "algorithm" and "created" set to True. On failure
        (unsupported field type or an error from the service), a dictionary
        with "error" and "created" set to False.

    Raises:
        ValueError: If a required setting is missing from both the arguments
            and the environment.
        KeyError: If a field configuration lacks "name", or a non-vector field
            lacks "type".
    """
    # Use environment variables as defaults if not provided
    endpoint = endpoint or _get_default_endpoint()
    api_key = api_key or _get_default_api_key()
    index_name = index_name or _get_default_index_name()

    index_client = azure_search_initialize_clients(endpoint, api_key)["index_client"]

    # Shared between the vector fields and the VectorSearch profile below so
    # the fields actually reference the profile that gets created.
    profile_name = "vector-profile"

    scalar_types = {
        "string": SearchFieldDataType.String,
        "int": SearchFieldDataType.Int32,
        "double": SearchFieldDataType.Double,
        "boolean": SearchFieldDataType.Boolean,
        "collection": SearchFieldDataType.Collection(SearchFieldDataType.String),
    }

    index_fields = []
    vector_fields = []

    for field_config in fields:
        field_name = field_config["name"]

        if field_config.get("vector", False):
            # A vector field must be a searchable Collection(Edm.Single) with
            # dimensions and a profile; otherwise the index has nothing that
            # vector queries can target.
            index_fields.append(
                SearchField(
                    name=field_name,
                    type=SearchFieldDataType.Collection(
                        SearchFieldDataType.Single
                    ),
                    searchable=True,
                    vector_search_dimensions=vector_dimensions,
                    vector_search_profile_name=profile_name,
                )
            )
            vector_fields.append(field_name)
            continue

        field_type = field_config["type"]
        field_key = field_config.get("key", False)
        field_filterable = field_config.get("filterable", False)
        field_sortable = field_config.get("sortable", False)

        if field_config.get("searchable", False) and field_type == "string":
            field = SearchableField(
                name=field_name,
                type=SearchFieldDataType.String,
                key=field_key,
                filterable=field_filterable,
                sortable=field_sortable,
            )
        else:
            data_type = scalar_types.get(field_type)
            if data_type is None:
                # Report in the same shape as a service-side failure instead
                # of sending a field with no type.
                return {
                    "error": (
                        f"Unsupported field type '{field_type}' "
                        f"for field '{field_name}'"
                    ),
                    "created": False,
                }
            field = SimpleField(
                name=field_name,
                type=data_type,
                key=field_key,
                filterable=field_filterable,
                sortable=field_sortable,
            )

        index_fields.append(field)

    # Set up vector search configuration
    if algorithm_kind.lower() == "hnsw":
        algorithm_config = HnswAlgorithmConfiguration(
            name="hnsw-config",
            parameters={"m": 4, "efConstruction": 400, "efSearch": 500},
        )
    else:
        algorithm_config = ExhaustiveKnnAlgorithmConfiguration(
            name="exhaustive-config"
        )

    vector_search = VectorSearch(
        algorithms=[algorithm_config],
        profiles=[
            VectorSearchProfile(
                name=profile_name,
                algorithm_configuration_name=algorithm_config.name,
            )
        ],
    )

    index = SearchIndex(
        name=index_name, fields=index_fields, vector_search=vector_search
    )

    try:
        result = index_client.create_or_update_index(index)
        return {
            "index_name": result.name,
            "vector_fields": vector_fields,
            "vector_dimensions": vector_dimensions,
            "algorithm": algorithm_kind,
            "created": True,
        }
    except Exception as e:
        # Failures are reported as data, not raised.
        return {"error": str(e), "created": False}
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
# --- Azure Blob Storage Tools ---
|
|
505
|
-
|
|
506
|
-
def _get_blob_service_client(conn_string_env_var: str) -> BlobServiceClient:
    """Create a BlobServiceClient from a connection string held in the environment.

    Args:
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.

    Returns:
        A BlobServiceClient built from that connection string.

    Raises:
        ValueError: If the variable is missing or set to an empty string.
    """
    connection_string = os.environ.get(conn_string_env_var)
    if connection_string:
        return BlobServiceClient.from_connection_string(connection_string)
    raise ValueError(f"Environment variable '{conn_string_env_var}' for Azure Storage connection string is not set or is empty.")
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
@traced_and_logged
def azure_storage_list_containers(conn_string_env_var: str) -> list[str]:
    """Return the names of all containers in the Azure Storage account.

    Args:
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.

    Returns:
        A list of container names.
    """
    service = _get_blob_service_client(conn_string_env_var)
    names = []
    for container in service.list_containers():
        names.append(container.name)
    return names
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
@traced_and_logged
def azure_storage_create_container(
    container_name: str, conn_string_env_var: str
) -> dict[str, Any]:
    """Create a container in the Azure Storage account.

    Args:
        container_name: Name of the container to create.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.

    Returns:
        A dictionary with "container_name" and "created", plus "message" on
        success or "error" when the create call raised.
    """
    service = _get_blob_service_client(conn_string_env_var)
    try:
        service.create_container(container_name)
    except Exception as exc:
        # Failures are reported as data, not raised.
        return {"container_name": container_name, "created": False, "error": str(exc)}
    return {
        "container_name": container_name,
        "created": True,
        "message": f"Container '{container_name}' created successfully.",
    }
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
@traced_and_logged
def azure_storage_delete_container(
    container_name: str, conn_string_env_var: str
) -> dict[str, Any]:
    """Delete a container from the Azure Storage account.

    Args:
        container_name: Name of the container to delete.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.

    Returns:
        A dictionary with "container_name" and "deleted", plus "message" on
        success or "error" when the delete call raised.
    """
    service = _get_blob_service_client(conn_string_env_var)
    try:
        service.delete_container(container_name)
    except Exception as exc:
        # Failures are reported as data, not raised.
        return {"container_name": container_name, "deleted": False, "error": str(exc)}
    return {
        "container_name": container_name,
        "deleted": True,
        "message": f"Container '{container_name}' deleted successfully.",
    }
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
@traced_and_logged
def azure_storage_list_blobs(
    container_name: str, conn_string_env_var: str
) -> list[str]:
    """Return the names of all blobs in a container.

    Args:
        container_name: Name of the container to list.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.

    Returns:
        A list of blob names.
    """
    service = _get_blob_service_client(conn_string_env_var)
    container = service.get_container_client(container_name)
    names = []
    for blob in container.list_blobs():
        names.append(blob.name)
    return names
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
@traced_and_logged
def azure_storage_upload_blob_text(
    container_name: str,
    blob_name: str,
    text_content: str,
    conn_string_env_var: str,
    overwrite: bool = True,
) -> dict[str, Any]:
    """Upload a string as a UTF-8 encoded text/plain blob.

    Args:
        container_name: Name of the target container.
        blob_name: Name of the blob to write.
        text_content: Text to upload.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.
        overwrite: Passed through to the upload call. Defaults to True.

    Returns:
        A dictionary with "container_name", "blob_name" and "uploaded", plus
        "message" on success or "error" when encoding or upload raised.
    """
    service = _get_blob_service_client(conn_string_env_var)
    target = service.get_blob_client(container=container_name, blob=blob_name)
    try:
        target.upload_blob(
            text_content.encode('utf-8'),
            overwrite=overwrite,
            content_settings=ContentSettings(content_type='text/plain'),
        )
    except Exception as exc:
        # Failures are reported as data, not raised.
        return {"container_name": container_name, "blob_name": blob_name, "uploaded": False, "error": str(exc)}
    return {
        "container_name": container_name,
        "blob_name": blob_name,
        "uploaded": True,
        "message": "Text content uploaded successfully.",
    }
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
@traced_and_logged
def azure_storage_upload_blob_bytes(
    container_name: str,
    blob_name: str,
    bytes_content: bytes,
    conn_string_env_var: str,
    overwrite: bool = True,
) -> dict[str, Any]:
    """Upload raw bytes as an application/octet-stream blob.

    Args:
        container_name: Name of the target container.
        blob_name: Name of the blob to write.
        bytes_content: Bytes to upload.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.
        overwrite: Passed through to the upload call. Defaults to True.

    Returns:
        A dictionary with "container_name", "blob_name" and "uploaded", plus
        "message" on success or "error" when the upload raised.
    """
    service = _get_blob_service_client(conn_string_env_var)
    target = service.get_blob_client(container=container_name, blob=blob_name)
    try:
        target.upload_blob(
            bytes_content,
            overwrite=overwrite,
            content_settings=ContentSettings(content_type='application/octet-stream'),
        )
    except Exception as exc:
        # Failures are reported as data, not raised.
        return {"container_name": container_name, "blob_name": blob_name, "uploaded": False, "error": str(exc)}
    return {
        "container_name": container_name,
        "blob_name": blob_name,
        "uploaded": True,
        "message": "Bytes content uploaded successfully.",
    }
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
@traced_and_logged
def azure_storage_upload_blob_from_file(
    container_name: str,
    blob_name: str,
    file_path: str,
    conn_string_env_var: str,
    overwrite: bool = True,
) -> dict[str, Any]:
    """Upload the contents of a local file as a blob.

    Args:
        container_name: Name of the target container.
        blob_name: Name of the blob to write.
        file_path: Path of the local file to read.
        conn_string_env_var: Name of the environment variable that stores the
            Azure Storage connection string.
        overwrite: Passed through to the upload call. Defaults to True.

    Returns:
        A dictionary with "container_name", "blob_name", "file_path" and
        "uploaded", plus "message" on success or "error" on failure
        ("File not found." when the local file does not exist).
    """
    service = _get_blob_service_client(conn_string_env_var)
    target = service.get_blob_client(container=container_name, blob=blob_name)

    # Fields shared by every outcome; the status keys are added per branch.
    outcome = {"container_name": container_name, "blob_name": blob_name, "file_path": file_path}
    try:
        with open(file_path, "rb") as source:
            target.upload_blob(source, overwrite=overwrite)
    except FileNotFoundError:
        outcome["uploaded"] = False
        outcome["error"] = "File not found."
    except Exception as exc:
        outcome["uploaded"] = False
        outcome["error"] = str(exc)
    else:
        outcome["uploaded"] = True
        outcome["message"] = "File uploaded successfully."
    return outcome
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
@traced_and_logged
def azure_storage_download_blob_to_text(
    container_name: str, blob_name: str, conn_string_env_var: str
) -> str:
    """Downloads a blob's content as text.

    Args:
        container_name: The name of the container.
        blob_name: The name of the blob to download.
        conn_string_env_var: The name of the environment variable holding the Azure Storage connection string.

    Returns:
        The blob content decoded as UTF-8.

    Raises:
        ValueError: If the connection-string environment variable is unset or empty.
        Exception: If the download fails or the content is not valid UTF-8;
            the original error is attached as the cause.
    """
    blob_service_client = _get_blob_service_client(conn_string_env_var)
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    try:
        download_stream = blob_client.download_blob()
        return download_stream.readall().decode('utf-8')
    except Exception as e:
        # Chain explicitly so the underlying SDK or decode error stays
        # available as __cause__.
        raise Exception(f"Failed to download or decode blob '{blob_name}' from container '{container_name}': {e!s}") from e
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
@traced_and_logged
|
|
683
|
-
def azure_storage_download_blob_to_bytes(container_name: str, blob_name: str, conn_string_env_var: str) -> bytes:
    """Fetch the complete content of a blob and return it undecoded.

    Args:
        container_name: The name of the container.
        blob_name: The name of the blob to download.
        conn_string_env_var: The name of the environment variable holding the Azure Storage connection string.

    Returns:
        The blob content as bytes.

    Note:
        No errors are caught inside this function; anything raised while
        creating the client or downloading propagates out of it.
    """
    service = _get_blob_service_client(conn_string_env_var)
    downloader = service.get_blob_client(container=container_name, blob=blob_name).download_blob()
    return downloader.readall()
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
@traced_and_logged
|
|
701
|
-
def azure_storage_download_blob_to_file(container_name: str, blob_name: str, file_path: str, conn_string_env_var: str, overwrite: bool = True) -> dict[str, Any]:
    """Downloads a blob to a local file.

    The download is started before the local file is opened, so a failure to
    reach the blob (e.g. it does not exist) does not truncate an existing
    file or leave an empty one behind. The content is streamed into the file
    instead of being buffered in memory first.

    Args:
        container_name: The name of the container.
        blob_name: The name of the blob to download.
        file_path: The local path to save the downloaded file.
        conn_string_env_var: The name of the environment variable holding the Azure Storage connection string.
        overwrite: Whether to overwrite the local file if it exists. Defaults to True.

    Returns:
        A dictionary with download status. It always contains
        ``container_name``, ``blob_name``, ``file_path`` and ``downloaded``;
        on success it also contains ``message``, on failure ``error``.
    """
    result: dict[str, Any] = {"container_name": container_name, "blob_name": blob_name, "file_path": file_path}
    exists_error = "File exists and overwrite is False."

    # Cheap early exit that avoids creating a client when the answer is known.
    if not overwrite and os.path.exists(file_path):
        return {**result, "downloaded": False, "error": exists_error}

    blob_service_client = _get_blob_service_client(conn_string_env_var)
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    try:
        # Start the download first: errors such as a missing blob surface here,
        # before the destination file has been created or truncated.
        download_stream = blob_client.download_blob()
        # "xb" makes the no-overwrite guarantee atomic: if the file appeared
        # after the check above, open() fails instead of clobbering it.
        with open(file_path, "wb" if overwrite else "xb") as download_file:
            download_stream.readinto(download_file)
        return {**result, "downloaded": True, "message": "File downloaded successfully."}
    except FileExistsError:
        return {**result, "downloaded": False, "error": exists_error}
    except Exception as e:
        return {**result, "downloaded": False, "error": str(e)}
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
@traced_and_logged
|
|
729
|
-
def azure_storage_delete_blob(container_name: str, blob_name: str, conn_string_env_var: str) -> dict[str, Any]:
    """Remove a single blob from a container and report the outcome.

    Args:
        container_name: The name of the container.
        blob_name: The name of the blob to delete.
        conn_string_env_var: The name of the environment variable holding the Azure Storage connection string.

    Returns:
        A dictionary with deletion status: ``container_name``, ``blob_name``
        and ``deleted``, plus ``message`` on success or ``error`` (the
        stringified exception) when the delete call fails.
    """
    service = _get_blob_service_client(conn_string_env_var)
    target = service.get_blob_client(container=container_name, blob=blob_name)
    outcome: dict[str, Any] = {"container_name": container_name, "blob_name": blob_name}
    try:
        target.delete_blob()
    except Exception as exc:
        # Failures of the delete call are reported in the result, not raised.
        outcome["deleted"] = False
        outcome["error"] = str(exc)
    else:
        outcome["deleted"] = True
        outcome["message"] = "Blob deleted successfully."
    return outcome
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
@traced_and_logged
|
|
750
|
-
def azure_storage_get_blob_properties(container_name: str, blob_name: str, conn_string_env_var: str) -> dict[str, Any]:
    """Look up a blob and return a summary of its properties.

    Args:
        container_name: The name of the container.
        blob_name: The name of the blob.
        conn_string_env_var: The name of the environment variable holding the Azure Storage connection string.

    Returns:
        On success, a dictionary with ``name``, ``container``, ``size``,
        ``content_type``, ``last_modified`` (ISO 8601 string or None) and
        ``etag``. If fetching or reading the properties fails, a dictionary
        with ``container_name``, ``blob_name`` and ``error`` instead.
    """
    service = _get_blob_service_client(conn_string_env_var)
    client = service.get_blob_client(container=container_name, blob=blob_name)
    try:
        props = client.get_blob_properties()
        summary: dict[str, Any] = {
            "name": props.name,
            "container": props.container,
            "size": props.size,
            "content_type": props.content_settings.content_type,
        }
        # Serialise the timestamp only when one is present.
        if props.last_modified:
            summary["last_modified"] = props.last_modified.isoformat()
        else:
            summary["last_modified"] = None
        summary["etag"] = props.etag
        # Add more properties as needed
        return summary
    except Exception as exc:
        return {"container_name": container_name, "blob_name": blob_name, "error": str(exc)}
|
|
776
|
-
|
|
777
|
-
# Potential future tools:
|
|
778
|
-
# - azure_storage_set_blob_metadata
|
|
779
|
-
# - azure_storage_get_blob_metadata
|
|
780
|
-
# - azure_storage_generate_sas_token_blob
|
|
781
|
-
# - azure_storage_copy_blob
|